In [14]:
%load_ext autoreload
%autoreload 2

import numpy as np
import cPickle as pickle
import sklearn.metrics as skmetrics
import csv
import sys
from NN import AutoEncoder as AE 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Convert the data to an all-binary (one-hot) encoding

In [3]:
# Read the pickled dataset and pull out the feature matrix X and the
# target vector Y, then report their shapes.
with open('Data.pkl', 'rb') as f:
    data = pickle.load(f)

X, Y = data['X'], data['Y']

# Single-argument print(...) behaves the same under Python 2 and Python 3.
for template, array in (("Y.shape : {0}", Y), ("X.shape : {0}", X)):
    print(template.format(array.shape))

Y.shape : (4209L,)
X.shape : (4209L, 376L)


In [4]:
# Column-wise maximum and minimum of X, restricted to the first eight
# columns. `maxes` is reused later in the notebook to size the one-hot
# tables, so its name must stay as is.
maxes = X.max(axis=0)[:8]
mins = X.min(axis=0)[:8]

print(maxes)
print(mins)

[55 28 51  7  4 34 12 25]
[1 1 1 1 1 3 1 1]


In [5]:
# One-hot encode the leading columns of X (one column per entry of `maxes`)
# and append the remaining columns unchanged.
n_rows = X.shape[0]
n_categorical = len(maxes)   # number of leading columns to expand
row_idx = np.arange(n_rows)

one_hot_tables = []
for i, m in enumerate(maxes):
    codes = np.asarray(X[:, i]).ravel()

    # Codes are treated as 1-based: code k switches on column k-1.
    # A code below 1 would turn into a negative index and silently wrap
    # around to the last columns of the table, so reject it explicitly.
    if codes.min() < 1:
        raise ValueError("column %d holds a category code below 1" % i)

    current_table = np.zeros((n_rows, m))
    # One vectorized assignment sets exactly one cell per row.
    current_table[row_idx, codes - 1] = 1.0
    one_hot_tables.append(current_table)

# Everything after the expanded columns is kept as is.
one_hot_tables.append(X[:, n_categorical:])
one_hot_data = np.concatenate(one_hot_tables, axis=1)
print(one_hot_data.shape)

(4209L, 584L)


In [6]:
# Persist the one-hot encoded features together with the unchanged targets.
encoded_dataset = {"X": one_hot_data, "Y": Y}
with open('Data_OneHot.pkl', 'wb') as out_file:
    pickle.dump(encoded_dataset, out_file)

## Load Data

In [2]:
# Reload the one-hot encoded dataset, shuffle its rows and split it into a
# training part and a held-out validation part.
N_TRAIN = 4000   # rows used for training; the remainder is the validation set
SPLIT_SEED = 0   # fixed so the split is identical across kernel restarts

with open('Data_OneHot.pkl', 'rb') as f:
    data = pickle.load(f)
X = data['X']
Y = data['Y']

# Stack the targets (column 0) next to the features so that both are
# permuted together by a single row shuffle.
stacked = np.concatenate([Y.reshape((Y.shape[0], 1)), X], axis=1)

# A private RandomState makes the train/validation split reproducible
# without reseeding numpy's global generator used elsewhere in the notebook.
np.random.RandomState(SPLIT_SEED).shuffle(stacked)

Y = stacked[:, :1]   # kept 2-D, shape (n_rows, 1)
X = stacked[:, 1:]

x_train, y_train = X[:N_TRAIN, :], Y[:N_TRAIN]
x_valid, y_valid = X[N_TRAIN:, :], Y[N_TRAIN:]

print("X_train shape : {0}".format(x_train.shape))
print("Y_train shape : {0}".format(y_train.shape))
print("X_valid.shape : {0}".format(x_valid.shape))
print("Y_valid shape : {0}".format(y_valid.shape))

X_train shape : (4000L, 584L)
Y_train shape : (4000L, 1L)
X_valid.shape : (209L, 584L)
Y_valid shape : (209L, 1L)


## Auto-encoder

In [5]:
# Build the auto-encoder from the project's NN.AutoEncoder module.
# NOTE(review): n_units presumably lists the hidden layer widths and the
# string selects the activation -- confirm against NN/AutoEncoder.
n_units = [64, 16]
ae_non_linearty = 'sigmoid'
ae = AE.AutoEncoder(
    X.shape[1],   # number of feature columns
    n_units,
    ae_non_linearty,
    learning_rate=0.004,
    L2=0.001,
    p_drop=0.0
)

In [6]:
# Train the auto-encoder on random mini-batches of x_train, record the
# per-epoch training and validation costs, and remember the encoder states
# of the epoch with the lowest validation cost.
n_samples = x_train.shape[0]
n_dims = x_train.shape[1]
mb_size = 64            # rows per mini-batch
nb_iter_epochs = 500    # mini-batch updates per epoch
max_epochs = 50
out_freq = 10           # print a progress report every `out_freq` epochs

train_costs = []
valid_costs = []
best_valid_score = 1e100
# NOTE(review): assumes get_encoder_states() returns a snapshot rather than
# live references to the parameters -- confirm in NN/AutoEncoder.
best_ae_encoder_states = ae.get_encoder_states()

x_rep, x_rec, cst = ae.test(x_valid, x_valid)
print("Initial valid cost: %f" % cst)
print("")

for epoch in range(max_epochs):
    epoch_train_costs = []

    for step in range(nb_iter_epochs):
        # np.random.choice draws with replacement by default.
        batch_idx = np.random.choice(n_samples, mb_size)
        batch = x_train[batch_idx, :]
        # Input and target are the same batch (reconstruction objective).
        x_rep, x_rec, cst = ae.train(batch, batch)
        epoch_train_costs.append(cst)

    train_costs.append(np.mean(epoch_train_costs))

    x_rep, x_rec, cst = ae.test(x_valid, x_valid)
    valid_costs.append(cst)

    # Keep the encoder states of the best validation epoch seen so far.
    if cst < best_valid_score:
        best_valid_score = cst
        best_ae_encoder_states = ae.get_encoder_states()

    if epoch % out_freq == 0:
        print("Epoch %d" % epoch)
        print("Avg. train cost: %f" % train_costs[-1])
        print("Avg. valid cost: %f" % valid_costs[-1])
        print("")

# Final report: last epoch, then the epoch with the lowest validation cost.
print("Epoch %d" % epoch)
print("Avg. train cost: %f" % train_costs[-1])
print("Avg. valid cost: %f" % valid_costs[-1])
print("")
print("Done")
best_epoch = np.argmin(valid_costs)
print("Best epoch = %d" % best_epoch)
print("With train cost = %f" % train_costs[best_epoch])
print("With valid cost = %f" % valid_costs[best_epoch])

Initial valid cost: 146.533890

Epoch 0
Avg. train cost: 30.262960
Avg. valid cost: 28.138475

Epoch 10
Avg. train cost: 8.896500
Avg. valid cost: 9.045081

Epoch 20
Avg. train cost: 7.152565
Avg. valid cost: 8.278563

Epoch 30
Avg. train cost: 6.593589
Avg. valid cost: 8.214208

Epoch 40
Avg. train cost: 6.284857
Avg. valid cost: 8.332643

Epoch 49
Avg. train cost: 6.123948
Avg. valid cost: 8.477248

Done
Best epoch = 27
With train cost = 6.713456
With valid cost = 8.193463


### Regressor

In [12]:
# Build the auto-encoder + regressor model from NN.AutoEncoder, reusing the
# auto-encoder settings (n_units, ae_non_linearty) defined earlier.
# NOTE(review): r_units presumably lists the regressor's hidden layer
# widths -- confirm against NN/AutoEncoder.
r_units = [64, 32]
regressor_non_linearty = 'rectifier'
aer = AE.AutoEncoderRegressor(
    X.shape[1],   # number of feature columns
    n_units,
    r_units,
    ae_non_linearty,
    regressor_non_linearty,
    learning_rate=0.004,
    L2=0.005,
    p_drop=0.05
)

# Start from the encoder states kept from the auto-encoder pre-training.
aer.set_encoder_states(best_ae_encoder_states)

In [16]:
# Train the auto-encoder + regressor on random mini-batches of
# (x_train, y_train) and track the regression cost on both the training
# batches and the validation set.
n_samples = x_train.shape[0]
n_dims = x_train.shape[1]
mb_size = 64            # rows per mini-batch
nb_iter_epochs = 500    # mini-batch updates per epoch
max_epochs = 100
out_freq = 10           # print a progress report every `out_freq` epochs
recon_ratio = 0.1       # passed through to aer.train unchanged
best_valid_score = 1e100

# Both histories hold the REGRESSION cost, not the combined cost.
train_costs = []
valid_costs = []

# NOTE(review): validation is evaluated through aer.train with 0.0 as the
# last argument, whereas training steps pass 1.0. Presumably 0.0 disables
# the parameter update -- confirm in NN/AutoEncoder.
y_reg, x_rec, ae_cst, regress_cst, cst = aer.train(
    x_valid, x_valid, y_valid, recon_ratio, 0.0)
print("Initial valid cost: %f" % regress_cst)
print("Initial valid R2 score: %f" % skmetrics.r2_score(y_valid, y_reg))
print("")

for epoch in range(max_epochs):
    epoch_train_costs = []

    for step in range(nb_iter_epochs):
        # np.random.choice draws with replacement by default.
        batch_idx = np.random.choice(n_samples, mb_size)
        batch_x = x_train[batch_idx, :]
        batch_y = y_train[batch_idx, :]
        y_reg, x_rec, ae_cst, regress_cst, cst = aer.train(
            batch_x, batch_x, batch_y, recon_ratio, 1.0)
        epoch_train_costs.append(regress_cst)

    train_costs.append(np.mean(epoch_train_costs))

    y_reg, x_rec, ae_cst, regress_cst, cst = aer.train(
        x_valid, x_valid, y_valid, recon_ratio, 0.0)
    valid_costs.append(regress_cst)

    if regress_cst < best_valid_score:
        best_valid_score = regress_cst

    if epoch % out_freq == 0:
        print("Epoch %d" % epoch)
        print("Avg. train cost: %f" % train_costs[-1])
        print("Avg. valid cost: %f" % valid_costs[-1])
        print("Valid R2 score: %f" % skmetrics.r2_score(y_valid, y_reg))
        print("")

# Final report: last epoch, then the epoch with the lowest validation cost.
print("Epoch %d" % epoch)
print("Avg. train cost: %f" % train_costs[-1])
print("Avg. valid cost: %f" % valid_costs[-1])
print("")
print("Done")
best_epoch = np.argmin(valid_costs)
print("Best epoch = %d" % best_epoch)
print("With train cost = %f" % train_costs[best_epoch])
print("With valid cost = %f" % valid_costs[best_epoch])

Initial valid cost: 51.800343
Initial valid R2 score: 0.588350



KeyboardInterrupt: 

In [38]:
# Some notes...

#TODO :
# Targets: 
#   Standardize targets + linear output
# Overfitting:
#   Try Dropout (we overfit a lot on the regression)
#   Try bigger recon_ratio
#   Try to add some noise?
# Go beyond an MLP?
#   Could another model do better than, say, this setup tuned with Spearmint?

# AE:

# A)
# n_units = [64, 16], L2=0.001
# Best epoch = 23
# With train cost = 6.754955, 6.713456
# With valid cost = 9.313946, 8.193463


# REGRESSOR: 
# 1)
# With A) + r_units = [32, 16], L2 = 0.01 learning_rate=0.004, L2=0.005, ratio =0.1, p_drop=0.0
# Best epoch = 94
# With train cost = 5.114531
# With valid cost = 36.887135

# 2)
# With A) + r_units = [32, 16], L2 = 0.01 learning_rate=0.004, L2=0.005, ratio =0.1, p_drop=0.2
# Best epoch = 49
# With train cost = 77.319702
# With valid cost = 52.567116

# 3)
# With A) + r_units = [64, 32], L2 = 0.01 learning_rate=0.004, L2=0.005, ratio =0.1, p_drop=0.1
# Best epoch = 98
# With train cost = 24.144192
# With valid cost = 48.137314

