In [3]:
import tensorflow as tf
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import random
# sklearn modules
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
# Metric mean squared error, the lower the better
from sklearn.metrics import mean_squared_error

def read_csv(csv_file):
    '''Load ``<csv_file>.csv`` and return it as a float array and a DataFrame.

    Parameters
    ----------
    csv_file : str
        Path of the file WITHOUT the ``.csv`` extension (it is appended here).

    Returns
    -------
    array : numpy.ndarray
        float64 copy of the data columns. A file with a single data column
        is flattened to shape ``(n,)`` instead of ``(n, 1)``.
    df : pandas.DataFrame
        The parsed frame: first row used as header, first column as index.
    '''
    # DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
    # pd.read_csv is the supported replacement. (from_csv additionally tried
    # to parse the index as dates; that is not done here.)
    df = pd.read_csv(csv_file + ".csv", header=0, sep=',', index_col=0)
    array = np.asarray(df, dtype="float64")
    # Single-column files are returned as a flat vector
    if array.ndim == 2 and array.shape[1] == 1:
        array = array.ravel()
    return array, df

def publish_pred(y_pred, file_name):
    '''Write predictions to ``file_name`` using the sample-submission layout.

    The file ``data/reg_sample_submission.csv`` is read as a template (first
    row as header, first column as index), its ``Output`` column is set to
    ``y_pred`` and the result is saved as CSV.

    Parameters
    ----------
    y_pred : array-like
        Values assigned to the ``Output`` column.
    file_name : str
        Destination path of the CSV file.
    '''
    # DataFrame.from_csv was removed in pandas 1.0; pd.read_csv replaces it
    submission = pd.read_csv("data/reg_sample_submission.csv",
                             header=0, sep=',', index_col=0)
    submission["Output"] = y_pred
    submission.to_csv(file_name)

# Load data

In [6]:
# Training inputs and targets. read_csv already returns float64 arrays, so no
# extra cast is needed.
tr_in, df_in = read_csv("data/reg_train_in")
tr_out, df_out = read_csv("data/reg_train_out")
# Raw test inputs; te_in is later checked for NaNs (np.isnan on column 2)
te_in, df = read_csv("data/reg_test_in")

# Test inputs whose NaNs were replaced using GP inference.
# This file has no header row and no index column.
# DataFrame.from_csv was removed in pandas 1.0; pd.read_csv replaces it.
df = pd.read_csv("data/reg_test_gp2.csv", header=None, sep=',', index_col=None)
te_gp = np.asarray(df, dtype="float64")

# Split data into training set and validation set
Hold out between 20% and 30% of the training data as a validation set so that overfitting can be detected.

In [7]:
# One random 70/30 shuffle split of the labelled data.
# NOTE: the seed itself is drawn at random (0..20), so the split differs
# between runs.
cv = ShuffleSplit(n_splits=1,
                  test_size=0.30,
                  train_size=None,
                  random_state=random.randint(0, 20))

# n_splits=1 -> the generator yields exactly one (train, validation) index pair
train_index, test_index = next(cv.split(tr_in))
X_tr, y_tr = tr_in[train_index], tr_out[train_index]
X_vl, y_vl = tr_in[test_index], tr_out[test_index]

# Clean data: normalise train and val set according to train set
#### Note: normalisation is performed after the splitting (GOAL: ensure NN has normalised inputs)

In [8]:
# Standardisation statistics are estimated on the training split only and
# then re-used for every other set.
tr_scale = preprocessing.StandardScaler()
tr_scale.fit(X_tr)

# Targets are reshaped to column vectors: shape (*, 1) works, (*, ) fails
X_train, y_train = tr_scale.transform(X_tr), y_tr.reshape(-1, 1)
X_val, y_val = tr_scale.transform(X_vl), y_vl.reshape(-1, 1)

# Whole labelled set (train + validation), scaled with the same statistics
X, y = tr_scale.transform(tr_in), tr_out.reshape(-1, 1)

# DATA NOW PREPARED TO ENTER NETWORK
#X_test = tr_scale.transform(te_gp)

# Computation graph // Shape neural net

In [9]:
def shape_tensor(T):
    '''Return the static shape of tensor ``T`` as a tuple.

    Dimensions that are not known statically are reported as ``None``.
    '''
    # as_list() yields plain ints/None. Indexing the shape and reading
    # `.value` only works while its entries are Dimension objects and breaks
    # once they are plain ints (TF2 behaviour).
    return tuple(T.get_shape().as_list())

def hidden_layer(input_tensor, out_nodes, keep_prob=0.95):
    '''Append a fully connected ReLU layer followed by dropout.

    Parameters
    ----------
    input_tensor : tf.Tensor
        2-D tensor; its second static dimension is used as the layer's
        input width.
    out_nodes : int
        Number of units in the new layer.
    keep_prob : float, optional
        Keep probability handed to ``tf.nn.dropout``. Defaults to 0.95, the
        value that was previously hard-coded.

    Returns
    -------
    tf.Tensor
        ``dropout(relu(input_tensor . W + b))``.

    Notes
    -----
    Dropout is part of the returned op and is never switched off, so it is
    also active whenever the graph is evaluated for validation or prediction.
    '''
    in_nodes = shape_tensor(input_tensor)[1]
    # Small random weights, small positive bias (0.1)
    W = tf.Variable(tf.truncated_normal([in_nodes, out_nodes], stddev=0.1))
    b = tf.Variable(tf.ones([out_nodes]) / 10)
    logits = tf.add(tf.matmul(input_tensor, W), b)
    return tf.nn.dropout(tf.nn.relu(logits), keep_prob=keep_prob)

# BY DEFAULT (work best)
## Tip : avoid dim reduction techniques (eg: PCA, LDA, even autoencoder)
#### 14x80x50x50x50x1
#### Activation: ReLU
#### Weight initialisation: tf.truncated_normal([n_nodes,n_nodes],stddev=0.1)
#### Bias initialisation: tf.ones([n_nodes])/10 (small positive value)
#### Dropout prob: 0.75 (see documentation)
#### Mini-batches
#### Adaptive learning rate
#### Adam optimizer
#### No bias in output layer

In [10]:
# Placeholders for input and output
# Placeholders for input and output
n_features = 14   # width of the input placeholder
inputX = tf.placeholder(tf.float32, [None, n_features])
inputY = tf.placeholder(tf.float32, [None, 1])

n_nodes = 50      # units in every hidden layer
# Weights and bias for INPUT LAYER (sized from n_nodes, not a literal 50)
W = tf.Variable(tf.truncated_normal([n_features, n_nodes], stddev=0.1))
b = tf.Variable(tf.ones([n_nodes]) / 10)
nodes = tf.add(tf.matmul(inputX, W), b)

# USE PREVIOUS WEIGHTS
#W = tf.Variable(W_past)
#b = tf.Variable(b_past)

with tf.name_scope("feature_compression_layer") as scope:
    a = tf.nn.dropout(tf.nn.relu(nodes), keep_prob=0.95)

# ADD SUMMARY OPTIONS TO COLLECT DATA (make net understandable)
w_h = tf.summary.histogram("weights", W)
# The bias histogram was previously also tagged "weights"
b_h = tf.summary.histogram("biases", b)

# Next 4 hidden layers, each n_nodes wide (5 hidden layers in total,
# counting the input layer above)
a_out = a
for _layer in range(4):
    a_out = hidden_layer(a_out, n_nodes)

# Weights for OUTPUT LAYER (linear, no bias)
W_out = tf.Variable(tf.truncated_normal([n_nodes, 1], stddev=0.1))
#b_out = tf.Variable(tf.ones([n_nodes])/10)
y_pred = tf.matmul(a_out, W_out)

# Model Hyperparameters
![learning_rate](http://cs231n.github.io/assets/nn3/learningrates.jpeg)
## TO-DO: try different learning rates

In [11]:
# Step counter: passed to the optimizer's minimize(), which increments it once
# per training step (i.e. per mini-batch, not per pass over the data)
global_step = tf.Variable(0, trainable=False)

# Staircase exponential decay:
#   lr = starter_learning_rate * 0.96 ** floor(global_step / 500)
starter_learning_rate = 0.001
learning_rate = tf.train.exponential_decay(
    learning_rate=starter_learning_rate,
    global_step=global_step,
    decay_steps=500,
    decay_rate=0.96,
    staircase=True)

n_samples = y_train.size   # element count of the training targets
n_epochs = 15000           # number of mini-batch steps run by the training loop
display_step = 800         # losses are printed every display_step steps

def minibach(X, y, batch_size):
    '''Draw a random mini-batch of matching rows from ``X`` and ``y``.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of inputs, one sample per row.
    y : numpy.ndarray
        2-D array of targets with the same number of rows as ``X``.
    batch_size : int
        Number of distinct rows to sample.

    Returns
    -------
    X_batch, y_batch : numpy.ndarray
        The sampled rows of ``X`` and ``y`` (same row order in both).

    Raises
    ------
    ValueError
        From ``np.random.choice`` when ``batch_size`` exceeds the number of rows.
    '''
    # Count rows, not elements: y.size over-counts as soon as y has more than
    # one column and would then produce out-of-range indices.
    n_rows = y.shape[0]
    ind = np.random.choice(n_rows, size=batch_size, replace=False)
    # No per-batch re-normalisation: inputs are expected to be scaled already
    return X[ind, :], y[ind, :]

# Training step
#### Loss: sqrt(MSE)
#### Optimizer: Adam (100x better than gradient descent)

In [12]:
with tf.name_scope("loss_function"):
    # Root-mean-squared error over whatever rows are fed.
    # The divisor used to be hard-wired to the train / validation set sizes,
    # which mis-scaled the value for any other feed (e.g. the mini-batches
    # the optimizer actually sees). reduce_mean gives identical values when
    # the full train / validation sets are fed.
    squared_error = tf.square(y_pred - inputY)
    loss = tf.sqrt(tf.reduce_mean(squared_error))
    loss2 = tf.sqrt(tf.reduce_mean(squared_error))
    tf.summary.scalar("loss_train", loss)
    tf.summary.scalar("loss_val", loss2)

with tf.name_scope("train"):
    #traning_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    # Passing global_step makes every optimizer step increment the counter
    # that drives the learning-rate decay
    traning_step = tf.train.AdamOptimizer(
                            learning_rate).minimize(loss, global_step=global_step)
    
#merged_summary_op = tf.summary.merge_all()

# Initialise Session
## TO-DO: use tensorboard to display information about weights, etc.

In [13]:
init = tf.global_variables_initializer()
session = tf.Session()
session.run(init)
#summary_writer = tf.summary.FileWriter('/home/sp838/KAGGLE_thing/Regression/logs',
#                                        graph=session.graph)

batch_size = 1000  # rows drawn (without replacement) for every training step
for i in range(n_epochs):
    # One optimizer step on a random mini-batch of the training split
    X_batch, y_batch = minibach(X_train, y_train, batch_size)
    session.run(traning_step, feed_dict={inputX: X_batch, inputY: y_batch})
    if i % display_step == 0:
        # Periodic report on the full training and validation splits
        cc = session.run(loss, feed_dict={inputX: X_train, inputY: y_train})
        cc2 = session.run(loss2, feed_dict={inputX: X_val, inputY: y_val})
        # Function-call form of print: valid on both Python 2 and Python 3
        print("loss = {}, val_loss = {}".format(cc, cc2))

loss = 0.995014369488, val_loss = 0.992360949516
loss = 0.632192254066, val_loss = 0.633347630501
loss = 0.617373347282, val_loss = 0.620917916298
loss = 0.606648743153, val_loss = 0.614287793636
loss = 0.595017790794, val_loss = 0.604884624481
loss = 0.589996278286, val_loss = 0.606197416782
loss = 0.58389633894, val_loss = 0.602258145809
loss = 0.579094290733, val_loss = 0.59907323122
loss = 0.576981067657, val_loss = 0.599213957787
loss = 0.570965588093, val_loss = 0.596397042274
loss = 0.568369686604, val_loss = 0.599679768085
loss = 0.563012421131, val_loss = 0.596962630749
loss = 0.561224877834, val_loss = 0.59470063448
loss = 0.55857115984, val_loss = 0.595360159874
loss = 0.558405995369, val_loss = 0.59671652317
loss = 0.553875684738, val_loss = 0.591514766216
loss = 0.550064325333, val_loss = 0.593555271626
loss = 0.54964184761, val_loss = 0.592898130417
loss = 0.547704398632, val_loss = 0.592928349972


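# Keep training running
Mini-batches are now drawn from the full labelled set (`X`, `y`, i.e. train + validation) rather than from the training split only; the batch size in the code below stays at 1000. Because the validation rows are now trained on, `val_loss` is no longer a held-out estimate.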

In [11]:
# 5000 further optimizer steps. Batches are sampled from X / y, the full
# labelled set, so the validation rows are trained on from here onwards.
for i in range(5000):
    X_batch, y_batch = minibach(X, y, 1000)
    session.run(traning_step, feed_dict={inputX: X_batch, inputY: y_batch})
    if i % display_step == 0:
        cc = session.run(loss, feed_dict={inputX: X_train, inputY: y_train})
        cc2 = session.run(loss2, feed_dict={inputX: X_val, inputY: y_val})
        # Function-call form of print: valid on both Python 2 and Python 3
        print("loss = {}, val_loss = {}".format(cc, cc2))


loss = 0.547165155411, val_loss = 0.571042120457
loss = 0.550785541534, val_loss = 0.565855205059
loss = 0.547003030777, val_loss = 0.566982209682
loss = 0.548365056515, val_loss = 0.561747014523
loss = 0.54721146822, val_loss = 0.563212990761
loss = 0.544652223587, val_loss = 0.562495648861
loss = 0.545519828796, val_loss = 0.561309576035


# Test Neural Network

In [15]:
# Testing in the whole set
# Predictions for the whole labelled set (train + validation rows), scored
# against the known targets: MSE and its square root (RMSE)
y_ = session.run(y_pred, feed_dict={inputX: X})
score = mean_squared_error(y, y_)
# Function-call form of print: valid on both Python 2 and Python 3
print("{} {}".format(score, np.sqrt(score)))

0.302963840338 0.550421511515


## Best results: 
loss = 0.612698078156, val_loss = 0.626387417316 (after 3000 * 5)

- Watch out for the local minima with loss = 0.999330401421, val_loss = 0.99540913105

- Check on final updates of certain parameters

- Save Net weights

In [None]:
# Spot checks: current (decayed) learning rate, first-layer weights and
# first-layer activations on the training split. The results are discarded.
session.run(learning_rate)
session.run(W)
session.run(a, feed_dict={inputX: X_train})

# Snapshot of the first-layer parameters, fetched in a single run call
# (see the commented-out "USE PREVIOUS WEIGHTS" lines)
W_past, b_past = session.run([W, b])

- View summary info

In [12]:
#! tensorboard --help

In [13]:
# ! tensorboard --logdir=/home/sp838/KAGGLE_thing/Regression/logs/

# Building a separate Neural Net for NaNs and NonNaNs in X_test

In [None]:
# Types of indices in te_in
# Row indices of te_in with / without a NaN in column 2.
# Plain 1-D index arrays are used. The previous list(argwhere(...).T) form
# produced a list holding one array, which current NumPy no longer treats as
# a row index (it adds a leading axis instead).
# NOTE(review): only column 2 is inspected - confirm no other column can
# contain NaNs.
nan_mask = np.isnan(te_in[:, 2])
NaN_ind = np.flatnonzero(nan_mask)
NonNaN = np.flatnonzero(~nan_mask)

# Select data to make predictions on (scaled with the training statistics)
X_test_nonNaN = tr_scale.transform(te_in[NonNaN])

y_test_nonNaN = session.run(y_pred, feed_dict={inputX: X_test_nonNaN})

# One prediction slot per test row (sized from te_in instead of a literal
# 1800); rows with a NaN in column 2 keep the placeholder value 0
y_test = np.zeros(shape=(te_in.shape[0], 1), dtype=float)
y_test[NonNaN] = y_test_nonNaN

In [27]:
# Publish the (partial) predictions to a temporary file;
# they are loaded again in one_feature_problem.ipynb
publish_pred(y_pred=y_test,
             file_name="predictions/reg_nonNaN_pred_TF.csv")