In [42]:
import tensorflow as tf
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
# sklearn modules
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
# Metric mean squared error, the lower the better
from sklearn.metrics import mean_squared_error

def read_csv(csv_file):
    '''Load ``<csv_file>.csv`` as a float64 numpy array plus a pandas DataFrame.

    Parameters
    ----------
    csv_file : str
        Path of the file WITHOUT the ".csv" extension (it is appended here).

    Returns
    -------
    array : numpy.ndarray
        The data columns cast to float64. A file with a single data column
        is flattened to shape (n_rows,); otherwise the shape is
        (n_rows, n_columns).
    df : pandas.DataFrame
        The parsed frame; the first CSV column is used as the index and the
        first row as the header.
    '''
    csv_file = csv_file + ".csv"
    # DataFrame.from_csv was deprecated and later removed from pandas;
    # pd.read_csv with header=0 / index_col=0 reads the same layout.
    # NOTE(review): from_csv additionally tried to parse the index as dates;
    # that is not replicated here -- confirm no input file has a date index.
    df = pd.read_csv(csv_file, header=0, sep=',', index_col=0)
    array = np.asarray(df, dtype="float64")
    # A lone data column is returned as a flat vector rather than (n, 1).
    if array.ndim == 2 and array.shape[1] == 1:
        array = array.ravel()
    return array, df

def publish_pred(y_pred, file_name, sample_file="data/reg_sample_submission.csv"):
    '''Write predictions into a copy of the sample-submission CSV.

    Parameters
    ----------
    y_pred : array-like
        One prediction per row of the sample file. Both flat (n,) and
        column (n, 1) inputs are accepted.
    file_name : str
        Destination path of the CSV that is written.
    sample_file : str, optional
        Template CSV whose index and layout are reused. Defaults to the
        path that was previously hard-coded.

    Raises
    ------
    ValueError
        Raised by pandas when the number of predictions does not match the
        number of rows in the template.
    '''
    # DataFrame.from_csv no longer exists in current pandas; read_csv with
    # index_col=0 keeps the first column as the index like before.
    df = pd.read_csv(sample_file, header=0, sep=',', index_col=0)
    # Flatten first: list() over an (n, 1) array yields one-element arrays,
    # which were written to the CSV as "[1.5]" instead of 1.5.
    df["Output"] = np.asarray(y_pred).ravel()
    df.to_csv(file_name)
    
#LOAD DATA    
# Training inputs and targets: keep both the array and the DataFrame view.
tr_in, df_in = read_csv("data/reg_train_in")
tr_out, df_out = read_csv("data/reg_train_out")
# Explicit float64 cast of both training arrays (read_csv already returns float64).
tr_in, tr_out = (np.asarray(arr, dtype="float64") for arr in (tr_in, tr_out))
# Test inputs -- still have to deal with NaNs best
te_in, df = read_csv("data/reg_test_in")

# Here we don't care about the GP-prediction of the NaNs

# Split and Clean data

In [49]:
# Draw the split seed with NumPy: the stdlib `random` module is never imported
# in this notebook, so `random.randint(0, 20)` raised a NameError on a fresh
# kernel. np.random.randint has an exclusive upper bound, hence 21 for 0..20.
split_seed = int(np.random.randint(0, 21))
cv = ShuffleSplit(n_splits=1, test_size=0.30, random_state=split_seed, train_size=None)
# n_splits=1, so the splitter yields exactly one (train, validation) index pair.
train_index, test_index = next(cv.split(tr_in))
# Only the first input column is used as the feature; everything is reshaped
# to column vectors of shape (n, 1).
X_tr, y_tr = tr_in[train_index, 0].reshape(-1, 1), tr_out[train_index].reshape(-1, 1)
X_vl, y_vl = tr_in[test_index, 0].reshape(-1, 1), tr_out[test_index].reshape(-1, 1)

In [51]:
# VALIDATION AND TRAINING SET:
# Standardise using statistics estimated on the training split only, then
# push every other subset through that same fitted scaler.
X_tr_scale = preprocessing.StandardScaler()
X_tr_scale.fit(X_tr)

# Training and validation splits
X_train, y_train = X_tr_scale.transform(X_tr), y_tr
X_val, y_val = X_tr_scale.transform(X_vl), y_vl  # targets must be (*, 1); (*, ) fails

# Full labelled set (first input column only), scaled the same way
X = X_tr_scale.transform(tr_in[:, 0].reshape(-1, 1))
y = tr_out.reshape(-1, 1)

# DATA NOW PREPARED TO ENTER NETWORK
X_test = X_tr_scale.transform(te_in[:, 0].reshape(-1, 1))

# Build Neural Net Regressor

In [37]:
def shape_tensor(T):
    '''Return the static shape of ``T`` as a tuple.

    Each entry is the ``.value`` attribute of the corresponding dimension
    object returned by ``T.get_shape()``.
    '''
    return tuple(axis.value for axis in T.get_shape())

def hidden_layer(input_tensor, out_nodes, keep_prob=0.95):
    '''Append a fully connected ReLU layer followed by dropout.

    Parameters
    ----------
    input_tensor : tf.Tensor
        Activations of the previous layer. The size of axis 1 is read from
        the static shape and used as the number of input units.
    out_nodes : int
        Number of units in the new layer.
    keep_prob : float or tf.Tensor, optional
        Keep probability handed to ``tf.nn.dropout``. Defaults to the 0.95
        that was previously hard-coded. With a constant value dropout stays
        active every time the graph is evaluated (validation and prediction
        included); pass a placeholder to be able to feed 1.0 at inference.

    Returns
    -------
    tf.Tensor
        The layer activations after ReLU and dropout.
    '''
    in_nodes = shape_tensor(input_tensor)[1]
    # Small random weights and a constant bias of 0.1 (ones / 10).
    W = tf.Variable(tf.truncated_normal([in_nodes, out_nodes], stddev=0.1))
    b = tf.Variable(tf.ones([out_nodes]) / 10)
    logits = tf.add(tf.matmul(input_tensor, W), b)
    a = tf.nn.dropout(tf.nn.relu(logits), keep_prob=keep_prob)
    return a

GO WIDER INSTEAD OF DEEPER

In [38]:
# Placeholders for input and output
inputX = tf.placeholder(tf.float32, [None, 1])
inputY = tf.placeholder(tf.float32, [None, 1])

n_first = 100   # units in the first (input) layer
n_nodes = 500   # units in each of the following hidden layers
# Weights and bias for INPUT LAYER
W = tf.Variable(tf.truncated_normal([1, n_first], stddev=0.1))
b = tf.Variable(tf.ones([n_first]) / 10)
nodes = tf.add(tf.matmul(inputX, W), b)

# USE PREVIOUS WEIGHTS
#W = tf.Variable(W_past)
#b = tf.Variable(b_past)

with tf.name_scope("feature_compression_layer") as scope:
    # keep_prob is a constant, so dropout is active on every evaluation of `a`.
    a = tf.nn.dropout(tf.nn.relu(nodes), keep_prob=0.95)

# ADD SUMMARY OPTIONS TO COLLECT DATA (make net understandable)
w_h = tf.summary.histogram("weights", W)
# The bias histogram was also tagged "weights"; give it its own tag so the
# two summaries can be told apart.
b_h = tf.summary.histogram("biases", b)

# Two further hidden layers of n_nodes units each
a_out = hidden_layer(hidden_layer(a, n_nodes), n_nodes)
# Weights for OUTPUT LAYER (linear, no bias term)
W_out = tf.Variable(tf.truncated_normal([n_nodes, 1], stddev=0.1))
#b_out = tf.Variable(tf.ones([n_nodes])/10)
y_pred = tf.matmul(a_out, W_out)

In [39]:
# Non-trainable step counter; it is handed to the optimizer's minimize() call
# and drives the learning-rate schedule below.
global_step = tf.Variable(0, trainable=False)
# Learning rate starts at 0.001 and is multiplied by 0.96 every 500 steps
# (staircase decay).
starter_learning_rate = 0.001
learning_rate = tf.train.exponential_decay(
    learning_rate=starter_learning_rate,
    global_step=global_step,
    decay_steps=500,
    decay_rate=0.96,
    staircase=True)

# Training-set size, number of minibatch iterations, and logging interval.
n_samples, n_epochs, display_step = y_train.size, 15000, 800

def minibach(X, y, batch_size):
    '''Draw a random minibatch of matching rows from ``X`` and ``y``.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_rows, n_features)
        Input matrix.
    y : numpy.ndarray, shape (n_rows, n_targets)
        Target matrix with one row per row of ``X``.
    batch_size : int
        Requested number of rows. Rows are sampled without replacement, so
        the batch is capped at ``n_rows`` when more are requested.

    Returns
    -------
    X_batch, y_batch : numpy.ndarray
        The same randomly chosen rows of ``X`` and ``y``.
    '''
    # Count rows rather than elements: y.size over-counts when y has more
    # than one column and would produce out-of-range indices.
    n_rows = y.shape[0]
    # Sampling without replacement cannot return more rows than exist.
    batch_size = min(batch_size, n_rows)
    ind = np.random.choice(n_rows, size=batch_size, replace=False)
    # No per-batch normalisation: inputs are expected to be scaled beforehand.
    X_batch, y_batch = X[ind, :], y[ind, :]
    return X_batch, y_batch

In [40]:
with tf.name_scope("loss_function"):
    # Root-mean-squared error over whatever batch is fed in. The previous
    # version divided the summed squared error by the full training-set size
    # (or validation-set size), which is only an RMSE when exactly that set
    # is fed; on a minibatch it under-reported the error. reduce_mean gives
    # the same value on the full sets and a true RMSE on any batch.
    loss = tf.sqrt(tf.reduce_mean(tf.square(y_pred - inputY)))
    # Kept as a separate op so training and validation get their own summary.
    loss2 = tf.sqrt(tf.reduce_mean(tf.square(y_pred - inputY)))
    tf.summary.scalar("loss_train", loss)
    tf.summary.scalar("loss_val", loss2)

with tf.name_scope("train"):
    #traning_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    # Passing global_step makes every optimizer step increment the counter
    # that drives the learning-rate decay.
    traning_step = tf.train.AdamOptimizer(
                            learning_rate).minimize(loss, global_step=global_step)
    
#merged_summary_op = tf.summary.merge_all()

# Initialize session

In [None]:
init = tf.global_variables_initializer()
session = tf.Session()
session.run(init)
#summary_writer = tf.summary.FileWriter('/home/sp838/KAGGLE_thing/Regression/logs',
#                                        graph=session.graph)
for i in range(n_epochs):
    # One optimizer step on a random minibatch of 500 training rows.
    X_batch, y_batch = minibach(X_train, y_train, 500)
    session.run(traning_step, feed_dict={inputX: X_batch, inputY: y_batch})
    if i % display_step == 0:
        # Monitoring on the full training and validation splits. The dropout
        # layers use a fixed keep_prob, so these values are computed with
        # dropout still active.
        cc = session.run(loss, feed_dict={inputX: X_train, inputY: y_train})
        cc2 = session.run(loss2, feed_dict={inputX: X_val, inputY: y_val})
        # print() call form works on both Python 2 and Python 3.
        print("loss = {}, val_loss = {}".format(cc, cc2))

In [52]:
# Evaluate the current learning rate and first-layer weights; their values
# are not echoed because only a cell's final expression is displayed.
session.run([learning_rate, W])
# First-layer activations for the training inputs (this is the cell output).
session.run(a, feed_dict={inputX: X_train})

array([[ 0.01273801,  0.06102614,  0.25166133, ...,  0.06001911,
         0.0116888 ,  0.21419793],
       [ 0.2616742 ,  0.21861073,  0.01593253, ...,  0.2156716 ,
         0.25472808,  0.06252324],
       [ 0.16550475,  0.1577324 ,  0.10699967, ...,  0.15553968,
         0.        ,  0.12111846],
       ..., 
       [ 0.17199931,  0.16184366,  0.10084968, ...,  0.15960054,
         0.16717745,  0.11716138],
       [ 0.        ,  0.28095382,  0.        , ...,  0.27725032,
         0.35087842,  0.00251822],
       [ 0.22091885,  0.19281131,  0.05452557, ...,  0.19018848,
         0.        ,  0.08735511]], dtype=float32)

KEEP RUNNING FOR FINE-TUNING

In [60]:
# Continue training for 5000 more steps, now sampling batches of 1000 rows
# from the full labelled set (X, y).
# NOTE: X and y include the validation rows, so val_loss printed below is no
# longer a held-out estimate once this cell has run.
for i in range(5000):
    X_batch, y_batch = minibach(X, y, 1000)
    session.run(traning_step, feed_dict={inputX: X_batch, inputY: y_batch})
    if i % display_step == 0:
        cc = session.run(loss, feed_dict={inputX: X_train, inputY: y_train})
        cc2 = session.run(loss2, feed_dict={inputX: X_val, inputY: y_val})
        # print() call form works on both Python 2 and Python 3.
        print("loss = {}, val_loss = {}".format(cc, cc2))
        

loss = 0.867838323116, val_loss = 0.861643671989
loss = 0.860314369202, val_loss = 0.85356760025
loss = 0.859369456768, val_loss = 0.852475464344
loss = 0.854378163815, val_loss = 0.849298357964
loss = 0.853880941868, val_loss = 0.848384976387
loss = 0.855462014675, val_loss = 0.849990844727
loss = 0.853386521339, val_loss = 0.84497231245


In [33]:
# Predict on the full scaled labelled set and report MSE followed by RMSE.
y_ = session.run(y_pred, feed_dict={inputX: X})
score = mean_squared_error(y, y_)
# print() call form works on both Python 2 and Python 3.
print("{} {}".format(score, np.sqrt(score)))

0.756965163596 0.870037449536


# Replace remaining entries in y_test (NaN ind)

In [30]:
# LOAD Y_TEST FROM FILE. THIS ONLY CONTAINS PREDICTIONS IN NON-NAN INDICES
y_test, df = read_csv("predictions/reg_nonNaN_pred_TF")
y_test = y_test.reshape(-1, 1)

# DEFINE INDICES
# Row positions where the third test-input column is / is not NaN.
# Stored as flat integer arrays: the previous list-wrapped index arrays relied
# on a NumPy indexing behaviour that was deprecated and later changed, which
# alters the shape of X_test[NaN_ind] on newer NumPy versions.
nan_mask = np.isnan(te_in[:, 2])
NaN_ind = np.flatnonzero(nan_mask)
NonNaN = np.flatnonzero(~nan_mask)

In [31]:
# SELECT INPUTS FOR NN IN NAN_IND AND PREDICT
# Flatten the index container to a 1-D integer array first, so that row
# selection yields shape (k, 1) whether NaN_ind is a flat array or a list
# wrapping one index array.
nan_rows = np.asarray(NaN_ind).ravel()
X_test_NaN = X_test[nan_rows]
y_test_NaN = session.run(y_pred, feed_dict={inputX: X_test_NaN})
# STORE THESE PREDICTIONS IN NAN_IND OF Y_TEST
y_test[nan_rows] = y_test_NaN

In [69]:
# Write the combined test predictions to the submission file.
publish_pred(y_pred=y_test, file_name="predictions/reg_pred_TF.csv")

# Alternative: Use classic Kernel SVRs
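They don't perform better: the RBF SVR below reaches a validation RMSE of about 0.98, versus roughly 0.85 for the network above.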

In [12]:
from sklearn.svm import SVR
regression = SVR(kernel='rbf')
# Pass the targets as a flat vector. The previous `y_train.reshape(-1,)` was a
# no-op because its result was discarded, so the (n, 1) array was fitted.
regression.fit(X_train, y_train.ravel())
# Use a dedicated name: assigning to `y_pred` would overwrite the TensorFlow
# output tensor that the network cells fetch via session.run.
y_pred_svr = regression.predict(X_val)
score = mean_squared_error(y_val, y_pred_svr)
# Validation RMSE; print() call form works on both Python 2 and Python 3.
print(np.sqrt(score))

0.978248864524
