# Test single neural net

Code to train a single neural network model on training data, calculate predictions for test data, and do 5-fold cross validation.

## Import data and packages

In [31]:
import numpy as np 
from sklearn.model_selection import KFold
import tensorflow as tf 
#import tflearn
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.utils.np_utils import to_categorical

In [3]:
# Import the data from training_data.txt
traindata = np.genfromtxt('training_data.txt', delimiter = ' ', skip_header=1)
X_train = traindata[:, 1:]
Y_train = traindata[:, 0]

# Import the data from test_data.txt
testdata = np.genfromtxt('test_data.txt', delimiter = ' ', skip_header=1)

In [4]:
Y_train_final = to_categorical(Y_train, 2)

## Create and train model

In [67]:
modelcopy = Sequential()
#model.add(Dropout(0.35, input_shape=(len(X_train[0]),)))
modelcopy.add(Dense(300, input_shape=(len(X_train[0]),)))
modelcopy.add(Activation('relu'))
modelcopy.add(Dropout(0.2))
modelcopy.add(Dense(300))
modelcopy.add(Activation('relu'))
modelcopy.add(Dropout(0.2))
modelcopy.add(Dense(400))
modelcopy.add(Activation('relu'))
modelcopy.add(Dense(2))
modelcopy.add(Activation('softmax'))

# Printing a summary of the layers and weights in your model
modelcopy.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_55 (Dense)             (None, 300)               300300    
_________________________________________________________________
activation_55 (Activation)   (None, 300)               0         
_________________________________________________________________
dropout_27 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_56 (Dense)             (None, 300)               90300     
_________________________________________________________________
activation_56 (Activation)   (None, 300)               0         
_________________________________________________________________
dropout_28 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_57 (Dense)             (None, 400)               120400    
__________

In [68]:
modelcopy.compile(loss='categorical_crossentropy',optimizer='Adam', metrics=['accuracy'])

normalization = True
debug = False

if normalization:
    fit = modelcopy.fit(X_train/float(len(X_train[0])), Y_train_final, batch_size=200, epochs=10, verbose=1)
else:
    fit = modelcopy.fit(X_train, Y_train_final, batch_size=20, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Predict on test data

In [54]:
Ypredict = modelcopy.predict(testdata/float(len(testdata[0])))
Ypredictfinal = np.zeros(len(Ypredict))
Ypredictfinal[np.where(Ypredict[:,1]>Ypredict[:,0])] = 1

In [55]:
printarray = np.array([np.arange(1,len(Ypredictfinal)+1),Ypredictfinal]).T

np.savetxt('NN_500_500units_dropout25.txt',printarray,fmt='%i',delimiter=',',header='Id,Prediction',comments='')

## Validate single model

Calculate mean test scores for a single model using 5-fold cross validation.

In [57]:
from sklearn.model_selection import KFold

# Loop over several dropout probabilities
#dropouts = np.arange(0.1,0.85,0.05)
#trainaccuracy = np.zeros(len(dropouts))

In [69]:
# Split input set into 5 partitions
kf = KFold(n_splits=5)

# Initialize training and test errors
trainscore = 0
testscore  = 0

# Iterate 5 times, each time with a different partition used for validation
for train_index, test_index in kf.split(X_train):
    x_train_i, x_test_i = X_train[train_index]/float(len(X_train[0])), X_train[test_index]/float(len(X_train[0]))
    y_train_i, y_test_i  = Y_train_final[train_index], Y_train_final[test_index]
    
    fit = modelcopy.fit(x_train_i, y_train_i, batch_size=200, epochs=10, verbose=0)
    
    trainscore += 1. - modelcopy.evaluate(x_train_i, y_train_i, verbose=1)[0]
    testscore += 1. - modelcopy.evaluate(x_test_i, y_test_i, verbose=1)[0]
    
print('Final training score: ', trainscore/5.)
print('Final test score: ', testscore/5.)

('Final training score: ', 0.7736448423489929)
('Final test score: ', 0.719533021569252)


## Validation results

For two-layer (500 units each) architecture:
 - Training score = 0.7473236334592104
 - Test score = 0.7052634637951851

For three-layer (300/300/400) architecture:
 - Training score = 0.7736448423489929
 - Test score = 0.719533021569252

Note that scores are calculated as 1 - loss, where loss is defined by keras function - different from score computed earlier!