In [1]:
import numpy as np
import scipy.stats
import pickle
import util
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

In [2]:
# Reproducibility: one seeded RandomState drives the train/test split below.
seed = 229
state = np.random.RandomState(seed)

# Load the training table; `impute=True` is forwarded to util.fetch_data.
trainfile = 'data/train.csv'
header, ids, X, Y = util.fetch_data(trainfile, impute=True)

# Hold out 20% of the rows as the test split.
test_size = 0.20
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=state,
)

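Neural networks train best on standardized features, so the scaler is fit on the training split and then applied to both the training and test splits: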

In [3]:
# Standardize the features using statistics learned from the training split
# only, then apply the same transform to the held-out split.
#
# The former `scaler.fit_transform(X)` call was removed: its return value was
# discarded and the fit it performed was immediately overwritten by the fit on
# X_train, so it only cost time.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

A function to determine the number of neurons in a hidden layer:

In [4]:
# Rule of thumb taken from https://stats.stackexchange.com/a/136542
def num_hidden(a):
    """Return the hidden-layer size suggested for the scaling factor `a`.

    Evaluates m / (a * (n_inputs + n_outputs)) and converts the result to an
    int. `m`, `n_inputs` and `n_outputs` are not parameters: they are read
    from notebook-level globals, which must be defined before the first call.
    """
    denominator = a * (n_inputs + n_outputs)
    return int(m / denominator)

# Candidate scaling factors: 2 through 9.
avals = range(2, 10)

# One hidden layer: Training
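The grid search below is slow (1200 fits, some taking several minutes each). If a pickled model already exists, skip to the `Evaluation` section to produce the plots and the prediction output.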

In [None]:
# Notebook-level globals read by num_hidden().
m = X.shape[0]
n_inputs = X.shape[1]
n_outputs = 1

# Search space: 10 layer widths x 2 activations x 10 alphas x 2 early-stopping
# settings = 400 candidates.
params = {
    'solver': ['adam'],
    'hidden_layer_sizes': [(n,) for n in np.arange(100, 300, 20)],
    'activation': ['relu', 'logistic'],
    'alpha': np.logspace(-7, -2, 10),
    'early_stopping': [True, False],
}

# Seed the network so weight initialisation (and the early-stopping validation
# split) are repeatable across runs.
mlp_onelayer = MLPClassifier(verbose=False, random_state=seed)
cv_onelayer = GridSearchCV(mlp_onelayer, param_grid=params, scoring=util.gini_proba_scorer,
                           n_jobs=-1, verbose=2)

# Search on the standardized training split rather than the raw full matrix:
#   * X was never standardized, while X_train was scaled in the scaling cell;
#   * keeping X_test out of model selection leaves it a genuine hold-out for
#     the learning curves computed later.
# The scaler was fit on all of X_train, so its mean/variance are shared across
# the CV folds; only the model itself is refit per fold.
cv_onelayer.fit(X_train, y_train)

Fitting 3 folds for each of 400 candidates, totalling 1200 fits
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(100,), early_stopping=True, solver=adam 
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(100,), early_stopping=True, solver=adam 
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(100,), early_stopping=True, solver=adam 
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(120,), early_stopping=True, solver=adam 
[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(100,), early_stopping=True, solver=adam, total=  22.0s
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(120,), early_stopping=True, solver=adam 
[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(100,), early_stopping=True, solver=adam, total=  23.8s
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(120,), early_stopping=True, solver=adam 
[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(100,), early_stopping=True, solver=adam, total=  26.2s
[CV] alpha=1e-07, activatio

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  7.0min


[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(100,), early_stopping=False, solver=adam, total= 3.1min
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(140,), early_stopping=False, solver=adam 
[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(120,), early_stopping=False, solver=adam, total= 2.7min
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(140,), early_stopping=False, solver=adam 
[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(120,), early_stopping=False, solver=adam, total= 3.1min
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(160,), early_stopping=False, solver=adam 
[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(140,), early_stopping=False, solver=adam, total= 2.9min
[CV] alpha=1e-07, activation=relu, hidden_layer_sizes=(160,), early_stopping=False, solver=adam 
[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(140,), early_stopping=False, solver=adam, total= 4.2min
[CV] alpha=1e-07, activation=relu, hidden_layer_size

[CV]  alpha=3.5938136638e-07, activation=relu, hidden_layer_sizes=(180,), early_stopping=True, solver=adam, total=  33.0s
[CV] alpha=3.5938136638e-07, activation=relu, hidden_layer_sizes=(200,), early_stopping=True, solver=adam 
[CV]  alpha=1e-07, activation=relu, hidden_layer_sizes=(280,), early_stopping=False, solver=adam, total= 6.4min
[CV] alpha=3.5938136638e-07, activation=relu, hidden_layer_sizes=(200,), early_stopping=True, solver=adam 
[CV]  alpha=3.5938136638e-07, activation=relu, hidden_layer_sizes=(180,), early_stopping=True, solver=adam, total=  33.2s
[CV] alpha=3.5938136638e-07, activation=relu, hidden_layer_sizes=(200,), early_stopping=True, solver=adam 
[CV]  alpha=3.5938136638e-07, activation=relu, hidden_layer_sizes=(200,), early_stopping=True, solver=adam, total=  34.8s
[CV] alpha=3.5938136638e-07, activation=relu, hidden_layer_sizes=(220,), early_stopping=True, solver=adam 
[CV]  alpha=3.5938136638e-07, activation=relu, hidden_layer_sizes=(200,), early_stopping=True,

In [None]:
# Persist the best estimator found by the grid search. The `with` block
# guarantees the file handle is closed even if pickling raises.
with open('models/multilayer_perceptron_onelayer.pickle', 'wb') as mlp_onelayer_out:
    pickle.dump(cv_onelayer.best_estimator_, mlp_onelayer_out)

# One hidden layer: Evaluation

In [None]:
# Reload the estimator saved by the training section.
# Pickle data is binary, so the file must be opened with 'rb' (text mode fails
# on Python 3); the `with` block also closes the handle, which the previous
# inline open() never did.
# NOTE: only unpickle files produced by this project -- pickle.load can execute
# arbitrary code embedded in the file.
with open('models/multilayer_perceptron_onelayer.pickle', 'rb') as mlp_onelayer_in:
    mlp_onelayer = pickle.load(mlp_onelayer_in)

In [None]:
# Run util.learning_curves on the loaded model with the gini scorer, then
# unpack its three results into the names used by the plotting cell.
learning_curve_result = util.learning_curves(
    mlp_onelayer, X_train, X_test, y_train, y_test,
    scorer=util.gini_proba_scorer,
)
trainsizes, traingini, testgini = learning_curve_result

# Print both score series, separated by an empty line.
print(traingini)
print('')
print(testgini)

In [None]:
# Draw both learning curves on one set of axes and save the figure as EPS.
fig, ax = plt.subplots()
ax.plot(trainsizes, traingini, label='train gini')
ax.plot(trainsizes, testgini, label='test gini')
ax.set_xlabel('Training set size')
ax.set_ylabel('Normalized Gini coefficient')
ax.set_title('Multi-layer perceptron, one layer')
ax.legend()
fig.savefig('figures/learning_curves/mlp_onelayer.eps', format='eps', dpi=1000)

In [None]:
# Display the currently open matplotlib figure(s).
plt.show()

# One hidden layer: Output

In [None]:
# Refit the tuned model on every labelled row before predicting.
# The features are standardized first: X itself is never scaled anywhere in
# this notebook, and the network is meant to be trained on standardized inputs.
# NOTE(review): this assumes util.make_prediction(..., scale=True) standardizes
# the test features before calling the model -- confirm against util.
mlp_onelayer.fit(StandardScaler().fit_transform(X), Y)

In [None]:
# Produce predictions for the test file with the refit model.
testfile = 'data/test.csv'
prediction_file = 'predictions/mlp_onelayer.csv'
predict_method = util.proba_method(mlp_onelayer)
util.make_prediction(mlp_onelayer, testfile, prediction_file, scale=True, method=predict_method)