# Multi-Layered Perceptron via Randomized Search CV # 

Note: For simplicity and computational efficiency, we stick to the Adam optimizer. 

In [25]:
#import libraries
import numpy as np
import pandas
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import classification_report
import time

### 28 x 28 images ###

In [26]:
#import data
data_small = pandas.read_csv('data28.csv')
y = data_small['label']
X = data_small[data_small.columns[1:]]

# 70-30 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

In [27]:
#preprocess

indices_to_keep=~X_train.isin([np.nan,np.inf,-np.inf]).any(1)

X_train=X_train[indices_to_keep]
y_train=y_train[indices_to_keep]

indices_to_keep_test=~X_test.isin([np.nan,np.inf,-np.inf]).any(1)

X_test=X_test[indices_to_keep_test]
y_test=y_test[indices_to_keep_test]

In [28]:
#Neural Network to generate predictions
def predNN_2(X_train, y_train, im_size = 28):
    clf = RandomizedSearchCV(MLPClassifier(max_iter=50000),
                      {'early_stopping' : [True, False],
                      'hidden_layer_sizes': [100, 200, 300, 400, 500],
                      'activation': ['relu', 'tanh', 'logistic'],
                      'learning_rate': ['constant', 'invscaling', 'adaptive'],
                      'learning_rate_init': [.0001, .001, .01, .1]},
                      cv=5,n_jobs=-1,verbose=6,random_state=1,n_iter=20)
    
    # Note: GridSearchCV by default performs 5-fold CV. 
    
    # Pick 20 sample configurations to perform CV on. 
    
    print("Fitting")
    t0 = time.time()
    clf.fit(X_train, y_train)
    t1 = time.time()
    print('Grid search CV time for', im_size, 'x', im_size, 'images took', t1 - t0, 'seconds')
    print("Optimized parameters:", clf.best_params_)
    print("Weighted validation score:", clf.best_score_)
    return clf

In [29]:
#Calling Neural Network for Predictions

classifier = predNN_2(X_train, y_train, 28)
preds = classifier.predict(X_test)

Fitting
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Grid search CV time for 28 x 28 images took 539.9790909290314 seconds
Optimized parameters: {'learning_rate_init': 0.0001, 'learning_rate': 'constant', 'hidden_layer_sizes': 400, 'early_stopping': False, 'activation': 'logistic'}
Weighted validation score: 0.8724999999999999


In [30]:
# write cv results (such as mean fit times for each hyperparam configuration) to file
with open('fcnn_randomcv_28_adam.txt', 'w') as file:
    file.write(str(classifier.cv_results_))

In [31]:
activation = classifier.best_params_['activation']
hidden_layer_sizes = classifier.best_params_['hidden_layer_sizes']
learning_rate = classifier.best_params_['learning_rate']
learning_rate_init = classifier.best_params_['learning_rate_init']
early_stopping = classifier.best_params_['early_stopping']

classifier = MLPClassifier(early_stopping=early_stopping, activation=activation, hidden_layer_sizes=hidden_layer_sizes, max_iter=50000, learning_rate=learning_rate, learning_rate_init=learning_rate_init)
t0 = time.time()
classifier.fit(X_train, y_train)
t1 = time.time()
print('Training time time for', 28, 'x', 28, 'images via Adam took', t1 - t0, 'seconds')

Training time time for 28 x 28 images via Adam took 31.42546510696411 seconds


In [32]:
#training accuracy
train_accNN = accuracy_score(y_train, classifier.predict(X_train)) 
train_accNN

0.89175

In [33]:
#test accuracy
test_accNN = accuracy_score(y_test, classifier.predict(X_test)) 
test_accNN

0.844

### 64 x 64 images ###

In [34]:
#import data
data_large = pandas.read_csv('data64.csv')
y = data_large['label']
X = data_large[data_large.columns[1:]]

# 70-30 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

In [35]:
#preprocess

indices_to_keep=~X_train.isin([np.nan,np.inf,-np.inf]).any(1)

X_train=X_train[indices_to_keep]
y_train=y_train[indices_to_keep]

indices_to_keep_test=~X_test.isin([np.nan,np.inf,-np.inf]).any(1)

X_test=X_test[indices_to_keep_test]
y_test=y_test[indices_to_keep_test]

In [36]:
#Calling Neural Network for Predictions

classifier = predNN_2(X_train, y_train, 64)
preds = classifier.predict(X_test)

Fitting
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Grid search CV time for 64 x 64 images took 2366.9517533779144 seconds
Optimized parameters: {'learning_rate_init': 0.001, 'learning_rate': 'invscaling', 'hidden_layer_sizes': 400, 'early_stopping': True, 'activation': 'relu'}
Weighted validation score: 0.75075


In [37]:
# write cv results (such as mean fit times for each hyperparam configuration) to file
with open('fcnn_randomcv_64_adam.txt', 'w') as file:
    file.write(str(classifier.cv_results_))

In [38]:
activation = classifier.best_params_['activation']
hidden_layer_sizes = classifier.best_params_['hidden_layer_sizes']
learning_rate = classifier.best_params_['learning_rate']
learning_rate_init = classifier.best_params_['learning_rate_init']
early_stopping = classifier.best_params_['early_stopping']

classifier = MLPClassifier(early_stopping=early_stopping, activation=activation, hidden_layer_sizes=hidden_layer_sizes, max_iter=50000, learning_rate=learning_rate, learning_rate_init=learning_rate_init)
t0 = time.time()
classifier.fit(X_train, y_train)
t1 = time.time()
print('Training time time for', 64, 'x', 64, 'images took', t1 - t0, 'seconds')

Training time time for 64 x 64 images took 77.53696775436401 seconds


In [39]:
#training accuracy
train_accNN = accuracy_score(y_train, classifier.predict(X_train)) 
train_accNN

0.86675

In [40]:
#test accuracy
test_accNN = accuracy_score(y_test, classifier.predict(X_test)) 
test_accNN

0.826