In [None]:
#import libraries
import numpy as np
import pandas
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import classification_report
import time

In [None]:
#import data
data_small = pandas.read_csv('emails.csv')

# Features are every column except the first and the last one;
# the target is the 'Prediction' column.
feature_columns = data_small.columns[1:-1]
X = data_small.loc[:, feature_columns] #original
y = data_small['Prediction'] #original

# 80-20 train-test split (seeded so the partition is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
#preprocess

def _rows_without_nan_or_inf(frame):
    """Return a boolean Series that is True for rows with no NaN/+inf/-inf cell."""
    # `axis` must be passed by keyword: positional `any(1)` is rejected by
    # pandas 2.x, where every argument of DataFrame.any is keyword-only.
    return ~frame.isin([np.nan, np.inf, -np.inf]).any(axis=1)


# Drop the offending rows from the features and the matching labels so that
# X and y stay aligned row-for-row in both splits.
indices_to_keep = _rows_without_nan_or_inf(X_train)

X_train = X_train[indices_to_keep]
y_train = y_train[indices_to_keep]

indices_to_keep_test = _rows_without_nan_or_inf(X_test)

X_test = X_test[indices_to_keep_test]
y_test = y_test[indices_to_keep_test]

In [None]:
#Neural Network to generate predictions
def predNN_2(X_train, y_train):
    """Tune an MLPClassifier with a 5-fold randomized hyper-parameter search.

    The search space below has 2*2*2*2*2*1 = 32 combinations and ``n_iter`` is
    32, so every combination is evaluated.

    Parameters
    ----------
    X_train : array-like
        Training features, passed straight to ``RandomizedSearchCV.fit``.
    y_train : array-like
        Training labels, passed straight to ``RandomizedSearchCV.fit``.

    Returns
    -------
    RandomizedSearchCV
        The fitted search object (exposes ``best_params_``, ``best_score_``,
        ``cv_results_`` and ``predict``).
    """
    param_distributions = {
        'early_stopping': [True, False],
        'hidden_layer_sizes': [100, 150],
        'activation': ['relu', 'tanh'],
        'learning_rate': ['constant', 'adaptive'],
        'learning_rate_init': [0.01, 0.1],
        'solver': ['adam'],
    }

    # Seed the network itself (weight init, shuffling, early-stopping split);
    # seeding only the search sampler would still leave each fit random.
    base_model = MLPClassifier(max_iter=50000, random_state=1)

    clf = RandomizedSearchCV(
        base_model,
        param_distributions,
        cv=5,
        n_jobs=-1,
        verbose=6,
        random_state=1,
        n_iter=32,
    )

    print("Fitting")
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during the (long) search.
    t0 = time.perf_counter()
    clf.fit(X_train, y_train)
    t1 = time.perf_counter()
    print("Optimized parameters:", clf.best_params_)
    print("Weighted validation score:", clf.best_score_)
    print("time:", t1-t0)
    return clf

In [None]:
#Calling Neural Network for Predictions

# Hand plain NumPy arrays to the search, then predict on the held-out split
# with the fitted search object.
train_features, train_labels = X_train.values, y_train.values
classifier = predNN_2(train_features, train_labels)
preds = classifier.predict(X_test.values)

Fitting
Fitting 5 folds for each of 32 candidates, totalling 160 fits
Optimized parameters: {'solver': 'adam', 'learning_rate_init': 0.01, 'learning_rate': 'adaptive', 'hidden_layer_sizes': 150, 'early_stopping': False, 'activation': 'relu'}
Weighted validation score: 0.981386362441512
time: 2056.2531270980835


In [None]:
# Persist the full cross-validation table of the search as plain text.
cv_results_text = str(classifier.cv_results_)
with open('randomcv_adam.txt', 'w') as results_file:
    results_file.write(cv_results_text)

In [None]:
# Re-train a standalone MLP with the winning hyper-parameters of the search.
# NOTE: this cell rebinds `classifier` from the search object to the final
# model, so the search cell must be run again before re-running this one.
best_params = dict(classifier.best_params_)

# Kept as individual names for convenience when inspecting the chosen values.
activation = best_params['activation']
hidden_layer_sizes = best_params['hidden_layer_sizes']
learning_rate = best_params['learning_rate']
learning_rate_init = best_params['learning_rate_init']
early_stopping = best_params['early_stopping']

# Forward the whole dict so every tuned setting (including 'solver') is
# applied, rather than copying keys by hand and silently dropping one.
# The seed makes the final fit repeatable.
classifier = MLPClassifier(max_iter=50000, random_state=1, **best_params)

# perf_counter is monotonic, which suits measuring an elapsed interval.
t0 = time.perf_counter()
classifier.fit(X_train.values, y_train.values)
t1 = time.perf_counter()
print('Training time is', t1 - t0, 'seconds')

Training time is 47.68662929534912 seconds


In [None]:
#training accuracy
# Fraction of training rows whose predicted label equals the true label.
train_predictions = classifier.predict(X_train.values)
train_accNN = accuracy_score(y_train.values, train_predictions)
train_accNN

1.0

In [None]:
#test accuracy
# Fraction of held-out rows whose predicted label equals the true label.
test_predictions = classifier.predict(X_test.values)
test_accNN = accuracy_score(y_test.values, test_predictions)
test_accNN

0.9797101449275363