In [1]:
# Importing Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import preprocessing
from IPython.display import Image
import pydotplus
import matplotlib.pyplot as plt
import config
import ovs
import uns
from mpl_toolkits.mplot3d import Axes3D
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from multiprocessing import Process, Manager
import datetime

# Required imports for RandomSearch
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import scipy.stats as stats
from sklearn.utils.fixes import loguniform

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Definition of personal functions
def report_metric(test, pred):
    print("Balanced accuracy of neural network is \n",
        metrics.balanced_accuracy_score(test, pred))
    print("Average_precision_score of neural network is \n",
        metrics.average_precision_score(test, pred))
    print('Confusion Matrix:\n', confusion_matrix(test, pred))
    print('Classification Report:\n', classification_report(test, pred))
        
def print_train_test_stats(y_train, y_pred_train, y_test, y_pred_test):
    '''Print Statistics of recent predictions on training & test data'''
    print('Training Error Statistics:')
    print('Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train))
    print('Mean Absolute Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train)))
    print('Confusion Matrix\n', confusion_matrix(y_train, y_pred_train))
    print('Classification Report:\n', classification_report(y_train, y_pred_train))
    print('Accuracy Score:', accuracy_score(y_train, y_pred_train))

    print('\nTest Error Statistics:')
    print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test))
    print('Mean Absolute Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test)))
    print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred_test))
    print('Classification Report:\n', classification_report(y_test, y_pred_test))
    print('Accuracy Score:', accuracy_score(y_test, y_pred_test))
    
# Utility function to report best scores
# Taken from https://scikit-learn.org/stable/auto_examples/model_selection/plot_randomized_search.html
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})"
                  .format(results['mean_test_score'][candidate],
                          results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [3]:
# Loading the Data
col_names=config.col_names2
feature_cols=config.feature_cols2
path_to_dataset=config.path_to_dataset2
target=config.target2

prob_data = pd.read_csv(path_to_dataset, header=None, names=col_names)
prob_data.head()

#split dataset in features and target variable
X = prob_data[feature_cols] # Features
y = prob_data[target] # Target variable

In [4]:
#feature scaling, Fit only to the feature X data
scaler = StandardScaler()

# Create Dataset Oversampling=False, normalization=False, standardization=True
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8,
                                                    random_state=7,shuffle=True,stratify=y) # 80% training and 20% test
# Scale input feature values
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Imbalanced Base Model with Standardized data with Standard Scaler - what we are hoping to improve from the base
print('\nImbalanced dataset Base Model Results with Adam Solver: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(hidden_layer_sizes=(15,), random_state=7, max_iter=1000, alpha=1e-7,
                          solver='adam',tol=1e-6,learning_rate='adaptive')
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Imbalanced dataset Base Model Results with Adam Solver: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9466785248574876
Average_precision_score of neural network is 
 0.8689057650147415
Confusion Matrix:
 [[227441     10]
 [    42    352]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.97      0.89      0.93       394

    accuracy                           1.00    227845
   macro avg       0.99      0.95      0.97    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9029820883052151
Average_precision_score of neural network is 
 0.7240116633104816
Confusion Matrix:
 [[56855     9]
 [   19    79]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.90      0.81  

### RandomSearch Round 1 Testing

In [60]:
# 32*np.linspace(4,14, num=10, dtype=int)
1e-6*np.logspace(0,9, base=4, num=10)
# 200*np.linspace(1,10, num=10, dtype=int)

array([1.00000e-06, 4.00000e-06, 1.60000e-05, 6.40000e-05, 2.56000e-04,
       1.02400e-03, 4.09600e-03, 1.63840e-02, 6.55360e-02, 2.62144e-01])

In [10]:
# # Setup Model and parameters
# NeuralNet = MLPClassifier(hidden_layer_sizes=(15,), random_state=7, max_iter=1000, solver='sgd',
#                           tol=1e-6,learning_rate='adaptive') # Default we are comparing to
print('Round 1 of RandomSearch Testing')
print('Testing started at:', datetime.datetime.now())
NeuralNet = MLPClassifier(random_state=7, solver='sgd',
                          tol=1e-6,learning_rate='adaptive')
param_dist = {'learning_rate_init': 1e-6*np.logspace(0,9, base=4, num=10),
              'batch_size': 32*np.linspace(4,14, num=10, dtype=int),
              'max_iter': 200*np.linspace(1,10, num=10, dtype=int),
              'hidden_layer_sizes': 5*np.linspace(1,10, num=10, dtype=int),
              'alpha': 1e-9*np.logspace(0,9, base=10, num=10)
             }
random_search = RandomizedSearchCV(NeuralNet, param_distributions=param_dist, scoring='average_precision',
                                   n_iter=60, cv=5, random_state=7)
clfsearch = random_search.fit(X_train,y_train)

print('Top 3 Results of Round 1')
report(random_search.cv_results_)

# Give training and test results with the best found parameters
print('Training and Test Results of best model from current round')
y_train_pred = clfsearch.predict(X_train)
y_train_score = clfsearch.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = clfsearch.predict(X_test)
y_test_score = clfsearch.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)
print('Testing completed at:', datetime.datetime.now())

Round 1 of RandomSearch Testing
Testing started at: 2020-07-19 13:54:30.539604
Top 3 Results of Round 1
Model with rank: 1
Mean validation score: 0.848 (std: 0.041)
Parameters: {'max_iter': 1400, 'learning_rate_init': 0.001024, 'hidden_layer_sizes': 30, 'batch_size': 160, 'alpha': 0.0001}

Model with rank: 2
Mean validation score: 0.847 (std: 0.035)
Parameters: {'max_iter': 600, 'learning_rate_init': 0.004096, 'hidden_layer_sizes': 50, 'batch_size': 384, 'alpha': 1.0000000000000001e-07}

Model with rank: 3
Mean validation score: 0.845 (std: 0.040)
Parameters: {'max_iter': 800, 'learning_rate_init': 0.004096, 'hidden_layer_sizes': 25, 'batch_size': 448, 'alpha': 0.01}

Training and Test Results of best model from current round
Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9085942692136044
Average_precision_score of neural network is 
 0.7788880765085405
Confusion Matrix:
 [[227435     16]
 [    72    322]]
Classification Report:
               precision  

In [11]:
# Print top 5 results as I don't want to re-run this...
print('Top 5 Results of Round 1')
report(random_search.cv_results_, n_top=5)

Top 5 Results of Round 1
Model with rank: 1
Mean validation score: 0.848 (std: 0.041)
Parameters: {'max_iter': 1400, 'learning_rate_init': 0.001024, 'hidden_layer_sizes': 30, 'batch_size': 160, 'alpha': 0.0001}

Model with rank: 2
Mean validation score: 0.847 (std: 0.035)
Parameters: {'max_iter': 600, 'learning_rate_init': 0.004096, 'hidden_layer_sizes': 50, 'batch_size': 384, 'alpha': 1.0000000000000001e-07}

Model with rank: 3
Mean validation score: 0.845 (std: 0.040)
Parameters: {'max_iter': 800, 'learning_rate_init': 0.004096, 'hidden_layer_sizes': 25, 'batch_size': 448, 'alpha': 0.01}

Model with rank: 4
Mean validation score: 0.844 (std: 0.038)
Parameters: {'max_iter': 200, 'learning_rate_init': 0.004096, 'hidden_layer_sizes': 40, 'batch_size': 224, 'alpha': 1e-08}

Model with rank: 5
Mean validation score: 0.843 (std: 0.036)
Parameters: {'max_iter': 200, 'learning_rate_init': 0.004096, 'hidden_layer_sizes': 50, 'batch_size': 288, 'alpha': 1e-05}



### RandomSearch Round 2 Testing

In [31]:
# Used for determining new bounds of parameter values for random test
# 32*np.linspace(4,14, num=10, dtype=int)
# 1e-4*np.logspace(0,6, base=4, num=10)
# 200*np.linspace(1,7, num=7, dtype=int)
# 3*np.linspace(7,17, num=10, dtype=int)
1e-8*np.logspace(0,7, base=10, num=10)

array([1.00000000e-08, 5.99484250e-08, 3.59381366e-07, 2.15443469e-06,
       1.29154967e-05, 7.74263683e-05, 4.64158883e-04, 2.78255940e-03,
       1.66810054e-02, 1.00000000e-01])

In [32]:
# # Setup Model and parameters
# NeuralNet = MLPClassifier(hidden_layer_sizes=(15,), random_state=7, max_iter=1000, solver='sgd',
#                           tol=1e-6,learning_rate='adaptive') # Default we are comparing to
print('Round 2 of RandomSearch Testing')
print('Testing started at:', datetime.datetime.now())
NeuralNet = MLPClassifier(random_state=7, solver='sgd',
                          tol=1e-6,learning_rate='adaptive')
param_dist = {'learning_rate_init': 1e-4*np.logspace(0,6, base=4, num=10),
              'batch_size': 32*np.linspace(4,14, num=10, dtype=int),
              'max_iter': 200*np.linspace(1,7, num=7, dtype=int),
              'hidden_layer_sizes': 3*np.linspace(7,17, num=10, dtype=int),
              'alpha': 1e-8*np.logspace(0,7, base=10, num=10)
             }
random_search = RandomizedSearchCV(NeuralNet, param_distributions=param_dist, scoring='average_precision',
                                   n_iter=60, cv=5, random_state=7)
clfsearch2 = random_search.fit(X_train,y_train)

print('Top 5 Results of Round 2')
report(random_search.cv_results_)

# Give training and test results with the best found parameters
print('Training and Test Results of best model from Round 2')
y_train_pred = clfsearch2.predict(X_train)
y_train_score = clfsearch2.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = clfsearch2.predict(X_test)
y_test_score = clfsearch2.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)
print('Testing completed at:', datetime.datetime.now())

Round 2 of RandomSearch Testing
Testing started at: 2020-07-20 15:40:52.119305
Top 5 Results of Round 2
Model with rank: 1
Mean validation score: 0.852 (std: 0.037)
Parameters: {'max_iter': 800, 'learning_rate_init': 0.004031747359663594, 'hidden_layer_sizes': 51, 'batch_size': 448, 'alpha': 3.5938136638046275e-07}

Model with rank: 2
Mean validation score: 0.849 (std: 0.041)
Parameters: {'max_iter': 1000, 'learning_rate_init': 0.0016, 'hidden_layer_sizes': 30, 'batch_size': 256, 'alpha': 2.1544346900318848e-06}

Model with rank: 3
Mean validation score: 0.847 (std: 0.040)
Parameters: {'max_iter': 1200, 'learning_rate_init': 0.0016, 'hidden_layer_sizes': 24, 'batch_size': 224, 'alpha': 0.0027825594022071257}

Training and Test Results of best model from Round 2
Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9200309769402668
Average_precision_score of neural network is 
 0.818140045119152
Confusion Matrix:
 [[227442      9]
 [    63    331]]
Classification

In [34]:
# Print top 5 results as I don't want to re-run this...
print('Top 5 Results of Round 2')
report(clfsearch2.cv_results_, n_top=5)

Top 5 Results of Round 2
Model with rank: 1
Mean validation score: 0.852 (std: 0.037)
Parameters: {'max_iter': 800, 'learning_rate_init': 0.004031747359663594, 'hidden_layer_sizes': 51, 'batch_size': 448, 'alpha': 3.5938136638046275e-07}

Model with rank: 2
Mean validation score: 0.849 (std: 0.041)
Parameters: {'max_iter': 1000, 'learning_rate_init': 0.0016, 'hidden_layer_sizes': 30, 'batch_size': 256, 'alpha': 2.1544346900318848e-06}

Model with rank: 3
Mean validation score: 0.847 (std: 0.040)
Parameters: {'max_iter': 1200, 'learning_rate_init': 0.0016, 'hidden_layer_sizes': 24, 'batch_size': 224, 'alpha': 0.0027825594022071257}

Model with rank: 4
Mean validation score: 0.845 (std: 0.040)
Parameters: {'max_iter': 200, 'learning_rate_init': 0.0256, 'hidden_layer_sizes': 36, 'batch_size': 128, 'alpha': 0.016681005372000592}

Model with rank: 5
Mean validation score: 0.845 (std: 0.038)
Parameters: {'max_iter': 600, 'learning_rate_init': 0.0256, 'hidden_layer_sizes': 39, 'batch_size': 2

### RandomSearch Round 3 Testing

In [50]:
# Used for determining new bounds of parameter values for random testing round 3
# 1e-4*np.logspace(0,6, base=4, num=10) # learning_rate_init
# 22*np.linspace(11,20, num=10, dtype=int) # batch size
# 100*np.linspace(6,12, num=7, dtype=int) # max iterations
# 2*np.linspace(15,25, num=10, dtype=int) # hidden layer sizes
# 1e-7*np.logspace(0,6, base=10, num=10) # l2 regularization

In [51]:
# # Setup Model and parameters
# NeuralNet = MLPClassifier(hidden_layer_sizes=(15,), random_state=7, max_iter=1000, solver='adam',
#                           tol=1e-6,learning_rate='adaptive') # Default we are comparing to
print('Round 3 of RandomSearch Testing')
print('Testing started at:', datetime.datetime.now())
NeuralNet = MLPClassifier(random_state=7, solver='sgd',
                          tol=1e-6,learning_rate='adaptive')
param_dist = {'learning_rate_init': 1e-4*np.logspace(0,6, base=4, num=10),
              'batch_size': 22*np.linspace(11,20, num=10, dtype=int),
              'max_iter': 100*np.linspace(6,12, num=7, dtype=int),
              'hidden_layer_sizes': 2*np.linspace(15,25, num=10, dtype=int),
              'alpha': 1e-7*np.logspace(0,6, base=10, num=10) 
             }
random_search = RandomizedSearchCV(NeuralNet, param_distributions=param_dist, scoring='average_precision',
                                   n_iter=60, cv=5, random_state=7)
clfsearch3 = random_search.fit(X_train,y_train)

print('Top 5 Results of Round 3')
report(clfsearch3.cv_results_, n_top=5)

# Give training and test results with the best found parameters
print('Training and Test Results of best model from Round 3')
y_train_pred = clfsearch3.predict(X_train)
y_train_score = clfsearch3.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = clfsearch3.predict(X_test)
y_test_score = clfsearch3.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)
print('Testing completed at:', datetime.datetime.now())

Round 3 of RandomSearch Testing
Testing started at: 2020-07-21 11:08:03.173000
Top 5 Results of Round 3
Model with rank: 1
Mean validation score: 0.854 (std: 0.040)
Parameters: {'max_iter': 800, 'learning_rate_init': 0.0256, 'hidden_layer_sizes': 42, 'batch_size': 308, 'alpha': 0.021544346900318822}

Model with rank: 2
Mean validation score: 0.849 (std: 0.037)
Parameters: {'max_iter': 1200, 'learning_rate_init': 0.004031747359663594, 'hidden_layer_sizes': 36, 'batch_size': 242, 'alpha': 0.004641588833612772}

Model with rank: 3
Mean validation score: 0.849 (std: 0.037)
Parameters: {'max_iter': 700, 'learning_rate_init': 0.010159366732596474, 'hidden_layer_sizes': 42, 'batch_size': 308, 'alpha': 0.004641588833612772}

Model with rank: 4
Mean validation score: 0.847 (std: 0.042)
Parameters: {'max_iter': 600, 'learning_rate_init': 0.0256, 'hidden_layer_sizes': 40, 'batch_size': 242, 'alpha': 0.021544346900318822}

Model with rank: 5
Mean validation score: 0.846 (std: 0.034)
Parameters: {'

### RandomSearch Round 4 Testing

In [65]:
# Used for determining new bounds of parameter values for random testing round 4
# 1e-3*np.logspace(0,3, base=4, num=10) # learning_rate_init
# 22*np.linspace(11,20, num=10, dtype=int) # batch size
# 100*np.linspace(7,12, num=6, dtype=int) # max iterations
# 2*np.linspace(18,25, num=8, dtype=int) # hidden layer sizes
# 1e-4*np.logspace(0,3, base=10, num=10) # l2 regularization

array([ 700,  800,  900, 1000, 1100, 1200])

In [66]:
# # Setup Model and parameters
# NeuralNet = MLPClassifier(hidden_layer_sizes=(15,), random_state=7, max_iter=1000, solver='adam',
#                           tol=1e-6,learning_rate='adaptive') # Default we are comparing to
print('Round 4 of RandomSearch Testing')
print('Testing started at:', datetime.datetime.now())
NeuralNet = MLPClassifier(random_state=7, solver='sgd',
                          tol=1e-6,learning_rate='adaptive')
param_dist = {'learning_rate_init': 1e-3*np.logspace(0,3, base=4, num=10),
              'batch_size': 22*np.linspace(11,20, num=10, dtype=int),
              'max_iter': 100*np.linspace(7,12, num=6, dtype=int),
              'hidden_layer_sizes': 2*np.linspace(18,25, num=8, dtype=int),
              'alpha': 1e-4*np.logspace(0,3, base=10, num=10) 
             }
random_search = RandomizedSearchCV(NeuralNet, param_distributions=param_dist, scoring='average_precision',
                                   n_iter=60, cv=5, random_state=7)
clfsearch4 = random_search.fit(X_train,y_train)

print('Top 5 Results of Round 4')
report(clfsearch4.cv_results_, n_top=5)

# Give training and test results with the best found parameters
print('Training and Test Results of best model from Round 4')
y_train_pred = clfsearch4.predict(X_train)
y_train_score = clfsearch4.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = clfsearch4.predict(X_test)
y_test_score = clfsearch4.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)
print('Testing completed at:', datetime.datetime.now())

Round 4 of RandomSearch Testing
Testing started at: 2020-07-22 09:12:52.399294
Top 5 Results of Round 4
Model with rank: 1
Mean validation score: 0.854 (std: 0.037)
Parameters: {'max_iter': 700, 'learning_rate_init': 0.016, 'hidden_layer_sizes': 46, 'batch_size': 264, 'alpha': 0.01}

Model with rank: 2
Mean validation score: 0.852 (std: 0.040)
Parameters: {'max_iter': 900, 'learning_rate_init': 0.02539841683149118, 'hidden_layer_sizes': 46, 'batch_size': 242, 'alpha': 0.021544346900318825}

Model with rank: 3
Mean validation score: 0.852 (std: 0.040)
Parameters: {'max_iter': 1000, 'learning_rate_init': 0.016, 'hidden_layer_sizes': 38, 'batch_size': 242, 'alpha': 0.021544346900318825}

Model with rank: 4
Mean validation score: 0.850 (std: 0.036)
Parameters: {'max_iter': 900, 'learning_rate_init': 0.010079368399158984, 'hidden_layer_sizes': 46, 'batch_size': 396, 'alpha': 0.004641588833612777}

Model with rank: 5
Mean validation score: 0.850 (std: 0.036)
Parameters: {'max_iter': 900, 'le

## Test of the top results from the Random Search Round 4
As Rank 1 is already recorded above - testing for ranks 2 through 5

In [7]:
print('\nTesting Random Search Round 4 Rank 2 Model: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(max_iter=900, learning_rate_init=0.0253984, hidden_layer_sizes=(46,),
                          batch_size=242, alpha=0.02154435,
                          solver='sgd',tol=1e-6,learning_rate='adaptive', random_state=7)
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Testing Random Search Round 4 Rank 2 Model: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8946282835236425
Average_precision_score of neural network is 
 0.7442575299288101
Confusion Matrix:
 [[227432     19]
 [    83    311]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.94      0.79      0.86       394

    accuracy                           1.00    227845
   macro avg       0.97      0.89      0.93    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8724226008636431
Average_precision_score of neural network is 
 0.7159329814577424
Confusion Matrix:
 [[56861     3]
 [   25    73]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.96      0.74      0.84    

In [8]:
print('\nTesting Random Search Round 4 Rank 3 Model: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(max_iter=1000, learning_rate_init=0.016, hidden_layer_sizes=(38,),
                          batch_size=242, alpha=0.02154435,
                          solver='sgd',tol=1e-6,learning_rate='adaptive', random_state=7)
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Testing Random Search Round 4 Rank 3 Model: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8971707511409777
Average_precision_score of neural network is 
 0.7538472753356625
Confusion Matrix:
 [[227434     17]
 [    81    313]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.95      0.79      0.86       394

    accuracy                           1.00    227845
   macro avg       0.97      0.90      0.93    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8775158487705719
Average_precision_score of neural network is 
 0.7168001927565947
Confusion Matrix:
 [[56860     4]
 [   24    74]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.95      0.76      0.84    

In [9]:
print('\nTesting Random Search Round 4 Rank 4 Model: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(max_iter=900, learning_rate_init=0.01007937, hidden_layer_sizes=(46,),
                          batch_size=396, alpha=0.00464159,
                          solver='sgd',tol=1e-6,learning_rate='adaptive', random_state=7)
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Testing Random Search Round 4 Rank 4 Model: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9276473884139088
Average_precision_score of neural network is 
 0.8357463813569795
Confusion Matrix:
 [[227443      8]
 [    57    337]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.98      0.86      0.91       394

    accuracy                           1.00    227845
   macro avg       0.99      0.93      0.96    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.867302974228521
Average_precision_score of neural network is 
 0.6874429275521591
Confusion Matrix:
 [[56859     5]
 [   26    72]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.94      0.73      0.82     

In [10]:
print('\nTesting Random Search Round 4 Rank 5 Model: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(max_iter=900, learning_rate_init=0.004, hidden_layer_sizes=(46,),
                          batch_size=418, alpha=0.000464159,
                          solver='sgd',tol=1e-6,learning_rate='adaptive', random_state=7)
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Testing Random Search Round 4 Rank 5 Model: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9263695597782237
Average_precision_score of neural network is 
 0.8236398207042656
Confusion Matrix:
 [[227439     12]
 [    58    336]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.97      0.85      0.91       394

    accuracy                           1.00    227845
   macro avg       0.98      0.93      0.95    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8724138079542454
Average_precision_score of neural network is 
 0.7066408503879529
Confusion Matrix:
 [[56860     4]
 [   25    73]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.95      0.74      0.83    

## Test of the top results from the Random Search 3
As Rank 1 is already recorded above - testing for ranks 2 through 5

In [11]:
print('\nTesting Random Search Round 3 Rank 2 Model: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(max_iter=1200, learning_rate_init=0.00403175, hidden_layer_sizes=(36,),
                          batch_size=242, alpha=0.00464159,
                          solver='sgd',tol=1e-6,learning_rate='adaptive', random_state=7)
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Testing Random Search Round 3 Rank 2 Model: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9009910473939977
Average_precision_score of neural network is 
 0.7753931770311058
Confusion Matrix:
 [[227440     11]
 [    78    316]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.97      0.80      0.88       394

    accuracy                           1.00    227845
   macro avg       0.98      0.90      0.94    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.867302974228521
Average_precision_score of neural network is 
 0.6874429275521591
Confusion Matrix:
 [[56859     5]
 [   26    72]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.94      0.73      0.82     

In [15]:
print('\nTesting Random Search Round 3 Rank 3 Model: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(max_iter=700, learning_rate_init=0.01015937, hidden_layer_sizes=(42,),
                          batch_size=308, alpha=0.00464159,
                          solver='sgd',tol=1e-6,learning_rate='adaptive', random_state=7)
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Testing Random Search Round 3 Rank 3 Model: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9251115156235916
Average_precision_score of neural network is 
 0.8331098993746008
Confusion Matrix:
 [[227444      7]
 [    59    335]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.98      0.85      0.91       394

    accuracy                           1.00    227845
   macro avg       0.99      0.93      0.96    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8877199304032248
Average_precision_score of neural network is 
 0.7371209162714276
Confusion Matrix:
 [[56860     4]
 [   22    76]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.95      0.78      0.85    

In [16]:
print('\nTesting Random Search Round 3 Rank 4 Model: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(max_iter=600, learning_rate_init=0.0256, hidden_layer_sizes=(40,),
                          batch_size=242, alpha=0.0215443,
                          solver='sgd',tol=1e-6,learning_rate='adaptive', random_state=7)
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Testing Random Search Round 3 Rank 4 Model: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8984397866739725
Average_precision_score of neural network is 
 0.7563742422416538
Confusion Matrix:
 [[227434     17]
 [    80    314]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.95      0.80      0.87       394

    accuracy                           1.00    227845
   macro avg       0.97      0.90      0.93    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8826178895868984
Average_precision_score of neural network is 
 0.72696022332019
Confusion Matrix:
 [[56860     4]
 [   23    75]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.95      0.77      0.85      

In [17]:
print('\nTesting Random Search Round 3 Rank 5 Model: \n')

# Create NN and fit and test it
NeuralNet = MLPClassifier(max_iter=900, learning_rate_init=0.00403175, hidden_layer_sizes=(50,),
                          batch_size=440, alpha=2.154437e-6,
                          solver='sgd',tol=1e-6,learning_rate='adaptive', random_state=7)
NNclf = NeuralNet.fit(X_train,y_train)
y_train_pred = NNclf.predict(X_train)
y_train_score = NNclf.predict_proba(X_train)
# params = NNclf.get_params()
print("Training Data Performance Metrics:")
report_metric(test=y_train, pred=y_train_pred)

y_test_pred = NNclf.predict(X_test)
y_test_score = NNclf.predict_proba(X_test)
print("Test Data Performance Metrics:")
report_metric(test=y_test, pred=y_test_pred)


Testing Random Search Round 3 Rank 5 Model: 

Training Data Performance Metrics:
Balanced accuracy of neural network is 
 0.9174951041499495
Average_precision_score of neural network is 
 0.8154881014050128
Confusion Matrix:
 [[227443      8]
 [    65    329]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    227451
           1       0.98      0.84      0.90       394

    accuracy                           1.00    227845
   macro avg       0.99      0.92      0.95    227845
weighted avg       1.00      1.00      1.00    227845

Test Data Performance Metrics:
Balanced accuracy of neural network is 
 0.8877199304032248
Average_precision_score of neural network is 
 0.7371209162714276
Confusion Matrix:
 [[56860     4]
 [   22    76]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.95      0.78      0.85    