In [103]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### MLP

In [104]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import make_pipeline
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV

X = pd.read_csv('final_training_data.csv', index_col=0).drop(columns=['grade', 'username'])
y = pd.read_csv('final_training_data.csv').loc[:,'grade'] #1 is failed grade, 0 is pass
print(y.value_counts())


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

scaler = preprocessing.StandardScaler()
#scaler.fit(X_train)

#X_train_transformed = scaler.transform(X_train)
#X_test_transformed = scaler.transform(X_test)

X = scaler.fit_transform(X)

scoring = ['roc_auc', 'average_precision', 'recall', 'precision', 'f1', 'accuracy']


param_grid_1_layer = [
               {'alpha': [3, 1, 0.5, 0.3, 1.e-01, 1.e-02, 1.e-03, 1.e-04, 1.e-05, 1.e-06, 1.e-07, 1.e-08], 
                'hidden_layer_sizes': [(5,), (10,), (15,), (20,), (25,), (30,), (35,), (40,)]},
               ]
param_grid_2_layer = [
               {'alpha': [3, 1, 0.5, 0.3, 1.e-01, 1.e-02, 1.e-03, 1.e-04, 1.e-05, 1.e-06, 1.e-07, 1.e-08],
               'hidden_layer_sizes': [(5,5), (10,10), (15,15), (20,20), (25,25), (30,30), (35,35), (40,40)]}
               ]
param_grid_3_layer = [
               {'alpha': [3, 1, 0.5, 0.3, 1.e-01, 1.e-02, 1.e-03, 1.e-04, 1.e-05, 1.e-06, 1.e-07, 1.e-08],
               'hidden_layer_sizes': [(5,5,5), (10,10,10), (15,15,15), (20,20,20), (25,25,25), (30,30,30), (35,35,35), (40,40,40)]}
               ]

mlp = MLPClassifier(solver='lbfgs', max_iter=1000)

grid_search = GridSearchCV(mlp, param_grid_3_layer, cv=5, scoring=scoring, refit='average_precision', verbose=1, n_jobs = -1)
grid_search.fit(X, y)

print("Best parameters: {}".format(grid_search.best_params_))
print(f'Best score: {grid_search.best_score_}')

0    183
1     37
Name: grade, dtype: int64
Fitting 5 folds for each of 96 candidates, totalling 480 fits
Best parameters: {'alpha': 0.01, 'hidden_layer_sizes': (15, 15, 15)}
Best score: 0.6812661451526998


In [106]:
grid_search.cv_results_
#pd.DataFrame(grid_search.cv_results_).to_csv('mlp_3_results.csv')

{'mean_fit_time': array([0.2370008 , 0.35219998, 0.61399961, 0.69540024, 1.04400101,
        0.94320021, 1.09139967, 1.17560024, 0.3306015 , 0.69879928,
        0.55159988, 0.88520055, 1.11359997, 0.97280068, 1.07380028,
        1.50019951, 0.42720037, 0.69020028, 0.77640128, 0.70600052,
        1.04840145, 0.95320144, 1.16219969, 1.46039796, 0.47779951,
        0.58640003, 0.49619932, 0.76960049, 1.03240061, 1.07440081,
        1.2437993 , 1.12639961, 0.3199996 , 0.41100011, 0.74840035,
        0.50560102, 0.49060025, 0.94140153, 1.25059953, 1.5354002 ,
        0.20040045, 0.40100002, 0.55180011, 0.5444005 , 0.68720036,
        0.51340051, 0.70300002, 0.88840084, 0.18539948, 0.38019972,
        0.37079978, 0.50080032, 0.58460031, 0.68480005, 0.89100018,
        0.93599873, 0.1248004 , 0.23279948, 0.45120034, 0.60280032,
        0.71800008, 0.76300106, 0.70859938, 0.7797997 , 0.2262001 ,
        0.36779938, 0.36639977, 0.60800085, 0.56519995, 0.47540035,
        0.72779984, 0.88499889,