> In this notebook a number of models are trained on the UCI data. Required libraries and the UCI dataset are first imported.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, auc, confusion_matrix, f1_score, classification_report
from sklearn.metrics import make_scorer, precision_score, recall_score
from scipy.special import expit, logit

In [2]:
df = pd.read_csv('EEG_UCI_dataset_powers.csv')
df

Unnamed: 0.1,Unnamed: 0,Fp1a delta,Fp1a theta,Fp1a alpha,Fp1a beta,Fp1a gamma,Fp2a delta,Fp2a theta,Fp2a alpha,Fp2a beta,...,P3/P4 delta,P3/P4 theta,P3/P4 alpha,P3/P4 beta,P3/P4 gamma,O1/O2 delta,O1/O2 theta,O1/O2 alpha,O1/O2 beta,O1/O2 gamma
0,co2a0000364,34.951542,6.329759,1.577445,5.040983,2.695547,30.629374,6.105156,2.187954,13.458793,...,0.459534,0.554567,0.547984,0.579646,0.637105,0.518184,0.540152,0.546512,0.580773,0.634882
1,co2a0000365,9.299793,2.568554,8.272992,10.244713,4.960425,7.650179,2.816241,7.805378,6.147001,...,0.481977,0.456597,0.477621,0.494127,0.489962,0.465665,0.490152,0.462667,0.486200,0.528533
2,co2a0000368,3.515035,1.541957,5.433650,2.411241,0.941431,3.768631,1.358769,5.551373,2.415200,...,0.458276,0.450038,0.563601,0.481220,0.480997,0.438477,0.458350,0.504987,0.474034,0.360506
3,co2a0000369,4.795953,4.360560,18.934703,3.818618,0.956625,4.472783,4.256171,18.158179,3.582341,...,0.563004,0.503810,0.528841,0.496117,0.521641,0.530270,0.528711,0.517796,0.546580,0.520409
4,co2a0000370,3.736512,1.392974,5.378064,4.374204,1.306564,4.511004,1.353615,5.034374,5.871832,...,0.414734,0.428938,0.371896,0.451796,0.560019,0.467281,0.505975,0.513976,0.594723,0.724742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,co3a0000458,5.034276,1.297364,2.776220,7.503791,1.389794,6.732382,1.791624,3.072971,8.030404,...,0.488738,0.554300,0.502656,0.517526,0.541235,0.461837,0.480084,0.494039,0.503561,0.577265
118,co3a0000459,4.925952,1.284041,3.818269,4.602541,1.256179,4.730081,1.297648,3.812826,8.845018,...,0.521518,0.476234,0.598763,0.568120,0.557745,0.508269,0.549875,0.526056,0.553384,0.539971
119,co3a0000460,5.413555,2.577622,2.853991,3.192140,0.737959,5.674537,2.723768,2.885980,3.208463,...,0.522837,0.545288,0.595220,0.539650,0.551718,0.521833,0.565657,0.516360,0.473023,0.439638
120,co3a0000461,7.399629,1.507018,1.856139,2.528717,0.567038,5.505387,1.312715,2.250694,2.524480,...,0.473300,0.605074,0.520692,0.566241,0.594854,0.525616,0.526378,0.529584,0.537405,0.588164


In [3]:
df = df.rename(columns={'Unnamed: 0': 'subject'})
df['status'] = (df['subject'].str.slice(start=3, stop=4) == "a").astype(int)
df.tail()

Unnamed: 0,subject,Fp1a delta,Fp1a theta,Fp1a alpha,Fp1a beta,Fp1a gamma,Fp2a delta,Fp2a theta,Fp2a alpha,Fp2a beta,...,P3/P4 theta,P3/P4 alpha,P3/P4 beta,P3/P4 gamma,O1/O2 delta,O1/O2 theta,O1/O2 alpha,O1/O2 beta,O1/O2 gamma,status
117,co3a0000458,5.034276,1.297364,2.77622,7.503791,1.389794,6.732382,1.791624,3.072971,8.030404,...,0.5543,0.502656,0.517526,0.541235,0.461837,0.480084,0.494039,0.503561,0.577265,1
118,co3a0000459,4.925952,1.284041,3.818269,4.602541,1.256179,4.730081,1.297648,3.812826,8.845018,...,0.476234,0.598763,0.56812,0.557745,0.508269,0.549875,0.526056,0.553384,0.539971,1
119,co3a0000460,5.413555,2.577622,2.853991,3.19214,0.737959,5.674537,2.723768,2.88598,3.208463,...,0.545288,0.59522,0.53965,0.551718,0.521833,0.565657,0.51636,0.473023,0.439638,1
120,co3a0000461,7.399629,1.507018,1.856139,2.528717,0.567038,5.505387,1.312715,2.250694,2.52448,...,0.605074,0.520692,0.566241,0.594854,0.525616,0.526378,0.529584,0.537405,0.588164,1
121,co3c0000402,5.284542,1.946691,1.135617,2.015422,0.492179,5.092513,1.986647,1.3372,2.166617,...,0.497312,0.492201,0.49275,0.493309,0.498716,0.516931,0.544294,0.517307,0.497229,0


In [4]:
print("Alcoholic subjects:", len(df.status.loc[df.status ==1]))
print("Control subjects:", len(df.status.loc[df.status == 0]))
print("Proportion of alcoholic subjects:", round(len(df.status.loc[df.status == 1]) / (len(df.status.loc[df.status == 1]) + len(df.status.loc[df.status == 0])), 3))

Alcoholic subjects: 77
Control subjects: 45
Proportion of alcoholic subjects: 0.631


> The dataset is then split into training and test sets and pipelines are prepared.

In [5]:
y = df.status
X = df.drop(['subject', 'status'], axis=1)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1234, stratify=df.status)

In [7]:
print(len(X_train), len(X_test), len(y_train), len(y_test))

91 31 91 31


In [8]:
pipelines = {
    'l1': make_pipeline(StandardScaler(), LogisticRegression(penalty='l1', solver='liblinear', random_state=123)),
    'l2': make_pipeline(StandardScaler(), LogisticRegression(penalty='l2', random_state=123)),
    'rf': make_pipeline(StandardScaler(), RandomForestClassifier(random_state=123)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state=123)),
    'svm_linear': make_pipeline(StandardScaler(), LinearSVC(random_state=123)),
    'svm_rbf': make_pipeline(StandardScaler(), SVC(kernel='rbf', random_state=123))
}

In [9]:
l1_hyperparameters = {'logisticregression__C': np.logspace(-3, 3, 13)}
l2_hyperparameters = {'logisticregression__C': np.logspace(-3, 3, 13)}
rf_hyperparameters = {
    'randomforestclassifier__n_estimators': [25, 50, 75, 100, 150, 200],
    'randomforestclassifier__max_features': [None, 'sqrt',0.2, 0.33]
}
gb_hyperparameters = {
    'gradientboostingclassifier__n_estimators': [25, 50, 75, 100, 150, 200],
    'gradientboostingclassifier__learning_rate': [0.05, 0.1, 0.2],
    'gradientboostingclassifier__max_depth': [1, 3, 5]
}
svm_linear_hyperparameters = {'linearsvc__C': [0.1, 1, 10, 100]}
svm_rbf_hyperparameters = {
    'svc__C': [0.1, 1, 10, 100],
    'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 10]
}

In [10]:
hyperparameters = {
    'l1': l1_hyperparameters,
    'l2': l2_hyperparameters,
    'rf': rf_hyperparameters,
    'gb': gb_hyperparameters,
    'svm_linear': svm_linear_hyperparameters,
    'svm_rbf': svm_rbf_hyperparameters
}

> The models are then trained and fitted using 3-fold cross validation to find optimal hyperparameters. Accuracy scores are shown, obtained from the best cross-validated models using the training data.

In [11]:
fitted_models = {}
for i in ['l1', 'l2', 'rf', 'gb', 'svm_linear', 'svm_rbf']:
    model = GridSearchCV(pipelines[i], hyperparameters[i], cv=3, scoring='f1', n_jobs=-1)
    model.fit(X_train, y_train)
    fitted_models[i] = model
    print(i, 'has been fitted')

l1 has been fitted
l2 has been fitted
rf has been fitted
gb has been fitted
svm_linear has been fitted
svm_rbf has been fitted


In [12]:
for name, model in fitted_models.items():
    print(name, model.best_score_)

l1 0.7722945617682461
l2 0.8184637068357999
rf 0.8032520325203253
gb 0.7457264957264957
svm_linear 0.7676623992413466
svm_rbf 0.8092352092352092


> The models are then used to make predictions on the test dataset, with metrics displayed in detail below. The confusion matrix in each case is formatted as follows:  
> 
> &nbsp; &nbsp; \[[ true negatives | false positives ]  
> &nbsp; &nbsp; &nbsp;[ false negatives | true positives ]]  
> 
> **L2-regularised logistic regression** is the winning model for both accuracy and (class 1) F1 score.

In [13]:
print("Scores for each model:\n")
for name, model in fitted_models.items():
    if name in ['l1', 'l2', 'rf', 'gb']:
        pred = model.predict_proba(X_test)
        pred = pred[:, 1]
        y_pred = [int(p > 0.5) for p in pred]
    else:
        pred = model.decision_function(X_test)
        y_pred = [int(p > 0) for p in pred]
    fpr, tpr, thresholds = roc_curve(y_test, pred)
    print(name, "- AUROC:", auc(fpr, tpr), " F1 score:", f1_score(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    print()

Scores for each model:

l1 - AUROC: 0.7727272727272727  F1 score: 0.7058823529411764
[[ 9  2]
 [ 8 12]]
              precision    recall  f1-score   support

           0       0.53      0.82      0.64        11
           1       0.86      0.60      0.71        20

    accuracy                           0.68        31
   macro avg       0.69      0.71      0.67        31
weighted avg       0.74      0.68      0.68        31


l2 - AUROC: 0.7727272727272728  F1 score: 0.8292682926829269
[[ 7  4]
 [ 3 17]]
              precision    recall  f1-score   support

           0       0.70      0.64      0.67        11
           1       0.81      0.85      0.83        20

    accuracy                           0.77        31
   macro avg       0.75      0.74      0.75        31
weighted avg       0.77      0.77      0.77        31


rf - AUROC: 0.7840909090909091  F1 score: 0.7567567567567567
[[ 8  3]
 [ 6 14]]
              precision    recall  f1-score   support

           0       0.57  

> The next cells use **nested cross validation**. This was tried because of the relatively small size of the dataset. In this process, there are 4 outer loops of validation. Within each outer loop, an inner loop of 3-fold cross validation is performed as before, and the best model for each algorithm is tested on a hold-out set. Because nested cross validation does not produce a single best fitted model, the aim is to find the best *process* from amongst the various algorithms tested, based on average metrics across the 4 outer loops.
> 
> Additionally here, **threshold optimisation** is applied. In an attempt to improve upon the default threshold of 0.5 for the prediction probability (or 0 for decision function, which becomes 0.5 when the logistic function is applied), thresholds from 0.01 to 0.99 are tested on the models to find the best average F1 score across the 3 inner loops of cross validation. Note that a different best threshold might be produced for each outer loop of validation, so the average of these 4 results, obtained by testing the best model on the hold-out set in each case, is shown in the summary at the end of this cell's output.

In [14]:
summary = "Mean scores of best models:\n\n"
for i in ['l1', 'l2', 'rf', 'gb', 'svm_linear', 'svm_rbf']:
    print("****", i.upper(), "****\n")
    cv_outer = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    outer_results_def_thr = list()
    outer_results_best_thr = list()
    outloop = 0
    for train_ix, test_ix in cv_outer.split(X, y):
        outloop += 1
        X_train, X_test = X.iloc[train_ix, :], X.iloc[test_ix, :]
        y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
        cv_inner = StratifiedKFold(n_splits=3, shuffle=True, random_state=1)
        clf = GridSearchCV(pipelines[i], hyperparameters[i], cv=cv_inner, scoring='f1', n_jobs=-1)
        clf.fit(X_train, y_train)
        f1_array = np.zeros(99)
        for itrain_ix, itest_ix in cv_inner.split(X_train, y_train):
            iX_train, iX_test = X_train.iloc[itrain_ix, :], X_train.iloc[itest_ix, :]
            iy_train, iy_test = y_train.iloc[itrain_ix], y_train.iloc[itest_ix]
            model = pipelines[i]
            model.set_params(**clf.best_params_)
            model.fit(iX_train, iy_train)
            if i in ['l1', 'l2', 'rf', 'gb']:
                pred = model.predict_proba(iX_test)
                pred = pred[:, 1]
            else:
                pred = model.decision_function(iX_test)
            for thr in range(1, 100):
                if i in ['l1', 'l2', 'rf', 'gb']:
                    y_pred = [int(p > thr / 100) for p in pred]
                else:
                    y_pred = [int(expit(p) > thr / 100) for p in pred]
                f1_array[int(thr - 1)] += f1_score(iy_test, y_pred, zero_division=0)
        best_threshold = (np.argmax(f1_array) + 1) / 100
        if i in ['svm_linear', 'svm_rbf']:
            best_threshold = logit(best_threshold)
        if i in ['l1', 'l2', 'rf', 'gb']:
            pred = clf.predict_proba(X_test)
            pred = pred[:, 1]
            y_pred = [int(p > 0.5) for p in pred]
        else:
            pred = clf.decision_function(X_test)
            y_pred = [int(p > 0) for p in pred]
        print("Outer loop " + str(outloop) + ":\n")
        print("Default threshold")
        fpr, tpr, thresholds = roc_curve(y_test, pred)
        print(" F1 score:", f1_score(y_test, y_pred))
        print(confusion_matrix(y_test, y_pred))
        print()
        outer_results_def_thr.append({'f1': f1_score(y_test, y_pred, zero_division=0), 'AUROC': auc(fpr, tpr),
                                      'sensitivity': recall_score(y_test, y_pred), 'specificity': recall_score(y_test, y_pred, pos_label=0),
                                      'PPV': precision_score(y_test, y_pred, zero_division=0), 'NPV': precision_score(y_test, y_pred, pos_label=0, zero_division=0)})
        print("Best threshold:", best_threshold)
        if i in ['l1', 'l2', 'rf', 'gb']:
            y_pred = [int(p > best_threshold) for p in pred]
        else:
            y_pred = [int(expit(p) > best_threshold) for p in pred]
        fpr, tpr, thresholds = roc_curve(y_test, pred)
        print(" F1 score:", f1_score(y_test, y_pred))
        print(confusion_matrix(y_test, y_pred))
        print()
        outer_results_best_thr.append({'f1': f1_score(y_test, y_pred, zero_division=0), 'AUROC': auc(fpr, tpr),
                                      'sensitivity': recall_score(y_test, y_pred), 'specificity': recall_score(y_test, y_pred, pos_label=0),
                                      'PPV': precision_score(y_test, y_pred, zero_division=0), 'NPV': precision_score(y_test, y_pred, pos_label=0, zero_division=0)})
    summary = summary + i + "\n  Default threshold\n    AUROC: " + str(np.array([dict['AUROC'] for dict in outer_results_def_thr]).mean()) + "  F1 score: " + str(np.array([dict['f1'] for dict in outer_results_def_thr]).mean())
    summary = summary + "\n    Sensitivity: " + str(np.array([dict['sensitivity'] for dict in outer_results_def_thr]).mean())[:6] + "  Specificity: " + str(np.array([dict['specificity'] for dict in outer_results_def_thr]).mean())[:6]
    summary = summary + "\n    PPV: " + str(np.array([dict['PPV'] for dict in outer_results_def_thr]).mean())[:6] + "  NPV: " + str(np.array([dict['NPV'] for dict in outer_results_def_thr]).mean())[:6]
    summary = summary + "\n  Best thresholds\n    AUROC: " + str(np.array([dict['AUROC'] for dict in outer_results_best_thr]).mean()) + "  F1 score: " + str(np.array([dict['f1'] for dict in outer_results_best_thr]).mean())
    summary = summary + "\n    Sensitivity: " + str(np.array([dict['sensitivity'] for dict in outer_results_best_thr]).mean())[:6] + "  Specificity: " + str(np.array([dict['specificity'] for dict in outer_results_best_thr]).mean())[:6]
    summary = summary + "\n    PPV: " + str(np.array([dict['PPV'] for dict in outer_results_best_thr]).mean())[:6] + "  NPV: " + str(np.array([dict['NPV'] for dict in outer_results_best_thr]).mean())[:6] + "\n"
print(summary) 

**** L1 ****

Outer loop 1:

Default threshold
 F1 score: 0.7368421052631577
[[ 7  4]
 [ 6 14]]

Best threshold: 0.03
 F1 score: 0.75
[[ 6  5]
 [ 5 15]]

Outer loop 2:

Default threshold
 F1 score: 0.8571428571428571
[[ 7  5]
 [ 1 18]]

Best threshold: 0.11
 F1 score: 0.7916666666666666
[[ 2 10]
 [ 0 19]]

Outer loop 3:

Default threshold
 F1 score: 0.6666666666666667
[[ 6  5]
 [ 7 12]]

Best threshold: 0.46
 F1 score: 0.6486486486486486
[[ 5  6]
 [ 7 12]]

Outer loop 4:

Default threshold
 F1 score: 0.6470588235294117
[[ 7  4]
 [ 8 11]]

Best threshold: 0.43
 F1 score: 0.7692307692307692
[[ 6  5]
 [ 4 15]]

**** L2 ****

Outer loop 1:

Default threshold
 F1 score: 0.7894736842105262
[[ 8  3]
 [ 5 15]]

Best threshold: 0.48
 F1 score: 0.8205128205128205
[[ 8  3]
 [ 4 16]]

Outer loop 2:

Default threshold
 F1 score: 0.8837209302325582
[[ 7  5]
 [ 0 19]]

Best threshold: 0.55
 F1 score: 0.8837209302325582
[[ 7  5]
 [ 0 19]]

Outer loop 3:

Default threshold
 F1 score: 0.7999999999999999

> Nested cross validation is used again, this time using area under receiver operating curve (AUROC) as the optimising variable for hyperparameters, and geometric mean to optimise thresholds.

In [15]:
def g_mean(y_test, y_pred):
    g_mean = np.sqrt(recall_score(y_test, y_pred) * recall_score(y_test, y_pred, pos_label=0))
    return g_mean

summary = "Mean scores of best models:\n\n"

for i in ['l1', 'l2', 'rf', 'gb', 'svm_linear', 'svm_rbf']:
    print("****", i.upper(), "****\n")
    cv_outer = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    outer_results_def_thr = list()
    outer_results_best_thr = list()
    outloop = 0
    for train_ix, test_ix in cv_outer.split(X, y):
        outloop += 1
        X_train, X_test = X.iloc[train_ix, :], X.iloc[test_ix, :]
        y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
        cv_inner = StratifiedKFold(n_splits=3, shuffle=True, random_state=1)
        clf = GridSearchCV(pipelines[i], hyperparameters[i], cv=cv_inner, scoring='roc_auc', n_jobs=-1)
        clf.fit(X_train, y_train)
        gm_array = np.zeros(99)
        for itrain_ix, itest_ix in cv_inner.split(X_train, y_train):
            iX_train, iX_test = X_train.iloc[itrain_ix, :], X_train.iloc[itest_ix, :]
            iy_train, iy_test = y_train.iloc[itrain_ix], y_train.iloc[itest_ix]
            model = pipelines[i]
            model.set_params(**clf.best_params_)
            model.fit(iX_train, iy_train)
            if i in ['l1', 'l2', 'rf', 'gb']:
                pred = model.predict_proba(iX_test)
                pred = pred[:, 1]
            else:
                pred = model.decision_function(iX_test)
            for thr in range(1, 100):
                if i in ['l1', 'l2', 'rf', 'gb']:
                    y_pred = [int(p > thr / 100) for p in pred]
                else:
                    y_pred = [int(expit(p) > thr / 100) for p in pred]
                gm_array[int(thr - 1)] += g_mean(iy_test, y_pred)
        best_threshold = (np.argmax(gm_array) + 1) / 100
        if i in ['svm_linear', 'svm_rbf']:
            best_threshold = logit(best_threshold)
        if i in ['l1', 'l2', 'rf', 'gb']:
            pred = clf.predict_proba(X_test)
            pred = pred[:, 1]
            y_pred = [int(p > 0.5) for p in pred]
        else:
            pred = clf.decision_function(X_test)
            y_pred = [int(p > 0) for p in pred]
        print("Outer loop " + str(outloop) + ":\n")
        print("Default threshold")
        fpr, tpr, thresholds = roc_curve(y_test, pred)
        print(" G-mean:", g_mean(y_test, y_pred))
        print(confusion_matrix(y_test, y_pred))
        print()
        outer_results_def_thr.append({'f1': f1_score(y_test, y_pred, zero_division=0), 'AUROC': auc(fpr, tpr), 'g-mean': g_mean(y_test, y_pred),
                                      'sensitivity': recall_score(y_test, y_pred), 'specificity': recall_score(y_test, y_pred, pos_label=0),
                                      'PPV': precision_score(y_test, y_pred, zero_division=0), 'NPV': precision_score(y_test, y_pred, pos_label=0, zero_division=0)})
        print("Best threshold:", best_threshold)
        if i in ['l1', 'l2', 'rf', 'gb']:
            y_pred = [int(p > best_threshold) for p in pred]
        else:
            y_pred = [int(expit(p) > best_threshold) for p in pred]
        fpr, tpr, thresholds = roc_curve(y_test, pred)
        print(" G-mean:", g_mean(y_test, y_pred))
        print(confusion_matrix(y_test, y_pred))
        print()
        outer_results_best_thr.append({'f1': f1_score(y_test, y_pred, zero_division=0), 'AUROC': auc(fpr, tpr), 'g-mean': g_mean(y_test, y_pred),
                                      'sensitivity': recall_score(y_test, y_pred), 'specificity': recall_score(y_test, y_pred, pos_label=0),
                                      'PPV': precision_score(y_test, y_pred, zero_division=0), 'NPV': precision_score(y_test, y_pred, pos_label=0, zero_division=0)})
    summary = summary + i + "\n  Default threshold\n    AUROC: " + str(np.array([dict['AUROC'] for dict in outer_results_def_thr]).mean()) + "  F1 score: " + str(np.array([dict['f1'] for dict in outer_results_def_thr]).mean())
    summary = summary + "\n    Sensitivity: " + str(np.array([dict['sensitivity'] for dict in outer_results_def_thr]).mean())[:6] + "  Specificity: " + str(np.array([dict['specificity'] for dict in outer_results_def_thr]).mean())[:6]
    summary = summary + "\n    PPV: " + str(np.array([dict['PPV'] for dict in outer_results_def_thr]).mean())[:6] + "  NPV: " + str(np.array([dict['NPV'] for dict in outer_results_def_thr]).mean())[:6]
    summary = summary + "\n    G-mean: " + str(np.array([dict['g-mean'] for dict in outer_results_def_thr]).mean())
    summary = summary + "\n  Best thresholds\n    AUROC: " + str(np.array([dict['AUROC'] for dict in outer_results_best_thr]).mean()) + "  F1 score: " + str(np.array([dict['f1'] for dict in outer_results_best_thr]).mean())
    summary = summary + "\n    Sensitivity: " + str(np.array([dict['sensitivity'] for dict in outer_results_best_thr]).mean())[:6] + "  Specificity: " + str(np.array([dict['specificity'] for dict in outer_results_best_thr]).mean())[:6]
    summary = summary + "\n    PPV: " + str(np.array([dict['PPV'] for dict in outer_results_best_thr]).mean())[:6] + "  NPV: " + str(np.array([dict['NPV'] for dict in outer_results_best_thr]).mean())[:6]
    summary = summary + "\n    G-mean: " + str(np.array([dict['g-mean'] for dict in outer_results_best_thr]).mean()) + "\n"

print(summary) 

**** L1 ****

Outer loop 1:

Default threshold
 G-mean: 0.6875516509523286
[[ 8  3]
 [ 7 13]]

Best threshold: 0.42
 G-mean: 0.6431456783935997
[[ 7  4]
 [ 7 13]]

Outer loop 2:

Default threshold
 G-mean: 0.7433919416750282
[[ 7  5]
 [ 1 18]]

Best threshold: 0.63
 G-mean: 0.7433919416750282
[[ 7  5]
 [ 1 18]]

Outer loop 3:

Default threshold
 G-mean: 0.659853543268954
[[ 7  4]
 [ 6 13]]

Best threshold: 0.56
 G-mean: 0.6488856845230502
[[ 8  3]
 [ 8 11]]

Outer loop 4:

Default threshold
 G-mean: 0.606976978666884
[[ 7  4]
 [ 8 11]]

Best threshold: 0.52
 G-mean: 0.606976978666884
[[ 7  4]
 [ 8 11]]

**** L2 ****

Outer loop 1:

Default threshold
 G-mean: 0.7292586522065966
[[ 9  2]
 [ 7 13]]

Best threshold: 0.57
 G-mean: 0.6708203932499369
[[ 9  2]
 [ 9 11]]

Outer loop 2:

Default threshold
 G-mean: 0.5
[[ 3  9]
 [ 0 19]]

Best threshold: 0.66
 G-mean: 0.8191780219091253
[[ 9  3]
 [ 2 17]]

Outer loop 3:

Default threshold
 G-mean: 0.30151134457776363
[[ 1 10]
 [ 0 19]]

Best thr

> The most favourable result overall appears to be the one using **L2-regularised logistic regression** with F1 score as the optimising variable. This process is then applied anew using 3-fold cross validation on the full dataset in order to produce a fitted model and a best threshold specific to this model.

In [16]:
i = 'l2'
results_best_thr = list()
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=2)
clf = GridSearchCV(pipelines[i], hyperparameters[i], cv=cv, scoring='f1', n_jobs=-1)
clf.fit(X, y)
f1_array = np.zeros(99)
for train_ix, test_is in cv.split(X, y):
    X_train, X_test = X.iloc[train_ix, :], X.iloc[test_ix, :]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    model = pipelines[i]
    model.set_params(**clf.best_params_)
    model.fit(X_train, y_train)
    if i in ['l1', 'l2', 'rf', 'gb']:
        pred = model.predict_proba(X_test)
        pred = pred[:, 1]
    else:
        pred = model.decision_function(X_test)
    for thr in range(1, 100):
        if i in ['l1', 'l2', 'rf', 'gb']:
            y_pred = [int(p > thr / 100) for p in pred]
        else:
            y_pred = [int(expit(p) > thr / 100) for p in pred]
        f1_array[int(thr - 1)] += f1_score(y_test, y_pred)
best_threshold = (np.argmax(f1_array) + 1) / 100
if i in ['svm_linear', 'svm_rbf']:
    best_threshold = logit(best_threshold)
fitted_model_ncv = clf
best_thr_ncv = best_threshold
print("Best threshold for", i.upper(), "model =", best_thr_ncv)

Best threshold for L2 model = 0.38


> **WITHOUT ABSOLUTE POWERS**
> 
> In the following sections I've repeated all the above processes with absolute powers removed from the dataset to leave only  relative powers and lateralisation variables. The presumption I've made is that different testing conditions such as different machinery and different ground lead placement may lead to variation in absolute powers, whereas if these are all varied in a similar manner then relative powers should be more comparable across testing locations.

In [17]:
np.array(df.columns)

array(['subject', 'Fp1a delta', 'Fp1a theta', 'Fp1a alpha', 'Fp1a beta',
       'Fp1a gamma', 'Fp2a delta', 'Fp2a theta', 'Fp2a alpha',
       'Fp2a beta', 'Fp2a gamma', 'F3a delta', 'F3a theta', 'F3a alpha',
       'F3a beta', 'F3a gamma', 'F4a delta', 'F4a theta', 'F4a alpha',
       'F4a beta', 'F4a gamma', 'F7a delta', 'F7a theta', 'F7a alpha',
       'F7a beta', 'F7a gamma', 'F8a delta', 'F8a theta', 'F8a alpha',
       'F8a beta', 'F8a gamma', 'C3a delta', 'C3a theta', 'C3a alpha',
       'C3a beta', 'C3a gamma', 'C4a delta', 'C4a theta', 'C4a alpha',
       'C4a beta', 'C4a gamma', 'P3a delta', 'P3a theta', 'P3a alpha',
       'P3a beta', 'P3a gamma', 'P4a delta', 'P4a theta', 'P4a alpha',
       'P4a beta', 'P4a gamma', 'O1a delta', 'O1a theta', 'O1a alpha',
       'O1a beta', 'O1a gamma', 'O2a delta', 'O2a theta', 'O2a alpha',
       'O2a beta', 'O2a gamma', 'Fp1r delta', 'Fp1r theta', 'Fp1r alpha',
       'Fp1r beta', 'Fp1r gamma', 'Fp2r delta', 'Fp2r theta',
       'Fp2r alp

In [18]:
y = df.status
X = df.drop(['subject', 'Fp1a delta', 'Fp1a theta', 'Fp1a alpha', 'Fp1a beta',
       'Fp1a gamma', 'Fp2a delta', 'Fp2a theta', 'Fp2a alpha',
       'Fp2a beta', 'Fp2a gamma', 'F3a delta', 'F3a theta', 'F3a alpha',
       'F3a beta', 'F3a gamma', 'F4a delta', 'F4a theta', 'F4a alpha',
       'F4a beta', 'F4a gamma', 'F7a delta', 'F7a theta', 'F7a alpha',
       'F7a beta', 'F7a gamma', 'F8a delta', 'F8a theta', 'F8a alpha',
       'F8a beta', 'F8a gamma', 'C3a delta', 'C3a theta', 'C3a alpha',
       'C3a beta', 'C3a gamma', 'C4a delta', 'C4a theta', 'C4a alpha',
       'C4a beta', 'C4a gamma', 'P3a delta', 'P3a theta', 'P3a alpha',
       'P3a beta', 'P3a gamma', 'P4a delta', 'P4a theta', 'P4a alpha',
       'P4a beta', 'P4a gamma', 'O1a delta', 'O1a theta', 'O1a alpha',
       'O1a beta', 'O1a gamma', 'O2a delta', 'O2a theta', 'O2a alpha',
       'O2a beta', 'O2a gamma', 'status'], axis=1)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1234, stratify=df.status)

> Cross validation is performed as before to produce a fitted model for each algorithm.

In [20]:
fitted_models_no_ap = {}
for i in ['l1', 'l2', 'rf', 'gb', 'svm_linear', 'svm_rbf']:
    model = GridSearchCV(pipelines[i], hyperparameters[i], cv=3, scoring='f1', n_jobs=-1)
    model.fit(X_train, y_train)
    fitted_models_no_ap[i] = model
    print(i, 'has been fitted')

l1 has been fitted
l2 has been fitted
rf has been fitted
gb has been fitted
svm_linear has been fitted
svm_rbf has been fitted


In [21]:
for name, model in fitted_models_no_ap.items():
    print(name, model.best_score_)

l1 0.703545650914072
l2 0.7780345707544899
rf 0.7447089947089948
gb 0.7540650406504065
svm_linear 0.6964076858813701
svm_rbf 0.7703401360544216


> Choosing the best model below is somewhat subjective. The support vector machine using radial basis function kernel (svm_rbf) gives the best F1 score but the model is useless because every subject is predicted as being alcoholic. I've chosen the **support vector machine** using a **linear kernel (svm_linear)** as this appears to be the most clinically useful model in terms of correctly identifying a non-alcoholic subject out of the 4 models with an equal highest accuracy of 0.68.

In [22]:
print("Scores for each model:\n")
for name, model in fitted_models_no_ap.items():
    if name in ['l1', 'l2', 'rf', 'gb']:
        pred = model.predict_proba(X_test)
        pred = [p[1] for p in pred]
        y_pred = [int(p > 0.5) for p in pred]
    else:
        pred = model.decision_function(X_test)
        y_pred = [int(p > 0) for p in pred]
    fpr, tpr, thresholds = roc_curve(y_test, pred)
    print(name, "- AUROC:", auc(fpr, tpr), " F1 score:", f1_score(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    print()

Scores for each model:

l1 - AUROC: 0.7818181818181819  F1 score: 0.6666666666666667
[[ 9  2]
 [ 9 11]]
              precision    recall  f1-score   support

           0       0.50      0.82      0.62        11
           1       0.85      0.55      0.67        20

    accuracy                           0.65        31
   macro avg       0.67      0.68      0.64        31
weighted avg       0.72      0.65      0.65        31


l2 - AUROC: 0.7363636363636364  F1 score: 0.761904761904762
[[ 5  6]
 [ 4 16]]
              precision    recall  f1-score   support

           0       0.56      0.45      0.50        11
           1       0.73      0.80      0.76        20

    accuracy                           0.68        31
   macro avg       0.64      0.63      0.63        31
weighted avg       0.67      0.68      0.67        31


rf - AUROC: 0.6522727272727272  F1 score: 0.7368421052631577
[[ 7  4]
 [ 6 14]]
              precision    recall  f1-score   support

           0       0.54   

  _warn_prf(average, modifier, msg_start, len(result))


> Nested cross validation is performed as previously.

In [23]:
summary = "Mean scores of best models:\n\n"
for i in ['l1', 'l2', 'rf', 'gb', 'svm_linear', 'svm_rbf']:
    print("****", i.upper(), "****\n")
    cv_outer = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    outer_results_def_thr = list()
    outer_results_best_thr = list()
    outloop = 0
    for train_ix, test_ix in cv_outer.split(X, y):
        outloop += 1
        X_train, X_test = X.iloc[train_ix, :], X.iloc[test_ix, :]
        y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
        cv_inner = StratifiedKFold(n_splits=3, shuffle=True, random_state=1)
        clf = GridSearchCV(pipelines[i], hyperparameters[i], cv=cv_inner, scoring='f1', n_jobs=-1)
        clf.fit(X_train, y_train)
        f1_array = np.zeros(99)
        for itrain_ix, itest_ix in cv_inner.split(X_train, y_train):
            iX_train, iX_test = X_train.iloc[itrain_ix, :], X_train.iloc[itest_ix, :]
            iy_train, iy_test = y_train.iloc[itrain_ix], y_train.iloc[itest_ix]
            model = pipelines[i]
            model.set_params(**clf.best_params_)
            model.fit(iX_train, iy_train)
            if i in ['l1', 'l2', 'rf', 'gb']:
                pred = model.predict_proba(iX_test)
                pred = pred[:, 1]
            else:
                pred = model.decision_function(iX_test)
            for thr in range(1, 100):
                if i in ['l1', 'l2', 'rf', 'gb']:
                    y_pred = [int(p > thr / 100) for p in pred]
                else:
                    y_pred = [int(expit(p) > thr / 100) for p in pred]
                f1_array[int(thr - 1)] += f1_score(iy_test, y_pred, zero_division=0)
        best_threshold = (np.argmax(f1_array) + 1) / 100
        if i in ['svm_linear', 'svm_rbf']:
            best_threshold = logit(best_threshold)
        if i in ['l1', 'l2', 'rf', 'gb']:
            pred = clf.predict_proba(X_test)
            pred = pred[:, 1]
            y_pred = [int(p > 0.5) for p in pred]
        else:
            pred = clf.decision_function(X_test)
            y_pred = [int(p > 0) for p in pred]
        print("Outer loop " + str(outloop) + ":\n")
        print("Default threshold")
        fpr, tpr, thresholds = roc_curve(y_test, pred)
        print(" F1 score:", f1_score(y_test, y_pred))
        print(confusion_matrix(y_test, y_pred))
        print()
        outer_results_def_thr.append({'f1': f1_score(y_test, y_pred, zero_division=0), 'AUROC': auc(fpr, tpr),
                                      'sensitivity': recall_score(y_test, y_pred), 'specificity': recall_score(y_test, y_pred, pos_label=0),
                                      'PPV': precision_score(y_test, y_pred, zero_division=0), 'NPV': precision_score(y_test, y_pred, pos_label=0, zero_division=0)})
        print("Best threshold:", best_threshold)
        if i in ['l1', 'l2', 'rf', 'gb']:
            y_pred = [int(p > best_threshold) for p in pred]
        else:
            y_pred = [int(expit(p) > best_threshold) for p in pred]
        fpr, tpr, thresholds = roc_curve(y_test, pred)
        print(" F1 score:", f1_score(y_test, y_pred))
        print(confusion_matrix(y_test, y_pred))
        print()
        outer_results_best_thr.append({'f1': f1_score(y_test, y_pred, zero_division=0), 'AUROC': auc(fpr, tpr),
                                      'sensitivity': recall_score(y_test, y_pred), 'specificity': recall_score(y_test, y_pred, pos_label=0),
                                      'PPV': precision_score(y_test, y_pred, zero_division=0), 'NPV': precision_score(y_test, y_pred, pos_label=0, zero_division=0)})
    summary = summary + i + "\n  Default threshold\n    AUROC: " + str(np.array([dict['AUROC'] for dict in outer_results_def_thr]).mean()) + "  F1 score: " + str(np.array([dict['f1'] for dict in outer_results_def_thr]).mean())
    summary = summary + "\n    Sensitivity: " + str(np.array([dict['sensitivity'] for dict in outer_results_def_thr]).mean())[:6] + "  Specificity: " + str(np.array([dict['specificity'] for dict in outer_results_def_thr]).mean())[:6]
    summary = summary + "\n    PPV: " + str(np.array([dict['PPV'] for dict in outer_results_def_thr]).mean())[:6] + "  NPV: " + str(np.array([dict['NPV'] for dict in outer_results_def_thr]).mean())[:6]
    summary = summary + "\n  Best thresholds\n    AUROC: " + str(np.array([dict['AUROC'] for dict in outer_results_best_thr]).mean()) + "  F1 score: " + str(np.array([dict['f1'] for dict in outer_results_best_thr]).mean())
    summary = summary + "\n    Sensitivity: " + str(np.array([dict['sensitivity'] for dict in outer_results_best_thr]).mean())[:6] + "  Specificity: " + str(np.array([dict['specificity'] for dict in outer_results_best_thr]).mean())[:6]
    summary = summary + "\n    PPV: " + str(np.array([dict['PPV'] for dict in outer_results_best_thr]).mean())[:6] + "  NPV: " + str(np.array([dict['NPV'] for dict in outer_results_best_thr]).mean())[:6] + "\n"
print(summary) 

**** L1 ****

Outer loop 1:

Default threshold
 F1 score: 0.631578947368421
[[ 5  6]
 [ 8 12]]

Best threshold: 0.01
 F1 score: 0.7111111111111111
[[ 2  9]
 [ 4 16]]

Outer loop 2:

Default threshold
 F1 score: 0.8292682926829269
[[ 7  5]
 [ 2 17]]

Best threshold: 0.01
 F1 score: 0.76
[[ 0 12]
 [ 0 19]]

Outer loop 3:

Default threshold
 F1 score: 0.8421052631578947
[[ 8  3]
 [ 3 16]]

Best threshold: 0.11
 F1 score: 0.7755102040816326
[[ 0 11]
 [ 0 19]]

Outer loop 4:

Default threshold
 F1 score: 0.7027027027027027
[[ 6  5]
 [ 6 13]]

Best threshold: 0.01
 F1 score: 0.7500000000000001
[[ 5  6]
 [ 4 15]]

**** L2 ****

Outer loop 1:

Default threshold
 F1 score: 0.7441860465116279
[[ 4  7]
 [ 4 16]]

Best threshold: 0.46
 F1 score: 0.7555555555555556
[[ 3  8]
 [ 3 17]]

Outer loop 2:

Default threshold
 F1 score: 0.76
[[ 0 12]
 [ 0 19]]

Best threshold: 0.01
 F1 score: 0.76
[[ 0 12]
 [ 0 19]]

Outer loop 3:

Default threshold
 F1 score: 0.8085106382978724
[[ 2  9]
 [ 0 19]]

Best thr

In [24]:
def g_mean(y_test, y_pred):
    g_mean = np.sqrt(recall_score(y_test, y_pred) * recall_score(y_test, y_pred, pos_label=0))
    return g_mean

summary = "Mean scores of best models:\n\n"

for i in ['l1', 'l2', 'rf', 'gb', 'svm_linear', 'svm_rbf']:
    print("****", i.upper(), "****\n")
    cv_outer = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    outer_results_def_thr = list()
    outer_results_best_thr = list()
    outloop = 0
    for train_ix, test_ix in cv_outer.split(X, y):
        outloop += 1
        X_train, X_test = X.iloc[train_ix, :], X.iloc[test_ix, :]
        y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
        cv_inner = StratifiedKFold(n_splits=3, shuffle=True, random_state=1)
        clf = GridSearchCV(pipelines[i], hyperparameters[i], cv=cv_inner, scoring='roc_auc', n_jobs=-1)
        clf.fit(X_train, y_train)
        gm_array = np.zeros(99)
        for itrain_ix, itest_ix in cv_inner.split(X_train, y_train):
            iX_train, iX_test = X_train.iloc[itrain_ix, :], X_train.iloc[itest_ix, :]
            iy_train, iy_test = y_train.iloc[itrain_ix], y_train.iloc[itest_ix]
            model = pipelines[i]
            model.set_params(**clf.best_params_)
            model.fit(iX_train, iy_train)
            if i in ['l1', 'l2', 'rf', 'gb']:
                pred = model.predict_proba(iX_test)
                pred = pred[:, 1]
            else:
                pred = model.decision_function(iX_test)
            for thr in range(1, 100):
                if i in ['l1', 'l2', 'rf', 'gb']:
                    y_pred = [int(p > thr / 100) for p in pred]
                else:
                    y_pred = [int(expit(p) > thr / 100) for p in pred]
                gm_array[int(thr - 1)] += g_mean(iy_test, y_pred)
        best_threshold = (np.argmax(gm_array) + 1) / 100
        if i in ['svm_linear', 'svm_rbf']:
            best_threshold = logit(best_threshold)
        if i in ['l1', 'l2', 'rf', 'gb']:
            pred = clf.predict_proba(X_test)
            pred = pred[:, 1]
            y_pred = [int(p > 0.5) for p in pred]
        else:
            pred = clf.decision_function(X_test)
            y_pred = [int(p > 0) for p in pred]
        print("Outer loop " + str(outloop) + ":\n")
        print("Default threshold")
        fpr, tpr, thresholds = roc_curve(y_test, pred)
        print(" G-mean:", g_mean(y_test, y_pred))
        print(confusion_matrix(y_test, y_pred))
        print()
        outer_results_def_thr.append({'f1': f1_score(y_test, y_pred, zero_division=0), 'AUROC': auc(fpr, tpr), 'g-mean': g_mean(y_test, y_pred),
                                      'sensitivity': recall_score(y_test, y_pred), 'specificity': recall_score(y_test, y_pred, pos_label=0),
                                      'PPV': precision_score(y_test, y_pred, zero_division=0), 'NPV': precision_score(y_test, y_pred, pos_label=0, zero_division=0)})
        print("Best threshold:", best_threshold)
        if i in ['l1', 'l2', 'rf', 'gb']:
            y_pred = [int(p > best_threshold) for p in pred]
        else:
            y_pred = [int(expit(p) > best_threshold) for p in pred]
        fpr, tpr, thresholds = roc_curve(y_test, pred)
        print(" G-mean:", g_mean(y_test, y_pred))
        print(confusion_matrix(y_test, y_pred))
        print()
        outer_results_best_thr.append({'f1': f1_score(y_test, y_pred, zero_division=0), 'AUROC': auc(fpr, tpr), 'g-mean': g_mean(y_test, y_pred),
                                      'sensitivity': recall_score(y_test, y_pred), 'specificity': recall_score(y_test, y_pred, pos_label=0),
                                      'PPV': precision_score(y_test, y_pred, zero_division=0), 'NPV': precision_score(y_test, y_pred, pos_label=0, zero_division=0)})
    summary = summary + i + "\n  Default threshold\n    AUROC: " + str(np.array([dict['AUROC'] for dict in outer_results_def_thr]).mean()) + "  F1 score: " + str(np.array([dict['f1'] for dict in outer_results_def_thr]).mean())
    summary = summary + "\n    Sensitivity: " + str(np.array([dict['sensitivity'] for dict in outer_results_def_thr]).mean())[:6] + "  Specificity: " + str(np.array([dict['specificity'] for dict in outer_results_def_thr]).mean())[:6]
    summary = summary + "\n    PPV: " + str(np.array([dict['PPV'] for dict in outer_results_def_thr]).mean())[:6] + "  NPV: " + str(np.array([dict['NPV'] for dict in outer_results_def_thr]).mean())[:6]
    summary = summary + "\n    G-mean: " + str(np.array([dict['g-mean'] for dict in outer_results_def_thr]).mean())
    summary = summary + "\n  Best thresholds\n    AUROC: " + str(np.array([dict['AUROC'] for dict in outer_results_best_thr]).mean()) + "  F1 score: " + str(np.array([dict['f1'] for dict in outer_results_best_thr]).mean())
    summary = summary + "\n    Sensitivity: " + str(np.array([dict['sensitivity'] for dict in outer_results_best_thr]).mean())[:6] + "  Specificity: " + str(np.array([dict['specificity'] for dict in outer_results_best_thr]).mean())[:6]
    summary = summary + "\n    PPV: " + str(np.array([dict['PPV'] for dict in outer_results_best_thr]).mean())[:6] + "  NPV: " + str(np.array([dict['NPV'] for dict in outer_results_best_thr]).mean())[:6]
    summary = summary + "\n    G-mean: " + str(np.array([dict['g-mean'] for dict in outer_results_best_thr]).mean()) + "\n"

print(summary) 

**** L1 ****

Outer loop 1:

Default threshold
 G-mean: 0.543557306504609
[[ 5  6]
 [ 7 13]]

Best threshold: 0.79
 G-mean: 0.5222329678670935
[[ 5  6]
 [ 8 12]]

Outer loop 2:

Default threshold
 G-mean: 0.7224471084871221
[[ 7  5]
 [ 2 17]]

Best threshold: 0.61
 G-mean: 0.7163503994113789
[[ 9  3]
 [ 6 13]]

Outer loop 3:

Default threshold
 G-mean: 0.7825855808712295
[[ 8  3]
 [ 3 16]]

Best threshold: 0.57
 G-mean: 0.7320417798624029
[[ 8  3]
 [ 5 14]]

Outer loop 4:

Default threshold
 G-mean: 0.5357997197768198
[[ 5  6]
 [ 7 12]]

Best threshold: 0.31
 G-mean: 0.618688224889746
[[ 5  6]
 [ 3 16]]

**** L2 ****

Outer loop 1:

Default threshold
 G-mean: 0.48617243480439776
[[ 4  7]
 [ 7 13]]

Best threshold: 0.7
 G-mean: 0.5477225575051661
[[ 6  5]
 [ 9 11]]

Outer loop 2:

Default threshold
 G-mean: 0.0
[[ 0 12]
 [ 0 19]]

Best threshold: 0.63
 G-mean: 0.8191780219091253
[[ 9  3]
 [ 2 17]]

Outer loop 3:

Default threshold
 G-mean: 0.0
[[ 0 11]
 [ 0 19]]

Best threshold: 0.62
 G

> Again the choice of a best process is subjective, and **L1-regularised linear regression** optimised for AUROC (and with thresholds optimised for geometric mean) is chosen on the basis of having a sensitivity and a specificity which are both of some clinical use.

In [25]:
i = 'l1'
results_best_thr = list()
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=2)
clf = GridSearchCV(pipelines[i], hyperparameters[i], cv=cv, scoring='roc_auc', n_jobs=-1)
clf.fit(X, y)
gm_array = np.zeros(99)
for train_ix, test_is in cv.split(X, y):
    X_train, X_test = X.iloc[train_ix, :], X.iloc[test_ix, :]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
    model = pipelines[i]
    model.set_params(**clf.best_params_)
    model.fit(X_train, y_train)
    if i in ['l1', 'l2', 'rf', 'gb']:
        pred = model.predict_proba(X_test)
        pred = pred[:, 1]
    else:
        pred = model.decision_function(X_test)
    for thr in range(1, 100):
        if i in ['l1', 'l2', 'rf', 'gb']:
            y_pred = [int(p > thr / 100) for p in pred]
        else:
            y_pred = [int(expit(p) > thr / 100) for p in pred]
        gm_array[int(thr - 1)] += g_mean(y_test, y_pred)
best_threshold = (np.argmax(f1_array) + 1) / 100
if i in ['svm_linear', 'svm_rbf']:
    best_threshold = logit(best_threshold)
fitted_model_no_ap_ncv = clf
best_thr_no_ap_ncv = best_threshold
print("Best threshold for", i.upper(), "model =", best_thr_no_ap_ncv)

Best threshold for L1 model = 0.52


> Hyperparameters of the best models are shown below, then the models and best thresholds are saved for further use using the Pickle module.

In [26]:
fitted_models['l2'].best_estimator_

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregression',
                 LogisticRegression(C=0.0031622776601683794, class_weight=None,
                                    dual=False, fit_intercept=True,
                                    intercept_scaling=1, l1_ratio=None,
                                    max_iter=100, multi_class='auto',
                                    n_jobs=None, penalty='l2', random_state=123,
                                    solver='lbfgs', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [27]:
fitted_model_ncv.best_estimator_

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregression',
                 LogisticRegression(C=0.01, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='auto', n_jobs=None,
                                    penalty='l2', random_state=123,
                                    solver='lbfgs', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [28]:
fitted_models_no_ap['svm_linear'].best_estimator_

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('linearsvc',
                 LinearSVC(C=0.1, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='squared_hinge', max_iter=1000,
                           multi_class='ovr', penalty='l2', random_state=123,
                           tol=0.0001, verbose=0))],
         verbose=False)

In [29]:
fitted_model_no_ap_ncv.best_estimator_

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregression',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='auto', n_jobs=None,
                                    penalty='l1', random_state=123,
                                    solver='liblinear', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [30]:
with open("eeg_model.pkl", "wb") as f:
    pickle.dump(fitted_models['l2'].best_estimator_, f)
with open("eeg_model_ncv.pkl", "wb") as f:
    pickle.dump(fitted_model_ncv.best_estimator_, f)
with open("eeg_best_thr_ncv.pkl", "wb") as f:
    pickle.dump(best_thr_ncv, f)
with open("eeg_model_no_ap.pkl", "wb") as f:
    pickle.dump(fitted_models_no_ap['svm_linear'].best_estimator_, f)
with open("eeg_model_no_ap_ncv.pkl", "wb") as f:
    pickle.dump(fitted_model_no_ap_ncv.best_estimator_, f)
with open("eeg_best_thr_no_ap_ncv.pkl", "wb") as f:
    pickle.dump(best_thr_no_ap_ncv, f)