In [1]:
import numpy as np
from joblib import load
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report

In [2]:
class MyMinMax:
    def __init__(self, axis):
        self.sc = MinMaxScaler()
        self.axis = axis

    def fit(self, X):
        if self.axis==1:
            self.sc = self.sc.fit(X.transpose())
        elif self.axis==0:
            self.sc = self.sc.fit(X)
        return self.sc

    def transform(self, X):
        if self.axis==1:
            Xn = self.sc.transform(X.transpose()).transpose()
        elif self.axis==0:
            Xn = self.sc.transform(X)
        return Xn

    def fit_transform(self, X):
        if self.axis==1:
            self.sc = self.sc.fit(X.transpose())
            Xn = self.sc.transform(X.transpose()).transpose()
        elif self.axis==0:
            self.sc = self.sc.fit(X)
            Xn = self.sc.transform(X)
        return Xn

In [3]:
def mynormalize(df, allfeats=False):
    scalersdict = {}
    if allfeats:
        sc = MyMinMax(axis=1)
        XN = sc.fit_transform(df.values)
        scalersdict['allfeat'] = sc
    else:
        morph_feats = ['area', 'curv', 'thickness', 'volume']
        XN = np.array([], dtype=np.double)
        for ind, morph_feat in enumerate(morph_feats):
            morph_cols = [col for col in df.columns if morph_feat in col]
            X_morph = df.loc[:, morph_cols].values
            Xn = (X_morph-np.min(X_morph, axis=1).reshape(-1,1))/(np.max(X_morph, axis=1).reshape(-1,1)-np.min(X_morph, axis=1).reshape(-1,1))
            if ind == 0:
                XN = np.append(XN, Xn).reshape(Xn.shape[0], -1)
            else:
                XN = np.concatenate([XN, Xn], axis=1)
    return XN


## Goal of this notebook:
For each normalization method:<br>
$\;\;\;\;\;$ For each RFE classifier core:<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ For each data matrix (corr; uncorr; ucorrleft; ucorrright):<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 1. Find the classifier with highest performance <br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 2. Use this classifier to train on all the training set<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 3. Measure the performance on the testing set<br><br>

Measure the performance in the testing set:
1. Load the testing set
2. Get the normalization object corresponding to the current normalization method
3. Normalize the testing set using the normalization object of the training set
4. Load the rfe+(RFE classifier core)
5. Get the selected features used for learning the best ML model
6. Select those features out of the normalized testing set
7. Predict the labels of the output matrix from step 6



### Logistic regression l1-norm

In [4]:
clf = load('./Final_Results/ML/clf_lg1_train.joblib')
clf_corr = load('./Final_Results/ML/clf_lg1_train_corr.joblib')
clf_corr_l = load('./Final_Results/ML/clf_lg1_train_corr_l.joblib')
clf_corr_r = load('./Final_Results/ML/clf_lg1_train_corr_r.joblib')

In [5]:
print("clf_lg1_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['XGB'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print(clf['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['XGB'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print(clf_corr['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['XGB'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print(clf_corr_l['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['XGB'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)
print(clf_corr_r['nn'].best_score_)

clf_lg1_train
0.6062994350282487
0.5453107344632768
0.6079378531073447
0.5963276836158192
0.5971186440677967
0.6128813559322035
0.5843502824858757
0.6249152542372881
0000000000000000000000000000000000000000000
clf_lg1_train_corr
0.5944350282485875
0.5651694915254237
0.5995197740112995
0.6412429378531074
0.5615254237288136
0.6079661016949153
0.6196327683615819
0.608361581920904
0000000000000000000000000000000000000000000
clf_lg1_train_corr_l
0.5709039548022599
0.5407909604519774
0.5742372881355933
0.557542372881356
0.546186440677966
0.5742090395480226
0.5779378531073446
0.5813841807909604
0000000000000000000000000000000000000000000
clf_lg1_train_corr_r
0.5745197740112994
0.5503954802259887
0.5761864406779661
0.5865254237288136
0.5727683615819209
0.5748587570621468
0.5879661016949151
0.5812711864406779


Based on the current results, I am going to proceed with "lg1_train_corr", classifier XGB

In [8]:
selected_clc = clf_corr['XGB'].best_estimator_
clf_corr['XGB'].cv_results_

{'mean_fit_time': array([0.17755857, 0.19208336, 0.01805224, ..., 0.07403364, 0.01862526,
        0.08339696]),
 'std_fit_time': array([0.02003806, 0.03068035, 0.00342025, ..., 0.00885162, 0.00110732,
        0.00612191]),
 'mean_score_time': array([0.00246334, 0.00265994, 0.00179572, ..., 0.00243511, 0.00194354,
        0.00157528]),
 'std_score_time': array([4.50125286e-04, 3.67582214e-04, 3.19151972e-04, ...,
        3.24667023e-04, 1.73267782e-04, 5.49364005e-05]),
 'param_reg_lambda': masked_array(data=[10, 0, 0.001, ..., 0, 0.001, 0],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_reg_alpha': masked_array(data=[0.001, 10, 0, ..., 0.5, 0.5, 1],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_min_child_weight': masked_array(data=[0.5, 1, 0.01, ..., 10, 0.5, 10],
              mask=[False, False, False, ..., False, False, Fa

In [10]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

(67, 545)
0    36
1    31
Name: labels, dtype: int64


In [12]:
print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

baseline score:  0.5373134328358209


In [13]:
# Load the corresponding rfe object
selected_rfe1 = load('./Final_Results/FS/rfetrain_corr_lg1.joblib')
Xtest = XN[:, np.where(selected_rfe1.support_)[0]]
Xtest.shape

(67, 11)

In [14]:
# Load training dataset to train the current model using all training set
Xtrain = np.load('./Final_Results/FS/Xtrain_corr_lg1.npy')
ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')
Xtrain.shape, ytrain.shape

((597, 11), (597,))

In [15]:
print(classification_report(df_test['labels'].values, selected_clc.predict(Xtest)))

              precision    recall  f1-score   support

           0       0.54      1.00      0.70        36
           1       0.00      0.00      0.00        31

    accuracy                           0.54        67
   macro avg       0.27      0.50      0.35        67
weighted avg       0.29      0.54      0.38        67



  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
selected_clc = selected_clc.fit(Xtrain, ytrain)
print(classification_report(ytrain, selected_clc.predict(Xtrain)))

              precision    recall  f1-score   support

           0       0.81      0.82      0.82       300
           1       0.82      0.80      0.81       297

    accuracy                           0.81       597
   macro avg       0.81      0.81      0.81       597
weighted avg       0.81      0.81      0.81       597



In [17]:
print(classification_report(df_test['labels'].values, selected_clc.predict(Xtest)))

              precision    recall  f1-score   support

           0       0.53      0.64      0.58        36
           1       0.46      0.35      0.40        31

    accuracy                           0.51        67
   macro avg       0.50      0.50      0.49        67
weighted avg       0.50      0.51      0.50        67



### Logistic regression l2-norm

In [21]:
clf = load('./Final_Results/ML/clf_lg2_train.joblib')
clf_corr = load('./Final_Results/ML/clf_lg2_train_corr.joblib')
clf_corr_l = load('./Final_Results/ML/clf_lg2_train_corr_l.joblib')
clf_corr_r = load('./Final_Results/ML/clf_lg2_train_corr_r.joblib')

In [22]:
print("clf_lg2_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['XGB'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print(clf['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['XGB'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print(clf_corr['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['XGB'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print(clf_corr_l['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['XGB'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)
print(clf_corr_r['nn'].best_score_)

clf_lg2_train
0.6535310734463277
0.5135593220338983
0.6433898305084746
0.6364406779661016
0.545
0.6450564971751412
0.5795762711864407
0.628135593220339
0000000000000000000000000000000000000000000
clf_lg2_train_corr
0.6231920903954802
0.5349999999999999
0.6198587570621469
0.6130225988700565
0.5448587570621469
0.6180508474576272
0.6060169491525423
0.612994350282486
0000000000000000000000000000000000000000000
clf_lg2_train_corr_l
0.5844632768361582
0.5463559322033898
0.5861864406779661
0.598135593220339
0.5564124293785311
0.5980508474576272
0.5828813559322035
0.5929378531073446
0000000000000000000000000000000000000000000
clf_lg2_train_corr_r
0.5979378531073446
0.5527401129943502
0.5961299435028249
0.6046610169491525
0.5314689265536723
0.6348305084745762
0.587542372881356
0.6055932203389831


In [34]:
selected_clc1 = clf['lSVM'].best_estimator_
selected_clc2 = clf['SVC'].best_estimator_
selected_clc3 = clf_corr['lSVM'].best_estimator_
print(f'selected classifier 1: lSVM_alldata with accuracy {clf["lSVM"].best_score_}')
print(f'selected classifier 2: SVC_alldata with accuracy {clf["SVC"].best_score_}')
print(f'selected classifier 3: lSVM_corr with accuracy {clf_corr["lSVM"].best_score_}')

selected classifier 1: lSVM_alldata with accuracy 0.6535310734463277
selected classifier 2: SVC_alldata with accuracy 0.6450564971751412
selected classifier 3: lSVM_corr with accuracy 0.6231920903954802


In [35]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

# Load the corresponding rfe object
#clc1 & 2
selected_rfe12 = load('./Final_Results/FS/rfetrain_lg2.joblib')
Xtest12 = XN[:, np.where(selected_rfe12.support_)[0]]

# clc3
selected_rfe3 = load('./Final_Results/FS/rfetrain_corr_lg2.joblib')
Xtest3 = XN[:, np.where(selected_rfe3.support_)[0]]
Xtest.shape

(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


(67, 11)

In [40]:
# Load training dataset to train the current model using all training set
# clc1&2
Xtrain12 = np.load('./Final_Results/FS/Xtrain_lg2.npy')

# clc3
Xtrain3 = np.load('./Final_Results/FS/Xtrain_corr_lg2.npy')

ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')


In [37]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.59      0.64      0.61        36
           1       0.54      0.48      0.51        31

    accuracy                           0.57        67
   macro avg       0.56      0.56      0.56        67
weighted avg       0.56      0.57      0.56        67



In [38]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.56      0.61      0.59        36
           1       0.50      0.45      0.47        31

    accuracy                           0.54        67
   macro avg       0.53      0.53      0.53        67
weighted avg       0.53      0.54      0.53        67



In [39]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest3)))

              precision    recall  f1-score   support

           0       0.56      0.97      0.71        36
           1       0.75      0.10      0.17        31

    accuracy                           0.57        67
   macro avg       0.65      0.53      0.44        67
weighted avg       0.65      0.57      0.46        67



In [41]:
selected_clc1 = selected_clc1.fit(Xtrain12, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain12, ytrain)
selected_clc3 = selected_clc3.fit(Xtrain3, ytrain)


In [42]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.59      0.64      0.61        36
           1       0.54      0.48      0.51        31

    accuracy                           0.57        67
   macro avg       0.56      0.56      0.56        67
weighted avg       0.56      0.57      0.56        67



In [43]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.56      0.61      0.59        36
           1       0.50      0.45      0.47        31

    accuracy                           0.54        67
   macro avg       0.53      0.53      0.53        67
weighted avg       0.53      0.54      0.53        67



In [44]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest3)))

              precision    recall  f1-score   support

           0       0.56      0.97      0.71        36
           1       0.75      0.10      0.17        31

    accuracy                           0.57        67
   macro avg       0.65      0.53      0.44        67
weighted avg       0.65      0.57      0.46        67



### Linear SVM

In [45]:
clf = load('./Final_Results/ML/clf_svm_train.joblib')
clf_corr = load('./Final_Results/ML/clf_svm_train_corr.joblib')
clf_corr_l = load('./Final_Results/ML/clf_svm_train_corr_l.joblib')
clf_corr_r = load('./Final_Results/ML/clf_svm_train_corr_r.joblib')

In [46]:
print("clf_svm_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['XGB'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print(clf['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['XGB'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print(clf_corr['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['XGB'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print(clf_corr_l['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['XGB'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)
print(clf_corr_r['nn'].best_score_)

clf_lg2_train
0.6329943502824859
0.5241525423728813
0.6380508474576271
0.6149435028248588
0.5632203389830509
0.651723163841808
0.5977966101694915
0.6200564971751412
0000000000000000000000000000000000000000000
clf_lg2_train_corr
0.6331638418079096
0.518502824858757
0.6466666666666667
0.6045197740112994
0.5331638418079097
0.6518079096045198
0.5976836158192091
0.6128248587570622
0000000000000000000000000000000000000000000
clf_lg2_train_corr_l
0.5945480225988701
0.5149717514124295
0.6062994350282487
0.5927683615819209
0.5433333333333333
0.6096327683615821
0.5761016949152542
0.5875988700564972
0000000000000000000000000000000000000000000
clf_lg2_train_corr_r
0.5895762711864407
0.5017514124293785
0.5930225988700564
0.5845480225988701
0.5317231638418078
0.6316666666666666
0.5607909604519774
0.601186440677966


In [48]:
selected_clc1 = clf['SVC'].best_estimator_
selected_clc2 = clf_corr['SVC'].best_estimator_
selected_clc3 = clf_corr['lg'].best_estimator_
print(f"selected classifier 1: SVC_alldata with accuracy {clf['SVC'].best_score_}")
print(f"selected classifier 2: SVC_corr with accuracy {clf_corr['SVC'].best_score_}")
print(f"selected classifier 3: lg_corr with accuracy {clf_corr['lg'].best_score_}")

selected classifier 1: SVC_alldata with accuracy 0.651723163841808
selected classifier 2: SVC_corr with accuracy 0.6518079096045198
selected classifier 3: lg_corr with accuracy 0.6466666666666667


In [49]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

# Load the corresponding rfe object
#clc1
selected_rfe1 = load('./Final_Results/FS/rfetrain_svm.joblib')
Xtest1 = XN[:, np.where(selected_rfe1.support_)[0]]

# clc2,3
selected_rfe23 = load('./Final_Results/FS/rfetrain_corr_svm.joblib')
Xtest23 = XN[:, np.where(selected_rfe23.support_)[0]]
Xtest.shape

(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


(67, 11)

In [51]:
# Load training dataset to train the current model using all training set
# clc1
Xtrain1 = np.load('./Final_Results/FS/Xtrain_svm.npy')

# clc23
Xtrain23 = np.load('./Final_Results/FS/Xtrain_corr_svm.npy')

ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')


In [52]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.59      0.53      0.56        36
           1       0.51      0.58      0.55        31

    accuracy                           0.55        67
   macro avg       0.55      0.55      0.55        67
weighted avg       0.56      0.55      0.55        67



In [53]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest23)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [54]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest23)))

              precision    recall  f1-score   support

           0       0.59      0.81      0.68        36
           1       0.61      0.35      0.45        31

    accuracy                           0.60        67
   macro avg       0.60      0.58      0.57        67
weighted avg       0.60      0.60      0.57        67



In [55]:
selected_clc1 = selected_clc1.fit(Xtrain1, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain23, ytrain)
selected_clc3 = selected_clc3.fit(Xtrain23, ytrain)


  "Setting penalty='none' will ignore the C and l1_ratio "


In [56]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.59      0.53      0.56        36
           1       0.51      0.58      0.55        31

    accuracy                           0.55        67
   macro avg       0.55      0.55      0.55        67
weighted avg       0.56      0.55      0.55        67



In [57]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest23)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [58]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest23)))

              precision    recall  f1-score   support

           0       0.59      0.81      0.68        36
           1       0.61      0.35      0.45        31

    accuracy                           0.60        67
   macro avg       0.60      0.58      0.57        67
weighted avg       0.60      0.60      0.57        67



### RF


In [60]:
clf = load('./Final_Results/ML/clf__rf_train.joblib')
clf_corr = load('./Final_Results/ML/clf__rf_train_corr.joblib')
clf_corr_l = load('./Final_Results/ML/clf__rf_train_corr_l.joblib')
clf_corr_r = load('./Final_Results/ML/clf__rf_train_corr_r.joblib')

In [68]:
print("clf__rf_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['XGB'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print(clf['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['XGB'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print(clf_corr['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['XGB'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print(clf_corr_l['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['XGB'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)
print(clf_corr_r['nn'].best_score_)

clf__rf_train
0.5711016949152542
0.5050847457627119
0.5696327683615819
0.5863559322033899
0.5046892655367232
0.5814124293785311
0.5491525423728814
0.552683615819209
0000000000000000000000000000000000000000000
clf__rf_train_corr
0.5946610169491525
0.5305084745762711
0.6080508474576272
0.6178248587570622
0.5431073446327683
0.5910734463276837
0.5926553672316384
0.601412429378531
0000000000000000000000000000000000000000000
clf__rf_train_corr_l
0.5711016949152542
0.515
0.5793502824858756
0.5781073446327685
0.5469491525423729
0.5792937853107345
0.5695762711864407
0.572683615819209
0000000000000000000000000000000000000000000
clf__rf_train_corr_r
0.5828813559322035
0.505
0.5828531073446328
0.5778531073446328
0.5198305084745762
0.5965254237288136
0.5524011299435028
0.5894350282485876


In [69]:
selected_clc1 = clf_corr['lg'].best_estimator_
selected_clc2 = clf_corr['nn'].best_estimator_
print(f"selected classifier 1: SVC_alldata with accuracy {clf_corr['lg'].best_score_}")
print(f"selected classifier 2: SVC_corr with accuracy {clf_corr['nn'].best_score_}")


selected classifier 1: SVC_alldata with accuracy 0.6080508474576272
selected classifier 2: SVC_corr with accuracy 0.601412429378531


In [70]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

# Load the corresponding rfe object
#clc1
selected_rfe1 = load('./Final_Results/FS/rfetrain_corr_rf.joblib')
Xtest1 = XN[:, np.where(selected_rfe1.support_)[0]]

# clc2,3
selected_rfe2 = load('./Final_Results/FS/rfetrain_corr_rf.joblib')
Xtest2 = XN[:, np.where(selected_rfe2.support_)[0]]
Xtest.shape

(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


(67, 11)

In [71]:
# Load training dataset to train the current model using all training set
# clc1
Xtrain1 = np.load('./Final_Results/FS/Xtrain_corr_rf.npy')
ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')


In [78]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.56      0.89      0.69        36
           1       0.60      0.19      0.29        31

    accuracy                           0.57        67
   macro avg       0.58      0.54      0.49        67
weighted avg       0.58      0.57      0.51        67



In [79]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest2)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [84]:
selected_clc1 = selected_clc1.fit(Xtrain1, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain1, ytrain)


In [85]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.56      0.89      0.69        36
           1       0.60      0.19      0.29        31

    accuracy                           0.57        67
   macro avg       0.58      0.54      0.49        67
weighted avg       0.58      0.57      0.51        67



In [86]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest2)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))
