In [3]:
import numpy as np
from joblib import load
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report

In [4]:
class MyMinMax:
    def __init__(self, axis):
        self.sc = MinMaxScaler()
        self.axis = axis

    def fit(self, X):
        if self.axis==1:
            self.sc = self.sc.fit(X.transpose())
        elif self.axis==0:
            self.sc = self.sc.fit(X)
        return self.sc

    def transform(self, X):
        if self.axis==1:
            Xn = self.sc.transform(X.transpose()).transpose()
        elif self.axis==0:
            Xn = self.sc.transform(X)
        return Xn

    def fit_transform(self, X):
        if self.axis==1:
            self.sc = self.sc.fit(X.transpose())
            Xn = self.sc.transform(X.transpose()).transpose()
        elif self.axis==0:
            self.sc = self.sc.fit(X)
            Xn = self.sc.transform(X)
        return Xn

In [5]:
def mynormalize(df, allfeats=False):
    scalersdict = {}
    if allfeats:
        sc = MyMinMax(axis=1)
        XN = sc.fit_transform(df.values)
        scalersdict['allfeat'] = sc
    else:
        morph_feats = ['area', 'curv', 'thickness', 'volume']
        XN = np.array([], dtype=np.double)
        for ind, morph_feat in enumerate(morph_feats):
            morph_cols = [col for col in df.columns if morph_feat in col]
            X_morph = df.loc[:, morph_cols].values
            Xn = (X_morph-np.min(X_morph, axis=1).reshape(-1,1))/(np.max(X_morph, axis=1).reshape(-1,1)-np.min(X_morph, axis=1).reshape(-1,1))
            if ind == 0:
                XN = np.append(XN, Xn).reshape(Xn.shape[0], -1)
            else:
                XN = np.concatenate([XN, Xn], axis=1)
    return XN


## Goal of this notebook:
For each normalization method:<br>
$\;\;\;\;\;$ For each RFE classifier core:<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ For each data matrix (corr; uncorr; ucorrleft; ucorrright):<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 1. Find the classifier with highest performance <br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 2. Use this classifier to train on all the training set<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 3. Measure the performance on the testing set<br><br>

Measure the performance in the testing set:
1. Load the testing set
2. Get the normalization object corresponding to the current normalization method
3. Normalize the testing set using the normalization object of the training set
4. Load the rfe+(RFE classifier core)
5. Get the selected features used for learning the best ML model
6. Select those features out of the normalized testing set
7. Predict the labels of the output matrix from step 6



### Logistic regression l1-norm

In [18]:
clf = load('./Final_Results_DP/ML/clf_lg1_train.joblib')
clf_corr = load('./Final_Results_DP/ML/clf_lg1_train_corr.joblib')
clf_corr_l = load('./Final_Results_DP/ML/clf_lg1_train_corr_l.joblib')
clf_corr_r = load('./Final_Results_DP/ML/clf_lg1_train_corr_r.joblib')

In [19]:
print("clf_lg1_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)

clf_lg1_train
0.6776490132635875
0.6583750519308025
0.6761797859734646
0.5884697143652369
0.6825626188268857
0.6153220016740648
0000000000000000000000000000000000000000000
clf_lg1_train_corr
0.6607855314132751
0.6393951263837128
0.656329172729524
0.5933545354265284
0.6546790011276403
0.5929751489365186
0000000000000000000000000000000000000000000
clf_lg1_train_corr_l
0.6260678917219742
0.6100080428789824
0.6231945885545535
0.6025791343570097
0.6460843663214163
0.6095766948883717
0000000000000000000000000000000000000000000
clf_lg1_train_corr_r
0.5971593185860087
0.5740063974655721
0.5957127054976045
0.5786779185111056
0.5948770646970822
0.5824622056316526


Based on the current results, I am going to proceed with "lg1_train_corr", classifier XGB

In [20]:
selected_clc1 = clf['lSVM'].best_estimator_
selected_clc2 = clf['lg'].best_estimator_
selected_clc3 = clf['SVC'].best_estimator_

selected_clc4 = clf_corr['lSVM'].best_estimator_
selected_clc5 = clf_corr['lg'].best_estimator_
selected_clc6 = clf_corr['SVC'].best_estimator_

selected_clc7 = clf_corr_l['lSVM'].best_estimator_
selected_clc8 = clf_corr_l['SVC'].best_estimator_


print(f"selected classifier 1: lSVM_alldata with accuracy {clf['lSVM'].best_score_}")
print(f"selected classifier 2: lg_alldata with accuracy {clf['lg'].best_score_}")
print(f"selected classifier 3: SVC_alldata with accuracy {clf['SVC'].best_score_}")
print(f"selected classifier 4: lSVM_alldata with accuracy {clf_corr['lSVM'].best_score_}")
print(f"selected classifier 5: lg_alldata with accuracy {clf_corr['lg'].best_score_}")
print(f"selected classifier 6: SVC_alldata with accuracy {clf_corr['SVC'].best_score_}")
print(f"selected classifier 7: lSVM_alldata with accuracy {clf_corr_l['lSVM'].best_score_}")
print(f"selected classifier 8: lg_alldata with accuracy {clf_corr_l['SVC'].best_score_}")


selected classifier 1: lSVM_alldata with accuracy 0.6776490132635875
selected classifier 2: lg_alldata with accuracy 0.6761797859734646
selected classifier 3: SVC_alldata with accuracy 0.6825626188268857
selected classifier 4: lSVM_alldata with accuracy 0.6607855314132751
selected classifier 5: lg_alldata with accuracy 0.656329172729524
selected classifier 6: SVC_alldata with accuracy 0.6546790011276403
selected classifier 7: lSVM_alldata with accuracy 0.6260678917219742
selected classifier 8: lg_alldata with accuracy 0.6460843663214163


In [24]:
# clf['SVC'].best_params_, clf['SVC'].n_features_in_, clf['SVC'].best_score_
clf['GNB'].n_features_in_


31

In [7]:
# Load test dataset
df_test = pd.read_csv('./Final_Results_DP/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

(67, 545)
0    36
1    31
Name: labels, dtype: int64


In [8]:
print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

baseline score:  0.5373134328358209


In [9]:
# Load the corresponding rfe object
selected_rfe1 = load('./Final_Results/FS/rfetrain_corr_lg1.joblib')
Xtest = XN[:, np.where(selected_rfe1.support_)[0]]
Xtest.shape

(67, 11)

In [10]:
# Load training dataset to train the current model using all training set
Xtrain = np.load('./Final_Results/FS/Xtrain_corr_lg1.npy')
ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')
Xtrain.shape, ytrain.shape

((597, 11), (597,))

In [11]:
print(classification_report(df_test['labels'].values, selected_clc.predict(Xtest)))

              precision    recall  f1-score   support

           0       0.54      1.00      0.70        36
           1       0.00      0.00      0.00        31

    accuracy                           0.54        67
   macro avg       0.27      0.50      0.35        67
weighted avg       0.29      0.54      0.38        67



  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
selected_clc = selected_clc.fit(Xtrain, ytrain)
print(classification_report(ytrain, selected_clc.predict(Xtrain)))

              precision    recall  f1-score   support

           0       0.81      0.82      0.82       300
           1       0.82      0.80      0.81       297

    accuracy                           0.81       597
   macro avg       0.81      0.81      0.81       597
weighted avg       0.81      0.81      0.81       597



In [13]:
print(classification_report(df_test['labels'].values, selected_clc.predict(Xtest)))

              precision    recall  f1-score   support

           0       0.53      0.64      0.58        36
           1       0.46      0.35      0.40        31

    accuracy                           0.51        67
   macro avg       0.50      0.50      0.49        67
weighted avg       0.50      0.51      0.50        67



### Logistic regression l2-norm

In [9]:
clf = load('./Final_Results_DP/ML/clf_lg2_train.joblib')
clf_corr = load('./Final_Results_DP/ML/clf_lg2_train_corr.joblib')
clf_corr_l = load('./Final_Results_DP/ML/clf_lg2_train_corr_l.joblib')
clf_corr_r = load('./Final_Results_DP/ML/clf_lg2_train_corr_r.joblib')

In [10]:
print("clf_lg2_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)

clf_lg2_train
0.6718001927835116
0.6411071647112033
0.6748771158604346
0.5954639799767104
0.6777237526886342
0.6020947504354001
0000000000000000000000000000000000000000000
clf_lg2_train_corr
0.6442305441120191
0.6057018997975978
0.6472834613440408
0.5502858394166559
0.6516473064541546
0.5839528089703683
0000000000000000000000000000000000000000000
clf_lg2_train_corr_l
0.6212269995149715
0.5898393061430813
0.6212749805067628
0.6084847666234848
0.6383680527842073
0.6007139538834008
0000000000000000000000000000000000000000000
clf_lg2_train_corr_r
0.6320134007052356
0.582801735870217
0.6305428124699415
0.5459638970920797
0.6407877109720832
0.5927770342037242


In [16]:
selected_clc1 = clf['lSVM'].best_estimator_
selected_clc2 = clf['SVC'].best_estimator_
selected_clc3 = clf_corr['lSVM'].best_estimator_
print(f'selected classifier 1: lSVM_alldata with accuracy {clf["lSVM"].best_score_}')
print(f'selected classifier 2: SVC_alldata with accuracy {clf["SVC"].best_score_}')
print(f'selected classifier 3: lSVM_corr with accuracy {clf_corr["lSVM"].best_score_}')

selected classifier 1: lSVM_alldata with accuracy 0.6535310734463277
selected classifier 2: SVC_alldata with accuracy 0.6450564971751412
selected classifier 3: lSVM_corr with accuracy 0.6231920903954802


In [17]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

# Load the corresponding rfe object
#clc1 & 2
selected_rfe12 = load('./Final_Results/FS/rfetrain_lg2.joblib')
Xtest12 = XN[:, np.where(selected_rfe12.support_)[0]]

# clc3
selected_rfe3 = load('./Final_Results/FS/rfetrain_corr_lg2.joblib')
Xtest3 = XN[:, np.where(selected_rfe3.support_)[0]]
Xtest.shape

(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


(67, 11)

In [18]:
# Load training dataset to train the current model using all training set
# clc1&2
Xtrain12 = np.load('./Final_Results/FS/Xtrain_lg2.npy')

# clc3
Xtrain3 = np.load('./Final_Results/FS/Xtrain_corr_lg2.npy')

ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')


In [19]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.59      0.64      0.61        36
           1       0.54      0.48      0.51        31

    accuracy                           0.57        67
   macro avg       0.56      0.56      0.56        67
weighted avg       0.56      0.57      0.56        67



In [20]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.56      0.61      0.59        36
           1       0.50      0.45      0.47        31

    accuracy                           0.54        67
   macro avg       0.53      0.53      0.53        67
weighted avg       0.53      0.54      0.53        67



In [21]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest3)))

              precision    recall  f1-score   support

           0       0.56      0.97      0.71        36
           1       0.75      0.10      0.17        31

    accuracy                           0.57        67
   macro avg       0.65      0.53      0.44        67
weighted avg       0.65      0.57      0.46        67



In [22]:
selected_clc1 = selected_clc1.fit(Xtrain12, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain12, ytrain)
selected_clc3 = selected_clc3.fit(Xtrain3, ytrain)


In [23]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.59      0.64      0.61        36
           1       0.54      0.48      0.51        31

    accuracy                           0.57        67
   macro avg       0.56      0.56      0.56        67
weighted avg       0.56      0.57      0.56        67



In [24]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.56      0.61      0.59        36
           1       0.50      0.45      0.47        31

    accuracy                           0.54        67
   macro avg       0.53      0.53      0.53        67
weighted avg       0.53      0.54      0.53        67



In [25]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest3)))

              precision    recall  f1-score   support

           0       0.56      0.97      0.71        36
           1       0.75      0.10      0.17        31

    accuracy                           0.57        67
   macro avg       0.65      0.53      0.44        67
weighted avg       0.65      0.57      0.46        67



### Linear SVM

In [12]:
clf = load('./Final_Results_DP/ML/clf_svm_train.joblib')
clf_corr = load('./Final_Results_DP/ML/clf_svm_train_corr.joblib')
clf_corr_l = load('./Final_Results_DP/ML/clf_svm_train_corr_l.joblib')
clf_corr_r = load('./Final_Results_DP/ML/clf_svm_train_corr_r.joblib')

In [13]:
print("clf_svm_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)

clf_svm_train
0.6183756454256895
0.6179362136254147
0.6140563942934444
0.5833434329790783
0.6199826453908983
0.6022306198337806
0000000000000000000000000000000000000000000
clf_svm_train_corr
0.667025086414902
0.5953776264706905
0.6671169348649595
0.5590827024891994
0.673043155264402
0.5956003303106024
0000000000000000000000000000000000000000000
clf_svm_train_corr_l
0.6248221766527298
0.6175906870419601
0.6232378215872509
0.6011620322902148
0.6326042760283321
0.6009642245375344
0000000000000000000000000000000000000000000
clf_svm_train_corr_r
0.5617032484635646
0.5588516180921801
0.5667843014462857
0.5545365828201649
0.5840966598517081
0.5622388162028197


In [48]:
clf_corr['lg'].best_params_


{'solver': 'newton-cg', 'penalty': 'none', 'C': 0.1}

In [28]:
selected_clc1 = clf['SVC'].best_estimator_
selected_clc2 = clf_corr['SVC'].best_estimator_
selected_clc3 = clf_corr['lg'].best_estimator_
print(f"selected classifier 1: SVC_alldata with accuracy {clf['SVC'].best_score_}")
print(f"selected classifier 2: SVC_corr with accuracy {clf_corr['SVC'].best_score_}")
print(f"selected classifier 3: lg_corr with accuracy {clf_corr['lg'].best_score_}")

selected classifier 1: SVC_alldata with accuracy 0.651723163841808
selected classifier 2: SVC_corr with accuracy 0.6518079096045198
selected classifier 3: lg_corr with accuracy 0.6466666666666667


In [29]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

# Load the corresponding rfe object
#clc1
selected_rfe1 = load('./Final_Results/FS/rfetrain_svm.joblib')
Xtest1 = XN[:, np.where(selected_rfe1.support_)[0]]

# clc2,3
selected_rfe23 = load('./Final_Results/FS/rfetrain_corr_svm.joblib')
Xtest23 = XN[:, np.where(selected_rfe23.support_)[0]]
Xtest.shape

(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


(67, 11)

In [30]:
# Load training dataset to train the current model using all training set
# clc1
Xtrain1 = np.load('./Final_Results/FS/Xtrain_svm.npy')

# clc23
Xtrain23 = np.load('./Final_Results/FS/Xtrain_corr_svm.npy')

ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')


In [31]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.59      0.53      0.56        36
           1       0.51      0.58      0.55        31

    accuracy                           0.55        67
   macro avg       0.55      0.55      0.55        67
weighted avg       0.56      0.55      0.55        67



In [32]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest23)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [33]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest23)))

              precision    recall  f1-score   support

           0       0.59      0.81      0.68        36
           1       0.61      0.35      0.45        31

    accuracy                           0.60        67
   macro avg       0.60      0.58      0.57        67
weighted avg       0.60      0.60      0.57        67



In [34]:
selected_clc1 = selected_clc1.fit(Xtrain1, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain23, ytrain)
selected_clc3 = selected_clc3.fit(Xtrain23, ytrain)


  "Setting penalty='none' will ignore the C and l1_ratio "


In [35]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.59      0.53      0.56        36
           1       0.51      0.58      0.55        31

    accuracy                           0.55        67
   macro avg       0.55      0.55      0.55        67
weighted avg       0.56      0.55      0.55        67



In [36]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest23)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [37]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest23)))

              precision    recall  f1-score   support

           0       0.59      0.81      0.68        36
           1       0.61      0.35      0.45        31

    accuracy                           0.60        67
   macro avg       0.60      0.58      0.57        67
weighted avg       0.60      0.60      0.57        67



### RF


In [14]:
clf = load('./Final_Results_DP/ML/clf__rf_train.joblib')
clf_corr = load('./Final_Results_DP/ML/clf__rf_train_corr.joblib')
clf_corr_l = load('./Final_Results_DP/ML/clf__rf_train_corr_l.joblib')
clf_corr_r = load('./Final_Results_DP/ML/clf__rf_train_corr_r.joblib')

In [15]:
print("clf__rf_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)

clf__rf_train
0.6423945574472352
0.592404053365423
0.6380067678882428
0.5548148193626684
0.6421218260814398
0.6077900194625392
0000000000000000000000000000000000000000000
clf__rf_train_corr
0.5732096101147901
0.553707521626749
0.5972072688797885
0.5437919923214041
0.5885222693607242
0.5965520300594926
0000000000000000000000000000000000000000000
clf__rf_train_corr_l
0.6032624925557322
0.5747999717578296
0.6018604427471855
0.5289584676371331
0.6126722414255338
0.5954385210926036
0000000000000000000000000000000000000000000
clf__rf_train_corr_r
0.5752937492709223
0.5450725803315795
0.5751799517427261
0.5577725011307101
0.5947266956046587
0.568341294350952


In [69]:
selected_clc1 = clf_corr['lg'].best_estimator_
selected_clc2 = clf_corr['nn'].best_estimator_
print(f"selected classifier 1: SVC_alldata with accuracy {clf_corr['lg'].best_score_}")
print(f"selected classifier 2: SVC_corr with accuracy {clf_corr['nn'].best_score_}")


selected classifier 1: SVC_alldata with accuracy 0.6080508474576272
selected classifier 2: SVC_corr with accuracy 0.601412429378531


In [70]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

# Load the corresponding rfe object
#clc1
selected_rfe1 = load('./Final_Results/FS/rfetrain_corr_rf.joblib')
Xtest1 = XN[:, np.where(selected_rfe1.support_)[0]]

# clc2,3
selected_rfe2 = load('./Final_Results/FS/rfetrain_corr_rf.joblib')
Xtest2 = XN[:, np.where(selected_rfe2.support_)[0]]
Xtest.shape

(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


(67, 11)

In [71]:
# Load training dataset to train the current model using all training set
# clc1
Xtrain1 = np.load('./Final_Results/FS/Xtrain_corr_rf.npy')
ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')


In [78]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.56      0.89      0.69        36
           1       0.60      0.19      0.29        31

    accuracy                           0.57        67
   macro avg       0.58      0.54      0.49        67
weighted avg       0.58      0.57      0.51        67



In [79]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest2)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [84]:
selected_clc1 = selected_clc1.fit(Xtrain1, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain1, ytrain)


In [85]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.56      0.89      0.69        36
           1       0.60      0.19      0.29        31

    accuracy                           0.57        67
   macro avg       0.58      0.54      0.49        67
weighted avg       0.58      0.57      0.51        67



In [86]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest2)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))
