In [54]:
import numpy as np
from joblib import load
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report

In [55]:
class MyMinMax:
    def __init__(self, axis):
        self.sc = MinMaxScaler()
        self.axis = axis

    def fit(self, X):
        if self.axis==1:
            self.sc = self.sc.fit(X.transpose())
        elif self.axis==0:
            self.sc = self.sc.fit(X)
        return self.sc

    def transform(self, X):
        if self.axis==1:
            Xn = self.sc.transform(X.transpose()).transpose()
        elif self.axis==0:
            Xn = self.sc.transform(X)
        return Xn

    def fit_transform(self, X):
        if self.axis==1:
            self.sc = self.sc.fit(X.transpose())
            Xn = self.sc.transform(X.transpose()).transpose()
        elif self.axis==0:
            self.sc = self.sc.fit(X)
            Xn = self.sc.transform(X)
        return Xn

In [56]:
def mynormalize(df, allfeats=False):
    scalersdict = {}
    if allfeats:
        sc = MyMinMax(axis=1)
        XN = sc.fit_transform(df.values)
        scalersdict['allfeat'] = sc
    else:
        morph_feats = ['area', 'curv', 'thickness', 'volume']
        XN = np.array([], dtype=np.double)
        for ind, morph_feat in enumerate(morph_feats):
            morph_cols = [col for col in df.columns if morph_feat in col]
            X_morph = df.loc[:, morph_cols].values
            Xn = (X_morph-np.min(X_morph, axis=1).reshape(-1,1))/(np.max(X_morph, axis=1).reshape(-1,1)-np.min(X_morph, axis=1).reshape(-1,1))
            if ind == 0:
                XN = np.append(XN, Xn).reshape(Xn.shape[0], -1)
            else:
                XN = np.concatenate([XN, Xn], axis=1)
    return XN


## Goal of this notebook:
For each normalization method:<br>
$\;\;\;\;\;$ For each RFE classifier core:<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ For each data matrix (corr; uncorr; ucorrleft; ucorrright):<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 1. Find the classifier with highest performance <br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 2. Use this classifier to train on all the training set<br>
$\;\;\;\;\;$ $\;\;\;\;\;$ $\;\;\;\;\;$ 3. Measure the performance on the testing set<br><br>

Measure the performance in the testing set:
1. Load the testing set
2. Get the normalization object corresponding to the current normalization method
3. Normalize the testing set using the normalization object of the training set
4. Load the rfe+(RFE classifier core)
5. Get the selected features used for learning the best ML model
6. Select those features out of the normalized testing set
7. Predict the labels of the output matrix from step 6



### Logistic regression l1-norm

In [57]:
clf = load('./Final_Results/ML/allfeats/clf_lg1_train.joblib')
clf_corr = load('./Final_Results/ML/allfeats/clf_lg1_train_corr.joblib')
clf_corr_l = load('./Final_Results/ML/allfeats/clf_lg1_train_corr_l.joblib')
clf_corr_r = load('./Final_Results/ML/allfeats/clf_lg1_train_corr_r.joblib')

In [58]:
print("clf_lg1_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['XGB'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print(clf['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['XGB'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print(clf_corr['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['XGB'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print(clf_corr_l['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg1_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['XGB'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)
print(clf_corr_r['nn'].best_score_)

clf_lg1_train
0.5408474576271186
0.5172033898305084
0.5475141242937853
0.5928248587570623
0.557090395480226
0.563954802259887
0.5629378531073447
0.568954802259887
0000000000000000000000000000000000000000000
clf_lg1_train_corr
0.5675988700564971
0.5287853107344633
0.579858757062147
0.5659604519774011
0.542316384180791
0.5877683615819209
0.5628248587570621
0.5813559322033899
0000000000000000000000000000000000000000000
clf_lg1_train_corr_l
0.5642372881355933
0.5507627118644068
0.5726271186440679
0.5993220338983052
0.5305367231638417
0.5707062146892655
0.5858757062146893
0.5827683615819209
0000000000000000000000000000000000000000000
clf_lg1_train_corr_r
0.55954802259887
0.5274293785310735
0.5612146892655367
0.599774011299435
0.5524858757062147
0.5775706214689266
0.5813841807909604
0.5779378531073446


Based on the current results, I am going to proceed with "lg1_train_corr", classifier XGB

In [59]:
selected_clc = clf_corr['XGB'].best_estimator_
clf_corr['XGB'].cv_results_

{'mean_fit_time': array([0.03234625, 0.0131959 , 0.01123857, ..., 0.01522493, 0.01436658,
        0.05259418]),
 'std_fit_time': array([0.00136898, 0.00190569, 0.0002922 , ..., 0.00064087, 0.00121955,
        0.0128072 ]),
 'mean_score_time': array([0.0020525 , 0.00148807, 0.00149236, ..., 0.00182972, 0.00185828,
        0.00162644]),
 'std_score_time': array([2.09010917e-04, 3.28372883e-04, 3.99831867e-05, ...,
        7.99501930e-05, 8.84095698e-05, 2.43755891e-04]),
 'param_reg_lambda': masked_array(data=[10, 0, 10, ..., 1, 0.001, 0],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_reg_alpha': masked_array(data=[0.5, 0.001, 0.5, ..., 0.5, 1, 0.5],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_min_child_weight': masked_array(data=[0.5, 1, 10, ..., 0.5, 0.01, 0.01],
              mask=[False, False, False, ..., False, False, 

In [60]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

(67, 545)
0    36
1    31
Name: labels, dtype: int64


In [61]:
print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

baseline score:  0.5373134328358209


In [62]:
# Load the corresponding rfe object
selected_rfe1 = load('./Final_Results/FS/rfetrain_corr_lg1.joblib')
Xtest = XN[:, np.where(selected_rfe1.support_)[0]]
Xtest.shape

(67, 11)

In [63]:
# Load training dataset to train the current model using all training set
Xtrain = np.load('./Final_Results/FS/Xtrain_corr_lg1.npy')
ytrain = np.load('./Final_Results/FS/ytrain_corr.npy')
Xtrain.shape, ytrain.shape

((597, 11), (597,))

In [64]:
print(classification_report(df_test['labels'].values, selected_clc.predict(Xtest)))

ValueError: feature_names mismatch: ['f0', 'f1', 'f2'] ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10']
training data did not have the following fields: f6, f10, f3, f4, f7, f9, f8, f5

In [None]:
selected_clc = selected_clc.fit(Xtrain, ytrain)
print(classification_report(ytrain, selected_clc.predict(Xtrain)))

In [65]:
print(classification_report(df_test['labels'].values, selected_clc.predict(Xtest)))

ValueError: feature_names mismatch: ['f0', 'f1', 'f2'] ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10']
training data did not have the following fields: f6, f10, f3, f4, f7, f9, f8, f5

### Logistic regression l2-norm

In [66]:
clf = load('./Final_Results/ML/allfeats/clf_lg2_train.joblib')
clf_corr = load('./Final_Results/ML/allfeats/clf_lg2_train_corr.joblib')
clf_corr_l = load('./Final_Results/ML/allfeats/clf_lg2_train_corr_l.joblib')
clf_corr_r = load('./Final_Results/ML/allfeats/clf_lg2_train_corr_r.joblib')

In [67]:
print("clf_lg2_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['XGB'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print(clf['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['XGB'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print(clf_corr['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['XGB'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print(clf_corr_l['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_lg2_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['XGB'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)
print(clf_corr_r['nn'].best_score_)

clf_lg2_train
0.5912429378531072
0.5534180790960452
0.5996610169491525
0.5928248587570621
0.5720621468926554
0.6112146892655368
0.5743220338983052
0.5911581920903954
0000000000000000000000000000000000000000000
clf_lg2_train_corr
0.5843785310734464
0.5625141242937853
0.5878813559322034
0.5790677966101695
0.5330508474576272
0.5875706214689266
0.5742655367231638
0.604406779661017
0000000000000000000000000000000000000000000
clf_lg2_train_corr_l
0.5674858757062147
0.5433898305084746
0.5673728813559322
0.5760734463276835
0.5350282485875706
0.5824858757062147
0.5643502824858757
0.5767231638418078
0000000000000000000000000000000000000000000
clf_lg2_train_corr_r
0.5830225988700566
0.5661016949152542
0.5863559322033899
0.5947457627118643
0.5541525423728814
0.587994350282486
0.5979661016949154
0.6033050847457627


In [68]:
selected_clc1 = clf['SVC'].best_estimator_
selected_clc2 = clf['lg'].best_estimator_
selected_clc3 = clf_corr['nn'].best_estimator_
selected_clc4 = clf_corr_r['nn'].best_estimator_

print(f'selected classifier 1: SVC_alldata with accuracy {clf["SVC"].best_score_}')
print(f'selected classifier 2: lg_alldata with accuracy {clf["lg"].best_score_}')
print(f'selected classifier 3: nn_corr with accuracy {clf_corr["nn"].best_score_}')
print(f'selected classifier 4: nn_corr_r with accuracy {clf_corr_r["nn"].best_score_}')

selected classifier 1: SVC_alldata with accuracy 0.6112146892655368
selected classifier 2: lg_alldata with accuracy 0.5996610169491525
selected classifier 3: nn_corr with accuracy 0.604406779661017
selected classifier 4: nn_corr_r with accuracy 0.6033050847457627


In [69]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=True)

# Load the corresponding rfe object
#clc1 & 2
selected_rfe12 = load('./Final_Results/FS/Normalize_allMorphFeats/rfetrain_lg2.joblib')
Xtest12 = XN[:, np.where(selected_rfe12.support_)[0]]

# clc3
selected_rfe3 = load('./Final_Results/FS/Normalize_allMorphFeats/rfetrain_corr_lg2.joblib')
Xtest3 = XN[:, np.where(selected_rfe3.support_)[0]]

# clc3
selected_rfe4 = load('./Final_Results/FS/Normalize_allMorphFeats/rfetrain_corr_r_lg2.joblib')
Xtest4 = XN[:, np.where(selected_rfe4.support_)[0]]


(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


In [70]:
# Load training dataset to train the current model using all training set
# clc1&2
Xtrain12 = np.load('./Final_Results/FS/Normalize_allMorphFeats/Xtrain_lg2.npy')

# clc3
Xtrain3 = np.load('./Final_Results/FS/Normalize_allMorphFeats/Xtrain_corr_lg2.npy')

# clc4
Xtrain3 = np.load('./Final_Results/FS/Normalize_allMorphFeats/Xtrain_corr_r_lg2.npy')


ytrain = np.load('./Final_Results/FS/Normalize_allMorphFeats/ytrain_corr.npy')


In [71]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.57      0.58      0.58        36
           1       0.50      0.48      0.49        31

    accuracy                           0.54        67
   macro avg       0.53      0.53      0.53        67
weighted avg       0.54      0.54      0.54        67



In [72]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.56      0.56      0.56        36
           1       0.48      0.48      0.48        31

    accuracy                           0.52        67
   macro avg       0.52      0.52      0.52        67
weighted avg       0.52      0.52      0.52        67



In [73]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest3)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [74]:
# clc4
print(classification_report(df_test['labels'].values, selected_clc4.predict(Xtest4)))

              precision    recall  f1-score   support

           0       0.54      1.00      0.70        36
           1       0.00      0.00      0.00        31

    accuracy                           0.54        67
   macro avg       0.27      0.50      0.35        67
weighted avg       0.29      0.54      0.38        67



  _warn_prf(average, modifier, msg_start, len(result))


In [75]:
selected_clc1 = selected_clc1.fit(Xtrain12, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain12, ytrain)
selected_clc3 = selected_clc3.fit(Xtrain3, ytrain)
selected_clc4 = selected_clc4.fit(Xtrain3, ytrain)


  "Setting penalty='none' will ignore the C and l1_ratio "


In [76]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.57      0.58      0.58        36
           1       0.50      0.48      0.49        31

    accuracy                           0.54        67
   macro avg       0.53      0.53      0.53        67
weighted avg       0.54      0.54      0.54        67



In [77]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest12)))

              precision    recall  f1-score   support

           0       0.56      0.56      0.56        36
           1       0.48      0.48      0.48        31

    accuracy                           0.52        67
   macro avg       0.52      0.52      0.52        67
weighted avg       0.52      0.52      0.52        67



In [78]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest3)))

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 27 is different from 9)

### Linear SVM

In [79]:
clf = load('./Final_Results/ML/allfeats/clf_svm_train.joblib')
clf_corr = load('./Final_Results/ML/allfeats/clf_svm_train_corr.joblib')
clf_corr_l = load('./Final_Results/ML/allfeats/clf_svm_train_corr_l.joblib')
clf_corr_r = load('./Final_Results/ML/allfeats/clf_svm_train_corr_r.joblib')

In [80]:
print("clf_svm_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['XGB'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print(clf['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['XGB'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print(clf_corr['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['XGB'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print(clf_corr_l['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf_svm_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['XGB'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)
print(clf_corr_r['nn'].best_score_)

clf_svm_train
0.6414689265536724
0.5259604519774012
0.6498870056497175
0.603050847457627
0.5453672316384182
0.6397175141242938
0.5561864406779661
0.6024011299435028
0000000000000000000000000000000000000000000
clf_svm_train_corr
0.6347175141242938
0.5585593220338982
0.6281073446327683
0.5912429378531074
0.5386440677966103
0.6565254237288135
0.5577966101694916
0.5981638418079096
0000000000000000000000000000000000000000000
clf_svm_train_corr_l
0.5860734463276837
0.5464689265536723
0.6011016949152543
0.5775141242937853
0.5487005649717516
0.6213841807909605
0.5677683615819209
0.5781638418079096
0000000000000000000000000000000000000000000
clf_svm_train_corr_r
0.5914689265536723
0.5668926553672315
0.5947457627118644
0.6064971751412429
0.5439830508474576
0.5980508474576272
0.5878813559322034
0.589406779661017


In [81]:
selected_clc1 = clf['lg'].best_estimator_
selected_clc2 = clf['SVC'].best_estimator_
selected_clc3 = clf['lSVM'].best_estimator_

selected_clc4 = clf_corr['lg'].best_estimator_
selected_clc5 = clf_corr['SVC'].best_estimator_
selected_clc6 = clf_corr['lSVM'].best_estimator_

selected_clc7 = clf_corr_l['SVC'].best_estimator_

print(f"selected classifier 1: lg_alldata with accuracy {clf['lg'].best_score_}")
print(f"selected classifier 2: SVC_alldata with accuracy {clf['SVC'].best_score_}")
print(f"selected classifier 3: lSVM_alldata with accuracy {clf['lSVM'].best_score_}")
print(f"selected classifier 4: lg_corr with accuracy {clf_corr['lg'].best_score_}")
print(f"selected classifier 5: SVC_corr with accuracy {clf_corr['SVC'].best_score_}")
print(f"selected classifier 6: lSVM_corr with accuracy {clf_corr['lSVM'].best_score_}")
print(f"selected classifier 7: SVC_corr_r with accuracy {clf_corr_l['SVC'].best_score_}")

selected classifier 1: lg_alldata with accuracy 0.6498870056497175
selected classifier 2: SVC_alldata with accuracy 0.6397175141242938
selected classifier 3: lSVM_alldata with accuracy 0.6414689265536724
selected classifier 4: lg_corr with accuracy 0.6281073446327683
selected classifier 5: SVC_corr with accuracy 0.6565254237288135
selected classifier 6: lSVM_corr with accuracy 0.6347175141242938
selected classifier 7: SVC_corr_r with accuracy 0.6213841807909605


In [87]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=True)

# Load the corresponding rfe object
#clc1,2,3
selected_rfe123 = load('./Final_Results/FS/Normalize_allMorphFeats/rfetrain_svm.joblib')
Xtest123 = XN[:, np.where(selected_rfe123.support_)[0]]

# clc4,5,6
selected_rfe456 = load('./Final_Results/FS/Normalize_allMorphFeats/rfetrain_corr_svm.joblib')
Xtest456 = XN[:, np.where(selected_rfe456.support_)[0]]

# clc7
selected_rfe7 = load('./Final_Results/FS/Normalize_allMorphFeats/rfetrain_corr_l_svm.joblib')
Xtest7 = XN[:, np.where(selected_rfe7.support_)[0]]


(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


In [96]:
# Load training dataset to train the current model using all training set
# clc1
Xtrain123 = np.load('./Final_Results/FS/Normalize_allMorphFeats/Xtrain_svm.npy')

# clc23
Xtrain456 = np.load('./Final_Results/FS/Normalize_allMorphFeats/Xtrain_corr_svm.npy')


# clc23
Xtrain7 = np.load('./Final_Results/FS/Normalize_allMorphFeats/Xtrain_corr_l_svm.npy')

ytrain = np.load('./Final_Results/FS/Normalize_allMorphFeats/ytrain_corr.npy')


In [88]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest123)))

              precision    recall  f1-score   support

           0       0.55      0.64      0.59        36
           1       0.48      0.39      0.43        31

    accuracy                           0.52        67
   macro avg       0.51      0.51      0.51        67
weighted avg       0.52      0.52      0.52        67



In [89]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest123)))

              precision    recall  f1-score   support

           0       0.57      0.58      0.58        36
           1       0.50      0.48      0.49        31

    accuracy                           0.54        67
   macro avg       0.53      0.53      0.53        67
weighted avg       0.54      0.54      0.54        67



In [90]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest123)))

              precision    recall  f1-score   support

           0       0.59      0.64      0.61        36
           1       0.54      0.48      0.51        31

    accuracy                           0.57        67
   macro avg       0.56      0.56      0.56        67
weighted avg       0.56      0.57      0.56        67



In [91]:
# clc4
print(classification_report(df_test['labels'].values, selected_clc4.predict(Xtest456)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [92]:
# clc5
print(classification_report(df_test['labels'].values, selected_clc5.predict(Xtest456)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [93]:
# clc6
print(classification_report(df_test['labels'].values, selected_clc6.predict(Xtest456)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [94]:
# clc7
print(classification_report(df_test['labels'].values, selected_clc7.predict(Xtest7)))

              precision    recall  f1-score   support

           0       0.53      0.92      0.67        36
           1       0.40      0.06      0.11        31

    accuracy                           0.52        67
   macro avg       0.47      0.49      0.39        67
weighted avg       0.47      0.52      0.41        67



In [97]:
selected_clc1 = selected_clc1.fit(Xtrain123, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain123, ytrain)
selected_clc3 = selected_clc3.fit(Xtrain123, ytrain)
selected_clc4 = selected_clc4.fit(Xtrain456, ytrain)
selected_clc5 = selected_clc5.fit(Xtrain456, ytrain)
selected_clc6 = selected_clc6.fit(Xtrain456, ytrain)
selected_clc7 = selected_clc7.fit(Xtrain7, ytrain)


  "Setting penalty='none' will ignore the C and l1_ratio "
  "Setting penalty='none' will ignore the C and l1_ratio "


In [100]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest123)))

              precision    recall  f1-score   support

           0       0.55      0.64      0.59        36
           1       0.48      0.39      0.43        31

    accuracy                           0.52        67
   macro avg       0.51      0.51      0.51        67
weighted avg       0.52      0.52      0.52        67



In [101]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest123)))

              precision    recall  f1-score   support

           0       0.57      0.58      0.58        36
           1       0.50      0.48      0.49        31

    accuracy                           0.54        67
   macro avg       0.53      0.53      0.53        67
weighted avg       0.54      0.54      0.54        67



In [104]:
# clc3
print(classification_report(df_test['labels'].values, selected_clc3.predict(Xtest123)))

              precision    recall  f1-score   support

           0       0.59      0.64      0.61        36
           1       0.54      0.48      0.51        31

    accuracy                           0.57        67
   macro avg       0.56      0.56      0.56        67
weighted avg       0.56      0.57      0.56        67



In [105]:
# clc4
print(classification_report(df_test['labels'].values, selected_clc4.predict(Xtest456)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [106]:
# clc5
print(classification_report(df_test['labels'].values, selected_clc5.predict(Xtest456)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [107]:
# clc6
print(classification_report(df_test['labels'].values, selected_clc6.predict(Xtest456)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [108]:
# clc7
print(classification_report(df_test['labels'].values, selected_clc7.predict(Xtest7)))

              precision    recall  f1-score   support

           0       0.53      0.92      0.67        36
           1       0.40      0.06      0.11        31

    accuracy                           0.52        67
   macro avg       0.47      0.49      0.39        67
weighted avg       0.47      0.52      0.41        67



### RF


In [109]:
clf = load('./Final_Results/ML/allfeats/clf__rf_train.joblib')
clf_corr = load('./Final_Results/ML/allfeats/clf__rf_train_corr.joblib')
clf_corr_l = load('./Final_Results/ML/allfeats/clf__rf_train_corr_l.joblib')
clf_corr_r = load('./Final_Results/ML/allfeats/clf__rf_train_corr_r.joblib')

In [110]:
print("clf__rf_train")
print(clf['lSVM'].best_score_)
print(clf['pagg'].best_score_)
print(clf['lg'].best_score_)
print(clf['XGB'].best_score_)
print(clf['GNB'].best_score_)
print(clf['SVC'].best_score_)
print(clf['Rf'].best_score_)
print(clf['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr")
print(clf_corr['lSVM'].best_score_)
print(clf_corr['pagg'].best_score_)
print(clf_corr['lg'].best_score_)
print(clf_corr['XGB'].best_score_)
print(clf_corr['GNB'].best_score_)
print(clf_corr['SVC'].best_score_)
print(clf_corr['Rf'].best_score_)
print(clf_corr['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr_l")
print(clf_corr_l['lSVM'].best_score_)
print(clf_corr_l['pagg'].best_score_)
print(clf_corr_l['lg'].best_score_)
print(clf_corr_l['XGB'].best_score_)
print(clf_corr_l['GNB'].best_score_)
print(clf_corr_l['SVC'].best_score_)
print(clf_corr_l['Rf'].best_score_)
print(clf_corr_l['nn'].best_score_)
print("0000000000000000000000000000000000000000000")
print("clf__rf_train_corr_r")
print(clf_corr_r['lSVM'].best_score_)
print(clf_corr_r['pagg'].best_score_)
print(clf_corr_r['lg'].best_score_)
print(clf_corr_r['XGB'].best_score_)
print(clf_corr_r['GNB'].best_score_)
print(clf_corr_r['SVC'].best_score_)
print(clf_corr_r['Rf'].best_score_)
print(clf_corr_r['nn'].best_score_)

clf__rf_train
0.5964124293785311
0.5017514124293785
0.5947175141242939
0.5879943502824859
0.5203389830508474
0.5965254237288135
0.5393220338983051
0.5536440677966101
0000000000000000000000000000000000000000000
clf__rf_train_corr
0.5877966101694916
0.5572033898305084
0.609774011299435
0.5811864406779661
0.5352824858757061
0.606186440677966
0.5626553672316386
0.5925423728813559
0000000000000000000000000000000000000000000
clf__rf_train_corr_l
0.5642655367231638
0.5248870056497175
0.567683615819209
0.5876836158192089
0.5503954802259887
0.599180790960452
0.5592655367231638
0.5877683615819208
0000000000000000000000000000000000000000000
clf__rf_train_corr_r
0.5796045197740114
0.532768361581921
0.5881073446327683
0.5964971751412429
0.5287005649717513
0.5947175141242939
0.5727401129943503
0.5846610169491526


In [111]:
selected_clc1 = clf_corr['lg'].best_estimator_
selected_clc2 = clf_corr['SVC'].best_estimator_
print(f"selected classifier 1: lg_corr with accuracy {clf_corr['lg'].best_score_}")
print(f"selected classifier 2: SVC_corr with accuracy {clf_corr['SVC'].best_score_}")


selected classifier 1: lg_corr with accuracy 0.609774011299435
selected classifier 2: SVC_corr with accuracy 0.606186440677966


In [112]:
# Load test dataset
df_test = pd.read_csv('./Final_Results/INITIAL_SPLIT/test_fullbrain.csv', index_col=0)
print(df_test.shape)
print(df_test['labels'].value_counts())

print('baseline score: ',36/(31+36))
XN = mynormalize(df_test, allfeats=False)

# Load the corresponding rfe object
#clc1,2
selected_rfe1 = load('./Final_Results/FS/Normalize_allMorphFeats/rfetrain_corr_rf.joblib')
Xtest1 = XN[:, np.where(selected_rfe1.support_)[0]]



(67, 545)
0    36
1    31
Name: labels, dtype: int64
baseline score:  0.5373134328358209


In [113]:
# Load training dataset to train the current model using all training set
# clc1
Xtrain1 = np.load('./Final_Results/FS/Normalize_allMorphFeats/Xtrain_corr_rf.npy')
ytrain = np.load('./Final_Results/FS/Normalize_allMorphFeats/ytrain_corr.npy')


In [78]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.56      0.89      0.69        36
           1       0.60      0.19      0.29        31

    accuracy                           0.57        67
   macro avg       0.58      0.54      0.49        67
weighted avg       0.58      0.57      0.51        67



In [79]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest2)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))


In [84]:
selected_clc1 = selected_clc1.fit(Xtrain1, ytrain)
selected_clc2 = selected_clc2.fit(Xtrain1, ytrain)


In [85]:
# clc1
print(classification_report(df_test['labels'].values, selected_clc1.predict(Xtest1)))

              precision    recall  f1-score   support

           0       0.56      0.89      0.69        36
           1       0.60      0.19      0.29        31

    accuracy                           0.57        67
   macro avg       0.58      0.54      0.49        67
weighted avg       0.58      0.57      0.51        67



In [86]:
# clc2
print(classification_report(df_test['labels'].values, selected_clc2.predict(Xtest2)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.46      1.00      0.63        31

    accuracy                           0.46        67
   macro avg       0.23      0.50      0.32        67
weighted avg       0.21      0.46      0.29        67



  _warn_prf(average, modifier, msg_start, len(result))
