In [1]:
import os

import numpy as np
import pandas as pd
from joblib import load
from sklearn.metrics import confusion_matrix

# Questions
## Q1: Given the optimum hyperparameters, which classifier can best classify ASD?
- Find the maximum average accuracy in `mean_test_score`, then compare it to the mean train score at the same index to make sure that the result is consistent.
## Q2: Is there any performance difference between the best classifier used for left and right hemisphere?
- Check whether it is the same classifier that produced the best results for the left and right hemispheres
- Compare the 2 results 
## Q3: Is there any performance difference between using Median & IQR and using Median-IQR & Median+IQR?
- Repeat the same steps of question 1 and 2
- Compare the end results 

In [27]:
# Load data for question one: each result file is loaded right after its path is defined.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load result files you trust.

# Results without the "_modifiedMedPIQR" suffix ("orig")
clf_left_rf_orig_file = './Results/ML/clf_left_rf.joblib'
clf_left_rf_orig = load(clf_left_rf_orig_file)
clf_left_svm_orig_file = './Results/ML/clf_left_svm.joblib'
clf_left_svm_orig = load(clf_left_svm_orig_file)
clf_right_rf_orig_file = './Results/ML/clf_right_rf.joblib'
clf_right_rf_orig = load(clf_right_rf_orig_file)
clf_right_svm_orig_file = './Results/ML/clf_right_svm.joblib'
clf_right_svm_orig = load(clf_right_svm_orig_file)

# Results with the "_modifiedMedPIQR" suffix ("update")
clf_left_rf_update_file = './Results/ML/clf_left_rf_modifiedMedPIQR.joblib'
clf_left_rf_update = load(clf_left_rf_update_file)
clf_left_svm_update_file = './Results/ML/clf_left_svm_modifiedMedPIQR.joblib'
clf_left_svm_update = load(clf_left_svm_update_file)
clf_right_rf_update_file = './Results/ML/clf_right_rf_modifiedMedPIQR.joblib'
clf_right_rf_update = load(clf_right_rf_update_file)
clf_right_svm_update_file = './Results/ML/clf_right_svm_modifiedMedPIQR.joblib'
clf_right_svm_update = load(clf_right_svm_update_file)


In [28]:
# Define function
def extract_best_results(clc):
    """Find the best cross-validated score per classifier and overall.

    Parameters
    ----------
    clc : dict
        Maps a classifier key to a fitted search object exposing
        ``cv_results_['mean_test_score']`` (one score per hyper-parameter
        setting).

    Returns
    -------
    tuple
        ``((best_acc_all, best_clc_all, best_index_all), clc_best_acc)``.
        The first element holds the highest score over all classifiers, the
        key that produced it and the index of the winning hyper-parameter
        setting; it is ``(0, None, None)`` when no classifier has a usable
        score.  ``clc_best_acc`` maps every key to its own
        ``(best_acc, best_acc_index)``.

    Notes
    -----
    NaN scores are ignored when looking for the maximum, so a single NaN
    entry (e.g. a failed fit recorded as NaN by the search) no longer hides
    the valid scores of that classifier.  A classifier whose scores are all
    NaN (or empty) is recorded as ``(nan, None)`` and is never selected.
    """
    clc_best_acc = dict()
    # Start below any real score so that zero or negative scores can still win.
    best_acc_all = -np.inf
    best_clc_all = None
    best_index_all = None
    for key in clc:
        grid_obj = clc[key]
        mean_acc_perhyper = grid_obj.cv_results_['mean_test_score']
        try:
            # nanargmax skips NaN entries; it raises ValueError when there is
            # nothing left to compare (all-NaN or empty input).
            best_acc_index = np.nanargmax(mean_acc_perhyper)
        except ValueError:
            clc_best_acc[key] = (float('nan'), None)
            continue
        best_acc = mean_acc_perhyper[best_acc_index]
        clc_best_acc[key] = (best_acc, best_acc_index)
        if best_acc > best_acc_all:
            best_acc_all = best_acc
            best_clc_all = key
            best_index_all = best_acc_index
    if best_clc_all is None:
        # Keep the historical "nothing found" result.
        best_acc_all = 0
    return (best_acc_all, best_clc_all, best_index_all), clc_best_acc
        
def compare_results(clc1, clc2):
    """Print the overall best result of two classifier collections.

    One line is printed per collection: the best mean test score, the key of
    the classifier that achieved it and the index of the winning
    hyper-parameter setting, as returned by ``extract_best_results``.
    Nothing is returned.
    """
    # Evaluate both collections before printing anything.
    overall_bests = [extract_best_results(collection)[0] for collection in (clc1, clc2)]
    for top_acc, top_key, top_index in overall_bests:
        print(top_acc, top_key, top_index)


In [29]:
# Overall best (score, classifier key, hyper-parameter index) for clf_left_rf_orig.
raw_acc, dict_acc = extract_best_results(clf_left_rf_orig)
raw_acc

(0.6082056590752243, 'SVC', 600)

In [30]:
# Overall best (score, classifier key, hyper-parameter index) for clf_left_svm_orig.
raw_acc, dict_acc = extract_best_results(clf_left_svm_orig)
raw_acc

(0.600358337314859, 'SVC', 156)

In [31]:
# Overall best (score, classifier key, hyper-parameter index) for clf_right_rf_orig.
raw_acc, dict_acc = extract_best_results(clf_right_rf_orig)
raw_acc

(0.6165185539098582, 'SVC', 195)

In [32]:
# Overall best (score, classifier key, hyper-parameter index) for clf_right_svm_orig.
# NOTE(review): the recorded output is identical to the one of the
# clf_left_svm_orig cell -- confirm the two .joblib files are not duplicates.
raw_acc, dict_acc = extract_best_results(clf_right_svm_orig)
raw_acc

(0.600358337314859, 'SVC', 156)

In [33]:
# Overall best (score, classifier key, hyper-parameter index) for clf_left_rf_update.
raw_acc, dict_acc = extract_best_results(clf_left_rf_update)
raw_acc

(0.6167579763231937, 'nn', 583)

In [34]:
# Overall best (score, classifier key, hyper-parameter index) for clf_left_svm_update.
raw_acc, dict_acc = extract_best_results(clf_left_svm_update)
raw_acc

(0.5816770186335404, 'nn', 1124)

In [35]:
# Overall best (score, classifier key, hyper-parameter index) for clf_right_rf_update.
raw_acc, dict_acc = extract_best_results(clf_right_rf_update)
raw_acc

(0.596845039018952, 'nn', 1099)

In [36]:
# Overall best (score, classifier key, hyper-parameter index) for clf_right_svm_update.
raw_acc, dict_acc = extract_best_results(clf_right_svm_update)
raw_acc

(0.5869522747783618, 'nn', 758)

## Questions (For results without FS)
## Q1: Given the optimum hyperparameters, which classifier can best classify ASD?
- Find the maximum average accuracy in `mean_test_score`, then compare it to the mean train score at the same index to make sure that the result is consistent.
## Q2: Is there any performance difference between the best classifier used for left and right hemisphere?
- Check whether it is the same classifier that produced the best results for the left and right hemispheres
- Compare the 2 results 
## Q3: Is there any performance difference between using Median & IQR and using Median-IQR & Median+IQR?
- Repeat the same steps of question 1 and 2
- Compare the end results 

In [3]:
# Load the results stored in clf_left_OnlyCorr_50.joblib.
# NOTE(review): the path variable is called "orig" although the file is the
# "OnlyCorr_50" one; the loaded object is bound to clf_left_50.
clf_left_orig_file = './Results/ML/clf_left_OnlyCorr_50.joblib'
clf_left_50 = load(clf_left_orig_file)


In [6]:
# Overall best (score, classifier key, hyper-parameter index) for clf_left_50.
raw_acc, dict_acc = extract_best_results(clf_left_50)
raw_acc

(0.5943318999840738, 'nn', 40)

In [21]:
# Test splits for both hemispheres; the first CSV column becomes the index.
df_test_left = pd.read_csv('./Results/INITIAL_SPLIT/left_test_modifiedMedPIQR.csv', index_col=0)
df_test_right = pd.read_csv('./Results/INITIAL_SPLIT/right_test_modifiedMedPIQR.csv', index_col=0)
# Training frames from the CORR_ANA folder ("_50" files); the next cell uses
# their column names to subset the test frames.
df_correlation_left = pd.read_csv('./Results/CORR_ANA/left_train_noColliniarity_modifiedMedPIQR_50.csv', index_col=0)
df_correlation_right = pd.read_csv('./Results/CORR_ANA/right_train_noColliniarity_modifiedMedPIQR_50.csv', index_col=0)


In [22]:
# Restrict each test frame to the columns present in the matching CORR_ANA
# training frame (same column order as the training frame).
df_test_left_corr = df_test_left[df_correlation_left.columns.to_list()]
df_test_right_corr = df_test_right[df_correlation_right.columns.to_list()]


In [23]:
# Preview of the full left-hemisphere test frame.
df_test_left.head()

Unnamed: 0,thick_lbankssts_medMIQR,thick_lbankssts_medPIQR,thick_lcaudalanteriorcingulate_medMIQR,thick_lcaudalanteriorcingulate_medPIQR,thick_lcaudalmiddlefrontal_medMIQR,thick_lcaudalmiddlefrontal_medPIQR,thick_lcuneus_medMIQR,thick_lcuneus_medPIQR,thick_lentorhinal_medMIQR,thick_lentorhinal_medPIQR,...,vol_lfrontalpole_medPIQR,vol_ltemporalpole_medMIQR,vol_ltemporalpole_medPIQR,vol_ltransversetemporal_medMIQR,vol_ltransversetemporal_medPIQR,vol_linsula_medMIQR,vol_linsula_medPIQR,age,sex,labels
UCLA_2_0051316,1.372436,3.498534,1.139166,4.492524,1.486065,3.777496,1.211377,3.274007,1.855718,4.868339,...,7.242625,-1.498976,8.970892,0.053882,3.677292,-2.144106,2.144106,12.66,1.0,0
Leuven_2_0050724,1.613562,3.662806,1.546067,4.069052,1.354834,3.920249,1.126931,3.232941,1.907912,4.944193,...,6.045684,-1.57397,8.438231,0.127821,3.837382,-1.926027,2.562963,15.1,1.0,0
NYU_0051069,2.108517,4.3423,1.951656,3.654624,2.149486,4.131291,1.385836,3.739188,3.105746,5.467828,...,8.863983,-1.077441,8.397695,0.32291,3.442504,-1.974721,1.974721,8.15,1.0,0
UM_1_0050329,1.524942,3.780062,1.52558,5.055458,2.034651,3.999624,0.915229,2.798962,1.4022,5.390509,...,5.25193,-1.74925,8.247222,0.020822,2.748648,-2.008974,2.008974,17.1,1.0,0
UM_1_0050345,0.523566,3.200475,1.981103,3.886278,1.607541,4.140425,0.757585,2.709261,0.411164,4.235251,...,6.464328,-0.52912,3.686837,0.036139,2.828017,-1.851623,2.245469,17.3,1.0,0


In [24]:
# Preview of the column-restricted left test frame (still contains 'labels').
df_test_left_corr.head()

Unnamed: 0,thick_lcaudalanteriorcingulate_medMIQR,thick_listhmuscingulate_medMIQR,thick_lmedialorbitofrontal_medPIQR,thick_lparahippocampal_medPIQR,thick_lposteriorcingulate_medMIQR,thick_lrostralanteriorcingulate_medPIQR,thick_lfrontalpole_medMIQR,thick_ltemporalpole_medMIQR,curv_lbankssts_medPIQR,curv_lcaudalanteriorcingulate_medMIQR,...,vol_lfusiform_medMIQR,vol_listhmuscingulate_medMIQR,vol_llateraloccipital_medPIQR,vol_lmedialorbitofrontal_medMIQR,vol_lparahippocampal_medMIQR,vol_lsuperiortemporal_medMIQR,vol_lfrontalpole_medMIQR,vol_ltemporalpole_medMIQR,vol_ltransversetemporal_medMIQR,labels
UCLA_2_0051316,1.139166,1.079948,4.895005,4.377817,0.961509,5.117865,1.526271,2.716174,0.248083,-0.244442,...,-0.722583,-0.720601,3.452683,-0.834325,-0.22184,-0.591447,-0.195907,-1.498976,0.053882,0
Leuven_2_0050724,1.546067,1.051037,4.367829,4.623127,1.038661,4.559638,1.716619,1.939212,0.328835,-0.258392,...,-0.485051,-0.68358,3.807115,-0.576954,-0.035994,-0.580187,-0.192784,-1.57397,0.127821,0
NYU_0051069,1.951656,1.477995,4.294952,4.536819,1.823674,4.389538,3.351933,2.914286,0.229267,-0.376027,...,-0.665995,-1.03799,3.432285,-0.421762,-0.332557,-0.288487,-0.761049,-1.077441,0.32291,0
UM_1_0050329,1.52558,1.388235,3.985483,4.73696,1.83875,4.388633,1.429612,1.40374,0.242997,-0.333686,...,-0.536843,-0.337207,3.268002,-0.523066,-0.26899,-0.410499,-0.846937,-1.74925,0.020822,0
UM_1_0050345,1.981103,0.982263,4.093164,3.799942,1.532805,3.651844,2.073032,0.551598,0.3028,-0.336548,...,-0.682542,-0.604987,2.90201,-0.784227,-0.464377,-0.276401,-0.819602,-0.52912,0.036139,0


In [25]:
# Best estimator stored under the 'nn' key of the clf_left_50 results.
# (confusion_matrix is imported in the notebook's top import cell.)
estimator = clf_left_50['nn'].best_estimator_

In [26]:
# Split target and features without mutating df_test_left_corr, so the cell
# can be re-run (a second .pop('labels') on the same frame raises KeyError).
ytrue = df_test_left_corr['labels']
xtest = df_test_left_corr.drop(columns='labels')
yest = estimator.predict(xtest)
# NOTE(review): the recorded confusion matrix has an empty second column,
# i.e. no test sample was predicted as the second class.
confusion_matrix(ytrue, yest)

array([[58,  0],
       [58,  0]], dtype=int64)

In [38]:
# Rebinds `estimator` (bound to the clf_left_50 model in an earlier cell) to
# the 'nn' best estimator of clf_left_rf_update.
estimator = clf_left_rf_update['nn'].best_estimator_

In [39]:
# Reads the same CSV path that an earlier cell loaded into df_test_left.
df_left_test = pd.read_csv('./Results/INITIAL_SPLIT/left_test_modifiedMedPIQR.csv', index_col=0)

In [60]:
# Training frame from the CORR_ANA folder; only its column names are used here.
df1 = pd.read_csv('./Results/CORR_ANA/left_train_noColliniarity_modifiedMedPIQR.csv', index_col=0)

# Left test frame restricted to those columns.
df_test_nocorr = df_left_test[df1.columns]
# Feature-selection object for the left / rf configuration.
fs_obj = load('./Results/FS/left_rf_modifiedMedPIQR.joblib')
# Same file path as clf_left_rf_update_file, loaded a second time under a new name.
ml_obj = load('./Results/ML/clf_left_rf_modifiedMedPIQR.joblib')

In [68]:
# NOTE: only the last expression of a cell is displayed, so the shape on the
# next line is evaluated but never shown; the recorded output (150) is the
# n_features_in_ of the 'lg' best estimator.
df_test_nocorr.shape
ml_obj['lg'].best_estimator_.n_features_in_

150

In [49]:
# Removes 'labels' from df_test_nocorr in place and keeps it as ytest.
# NOTE(review): not idempotent -- re-running this cell without re-creating
# df_test_nocorr raises KeyError.
ytest = df_test_nocorr.pop('labels')

In [55]:
# Keep the columns whose position has ranking_ == 1 in fs_obj.
# NOTE(review): the selection is positional, so it assumes df_test_nocorr has
# the same column order the selector was fitted on -- TODO confirm.
df_test_nocorr_fs = df_test_nocorr.iloc[:, np.where(fs_obj.ranking_==1)[0]]

In [56]:
# Preview of the feature-selected left test frame.
df_test_nocorr_fs.head()

Unnamed: 0,thick_lcaudalanteriorcingulate_medMIQR,thick_lcuneus_medMIQR,thick_lentorhinal_medMIQR,thick_lfusiform_medPIQR,thick_listhmuscingulate_medMIQR,thick_listhmuscingulate_medPIQR,thick_llateraloccipital_medMIQR,thick_llingual_medMIQR,thick_lmedialorbitofrontal_medPIQR,thick_lparahippocampal_medMIQR,...,vol_lparstriangularis_medMIQR,vol_lposteriorcingulate_medMIQR,vol_lposteriorcingulate_medPIQR,vol_lrostralanteriorcingulate_medMIQR,vol_lsuperiortemporal_medMIQR,vol_lsuperiortemporal_medPIQR,vol_lfrontalpole_medMIQR,vol_lfrontalpole_medPIQR,vol_ltemporalpole_medMIQR,vol_ltransversetemporal_medMIQR
UCLA_2_0051316,1.139166,1.211377,1.855718,4.803106,1.079948,4.919478,1.173175,1.032534,4.895005,1.31144,...,-0.910552,-1.330254,5.550843,-0.924632,-0.591447,3.602504,-0.195907,7.242625,-1.498976,0.053882
Leuven_2_0050724,1.546067,1.126931,1.907912,4.169976,1.051037,4.28557,1.376269,0.999079,4.367829,1.966897,...,-0.62452,-1.061409,4.720491,-0.352267,-0.580187,4.612998,-0.192784,6.045684,-1.57397,0.127821
NYU_0051069,1.951656,1.385836,3.105746,4.624345,1.477995,5.085954,1.293423,1.335146,4.294952,1.218505,...,-0.635838,-0.338879,4.430961,-0.408415,-0.288487,4.620739,-0.761049,8.863983,-1.077441,0.32291
UM_1_0050329,1.52558,0.915229,1.4022,4.302385,1.388235,4.596015,0.961463,0.69111,3.985483,1.693692,...,-0.596823,-0.467444,4.253399,-0.723723,-0.410499,3.661186,-0.846937,5.25193,-1.74925,0.020822
UM_1_0050345,1.981103,0.757585,0.411164,3.875574,0.982263,4.221299,0.737426,0.498802,4.093164,0.370766,...,-0.699006,-0.466844,3.932599,-0.696258,-0.276401,3.101682,-0.819602,6.464328,-0.52912,0.036139


In [52]:
# Number of input features the current `estimator` was fitted on.
estimator.n_features_in_

150

In [59]:
# Number of distinct column names in the feature-selected frame
# (recorded: 158, while the estimator reports 150 input features).
len(np.unique(df_test_nocorr_fs.columns.to_list()))

158

In [63]:
# Number of positions with ranking_ == 1 in the feature-selection object.
len(np.where(fs_obj.ranking_==1)[0])

158

In [70]:
# NOTE(review): dropping the last 8 columns only makes the width match the
# estimator's n_features_in_ (158 -> 150); nothing shown here establishes that
# the remaining columns are the ones the model was trained on, and the
# recorded predictions are all 0. Treat this result as unreliable.
estimator.predict(df_test_nocorr_fs.iloc[:,:-8])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0], dtype=int64)

In [71]:
# Classifier keys available in the loaded ML results.
ml_obj.keys()

dict_keys(['lSVM', 'pagg', 'lg', 'XGB', 'GNB', 'Rf', 'SVC', 'nn'])

In [83]:
# Same check using the 'nn' best estimator taken from ml_obj.
# NOTE(review): the [:, :-8] truncation only matches the expected feature
# count; whether the kept columns are the training features is unverified.
ml_obj['nn'].best_estimator_.predict(df_test_nocorr_fs.iloc[:,:-8])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0], dtype=int64)

In [84]:
# Load original data: train / test splits for both hemispheres.
# The directory is spelled 'INITIAL_SPLIT' as in the other cells of this
# notebook; the former 'INITiAL_SPLIT' spelling only resolves on
# case-insensitive file systems.
df_train_left = pd.read_csv('./Results/INITIAL_SPLIT/left_train_modifiedMedPIQR.csv', index_col=0)
df_train_right = pd.read_csv('./Results/INITIAL_SPLIT/right_train_modifiedMedPIQR.csv', index_col=0)

df_test_left = pd.read_csv('./Results/INITIAL_SPLIT/left_test_modifiedMedPIQR.csv', index_col=0)
df_test_right = pd.read_csv('./Results/INITIAL_SPLIT/right_test_modifiedMedPIQR.csv', index_col=0)

In [85]:
# Extract uncorrelated features: the training frames come from the CORR_ANA
# folder, and the test frames are restricted to the same columns.
df_train_left_nocorr = pd.read_csv('./Results/CORR_ANA/left_train_noColliniarity_modifiedMedPIQR.csv', index_col=0)
df_train_right_nocorr = pd.read_csv('./Results/CORR_ANA/right_train_noColliniarity_modifiedMedPIQR.csv', index_col=0)

# NOTE(review): presumably these columns still include 'labels' -- verify
# before feeding the frames to an estimator.
df_test_left_nocorr = df_test_left[df_train_left_nocorr.columns]
df_test_right_nocorr = df_test_right[df_train_right_nocorr.columns]

In [89]:
# Sanity check: each test frame has as many columns as its training counterpart.
assert len(df_train_left_nocorr.columns) == len(df_test_left_nocorr.columns)
assert len(df_train_right_nocorr.columns) == len(df_test_right_nocorr.columns)

In [90]:
# Load the feature-selection (FS) objects, one per hemisphere / selector.
left_rf_obj = load('./Results/FS/left_rf_modifiedMedPIQR.joblib')
left_svc_obj = load('./Results/FS/left_svc_modifiedMedPIQR.joblib')
right_rf_obj = load('./Results/FS/right_rf_modifiedMedPIQR.joblib')
right_svc_obj = load('./Results/FS/right_svc_modifiedMedPIQR.joblib')

# Column positions with ranking_ == 1, computed once per selector and reused
# for the training and the test frame.
left_rf_keep = np.where(left_rf_obj.ranking_ == 1)[0]
left_svc_keep = np.where(left_svc_obj.ranking_ == 1)[0]
right_rf_keep = np.where(right_rf_obj.ranking_ == 1)[0]
right_svc_keep = np.where(right_svc_obj.ranking_ == 1)[0]

# Training frames restricted to the selected positions.
df_train_left_nocorr_rf = df_train_left_nocorr.iloc[:, left_rf_keep]
df_train_left_nocorr_svc = df_train_left_nocorr.iloc[:, left_svc_keep]
df_train_right_nocorr_rf = df_train_right_nocorr.iloc[:, right_rf_keep]
# NOTE(review): named "corr" although it is cut from the "nocorr" frame.
df_train_right_corr_svc = df_train_right_nocorr.iloc[:, right_svc_keep]

# Test frames restricted to the same positions.
df_test_left_nocorr_rf = df_test_left_nocorr.iloc[:, left_rf_keep]
df_test_left_nocorr_svc = df_test_left_nocorr.iloc[:, left_svc_keep]
df_test_right_nocorr_rf = df_test_right_nocorr.iloc[:, right_rf_keep]
# NOTE(review): named "corr" although it is cut from the "nocorr" frame.
df_test_right_corr_svc = df_test_right_nocorr.iloc[:, right_svc_keep]

In [105]:
# Load the ML results for each hemisphere / selector.
# NOTE(review): these four names were bound to the FS objects in the previous
# cell; after this cell they refer to the ML results instead, so re-running
# the FS cell without reloading the FS files will fail or misbehave.
left_rf_obj = load('./Results/ML/clf_left_rf_modifiedMedPIQR.joblib')
left_svc_obj = load('./Results/ML/clf_left_svm_modifiedMedPIQR.joblib')
right_rf_obj = load('./Results/ML/clf_right_rf_modifiedMedPIQR.joblib')
right_svc_obj = load('./Results/ML/clf_right_svm_modifiedMedPIQR.joblib')

In [106]:
# Number of input features each 'nn' best estimator was fitted on, in the
# order left/rf, left/svc, right/rf, right/svc.
tuple(
    results['nn'].best_estimator_.n_features_in_
    for results in (left_rf_obj, left_svc_obj, right_rf_obj, right_svc_obj)
)

(150, 134, 138, 134)

In [107]:
# Shapes of the feature-selected test frames.
# NOTE(review): the recorded widths (158, 66, 115, 66) differ from the
# n_features_in_ values recorded in the previous cell (150, 134, 138, 134),
# so these frames cannot be passed to the estimators as they are.
df_test_left_nocorr_rf.shape,df_test_left_nocorr_svc.shape,df_test_right_nocorr_rf.shape,df_test_right_corr_svc.shape

((116, 158), (116, 66), (116, 115), (116, 66))