In [1]:
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict, StratifiedKFold, train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, classification_report, multilabel_confusion_matrix
import tensorflow as tf
import datetime, os
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, Flatten, concatenate
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.constraints import MaxNorm
from sklearn.model_selection import GridSearchCV
from splitrepeat import splitrepeat_cv, splitrepeat_mcn

# TF1-compat session setup: limit this process to 80% of GPU memory and let the
# allocation grow on demand, then make that session the Keras backend session.
gpu_options = tf.compat.v1.GPUOptions(
    per_process_gpu_memory_fraction=0.8,
    allow_growth=True,
)
config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
session = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(session)

## All Features 3-class MCN

In [6]:
# Full list of input columns read from the ADNI table (used as `data[features]`
# and, in this cell, as the feature set for every model).
features=['moca_digits','moca_letters','moca_serial7','MMSPELL_early','MMSPELL_late','ADAS_Q1','moca_orient','ADAS_Q7','ADAS_Q8','ADAS_Q9','MMDATE',
       'MMYEAR', 'MMMONTH', 'MMDAY', 'MMSEASON', 'MMHOSPIT', 'MMFLOOR','MMCITY', 'MMAREA', 'MMSTATE','nbspan_forward',
       'faq7','faq8','moca_recall','ADAS_Q4','MMBALLDL', 'MMFLAGDL', 'MMTREEDL', 'LDELTOTAL', 'AVRECALL', 'AVDEL30MIN', 'AVDELTOT', 'AVDELERR2',
       'faq9','moca_clock','MMDRAW','CLOCKCIRC', 'CLOCKSYM','CLOCKNUM', 'CLOCKHAND', 'CLOCKTIME', 'COPYCIRC', 'COPYSYM',
       'COPYNUM', 'COPYHAND', 'COPYTIME', 'tmab_time', 'TRAAERRCOM','TRAAERROM', 'TRABERRCOM', 'TRABERROM','moca_naming','moca_repeat','moca_fluency',
       'moca_similarities','ADAS_Q2', 'ADAS_Q5','ADAS_Q10','ADAS_Q11','ADAS_Q12','MMBALL', 'MMFLAG', 'MMTREE',
       'MMWATCH', 'MMPENCIL','MMREPEAT','MMREAD', 'MMWRITE', 'CATANIMSC', 'CATVEGESC', 'moca_visuo_exec','ADAS_Q3','ADAS_Q6','ADAS_Q13','MMHAND', 'MMFOLD',
       'MMONFLR','nbspan_backward','faq1','faq2','faq3', 'faq4','faq5','faq6','faq10','PXGENAPP', 'PXHEADEY', 'PXNECK', 'PXCHEST',
       'PXHEART', 'PXABDOM', 'PXEXTREM', 'PXPERIPH', 'PXSKIN', 'PXMUSCUL', 'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt',
       'e_divatt_pt','e_memory_cg', 'e_lang_cg', 'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']


# Names of the columns flagged as categorical.
categorical_names = ['MMONFLR', 'faq10', 'faq6', 'COPYSYM', 'COPYNUM', 'faq5', 'MMTREE', 'COPYHAND', 'moca_clock', 'MMSEASON', 'moca_letters',
                     'MMBALL', 'faq9', 'MMFLOOR', 'MMDRAW', 'MMMONTH', 'PXGENAPP', 'MMWATCH', 'CLOCKCIRC', 'faq8', 'MMHOSPIT', 'moca_naming',
                     'PXEXTREM', 'CLOCKNUM', 'PXMUSCUL', 'faq1', 'MMTREEDL', 'CLOCKTIME', 'PXABDOM', 'MMFLAG', 'COPYCIRC', 'MMAREA', 'faq3',
                     'moca_digits', 'CLOCKHAND', 'MMREAD', 'MMYEAR', 'MMREPEAT', 'moca_visuo_exec','MMHAND', 'MMBALLDL', 'PXCHEST', 'MMDATE',
                     'MMFLAGDL',  'MMSTATE','moca_repeat', 'MMFOLD', 'MMPENCIL', 'MMDAY',  'faq2', 'PXHEART', 'CLOCKSYM', 'faq4', 'moca_serial7',
                     'faq7', 'MMCITY', 'PXHEADEY', 'COPYTIME', 'PXPERIPH', 'PXSKIN', 'moca_fluency', 'moca_similarities', 'PXNECK', 'MMWRITE',
                     'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt', 'e_divatt_pt','e_memory_cg', 'e_lang_cg',
                     'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']

# Boolean mask aligned with `features`: True where the column is categorical.
# np.isin replaces np.in1d (deprecated since NumPy 2.0); for 1-D input the
# resulting mask is identical. The mask is not consumed by the code in this cell.
categorical_features = np.isin(features, categorical_names)


# Load the interim ADNI table; X holds the input columns, y the CDGLOBAL target.
data = pd.read_csv('../data/interim/data_adni.csv')
X = data[features]
y = data['CDGLOBAL']

# One random forest per entry of `model_list`, each with its own hyperparameters.
# (RandomForestClassifier is also imported in the setup cell; re-imported here as before.)
from sklearn.ensemble import RandomForestClassifier
rf0 = RandomForestClassifier(n_estimators=1500, max_features=.2, max_depth=70, min_samples_split=2, min_samples_leaf=1, bootstrap=False, random_state=33433)
rf5 = RandomForestClassifier(n_estimators=400, max_features='log2', max_depth=30, min_samples_split=10, min_samples_leaf=1, bootstrap=False, random_state=33433)
rf1 = RandomForestClassifier(n_estimators=1500, max_features='log2', max_depth=30, min_samples_split=5, min_samples_leaf=2, bootstrap=False, random_state=33433)

# Seeds 42, 84, ..., 840 -- the same 20 values are used for `splits` and `repeats`
# (the recorded describe() output for this cell reports 400 rows).
seeds = [42 * (k + 1) for k in range(20)]

# Create the output directory before the long evaluation so the results are not
# lost at the final to_csv() if '../models/outputs/' does not exist yet.
q = 'CDR_UMCN_AllFeatures_400'
output_dir = '../models/outputs/'
os.makedirs(output_dir, exist_ok=True)

# Every model receives the full feature list in this configuration.
df_mcn = splitrepeat_mcn(X, y, model_list=[rf0, rf5, rf1], splits=list(seeds), repeats=list(seeds),
                         feature_list=[features, features, features], avg_strategy='weighted', num_classes=3, class_labels=None,
                         imbalanced='over', initial_split_seed=33433, initial_split_ratio=.25)

display(df_mcn.describe())

### Save outputs ###
# Tag every row with the run name, then write '<output_dir><q>.csv' without the index.
df_mcn['set'] = q
df_mcn.to_csv((output_dir + q + '.csv'), index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,...,Specificity2,PPV2,NPV2,Accuracy2,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,...,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.892005,0.902901,0.875327,0.915046,0.897483,0.860193,0.886509,0.889439,0.854949,0.872194,...,0.98174,0.699118,0.991625,0.974711,0.915046,0.875327,0.902901,0.892005,0.897351,0.897483
std,0.027947,0.01557,0.023086,0.02548,0.014081,0.018123,0.028303,0.031681,0.022543,0.016944,...,0.004603,0.077395,0.004654,0.006225,0.02548,0.023086,0.01557,0.027947,0.014027,0.014081
min,0.835714,0.869318,0.823077,0.859756,0.870748,0.819355,0.832258,0.823129,0.809524,0.836735,...,0.968198,0.470588,0.981949,0.959184,0.859756,0.823077,0.869318,0.835714,0.870383,0.870748
25%,0.877628,0.892216,0.861538,0.902439,0.887755,0.847682,0.863946,0.870748,0.836735,0.860544,...,0.978799,0.647059,0.98917,0.969388,0.902439,0.861538,0.892216,0.877628,0.887799,0.887755
50%,0.887218,0.903903,0.876923,0.908537,0.894558,0.859873,0.882759,0.884354,0.857143,0.870748,...,0.982079,0.705882,0.99278,0.97619,0.908537,0.876923,0.903903,0.887218,0.894599,0.894558
75%,0.915254,0.913872,0.892308,0.939024,0.908163,0.873263,0.912567,0.918367,0.870748,0.884354,...,0.985573,0.764706,0.99639,0.979592,0.939024,0.892308,0.913872,0.915254,0.907952,0.908163
max,0.958333,0.936709,0.923077,0.969512,0.931973,0.901408,0.955556,0.959184,0.904762,0.918367,...,0.989247,0.823529,1.0,0.986395,0.969512,0.923077,0.936709,0.958333,0.931703,0.931973


## 3-class BorutaSHAP MCN

In [2]:
# Full list of input columns read from the ADNI table (used as `data[features]`).
features=['moca_digits','moca_letters','moca_serial7','MMSPELL_early','MMSPELL_late','ADAS_Q1','moca_orient','ADAS_Q7','ADAS_Q8','ADAS_Q9','MMDATE',
       'MMYEAR', 'MMMONTH', 'MMDAY', 'MMSEASON', 'MMHOSPIT', 'MMFLOOR','MMCITY', 'MMAREA', 'MMSTATE','nbspan_forward',
       'faq7','faq8','moca_recall','ADAS_Q4','MMBALLDL', 'MMFLAGDL', 'MMTREEDL', 'LDELTOTAL', 'AVRECALL', 'AVDEL30MIN', 'AVDELTOT', 'AVDELERR2',
       'faq9','moca_clock','MMDRAW','CLOCKCIRC', 'CLOCKSYM','CLOCKNUM', 'CLOCKHAND', 'CLOCKTIME', 'COPYCIRC', 'COPYSYM',
       'COPYNUM', 'COPYHAND', 'COPYTIME', 'tmab_time', 'TRAAERRCOM','TRAAERROM', 'TRABERRCOM', 'TRABERROM','moca_naming','moca_repeat','moca_fluency',
       'moca_similarities','ADAS_Q2', 'ADAS_Q5','ADAS_Q10','ADAS_Q11','ADAS_Q12','MMBALL', 'MMFLAG', 'MMTREE',
       'MMWATCH', 'MMPENCIL','MMREPEAT','MMREAD', 'MMWRITE', 'CATANIMSC', 'CATVEGESC', 'moca_visuo_exec','ADAS_Q3','ADAS_Q6','ADAS_Q13','MMHAND', 'MMFOLD',
       'MMONFLR','nbspan_backward','faq1','faq2','faq3', 'faq4','faq5','faq6','faq10','PXGENAPP', 'PXHEADEY', 'PXNECK', 'PXCHEST',
       'PXHEART', 'PXABDOM', 'PXEXTREM', 'PXPERIPH', 'PXSKIN', 'PXMUSCUL', 'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt',
       'e_divatt_pt','e_memory_cg', 'e_lang_cg', 'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']


# Per-model feature subsets, passed as `feature_list` in the same order as
# `model_list=[rf0, rf5, rf1]`. Presumably the BorutaSHAP selections named in the
# section heading, one per CDGLOBAL level (0 / 0.5 / 1) -- TODO confirm.
features_0 = ['ADAS_Q4', 'e_organ_cg', 'e_memory_cg', 'ADAS_Q1', 'e_plan_cg', 'tmab_time', 'AVDEL30MIN', 'faq1', 'faq10', 'moca_recall', 'faq2', 'LDELTOTAL',
              'e_lang_pt', 'e_memory_pt', 'faq9', 'e_lang_cg', 'e_divatt_cg']

features_5 = ['faq8', 'moca_orient', 'AVDELTOT', 'e_plan_pt', 'MMTREEDL', 'e_lang_cg', 'ADAS_Q11', 'LDELTOTAL', 'e_divatt_cg', 'e_memory_pt', 'faq4', 'faq1', 'e_plan_cg',
              'ADAS_Q7', 'ADAS_Q8', 'faq3', 'e_visspat_pt', 'AVDEL30MIN', 'e_visspat_cg', 'faq2', 'e_organ_cg', 'moca_recall', 'faq10', 'e_lang_pt', 'CATANIMSC',
              'e_memory_cg', 'ADAS_Q1', 'faq9', 'ADAS_Q4', 'tmab_time', 'e_divatt_pt']

features_1 = ['e_visspat_cg', 'LDELTOTAL', 'ADAS_Q12', 'ADAS_Q9', 'ADAS_Q4', 'AVDELTOT', 'moca_clock', 'MMDAY', 'CLOCKTIME', 'moca_serial7', 'MMBALLDL', 'MMTREEDL',
              'AVRECALL', 'faq7', 'faq1', 'CATANIMSC', 'TRABERRCOM', 'ADAS_Q5', 'MMMONTH', 'MMFLOOR', 'nbspan_forward', 'ADAS_Q11', 'faq8', 'faq6', 'faq3',
              'ADAS_Q13', 'nbspan_backward', 'PXHEADEY', 'faq10', 'e_plan_pt', 'e_divatt_pt', 'MMFLAGDL', 'ADAS_Q7', 'AVDEL30MIN', 'CLOCKSYM', 'ADAS_Q1',
              'tmab_time', 'faq9', 'moca_visuo_exec', 'ADAS_Q2', 'moca_recall', 'faq2', 'faq4', 'TRABERROM', 'moca_similarities', 'e_memory_cg', 'TRAAERRCOM',
              'moca_orient', 'MMSPELL_late', 'ADAS_Q10', 'PXSKIN', 'AVDELERR2', 'e_divatt_cg', 'ADAS_Q8', 'e_organ_cg', 'CATVEGESC', 'e_plan_cg', 'MMDRAW',
              'COPYTIME', 'ADAS_Q3', 'ADAS_Q6', 'PXHEART', 'MMDATE', 'e_lang_cg']


# Names of the columns flagged as categorical.
categorical_names = ['MMONFLR', 'faq10', 'faq6', 'COPYSYM', 'COPYNUM', 'faq5', 'MMTREE', 'COPYHAND', 'moca_clock', 'MMSEASON', 'moca_letters',
                     'MMBALL', 'faq9', 'MMFLOOR', 'MMDRAW', 'MMMONTH', 'PXGENAPP', 'MMWATCH', 'CLOCKCIRC', 'faq8', 'MMHOSPIT', 'moca_naming',
                     'PXEXTREM', 'CLOCKNUM', 'PXMUSCUL', 'faq1', 'MMTREEDL', 'CLOCKTIME', 'PXABDOM', 'MMFLAG', 'COPYCIRC', 'MMAREA', 'faq3',
                     'moca_digits', 'CLOCKHAND', 'MMREAD', 'MMYEAR', 'MMREPEAT', 'moca_visuo_exec','MMHAND', 'MMBALLDL', 'PXCHEST', 'MMDATE',
                     'MMFLAGDL',  'MMSTATE','moca_repeat', 'MMFOLD', 'MMPENCIL', 'MMDAY',  'faq2', 'PXHEART', 'CLOCKSYM', 'faq4', 'moca_serial7',
                     'faq7', 'MMCITY', 'PXHEADEY', 'COPYTIME', 'PXPERIPH', 'PXSKIN', 'moca_fluency', 'moca_similarities', 'PXNECK', 'MMWRITE',
                     'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt', 'e_divatt_pt','e_memory_cg', 'e_lang_cg',
                     'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']

# Boolean mask aligned with `features`: True where the column is categorical.
# np.isin replaces np.in1d (deprecated since NumPy 2.0); for 1-D input the
# resulting mask is identical. The mask is not consumed by the code in this cell.
categorical_features = np.isin(features, categorical_names)

data = pd.read_csv('../data/interim/data_adni.csv')
X = data[features]
y = data['CDGLOBAL']

from sklearn.ensemble import RandomForestClassifier
rf0 = RandomForestClassifier(n_estimators=800, max_features=.3, max_depth = 20, min_samples_split = 10, min_samples_leaf = 1, bootstrap=True, random_state=33433)
rf5 = RandomForestClassifier(n_estimators=90, max_features= 'log2', max_depth = 90, min_samples_split = 2, min_samples_leaf = 1, bootstrap=False, random_state=33433)
rf1 = RandomForestClassifier(n_estimators=600, max_features='sqrt', max_depth = 70, min_samples_split = 2, min_samples_leaf = 1, bootstrap=True, random_state=33433)

%time \
df_mcn = splitrepeat_mcn(X, y, model_list=[rf0,rf5,rf1], splits=list(int(x)*42+42 for x in range(20)), repeats=list(int(x)*42+42 for x in range(20)),\
                feature_list=[features_0,features_5,features_1], num_classes=3, class_labels=None, avg_strategy='weighted', imbalanced='over', initial_split_seed=33433, initial_split_ratio=.25)
display(df_mcn.describe())

### Save outputs ###
q = 'CDR_UMCN_BorutaSHAP_400'

df_mcn['set'] = q
df_mcn.to_csv(('../models/outputs/' + q + '.csv'), index=False)

Wall time: 25min 39s


Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,...,Specificity2,PPV2,NPV2,Accuracy2,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,...,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.87125,0.909634,0.884998,0.899423,0.892662,0.877381,0.851871,0.855956,0.874787,0.864626,...,0.98843,0.79457,0.981954,0.971964,0.909634,0.87125,0.899423,0.884998,0.892546,0.892662
std,0.023549,0.022442,0.024766,0.016438,0.015227,0.027847,0.024895,0.021039,0.024962,0.018664,...,0.005383,0.084539,0.00476,0.007155,0.022442,0.023549,0.016438,0.024766,0.015216,0.015227
min,0.823077,0.853659,0.827338,0.863095,0.857143,0.809524,0.795918,0.811688,0.820513,0.826531,...,0.974729,0.588235,0.97153,0.952381,0.853659,0.823077,0.863095,0.827338,0.856887,0.857143
25%,0.846154,0.896341,0.872,0.883721,0.880952,0.857143,0.829932,0.836601,0.857143,0.846939,...,0.98556,0.75,0.978495,0.965986,0.896341,0.846154,0.883721,0.872,0.880549,0.880952
50%,0.876923,0.908537,0.885246,0.90214,0.891156,0.877551,0.857143,0.859155,0.872412,0.860544,...,0.98917,0.8,0.982014,0.972789,0.908537,0.876923,0.90214,0.885246,0.891309,0.891156
75%,0.892308,0.920732,0.899419,0.911243,0.904762,0.897959,0.870748,0.873333,0.894366,0.880952,...,0.99278,0.833333,0.985612,0.97619,0.920732,0.892308,0.911243,0.899419,0.904896,0.904762
max,0.923077,0.95122,0.935484,0.936709,0.928571,0.931973,0.904762,0.900662,0.927536,0.911565,...,1.0,1.0,0.99278,0.986395,0.95122,0.923077,0.936709,0.935484,0.928478,0.928571


## 3-class Manual-4/8+ECOG MCN

In [3]:
# Full list of input columns read from the ADNI table (used as `data[features]`).
features=['moca_digits','moca_letters','moca_serial7','MMSPELL_early','MMSPELL_late','ADAS_Q1','moca_orient','ADAS_Q7','ADAS_Q8','ADAS_Q9','MMDATE',
       'MMYEAR', 'MMMONTH', 'MMDAY', 'MMSEASON', 'MMHOSPIT', 'MMFLOOR','MMCITY', 'MMAREA', 'MMSTATE','nbspan_forward',
       'faq7','faq8','moca_recall','ADAS_Q4','MMBALLDL', 'MMFLAGDL', 'MMTREEDL', 'LDELTOTAL', 'AVRECALL', 'AVDEL30MIN', 'AVDELTOT', 'AVDELERR2',
       'faq9','moca_clock','MMDRAW','CLOCKCIRC', 'CLOCKSYM','CLOCKNUM', 'CLOCKHAND', 'CLOCKTIME', 'COPYCIRC', 'COPYSYM',
       'COPYNUM', 'COPYHAND', 'COPYTIME', 'tmab_time', 'TRAAERRCOM','TRAAERROM', 'TRABERRCOM', 'TRABERROM','moca_naming','moca_repeat','moca_fluency',
       'moca_similarities','ADAS_Q2', 'ADAS_Q5','ADAS_Q10','ADAS_Q11','ADAS_Q12','MMBALL', 'MMFLAG', 'MMTREE',
       'MMWATCH', 'MMPENCIL','MMREPEAT','MMREAD', 'MMWRITE', 'CATANIMSC', 'CATVEGESC', 'moca_visuo_exec','ADAS_Q3','ADAS_Q6','ADAS_Q13','MMHAND', 'MMFOLD',
       'MMONFLR','nbspan_backward','faq1','faq2','faq3', 'faq4','faq5','faq6','faq10','PXGENAPP', 'PXHEADEY', 'PXNECK', 'PXCHEST',
       'PXHEART', 'PXABDOM', 'PXEXTREM', 'PXPERIPH', 'PXSKIN', 'PXMUSCUL', 'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt',
       'e_divatt_pt','e_memory_cg', 'e_lang_cg', 'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']


# Names of the columns flagged as categorical.
categorical_names = ['MMONFLR', 'faq10', 'faq6', 'COPYSYM', 'COPYNUM', 'faq5', 'MMTREE', 'COPYHAND', 'moca_clock', 'MMSEASON', 'moca_letters',
                     'MMBALL', 'faq9', 'MMFLOOR', 'MMDRAW', 'MMMONTH', 'PXGENAPP', 'MMWATCH', 'CLOCKCIRC', 'faq8', 'MMHOSPIT', 'moca_naming',
                     'PXEXTREM', 'CLOCKNUM', 'PXMUSCUL', 'faq1', 'MMTREEDL', 'CLOCKTIME', 'PXABDOM', 'MMFLAG', 'COPYCIRC', 'MMAREA', 'faq3',
                     'moca_digits', 'CLOCKHAND', 'MMREAD', 'MMYEAR', 'MMREPEAT', 'moca_visuo_exec','MMHAND', 'MMBALLDL', 'PXCHEST', 'MMDATE',
                     'MMFLAGDL',  'MMSTATE','moca_repeat', 'MMFOLD', 'MMPENCIL', 'MMDAY',  'faq2', 'PXHEART', 'CLOCKSYM', 'faq4', 'moca_serial7',
                     'faq7', 'MMCITY', 'PXHEADEY', 'COPYTIME', 'PXPERIPH', 'PXSKIN', 'moca_fluency', 'moca_similarities', 'PXNECK', 'MMWRITE',
                     'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt', 'e_divatt_pt','e_memory_cg', 'e_lang_cg',
                     'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']

# Boolean mask aligned with `features`: True where the column is categorical.
# np.isin replaces np.in1d (deprecated since NumPy 2.0); for 1-D input the
# resulting mask is identical. The mask is not consumed by the code in this cell.
categorical_features = np.isin(features, categorical_names)

# Per-model feature subsets, passed as `feature_list` in the same order as
# `model_list=[rf0, rf5, rf1]`. Presumably the manually chosen sets named in the
# section heading, one per CDGLOBAL level (0 / 0.5 / 1) -- TODO confirm.
features_0 = ['ADAS_Q1', 'ADAS_Q4', 'LDELTOTAL', 'tmab_time', 'e_memory_cg', 'e_memory_pt', 'e_lang_cg', 'e_divatt_cg']

features_5 = ['LDELTOTAL', 'e_memory_pt', 'tmab_time', 'e_memory_cg']

features_1 = ['ADAS_Q1', 'ADAS_Q4', 'LDELTOTAL','tmab_time',
           'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt', 'e_divatt_pt','e_memory_cg', 'e_lang_cg',
           'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']


data = pd.read_csv('../data/interim/data_adni.csv')
X = data[features]
y = data['CDGLOBAL']

from sklearn.ensemble import RandomForestClassifier
rf0 = RandomForestClassifier(n_estimators=100, max_features=.5, max_depth = 50, min_samples_split = 4, min_samples_leaf = 5, bootstrap=True, random_state=33433)
rf5 = RandomForestClassifier(n_estimators=200, max_features= .3, max_depth = 90, min_samples_split = 4, min_samples_leaf = 4, bootstrap=True, random_state=33433)
rf1 = RandomForestClassifier(n_estimators=300, max_features=.3, max_depth = 50, min_samples_split = 9, min_samples_leaf = 2, bootstrap=False, random_state=33433)

%time \
df_mcn = splitrepeat_mcn(X, y, model_list=[rf0,rf5,rf1], splits=list(int(x)*42+42 for x in range(20)), repeats=list(int(x)*42+42 for x in range(20)), feature_list=[features_0,features_5,features_1], avg_strategy='weighted', num_classes=3, class_labels=None, imbalanced='over', initial_split_seed=33433, initial_split_ratio=.25)
display(df_mcn.describe())

### Save outputs ###
q = 'CDR_UMCN_Boruta4_400'

df_mcn['set'] = q
df_mcn.to_csv(('../models/outputs/' + q + '.csv'), index=False)

Wall time: 10min 12s


Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,...,Specificity2,PPV2,NPV2,Accuracy2,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,...,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.851173,0.92846,0.904647,0.887529,0.894286,0.877993,0.813078,0.824799,0.869778,0.845536,...,0.977608,0.596823,0.970885,0.95125,0.92846,0.851173,0.887529,0.904647,0.893863,0.894286
std,0.026364,0.020469,0.025259,0.01812,0.017428,0.02194,0.025675,0.021052,0.021538,0.01885,...,0.00971,0.143637,0.007506,0.013318,0.020469,0.026364,0.01812,0.025259,0.017511,0.017428
min,0.784615,0.878049,0.850746,0.842697,0.857143,0.829932,0.755102,0.779141,0.827338,0.806122,...,0.956679,0.214286,0.95,0.914966,0.878049,0.784615,0.842697,0.850746,0.856002,0.857143
25%,0.838462,0.914634,0.88189,0.876471,0.880952,0.857143,0.795918,0.810127,0.853147,0.833333,...,0.971119,0.5,0.967972,0.942177,0.914634,0.838462,0.876471,0.88189,0.880647,0.880952
50%,0.846154,0.926829,0.903226,0.886955,0.891156,0.877551,0.809524,0.825806,0.868415,0.846939,...,0.978339,0.615385,0.97153,0.952381,0.926829,0.846154,0.886955,0.903226,0.89059,0.891156
75%,0.869231,0.945122,0.92437,0.9,0.911565,0.891156,0.829932,0.837662,0.882353,0.857143,...,0.98556,0.714286,0.975089,0.962585,0.945122,0.869231,0.9,0.92437,0.910858,0.911565
max,0.9,0.969512,0.956522,0.923077,0.928571,0.931973,0.870748,0.876623,0.925926,0.894558,...,0.99278,0.8,0.98556,0.972789,0.969512,0.9,0.923077,0.956522,0.928407,0.928571


## 3-class Manual-4+ECOG/FAQ MCN

In [4]:
# Full list of input columns read from the ADNI table (used as `data[features]`).
features=['moca_digits','moca_letters','moca_serial7','MMSPELL_early','MMSPELL_late','ADAS_Q1','moca_orient','ADAS_Q7','ADAS_Q8','ADAS_Q9','MMDATE',
       'MMYEAR', 'MMMONTH', 'MMDAY', 'MMSEASON', 'MMHOSPIT', 'MMFLOOR','MMCITY', 'MMAREA', 'MMSTATE','nbspan_forward',
       'faq7','faq8','moca_recall','ADAS_Q4','MMBALLDL', 'MMFLAGDL', 'MMTREEDL', 'LDELTOTAL', 'AVRECALL', 'AVDEL30MIN', 'AVDELTOT', 'AVDELERR2',
       'faq9','moca_clock','MMDRAW','CLOCKCIRC', 'CLOCKSYM','CLOCKNUM', 'CLOCKHAND', 'CLOCKTIME', 'COPYCIRC', 'COPYSYM',
       'COPYNUM', 'COPYHAND', 'COPYTIME', 'tmab_time', 'TRAAERRCOM','TRAAERROM', 'TRABERRCOM', 'TRABERROM','moca_naming','moca_repeat','moca_fluency',
       'moca_similarities','ADAS_Q2', 'ADAS_Q5','ADAS_Q10','ADAS_Q11','ADAS_Q12','MMBALL', 'MMFLAG', 'MMTREE',
       'MMWATCH', 'MMPENCIL','MMREPEAT','MMREAD', 'MMWRITE', 'CATANIMSC', 'CATVEGESC', 'moca_visuo_exec','ADAS_Q3','ADAS_Q6','ADAS_Q13','MMHAND', 'MMFOLD',
       'MMONFLR','nbspan_backward','faq1','faq2','faq3', 'faq4','faq5','faq6','faq10','PXGENAPP', 'PXHEADEY', 'PXNECK', 'PXCHEST',
       'PXHEART', 'PXABDOM', 'PXEXTREM', 'PXPERIPH', 'PXSKIN', 'PXMUSCUL', 'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt',
       'e_divatt_pt','e_memory_cg', 'e_lang_cg', 'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']


# Per-model feature subsets, passed as `feature_list` in the same order as
# `model_list=[rf0, rf5, rf1]`. Presumably the manually chosen sets named in the
# section heading, one per CDGLOBAL level (0 / 0.5 / 1) -- TODO confirm.
features_0 = ['ADAS_Q1', 'ADAS_Q4', 'LDELTOTAL', 'AVDEL30MIN', 'tmab_time', 'e_memory_cg', 'e_lang_cg', 'e_divatt_cg']

features_5 = ['ADAS_Q1', 'ADAS_Q4', 'LDELTOTAL', 'AVDEL30MIN', 'faq9', 'tmab_time', 'faq2', 'faq3', 'faq10', 'e_memory_pt', 'e_plan_pt',
              'e_memory_cg', 'e_lang_cg', 'e_plan_cg', 'e_divatt_cg']

features_1 = ['ADAS_Q1', 'ADAS_Q4', 'LDELTOTAL', 'AVDEL30MIN','tmab_time',
           'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt', 'e_divatt_pt','e_memory_cg', 'e_lang_cg',
           'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg', 'faq1','faq2','faq3','faq4','faq5','faq6','faq7','faq8','faq9','faq10']

# Names of the columns flagged as categorical.
categorical_names = ['MMONFLR', 'faq10', 'faq6', 'COPYSYM', 'COPYNUM', 'faq5', 'MMTREE', 'COPYHAND', 'moca_clock', 'MMSEASON', 'moca_letters',
                     'MMBALL', 'faq9', 'MMFLOOR', 'MMDRAW', 'MMMONTH', 'PXGENAPP', 'MMWATCH', 'CLOCKCIRC', 'faq8', 'MMHOSPIT', 'moca_naming',
                     'PXEXTREM', 'CLOCKNUM', 'PXMUSCUL', 'faq1', 'MMTREEDL', 'CLOCKTIME', 'PXABDOM', 'MMFLAG', 'COPYCIRC', 'MMAREA', 'faq3',
                     'moca_digits', 'CLOCKHAND', 'MMREAD', 'MMYEAR', 'MMREPEAT', 'moca_visuo_exec','MMHAND', 'MMBALLDL', 'PXCHEST', 'MMDATE',
                     'MMFLAGDL',  'MMSTATE','moca_repeat', 'MMFOLD', 'MMPENCIL', 'MMDAY',  'faq2', 'PXHEART', 'CLOCKSYM', 'faq4', 'moca_serial7',
                     'faq7', 'MMCITY', 'PXHEADEY', 'COPYTIME', 'PXPERIPH', 'PXSKIN', 'moca_fluency', 'moca_similarities', 'PXNECK', 'MMWRITE',
                     'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt', 'e_divatt_pt','e_memory_cg', 'e_lang_cg',
                     'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']

# Boolean mask aligned with `features`: True where the column is categorical.
# np.isin replaces np.in1d (deprecated since NumPy 2.0); for 1-D input the
# resulting mask is identical. The mask is not consumed by the code in this cell.
categorical_features = np.isin(features, categorical_names)

data = pd.read_csv('../data/interim/data_adni.csv')
X = data[features]
y = data['CDGLOBAL']

from sklearn.ensemble import RandomForestClassifier
rf0 = RandomForestClassifier(n_estimators=100, max_features=.5, max_depth = 50, min_samples_split = 4, min_samples_leaf = 5, bootstrap=True, random_state=33433)
rf5 = RandomForestClassifier(n_estimators=200, max_features= .3, max_depth = 90, min_samples_split = 4, min_samples_leaf = 4, bootstrap=True, random_state=33433)
rf1 = RandomForestClassifier(n_estimators=300, max_features=.3, max_depth = 50, min_samples_split = 9, min_samples_leaf = 2, bootstrap=False, random_state=33433)

%time \
df_mcn = splitrepeat_mcn(X, y, model_list=[rf0,rf5,rf1], splits=list(int(x)*42+42 for x in range(20)), repeats=list(int(x)*42+42 for x in range(20)), feature_list=[features_0,features_5,features_1], num_classes=3, class_labels=None, avg_strategy='weighted', imbalanced='over', initial_split_seed=33433, initial_split_ratio=.25)
display(df_mcn.describe())

### Save outputs ###
q = 'CDR_UMCN_BorutaECOGFAQ_400'

df_mcn['set'] = q
df_mcn.to_csv(('../models/outputs/' + q + '.csv'), index=False)

Wall time: 14min 33s


Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,...,Specificity2,PPV2,NPV2,Accuracy2,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,...,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.843673,0.919665,0.893355,0.881547,0.886063,0.873759,0.828214,0.835963,0.868402,0.850986,...,0.980569,0.703887,0.982191,0.964923,0.919665,0.843673,0.881547,0.893355,0.885637,0.886063
std,0.02636,0.02098,0.02564,0.017999,0.016931,0.030447,0.024918,0.021614,0.028624,0.021401,...,0.009091,0.104886,0.004716,0.009973,0.02098,0.02636,0.017999,0.02564,0.016987,0.016931
min,0.792308,0.878049,0.84252,0.845714,0.853741,0.802721,0.761905,0.771242,0.794326,0.782313,...,0.956679,0.368421,0.963636,0.92517,0.878049,0.792308,0.845714,0.84252,0.852884,0.853741
25%,0.823077,0.902439,0.872982,0.867052,0.873299,0.85034,0.809524,0.821192,0.84876,0.836735,...,0.971119,0.625,0.978552,0.959184,0.902439,0.823077,0.867052,0.872982,0.872446,0.873299
50%,0.846154,0.914634,0.890625,0.88024,0.884354,0.877551,0.823129,0.833333,0.869565,0.85034,...,0.981949,0.705882,0.982014,0.965986,0.914634,0.846154,0.88024,0.890625,0.884027,0.884354
75%,0.861538,0.932927,0.912175,0.895438,0.901361,0.891156,0.843537,0.851673,0.886698,0.867347,...,0.98917,0.8,0.985612,0.972789,0.932927,0.861538,0.895438,0.912175,0.901027,0.901361
max,0.9,0.963415,0.946903,0.921212,0.92517,0.938776,0.884354,0.882759,0.933333,0.901361,...,0.99639,0.923077,0.992593,0.982993,0.963415,0.9,0.921212,0.946903,0.924873,0.92517
