In [4]:
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict, StratifiedKFold, train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, classification_report, multilabel_confusion_matrix
import tensorflow as tf
import datetime, os
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, Flatten, concatenate
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.constraints import MaxNorm
from sklearn.model_selection import GridSearchCV
from splitrepeat import splitrepeat_cv

# Full list of item-level columns considered across all instruments.
features=['moca_digits','moca_letters','moca_serial7','MMSPELL_early','MMSPELL_late','ADAS_Q1','moca_orient','ADAS_Q7','ADAS_Q8','ADAS_Q9','MMDATE',
       'MMYEAR', 'MMMONTH', 'MMDAY', 'MMSEASON', 'MMHOSPIT', 'MMFLOOR','MMCITY', 'MMAREA', 'MMSTATE','nbspan_forward',
       'faq7','faq8','moca_recall','ADAS_Q4','MMBALLDL', 'MMFLAGDL', 'MMTREEDL', 'LDELTOTAL', 'AVRECALL', 'AVDEL30MIN', 'AVDELTOT', 'AVDELERR2',
       'faq9','moca_clock','MMDRAW','CLOCKCIRC', 'CLOCKSYM','CLOCKNUM', 'CLOCKHAND', 'CLOCKTIME', 'COPYCIRC', 'COPYSYM',
       'COPYNUM', 'COPYHAND', 'COPYTIME', 'tmab_time', 'TRAAERRCOM','TRAAERROM', 'TRABERRCOM', 'TRABERROM','moca_naming','moca_repeat','moca_fluency',
       'moca_similarities','ADAS_Q2', 'ADAS_Q5','ADAS_Q10','ADAS_Q11','ADAS_Q12','MMBALL', 'MMFLAG', 'MMTREE',
       'MMWATCH', 'MMPENCIL','MMREPEAT','MMREAD', 'MMWRITE', 'CATANIMSC', 'CATVEGESC', 'moca_visuo_exec','ADAS_Q3','ADAS_Q6','ADAS_Q13','MMHAND', 'MMFOLD',
       'MMONFLR','nbspan_backward','faq1','faq2','faq3', 'faq4','faq5','faq6','faq10','PXGENAPP', 'PXHEADEY', 'PXNECK', 'PXCHEST',
       'PXHEART', 'PXABDOM', 'PXEXTREM', 'PXPERIPH', 'PXSKIN', 'PXMUSCUL', 'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt',
       'e_divatt_pt','e_memory_cg', 'e_lang_cg', 'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']


# Boolean mask aligned with `features`: True where the column is listed below
# as categorical. np.isin replaces np.in1d, which is deprecated in NumPy 2.0;
# for a 1-D input both return the same boolean array.
categorical_features = np.isin(features, ['MMONFLR', 'faq10', 'faq6', 'COPYSYM', 'COPYNUM', 'faq5', 'MMTREE', 'COPYHAND', 'moca_clock', 'MMSEASON', 'moca_letters', 
                                          'MMBALL', 'faq9', 'MMFLOOR', 'MMDRAW', 'MMMONTH', 'PXGENAPP', 'MMWATCH', 'CLOCKCIRC', 'faq8', 'MMHOSPIT', 'moca_naming', 
                                          'PXEXTREM', 'CLOCKNUM', 'PXMUSCUL', 'faq1', 'MMTREEDL', 'CLOCKTIME', 'PXABDOM', 'MMFLAG', 'COPYCIRC', 'MMAREA', 'faq3', 
                                          'moca_digits', 'CLOCKHAND', 'MMREAD', 'MMYEAR', 'MMREPEAT', 'moca_visuo_exec','MMHAND', 'MMBALLDL', 'PXCHEST', 'MMDATE', 
                                          'MMFLAGDL',  'MMSTATE','moca_repeat', 'MMFOLD', 'MMPENCIL', 'MMDAY',  'faq2', 'PXHEART', 'CLOCKSYM', 'faq4', 'moca_serial7', 
                                          'faq7', 'MMCITY', 'PXHEADEY', 'COPYTIME', 'PXPERIPH', 'PXSKIN', 'moca_fluency', 'moca_similarities', 'PXNECK', 'MMWRITE',
                                          'e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt', 'e_divatt_pt','e_memory_cg', 'e_lang_cg', 
                                          'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg'])


# Load the processed table once; every section below selects its columns from it.
data = pd.read_csv(filepath_or_buffer='../data/processed/data_adni.csv')

# CDR - detect DX

In [25]:
# Predictor columns for this section (the CD* columns of the loaded table).
features = [
    'CDSOURCE', 'CDVERSION', 'CDMEMORY', 'CDORIENT',
    'CDJUDGE', 'CDCOMMUN', 'CDHOME', 'CDCARE',
]

# Feature matrix and target ('DX') taken from the loaded table.
X = data.loc[:, features]
y = data['DX']

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

# Carve out 25% of the data for hyperparameter tuning. The subset is bound to
# new names (X_tune / y_tune) rather than overwriting X and y, so later cells
# that read X and y are not silently handed the 25% subset on a full re-run.
X_tune, X_test, y_tune, y_test = train_test_split(X, y, train_size=0.25, random_state=33433, stratify=y)
# Split the tuning subset again (75% train / 25% validation); the search is fit on the train part.
X_train, X_valid, y_train, y_valid = train_test_split(X_tune, y_tune, test_size=0.25, random_state=33433, stratify=y_tune)
# Base estimator; the search overrides the hyperparameters listed in search_params.
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unlimited depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the space above, each scored with 3-fold CV.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

Fitting 3 folds for each of 400 candidates, totalling 1200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:   35.6s
[Parallel(n_jobs=-1)]: Done 341 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 624 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 989 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 1200 out of 1200 | elapsed:  4.8min finished


{'n_estimators': 200,
 'min_samples_split': 2,
 'min_samples_leaf': 1,
 'max_features': 0.7,
 'max_depth': 70,
 'bootstrap': True}

In [13]:
# Exhaustive grid around the best values reported by the randomized search.
# min_samples_split starts at 2: scikit-learn rejects the integer 1 (an int
# must be >= 2), so a grid containing 1 wastes a third of the fits on
# candidates that fail or raise.
search_params = {'n_estimators': [100, 200, 300],
               'max_features': [.6, .7, .8],
               'max_depth': [60, 70, 80],
               'min_samples_split': [2, 3, 4],
               'min_samples_leaf': [1, 2],
               'bootstrap': [False, True]}

rf_search_grid = GridSearchCV(rf, search_params, 
                          cv = 3, n_jobs = -1, verbose = 2)
rf_search_grid.fit(X_train, y_train)
rf_search_grid.best_params_

Fitting 3 folds for each of 324 candidates, totalling 972 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 341 tasks      | elapsed:   15.2s
[Parallel(n_jobs=-1)]: Done 624 tasks      | elapsed:   26.4s
[Parallel(n_jobs=-1)]: Done 972 out of 972 | elapsed:   41.3s finished


{'bootstrap': True,
 'max_depth': 60,
 'max_features': 0.7,
 'min_samples_leaf': 2,
 'min_samples_split': 2,
 'n_estimators': 300}

In [26]:
# Random forest with hand-entered hyperparameters (they match the grid-search output above).
best = RandomForestClassifier(
    n_estimators=300, max_features=.7, max_depth=60,
    min_samples_split=2, min_samples_leaf=2,
    bootstrap=True, random_state=33433,
)
# Seeds 42, 84, ..., 420 are passed for both the splits and the repeats.
df = splitrepeat_cv(
    X, y, best,
    splits=[42 * (k + 1) for k in range(10)],
    repeats=[42 * (k + 1) for k in range(10)],
    num_classes=3,
)
display(df.describe())

### Save outputs ###
q = 'CExam_CDR_0'

# Tag every row with the experiment name before writing the CSV.
df['set'] = q
df.to_csv('../models/outputs/' + q + '.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,Accuracy0,AUC0,Sensitivity1,Specificity1,Accuracy1,AUC1,Sensitivity2,Specificity2,Accuracy2,AUC2,Sensitivity,Specificity,Accuracy,AUC
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.999432,0.995833,0.997449,0.997633,0.94977,0.952156,0.951097,0.950963,0.754048,0.9776,0.953648,0.865824,0.901083,0.975196,0.967398,0.990004
std,0.001713,0.003257,0.001986,0.001876,0.013878,0.013634,0.008519,0.008454,0.072744,0.007463,0.008581,0.035419,0.023183,0.0046,0.00568,0.00254
min,0.994318,0.990741,0.994898,0.994844,0.925287,0.917431,0.931122,0.932854,0.571429,0.965714,0.933673,0.774286,0.839901,0.963315,0.954082,0.983432
25%,1.0,0.99537,0.994898,0.99537,0.936782,0.949541,0.946429,0.945455,0.738095,0.974286,0.94898,0.85619,0.891626,0.973052,0.964286,0.988605
50%,1.0,0.99537,0.997449,0.997685,0.951149,0.954128,0.951531,0.949489,0.761905,0.977143,0.951531,0.876667,0.909562,0.974971,0.967687,0.989261
75%,1.0,1.0,1.0,1.0,0.95977,0.963303,0.961735,0.960956,0.809524,0.98,0.961735,0.892262,0.917219,0.978766,0.97449,0.991715
max,1.0,1.0,1.0,1.0,0.971264,0.972477,0.961735,0.962696,0.857143,0.991429,0.966837,0.911429,0.929392,0.981678,0.97449,0.994465


# MMSE - detect CDR

In [5]:
# Predictor columns for this section (the MM* columns of the loaded table).
features = [
    'MMDATE', 'MMYEAR', 'MMMONTH', 'MMDAY', 'MMSEASON',
    'MMHOSPIT', 'MMFLOOR', 'MMCITY', 'MMAREA', 'MMSTATE',
    'MMBALL', 'MMFLAG', 'MMTREE',
    'MMBALLDL', 'MMFLAGDL', 'MMTREEDL',
    'MMWATCH', 'MMPENCIL', 'MMREPEAT', 'MMHAND', 'MMFOLD',
    'MMONFLR', 'MMREAD', 'MMWRITE', 'MMDRAW',
    'MMSPELL_early', 'MMSPELL_late',
]

# Feature matrix and target ('CDGLOBAL') taken from the loaded table.
X = data.loc[:, features]
y = data['CDGLOBAL']

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

# Carve out 25% of the data for hyperparameter tuning. The subset is bound to
# new names (X_tune / y_tune) rather than overwriting X and y, so later cells
# that read X and y are not silently handed the 25% subset on a full re-run.
X_tune, X_test, y_tune, y_test = train_test_split(X, y, train_size=0.25, random_state=33433, stratify=y)
# Split the tuning subset again (75% train / 25% validation); the search is fit on the train part.
X_train, X_valid, y_train, y_valid = train_test_split(X_tune, y_tune, test_size=0.25, random_state=33433, stratify=y_tune)
# Base estimator; the search overrides the hyperparameters listed in search_params.
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unlimited depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the space above, each scored with 3-fold CV.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

In [None]:
# Exhaustive refinement grid for this section's random forest.
# min_samples_split starts at 2: scikit-learn rejects the integer 1 (an int
# must be >= 2), so a grid containing 1 wastes a third of the fits on
# candidates that fail or raise.
search_params = {'n_estimators': [700, 800, 900],
               'max_features': [.4, .5, .6],
               'max_depth': [40, 50, 60],
               'min_samples_split': [2, 3, 4],
               'min_samples_leaf': [1, 2, 3],
               'bootstrap': [False, True]}

rf_search_grid = GridSearchCV(rf, search_params, 
                          cv = 3, n_jobs = -1, verbose = 2)
rf_search_grid.fit(X_train, y_train)
rf_search_grid.best_params_

In [6]:
# Random forest with hand-entered hyperparameters (presumably the grid-search winner; its output is not stored).
best = RandomForestClassifier(
    n_estimators=800, max_features=.4, max_depth=40,
    min_samples_split=3, min_samples_leaf=1,
    bootstrap=True, random_state=33433,
)
# Seeds 42, 84, ..., 840 are passed for both the splits and the repeats.
df = splitrepeat_cv(
    X, y, best,
    splits=[42 * (k + 1) for k in range(20)],
    repeats=[42 * (k + 1) for k in range(20)],
    num_classes=3, imbalanced='over', avg_strategy='weighted',
    initial_split_seed=33433, initial_split_ratio=.25,
)
display(df.describe())

### Save outputs ###
q = 'CExam_MMSE_400'

# Tag every row with the experiment name before writing the CSV.
df['set'] = q
df.to_csv('../models/outputs/' + q + '.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,...,Specificity2,PPV2,NPV2,Accuracy2,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,...,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.781269,0.611936,0.615149,0.780151,0.686811,0.535765,0.741939,0.675729,0.615364,0.638852,...,0.979585,0.548143,0.962578,0.944864,0.611936,0.781269,0.780151,0.615149,0.68642,0.686811
std,0.040302,0.034922,0.023772,0.033501,0.025572,0.035808,0.035439,0.032106,0.019554,0.022937,...,0.008657,0.135661,0.005575,0.010599,0.034922,0.040302,0.033501,0.023772,0.025715,0.025572
min,0.684615,0.518293,0.563536,0.713178,0.629252,0.44898,0.653061,0.62406,0.57672,0.598639,...,0.960289,0.285714,0.951557,0.921769,0.518293,0.684615,0.713178,0.563536,0.629102,0.629252
25%,0.753846,0.591463,0.6,0.755556,0.670068,0.508503,0.721088,0.650466,0.597811,0.619048,...,0.973827,0.4375,0.958188,0.935374,0.591463,0.753846,0.755556,0.6,0.670194,0.670068
50%,0.776923,0.609756,0.617284,0.773958,0.683673,0.537415,0.734694,0.666667,0.615819,0.636054,...,0.978339,0.545455,0.961404,0.945578,0.609756,0.776923,0.773958,0.617284,0.684675,0.683673
75%,0.807692,0.634146,0.631016,0.804735,0.707483,0.564626,0.761905,0.696429,0.62963,0.656463,...,0.98556,0.666667,0.964912,0.952381,0.634146,0.807692,0.804735,0.631016,0.706745,0.707483
max,0.876923,0.70122,0.668919,0.855856,0.734694,0.619048,0.829932,0.75,0.655172,0.687075,...,0.99278,0.8,0.978495,0.965986,0.70122,0.876923,0.855856,0.668919,0.734227,0.734694


# MOCA - detect CDR

In [7]:
# Predictor columns for this section (the moca_* columns of the loaded table).
features = [
    'moca_visuo_exec', 'moca_clock', 'moca_naming',
    'moca_digits', 'moca_letters', 'moca_serial7',
    'moca_repeat', 'moca_fluency', 'moca_similarities',
    'moca_recall', 'moca_orient',
]

# Feature matrix and target ('CDGLOBAL') taken from the loaded table.
X = data.loc[:, features]
y = data['CDGLOBAL']

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

# Carve out 25% of the data for hyperparameter tuning. The subset is bound to
# new names (X_tune / y_tune) rather than overwriting X and y, so later cells
# that read X and y are not silently handed the 25% subset on a full re-run.
X_tune, X_test, y_tune, y_test = train_test_split(X, y, train_size=0.25, random_state=33433, stratify=y)
# Split the tuning subset again (75% train / 25% validation); the search is fit on the train part.
X_train, X_valid, y_train, y_valid = train_test_split(X_tune, y_tune, test_size=0.25, random_state=33433, stratify=y_tune)
# Base estimator; the search overrides the hyperparameters listed in search_params.
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unlimited depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the space above, each scored with 3-fold CV.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

In [None]:
# Exhaustive refinement grid for this section's random forest (3-fold CV).
search_params = dict(
    n_estimators=[100, 200, 300],
    max_features=[.2, 'sqrt', 'log2'],
    max_depth=[90, 100, 110],
    min_samples_split=[8, 9, 10],
    min_samples_leaf=[3, 4, 5],
    bootstrap=[False, True],
)

rf_search_grid = GridSearchCV(estimator=rf, param_grid=search_params, cv=3, n_jobs=-1, verbose=2)
rf_search_grid.fit(X_train, y_train)
rf_search_grid.best_params_

In [8]:
# Random forest with hand-entered hyperparameters (presumably the grid-search winner; its output is not stored).
best = RandomForestClassifier(
    n_estimators=100, max_features=.2, max_depth=90,
    min_samples_split=8, min_samples_leaf=5,
    bootstrap=True, random_state=33433,
)
# Seeds 42, 84, ..., 840 are passed for both the splits and the repeats.
df = splitrepeat_cv(
    X, y, best,
    splits=[42 * (k + 1) for k in range(20)],
    repeats=[42 * (k + 1) for k in range(20)],
    num_classes=3, avg_strategy='weighted', imbalanced='over',
    initial_split_seed=33433, initial_split_ratio=.25,
)
display(df.describe())

### Save outputs ###
q = 'CExam_MOCA_400'

# Tag every row with the experiment name before writing the CSV.
df['set'] = q
df.to_csv('../models/outputs/' + q + '.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,...,Specificity2,PPV2,NPV2,Accuracy2,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,...,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.772654,0.65311,0.63897,0.784683,0.705969,0.53568,0.7375,0.672135,0.613871,0.63659,...,0.956137,0.381325,0.96426,0.925281,0.65311,0.772654,0.784683,0.63897,0.706324,0.705969
std,0.038929,0.035158,0.02304,0.027619,0.022167,0.037004,0.038637,0.03408,0.021525,0.024911,...,0.01362,0.107583,0.00645,0.016072,0.035158,0.038929,0.027619,0.02304,0.022284,0.022167
min,0.669231,0.567073,0.586826,0.713333,0.656463,0.44898,0.619048,0.573643,0.552326,0.561224,...,0.916968,0.130435,0.948339,0.884354,0.567073,0.669231,0.713333,0.586826,0.656021,0.656463
25%,0.746154,0.621951,0.620425,0.764502,0.690476,0.510204,0.707483,0.650019,0.601086,0.622449,...,0.945848,0.329545,0.960289,0.918367,0.621951,0.746154,0.764502,0.620425,0.690079,0.690476
50%,0.776923,0.658537,0.638426,0.787402,0.705782,0.534014,0.741497,0.67619,0.615619,0.639456,...,0.960289,0.380952,0.963899,0.928571,0.658537,0.776923,0.787402,0.638426,0.705533,0.705782
75%,0.8,0.682927,0.656667,0.804433,0.721939,0.564626,0.768707,0.696429,0.627329,0.653061,...,0.967509,0.457576,0.97037,0.936224,0.682927,0.8,0.804433,0.656667,0.72255,0.721939
max,0.853846,0.737805,0.705479,0.844444,0.761905,0.659864,0.823129,0.738739,0.6875,0.704082,...,0.981949,0.642857,0.981203,0.955782,0.737805,0.853846,0.844444,0.705479,0.762701,0.761905


# ADAS - detect CDR

In [9]:
# Predictor columns for this section (ADAS_Q1 ... ADAS_Q13).
features = [
    'ADAS_Q1', 'ADAS_Q2', 'ADAS_Q3', 'ADAS_Q4', 'ADAS_Q5',
    'ADAS_Q6', 'ADAS_Q7', 'ADAS_Q8', 'ADAS_Q9', 'ADAS_Q10',
    'ADAS_Q11', 'ADAS_Q12', 'ADAS_Q13',
]

# Feature matrix and target ('CDGLOBAL') taken from the loaded table.
X = data.loc[:, features]
y = data['CDGLOBAL']

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

# Carve out 25% of the data for hyperparameter tuning. The subset is bound to
# new names (X_tune / y_tune) rather than overwriting X and y, so later cells
# that read X and y are not silently handed the 25% subset on a full re-run.
X_tune, X_test, y_tune, y_test = train_test_split(X, y, train_size=0.25, random_state=33433, stratify=y)
# Split the tuning subset again (75% train / 25% validation); the search is fit on the train part.
X_train, X_valid, y_train, y_valid = train_test_split(X_tune, y_tune, test_size=0.25, random_state=33433, stratify=y_tune)
# Base estimator; the search overrides the hyperparameters listed in search_params.
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unlimited depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the space above, each scored with 3-fold CV.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

In [None]:
# Exhaustive refinement grid for this section's random forest (3-fold CV).
search_params = dict(
    n_estimators=[100, 200, 300],
    max_features=[.2, 'sqrt', 'log2'],
    max_depth=[90, 100, 110],
    min_samples_split=[8, 9, 10],
    min_samples_leaf=[3, 4, 5],
    bootstrap=[False, True],
)

rf_search_grid = GridSearchCV(estimator=rf, param_grid=search_params, cv=3, n_jobs=-1, verbose=2)
rf_search_grid.fit(X_train, y_train)
rf_search_grid.best_params_

In [10]:
# Random forest with hand-entered hyperparameters (presumably the grid-search winner; its output is not stored).
best = RandomForestClassifier(
    n_estimators=100, max_features='sqrt', max_depth=90,
    min_samples_split=8, min_samples_leaf=5,
    bootstrap=True, random_state=33433,
)
# Seeds 42, 84, ..., 840 are passed for both the splits and the repeats.
df = splitrepeat_cv(
    X, y, best,
    splits=[42 * (k + 1) for k in range(20)],
    repeats=[42 * (k + 1) for k in range(20)],
    num_classes=3, imbalanced='over', avg_strategy='weighted',
    initial_split_seed=33433, initial_split_ratio=.25,
)
display(df.describe())

### Save outputs ###
q = 'CExam_ADAS_400'

# Tag every row with the experiment name before writing the CSV.
df['set'] = q
df.to_csv('../models/outputs/' + q + '.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,...,Specificity2,PPV2,NPV2,Accuracy2,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,...,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.753423,0.746311,0.702928,0.793176,0.749456,0.658946,0.739932,0.717678,0.685208,0.699439,...,0.969206,0.561989,0.977557,0.949983,0.746311,0.753423,0.793176,0.702928,0.749927,0.749456
std,0.039638,0.036717,0.030801,0.026264,0.024219,0.040729,0.035908,0.029497,0.026843,0.025192,...,0.008542,0.087145,0.007151,0.011371,0.036717,0.039638,0.026264,0.030801,0.02408,0.024219
min,0.661538,0.664634,0.634483,0.731707,0.690476,0.571429,0.653061,0.646617,0.621118,0.632653,...,0.945848,0.357143,0.957143,0.92517,0.664634,0.661538,0.731707,0.634483,0.691159,0.690476
25%,0.723077,0.719512,0.679487,0.772294,0.738095,0.62585,0.707483,0.694444,0.666667,0.683673,...,0.963899,0.5,0.974335,0.942177,0.719512,0.723077,0.772294,0.679487,0.737422,0.738095
50%,0.753846,0.75,0.708333,0.795251,0.755102,0.666667,0.741497,0.718634,0.686667,0.70068,...,0.971119,0.5625,0.978022,0.94898,0.75,0.753846,0.795251,0.708333,0.754397,0.755102
75%,0.792308,0.77439,0.722324,0.815366,0.768707,0.687075,0.768707,0.740233,0.703345,0.717687,...,0.974729,0.625,0.981685,0.955782,0.77439,0.792308,0.815366,0.722324,0.768951,0.768707
max,0.830769,0.853659,0.794872,0.840278,0.795918,0.789116,0.823129,0.783333,0.772059,0.761905,...,0.98556,0.777778,0.992701,0.97619,0.853659,0.830769,0.840278,0.794872,0.794856,0.795918


# EverydayCognition - detect CDR

In [11]:
# Predictor columns for this section: the e_*_pt columns followed by the e_*_cg columns.
features = [
    'e_memory_pt', 'e_lang_pt', 'e_visspat_pt',
    'e_plan_pt', 'e_organ_pt', 'e_divatt_pt',
    'e_memory_cg', 'e_lang_cg', 'e_visspat_cg',
    'e_plan_cg', 'e_organ_cg', 'e_divatt_cg',
]

# Feature matrix and target ('CDGLOBAL') taken from the loaded table.
X = data.loc[:, features]
y = data['CDGLOBAL']

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

# Carve out 25% of the data for hyperparameter tuning. The subset is bound to
# new names (X_tune / y_tune) rather than overwriting X and y, so later cells
# that read X and y are not silently handed the 25% subset on a full re-run.
X_tune, X_test, y_tune, y_test = train_test_split(X, y, train_size=0.25, random_state=33433, stratify=y)
# Split the tuning subset again (75% train / 25% validation); the search is fit on the train part.
X_train, X_valid, y_train, y_valid = train_test_split(X_tune, y_tune, test_size=0.25, random_state=33433, stratify=y_tune)
# Base estimator; the search overrides the hyperparameters listed in search_params.
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unlimited depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the space above, each scored with 3-fold CV.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

In [None]:
# Exhaustive refinement grid for this section's random forest.
# min_samples_split starts at 2: scikit-learn rejects the integer 1 (an int
# must be >= 2), so a grid containing 1 wastes a third of the fits on
# candidates that fail or raise.
search_params = {'n_estimators': [1800, 1900, 2000],
               'max_features': [.2, .3, .4],
               'max_depth': [80, 90, 100],
               'min_samples_split': [2, 3, 4],
               'min_samples_leaf': [3, 4, 5],
               'bootstrap': [False, True]}

rf_search_grid = GridSearchCV(rf, search_params, 
                          cv = 3, n_jobs = -1, verbose = 2)
rf_search_grid.fit(X_train, y_train)
rf_search_grid.best_params_

In [12]:
# Random forest with hand-entered hyperparameters.
# NOTE(review): min_samples_leaf=2 is not among the grid values in the cell above - confirm these settings.
best = RandomForestClassifier(
    n_estimators=2000, max_features=.2, max_depth=80,
    min_samples_split=4, min_samples_leaf=2,
    bootstrap=True, random_state=33433,
)
# Seeds 42, 84, ..., 840 are passed for both the splits and the repeats.
df = splitrepeat_cv(
    X, y, best,
    splits=[42 * (k + 1) for k in range(20)],
    repeats=[42 * (k + 1) for k in range(20)],
    imbalanced='over', num_classes=3, avg_strategy='weighted',
    initial_split_seed=33433, initial_split_ratio=.25,
)
display(df.describe())

### Save outputs ###
q = 'CExam_ECog_400'

# Tag every row with the experiment name before writing the CSV.
df['set'] = q
df.to_csv('../models/outputs/' + q + '.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,...,Specificity2,PPV2,NPV2,Accuracy2,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,...,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.815904,0.794802,0.7598,0.845588,0.804133,0.710629,0.763146,0.750579,0.725779,0.736888,...,0.965903,0.368093,0.959056,0.928946,0.794802,0.815904,0.845588,0.7598,0.804553,0.804133
std,0.035798,0.02826,0.024888,0.025006,0.020491,0.037916,0.032308,0.026645,0.026213,0.023539,...,0.010842,0.095633,0.006814,0.009538,0.02826,0.035798,0.025006,0.024888,0.020467,0.020491
min,0.738462,0.737805,0.710345,0.803468,0.765306,0.62585,0.707483,0.702128,0.664634,0.683673,...,0.935018,0.2,0.947183,0.908163,0.737805,0.738462,0.803468,0.710345,0.766083,0.765306
25%,0.792308,0.778963,0.741007,0.825296,0.789116,0.680272,0.734694,0.728477,0.704111,0.72449,...,0.960289,0.307692,0.953737,0.92517,0.778963,0.792308,0.825296,0.741007,0.789116,0.789116
50%,0.807692,0.792683,0.761905,0.833333,0.79932,0.721088,0.761905,0.751773,0.727273,0.738095,...,0.963899,0.352941,0.960289,0.928571,0.792683,0.807692,0.833333,0.761905,0.799397,0.79932
75%,0.846154,0.818598,0.777178,0.871083,0.819728,0.741497,0.795918,0.775194,0.745098,0.751701,...,0.974729,0.4,0.96419,0.931973,0.818598,0.846154,0.871083,0.777178,0.8203,0.819728
max,0.869231,0.847561,0.80292,0.888889,0.846939,0.77551,0.823129,0.791667,0.78,0.785714,...,0.98556,0.666667,0.971326,0.955782,0.847561,0.869231,0.888889,0.80292,0.84739,0.846939


# MMSE - detect CDR (2-class: CDGLOBAL 2 merged into 1)

In [13]:
# Predictor columns for this section (the MM* columns of the loaded table).
features = ['MMDATE', 'MMYEAR', 'MMMONTH',
       'MMDAY', 'MMSEASON', 'MMHOSPIT', 'MMFLOOR', 'MMCITY', 'MMAREA',
       'MMSTATE', 'MMBALL', 'MMFLAG', 'MMTREE', 'MMBALLDL', 'MMFLAGDL',
       'MMTREEDL', 'MMWATCH', 'MMPENCIL', 'MMREPEAT', 'MMHAND', 'MMFOLD',
       'MMONFLR', 'MMREAD', 'MMWRITE', 'MMDRAW','MMSPELL_early', 'MMSPELL_late']

X = data[features]
# Binary target: class 2 is folded into class 1. replace() returns a new
# Series, so the 'CDGLOBAL' column of `data` is left untouched; the previous
# inplace=True call could rewrite the shared table and silently turn the
# 3-class sections into 2-class ones when they are re-run.
y = data['CDGLOBAL'].replace({2: 1})

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

# Carve out 25% of the data for hyperparameter tuning. The subset is bound to
# new names (X_tune / y_tune) rather than overwriting X and y, so later cells
# that read X and y are not silently handed the 25% subset on a full re-run.
X_tune, X_test, y_tune, y_test = train_test_split(X, y, train_size=0.25, random_state=33433, stratify=y)
# Split the tuning subset again (75% train / 25% validation); the search is fit on the train part.
X_train, X_valid, y_train, y_valid = train_test_split(X_tune, y_tune, test_size=0.25, random_state=33433, stratify=y_tune)
# Base estimator; the search overrides the hyperparameters listed in search_params.
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unlimited depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the space above, each scored with 3-fold CV.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

In [None]:
# Exhaustive refinement grid for this section's random forest (3-fold CV).
search_params = dict(
    n_estimators=[300, 400, 500],
    max_features=[.2, 'sqrt', 'log2'],
    max_depth=[30, 40, 50],
    min_samples_split=[8, 9, 10],
    min_samples_leaf=[3, 4, 5],
    bootstrap=[False, True],
)

rf_search_grid = GridSearchCV(estimator=rf, param_grid=search_params, cv=3, n_jobs=-1, verbose=2)
rf_search_grid.fit(X_train, y_train)
rf_search_grid.best_params_

In [14]:
# Random forest with hand-entered hyperparameters (presumably the grid-search winner; its output is not stored).
best = RandomForestClassifier(
    n_estimators=300, max_features=.2, max_depth=30,
    min_samples_split=9, min_samples_leaf=4,
    bootstrap=False, random_state=33433,
)
# Seeds 42, 84, ..., 840 are passed for both the splits and the repeats.
df = splitrepeat_cv(
    X, y, best,
    splits=[42 * (k + 1) for k in range(20)],
    repeats=[42 * (k + 1) for k in range(20)],
    avg_strategy='weighted',
    initial_split_seed=33433, initial_split_ratio=.25,
)
display(df.describe())

### Save outputs ###
q = 'CExam_2class_MMSE_400'

# Tag every row with the experiment name before writing the CSV.
df['set'] = q
df.to_csv('../models/outputs/' + q + '.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.724845,0.690515,0.647479,0.763347,0.705578,0.690515,0.724845,0.763347,0.647479,0.705578,0.690515,0.724845,0.763347,0.647479,0.706376,0.705578
std,0.042363,0.035503,0.025174,0.02647,0.022508,0.035503,0.042363,0.02647,0.025174,0.022508,0.035503,0.042363,0.02647,0.025174,0.022457,0.022508
min,0.635659,0.624242,0.597222,0.710526,0.656463,0.624242,0.635659,0.710526,0.597222,0.656463,0.624242,0.635659,0.710526,0.597222,0.657658,0.656463
25%,0.682171,0.654545,0.627586,0.740415,0.687075,0.654545,0.682171,0.740415,0.627586,0.687075,0.654545,0.682171,0.740415,0.627586,0.688192,0.687075
50%,0.732558,0.690909,0.652288,0.76087,0.705782,0.690909,0.732558,0.76087,0.652288,0.705782,0.690909,0.732558,0.76087,0.652288,0.70613,0.705782
75%,0.751938,0.727273,0.664474,0.785185,0.72449,0.727273,0.751938,0.785185,0.664474,0.72449,0.727273,0.751938,0.785185,0.664474,0.725499,0.72449
max,0.79845,0.745455,0.695652,0.808219,0.744898,0.745455,0.79845,0.808219,0.695652,0.744898,0.745455,0.79845,0.808219,0.695652,0.745854,0.744898


# MOCA - detect CDR (2-class: CDGLOBAL 2 merged into 1)

In [15]:
# Predictor columns for this section (the moca_* columns of the loaded table).
features = ['moca_visuo_exec', 'moca_clock',
       'moca_naming', 'moca_digits', 'moca_letters', 'moca_serial7',
       'moca_repeat', 'moca_fluency', 'moca_similarities', 'moca_recall',
       'moca_orient']

X = data[features]
# Binary target: class 2 is folded into class 1. replace() returns a new
# Series, so the 'CDGLOBAL' column of `data` is left untouched; the previous
# inplace=True call could rewrite the shared table and silently turn the
# 3-class sections into 2-class ones when they are re-run.
y = data['CDGLOBAL'].replace({2: 1})

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

# Carve out 25% of the data for hyperparameter tuning. The subset is bound to
# new names (X_tune / y_tune) rather than overwriting X and y, so later cells
# that read X and y are not silently handed the 25% subset on a full re-run.
X_tune, X_test, y_tune, y_test = train_test_split(X, y, train_size=0.25, random_state=33433, stratify=y)
# Split the tuning subset again (75% train / 25% validation); the search is fit on the train part.
X_train, X_valid, y_train, y_valid = train_test_split(X_tune, y_tune, test_size=0.25, random_state=33433, stratify=y_tune)
# Base estimator; the search overrides the hyperparameters listed in search_params.
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unlimited depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the space above, each scored with 3-fold CV.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

In [None]:
# Exhaustive refinement grid for this section's random forest (3-fold CV).
search_params = dict(
    n_estimators=[700, 800, 900],
    max_features=[.2, 'sqrt', 'log2'],
    max_depth=[40, 50, 60],
    min_samples_split=[8, 9, 10],
    min_samples_leaf=[3, 4, 5],
    bootstrap=[False, True],
)

rf_search_grid = GridSearchCV(estimator=rf, param_grid=search_params, cv=3, n_jobs=-1, verbose=2)
rf_search_grid.fit(X_train, y_train)
rf_search_grid.best_params_

In [16]:
# Random forest with hand-entered hyperparameters (presumably the grid-search winner; its output is not stored).
best = RandomForestClassifier(
    n_estimators=800, max_features='sqrt', max_depth=40,
    min_samples_split=10, min_samples_leaf=4,
    bootstrap=True, random_state=33433,
)
# Seeds 42, 84, ..., 840 are passed for both the splits and the repeats.
df = splitrepeat_cv(
    X, y, best,
    splits=[42 * (k + 1) for k in range(20)],
    repeats=[42 * (k + 1) for k in range(20)],
    avg_strategy='weighted',
    initial_split_seed=33433, initial_split_ratio=.25,
)
display(df.describe())

### Save outputs ###
q = 'CExam_2class_MOCA_400'

# Tag every row with the experiment name before writing the CSV.
df['set'] = q
df.to_csv('../models/outputs/' + q + '.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.692229,0.69847,0.642802,0.744447,0.695731,0.69847,0.692229,0.744447,0.642802,0.695731,0.69847,0.692229,0.744447,0.642802,0.696406,0.695731
std,0.042522,0.034459,0.028157,0.027023,0.024949,0.034459,0.042522,0.027023,0.028157,0.024949,0.034459,0.042522,0.027023,0.028157,0.024848,0.024949
min,0.596899,0.618182,0.597222,0.695652,0.653061,0.618182,0.596899,0.695652,0.597222,0.653061,0.618182,0.596899,0.695652,0.597222,0.653581,0.653061
25%,0.666667,0.672727,0.622517,0.722973,0.680272,0.672727,0.666667,0.722973,0.622517,0.680272,0.672727,0.666667,0.722973,0.622517,0.680945,0.680272
50%,0.689922,0.69697,0.635714,0.741497,0.687075,0.69697,0.689922,0.741497,0.635714,0.687075,0.69697,0.689922,0.741497,0.635714,0.688196,0.687075
75%,0.715116,0.727273,0.664474,0.755245,0.714286,0.727273,0.715116,0.755245,0.664474,0.714286,0.727273,0.715116,0.755245,0.664474,0.714887,0.714286
max,0.782946,0.793939,0.703704,0.805556,0.748299,0.793939,0.782946,0.805556,0.703704,0.748299,0.793939,0.782946,0.805556,0.703704,0.748829,0.748299


# ADAS - detect CDR (2-class: CDGLOBAL 2 merged into 1)

In [17]:
# Predictor columns for this section (ADAS_Q1 ... ADAS_Q13).
features = ['ADAS_Q1', 'ADAS_Q2', 'ADAS_Q3',
       'ADAS_Q4', 'ADAS_Q5', 'ADAS_Q6', 'ADAS_Q7', 'ADAS_Q8', 'ADAS_Q9',
       'ADAS_Q10', 'ADAS_Q11', 'ADAS_Q12', 'ADAS_Q13']

X = data[features]
# Binary target: class 2 is folded into class 1. replace() returns a new
# Series, so the 'CDGLOBAL' column of `data` is left untouched; the previous
# inplace=True call could rewrite the shared table and silently turn the
# 3-class sections into 2-class ones when they are re-run.
y = data['CDGLOBAL'].replace({2: 1})

In [None]:
# RandomForestClassifier and GridSearchCV are already imported in the notebook's
# first cell; RandomizedSearchCV is the only name that is new here.
from sklearn.model_selection import RandomizedSearchCV

# Carve out the tuning subset under its own names. Rebinding X / y here (as the
# cell used to do) shrank the globals that the evaluation cell passes to
# splitrepeat_cv, and shrank them again on every re-run of this cell.
# NOTE(review): train_size=0.25 keeps a quarter of the rows for tuning; this looks
# like it mirrors initial_split_ratio=.25 / initial_split_seed=33433 given to
# splitrepeat_cv - confirm against that helper.
X_search, X_test, y_search, y_test = train_test_split(
    X, y, train_size=0.25, random_state=33433, stratify=y)
# Second split inside the tuning subset: 75% to fit the search, 25% held back.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_search, y_search, test_size=0.25, random_state=33433, stratify=y_search)
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unrestricted depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the grid above, each scored with 3-fold CV on all cores.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

In [None]:
# Exhaustive search over a narrow grid, scored with 3-fold CV on all cores.
search_params = dict(
    n_estimators=[700, 800, 900],
    max_features=[.2, 'sqrt', 'log2'],
    max_depth=[70, 80, 90],
    min_samples_split=[4, 5, 6],
    min_samples_leaf=[3, 4, 5],
    bootstrap=[False, True],
)

rf_search_grid = GridSearchCV(
    estimator=rf,
    param_grid=search_params,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
rf_search_grid.fit(X_train, y_train)
# Last expression of the cell: show the winning parameter combination.
rf_search_grid.best_params_

In [18]:
# Random forest configuration evaluated on the ADAS feature set.
best = RandomForestClassifier(
    n_estimators=800,
    max_features='sqrt',
    max_depth=70,
    min_samples_split=4,
    min_samples_leaf=4,
    bootstrap=True,
    random_state=33433,
)

# Twenty seeds (42, 84, ..., 840); an independent copy is handed to `splits`
# and to `repeats`, matching the two separate lists built originally.
seed_grid = [42 * (k + 1) for k in range(20)]
df = splitrepeat_cv(
    X, y, best,
    splits=list(seed_grid),
    repeats=list(seed_grid),
    avg_strategy='weighted',
    initial_split_seed=33433,
    initial_split_ratio=.25,
)
display(df.describe())

### Save outputs ###
q = 'CExam_2class_ADAS_400'

# Tag every row with the run label, then write the table to the outputs folder.
df['set'] = q
df.to_csv(f'../models/outputs/{q}.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.728857,0.756,0.700897,0.781925,0.74409,0.756,0.728857,0.781925,0.700897,0.74409,0.756,0.728857,0.781925,0.700897,0.744358,0.74409
std,0.044487,0.032028,0.028344,0.028534,0.023851,0.032028,0.044487,0.028534,0.028344,0.023851,0.032028,0.044487,0.028534,0.028344,0.023791,0.023851
min,0.643411,0.690909,0.643357,0.727811,0.70068,0.690909,0.643411,0.727811,0.643357,0.70068,0.690909,0.643411,0.727811,0.643357,0.700116,0.70068
25%,0.697674,0.733333,0.679318,0.75974,0.727891,0.733333,0.697674,0.75974,0.679318,0.727891,0.733333,0.697674,0.75974,0.679318,0.727378,0.727891
50%,0.72093,0.745455,0.69863,0.781065,0.748299,0.745455,0.72093,0.781065,0.69863,0.748299,0.745455,0.72093,0.781065,0.69863,0.748074,0.748299
75%,0.75969,0.781818,0.728842,0.797546,0.761905,0.781818,0.75969,0.797546,0.728842,0.761905,0.781818,0.75969,0.797546,0.728842,0.760275,0.761905
max,0.852713,0.836364,0.765217,0.868966,0.806122,0.836364,0.852713,0.868966,0.765217,0.806122,0.836364,0.852713,0.868966,0.765217,0.80685,0.806122


# EverydayCognition - detect CDR

In [19]:
# Predictors for this section: the six `*_pt` and six `*_cg` e_* columns
# (memory, lang, visspat, plan, organ, divatt).
features = ['e_memory_pt', 'e_lang_pt', 'e_visspat_pt', 'e_plan_pt', 'e_organ_pt', 
            'e_divatt_pt','e_memory_cg', 'e_lang_cg', 
           'e_visspat_cg','e_plan_cg', 'e_organ_cg', 'e_divatt_cg']


X = data[features]
# Two-class target: CDGLOBAL with the value 2 recoded to 1.
# `replace` without `inplace=True` returns a new Series, so `data['CDGLOBAL']`
# keeps its original values and this cell gives the same result on every re-run
# (the in-place form rewrote the column inside `data` on classic pandas and
# behaves differently under copy-on-write).
y = data['CDGLOBAL'].replace({2: 1})

In [None]:
# RandomForestClassifier and GridSearchCV are already imported in the notebook's
# first cell; RandomizedSearchCV is the only name that is new here.
from sklearn.model_selection import RandomizedSearchCV

# Carve out the tuning subset under its own names. Rebinding X / y here (as the
# cell used to do) shrank the globals that the evaluation cell passes to
# splitrepeat_cv, and shrank them again on every re-run of this cell.
# NOTE(review): train_size=0.25 keeps a quarter of the rows for tuning; this looks
# like it mirrors initial_split_ratio=.25 / initial_split_seed=33433 given to
# splitrepeat_cv - confirm against that helper.
X_search, X_test, y_search, y_test = train_test_split(
    X, y, train_size=0.25, random_state=33433, stratify=y)
# Second split inside the tuning subset: 75% to fit the search, 25% held back.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_search, y_search, test_size=0.25, random_state=33433, stratify=y_search)
rf = RandomForestClassifier(n_estimators=100, max_features=.5, random_state=0)

# Search parameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['sqrt', 'log2', .3, .5, .7, .9]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # also try unrestricted depth
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

search_params = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
# 400 random draws from the grid above, each scored with 3-fold CV on all cores.
rf_search_rand = RandomizedSearchCV(rf, search_params, n_iter=400,cv=3,verbose=2,random_state=33433, n_jobs=-1)
rf_search_rand.fit(X_train, y_train)
rf_search_rand.best_params_

In [None]:
# Exhaustive search over a narrow grid, scored with 3-fold CV on all cores.
search_params = dict(
    n_estimators=[700, 800, 900],
    max_features=[.2, 'sqrt', 'log2'],
    max_depth=[70, 80, 90],
    min_samples_split=[4, 5, 6],
    min_samples_leaf=[3, 4, 5],
    bootstrap=[False, True],
)

rf_search_grid = GridSearchCV(
    estimator=rf,
    param_grid=search_params,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
rf_search_grid.fit(X_train, y_train)
# Last expression of the cell: show the winning parameter combination.
rf_search_grid.best_params_

In [20]:
# Random forest configuration evaluated on the EverydayCognition feature set.
best = RandomForestClassifier(
    n_estimators=800,
    max_features='sqrt',
    max_depth=70,
    min_samples_split=4,
    min_samples_leaf=4,
    bootstrap=True,
    random_state=33433,
)

# Twenty seeds (42, 84, ..., 840); an independent copy is handed to `splits`
# and to `repeats`, matching the two separate lists built originally.
seed_grid = [42 * (k + 1) for k in range(20)]
df = splitrepeat_cv(
    X, y, best,
    splits=list(seed_grid),
    repeats=list(seed_grid),
    avg_strategy='weighted',
    initial_split_seed=33433,
    initial_split_ratio=.25,
)
display(df.describe())

### Save outputs ###
q = 'CExam_2class_ECog_400'

# Tag every row with the run label, then write the table to the outputs folder.
df['set'] = q
df.to_csv(f'../models/outputs/{q}.csv', index=False)

Unnamed: 0,Sensitivity0,Specificity0,PPV0,NPV0,Accuracy0,Sensitivity1,Specificity1,PPV1,NPV1,Accuracy1,Sensitivity,Specificity,PPV,NPV,F1_Score,Accuracy
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.758101,0.84153,0.789408,0.817216,0.804923,0.84153,0.758101,0.817216,0.789408,0.804923,0.84153,0.758101,0.817216,0.789408,0.80431,0.804923
std,0.038565,0.019471,0.02016,0.022869,0.01742,0.019471,0.038565,0.022869,0.02016,0.01742,0.019471,0.038565,0.022869,0.02016,0.017828,0.01742
min,0.674419,0.793939,0.740458,0.767956,0.765306,0.793939,0.674419,0.767956,0.740458,0.765306,0.793939,0.674419,0.767956,0.740458,0.76418,0.765306
25%,0.736434,0.830303,0.779528,0.805556,0.792517,0.830303,0.736434,0.805556,0.779528,0.792517,0.830303,0.736434,0.805556,0.779528,0.792427,0.792517
50%,0.75969,0.842424,0.787879,0.816568,0.809524,0.842424,0.75969,0.816568,0.787879,0.809524,0.842424,0.75969,0.816568,0.787879,0.808947,0.809524
75%,0.790698,0.854545,0.803279,0.835366,0.816327,0.854545,0.790698,0.835366,0.803279,0.816327,0.854545,0.790698,0.835366,0.803279,0.816327,0.816327
max,0.829457,0.890909,0.839286,0.864198,0.843537,0.890909,0.829457,0.864198,0.839286,0.843537,0.890909,0.829457,0.864198,0.839286,0.843537,0.843537
