In [20]:
#Importing all packages needed 
#1) Fundamental
import pandas as pd
import numpy as np

#2) Preprocessing 
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import KNNImputer
from sklearn.impute import SimpleImputer
import imblearn

#3) Model 
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
import xgboost as xgb
from mlxtend.classifier import StackingClassifier
from catboost import CatBoostClassifier

#4) Validation  
from sklearn.model_selection import LeaveOneOut

#5) Performance metrics  
from sklearn.metrics import roc_auc_score, confusion_matrix,accuracy_score

#6) Custom Functions
#!pip install ipynb
from ipynb.fs.full.Functions import *

import warnings
warnings.filterwarnings("ignore")

In [21]:
# NOTE(review): this cell repeats the warnings filter that the import cell already applies.
# filterwarnings("ignore") silences every warning category, so any warnings raised by the
# libraries used in the cells below will not be shown.
import warnings
warnings.filterwarnings("ignore")

# Model Selection 
In this notebook we identify the model that best predicts our data by testing a range of classification models and validating them with leave-one-out cross-validation, using the area under the curve (AUC) as the metric.

We tested the below models:
1) Random Forest <br>
2) Logistic Regression<br> 
3) SVM (Linear Kernel)<br>
4) SVM (Polynomial Kernel)<br>
5) SVM (Radial Basis Function Kernel)<br>
6) Naive Bayes<br>
7) AdaBoost<br>
8) XGBoost<br>
9) Hard Voting<br>
10) Stacking<br>

In [22]:
# Read the biomarker spreadsheet. The path is handed straight to pandas so that
# pandas opens and closes the file itself (wrapping it in open(...) left a file
# handle that was never closed).
data = pd.read_excel("biomarkers.xlsx")
# Keep only the rows whose biodiag code is below 5; the other groups are not needed
data = data[data['biodiag'] < 5]

# Columns kept for the classification work
data_class = data[[
    "edad",
    "años_escol",
    "Sexo",
    "biodiag",
    "APOE",
    "t_tau",
    "p_tau",
    "AB_42_TUB_B",
    "buschke_AL",
    "buschke_AT",
    "buschke_RDL",
    "buschke_RDT",
    "tam",
    "vis_cerad",
    "paisajes_tot",
    "bnt",
    "flu_anim",
    "compren",
    "ideom",
    "prax_cerad",
    "tdp",
    "VOSP_num",
    "VOSP_letras",
    "tmtA",
    "tmtB",
    "fas_total",
    "Stroop_lect",
    "Stroop_color",
    "Stroop_I",
    "Clave_num",
]]

# Model inputs: everything above except the APOE, t_tau, p_tau and AB_42_TUB_B columns
feat_select = data_class.drop(["APOE", "t_tau", "p_tau", "AB_42_TUB_B"], axis=1)

# 1) Random Forest


In [23]:
# Random forest evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names;
    # per the original notes it encodes biodiag and converts the data to arrays
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator plus the scaler and imputer classes and returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, RandomForestClassifier(random_state=1), MinMaxScaler, KNNImputer
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.762 and the auc is 0.555
For control vs AD preclinical the sensitivity is 0.160 and the specificity is 0.950
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.919 and the auc is 0.894
For control vs AD prodromal the sensitivity is 0.975 and the specificity is 0.814


# 2) Logistic Regression


In [24]:
# Logistic regression evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# NOTE(review): only the preclinical model uses class_weight='balanced'; the prodromal
# model keeps the default weighting - confirm this asymmetry is intended.
# Each entry: (group codes, estimator, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     LogisticRegression(random_state=1, class_weight='balanced'),
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     LogisticRegression(random_state=1),
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, estimator, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator plus the scaler and imputer classes and returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(X, y, estimator, MinMaxScaler, KNNImputer)
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.629 and the auc is 0.564
For control vs AD preclinical the sensitivity is 0.440 and the specificity is 0.688
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.919 and the auc is 0.894
For control vs AD prodromal the sensitivity is 0.975 and the specificity is 0.814


# 3) Support Vector Machines (Linear)

In [25]:
# Linear-kernel SVM evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator plus the scaler and imputer classes and returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, SVC(kernel='linear', random_state=1), MinMaxScaler, KNNImputer
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.771 and the auc is 0.520
For control vs AD preclinical the sensitivity is 0.040 and the specificity is 1.000
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.935 and the auc is 0.918
For control vs AD prodromal the sensitivity is 0.975 and the specificity is 0.860


# 4) Support Vector Machines (Polynomial)

In [26]:
# Degree-2 polynomial-kernel SVM evaluated with the LOOCV helper on the two comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator plus the scaler and imputer classes and returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, SVC(kernel='poly', degree=2, random_state=1), MinMaxScaler, KNNImputer
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.752 and the auc is 0.535
For control vs AD preclinical the sensitivity is 0.120 and the specificity is 0.950
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.911 and the auc is 0.894
For control vs AD prodromal the sensitivity is 0.950 and the specificity is 0.837


# 5) Support Vector Machines (RBF)

In [27]:
# RBF-kernel SVM evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator plus the scaler and imputer classes and returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, SVC(kernel='rbf', random_state=1), MinMaxScaler, KNNImputer
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.762 and the auc is 0.500
For control vs AD preclinical the sensitivity is 0.000 and the specificity is 1.000
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.911 and the auc is 0.888
For control vs AD prodromal the sensitivity is 0.963 and the specificity is 0.814


# 6) Gaussian Naive Bayes

In [28]:
# Gaussian naive Bayes evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator plus the scaler and imputer classes and returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(X, y, GaussianNB(), MinMaxScaler, KNNImputer)
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.381 and the auc is 0.525
For control vs AD preclinical the sensitivity is 0.800 and the specificity is 0.250
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.902 and the auc is 0.893
For control vs AD prodromal the sensitivity is 0.925 and the specificity is 0.860


# 7) AdaBoostClassifier

In [29]:
# AdaBoost evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# The classifier is seeded with random_state=1, matching the other seeded models in
# this notebook, so that re-running the cell reproduces the same metrics.
# NOTE: metrics may differ slightly from results produced by earlier unseeded runs.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator plus the scaler and imputer classes and returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, AdaBoostClassifier(random_state=1), MinMaxScaler, KNNImputer
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.752 and the auc is 0.604
For control vs AD preclinical the sensitivity is 0.320 and the specificity is 0.887
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.902 and the auc is 0.887
For control vs AD prodromal the sensitivity is 0.938 and the specificity is 0.837


# 8) XGBoost

In [30]:
# XGBoost classifier evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator plus the scaler and imputer classes and returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, xgb.XGBClassifier(eval_metric="logloss"), MinMaxScaler, KNNImputer
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.743 and the auc is 0.570
For control vs AD preclinical the sensitivity is 0.240 and the specificity is 0.900
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.919 and the auc is 0.900
For control vs AD prodromal the sensitivity is 0.963 and the specificity is 0.837


# 9) Hard Voting

In [31]:
# Hard-voting ensemble (majority vote) of random forest, logistic regression and AdaBoost,
# evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# Note: the SVC variants (linear / poly / rbf) are not included as voters.
models = [
    ('rf', RandomForestClassifier(random_state=1)),
    ('logistic', LogisticRegression(random_state=1)),
    # Seeded like the other voters so repeated runs build the same ensemble.
    # NOTE: metrics may differ slightly from results produced by earlier unseeded runs.
    ('adaboost', AdaBoostClassifier(random_state=1)),
]

# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # A fresh VotingClassifier is built for each comparison from the same voter list
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, VotingClassifier(estimators=models, voting='hard'), MinMaxScaler, KNNImputer
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.771 and the auc is 0.547
For control vs AD preclinical the sensitivity is 0.120 and the specificity is 0.975
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.919 and the auc is 0.894
For control vs AD prodromal the sensitivity is 0.975 and the specificity is 0.814


# 10) Stacking

In [32]:
# Stacking ensemble: four base classifiers whose predicted probabilities
# (use_probas=True) feed a random-forest meta classifier.
# The random forests and AdaBoost are seeded with random_state=1, matching the other
# seeded models in this notebook; left unseeded, the metrics change from run to run.
# NOTE: metrics may differ from results produced by earlier unseeded runs.
stacking = StackingClassifier(
    classifiers=[
        AdaBoostClassifier(random_state=1),
        LogisticRegression(),
        RandomForestClassifier(random_state=1),
        xgb.XGBClassifier(eval_metric="logloss"),
    ],
    use_probas=True,
    meta_classifier=RandomForestClassifier(random_state=1),
)

# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # The same stacking object is passed to LOOCV for both comparisons
    acc, roc_auc, sensitivity, specificity = LOOCV(X, y, stacking, MinMaxScaler, KNNImputer)
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.752 and the auc is 0.576
For control vs AD preclinical the sensitivity is 0.240 and the specificity is 0.912
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.902 and the auc is 0.882
For control vs AD prodromal the sensitivity is 0.950 and the specificity is 0.814


In [39]:
 #Reading in the dataset
# Columns pulled from the FTT file
ftt_columns = ['groups', 'P_TAU', 'AB42', 'FTT_rate', 'FTT_variability_total_Log10', 'age',
               'gender', 'education', 'AL', 'AT', 'RD', 'RDT', 'BNT', 'ANIMALES', 'VOSP_NUM',
               'TMT_A', 'TMT_B', 'STROOP_LECT', 'STROOP_COLOR', 'STROOP_INT', 'SDMT',
               "PL_FTT_time_interval"]
# Group labels mapped onto the same numeric coding used for the first dataset
biodiag_codes = {"grupo control": 1, "grupo preclinico": 2, "grupo con EA": 3}
# Gender as a binary indicator
gender_codes = {"male": 1, "female": 0}

# Load the SPSS file (this rebinds data, data_class and feat_select to the FTT dataset)
data = pd.read_spss("FTT.sav")

# Select the columns and rename the group column to biodiag to match the first dataset
data_class = data[ftt_columns].rename(columns={'groups': 'biodiag'})
data_class['biodiag'] = data_class['biodiag'].replace(biodiag_codes)
data_class['gender'] = data_class['gender'].replace(gender_codes)

# Model inputs: the group label plus the three FTT measures
feat_select = data_class[["biodiag", "FTT_variability_total_Log10", "PL_FTT_time_interval", "FTT_rate"]]

# 1) Random Forest


In [41]:
# Random forest evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# None is passed in the imputer position for this feature set.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator, the scaler class and the imputer argument; returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, RandomForestClassifier(random_state=1), MinMaxScaler, None
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.772 and the auc is 0.721
For control vs AD preclinical the sensitivity is 0.550 and the specificity is 0.892
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.827 and the auc is 0.740
For control vs AD prodromal the sensitivity is 0.533 and the specificity is 0.946


# 2) Logistic Regression


In [42]:
# Logistic regression evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# None is passed in the imputer position for this feature set.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator, the scaler class and the imputer argument; returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, LogisticRegression(random_state=1), MinMaxScaler, None
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.684 and the auc is 0.573
For control vs AD preclinical the sensitivity is 0.200 and the specificity is 0.946
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.769 and the auc is 0.600
For control vs AD prodromal the sensitivity is 0.200 and the specificity is 1.000


# 3) Support Vector Machines (Linear)

In [43]:
# Linear-kernel SVM evaluated with the LOOCV helper on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# None is passed in the imputer position for this feature set.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator, the scaler class and the imputer argument; returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, SVC(kernel='linear', random_state=1), MinMaxScaler, None
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))


For control vs AD preclinical the accuracy is 0.561 and the auc is 0.444
For control vs AD preclinical the sensitivity is 0.050 and the specificity is 0.838
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.750 and the auc is 0.586
For control vs AD prodromal the sensitivity is 0.200 and the specificity is 0.973


# 4) Support Vector Machines (Polynomial)

In [44]:
# Polynomial-kernel SVM (no degree argument given) evaluated with the LOOCV helper
# on the two diagnostic comparisons.
# biodiag codes: 1 = control, 2 = preclinical, 3 = prodromal.
# None is passed in the imputer position for this feature set.
# Each entry: (group codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1, 2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1, 3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for run_index, (group_codes, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    if run_index > 0:
        # Visual separator between the two comparisons
        print("-------------------------------------------------------------------------------------------")
    # Keep only the subjects that belong to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(group_codes)]
    # data_prep (Functions notebook) returns X, y and the column names
    X, y, coulmnNames = data_prep(data_model)
    # LOOCV takes the estimator, the scaler class and the imputer argument; returns the four metrics
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, SVC(kernel='poly', random_state=1), MinMaxScaler, None
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))


For control vs AD preclinical the accuracy is 0.719 and the auc is 0.669
For control vs AD preclinical the sensitivity is 0.500 and the specificity is 0.838
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.808 and the auc is 0.706
For control vs AD prodromal the sensitivity is 0.467 and the specificity is 0.946


# 5) Support Vector Machines (RBF)

In [45]:
# RBF-kernel SVM scored with the LOOCV helper on two binary problems:
# control (biodiag 1) vs preclinical (2), then control (1) vs prodromal (3).
# data_prep and LOOCV are not defined in this cell (presumably they come from the
# Functions notebook star-import). Per the notebook's earlier notes, data_prep
# encodes the biodiag group and returns X and y as arrays plus the column names.

# One row per comparison:
# (biodiag codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1,2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1,3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for position, (groups, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    # Visual separator between the two result blocks
    if position > 0:
        print("-------------------------------------------------------------------------------------------")

    # Restrict the table to the two diagnostic groups under comparison
    group_mask = feat_select['biodiag'].isin(groups)
    data_model = feat_select.loc[group_mask]
    X, y, coulmnNames = data_prep(data_model)

    # A new SVC instance per comparison, exactly as in the unrolled version.
    # NOTE(review): MinMaxScaler / None are presumably the scaler and imputer that
    # LOOCV applies - confirm against LOOCV's definition.
    classifier = SVC(kernel='rbf', random_state=1)
    acc, roc_auc, sensitivity, specificity = LOOCV(X, y, classifier, MinMaxScaler, None)
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))


For control vs AD preclinical the accuracy is 0.772 and the auc is 0.721
For control vs AD preclinical the sensitivity is 0.550 and the specificity is 0.892
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.846 and the auc is 0.773
For control vs AD prodromal the sensitivity is 0.600 and the specificity is 0.946


# 6) Gaussian Naive Bayes

In [46]:
# Gaussian Naive Bayes scored with the LOOCV helper on two binary problems:
# control (biodiag 1) vs preclinical (2), then control (1) vs prodromal (3).
# data_prep and LOOCV are not defined in this cell (presumably they come from the
# Functions notebook star-import). Per the notebook's earlier notes, data_prep
# encodes the biodiag group and returns X and y as arrays plus the column names.

# One row per comparison:
# (biodiag codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1,2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1,3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for position, (groups, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    # Visual separator between the two result blocks
    if position > 0:
        print("-------------------------------------------------------------------------------------------")

    # Subset to the two groups, then let data_prep build the model inputs
    data_model = feat_select.loc[feat_select['biodiag'].isin(groups)]
    X, y, coulmnNames = data_prep(data_model)

    # NOTE(review): MinMaxScaler / None are presumably the scaler and imputer that
    # LOOCV applies - confirm against LOOCV's definition.
    acc, roc_auc, sensitivity, specificity = LOOCV(X, y, GaussianNB(), MinMaxScaler, None)
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))


For control vs AD preclinical the accuracy is 0.667 and the auc is 0.640
For control vs AD preclinical the sensitivity is 0.550 and the specificity is 0.730
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.808 and the auc is 0.766
For control vs AD prodromal the sensitivity is 0.667 and the specificity is 0.865


# 7) AdaBoostClassifier

In [47]:
# AdaBoost scored with the LOOCV helper on two binary problems:
# control (biodiag 1) vs preclinical (2), then control (1) vs prodromal (3).
# data_prep and LOOCV are not defined in this cell (presumably they come from the
# Functions notebook star-import). Per the notebook's earlier notes, data_prep
# encodes the biodiag group and returns X and y as arrays plus the column names.
#
# Fix: the preclinical run used an unseeded AdaBoostClassifier() while the prodromal
# run used random_state=0. Both runs now share the same seed so the two comparisons
# use an identically configured model and the cell gives the same numbers on re-run.
ADABOOST_SEED = 0

# One row per comparison:
# (biodiag codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1,2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1,3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for position, (groups, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    # Visual separator between the two result blocks
    if position > 0:
        print("-------------------------------------------------------------------------------------------")

    # Keep only the subjects belonging to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(groups)]
    X, y, coulmnNames = data_prep(data_model)

    # NOTE(review): MinMaxScaler / None are presumably the scaler and imputer that
    # LOOCV applies - confirm against LOOCV's definition.
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, AdaBoostClassifier(random_state=ADABOOST_SEED), MinMaxScaler, None
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))


For control vs AD preclinical the accuracy is 0.649 and the auc is 0.603
For control vs AD preclinical the sensitivity is 0.450 and the specificity is 0.757
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.827 and the auc is 0.779
For control vs AD prodromal the sensitivity is 0.667 and the specificity is 0.892


# 8) XGBoost

In [48]:
# XGBoost scored with the LOOCV helper on two binary problems:
# control (biodiag 1) vs preclinical (2), then control (1) vs prodromal (3).
# data_prep and LOOCV are not defined in this cell (presumably they come from the
# Functions notebook star-import). Per the notebook's earlier notes, data_prep
# encodes the biodiag group and returns X and y as arrays plus the column names.

# One row per comparison:
# (biodiag codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1,2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1,3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for position, (groups, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    # Visual separator between the two result blocks
    if position > 0:
        print("-------------------------------------------------------------------------------------------")

    # Restrict the table to the two diagnostic groups under comparison
    data_model = feat_select.loc[feat_select['biodiag'].isin(groups)]
    X, y, coulmnNames = data_prep(data_model)

    # A new booster per comparison, with the same settings for both.
    # NOTE(review): MinMaxScaler / None are presumably the scaler and imputer that
    # LOOCV applies - confirm against LOOCV's definition.
    booster = xgb.XGBClassifier(eval_metric="logloss", random_state=1)
    acc, roc_auc, sensitivity, specificity = LOOCV(X, y, booster, MinMaxScaler, None)
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.737 and the auc is 0.682
For control vs AD preclinical the sensitivity is 0.500 and the specificity is 0.865
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.846 and the auc is 0.793
For control vs AD prodromal the sensitivity is 0.667 and the specificity is 0.919


# 9) Hard Voting

In [49]:
# Hard-voting (majority vote) ensemble scored with the LOOCV helper on two binary
# problems: control (biodiag 1) vs preclinical (2), then control (1) vs prodromal (3).
# data_prep and LOOCV are not defined in this cell (presumably they come from the
# Functions notebook star-import). Per the notebook's earlier notes, data_prep
# encodes the biodiag group and returns X and y as arrays plus the column names.

# Voters of the ensemble.
# Fix: the AdaBoost voter used to be created without a random_state while the other
# voters were seeded, so the ensemble could change between runs. It is now seeded
# with 0, the seed the standalone AdaBoost run in section 7 already uses.
# The original cell also listed linear / polynomial (degree=2) / RBF SVC voters as
# commented-out lines; they were disabled and are not part of this ensemble.
models = [
    ('rf', RandomForestClassifier(random_state=1)),
    ('logistic', LogisticRegression(random_state=1)),
    ('adaboost', AdaBoostClassifier(random_state=0)),
]

# One row per comparison:
# (biodiag codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1,2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1,3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for position, (groups, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    # Visual separator between the two result blocks
    if position > 0:
        print("-------------------------------------------------------------------------------------------")

    # Keep only the subjects belonging to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(groups)]
    X, y, coulmnNames = data_prep(data_model)

    # A new VotingClassifier per comparison, built from the same voter list.
    # NOTE(review): MinMaxScaler / None are presumably the scaler and imputer that
    # LOOCV applies - confirm against LOOCV's definition.
    acc, roc_auc, sensitivity, specificity = LOOCV(
        X, y, VotingClassifier(estimators=models, voting='hard'), MinMaxScaler, None
    )
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.719 and the auc is 0.646
For control vs AD preclinical the sensitivity is 0.400 and the specificity is 0.892
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.827 and the auc is 0.740
For control vs AD prodromal the sensitivity is 0.533 and the specificity is 0.946


# 10) Stacking

In [51]:
# Stacking ensemble scored with the LOOCV helper on two binary problems:
# control (biodiag 1) vs preclinical (2), then control (1) vs prodromal (3).
# data_prep and LOOCV are not defined in this cell (presumably they come from the
# Functions notebook star-import). Per the notebook's earlier notes, data_prep
# encodes the biodiag group and returns X and y as arrays plus the column names.

# Level-0 learners feed their class probabilities (use_probas=True) to a random
# forest meta-classifier.
# Fix: none of the learners were seeded, unlike the standalone models elsewhere in
# this notebook, so the stacking scores changed from run to run. Each learner now
# gets the seed its standalone counterpart uses elsewhere in the notebook
# (AdaBoost 0, the others 1).
stacking = StackingClassifier(
    classifiers=[
        AdaBoostClassifier(random_state=0),
        LogisticRegression(random_state=1),
        RandomForestClassifier(random_state=1),
        xgb.XGBClassifier(eval_metric="logloss", random_state=1),
    ],
    use_probas=True,
    meta_classifier=RandomForestClassifier(random_state=1),
)

# One row per comparison:
# (biodiag codes to keep, accuracy/AUC message, sensitivity/specificity message)
comparisons = [
    ([1,2],
     "For control vs AD preclinical the accuracy is %.3f and the auc is %.3f",
     "For control vs AD preclinical the sensitivity is %.3f and the specificity is %.3f"),
    ([1,3],
     "For control vs AD prodromal the accuracy is %.3f and the auc is %.3f",
     "For control vs AD prodromal the sensitivity is %.3f and the specificity is %.3f"),
]

for position, (groups, acc_auc_msg, sens_spec_msg) in enumerate(comparisons):
    # Visual separator between the two result blocks
    if position > 0:
        print("-------------------------------------------------------------------------------------------")

    # Keep only the subjects belonging to the two groups being compared
    data_model = feat_select.loc[feat_select['biodiag'].isin(groups)]
    X, y, coulmnNames = data_prep(data_model)

    # The same `stacking` object is handed to LOOCV for both comparisons, as before.
    # NOTE(review): this is the only model cell in this part of the notebook that
    # passes KNNImputer as the last LOOCV argument (the others pass None). Kept
    # unchanged - confirm that the difference is intentional.
    acc, roc_auc, sensitivity, specificity = LOOCV(X, y, stacking, MinMaxScaler, KNNImputer)
    print(acc_auc_msg % (acc, roc_auc))
    print(sens_spec_msg % (sensitivity, specificity))

For control vs AD preclinical the accuracy is 0.737 and the auc is 0.694
For control vs AD preclinical the sensitivity is 0.550 and the specificity is 0.838
-------------------------------------------------------------------------------------------
For control vs AD prodromal the accuracy is 0.846 and the auc is 0.793
For control vs AD prodromal the sensitivity is 0.667 and the specificity is 0.919
