# Alzheimer's Disease and Frontotemporal Dementia Prediction


- Author: Elmo Chavez
- Date: 19-Jul-2023

**Description**

> [...]


## Read the Data


### Import Libraries


In [1]:
import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import xgboost as xgb
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

### Reading Datasets


In [2]:
# Filenames
filename_participants = 'participants_selected.csv'
filename_epochs_fp1 = 'epoch_psd_features.csv'
filename_epochs_all = 'epoch_psd_features_all_channels.csv'
filename_epochs_bnds_fp1 = 'epoch_bands_psd_features.csv'
filename_epochs_bdns_all = 'epoch_bands_psd_features_all_channels.csv'

# path
path = os.getcwd()

# Participants info
df_participants = pd.read_csv(path+'/Feature_Extraction/'+filename_participants)

# Epochs based feature extraction
df_epochs_fp1 = pd.read_csv(path+'/Feature_Extraction/'+filename_epochs_fp1)
df_epochs_all = pd.read_csv(path+'/Feature_Extraction/'+filename_epochs_all)

# Epochs and Frequency Bands feature extraction
df_epochs_bands_fp1 = pd.read_csv(path+'/Feature_Extraction/'+filename_epochs_bnds_fp1)
df_epochs_bands_all = pd.read_csv(path+'/Feature_Extraction/'+filename_epochs_bdns_all)

# Shape of the data
print('Participants info:',df_participants.shape)
print('Epochs base Feature Extraction - FP1:',df_epochs_fp1.shape)
print('Epochs base Feature Extraction - All Channels:',df_epochs_all.shape)
print('Epochs base with Freq. Bands Feature Extraction - FP1:',df_epochs_bands_fp1.shape)
print('Epochs base with Freq. Bands Feature Extraction - All Channels:',df_epochs_bands_all.shape)

Participants info: (51, 5)
Epochs base Feature Extraction - FP1: (714, 11)
Epochs base Feature Extraction - All Channels: (714, 11)
Epochs base with Freq. Bands Feature Extraction - FP1: (714, 31)
Epochs base with Freq. Bands Feature Extraction - All Channels: (714, 31)


## Train Test Split


In [3]:
df_participants.head(10)

Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-002,1,78,0,22
1,sub-004,1,67,0,20
2,sub-005,0,70,0,22
3,sub-006,1,61,0,14
4,sub-007,1,79,0,20
5,sub-008,0,62,0,16
6,sub-009,1,77,0,23
7,sub-010,0,69,0,20
8,sub-011,0,71,0,22
9,sub-012,0,63,0,18


In [4]:
df_participants.groupby('Group')['participant_id'].count()

Group
0    33
1    18
Name: participant_id, dtype: int64

In [5]:
X = df_participants.drop(columns=['Group'], axis=1)
y = df_participants['Group']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

print('Train',len(y_train))
print('Test',len(y_test))

Train 38
Test 13


### Train Test split transfer to Epochs based Dataframes


#### Epoch based FP1


In [6]:
df_epochs_fp1.head()

Unnamed: 0,subject_id,age,gender,epoch_id,total_channels,total_power,avg_power,relative_power,peak_freq,spectral_entropy,group
0,sub-002,78,1,0,19.0,8.138737e-07,3.255495e-08,0.480204,1.953125,1.3e-05,0
1,sub-002,78,1,1,19.0,6.457802e-07,2.583121e-08,0.373726,1.953125,1e-05,0
2,sub-002,78,1,2,19.0,7.862074e-07,3.14483e-08,0.397717,1.953125,1.3e-05,0
3,sub-002,78,1,3,19.0,8.496011e-07,3.398405e-08,0.458611,1.953125,1.3e-05,0
4,sub-002,78,1,4,19.0,7.50318e-07,3.001272e-08,0.402417,1.953125,1.2e-05,0


In [7]:
cols_to_drop = ['subject_id','group']

train_fp1 = df_epochs_fp1[df_epochs_fp1['subject_id'].isin(X_train['participant_id'])]
test_fp1 = df_epochs_fp1[df_epochs_fp1['subject_id'].isin(X_test['participant_id'])]

print(train_fp1.shape)
print(test_fp1.shape)

scaler_1 = StandardScaler()

X_train_fp1 = scaler_1.fit_transform(train_fp1.drop(cols_to_drop, axis=1))
y_train_fp1 = train_fp1['group']

X_test_fp1 = scaler_1.transform(test_fp1.drop(cols_to_drop, axis=1))
y_test_fp1 = test_fp1['group']

(532, 11)
(182, 11)


#### Epochs based All Channels


In [8]:
df_epochs_all.head()

Unnamed: 0,subject_id,age,gender,epoch_id,total_channels,total_power,avg_power,relative_power,peak_freq,spectral_entropy,group
0,sub-002,78,1,0,19.0,8.34604e-07,3.338416e-08,0.478456,1.953125,1.3e-05,0
1,sub-002,78,1,1,19.0,6.761889e-07,2.704755e-08,0.374997,1.953125,1.1e-05,0
2,sub-002,78,1,2,19.0,8.086165e-07,3.234466e-08,0.39457,1.953125,1.3e-05,0
3,sub-002,78,1,3,19.0,8.440792e-07,3.376317e-08,0.447835,1.953125,1.3e-05,0
4,sub-002,78,1,4,19.0,7.673421e-07,3.069369e-08,0.416419,1.953125,1.2e-05,0


In [9]:
cols_to_drop = ['subject_id','group']

train_all = df_epochs_all[df_epochs_all['subject_id'].isin(X_train['participant_id'])]
test_all = df_epochs_all[df_epochs_all['subject_id'].isin(X_test['participant_id'])]

print(train_fp1.shape)
print(test_fp1.shape)

scaler_2 = StandardScaler()

X_train_all = scaler_2.fit_transform(train_all.drop(cols_to_drop, axis=1))
y_train_all = train_all['group']

X_test_all = scaler_2.transform(test_all.drop(cols_to_drop, axis=1))
y_test_all = test_all['group']

(532, 11)
(182, 11)


#### Epochs based with Frequency Bands FP1


In [10]:
df_epochs_bands_fp1.head()

Unnamed: 0,epoch_id,total_channels,delta_total_power,delta_avg_power,delta_relative_power,delta_peak_freq,delta_spectral_entropy,theta_total_power,theta_avg_power,theta_relative_power,...,beta_spectral_entropy,gamma_total_power,gamma_avg_power,gamma_relative_power,gamma_peak_freq,gamma_spectral_entropy,subject_id,age,gender,group
0,0,19.0,5.35644e-07,2.67822e-07,0.316042,1.953125,8e-06,1.703684e-08,8.518421e-09,0.010052,...,1e-06,1.29257e-07,1.615713e-08,0.076265,44.921875,2e-06,sub-002,78,1,0
1,1,19.0,3.903346e-07,1.951673e-07,0.225894,1.953125,6e-06,2.239278e-08,1.119639e-08,0.012959,...,2e-06,1.108229e-07,1.385286e-08,0.064135,35.15625,2e-06,sub-002,78,1,0
2,2,19.0,5.090068e-07,2.545034e-07,0.25749,1.953125,8e-06,3.471327e-08,1.735663e-08,0.01756,...,2e-06,8.779028e-08,1.097378e-08,0.04441,37.109375,2e-06,sub-002,78,1,0
3,3,19.0,5.687169e-07,2.843584e-07,0.306991,1.953125,8e-06,2.759734e-08,1.379867e-08,0.014897,...,1e-06,1.364043e-07,1.705053e-08,0.073631,39.0625,2e-06,sub-002,78,1,0
4,4,19.0,5.352662e-07,2.676331e-07,0.287079,1.953125,8e-06,2.820804e-08,1.410402e-08,0.015129,...,1e-06,9.871103e-08,1.233888e-08,0.052942,42.96875,2e-06,sub-002,78,1,0


## Predictions


### Epochs Based - FP1 Channel


XGBoost


In [11]:
xgb_classifier = xgb.XGBClassifier()
xgb_classifier.fit(X_train_fp1, y_train_fp1)

xgb_predict = xgb_classifier.predict(X_test_fp1)

# Accuracy
accuracy = accuracy_score(y_test_fp1, xgb_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 29.12%


AdaBoost


In [12]:
adaboost_classifier = AdaBoostClassifier()
adaboost_classifier.fit(X_train_fp1, y_train_fp1)

adaboost_predict = adaboost_classifier.predict(X_test_fp1)

# Accuracy
accuracy = accuracy_score(y_test_fp1, adaboost_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 37.91%


Random Forest


In [13]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train_fp1, y_train_fp1)

rf_predict = rf_classifier.predict(X_test_fp1)

# Accuracy
accuracy = accuracy_score(y_test_fp1, rf_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 46.70%


Support Vector Classifier


In [14]:
svc_classifier = SVC()
svc_classifier.fit(X_train_fp1, y_train_fp1)

svc_predict = svc_classifier.predict(X_test_fp1)

# Accuracy
accuracy = accuracy_score(y_test_fp1, svc_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 64.84%


### Epochs Based - All Channels


XGBoost


In [15]:
xgb_classifier = xgb.XGBClassifier()
xgb_classifier.fit(X_train_all, y_train_all)

xgb_predict = xgb_classifier.predict(X_test_all)

# Accuracy
accuracy = accuracy_score(y_test_all, xgb_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 32.97%


AdaBoost


In [16]:
adaboost_classifier = AdaBoostClassifier()
adaboost_classifier.fit(X_train_all, y_train_all)

adaboost_predict = adaboost_classifier.predict(X_test_all)

# Accuracy
accuracy = accuracy_score(y_test_all, adaboost_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 37.91%


Random Forest


In [17]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train_all, y_train_all)

rf_predict = rf_classifier.predict(X_test_all)

# Accuracy
accuracy = accuracy_score(y_test_all, rf_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 46.15%


Support Vector Classifier


In [18]:
svc_classifier = SVC()
svc_classifier.fit(X_train_all, y_train_all)

svc_predict = svc_classifier.predict(X_test_all)

# Accuracy
accuracy = accuracy_score(y_test_all, svc_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 62.09%


In [19]:
test_all['Predict'] = svc_predict

test_all.groupby('subject_id')['Predict'].nunique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_all['Predict'] = svc_predict


subject_id
sub-006    1
sub-011    2
sub-015    1
sub-016    2
sub-020    2
sub-027    1
sub-034    2
sub-035    2
sub-036    2
sub-076    1
sub-079    2
sub-082    2
sub-087    2
Name: Predict, dtype: int64

In [20]:
test_all.groupby('subject_id')[['group','Predict']].value_counts()

subject_id  group  Predict
sub-006     0      0          14
sub-011     0      0          12
                   1           2
sub-015     0      1          14
sub-016     0      0          12
                   1           2
sub-020     0      0          13
                   1           1
sub-027     0      0          14
sub-034     0      0          13
                   1           1
sub-035     0      0           9
                   1           5
sub-036     0      0          11
                   1           3
sub-076     1      0          14
sub-079     1      0          11
                   1           3
sub-082     1      1          11
                   0           3
sub-087     1      0          13
                   1           1
dtype: int64

### Epochs Based with Frequency Bands - FP1 Channel


XGBoost


In [21]:
xgb_classifier = xgb.XGBClassifier()
xgb_classifier.fit(X_train_fp1, y_train_fp1)

xgb_predict = xgb_classifier.predict(X_test_fp1)

# Accuracy
accuracy = accuracy_score(y_test_fp1, xgb_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 29.12%


AdaBoost


In [22]:
adaboost_classifier = AdaBoostClassifier()
adaboost_classifier.fit(X_train_fp1, y_train_fp1)

adaboost_predict = adaboost_classifier.predict(X_test_fp1)

# Accuracy
accuracy = accuracy_score(y_test_fp1, adaboost_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 37.91%


Random Forest


In [23]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train_fp1, y_train_fp1)

rf_predict = rf_classifier.predict(X_test_fp1)

# Accuracy
accuracy = accuracy_score(y_test_fp1, rf_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 40.66%


Support Vector Classifier


In [24]:
svc_classifier = SVC()
svc_classifier.fit(X_train_fp1, y_train_fp1)

svc_predict = svc_classifier.predict(X_test_fp1)

# Accuracy
accuracy = accuracy_score(y_test_fp1, svc_predict)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 64.84%
