# The Full Pipeline for Exploiting Loop-Like Exercices in The Classification of Parkinson's Disease

In [1]:
from dataaccess.filedatareader import FileDataReader

reader = FileDataReader('../data/Patients Records 05_04_2019')
info, data = reader.load_french(tasks=[3])

Loading the data, please wait.
Data loaded successfully.


In [2]:
from datamanipulation.datageneration import get_pd_hc_only, match_age_gender_pd
info, data = get_pd_hc_only(info, data)
info, data = match_age_gender_pd(info, data)
data.sort_index(inplace=True)

In [3]:
from datamanipulation.extraction import *
extracted = extract_features(data)

Started extracting features.
The following features were extracted successfully: ['Displacement x', 'Displacement y', 'Distance x', 'Distance y', 'Distance x-y', 'Velocity x', 'Velocity y', 'Velocity x-y', 'Acceleration x', 'Acceleration y', 'Acceleration x-y', 'Jerk x', 'Jerk y', 'Jerk x-y', 'ROC p / time', 'ROC al / time', 'ROC az / time', 'Slope', 'Slant']
Number of features: 19


In [4]:
standardized = extracted.copy()
for col in standardized.columns[1:]:
    mean = standardized[col].mean()
    std = standardized[col].std()
    standardized[col] = (standardized[col] - mean) / std

In [5]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
from datamanipulation.datageneration import get_samples
from tensorflow.keras.preprocessing.sequence import pad_sequences
X, y = get_samples(standardized, 'PD')
padding_val = 0
X_padded = pad_sequences(X, dtype='float32', padding='post', value=padding_val)

  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "


In [10]:
from sklearn.model_selection import train_test_split
stratify_with=standardized.groupby(['ID', 'Language', 'Task']).first().merge(info, on='ID')[['PD_x', 'Gender']]
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, stratify=stratify_with, random_state=42, test_size=0.25)

In [23]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dropout, Dense, Masking
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, confusion_matrix

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

confusions = []

for train_index, val_index in kf.split(X_train, y_train):
    X_train_cv, X_val = X_train[train_index], X_train[val_index]
    y_train_cv, y_val = y_train[train_index], y_train[val_index]

    # cnn
    model = Sequential()
    model.add(Masking(mask_value=padding_val, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Conv1D(32, 3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(64, 3, activation='relu'))
    
    # fcl
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification
    
    # compilation
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # training
    model.fit(X_train_cv, y_train_cv, epochs=6, batch_size=5, validation_data=(X_val, y_val))

    # evaluation on validation set
    y_pred = (model.predict(X_val) > 0.5).astype(int)
    confusion = confusion_matrix(y_val, y_pred)
    confusions.append(confusion)

confusions = np.array(confusions)
mean_conf = np.array([
    [confusions[:,0,0].sum()/confusions.shape[0], confusions[:,0,1].sum()/confusions.shape[0]],
    [confusions[:,1,0].sum()/confusions.shape[0], confusions[:,1,1].sum()/confusions.shape[0]]
])
tn, fp, fn, tp = mean_conf.reshape(-1)
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
accuracy = (tp + tn) / mean_conf.sum()
print('The mean accuracy, sensitivity and specificity over the 5 iterations of the 5 fold stratified cross validation:\n', accuracy, sensitivity, specificity)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
The mean accuracy, sensitivity and specificity over the 5 iterations of the 5 fold stratified cross validation:
 0.8000000000000002 0.7333333333333334 0.8666666666666667


In [68]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Masking
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, confusion_matrix

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

confusions = []

for train_index, val_index in kf.split(X_train, y_train):
    X_train_cv, X_val = X_train[train_index], X_train[val_index]
    y_train_cv, y_val = y_train[train_index], y_train[val_index]

    # cnn
    model = Sequential()
    model.add(Masking(mask_value=padding_val, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Conv1D(25, 25, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(25, 25, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(25, 25, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(25, 25, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(25, 25, activation='relu'))
    
    # fcl
    model.add(Flatten())
    #model.add(Dense(25, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification
    
    # compilation
    model.compile(optimizer=Adam(learning_rate=0.0005),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # training
    model.fit(X_train_cv, y_train_cv, epochs=10, batch_size=30, validation_data=(X_val, y_val))

    # evaluation on validation set
    y_pred = (model.predict(X_val) > 0.5).astype(int)
    confusion = confusion_matrix(y_val, y_pred)
    confusions.append(confusion)

confusions = np.array(confusions)
mean_conf = np.array([
    [confusions[:,0,0].sum()/confusions.shape[0], confusions[:,0,1].sum()/confusions.shape[0]],
    [confusions[:,1,0].sum()/confusions.shape[0], confusions[:,1,1].sum()/confusions.shape[0]]
])
tn, fp, fn, tp = mean_conf.reshape(-1)
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
accuracy = (tp + tn) / mean_conf.sum()
print('The mean accuracy, sensitivity and specificity over the 5 iterations of the 5 fold stratified cross validation:\n', accuracy, sensitivity, specificity)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
The mean accuracy, sensitivity and specificity over the 5 iterations of the 5 fold stratified cross validation:
 0.8333333333333334 0.7999999999999999 0.8666666666666667


use these guys next: 'French Level Written', 'Writing Frequency', 'Age', 'Gender'

In [64]:
model = ?
y_pred = (model.predict(X_test) > 0.5).astype(int)
confusion = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = confusion.reshape(-1)
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
accuracy = (tp + tn) / confusion.sum()
print('accuracy, sensitivity, specificity:', accuracy, sensitivity, specificity)

SyntaxError: invalid syntax (749672396.py, line 1)