ADHD vs control - SVM

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay 
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import cross_val_score

import os
import sys
import random

from matplotlib import pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import SGD, Nadam, Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.models import Sequential, load_model

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras import optimizers
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.svm import SVC

In [113]:
# --- configuration ---------------------------------------------------------

# Where the data lives and how each recording is windowed.
data_dir = "Thesis_files"   # folder holding scores6.csv and the per-participant CSVs
segment_length = 960        # samples per window (the notebook labels 960 timesteps as 16 hours)
step = 60                   # distance, in samples, between the starts of consecutive windows

# Training-related settings; the SVM cells visible in this notebook do not
# reference them (presumably shared with the neural-network experiments —
# TODO confirm before removing).
batch_size = 16
learning_rate = 0.0001
verbose = 1

In [116]:
def segments_and_labels(dataset_dir, segment_length, step):
    """Cut every participant's activity recording into fixed-length windows.

    Reads ``scores6.csv`` from ``dataset_dir`` and, for each entry of its
    ``number`` column, loads ``<dataset_dir>/<prefix>/<number>.csv`` where
    ``<prefix>`` is the part of ``number`` before the first underscore.  The
    ``Magnitude.mean`` column of that file is sliced into windows of
    ``segment_length`` samples whose start indices are ``step`` samples apart.
    Every window is labelled with the participant's ``exp_or_cont`` value;
    missing ``exp_or_cont`` values are treated as 0.

    Parameters
    ----------
    dataset_dir : str
        Directory containing ``scores6.csv`` and the per-participant folders.
    segment_length : int
        Number of samples in each window.
    step : int
        Offset between the start indices of consecutive windows.

    Returns
    -------
    segments : numpy.ndarray
        ``float32`` array of shape ``(n_windows, segment_length)``.
    labels : numpy.ndarray
        ``float32`` array of shape ``(n_windows,)``, aligned with ``segments``.
    input_shape : int
        Width of one window, i.e. ``segments.shape[1]``.
    """
    scores = pd.read_csv(os.path.join(dataset_dir, 'scores6.csv'))
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # a column selection: the in-place form is deprecated and does not update
    # the frame once pandas copy-on-write is active.
    scores['exp_or_cont'] = scores['exp_or_cont'].fillna(0)

    segments = []
    labels = []

    for person in scores['number']:
        # Label of the first row matching this participant id.
        label = scores.loc[scores['number'] == person, 'exp_or_cont'].values[0]

        filepath = os.path.join(dataset_dir, person.split('_')[0], f'{person}.csv')
        # Extract the signal once per file rather than once per window.
        magnitude = pd.read_csv(filepath)['Magnitude.mean'].values

        # "+ 1" keeps the last full window: a start index is valid as long as
        # start + segment_length <= len(magnitude).  Without it a recording of
        # exactly segment_length samples would contribute no window at all.
        for start in range(0, len(magnitude) - segment_length + 1, step):
            segments.append(magnitude[start:start + segment_length])
            labels.append(label)

    # The reshape also gives the empty case a well-defined (0, segment_length) shape.
    segments = np.asarray(segments, dtype='float32').reshape(-1, segment_length)
    labels = np.asarray(labels, dtype='float32')
    input_shape = segments.shape[1]

    return segments, labels, input_shape



In [117]:
# Quick look at the raw loader output: the (segments, labels, input_shape) tuple.
print(segments_and_labels(dataset_dir=data_dir, segment_length=segment_length, step=step))

(array([[0.043232, 0.08168 , 0.01811 , ..., 0.062163, 0.011337, 0.010288],
       [0.128714, 0.101046, 0.331317, ..., 0.055553, 0.031634, 0.05851 ],
       [0.006379, 0.007886, 0.01208 , ..., 0.016275, 0.085776, 0.035873],
       ...,
       [0.028381, 0.112372, 0.047427, ..., 0.037865, 0.024527, 0.059238],
       [0.073108, 0.043552, 0.070973, ..., 0.02016 , 0.026588, 0.008958],
       [0.042946, 0.062584, 0.074234, ..., 0.006881, 0.008216, 0.00673 ]],
      dtype=float32), array([1., 1., 1., ..., 0., 0., 0.], dtype=float32), 960)


In [118]:
# Build the windowed dataset with the configured window length and stride.
segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

In [119]:
# Alias the dataset to the conventional scikit-learn names, then show the
# shapes followed by the (numpy-truncated) contents, one item per line.
X, y = segments, labels
print(y.shape, X.shape, y, X, sep="\n")

(21696,)
(21696, 960)
[1. 1. 1. ... 0. 0. 0.]
[[0.043232 0.08168  0.01811  ... 0.062163 0.011337 0.010288]
 [0.128714 0.101046 0.331317 ... 0.055553 0.031634 0.05851 ]
 [0.006379 0.007886 0.01208  ... 0.016275 0.085776 0.035873]
 ...
 [0.028381 0.112372 0.047427 ... 0.037865 0.024527 0.059238]
 [0.073108 0.043552 0.070973 ... 0.02016  0.026588 0.008958]
 [0.042946 0.062584 0.074234 ... 0.006881 0.008216 0.00673 ]]


16 hours - 960 timesteps

In [103]:
# RBF-kernel SVM evaluated with shuffled 5-fold cross-validation on (X, y).
# NOTE(review): KFold shuffles individual windows, so windows cut from the same
# recording (which overlap whenever step < segment_length) can end up in both
# the training and the test fold. A subject-wise split is probably needed for
# an unbiased estimate — TODO confirm.
kf = KFold(n_splits=5, shuffle=True, random_state=777)
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)

scores_acc = cross_validate(
    svclassifier,
    X,
    y,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# For each metric: per-fold values, their mean, then a blank separator line.
for heading, mean_heading, key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    print(heading)
    print(scores_acc[key])
    print(mean_heading)
    print(np.mean(scores_acc[key]))
    print()

{'fit_time': array([11.06829619, 10.97786307, 10.86656642, 10.91953444, 10.8577733 ]), 'score_time': array([3.97131968, 3.97108865, 3.96656418, 3.94510531, 3.94220257]), 'test_accuracy': array([0.55865922, 0.56584198, 0.54988029, 0.57781325, 0.53115016]), 'test_precision': array([0.5352518 , 0.57878315, 0.54957983, 0.5806962 , 0.54290172]), 'test_recall': array([0.6179402 , 0.5751938 , 0.52487961, 0.58161648, 0.54205607]), 'test_f1': array([0.57363146, 0.57698289, 0.53694581, 0.58115598, 0.54247857])}
Accuracy:
[0.55865922 0.56584198 0.54988029 0.57781325 0.53115016]
Mean accuracy:
0.5566689784772139

Precision:
[0.5352518  0.57878315 0.54957983 0.5806962  0.54290172]
Mean precision:
0.5574425400840531

Recall:
[0.6179402  0.5751938  0.52487961 0.58161648 0.54205607]
Mean recall:
0.5683372338187462

f1:
[0.57363146 0.57698289 0.53694581 0.58115598 0.54247857]
Mean f1:
0.5622389412798328



24 hours - 1440 timesteps

In [104]:
# Rebuild the dataset with 1440-sample windows; this overwrites the
# notebook-level segment_length, segments, labels, X and y.
segment_length = 1440

segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

X, y = segments, labels
print(y.shape, X.shape, sep="\n")

(5787,)
(5787, 1440)


In [105]:
# RBF-kernel SVM evaluated with shuffled 5-fold cross-validation on (X, y).
# NOTE(review): KFold shuffles individual windows, so windows cut from the same
# recording (which overlap whenever step < segment_length) can end up in both
# the training and the test fold. A subject-wise split is probably needed for
# an unbiased estimate — TODO confirm.
kf = KFold(n_splits=5, shuffle=True, random_state=777)
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)

scores_acc = cross_validate(
    svclassifier,
    X,
    y,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# For each metric: per-fold values, their mean, then a blank separator line.
for heading, mean_heading, key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    print(heading)
    print(scores_acc[key])
    print(mean_heading)
    print(np.mean(scores_acc[key]))
    print()

Accuracy:
[0.55181347 0.57772021 0.54624028 0.55834054 0.55229041]
Mean accuracy:
0.5572809794850897

Precision:
[0.56572379 0.57142857 0.54003407 0.55144695 0.55205047]
Mean precision:
0.5561367710359544

Recall:
[0.56856187 0.61005199 0.5541958  0.59652174 0.59931507]
Mean recall:
0.5857292955593358

f1:
[0.56713928 0.59010897 0.5470233  0.57309942 0.57471264]
Mean f1:
0.5704167213097965



48 hours - 2880 timesteps

In [106]:
# Rebuild the dataset with 2880-sample windows; this overwrites the
# notebook-level segment_length, segments, labels, X and y.
segment_length = 2880

segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

X, y = segments, labels
print(y.shape, X.shape, sep="\n")

(4376,)
(4376, 2880)


In [107]:
# RBF-kernel SVM evaluated with shuffled 5-fold cross-validation on (X, y).
# NOTE(review): KFold shuffles individual windows, so windows cut from the same
# recording (which overlap whenever step < segment_length) can end up in both
# the training and the test fold. A subject-wise split is probably needed for
# an unbiased estimate — TODO confirm.
kf = KFold(n_splits=5, shuffle=True, random_state=777)
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)

scores_acc = cross_validate(
    svclassifier,
    X,
    y,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# For each metric: per-fold values, their mean, then a blank separator line.
for heading, mean_heading, key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    print(heading)
    print(scores_acc[key])
    print(mean_heading)
    print(np.mean(scores_acc[key]))
    print()

Accuracy:
[0.57191781 0.52       0.59657143 0.57028571 0.552     ]
Mean accuracy:
0.5621549902152642

Precision:
[0.58526316 0.52941176 0.57407407 0.57264957 0.55060729]
Mean precision:
0.5624011713547317

Recall:
[0.60964912 0.5625     0.65647059 0.6036036  0.61538462]
Mean recall:
0.6095215860061061

f1:
[0.5972073  0.54545455 0.61251372 0.5877193  0.58119658]
Mean f1:
0.5848182900112945



72 hours - 4320 timesteps

In [108]:
# Rebuild the dataset with 4320-sample windows; this overwrites the
# notebook-level segment_length, segments, labels, X and y.
segment_length = 4320

segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

X, y = segments, labels
print(y.shape, X.shape, sep="\n")

(2984,)
(2984, 4320)


In [109]:
# RBF-kernel SVM evaluated with shuffled 5-fold cross-validation on (X, y).
# NOTE(review): KFold shuffles individual windows, so windows cut from the same
# recording (which overlap whenever step < segment_length) can end up in both
# the training and the test fold. A subject-wise split is probably needed for
# an unbiased estimate — TODO confirm.
kf = KFold(n_splits=5, shuffle=True, random_state=777)
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)

scores_acc = cross_validate(
    svclassifier,
    X,
    y,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# For each metric: per-fold values, their mean, then a blank separator line.
for heading, mean_heading, key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    print(heading)
    print(scores_acc[key])
    print(mean_heading)
    print(np.mean(scores_acc[key]))
    print()

Accuracy:
[0.55443886 0.55443886 0.50921273 0.49916248 0.5033557 ]
Mean accuracy:
0.524121727204254

Precision:
[0.56233422 0.55807365 0.53072626 0.50284091 0.51575931]
Mean precision:
0.5339468700585265

Recall:
[0.67731629 0.64169381 0.6031746  0.58803987 0.58631922]
Mean recall:
0.6193087587059821

f1:
[0.61449275 0.5969697  0.56463596 0.54211332 0.54878049]
Mean f1:
0.5733984439834103



96 hours - 5760 timesteps

In [110]:
# Rebuild the dataset with 5760-sample windows; this overwrites the
# notebook-level segment_length, segments, labels, X and y.
segment_length = 5760

segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

X, y = segments, labels
print(y.shape, X.shape, sep="\n")

(1614,)
(1614, 5760)


In [111]:
# RBF-kernel SVM evaluated with shuffled 5-fold cross-validation on (X, y).
# NOTE(review): KFold shuffles individual windows, so windows cut from the same
# recording (which overlap whenever step < segment_length) can end up in both
# the training and the test fold. A subject-wise split is probably needed for
# an unbiased estimate — TODO confirm.
kf = KFold(n_splits=5, shuffle=True, random_state=777)
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)

scores_acc = cross_validate(
    svclassifier,
    X,
    y,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# For each metric: per-fold values, their mean, then a blank separator line.
for heading, mean_heading, key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    print(heading)
    print(scores_acc[key])
    print(mean_heading)
    print(np.mean(scores_acc[key]))
    print()

Accuracy:
[0.54179567 0.52012384 0.59133127 0.57585139 0.57453416]
Mean accuracy:
0.5607272657346691

Precision:
[0.56306306 0.53556485 0.60730594 0.56223176 0.59808612]
Mean precision:
0.5732503473502348

Recall:
[0.71022727 0.74418605 0.74301676 0.78915663 0.70224719]
Mean recall:
0.7377667793065394

f1:
[0.6281407  0.62287105 0.66834171 0.6566416  0.64599483]
Mean f1:
0.6443979788680761

