depression vs control - SVM (RBF kernel)

In [74]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay 
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import cross_val_score

import os
import sys
import random

from matplotlib import pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import SGD, Nadam, Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.models import Sequential, load_model

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras import optimizers
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.svm import SVC

In [75]:
# Hyperparameters and paths shared by the cells below.

step = 60  # number of samples the window start advances between consecutive segments
batch_size = 16  # not referenced by any cell visible in this notebook
verbose = 1  # not referenced by any cell visible in this notebook
# Root folder holding scores7.csv and the per-participant activity files.
# Set the THESIS_DATA_DIR environment variable to run on another machine;
# the default keeps the original location.
data_dir = os.environ.get("THESIS_DATA_DIR", "C:/Users/086096/thesis_files/")
segment_length = 960  # samples per window (the "16 hours" section of this notebook)

In [76]:
def segments_and_labels(dataset_dir, segment_length, step):
    """Cut every participant's activity recording into fixed-length windows.

    Reads ``scores7.csv`` from ``dataset_dir``. For each value in its
    ``number`` column, loads ``<dataset_dir>/<prefix>/<number>.csv`` (where
    ``prefix`` is the part of ``number`` before the first underscore) and
    slides a window of ``segment_length`` samples over the ``Magnitude.mean``
    column, moving the window start forward by ``step`` samples each time.
    Every window is labelled with that participant's ``exp_or_cont`` value;
    missing ``exp_or_cont`` values are treated as 0.

    Parameters
    ----------
    dataset_dir : str
        Folder containing ``scores7.csv`` and the per-participant sub-folders.
    segment_length : int
        Number of samples in each window.
    step : int
        Offset, in samples, between the starts of consecutive windows.

    Returns
    -------
    segments : numpy.ndarray
        float32 array of shape ``(n_windows, segment_length)``.
    labels : numpy.ndarray
        float32 array of shape ``(n_windows,)``, one label per window.
    input_shape : int
        Width of ``segments`` (equal to ``segment_length``).
    """
    scores = pd.read_csv(os.path.join(dataset_dir, 'scores7.csv'))
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # the column selection: the chained in-place form is deprecated and leaves
    # the frame untouched when pandas copy-on-write is active.
    scores['exp_or_cont'] = scores['exp_or_cont'].fillna(0)

    segments = []
    labels = []

    for person in scores['number']:
        # Label of the first score row matching this participant id.
        label = scores.loc[scores['number'] == person, 'exp_or_cont'].values[0]
        filepath = os.path.join(dataset_dir, person.split('_')[0], f'{person}.csv')
        # Extract the signal once per participant rather than once per window.
        magnitude = pd.read_csv(filepath)['Magnitude.mean'].values

        # NOTE(review): the stop bound excludes the window that starts exactly
        # at len(magnitude) - segment_length, so a final full-length window can
        # be dropped. Kept as-is so sample counts match earlier runs - confirm
        # whether that is intended.
        for start in range(0, len(magnitude) - segment_length, step):
            segments.append(magnitude[start:start + segment_length])
            labels.append(label)

    # reshape keeps the (0, segment_length) shape when no window was produced.
    segments = np.asarray(segments, dtype='float32').reshape(-1, segment_length)
    labels = np.asarray(labels, dtype='float32')
    input_shape = segments.shape[1]

    return segments, labels, input_shape

In [77]:
# Quick look at the raw (segments, labels, input_shape) tuple the loader returns.
print(segments_and_labels(dataset_dir=data_dir, segment_length=segment_length, step=step))

(array([[0.043232, 0.08168 , 0.01811 , ..., 0.062163, 0.011337, 0.010288],
       [0.128714, 0.101046, 0.331317, ..., 0.055553, 0.031634, 0.05851 ],
       [0.006379, 0.007886, 0.01208 , ..., 0.016275, 0.085776, 0.035873],
       ...,
       [0.028381, 0.112372, 0.047427, ..., 0.037865, 0.024527, 0.059238],
       [0.073108, 0.043552, 0.070973, ..., 0.02016 , 0.026588, 0.008958],
       [0.042946, 0.062584, 0.074234, ..., 0.006881, 0.008216, 0.00673 ]],
      dtype=float32), array([1., 1., 1., ..., 0., 0., 0.], dtype=float32), 960)


In [78]:
# Build the windowed dataset with the hyperparameters defined above.
segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

In [79]:
# (number of windows, samples per window)
print(np.shape(segments))

(4368, 960)


In [80]:
# Feature matrix / target vector aliases used by the cross-validation cells.
X, y = segments, labels

# Show shapes, then the arrays themselves, then whether X contains any NaN.
for item in (y.shape, X.shape, y, X, np.isnan(X).any()):
    print(item)


(4368,)
(4368, 960)
[1. 1. 1. ... 0. 0. 0.]
[[0.043232 0.08168  0.01811  ... 0.062163 0.011337 0.010288]
 [0.128714 0.101046 0.331317 ... 0.055553 0.031634 0.05851 ]
 [0.006379 0.007886 0.01208  ... 0.016275 0.085776 0.035873]
 ...
 [0.028381 0.112372 0.047427 ... 0.037865 0.024527 0.059238]
 [0.073108 0.043552 0.070973 ... 0.02016  0.026588 0.008958]
 [0.042946 0.062584 0.074234 ... 0.006881 0.008216 0.00673 ]]
False


16 hours - 960 timesteps

In [81]:
# 5-fold cross-validation of an RBF-kernel SVM on the current X / y.
# NOTE(review): windows are cut with step=60 while each window is
# segment_length samples long, so consecutive windows from one recording
# overlap. A shuffled KFold can put overlapping windows into both the training
# and the test fold, so these scores may be optimistic - consider a split
# grouped by participant.
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)
kf = KFold(n_splits=5, shuffle=True, random_state=777)

# metric name understood by scikit-learn -> heading printed above the fold scores
metric_labels = {
    'accuracy': "Accuracy",
    'precision': "Precision",
    'recall': "Recall",
    'f1': "f1",
}

scores_acc = cross_validate(svclassifier, X, y, scoring=list(metric_labels), cv=kf)

for metric, heading in metric_labels.items():
    fold_scores = scores_acc[f'test_{metric}']
    print(f"{heading}:")
    print(fold_scores)
    print(f"Mean {metric}:")
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.61899314 0.60411899 0.62128146 0.61741123 0.62084765]
Mean accuracy:
0.6165304940222962

Precision:
[0.62135922 0.64016736 0.59217877 0.62809917 0.61623616]
Mean precision:
0.6196081388365541

Recall:
[0.69868996 0.6375     0.73953488 0.66375546 0.73085339]
Mean recall:
0.6940667380505986

f1:
[0.65775951 0.6388309  0.65770424 0.64543524 0.66866867]
Mean f1:
0.6536797114262434



24 hours - 1440 timesteps

In [83]:
# Rebuild the dataset with 1440-sample windows (the "24 hours" setting).
segment_length = 1440
segments, labels, input_shape = segments_and_labels(data_dir, segment_length, step)

X, y = segments, labels
for arr in (y, X):
    print(arr.shape)

(4048,)
(4048, 1440)


In [84]:
# 5-fold cross-validation of an RBF-kernel SVM on the current X / y.
# NOTE(review): windows are cut with step=60 while each window is
# segment_length samples long, so consecutive windows from one recording
# overlap. A shuffled KFold can put overlapping windows into both the training
# and the test fold, so these scores may be optimistic - consider a split
# grouped by participant.
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)
kf = KFold(n_splits=5, shuffle=True, random_state=777)

# metric name understood by scikit-learn -> heading printed above the fold scores
metric_labels = {
    'accuracy': "Accuracy",
    'precision': "Precision",
    'recall': "Recall",
    'f1': "f1",
}

scores_acc = cross_validate(svclassifier, X, y, scoring=list(metric_labels), cv=kf)

for metric, heading in metric_labels.items():
    fold_scores = scores_acc[f'test_{metric}']
    print(f"{heading}:")
    print(fold_scores)
    print(f"Mean {metric}:")
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.64814815 0.66296296 0.65185185 0.61681088 0.631644  ]
Mean accuracy:
0.6422835691068076

Precision:
[0.66949153 0.66863905 0.62473795 0.60455487 0.62601626]
Mean precision:
0.6386879299515723

Recall:
[0.71011236 0.76351351 0.74314214 0.71046229 0.72985782]
Mean recall:
0.7314176249424632

f1:
[0.68920393 0.71293375 0.67881549 0.65324385 0.67396061]
Mean f1:
0.6816315260207964



48 hours - 2880 timesteps

In [85]:
# Rebuild the dataset with 2880-sample windows (the "48 hours" setting).
segment_length = 2880
segments, labels, input_shape = segments_and_labels(data_dir, segment_length, step)

X, y = segments, labels
for arr in (y, X):
    print(arr.shape)

(3088,)
(3088, 2880)


In [87]:
# 5-fold cross-validation of an RBF-kernel SVM on the current X / y.
# NOTE(review): windows are cut with step=60 while each window is
# segment_length samples long, so consecutive windows from one recording
# overlap. A shuffled KFold can put overlapping windows into both the training
# and the test fold, so these scores may be optimistic - consider a split
# grouped by participant.
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)
kf = KFold(n_splits=5, shuffle=True, random_state=777)

# metric name understood by scikit-learn -> heading printed above the fold scores
metric_labels = {
    'accuracy': "Accuracy",
    'precision': "Precision",
    'recall': "Recall",
    'f1': "f1",
}

scores_acc = cross_validate(svclassifier, X, y, scoring=list(metric_labels), cv=kf)

for metric, heading in metric_labels.items():
    fold_scores = scores_acc[f'test_{metric}']
    print(f"{heading}:")
    print(fold_scores)
    print(f"Mean {metric}:")
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.70550162 0.66343042 0.70226537 0.66774716 0.66450567]
Mean accuracy:
0.6806900494615873

Precision:
[0.71621622 0.65625    0.69109948 0.65289256 0.66129032]
Mean precision:
0.6755497154440245

Recall:
[0.7748538  0.76829268 0.8        0.75       0.75229358]
Mean recall:
0.7690880124156142

f1:
[0.74438202 0.70786517 0.74157303 0.69808542 0.70386266]
Mean f1:
0.7191536610796423



72 hours - 4320 timesteps

In [88]:
# Rebuild the dataset with 4320-sample windows (the "72 hours" setting).
segment_length = 4320
segments, labels, input_shape = segments_and_labels(data_dir, segment_length, step)

X, y = segments, labels
for arr in (y, X):
    print(arr.shape)

(2128,)
(2128, 4320)


In [89]:
# 5-fold cross-validation of an RBF-kernel SVM on the current X / y.
# NOTE(review): windows are cut with step=60 while each window is
# segment_length samples long, so consecutive windows from one recording
# overlap. A shuffled KFold can put overlapping windows into both the training
# and the test fold, so these scores may be optimistic - consider a split
# grouped by participant.
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)
kf = KFold(n_splits=5, shuffle=True, random_state=777)

# metric name understood by scikit-learn -> heading printed above the fold scores
metric_labels = {
    'accuracy': "Accuracy",
    'precision': "Precision",
    'recall': "Recall",
    'f1': "f1",
}

scores_acc = cross_validate(svclassifier, X, y, scoring=list(metric_labels), cv=kf)

for metric, heading in metric_labels.items():
    fold_scores = scores_acc[f'test_{metric}']
    print(f"{heading}:")
    print(fold_scores)
    print(f"Mean {metric}:")
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.68779343 0.74178404 0.70187793 0.67529412 0.67764706]
Mean accuracy:
0.6968793151063243

Precision:
[0.69348659 0.75510204 0.70075758 0.66666667 0.67132867]
Mean precision:
0.6974683089215108

Recall:
[0.77350427 0.78723404 0.79399142 0.77876106 0.81702128]
Mean recall:
0.790102414181825

f1:
[0.73131313 0.77083333 0.7444668  0.71836735 0.73704415]
Mean f1:
0.7404049516526781



96 hours - 5760 timesteps

In [90]:
# Rebuild the dataset with 5760-sample windows (the "96 hours" setting).
segment_length = 5760
segments, labels, input_shape = segments_and_labels(data_dir, segment_length, step)

X, y = segments, labels
for arr in (y, X):
    print(arr.shape)

(1217,)
(1217, 5760)


In [91]:
# 5-fold cross-validation of an RBF-kernel SVM on the current X / y.
# NOTE(review): windows are cut with step=60 while each window is
# segment_length samples long, so consecutive windows from one recording
# overlap. A shuffled KFold can put overlapping windows into both the training
# and the test fold, so these scores may be optimistic - consider a split
# grouped by participant.
svclassifier = SVC(kernel="rbf", C=1.2, random_state=777)
kf = KFold(n_splits=5, shuffle=True, random_state=777)

# metric name understood by scikit-learn -> heading printed above the fold scores
metric_labels = {
    'accuracy': "Accuracy",
    'precision': "Precision",
    'recall': "Recall",
    'f1': "f1",
}

scores_acc = cross_validate(svclassifier, X, y, scoring=list(metric_labels), cv=kf)

for metric, heading in metric_labels.items():
    fold_scores = scores_acc[f'test_{metric}']
    print(f"{heading}:")
    print(fold_scores)
    print(f"Mean {metric}:")
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.72540984 0.70491803 0.74485597 0.7037037  0.74897119]
Mean accuracy:
0.725571746609998

Precision:
[0.68306011 0.72189349 0.76506024 0.70481928 0.72413793]
Mean precision:
0.7197942099041299

Recall:
[0.93283582 0.82993197 0.84666667 0.83571429 0.90647482]
Mean recall:
0.8703247132418952

f1:
[0.78864353 0.7721519  0.80379747 0.76470588 0.80511182]
Mean f1:
0.7868821207301678

