In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay 
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import cross_val_score

import os
import sys
import random

from matplotlib import pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import SGD, Nadam, Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.models import Sequential, load_model

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, SimpleRNN, BatchNormalization, GRU
from keras import optimizers
from keras.regularizers import l2, l1
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

import math
import xgboost as xgb
from xgboost import cv, XGBClassifier
from sklearn.model_selection import cross_val_score, cross_validate

In [2]:
# hyperparameters
step = 60  # stride, in timesteps, between the starts of consecutive windows
batch_size = 16  # not referenced by the cells shown in this notebook section
verbose = 1  # not referenced by the cells shown in this notebook section
# Root folder holding scores6.csv and the per-participant activity files.
# Set the THESIS_DATA_DIR environment variable to run on another machine;
# the original absolute path stays as the default so existing runs are unchanged.
data_dir = os.environ.get("THESIS_DATA_DIR", "C:/Users/086096/thesis_files")
segment_length = 960  # timesteps per window (later cells overwrite this)

In [3]:
def segments_and_labels(dataset_dir, segment_length, step):
    """Cut each participant's activity recording into fixed-length windows.

    Reads ``scores6.csv`` from ``dataset_dir``; for every entry in its
    ``number`` column the file ``<dataset_dir>/<prefix>/<number>.csv`` is
    loaded, where ``<prefix>`` is the part of ``number`` before the first
    underscore. The ``Magnitude.mean`` column of that file is sliced into
    windows of ``segment_length`` samples whose starts are ``step`` samples
    apart, and each window is labelled with the participant's
    ``exp_or_cont`` value (missing values are treated as 0).

    Parameters
    ----------
    dataset_dir : str
        Folder containing ``scores6.csv`` and the per-participant sub-folders.
    segment_length : int
        Number of samples in every window.
    step : int
        Offset, in samples, between the starts of consecutive windows.

    Returns
    -------
    segments : numpy.ndarray
        ``float32`` array of shape ``(n_windows, segment_length)``.
    labels : numpy.ndarray
        ``float32`` array of shape ``(n_windows,)``.
    input_shape : int
        Width of ``segments`` (equal to ``segment_length``).
    """
    scores = pd.read_csv(os.path.join(dataset_dir, 'scores6.csv'))
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # a column selection: the in-place form acts on a temporary object under
    # pandas copy-on-write and would leave the NaN labels in `scores`.
    scores['exp_or_cont'] = scores['exp_or_cont'].fillna(0)

    segments = []
    labels = []

    for person in scores['number']:
        # Label of the first row matching this participant; looked up once
        # per participant rather than once per window.
        label = scores.loc[scores['number'] == person, 'exp_or_cont'].values[0]

        filepath = os.path.join(dataset_dir, person.split('_')[0], f'{person}.csv')
        activity = pd.read_csv(filepath)['Magnitude.mean'].values

        # "+ 1" keeps the window that ends exactly on the last sample; without
        # it a recording of exactly `segment_length` samples yields no window.
        for start in range(0, len(activity) - segment_length + 1, step):
            segments.append(activity[start : start + segment_length])
            labels.append(label)

    # reshape(-1, segment_length) also gives the empty case a well-defined
    # (0, segment_length) shape.
    segments = np.asarray(segments).reshape(-1, segment_length).astype('float32')
    labels = np.asarray(labels).astype('float32')
    input_shape = segments.shape[1]

    return segments, labels, input_shape

In [4]:
# Smoke check: print the (segments, labels, input_shape) tuple returned for the
# current configuration.
print(
    segments_and_labels(
        dataset_dir=data_dir, segment_length=segment_length, step=step
    )
)

(array([[0.073133, 0.021325, 0.029802, ..., 0.154651, 0.212445, 0.085252],
       [0.030406, 0.049192, 0.054587, ..., 0.01228 , 0.010198, 0.008677],
       [0.028689, 0.007497, 0.013025, ..., 0.027254, 0.014322, 0.018141],
       ...,
       [0.032726, 0.185987, 0.146053, ..., 0.032171, 0.032139, 0.032255],
       [0.029076, 0.033472, 0.044819, ..., 0.035565, 0.036123, 0.036146],
       [0.043793, 0.048195, 0.052622, ..., 0.033941, 0.031614, 0.035029]],
      dtype=float32), array([1., 1., 1., ..., 0., 0., 0.], dtype=float32), 960)


In [5]:
# Build the windowed dataset for the currently configured segment_length/step.
segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

In [6]:
# Hold out 20% of the windows with a fixed seed. These four arrays are not
# referenced again by the cells shown in this section.
X_train, X_test, y_train, y_test = train_test_split(
    segments,
    labels,
    test_size=0.2,
    random_state=777,
)

16 hours - 960 timesteps

In [None]:
# Construct the estimator through the `xgb` module namespace instead of the
# bare name: this cell rebinds `XGBClassifier` from the class to an instance,
# so calling `XGBClassifier()` again on a re-run would raise TypeError.
# The name is kept because the cross-validation cells below pass
# `XGBClassifier` as the estimator.
XGBClassifier = xgb.XGBClassifier()

In [11]:
# 5-fold shuffled cross-validation of the XGBoost estimator on the current windows.
# NOTE(review): KFold splits individual windows, and windows from one recording
# overlap whenever step < segment_length, so windows of the same participant can
# appear in both the training and the test fold — confirm this is intended.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    XGBClassifier,
    segments,
    labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# (heading, mean heading, result key) for each metric, in print order.
metric_report = (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
)
for heading, mean_heading, result_key in metric_report:
    fold_scores = scores_acc[result_key]
    print(heading)
    print(fold_scores)
    print(mean_heading)
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.56185156 0.55307263 0.54509178 0.54509178 0.54472843]
Mean accuracy:
0.5499672351850766

Precision:
[0.54226475 0.56910569 0.54438861 0.5497553  0.55901639]
Mean precision:
0.5529061497600598

Recall:
[0.56478405 0.54263566 0.52166934 0.5340729  0.53115265]
Mean recall:
0.5388629204196984

f1:
[0.55329536 0.55555556 0.53278689 0.54180064 0.54472843]
Mean f1:
0.5456333760952121



24 hours - 1440 timesteps

In [12]:
# Switch to 1440-timestep windows and rebuild the dataset.
segment_length = 1440

segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir, segment_length=segment_length, step=step
)

# X / y aliases for the feature matrix and label vector.
X, y = segments, labels
print(y.shape)
print(X.shape)

(5787,)
(5787, 1440)


In [13]:
# 5-fold shuffled cross-validation of the XGBoost estimator on the current windows.
# NOTE(review): KFold splits individual windows, and windows from one recording
# overlap whenever step < segment_length, so windows of the same participant can
# appear in both the training and the test fold — confirm this is intended.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    XGBClassifier,
    segments,
    labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# (heading, mean heading, result key) for each metric, in print order.
metric_report = (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
)
for heading, mean_heading, result_key in metric_report:
    fold_scores = scores_acc[result_key]
    print(heading)
    print(fold_scores)
    print(mean_heading)
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.53626943 0.53195164 0.55229041 0.56266206 0.55747623]
Mean accuracy:
0.5481299531424699

Precision:
[0.5539823  0.53001715 0.54770318 0.56238698 0.56338028]
Mean precision:
0.5514939791108544

Recall:
[0.52341137 0.5355286  0.54195804 0.54086957 0.54794521]
Mean recall:
0.5379425560159038

f1:
[0.53826311 0.53275862 0.54481547 0.55141844 0.55555556]
Mean f1:
0.5445622388661195



48 hours - 2880 timesteps

In [14]:
# Switch to 2880-timestep windows and rebuild the dataset.
segment_length = 2880

segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir, segment_length=segment_length, step=step
)

# X / y aliases for the feature matrix and label vector.
X, y = segments, labels
print(y.shape)
print(X.shape)

(4376,)
(4376, 2880)


In [15]:
# 5-fold shuffled cross-validation of the XGBoost estimator on the current windows.
# NOTE(review): KFold splits individual windows, and windows from one recording
# overlap whenever step < segment_length, so windows of the same participant can
# appear in both the training and the test fold — confirm this is intended.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    XGBClassifier,
    segments,
    labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# (heading, mean heading, result key) for each metric, in print order.
metric_report = (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
)
for heading, mean_heading, result_key in metric_report:
    fold_scores = scores_acc[result_key]
    print(heading)
    print(fold_scores)
    print(mean_heading)
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.51369863 0.52914286 0.52457143 0.53257143 0.52228571]
Mean accuracy:
0.5244540117416829

Precision:
[0.53537736 0.54347826 0.50997783 0.5389755  0.52654867]
Mean precision:
0.5308715240182172

Recall:
[0.49780702 0.50223214 0.54117647 0.54504505 0.53846154]
Mean recall:
0.5249444428991643

f1:
[0.51590909 0.52204176 0.52511416 0.54199328 0.53243848]
Mean f1:
0.5274993538647063



72 hours - 4320 timesteps

In [16]:
# Switch to 4320-timestep windows and rebuild the dataset.
segment_length = 4320

segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir, segment_length=segment_length, step=step
)

# X / y aliases for the feature matrix and label vector.
X, y = segments, labels
print(y.shape)
print(X.shape)

(2984,)
(2984, 4320)


In [17]:
# 5-fold shuffled cross-validation of the XGBoost estimator on the current windows.
# NOTE(review): KFold splits individual windows, and windows from one recording
# overlap whenever step < segment_length, so windows of the same participant can
# appear in both the training and the test fold — confirm this is intended.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    XGBClassifier,
    segments,
    labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# (heading, mean heading, result key) for each metric, in print order.
metric_report = (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
)
for heading, mean_heading, result_key in metric_report:
    fold_scores = scores_acc[result_key]
    print(heading)
    print(fold_scores)
    print(mean_heading)
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.52261307 0.49246231 0.50251256 0.52931323 0.53691275]
Mean accuracy:
0.5167627848414332

Precision:
[0.54320988 0.50598802 0.52866242 0.53225806 0.55016181]
Mean precision:
0.532056039538267

Recall:
[0.56230032 0.5504886  0.52698413 0.54817276 0.55374593]
Mean recall:
0.5483383463270648

f1:
[0.55259027 0.52730109 0.52782194 0.5400982  0.55194805]
Mean f1:
0.5399519100254057



96 hours - 5760 timesteps

In [18]:
# Switch to 5760-timestep windows and rebuild the dataset.
segment_length = 5760

segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir, segment_length=segment_length, step=step
)

# X / y aliases for the feature matrix and label vector.
X, y = segments, labels
print(y.shape)
print(X.shape)

(1614,)
(1614, 5760)


In [19]:
# 5-fold shuffled cross-validation of the XGBoost estimator on the current windows.
# NOTE(review): KFold splits individual windows, and windows from one recording
# overlap whenever step < segment_length, so windows of the same participant can
# appear in both the training and the test fold — confirm this is intended.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    XGBClassifier,
    segments,
    labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# (heading, mean heading, result key) for each metric, in print order.
metric_report = (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
)
for heading, mean_heading, result_key in metric_report:
    fold_scores = scores_acc[result_key]
    print(heading)
    print(fold_scores)
    print(mean_heading)
    print(np.mean(fold_scores))
    print()

Accuracy:
[0.44582043 0.45201238 0.43034056 0.44891641 0.54658385]
Mean accuracy:
0.46473472684268213

Precision:
[0.49246231 0.48780488 0.48677249 0.46938776 0.58695652]
Mean precision:
0.5046767906440455

Recall:
[0.55681818 0.58139535 0.51396648 0.55421687 0.60674157]
Mean recall:
0.5626276903211812

f1:
[0.52266667 0.53050398 0.5        0.50828729 0.59668508]
Mean f1:
0.531628604227423

