In [57]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay 
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import cross_val_score

import os
import sys
import random

from matplotlib import pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import SGD, Nadam, Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.models import Sequential, load_model

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, SimpleRNN, BatchNormalization, GRU
from keras import optimizers
from keras.regularizers import l2, l1
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

import math
import xgboost as xgb
from xgboost import cv, XGBClassifier
from sklearn.model_selection import cross_val_score, cross_validate

In [2]:
# Hyperparameters and paths shared by the cells below.
step = 60  # stride, in samples, between the starts of consecutive windows
batch_size = 16  # not referenced by any cell visible in this notebook
verbose = 1  # not referenced by any cell visible in this notebook
# Dataset root. Set the THESIS_DATA_DIR environment variable to run the notebook
# on another machine; the original location remains the default.
data_dir = os.environ.get("THESIS_DATA_DIR", "C:/Users/086096/thesis_files")
segment_length = 960  # samples per window (the 16-hour configuration)

In [3]:
def segments_and_labels(dataset_dir, segment_length, step, return_groups=False):
    """Cut every recording into fixed-length windows and label each window.

    Reads ``scores7.csv`` from ``dataset_dir``. For every entry of its
    ``number`` column, the recording is loaded from
    ``<dataset_dir>/<text before the first '_' in number>/<number>.csv`` and its
    ``Magnitude.mean`` column is sliced into windows of ``segment_length``
    samples whose start positions advance by ``step``.

    Args:
        dataset_dir: Directory holding ``scores7.csv`` and the per-recording
            sub-directories.
        segment_length: Number of samples in each window.
        step: Distance, in samples, between consecutive window starts.
        return_groups: When True, additionally return the ``number`` value each
            window was cut from, so callers can use group-aware splitting.

    Returns:
        ``(segments, labels, input_shape)`` where ``segments`` is a float32
        array of shape ``(n_windows, segment_length)``, ``labels`` is a float32
        array with one ``exp_or_cont`` value per window (missing values are
        treated as 0), and ``input_shape`` equals ``segment_length``.
        With ``return_groups=True`` a fourth array with one ``number`` value
        per window is appended to the tuple.

    Notes:
        Window starts are ``range(0, len(recording) - segment_length, step)``,
        so a window starting exactly at ``len(recording) - segment_length`` is
        not emitted. This is kept unchanged so existing results reproduce.
        If ``number`` contains duplicates, the label of the first matching row
        is used for each of them.
    """
    scores = pd.read_csv(os.path.join(dataset_dir, 'scores7.csv'))
    # Assign the filled column back: an in-place fillna on a selected column is
    # a chained operation that is not guaranteed to modify `scores`.
    scores['exp_or_cont'] = scores['exp_or_cont'].fillna(0)

    segments = []
    labels = []
    groups = []

    for person in scores['number']:
        # Label and signal are constant for a recording; look them up once
        # instead of once per window.
        label = scores.loc[scores['number'] == person, 'exp_or_cont'].values[0]
        filepath = os.path.join(dataset_dir, person.split('_')[0], f'{person}.csv')
        magnitude = pd.read_csv(filepath)['Magnitude.mean'].values

        for start in range(0, len(magnitude) - segment_length, step):
            segments.append(magnitude[start:start + segment_length])
            labels.append(label)
            groups.append(person)

    # One window per row; the reshape also gives an empty result a 2-D shape.
    segments = np.asarray(segments).reshape(-1, segment_length).astype('float32')
    labels = np.asarray(labels).astype('float32')
    input_shape = segments.shape[1]

    if return_groups:
        return segments, labels, input_shape, np.asarray(groups)
    return segments, labels, input_shape

In [5]:
# Sanity check: build the dataset once and echo the returned
# (segments, labels, input_shape) tuple. The result is not stored.
print(
    segments_and_labels(
        dataset_dir=data_dir,
        segment_length=segment_length,
        step=step,
    )
)

(array([[0.043232, 0.08168 , 0.01811 , ..., 0.062163, 0.011337, 0.010288],
       [0.128714, 0.101046, 0.331317, ..., 0.055553, 0.031634, 0.05851 ],
       [0.006379, 0.007886, 0.01208 , ..., 0.016275, 0.085776, 0.035873],
       ...,
       [0.028381, 0.112372, 0.047427, ..., 0.037865, 0.024527, 0.059238],
       [0.073108, 0.043552, 0.070973, ..., 0.02016 , 0.026588, 0.008958],
       [0.042946, 0.062584, 0.074234, ..., 0.006881, 0.008216, 0.00673 ]],
      dtype=float32), array([1., 1., 1., ..., 0., 0., 0.], dtype=float32), 960)


In [40]:
# Build the windowed dataset for the currently configured segment_length.
segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

In [42]:
# Hold out 20% of the windows with a fixed seed.
# NOTE(review): none of the cells visible below use X_train/X_test/y_train/y_test;
# cross-validation runs on the full `segments`/`labels` arrays.
X_train, X_test, y_train, y_test = train_test_split(
    segments,
    labels,
    test_size=0.2,
    random_state=777,
)

In [58]:
# Create the estimator through the `xgb` module rather than the imported class
# name: this assignment rebinds `XGBClassifier` to an instance, so calling
# `XGBClassifier()` again on a re-run of this cell would raise a TypeError.
# The name is kept because later cells pass `XGBClassifier` as the estimator.
XGBClassifier = xgb.XGBClassifier()

16 hours - 960 timesteps

In [59]:
# 5-fold shuffled cross-validation over all windows, scoring four metrics per fold.
# NOTE(review): with step < segment_length, consecutive windows of a recording
# overlap, and a shuffled KFold over windows can place overlapping windows of the
# same recording in both the training and the test fold. The scores may therefore
# be optimistic; a splitter grouped by participant would avoid this.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    estimator=XGBClassifier,
    X=segments,
    y=labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

In [60]:
# For each scored metric, print the per-fold values followed by their mean.
for _heading, _mean_heading, _key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    _per_fold = scores_acc[_key]
    print(_heading)
    print(_per_fold)
    print(_mean_heading)
    print(np.mean(_per_fold))
    print()

Accuracy:
[0.62356979 0.61327231 0.61670481 0.63115693 0.6139748 ]
Mean accuracy:
0.6197357280845922

Precision:
[0.63991323 0.66284404 0.60127932 0.64847162 0.62195122]
Mean precision:
0.6348918843462633

Recall:
[0.6441048  0.60208333 0.65581395 0.64847162 0.66958425]
Mean recall:
0.6440115902224532

f1:
[0.64200218 0.63100437 0.62736374 0.64847162 0.64488936]
Mean f1:
0.638746250703107



24 hours - 1440 timesteps

In [61]:
# Switch to 1440-sample windows (the 24-hour configuration) and rebuild the dataset.
segment_length = 1440
segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

# Convenience aliases; the cross-validation cell reads `segments`/`labels` directly.
X, y = segments, labels
print(y.shape)
print(X.shape)

(4048,)
(4048, 1440)


In [62]:
# 5-fold shuffled cross-validation on the current windows, then a per-metric report.
# NOTE(review): windows overlap whenever step < segment_length, and the shuffled
# KFold splits by window, so overlapping windows of one recording can land in both
# train and test folds. Scores may be optimistic.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    estimator=XGBClassifier,
    X=segments,
    y=labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# Per-fold values followed by their mean, one stanza per metric.
for _heading, _mean_heading, _key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    _per_fold = scores_acc[_key]
    print(_heading)
    print(_per_fold)
    print(_mean_heading)
    print(np.mean(_per_fold))
    print()

Accuracy:
[0.63333333 0.66666667 0.64320988 0.6328801  0.62051916]
Mean accuracy:
0.6393218269773688

Precision:
[0.67370892 0.6835443  0.62389381 0.62780269 0.62527233]
Mean precision:
0.6468444102065279

Recall:
[0.64494382 0.72972973 0.7032419  0.68126521 0.68009479]
Mean recall:
0.6878550877517607

f1:
[0.65901263 0.70588235 0.66119578 0.65344224 0.65153235]
Mean f1:
0.6662130703361171



48 hours - 2880 timesteps

In [63]:
# Switch to 2880-sample windows (the 48-hour configuration) and rebuild the dataset.
segment_length = 2880
segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

# Convenience aliases; the cross-validation cell reads `segments`/`labels` directly.
X, y = segments, labels
print(y.shape)
print(X.shape)

(3088,)
(3088, 2880)


In [64]:
# 5-fold shuffled cross-validation on the current windows, then a per-metric report.
# NOTE(review): windows overlap whenever step < segment_length, and the shuffled
# KFold splits by window, so overlapping windows of one recording can land in both
# train and test folds. Scores may be optimistic.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    estimator=XGBClassifier,
    X=segments,
    y=labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# Per-fold values followed by their mean, one stanza per metric.
for _heading, _mean_heading, _key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    _per_fold = scores_acc[_key]
    print(_heading)
    print(_per_fold)
    print(_mean_heading)
    print(np.mean(_per_fold))
    print()

Accuracy:
[0.65695793 0.64239482 0.66990291 0.64505673 0.64505673]
Mean accuracy:
0.6518738231236855

Precision:
[0.68786127 0.65781711 0.68       0.65203762 0.65517241]
Mean precision:
0.666577682433761

Recall:
[0.69590643 0.67987805 0.72121212 0.65822785 0.69724771]
Mean recall:
0.6904944314528862

f1:
[0.69186047 0.66866567 0.7        0.65511811 0.67555556]
Mean f1:
0.6782399596148945



72 hours - 4320 timesteps

In [65]:
# Switch to 4320-sample windows (the 72-hour configuration) and rebuild the dataset.
segment_length = 4320
segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

# Convenience aliases; the cross-validation cell reads `segments`/`labels` directly.
X, y = segments, labels
print(y.shape)
print(X.shape)

(2128,)
(2128, 4320)


In [66]:
# 5-fold shuffled cross-validation on the current windows, then a per-metric report.
# NOTE(review): windows overlap whenever step < segment_length, and the shuffled
# KFold splits by window, so overlapping windows of one recording can land in both
# train and test folds. Scores may be optimistic.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    estimator=XGBClassifier,
    X=segments,
    y=labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# Per-fold values followed by their mean, one stanza per metric.
for _heading, _mean_heading, _key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    _per_fold = scores_acc[_key]
    print(_heading)
    print(_per_fold)
    print(_mean_heading)
    print(np.mean(_per_fold))
    print()

Accuracy:
[0.6056338  0.65492958 0.65023474 0.64       0.64      ]
Mean accuracy:
0.6381596244131456

Precision:
[0.63306452 0.68803419 0.67948718 0.63878327 0.65185185]
Mean precision:
0.6582442010928459

Recall:
[0.67094017 0.68510638 0.68240343 0.74336283 0.74893617]
Mean recall:
0.7061497978932925

f1:
[0.65145228 0.68656716 0.68094218 0.68711656 0.6970297 ]
Mean f1:
0.6806215795756863



96 hours - 5760 timesteps

In [67]:
# Switch to 5760-sample windows (the 96-hour configuration) and rebuild the dataset.
segment_length = 5760
segments, labels, input_shape = segments_and_labels(
    dataset_dir=data_dir,
    segment_length=segment_length,
    step=step,
)

# Convenience aliases; the cross-validation cell reads `segments`/`labels` directly.
X, y = segments, labels
print(y.shape)
print(X.shape)

(1217,)
(1217, 5760)


In [68]:
# 5-fold shuffled cross-validation on the current windows, then a per-metric report.
# NOTE(review): windows overlap whenever step < segment_length, and the shuffled
# KFold splits by window, so overlapping windows of one recording can land in both
# train and test folds. Scores may be optimistic.
kf = KFold(n_splits=5, shuffle=True, random_state=777)

scores_acc = cross_validate(
    estimator=XGBClassifier,
    X=segments,
    y=labels,
    scoring=['accuracy', 'precision', 'recall', 'f1'],
    cv=kf,
)

# Per-fold values followed by their mean, one stanza per metric.
for _heading, _mean_heading, _key in (
    ("Accuracy:", "Mean accuracy:", 'test_accuracy'),
    ("Precision:", "Mean precision:", 'test_precision'),
    ("Recall:", "Mean recall:", 'test_recall'),
    ("f1:", "Mean f1:", 'test_f1'),
):
    _per_fold = scores_acc[_key]
    print(_heading)
    print(_per_fold)
    print(_mean_heading)
    print(np.mean(_per_fold))
    print()

Accuracy:
[0.69672131 0.7295082  0.67078189 0.68312757 0.68312757]
Mean accuracy:
0.6926533090467517

Precision:
[0.66853933 0.76821192 0.73333333 0.70588235 0.69620253]
Mean precision:
0.7144338928585156

Recall:
[0.8880597  0.78911565 0.73333333 0.77142857 0.79136691]
Mean recall:
0.7946608317975531

f1:
[0.76282051 0.77852349 0.73333333 0.73720137 0.74074074]
Mean f1:
0.7505238884030373

