In [24]:
import os
import pickle
import scipy.signal
from scipy import fft
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report

In [2]:
# Absolute, symlink-resolved location of the WESAD dataset directory
# (resolved relative to the notebook's working directory).
DATA_PATH = os.path.realpath(os.path.join("..", "data", "WESAD"))

In [33]:
class Subject:
    """One WESAD subject, loaded from ``<main_path>/S<n>/S<n>.pkl``.

    Attributes:
        name: Subject identifier, e.g. ``'S2'``.
        data: The unpickled recording (a mapping with at least the keys
            ``'signal'`` and ``'label'``).
        labels: ``data['label']`` — the per-sample condition labels.
    """

    # Sampling rates (Hz) used to align the label track with the wrist BVP track.
    LABEL_RATE_HZ = 700
    BVP_RATE_HZ = 64

    def __init__(self, main_path, subject_number):
        """Load the pickle of subject ``subject_number`` found under ``main_path``.

        Args:
            main_path: Directory that contains one ``S<n>`` folder per subject.
            subject_number: Numeric subject id; the file read is
                ``<main_path>/S<n>/S<n>.pkl``.

        Raises:
            OSError: If the pickle file cannot be opened.
        """
        self.name = f'S{subject_number}'
        self.subject_keys = ['signal', 'label', 'subject']
        self.signal_keys = ['chest', 'wrist']
        self.chest_keys = ['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp']
        self.wrist_keys = ['ACC', 'BVP', 'EDA', 'TEMP']
        pickle_path = os.path.join(main_path, self.name, self.name + '.pkl')
        # NOTE(security): unpickling runs arbitrary code stored in the file —
        # only load dataset files obtained from a trusted source.
        with open(pickle_path, 'rb') as file:
            self.data = pickle.load(file, encoding='latin1')
        self.labels = self.data['label']

    def get_wrist_data(self):
        """Return the wrist-device signals (``data['signal']['wrist']``)."""
        return self.data['signal']['wrist']

    def get_chest_data(self):
        """Return the chest-device signals (``data['signal']['chest']``)."""
        return self.data['signal']['chest']

    def _labels_at_bvp_rate(self, n_samples):
        """Return one label per BVP sample as a float array of length ``n_samples``.

        BVP sample ``i`` (time ``i / 64`` s) receives the most recent label at or
        before that instant, i.e. label index ``floor(i * 700 / 64)``.
        """
        labels = np.asarray(self.labels).ravel()
        if labels.size == 0:
            # No labels recorded: keep the column, but mark every row unknown.
            return np.full(n_samples, np.nan)
        # Pure integer arithmetic: no floating-point timestamps to compare, so
        # every sample gets its own label instead of only those whose
        # timestamps happen to coincide on both sampling grids.
        idx = (np.arange(n_samples, dtype=np.int64) * self.LABEL_RATE_HZ) // self.BVP_RATE_HZ
        # If the label track is shorter than the signal, carry the last label forward.
        np.minimum(idx, labels.size - 1, out=idx)
        return labels[idx].astype(float)

    def get_subject_dataframe(self):
        """Build a per-sample frame of wrist signals plus the condition label.

        EDA, TEMP and the three ACC axes are Fourier-resampled
        (``scipy.signal.resample``) to the number of BVP samples, so every row
        corresponds to one BVP sample.

        Returns:
            pandas.DataFrame with a default integer index and the columns
            ``BVP, EDA, ACC_x, ACC_y, ACC_z, TEMP, label`` (``label`` is float).
        """
        wrist_data = self.get_wrist_data()
        bvp_signal = wrist_data['BVP'][:, 0]
        n_samples = len(bvp_signal)

        def upsample(channel):
            # Fourier-method resampling to the BVP length, as described for the dataset.
            return scipy.signal.resample(channel, n_samples)

        acc = wrist_data['ACC']
        df = pd.DataFrame({
            'BVP': bvp_signal,
            'EDA': upsample(wrist_data['EDA'][:, 0]),
            'ACC_x': upsample(acc[:, 0]),
            'ACC_y': upsample(acc[:, 1]),
            'ACC_z': upsample(acc[:, 2]),
            'TEMP': upsample(wrist_data['TEMP'][:, 0]),
        })
        df['label'] = self._labels_at_bvp_rate(n_samples)
        return df


In [34]:
# Load subject 2 from the dataset directory and assemble its wrist-signal frame.
s2 = Subject(main_path=DATA_PATH, subject_number=2)
df = s2.get_subject_dataframe()

In [37]:
# Show the frame built for subject S2: six wrist-signal columns plus `label`
# (pandas abbreviates the middle rows in the rendered table).
df

Unnamed: 0,BVP,EDA,ACC_x,ACC_y,ACC_z,TEMP,label
0,-59.37,1.138257,62.000000,-21.000000,107.000000,35.410000,0.0
1,-53.42,1.185245,66.111191,-9.448018,102.395428,35.457267,0.0
2,-44.40,1.224870,66.000000,13.000000,53.000000,35.496639,0.0
3,-33.17,1.256872,54.773562,18.437849,15.665055,35.527974,0.0
4,-20.79,1.281158,41.000000,9.000000,15.000000,35.551305,0.0
...,...,...,...,...,...,...,...
389051,18.33,0.812653,36.844784,19.921364,22.402427,35.072457,0.0
389052,18.26,0.887287,39.000000,27.000000,22.000000,35.151016,0.0
389053,18.26,0.958059,47.303737,36.272928,5.292793,35.224931,0.0
389054,18.68,1.024019,56.000000,26.000000,10.000000,35.293251,0.0


In [38]:
# Split the target column off the feature frame.
# The guard keeps this cell safe to re-run: once `label` has been removed, a
# second execution leaves `y` and `df` untouched instead of raising KeyError.
if 'label' in df.columns:
    y = df['label']
    # Rebind to a new frame rather than mutating in place.
    df = df.drop(columns='label')

In [39]:
# Show the feature frame again to confirm that the `label` column is gone.
df

Unnamed: 0,BVP,EDA,ACC_x,ACC_y,ACC_z,TEMP
0,-59.37,1.138257,62.000000,-21.000000,107.000000,35.410000
1,-53.42,1.185245,66.111191,-9.448018,102.395428,35.457267
2,-44.40,1.224870,66.000000,13.000000,53.000000,35.496639
3,-33.17,1.256872,54.773562,18.437849,15.665055,35.527974
4,-20.79,1.281158,41.000000,9.000000,15.000000,35.551305
...,...,...,...,...,...,...
389051,18.33,0.812653,36.844784,19.921364,22.402427,35.072457
389052,18.26,0.887287,39.000000,27.000000,22.000000,35.151016
389053,18.26,0.958059,47.303737,36.272928,5.292793,35.224931
389054,18.68,1.024019,56.000000,26.000000,10.000000,35.293251


In [42]:
# Min-max scale every feature column to the range [0, 1].
# A constant column has max == min, which would produce 0/0 = NaN and make the
# downstream LDA fit fail; treating its range as 1 maps such a column to 0.
# NOTE(review): min/max are taken over the whole frame, i.e. before the
# train/test split, so held-out rows influence the scaling — consider deriving
# them from the training split only.
col_min = df.min()
col_range = (df.max() - col_min).replace(0, 1)
normalized_x = (df - col_min) / col_range

In [21]:
# Show the min-max scaled features.
# NOTE(review): the saved output of this cell still contains a `label` column
# and roughly 135k rows, which does not match the current `df` (no `label`,
# ~389k rows) — it appears to come from an earlier run; re-run to refresh it.
normalized_x

Unnamed: 0,BVP,EDA,ACC_x,ACC_y,ACC_z,TEMP,label
0,0.536850,0.993856,0.661565,0.717624,0.545219,0.950902,0.0
1,0.548782,0.995152,0.676264,0.736944,0.507794,0.950913,0.0
2,0.560715,0.996392,0.683334,0.736449,0.507154,0.950923,0.0
3,0.571374,0.997521,0.691702,0.691200,0.545094,0.950932,0.0
4,0.580215,0.998489,0.674626,0.642324,0.583283,0.950940,0.0
...,...,...,...,...,...,...,...
135739,0.533648,0.089840,0.499794,0.927696,0.460655,0.441595,0.0
135740,0.536857,0.089739,0.500468,0.929406,0.464332,0.441532,0.0
135741,0.540788,0.089649,0.503081,0.929464,0.466159,0.441435,0.0
135742,0.544653,0.089569,0.504822,0.929406,0.464332,0.441301,0.0


In [40]:
# Hold out 20 % of the raw (un-normalised) rows for evaluation.
# A fixed random_state makes the split — and every score computed from it —
# reproducible across kernel restarts.
# NOTE(review): the rows are consecutive samples of one recording, so a random
# row-wise split places near-identical neighbouring samples in both sets; the
# reported scores may be optimistic.
x_train, x_test, y_train, y_test = train_test_split(
    df, y, test_size=0.2, random_state=42
)

In [43]:
# Hold out 20 % of the min-max scaled rows for evaluation.
# A fixed random_state makes the split — and every score computed from it —
# reproducible across kernel restarts.
norm_x_train, norm_x_test, norm_y_train, norm_y_test = train_test_split(
    normalized_x, y, test_size=0.2, random_state=42
)

In [44]:
# Baseline: linear discriminant analysis on the raw features, scored on the
# held-out rows with a per-class precision/recall/F1 report.
LDA = LinearDiscriminantAnalysis(solver='svd')
LDA.fit(x_train, y_train)
y_out = LDA.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_out, digits=4))

              precision    recall  f1-score   support

         0.0     0.8689    0.7090    0.7809     39191
         1.0     0.7864    0.9082    0.8429     14570
         2.0     0.7403    0.9755    0.8418      7889
         3.0     0.6601    0.3635    0.4688      4669
         4.0     0.6249    0.9895    0.7660      9864
         6.0     0.2302    0.1137    0.1523       844
         7.0     0.0000    0.0000    0.0000       785

    accuracy                         0.7746     77812
   macro avg     0.5587    0.5799    0.5504     77812
weighted avg     0.7813    0.7746    0.7634     77812



In [46]:
# Same LDA model, this time trained and scored on the min-max scaled features,
# to compare against the raw-feature baseline.
LDA = LinearDiscriminantAnalysis(solver='svd')
LDA.fit(norm_x_train, norm_y_train)
y_out = LDA.predict(norm_x_test)
print(classification_report(y_true=norm_y_test, y_pred=y_out, digits=4))

              precision    recall  f1-score   support

         0.0     0.8702    0.7069    0.7801     39252
         1.0     0.7888    0.9143    0.8469     14605
         2.0     0.7351    0.9721    0.8371      7839
         3.0     0.6720    0.3787    0.4844      4632
         4.0     0.6237    0.9893    0.7651      9821
         6.0     0.2091    0.1112    0.1452       827
         7.0     0.0000    0.0000    0.0000       836

    accuracy                         0.7748     77812
   macro avg     0.5570    0.5818    0.5513     77812
weighted avg     0.7820    0.7748    0.7638     77812



In [41]:
# Show the training portion of the scaled features (row order is shuffled by the split).
# NOTE(review): the saved output of this cell includes a `label` column and row
# ids no larger than ~135k, which does not match the current feature frame —
# it appears to come from an earlier run; re-run to refresh it.
norm_x_train

Unnamed: 0,BVP,EDA,ACC_x,ACC_y,ACC_z,TEMP,label
119016,0.620716,0.081745,0.639795,0.651736,0.321589,0.499836,0.0
86565,0.599383,0.371923,0.434766,0.387330,0.514872,0.347136,1.0
71092,0.629343,0.004832,0.713812,0.755274,0.559493,0.868585,0.0
23452,0.585132,0.131084,0.626733,0.769393,0.535703,0.951500,0.0
28638,0.568584,0.075338,0.622379,0.750568,0.554735,0.950932,0.0
...,...,...,...,...,...,...,...
8456,0.560016,0.346963,0.548361,0.835281,0.602316,0.936302,0.0
32024,0.557851,0.063840,0.374203,0.689386,0.764090,0.973331,0.0
124162,0.558853,0.064084,0.644149,0.661149,0.388202,0.464684,0.0
35268,0.555444,0.048199,0.613671,0.712918,0.302557,0.980403,0.0
