In [2]:
import pickle
import os
import pandas as pd

In [26]:
DATA_PATH = "/Users/Nils/Library/Mobile Documents/com~apple~CloudDocs/master/sommersemester_22/big_data_prak/data/WESAD"

In [9]:
class Subject:

    def __init__(self, main_path, subject_number):
        self.name = f'S{subject_number}'
        self.subject_keys = ['signal', 'label', 'subject']
        self.signal_keys = ['chest', 'wrist']
        self.chest_keys = ['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp']
        self.wrist_keys = ['ACC', 'BVP', 'EDA', 'TEMP']
        with open(os.path.join(main_path, self.name) + '/' + self.name + '.pkl', 'rb') as file:
            self.data = pickle.load(file, encoding='latin1')
        self.labels = self.data['label']

    def get_wrist_data(self):
        data = self.data['signal']['wrist']
        data.update({'Resp': self.data['signal']['chest']['Resp']})
        return data

    def get_chest_data(self):
        return self.data['signal']['chest']

In [27]:
s10 = Subject(DATA_PATH, '10')

In [28]:
s10_wrist_data = s10.get_wrist_data()

# Data Preparation

**Wrist - Empatica E4**

ACC: three-axis acceleration (32 Hz)

BVP: blood volume pulse (64 Hz)

EDA: electrodermal activity (4 Hz)

TEMP: skin temperature (4 Hz)

In [29]:
s10_wrist_data

{'ACC': array([[ 107., -105.,  127.],
        [  67.,  -52.,   45.],
        [  26.,   40.,    0.],
        ...,
        [  54.,   23.,   24.],
        [  54.,   23.,   24.],
        [  54.,   23.,   24.]]),
 'BVP': array([[10.17],
        [12.04],
        [13.01],
        ...,
        [57.59],
        [65.93],
        [69.86]]),
 'EDA': array([[0.349215],
        [0.346656],
        [0.350494],
        ...,
        [0.931574],
        [0.932854],
        [0.932854]]),
 'TEMP': array([[33.13],
        [33.16],
        [33.16],
        ...,
        [30.83],
        [30.83],
        [30.83]]),
 'Resp': array([[ 0.21362305],
        [ 0.19226074],
        [ 0.20599365],
        ...,
        [-2.18353271],
        [-2.003479  ],
        [-2.03094482]])}

In [30]:
# Turn e4-wrist-data into datafames

eda_df = pd.DataFrame(s10_wrist_data['EDA'], columns=['EDA'])
bvp_df = pd.DataFrame(s10_wrist_data['BVP'], columns=['BVP'])
acc_df = pd.DataFrame(s10_wrist_data['ACC'], columns=['ACC_x', 'ACC_y', 'ACC_z'])
temp_df = pd.DataFrame(s10_wrist_data['TEMP'], columns=['TEMP'])
label_df = pd.DataFrame(s10.labels, columns=['label'])

In [31]:
eda_df.index

RangeIndex(start=0, stop=21984, step=1)

In [32]:
# DATA 

In [33]:
fs_dict = {'ACC': 32, 'BVP': 64, 'EDA': 4, 'TEMP': 4, 'label': 64}

In [34]:
eda_df.index = [(1 / fs_dict['EDA']) * i for i in range(len(eda_df))]
bvp_df.index = [(1 / fs_dict['BVP']) * i for i in range(len(bvp_df))]
acc_df.index = [(1 / fs_dict['ACC']) * i for i in range(len(acc_df))]
temp_df.index = [(1 / fs_dict['TEMP']) * i for i in range(len(temp_df))]
label_df.index = [(1 / fs_dict['label']) * i for i in range(len(label_df))]

In [35]:
eda_df.index = pd.to_datetime(eda_df.index, unit='s')
bvp_df.index = pd.to_datetime(bvp_df.index, unit='s')
temp_df.index = pd.to_datetime(temp_df.index, unit='s')
acc_df.index = pd.to_datetime(acc_df.index, unit='s')
label_df.index = pd.to_datetime(label_df.index, unit='s')

In [36]:
bvp_df.index

DatetimeIndex([       '1970-01-01 00:00:00', '1970-01-01 00:00:00.015625',
               '1970-01-01 00:00:00.031250', '1970-01-01 00:00:00.046875',
               '1970-01-01 00:00:00.062500', '1970-01-01 00:00:00.078125',
               '1970-01-01 00:00:00.093750', '1970-01-01 00:00:00.109375',
               '1970-01-01 00:00:00.125000', '1970-01-01 00:00:00.140625',
               ...
               '1970-01-01 01:31:35.843750', '1970-01-01 01:31:35.859375',
               '1970-01-01 01:31:35.875000', '1970-01-01 01:31:35.890625',
               '1970-01-01 01:31:35.906250', '1970-01-01 01:31:35.921875',
               '1970-01-01 01:31:35.937500', '1970-01-01 01:31:35.953125',
               '1970-01-01 01:31:35.968750', '1970-01-01 01:31:35.984375'],
              dtype='datetime64[ns]', length=351744, freq=None)

In [37]:
eda_df.index

DatetimeIndex([       '1970-01-01 00:00:00', '1970-01-01 00:00:00.250000',
               '1970-01-01 00:00:00.500000', '1970-01-01 00:00:00.750000',
                      '1970-01-01 00:00:01', '1970-01-01 00:00:01.250000',
               '1970-01-01 00:00:01.500000', '1970-01-01 00:00:01.750000',
                      '1970-01-01 00:00:02', '1970-01-01 00:00:02.250000',
               ...
               '1970-01-01 01:31:33.500000', '1970-01-01 01:31:33.750000',
                      '1970-01-01 01:31:34', '1970-01-01 01:31:34.250000',
               '1970-01-01 01:31:34.500000', '1970-01-01 01:31:34.750000',
                      '1970-01-01 01:31:35', '1970-01-01 01:31:35.250000',
               '1970-01-01 01:31:35.500000', '1970-01-01 01:31:35.750000'],
              dtype='datetime64[ns]', length=21984, freq=None)

In [38]:
# Combined dataframe - not used yet
df = eda_df.join(bvp_df, how='outer')
df = df.join(temp_df, how='outer')
df = df.join(acc_df, how='outer')
df = df.join(label_df, how='outer')
df['label'] = df['label'].fillna(method='bfill')
df.reset_index(drop=True, inplace=True)

In [49]:
len(df[df['label'] == 1])

826000