In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from scipy import signal
import pywt

import os
import time
import datetime
import random
import h5py
import pickle

import tensorflow as tf
keras = tf.keras
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.callbacks import History

from sklearn.preprocessing import LabelEncoder

from platform import python_version
print(python_version())

3.8.8


In [2]:
def readData(accDir, annotFile):
    """Load the accelerometer CSV files and the annotation workbook.

    Parameters
    ----------
    accDir : str
        Directory that holds the accelerometer CSV files. A trailing path
        separator is optional.
    annotFile : str
        Path of the Excel workbook; every sheet in it is read.

    Returns
    -------
    empatica_dict : dict
        Maps the integer parsed from each CSV file name to the numpy array
        read from that file.
    annot_dict : dict
        Maps each sheet name to its DataFrame with all NaN-containing rows
        removed.

    Raises
    ------
    ValueError
        If a CSV file name does not reduce to a number (see the comment on
        the key parsing below).
    """
    empatica_dict = dict()
    for f in os.listdir(accDir):
        if not f.endswith('csv'):
            continue
        # os.path.join (instead of accDir + f) so the directory may be given
        # with or without a trailing separator.
        data = np.genfromtxt(os.path.join(accDir, f), delimiter=',')
        # NOTE: str.strip treats its argument as a *set of characters*
        # (A, C, '.', c, s, v) removed from both ends, not as a suffix.
        # What remains must parse as a number, which becomes the dict key.
        key = int(float(f.strip("ACC.csv")))
        empatica_dict[key] = data
    sheets = pd.read_excel(annotFile, sheet_name=None)
    # Remove the rows with NaN values (some with ladder 2 missing)
    annot_dict = {name: sheet.dropna() for name, sheet in sheets.items()}
    return empatica_dict, annot_dict

In [3]:
def getLabeledDict(empatica_dict, annot_dict, subject_ids, sr=None):
    """Attach a per-sample task label to every subject's accelerometer data.

    Parameters
    ----------
    empatica_dict : dict
        Maps subject id to a 2-D array. Element ``[0, 0]`` is used as the
        recording start time in seconds; rows from index 2 onward are used as
        the three-axis samples.
    annot_dict : dict
        Maps ``'P<id>'`` to a DataFrame with ``taskName`` and
        ``startTime_global`` columns. Rows are consumed in (start, end) pairs
        and the label comes from the first row of each pair.
        ``startTime_global`` strings are parsed as ``mm/dd/yy HH:MM:SS``.
    subject_ids : iterable
        Subject ids to process.
    sr : int, optional
        Samples per second used to turn time offsets into sample indices.
        Defaults to the notebook-level ``SR``, so existing three-argument
        calls behave as before.

    Returns
    -------
    labeled_dict : dict
        Maps subject id to a DataFrame with columns ``X, Y, Z, Mag, label``.
        ``label`` is ``None`` for samples outside every annotated interval.
    taskInd_dict : dict
        Maps subject id to the list of sample indices of the annotation rows.
    """
    if sr is None:
        # Fall back to the global so the original call signature still works.
        sr = SR
    labeled_dict = {}; taskInd_dict = {}
    for sub_id in subject_ids:  # `sub_id`, not `id`, to avoid shadowing the builtin
        raw = empatica_dict[sub_id]
        annot = annot_dict['P' + str(sub_id)]
        start_time = int(raw[0, 0])
        acc = raw[2:, :]

        label = [name.replace("_end", "").replace("_start", "")
                 for name in annot.taskName.tolist()]
        # Insert '20' to expand the two-digit year, then convert to epoch
        # seconds. time.mktime interprets the struct as *local* time.
        task_time = [
            time.mktime(
                datetime.datetime.strptime(stamp[:6] + '20' + stamp[6:],
                                           "%m/%d/%Y %H:%M:%S").timetuple()
            )
            for stamp in annot.startTime_global.tolist()
        ]
        task_ind = [int(t - start_time) * sr for t in task_time]
        taskInd_dict[sub_id] = task_ind

        # Object array so unlabeled samples stay None.
        label_tmp = np.empty(acc.shape[0], dtype=object)
        # Even positions are interval starts, odd positions interval ends.
        for pair_no, (begin, end) in enumerate(zip(task_ind[0::2], task_ind[1::2])):
            label_tmp[begin:end] = label[2 * pair_no]

        acc_mag = np.sqrt(np.sum(acc**2, axis=1))[:, None]
        accel = np.hstack((acc, acc_mag))
        labeled_dict[sub_id] = pd.DataFrame(
            np.hstack((accel, label_tmp.reshape(label_tmp.shape[0], 1))),
            columns=['X', 'Y', 'Z', 'Mag', 'label'],
        )
    return labeled_dict, taskInd_dict

In [4]:
# Read the raw accelerometer arrays and the annotation sheets.
sepAccDict, sepAnnotDict = readData(
    accDir='./Data/50_subs/Acc Data/separate/',
    annotFile='./Data/50_subs/Annotation Data/separate.xlsx',
)
# Element [1, 0] of subject 8's array is used as the sampling rate for everyone.
SR = int(sepAccDict[8][1, 0])

sepSubIDs = list(range(8, 45))
sepLabeledDict_, sepTaskIndDict = getLabeledDict(sepAccDict, sepAnnotDict, sepSubIDs)

# Apply Filter on All Subjects
# Low-pass Butterworth of order n; the cutoff fc is passed to butter as a
# fraction of the Nyquist frequency SR/2.
n = 4
fc = 2
w = fc / (SR / 2)
b, a = signal.butter(n, w, 'low')
# Zero-phase filtering of every non-label column, per subject.
sepLabeledDict_filtered = {
    sub: signal.filtfilt(b, a, x=frame.drop(columns='label'), axis=0)
    for sub, frame in sepLabeledDict_.items()
}
# back to DF and add label
sepLabeledDict_filtered_dfs = {
    sub: pd.DataFrame(filtered, columns=['X', 'Y', 'Z', 'Mag']).assign(
        label=sepLabeledDict_[sub].label
    )
    for sub, filtered in sepLabeledDict_filtered.items()
}
# Remove data without label
filt_noNA_dict = {
    sub: frame.dropna() for sub, frame in sepLabeledDict_filtered_dfs.items()
}

In [5]:
# Display the subject ids present in the filtered, NaN-free dictionary.
filt_noNA_dict.keys()

dict_keys([8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44])

In [12]:
# Task labels kept for the wavelet analysis; every other label is discarded.
tasks = ['sit', 'stand', 'walk', 'hoist', 'lift', 'push', 'type', 'ladder1', 'ladder2', 'electricPanel', 'overhead']
scales = range(1,200)
waveletname = 'morl'
# Per subject: normalised CWT coefficients and the matching per-sample labels.
coeffs_dict = {}; labels_dict = {}
for sub in filt_noNA_dict.keys():
    sig_ = filt_noNA_dict[sub]
    # Build a new frame with both ladder variants merged into 'ladder'.
    # The previous `sig.label.replace(..., inplace=True)` mutated a column
    # pulled from a filtered slice: that raises SettingWithCopyWarning on older
    # pandas and is silently a no-op under copy-on-write, leaving
    # 'ladder1'/'ladder2' unmerged.
    sig = sig_[sig_.label.isin(tasks)].assign(
        label=lambda frame: frame.label.replace({'ladder1': 'ladder', 'ladder2': 'ladder'})
    )

    acc_signals = sig[['X', 'Y', 'Z']].values.astype('float')
    # CWT along the sample axis (axis=0) with sampling period 1/SR.
    coeff, freq = pywt.cwt(acc_signals, scales, waveletname, 1/SR, axis=0)
    # Scale by the largest (signed) coefficient of this subject.
    coeffs_dict[sub] = coeff/np.max(coeff)
    labels_dict[sub] = sig.label#.values

In [7]:
# Subject-wise split: whole subjects go to either the train or the test set.
random.seed(2021)  # fixed seed so the sampled split is repeatable
percentTrain = 80
all_subs = list(filt_noNA_dict.keys())
n_train = int(len(all_subs)*(percentTrain/100))
train_subs = random.sample(all_subs, k=n_train)
# Everything not drawn for training becomes the test set.
test_subs = list(set(all_subs) - set(train_subs))

# Optional: persist the held-out subject ids.
# with open('test_subs.pickle', 'wb') as outfile:
#     pickle.dump(test_subs, outfile)

In [22]:
winLen = 320


def _window_subjects(subs, coeffs_by_sub, labels_by_sub, win_len):
    """Cut each subject's coefficients into fixed-length, single-label windows.

    Parameters
    ----------
    subs : iterable
        Subject ids to process.
    coeffs_by_sub : dict
        Maps subject id to a 3-D array whose axis 1 is the sample axis.
    labels_by_sub : dict
        Maps subject id to a pandas Series with one label per sample.
    win_len : int
        Number of samples per window.

    Returns
    -------
    features : list
        One array slice of ``win_len`` samples (along axis 1) per window.
    labels : list
        The label of the segment each window was cut from.

    Notes
    -----
    A segment is a run of consecutive equal labels. Windows never straddle a
    label change; leftover samples at the end of a segment that do not fill a
    whole window are discarded.
    """
    features, labels = [], []
    for sub in subs:
        sub_labels = labels_by_sub[sub]
        sub_coeffs = coeffs_by_sub[sub]
        label_values = sub_labels.values
        # A segment starts wherever the label differs from the previous sample.
        changed = sub_labels.ne(sub_labels.shift())
        bounds = list(np.where(changed.to_numpy())[0])
        # Exclusive end of the last segment. This used to be len - 1, which,
        # used as a slice end, dropped the final sample and could cost the last
        # segment a full window.
        bounds.append(len(sub_labels))
        for seg_start, seg_end in zip(bounds[:-1], bounds[1:]):
            segment_label = label_values[seg_start]
            segment = sub_coeffs[:, seg_start:seg_end, :]
            for j in range(segment.shape[1] // win_len):
                features.append(segment[:, (j * win_len):((j + 1) * win_len), :])
                labels.append(segment_label)
    return features, labels


########### Train
train_features_list, train_labels_list = _window_subjects(
    train_subs, coeffs_dict, labels_dict, winLen
)
x_train = np.array(train_features_list)
y_train = np.array(train_labels_list)

########### Test
test_features_list, test_labels_list = _window_subjects(
    test_subs, coeffs_dict, labels_dict, winLen
)
x_test = np.array(test_features_list)
y_test = np.array(test_labels_list)