In [1]:
import numpy as np
# %matplotlib notebook
%matplotlib widget
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd

from scipy import signal
import pywt

import os
import time
import datetime
import random
# import h5py

from ggs import *
import ipywidgets as widgets

from platform import python_version
print(python_version())

3.9.2


In [2]:
def readData(accDir, annotFile):
    """Load the accelerometer CSV files and the annotation workbook.

    Parameters
    ----------
    accDir : str
        Directory holding one accelerometer CSV per subject. A trailing path
        separator is optional.
    annotFile : str
        Path of the Excel workbook with the task annotations (one sheet per
        subject).

    Returns
    -------
    empatica_dict : dict[int, numpy.ndarray]
        Subject number -> array parsed from that subject's CSV file.
    annot_dict : dict[str, pandas.DataFrame]
        Sheet name -> sheet contents with every row containing a NaN removed.
    """
    empatica_dict = dict()
    for f in os.listdir(accDir):
        if not f.endswith('csv'):
            continue
        # os.path.join keeps this working whether or not accDir ends with a separator.
        data = np.genfromtxt(os.path.join(accDir, f), delimiter=',')
        # NOTE: str.strip removes any of the characters "A", "C", ".", "c", "s", "v"
        # from both ends of the name (it is not a suffix match); what is left must
        # parse as a number, which becomes the subject key.
        key = int(float(f.strip("ACC.csv")))
        empatica_dict[key] = data
    sheets = pd.read_excel(annotFile, sheet_name=None)
    # Remove the rows with NaN values (some with ladder 2 missing)
    annot_dict = {name: sheet.dropna() for name, sheet in sheets.items()}
    return empatica_dict, annot_dict

def getLabeledDict(empatica_dict, annot_dict, subject_ids, sr=None):
    """Attach task labels to each subject's accelerometer samples.

    Parameters
    ----------
    empatica_dict : dict
        Subject number -> 2-D array. Element [0, 0] is read as the recording
        start time (epoch seconds) and rows from index 2 onwards as the
        acceleration samples.
    annot_dict : dict
        'P<subject number>' -> DataFrame with columns ``taskName`` and
        ``startTime_global`` ("mm/dd/yy HH:MM:SS"). Rows are consumed in
        consecutive (start, end) pairs.
    subject_ids : iterable
        Subject numbers to process.
    sr : int, optional
        Samples per second. When omitted, the notebook-level ``SR`` is used,
        which must therefore be defined before the call.

    Returns
    -------
    labeled_dict : dict
        Subject number -> DataFrame with columns X, Y, Z, Mag, label. Samples
        outside every annotated interval have no label.
    taskInd_dict : dict
        Subject number -> list of sample indices of the annotated timestamps.
    """
    if sr is None:
        sr = SR  # fall back to the notebook-level sampling rate
    labeled_dict = {}
    taskInd_dict = {}
    for subject_id in subject_ids:
        recording = empatica_dict[subject_id]
        start_time = int(recording[0, 0])
        acc = recording[2:, :]
        annot = annot_dict['P' + str(subject_id)]

        label = [name.replace("_end", "").replace("_start", "")
                 for name in annot.taskName.tolist()]
        # The two-digit year is expanded to "20yy"; time.mktime interprets the
        # timestamp in the machine's local time zone.
        task_time = [
            time.mktime(datetime.datetime.strptime(stamp[:6] + '20' + stamp[6:],
                                                   "%m/%d/%Y %H:%M:%S").timetuple())
            for stamp in annot.startTime_global.tolist()
        ]
        task_ind = [int(t - start_time) * sr for t in task_time]
        taskInd_dict[subject_id] = task_ind

        label_tmp = np.empty(acc.shape[0], dtype=object)
        # Even entries are interval starts, odd entries the matching ends; the
        # label of the start row names the whole interval.
        for first, last, name in zip(task_ind[0::2], task_ind[1::2], label[0::2]):
            label_tmp[first:last] = name

        acc_mag = np.sqrt(np.sum(acc**2, axis=1))[:, None]
        accel = np.hstack((acc, acc_mag))
        labeled_dict[subject_id] = pd.DataFrame(
            np.hstack((accel, label_tmp.reshape(label_tmp.shape[0], 1))),
            columns=['X', 'Y', 'Z', 'Mag', 'label'])
    return labeled_dict, taskInd_dict

In [3]:
# Load the "separate" recordings together with their annotation workbook.
sepAccDict, sepAnnotDict = readData(
    accDir='./Data/50_subs/Acc Data/separate/',
    annotFile='./Data/50_subs/Annotation Data/separate.xlsx',
)
# The sampling rate is read from the second row of subject 8's file.
SR = int(sepAccDict[8][1, 0])

# Subjects 8..44, leaving out 27 (does not have lift).
sepSubIDs = [subject for subject in range(8, 45) if subject != 27]
sepLabeledDict_, sepTaskIndDict = getLabeledDict(sepAccDict, sepAnnotDict, sepSubIDs)

## Apply Low Pass Filter

In [4]:
# Low-pass filter every subject: order-4 Butterworth, cutoff fc expressed
# relative to the Nyquist frequency SR/2.
n = 4
fc = 2
w = fc / (SR / 2)
b, a = signal.butter(n, w, 'low')

# Filter the four numeric columns (label column removed first).
sepLabeledDict_filtered = {
    key: signal.filtfilt(b, a, x=frame.drop(columns='label'), axis=0)
    for key, frame in sepLabeledDict_.items()
}

# Wrap the filtered arrays in DataFrames again and re-attach the labels.
sepLabeledDict_filtered_dfs = {
    key: pd.DataFrame(filtered, columns=['X', 'Y', 'Z', 'Mag']).assign(label=sepLabeledDict_[key].label)
    for key, filtered in sepLabeledDict_filtered.items()
}

# Keep only the rows that carry a label.
filt_noNA_dict = {
    key: frame.dropna()
    for key, frame in sepLabeledDict_filtered_dfs.items()
}

In [5]:
# Distinct task labels present for subject 16 after filtering.
filt_noNA_dict[16]["label"].unique()

array(['sit', 'stand', 'walk', 'hoist', 'lift', 'push', 'type', 'ladder1',
       'ladder2', 'electricPanel', 'overhead'], dtype=object)

# GGS

In [6]:
# Pick one subject and keep only the samples whose label is in `tasks`.
sub = 16
# tasks = ['sit', 'stand', 'walk', 'hoist', 'lift', 'ladder1']
tasks = [
    'sit', 'stand', 'walk', 'hoist', 'lift', 'push',
    'type', 'ladder1', 'ladder2', 'electricPanel', 'overhead',
]
sig_ = filt_noNA_dict[sub]
sig = sig_.loc[sig_["label"].isin(tasks)]

In [7]:
# Top panel: the subject's filtered DataFrame as plotted by pandas.
# Bottom panel: X/Y/Z of the task-restricted signal in red/green/blue.
f, ax = plt.subplots(nrows=2, ncols=1, figsize=(13, 5))
sig_.plot(ax=ax[0])
for axis_name, colour in (('X', 'r'), ('Y', 'g'), ('Z', 'b')):
    ax[1].plot(sig[axis_name].values, c=colour)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# GGS with all labels

In [8]:
# Run GGS on subject 16 using every task label.
tasks = ['sit', 'stand', 'walk', 'hoist', 'lift', 'push', 'type', 'ladder1', 'ladder2', 'electricPanel', 'overhead']
sub = 16

sig__ = filt_noNA_dict[sub]
sig_ = sig__[sig__.label.isin(tasks)]
# First three columns (X, Y, Z) as a float array with one row per axis.
# The builtin `float` replaces `np.float`, which was deprecated in NumPy 1.20
# and removed in NumPy 1.24.
sig = sig_.values[:,:3].astype(float).T
bps, objectives = GGS(sig, Kmax=20, lamb=1e4)

In [15]:
# Dump every breakpoint list returned by GGS (bps holds one list per segmentation).
print(bps)

[[0, 73057], [0, 61562, 73057], [0, 11583, 61562, 73057], [0, 5760, 17251, 61563, 73057], [0, 88, 5760, 17251, 61563, 73057], [0, 88, 5760, 11580, 17280, 61563, 73057], [0, 88, 5760, 11580, 17280, 61564, 67296, 73057], [0, 88, 5760, 17251, 35406, 48640, 61563, 67296, 73057], [0, 88, 5760, 17251, 35407, 42889, 48640, 61563, 67296, 73057], [0, 88, 5760, 11580, 17280, 35407, 42889, 48640, 61563, 67296, 73057], [0, 88, 5760, 11580, 17280, 27316, 35406, 42889, 48640, 61563, 67296, 73057], [0, 88, 5760, 11580, 17280, 27316, 35406, 42934, 46094, 48640, 61563, 67296, 73057], [0, 88, 5760, 11580, 17280, 27316, 35406, 42934, 46094, 48640, 61562, 67355, 69433, 73057], [0, 88, 5760, 11580, 17280, 27316, 35406, 42934, 46094, 48640, 61563, 66376, 67355, 69433, 73057], [0, 88, 5760, 11580, 17280, 27316, 35406, 42934, 46094, 48640, 61563, 63287, 66148, 67355, 69433, 73057], [0, 88, 5760, 6523, 11570, 17280, 27316, 35406, 42934, 46094, 48640, 61563, 63287, 66148, 67355, 69433, 73057], [0, 88, 5760, 652

In [9]:
# Number of segmentations, then type and length of the last one. This is
# printed explicitly: a bare expression that is not the last line of a cell
# is evaluated and then discarded.
print(len(bps), type(bps[-1]), len(bps[-1]))
# Breakpoints of the last segmentation.
print(bps[-1])

[0, 88, 5760, 6523, 11570, 17280, 19233, 27316, 35406, 42934, 46094, 48640, 61563, 63287, 66148, 67355, 69430, 69565, 70535, 70880, 71702, 73057]


In [10]:
# X/Y/Z traces (red/green/blue) with the last GGS segmentation overlaid
# as dashed black vertical lines.
f, ax = plt.subplots(1, figsize=(13, 5))
for row, colour in enumerate('rgb'):
    ax.plot(sig[row, :], c=colour)
ax.set_title('GGS Segments', fontsize=20)
for breakpoint_pos in bps[-1]:
    ax.axvline(breakpoint_pos, color='k', linestyle='--')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
# Merge breakpoints of the last GGS segmentation that lie closer together than
# `thresh` samples, replacing each cluster of close breakpoints by a single one.
segment = bps          # every segmentation returned by GGS
seg = segment[-1]      # the last segmentation (the one being cleaned up)
seg_arr = np.asarray(seg)

thresh = 960 # ensure 30 seconds between segments (960 samples; presumably assumes SR == 32 -- confirm)
tol = 30     # search margin, in samples, around a cluster when looking for a replacement

# Positions i where seg[i+1] - seg[i] < thresh.
idx = np.where(np.diff(seg_arr) < thresh)[0]
if idx.size:
    # Runs of consecutive positions form one cluster; each run is extended by
    # one so that it also covers the right-hand breakpoint of its last gap.
    cluster_idxs_ = np.split(idx, np.where(np.diff(idx) != 1)[0] + 1)
    clusters_idxs = [np.append(elem, elem[-1] + 1) for elem in cluster_idxs_]
    cluster_vals = [seg_arr[idxs] for idxs in clusters_idxs]
else:
    # No close breakpoints: without this guard np.split returns one empty
    # array and taking its maximum raises ValueError.
    cluster_vals = []

clustered = {int(v) for vals in cluster_vals for v in vals}
new_segs = [v for v in seg if v not in clustered]
for elem in cluster_vals:
    has_start = bool((elem == 0).any())
    has_end = bool((elem == seg[-1]).any())
    # The outer boundaries are always kept (both of them if one cluster spans
    # the whole signal).
    if has_start:
        new_segs.append(0)
    if has_end:
        new_segs.append(seg[-1])
    if has_start or has_end:
        continue
    lo, hi = elem.min() - tol, elem.max() + tol
    # Walk the segmentations in order and take the smallest breakpoint of the
    # first one that has a breakpoint inside (lo, hi).
    for early_seg in segment:
        early = np.asarray(early_seg)
        near = early[(early > lo) & (early < hi)]
        if near.size:
            new_segs.append(int(near.min()))
            break

new_seg_dict = {}
new_seg_dict[sub] = sorted(new_segs)

In [13]:
# X/Y/Z traces (red/green/blue) with the merged breakpoints of the current
# subject overlaid as dashed black vertical lines.
f, ax = plt.subplots(1, figsize=(13, 5))
for row, colour in enumerate('rgb'):
    ax.plot(sig[row, :], c=colour)
ax.set_title('GGS Segments', fontsize=20)
for breakpoint_pos in new_seg_dict[sub]:
    ax.axvline(breakpoint_pos, color='k', linestyle='--')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Plot Raw GGS Segments

In [8]:
# Run GGS for every subject and show the result in one widget tab per subject.
# The recorded run of this cell took about 4350 s.
start_time = time.time()
tasks = ['sit', 'stand', 'walk', 'hoist', 'lift', 'ladder1']
subs = list(filt_noNA_dict.keys())
sub_tab = [widgets.Output() for _ in subs]
tab = widgets.Tab(sub_tab)
for i, sub in enumerate(subs):

    sig__ = filt_noNA_dict[sub]
    sig_ = sig__[sig__.label.isin(tasks)]
    # Builtin `float` instead of `np.float` (deprecated in NumPy 1.20, removed in 1.24).
    sig = sig_.values[:,:3].astype(float).T
    bps, objectives = GGS(sig, Kmax=10, lamb=1e4)

    tab.set_title(i, str(sub))
    with sub_tab[i]:
        f, ax = plt.subplots(2, 1, figsize=(20, 8))
        # NOTE(review): both panels draw the same breakpoints (bps[6]); only the
        # element used for the list check differs (bps[0] vs bps[-1]). One panel
        # was possibly meant to show another segmentation -- confirm.
        for panel, probe in zip(ax, (bps[0], bps[-1])):
            for row, colour in enumerate('rgb'):
                panel.plot(sig[row, :], c=colour)
            panel.set_title('GGS Segments', fontsize=20)
            # bps is either a list of breakpoint lists or a single flat list.
            breakpoints = bps[6] if isinstance(probe, list) else bps
            for v in breakpoints:
                panel.axvline(v, color="r")
        plt.show()

display(tab)
print('Elapsed time = {}'.format(time.time() - start_time))

Tab(children=(Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output…

Elapsed time = 4349.599684238434


# Plot GGS Results from OSC

In [5]:
# Plot, one tab per subject, the last segmentation stored in segments.pickle.
# NOTE: unpickling can execute arbitrary code -- only load pickle files from a
# trusted source.
segs = pd.read_pickle('segments.pickle')
sub_tab = [widgets.Output() for _ in range(len(segs))]
tab = widgets.Tab(sub_tab)
tasks = ['sit', 'stand', 'walk', 'hoist', 'lift', 'ladder1']
for i, sub in enumerate(segs.keys()):
    tab.set_title(i, str(sub))
    with sub_tab[i]:
        sig__ = filt_noNA_dict[sub]
        sig_ = sig__[sig__.label.isin(tasks)]
        # Builtin `float` instead of `np.float` (deprecated in NumPy 1.20, removed in 1.24).
        sig = sig_.values[:,:3].astype(float).T

        f, ax = plt.subplots(1, figsize=(13, 4))
        for row, colour in enumerate('rgb'):
            ax.plot(sig[row, :], c=colour)
        ax.set_title('GGS Segments', fontsize=20)
        ax.set_ylim([-150, 100])
        for v in segs[sub][-1]:
            ax.axvline(v, color="r")
        plt.show()
display(tab)

Tab(children=(Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output…

# Fix close break points

In [6]:
# For every subject in segments.pickle, merge breakpoints of the last
# segmentation that lie closer together than `thresh` samples.
# NOTE: unpickling can execute arbitrary code -- only load pickle files from a
# trusted source.
segments = pd.read_pickle('segments.pickle')
new_seg_dict = {}
thresh = 960 # ensure 30 seconds between segments (960 samples; presumably assumes SR == 32 -- confirm)
tol = 30     # search margin, in samples, around a cluster when looking for a replacement
for sub in segments.keys():
    segment = segments[sub]   # every segmentation stored for this subject
    seg = segment[-1]         # the last segmentation (the one being cleaned up)
    seg_arr = np.asarray(seg)

    # Positions i where seg[i+1] - seg[i] < thresh.
    idx = np.where(np.diff(seg_arr) < thresh)[0]
    if idx.size:
        # Runs of consecutive positions form one cluster; each run is extended
        # by one so that it also covers the right-hand breakpoint of its last gap.
        cluster_idxs_ = np.split(idx, np.where(np.diff(idx) != 1)[0] + 1)
        clusters_idxs = [np.append(elem, elem[-1] + 1) for elem in cluster_idxs_]
        cluster_vals = [seg_arr[idxs] for idxs in clusters_idxs]
    else:
        # No close breakpoints: without this guard np.split returns one empty
        # array and taking its maximum raises ValueError.
        cluster_vals = []

    clustered = {int(v) for vals in cluster_vals for v in vals}
    new_segs = [v for v in seg if v not in clustered]
    for elem in cluster_vals:
        has_start = bool((elem == 0).any())
        has_end = bool((elem == seg[-1]).any())
        # The outer boundaries are always kept (both of them if one cluster
        # spans the whole signal).
        if has_start:
            new_segs.append(0)
        if has_end:
            new_segs.append(seg[-1])
        if has_start or has_end:
            continue
        lo, hi = elem.min() - tol, elem.max() + tol
        # Walk the segmentations in order and take the smallest breakpoint of
        # the first one that has a breakpoint inside (lo, hi).
        for early_seg in segment:
            early = np.asarray(early_seg)
            near = early[(early > lo) & (early < hi)]
            if near.size:
                new_segs.append(int(near.min()))
                break
    new_seg_dict[sub] = sorted(new_segs)

In [7]:
# Plot, one tab per subject, the merged breakpoints held in new_seg_dict.
segs = new_seg_dict
sub_tab = [widgets.Output() for _ in range(len(segs))]
tab = widgets.Tab(sub_tab)
tasks = ['sit', 'stand', 'walk', 'hoist', 'lift', 'ladder1']
for i, sub in enumerate(segs.keys()):
    tab.set_title(i, str(sub))
    with sub_tab[i]:
        sig__ = filt_noNA_dict[sub]
        sig_ = sig__[sig__.label.isin(tasks)]
        # Builtin `float` instead of `np.float` (deprecated in NumPy 1.20, removed in 1.24).
        sig = sig_.values[:,:3].astype(float).T

        f, ax = plt.subplots(1, figsize=(13, 4))
        for row, colour in enumerate('rgb'):
            ax.plot(sig[row, :], c=colour)
        ax.set_title('GGS Segments', fontsize=20)
        ax.set_ylim([-150, 100])
        for v in segs[sub]:
            ax.axvline(v, color="r")
        plt.show()
display(tab)

Tab(children=(Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output(), Output…

# Segments from OSC, all labels included

In [19]:
# For every subject in segments_all_labels.pickle, merge breakpoints of the
# last segmentation that lie closer together than `thresh` samples.
# NOTE: unpickling can execute arbitrary code -- only load pickle files from a
# trusted source.
segments = pd.read_pickle('segments_all_labels.pickle')
new_seg_dict = {}
thresh = 960 # ensure 30 seconds between segments (960 samples; presumably assumes SR == 32 -- confirm)
tol = 30     # search margin, in samples, around a cluster when looking for a replacement
for sub in segments.keys():
    segment = segments[sub]   # every segmentation stored for this subject
    seg = segment[-1]         # the last segmentation (the one being cleaned up)
    seg_arr = np.asarray(seg)

    # Positions i where seg[i+1] - seg[i] < thresh.
    idx = np.where(np.diff(seg_arr) < thresh)[0]
    if idx.size:
        # Runs of consecutive positions form one cluster; each run is extended
        # by one so that it also covers the right-hand breakpoint of its last gap.
        cluster_idxs_ = np.split(idx, np.where(np.diff(idx) != 1)[0] + 1)
        clusters_idxs = [np.append(elem, elem[-1] + 1) for elem in cluster_idxs_]
        cluster_vals = [seg_arr[idxs] for idxs in clusters_idxs]
    else:
        # No close breakpoints: without this guard np.split returns one empty
        # array and taking its maximum raises ValueError.
        cluster_vals = []

    clustered = {int(v) for vals in cluster_vals for v in vals}
    new_segs = [v for v in seg if v not in clustered]
    for elem in cluster_vals:
        has_start = bool((elem == 0).any())
        has_end = bool((elem == seg[-1]).any())
        # The outer boundaries are always kept (both of them if one cluster
        # spans the whole signal).
        if has_start:
            new_segs.append(0)
        if has_end:
            new_segs.append(seg[-1])
        if has_start or has_end:
            continue
        lo, hi = elem.min() - tol, elem.max() + tol
        # Walk the segmentations in order and take the smallest breakpoint of
        # the first one that has a breakpoint inside (lo, hi).
        for early_seg in segment:
            early = np.asarray(early_seg)
            near = early[(early > lo) & (early < hi)]
            if near.size:
                new_segs.append(int(near.min()))
                break
    new_seg_dict[sub] = sorted(new_segs)

In [None]:
# Plot, one tab per subject, the merged breakpoints held in new_seg_dict,
# using every task label.
segs = new_seg_dict
sub_tab = [widgets.Output() for _ in range(len(segs))]
tab = widgets.Tab(sub_tab)
# tasks = ['sit', 'stand', 'walk', 'hoist', 'lift', 'ladder1']
tasks = ['sit', 'stand', 'walk', 'hoist', 'lift', 'push', 'type', 'ladder1', 'ladder2', 'electricPanel', 'overhead']
for i, sub in enumerate(segs.keys()):
    tab.set_title(i, str(sub))
    with sub_tab[i]:
        sig__ = filt_noNA_dict[sub]
        sig_ = sig__[sig__.label.isin(tasks)]
        # Builtin `float` instead of `np.float` (deprecated in NumPy 1.20, removed in 1.24).
        sig = sig_.values[:,:3].astype(float).T

        f, ax = plt.subplots(1, figsize=(13, 4))
        for row, colour in enumerate('rgb'):
            ax.plot(sig[row, :], c=colour)
        ax.set_title('GGS Segments', fontsize=20)
        ax.set_ylim([-150, 100])
        for v in segs[sub]:
            ax.axvline(v, color="r")
        plt.show()
display(tab)