# Debug HC01 processing

In [2]:
# Switch the kernel's working directory to the biostamp data root (IPython automagic `cd`).
cd Y:\Inpatient Sensors -Stroke\Data\biostamp_data

Y:\Inpatient Sensors -Stroke\Data\biostamp_data


In [3]:
# Importing the Libraries
import os
import platform
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import pathlib
import pickle #to save files
from itertools import product
from scipy.stats import skew, kurtosis, pearsonr
from scipy.signal import butter, welch, filtfilt, resample
import time
import re
import copy

In [4]:
# Data locations on the Windows 7 machine (Y: drive).
# NOTE: on any other platform or Windows release none of these three names
# is defined, so later cells that use them will raise NameError.
if platform.system() == 'Windows' and platform.release() == '7':
    # Root folder of the biostamp data
    folder_path = r'Y:\Inpatient Sensors -Stroke\Data\biostamp_data'
    # Folder holding the per-subject pickled dictionaries
    dict_path = r'Y:\Inpatient Sensors -Stroke\Data\biostamp_data\Data_dict'
    # Folder holding the control subjects' raw data
    path = r'Y:\Inpatient Sensors -Stroke\Data\biostamp_data\controls'

In [5]:
# Activity labels to extract; they are matched against the 'Value' column
# of the Activity Recognition annotations.
complete = [
    'LYING',
    'SITTING',
    'STANDING',
    'WALKING',
    'STAIRS DOWN',
    'STAIRS UP',
]

## Extract data without 'trial' structure

In [6]:
def process_annotations(path):
    """Load a subject's annotations.csv and reduce it to activity time windows.

    Inputs:
        path - filepath of the subject folder containing annotations.csv

    Outputs:
        df - dataframe indexed by activity name ('Value') with the columns
             'Start Timestamp (ms)', 'Stop Timestamp (ms)' and 'Trial'.
             'Trial' is the 1-based rank of the start timestamp among the
             rows that share the same activity name.
    """
    df = pd.read_csv(os.path.join(path, 'annotations.csv'))
    df = df.drop(columns=['Timestamp (ms)', 'AnnotationId', 'AuthorId'])

    # Keep only rows whose EventType starts with 'Activity'.
    # na=False treats a missing EventType as "no match" instead of producing a
    # NaN in the mask (which cannot be used for boolean indexing); .copy()
    # makes the subset independent so the assignments below do not write
    # into a slice of the full table.
    df = df[df['EventType'].str.match('Activity', na=False)].copy()
    df = df.drop(columns='EventType')

    # Each row takes the Value of the following Activity row; the last row is
    # left without a Value and is removed by dropna (which also removes any
    # row with a missing start/stop timestamp).
    # NOTE(review): presumably the label is logged one row after its timing
    # information - confirm against a raw annotations.csv.
    df['Value'] = df['Value'].shift(-1)
    df = df.dropna()

    # Number repeated occurrences of the same activity in chronological order
    df['Trial'] = df.groupby('Value')['Start Timestamp (ms)'].rank(ascending=True).astype(int)

    return df.reset_index(drop=True).set_index('Value')

In [7]:
# Modifying without Trial

#For a given subject, extracts accelerometer, gyroscope, and EMG/ECG data
#and groups it per activity and sensor location (no per-trial split)
def _select_windows(frame, windows):
    """Return the rows of `frame` whose index lies inside any [start, stop] window."""
    if frame.empty:
        return pd.DataFrame()
    keep = np.zeros(len(frame), dtype=bool)
    for start, stop in windows:
        keep |= (frame.index >= start) & (frame.index <= stop)
    return frame[keep]


def extract_data(SubID, path):
    """Collect a subject's sensor recordings and slice them by annotated activity.

    All annotated windows of an activity are merged into one frame per sensor;
    there is no per-trial level in the result.

    Inputs:
        SubID - subject identifier; not used inside the function, kept so
                existing calls keep working
        path  - subject folder containing annotations.csv and one sub-folder
                per sensor location

    Outputs:
        act_dict   - act_dict[activity][location][sensor] is a dataframe
                     indexed by 'Timestamp (ms)', where activity comes from
                     the module-level list `complete` and sensor is one of
                     'accel', 'gyro', 'elec'. Sensors without data, and
                     activities without annotations, yield empty dataframes.
        timestamps - dataframe returned by process_annotations(path)
    """
    ## This is the annotations.csv dataset cleaned
    ## Used to match timestamp ranges to the accel, gyro, elec data
    timestamps = process_annotations(path)

    # Sensor locations are the folders directly inside the subject directory
    locations = [locs for locs in os.listdir(path) if os.path.isdir(os.path.join(path, locs))]

    # File-name suffix that identifies each sensor type
    sensor_suffixes = {'accel': 'accel.csv', 'gyro': 'gyro.csv', 'elec': 'elec.csv'}

    # Gather the per-file frames in lists and concatenate once per location
    # (DataFrame.append no longer exists in pandas 2.x).
    chunks = {sensor: {locs: [] for locs in locations} for sensor in sensor_suffixes}
    for root, _dirs, files in os.walk(path, topdown=True):
        for filename in files:
            for sensor, suffix in sensor_suffixes.items():
                if filename.endswith(suffix):
                    p = pathlib.Path(root, filename)
                    # First path component below `path` is the sensor location;
                    # .parts is independent of the OS path separator.
                    location = p.relative_to(path).parts[0]
                    chunks[sensor][location].append(pd.read_csv(p).set_index('Timestamp (ms)'))
                    break

    recordings = {
        sensor: {
            locs: (pd.concat(frames) if frames else pd.DataFrame())
            for locs, frames in per_location.items()
        }
        for sensor, per_location in chunks.items()
    }

    act_dict = {}
    for activity in complete:
        if activity in timestamps.index:
            # Selecting with a list always yields a 2-D result, whether the
            # activity was annotated once or several times.
            windows = timestamps.loc[
                [activity], ['Start Timestamp (ms)', 'Stop Timestamp (ms)']
            ].to_numpy()
        else:
            print('No annotations found for ' + activity)
            windows = []

        act_dict[activity] = {
            locs: {
                sensor: _select_windows(recordings[sensor][locs], windows)
                for sensor in sensor_suffixes
            }
            for locs in locations
        }

    return act_dict, timestamps

In [None]:
# Parse HC02's annotations on their own so the activity windows can be inspected.
SubID = 'HC02'
timestamps = process_annotations(os.path.join(path, SubID))

In [None]:
# Display the parsed annotation table
timestamps

## Manually extract HC01 data 

In [9]:
# dict_path2: folder the HC01 test pickle is written to.
# path2: HC01's folder under `path`.
dict_path2 = r'Y:\Inpatient Sensors -Stroke\Data\biostamp_data\HC01test'
SubID = 'HC01'
path2 = os.path.join(path, SubID)

In [11]:
# Sanity check: show the subject ID and the folders that will be read
print(SubID)
print(path)
print(path2)

HC01
Y:\Inpatient Sensors -Stroke\Data\biostamp_data\controls
Y:\Inpatient Sensors -Stroke\Data\biostamp_data\controls\HC01


In [None]:
# Run the extraction for the current subject, then pickle the resulting
# activity dictionary into dict_path2 as '<SubID>dict.pkl'.
act_dict, timestamps = extract_data(SubID, os.path.join(path, SubID))
print('Extract data complete.')
filename = os.path.join(dict_path2, SubID + 'dict.pkl')
with open(filename,'wb') as f:
    pickle.dump(act_dict,f)
print(filename + ' ' + 'File Saved\n')

In [None]:
# Parse the annotations of the folder in path2 without running the full extraction
timestamps = process_annotations(path2)

In [None]:
# Display the parsed annotation table
timestamps

In [None]:
# Pickle act_dict into dict_path2 as '<SubID>dict.pkl'
# (repeats the save step of the extraction cell).
filename = os.path.join(dict_path2, SubID + 'dict.pkl')
with open(filename,'wb') as f:
    pickle.dump(act_dict,f)
print(filename + ' ' + 'File Saved\n')

In [None]:
# Plot sacrum accelerometer data recorded during WALKING
# (dictionary indexed activity -> location -> sensor, no trial level)
rawdata = act_dict['WALKING']['sacrum']['accel']
rawdata.plot(figsize=(8,4))

## Load HC02 data

In [None]:
# Load the pickled activity dictionary for HC02 from dict_path.
# The `with` block closes the file even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code - only load files you created.
subj = 'HC02'
with open(os.path.join(dict_path, subj + 'dict.pkl'), 'rb') as f: # use for C: directory
    act_dict = pickle.load(f)

In [None]:
# Display sacrum accelerometer data for WALKING, trial index 0
act_dict['WALKING'][0]['sacrum']['accel']

In [None]:
# Plot sacrum walking accelerometer data - trial index 0
rawdata = act_dict['WALKING'][0]['sacrum']['accel']
rawdata.plot(figsize=(8,4))

In [None]:
# Plot sacrum walking accelerometer data - trial index 1
rawdata = act_dict['WALKING'][1]['sacrum']['accel']
rawdata.plot(figsize=(8,4))

In [None]:
# Plot sacrum walking accelerometer data - trial index 2
rawdata = act_dict['WALKING'][2]['sacrum']['accel']
rawdata.plot(figsize=(8,4))

In [None]:
# Plot sacrum walking accelerometer data - trial index 3
rawdata = act_dict['WALKING'][3]['sacrum']['accel']
rawdata.plot(figsize=(8,4))

In [None]:
# Plot Sacrum walking - all trials
rawdata = act_dict['WALKING'][0]['sacrum']['accel']
rawdata = rawdata.append(act_dict['WALKING'][1]['sacrum']['accel'])
rawdata = rawdata.append(act_dict['WALKING'][2]['sacrum']['accel'])
rawdata = rawdata.append(act_dict['WALKING'][3]['sacrum']['accel'])
rawdata.plot(figsize=(8,4))

In [None]:
# Show the first and last five rows of the data plotted above
print(rawdata.head(5))
print(rawdata.tail(5))

## Explore features from individual subject

### ...Skip HC01 until error is fixed

In [None]:
# Load the pickled activity dictionary for HC02 from dict_path.
# The `with` block closes the file even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code - only load files you created.
subj = 'HC02'
with open(os.path.join(dict_path, subj + 'dict.pkl'), 'rb') as f:
    act_dict = pickle.load(f)

In [None]:
# Choose task, sensor location, data type, and trials
# Exactly one line per group is left uncommented; the others are the
# alternative values kept for quick switching.

#task = 'LYING'
#task = 'SITTING'
#task = 'STANDING'
task = 'WALKING'
#task = 'STAIRS DOWN'
#task = 'STAIRS UP'

#loc = 'bicep_left'
#loc = 'bicep_right'
#loc = 'biceps_femoris_left'
#loc = 'biceps_femoris_right'
#loc = 'distal_lateral_shank_left' # has accel and gyro
#loc = 'distal_lateral_shank_right' # has accel and gyro
#loc = 'gastrocnemius_left'
#loc = 'gastrocnemius_right'
#loc = 'medial_chest'
#loc = 'posterior_forearm_left'
#loc = 'posterior_forearm_right'
#loc = 'rectus_femoris_left'
#loc = 'rectus_femoris_right'
loc = 'sacrum' # has accel and gyro
#loc = 'tibialis_anterior_left'
#loc = 'tibialis_anterior_right'

## want a + g (presumably accel + gyro - TODO confirm)
sensor = 'accel'
#sensor = 'gyro'
#sensor = 'elec'

# Trial index to inspect
trial = 1

### activity dictionary structure (TASK-TRIAL-LOCATION-SENSOR)

In [None]:
# Plot sacrum walking accelerometer data - trial index 0
rawdata = act_dict['WALKING'][0]['sacrum']['accel']
rawdata.plot(figsize=(8,4))

In [None]:
# Inspect the index of the data plotted above
rawdata.index

In [None]:
# Plot the accelerometer data stored under clip_data[0].
# NOTE(review): clip_data is only assigned in the 'Extract clips' cell at the
# end of the notebook, so on a fresh kernel that cell has to be run first.
rawdata = clip_data[0]['accel']
rawdata.plot(figsize=(8,4))

In [None]:
#HC02 walking sacrum accel - all trials
# 4295x3

In [None]:
# NOTE(review): another cell indexes clip_data as clip_data[0]['accel'], so
# clip_data itself may not provide .plot - confirm what gen_clips returns.
rawdata = clip_data
rawdata.plot(figsize=(8,4))

In [None]:
# Extract clips for the selected task and location, then compute features on them.
# NOTE(review): gen_clips and feature_extraction are not defined in the visible
# part of this notebook - they must be defined or imported before this cell runs.
clip_data = gen_clips(act_dict,task,loc,verbose=True,len_tol=0.95)
feature_extraction(clip_data)