# Test HC01 line by line

In [1]:
cd Y:\Inpatient Sensors -Stroke\Data\biostamp_data

Y:\Inpatient Sensors -Stroke\Data\biostamp_data


In [2]:
# Importing the Libraries
import os
import platform
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import pathlib
import pickle #to save files
from itertools import product
from scipy.stats import skew, kurtosis, pearsonr
from scipy.signal import butter, welch, filtfilt, resample
import time
import re
import copy

In [3]:
if platform.system() == 'Windows':
    if platform.release() == '7':
        path = r'Y:\Inpatient Sensors -Stroke\Data\biostamp_data\controls'
        folder_path = r'Y:\Inpatient Sensors -Stroke\Data\biostamp_data'
        dict_path = r'Y:\Inpatient Sensors -Stroke\Data\biostamp_data\Data_dict'

In [4]:
# List based on Value data of Activity Recognition
complete= list(['LYING','SITTING','STANDING','WALKING','STAIRS DOWN','STAIRS UP'])

## Extract data without 'trial' structure

In [5]:
def process_annotations(path):
#---------------------------------------------------------------------------------------------------------
# Processes raw annotations file to extract start / end timestamps and remove unnecessary data
#
# Inputs:  path - filepath of the subject folder containing annotations.csv
#
# Outputs: df - dataframe containing list of activities and their start / end timestamps
#---------------------------------------------------------------------------------------------------------
    df = pd.read_csv(os.path.join(path, 'annotations.csv'))
    del df['Timestamp (ms)']
    del df['AnnotationId']
    del df['AuthorId']
    
    # subset Activity Recognition data by partially match EventType string
    df = df[df['EventType'].str.match('Activity')]
    del df['EventType']
    df.Value = df.Value.shift(-1)
    df = df.dropna()
    
    # Create Trial column for Value
    sorter = set(df.Value.unique().flatten())
    sorterIndex = dict(zip(sorter, range(len(sorter))))        
    df['Value_Rank'] = df['Value'].map(sorterIndex)
    df['Trial'] = df.groupby('Value')['Start Timestamp (ms)'].rank(ascending=True).astype(int)
    del df['Value_Rank']
    df = df.reset_index(drop=True).set_index('Value')
    
    return df

In [205]:
# original version
def extract_data(SubID, path):
#---------------------------------------------------------------------------------------------------------
# For a given subject, extracts and separates accelerometer, gyroscope, and EMG/ECG data into trials and sensor per activity
#
# Inputs: SubID - string of numbers corresponding to the subject ID
#         path - system path to corresponding subject's raw data files
#
# Outputs: act_dict - dictionary of both MDS-UPDRS and Motor Assessment activities separated by trial, sensor location, and
#                     accelerometer + gyroscope or accelerometer + EMG/ECG data. Every key within this dictionary is a dictionary
#---------------------------------------------------------------------------------------------------------
    timestamps = process_annotations(path)
  
    # Creates list of sensor locations from folders within subject's raw data directory
    locations = [locs for locs in os.listdir(path) if os.path.isdir(os.path.join(path, locs))]
    
    # Creates dictionary of empty dataframes to merge all accelerometer, gyroscope, and EMG/ECG data for each sensor
    accel = {locs: pd.DataFrame() for locs in locations}
    gyro = {locs: pd.DataFrame() for locs in locations}
    elec = {locs: pd.DataFrame() for locs in locations}
    
    # Finds and merges all accelerometer, gyroscope, and EMG/ECG data for each sensor, retains datetime information
    for root, dirs, files in os.walk(path, topdown=True):
        for filenames in files:
            if filenames.endswith('accel.csv'):
                p = pathlib.Path(os.path.join(root, filenames))
                location = str(p.relative_to(path)).split("\\")[0]
                temp_df = pd.read_csv(p).set_index('Timestamp (ms)')
                accel[location] = accel[location].append(temp_df)

            elif filenames.endswith('gyro.csv'):
                p = pathlib.Path(os.path.join(root, filenames))
                location = str(p.relative_to(path)).split("\\")[0]
                temp_df = pd.read_csv(p).set_index('Timestamp (ms)')
                gyro[location] = gyro[location].append(temp_df)

            elif filenames.endswith(('elec.csv', 'emg.csv', 'ecg.csv', 'ekg.csv')):
                p = pathlib.Path(os.path.join(root, filenames))
                location = str(p.relative_to(path)).split("\\")[0]
                temp_df = pd.read_csv(p).set_index('Timestamp (ms)')
                elec[location] = elec[location].append(temp_df)
    
    # Complete dictionary of all activities
    act_dict = {acts: pd.DataFrame() for acts in complete_acts}
    
    for activities in complete_acts:

        startSize = timestamps.loc[activities, 'Start Timestamp (ms)']

        if np.size(startSize) == 1:
            startTimestamp = timestamps.loc[activities, 'Start Timestamp (ms)']
            endTimestamp = timestamps.loc[activities, 'Stop Timestamp (ms)']
        else:
            startTimestamp = timestamps.loc[activities, 'Start Timestamp (ms)'].values
            endTimestamp = timestamps.loc[activities, 'Stop Timestamp (ms)'].values

        # Create trial dictionary with each key containing all sensor data related with each activity's trial
        trial_dict = {trials: pd.DataFrame() for trials in range(0, np.size(startTimestamp))}
        
        # Populate trial directory keys
        for trials in range(0, np.size(startTimestamp)):

            if np.size(startSize) == 1:
                startTime = startTimestamp
                endTime = endTimestamp
            else:
                startTime = startTimestamp[trials]
                endTime = endTimestamp[trials]

            # Create sensor location dictionary with each key corresponding to sensor locations
            sensor_dict = {locs: pd.DataFrame() for locs in locations}

############################
# function is good to here
############################
            
            # Extract sensor data and populate sensor_dict with sensor data
            for location in locations:

                data = {'accel': pd.DataFrame(), 'gyro': pd.DataFrame(), 'elec': pd.DataFrame()}

                if not accel[location].empty:
                    accelData = accel[location]
                    data['accel'] = accelData[(accelData.index >= startTime) & (accelData.index <= endTime)]
                    ###########
                    ###########
                    #print(bool(data))
                    #if not bool(data):
                    #    data['accel'] = accelData[(accelData.index >= startTimestamp) & (accelData.index <= endTimestamp)]
                    #    print(bool(data))
                    #    print('added data') ###########
                    #else:
                    #    data['accel'] = data['accel'].append(accelData[(accelData.index >= startTimestamp) & (accelData.index <= endTimestamp)])
                    print(accelData.head(1))


                if not gyro[location].empty:
                    gyroData = gyro[location]
                    data['gyro'] = gyroData[(gyroData.index >= startTime) & (gyroData.index <= endTime)]
                    print(gyroData.head(1))

                if not elec[location].empty:
                    elecData = elec[location]
                    data['elec'] = elecData[(elecData.index >= startTime) & (elecData.index <= endTime)]
                    print(elecData.head(1))

            trial_dict[trials] = sensor_dict

        act_dict[activities] = trial_dict
    return act_dict, timestamps

In [155]:
print(SubID)
print(path)

HC01
Y:\Inpatient Sensors -Stroke\Data\biostamp_data\controls


In [None]:
act_dict, timestamps = extract_data(SubID, os.path.join(path, SubID))

In [None]:
accel

In [167]:
act_dict

{'LYING': {0: {'tibialis_anterior_left': Empty DataFrame
   Columns: []
   Index: [], 'gastrocnemius_right': Empty DataFrame
   Columns: []
   Index: [], 'sacrum': Empty DataFrame
   Columns: []
   Index: [], 'distal_lateral_shank_right': Empty DataFrame
   Columns: []
   Index: [], 'tibialis_anterior_right': Empty DataFrame
   Columns: []
   Index: [], 'posterior_forearm_right': Empty DataFrame
   Columns: []
   Index: [], 'bicep_right': Empty DataFrame
   Columns: []
   Index: [], 'rectus_femoris_left': Empty DataFrame
   Columns: []
   Index: [], 'biceps_femoris_right': Empty DataFrame
   Columns: []
   Index: [], 'posterior_forearm_left': Empty DataFrame
   Columns: []
   Index: [], 'biceps_femoris_left': Empty DataFrame
   Columns: []
   Index: [], 'gastrocnemius_left': Empty DataFrame
   Columns: []
   Index: [], 'bicep_left': Empty DataFrame
   Columns: []
   Index: [], 'medial_chest': Empty DataFrame
   Columns: []
   Index: [], 'distal_lateral_shank_left': Empty DataFrame
   C

# Test line by line in function and check output

In [184]:
locations = [locs for locs in os.listdir(path2) if os.path.isdir(os.path.join(path2, locs))]
print(locations)

['tibialis_anterior_left', 'gastrocnemius_right', 'sacrum', 'distal_lateral_shank_right', 'tibialis_anterior_right', 'posterior_forearm_right', 'bicep_right', 'rectus_femoris_left', 'biceps_femoris_right', 'posterior_forearm_left', 'biceps_femoris_left', 'gastrocnemius_left', 'bicep_left', 'medial_chest', 'distal_lateral_shank_left', 'rectus_femoris_right']


In [188]:
# Creates dictionary of empty dataframes to merge all accelerometer, gyroscope, and EMG/ECG data for each sensor
accel = {locs: pd.DataFrame() for locs in locations}
gyro = {locs: pd.DataFrame() for locs in locations}
elec = {locs: pd.DataFrame() for locs in locations}

# Finds and merges all accelerometer, gyroscope, and EMG/ECG data for each sensor, retains datetime information
for root, dirs, files in os.walk(path2, topdown=True):
    for filenames in files:
        if filenames.endswith('accel.csv'):
            p = pathlib.Path(os.path.join(root, filenames))
            location = str(p.relative_to(path2)).split("\\")[0]
            temp_df = pd.read_csv(p).set_index('Timestamp (ms)')
            accel[location] = accel[location].append(temp_df)

        elif filenames.endswith('gyro.csv'):
            p = pathlib.Path(os.path.join(root, filenames))
            location = str(p.relative_to(path2)).split("\\")[0]
            temp_df = pd.read_csv(p).set_index('Timestamp (ms)')
            gyro[location] = gyro[location].append(temp_df)

        elif filenames.endswith(('elec.csv', 'emg.csv', 'ecg.csv', 'ekg.csv')):
            p = pathlib.Path(os.path.join(root, filenames))
            location = str(p.relative_to(path2)).split("\\")[0]
            temp_df = pd.read_csv(p).set_index('Timestamp (ms)')
            elec[location] = elec[location].append(temp_df)

In [193]:
complete_acts = complete
    
# Complete dictionary of all activities
act_dict = {acts: pd.DataFrame() for acts in complete_acts}

In [208]:
for activities in complete_acts:
        
    startSize = timestamps.loc[activities, 'Start Timestamp (ms)']

    if np.size(startSize) == 1:
        startTimestamp = timestamps.loc[activities, 'Start Timestamp (ms)']
        endTimestamp = timestamps.loc[activities, 'Stop Timestamp (ms)']
    else:
        startTimestamp = timestamps.loc[activities, 'Start Timestamp (ms)'].values
        endTimestamp = timestamps.loc[activities, 'Stop Timestamp (ms)'].values
    
    # Create trial dictionary with each key containing all sensor data related with each activity's trial
    trial_dict = {trials: pd.DataFrame() for trials in range(0, np.size(startTimestamp))}
    
    # Populate trial directory keys
    for trials in range(0, np.size(startTimestamp)):

        if np.size(startSize) == 1:
            startTime = startTimestamp
            endTime = endTimestamp
        else:
            startTime = startTimestamp[trials]
            endTime = endTimestamp[trials]
            
        # Create sensor location dictionary with each key corresponding to sensor locations
        sensor_dict = {locs: pd.DataFrame() for locs in locations}
        
### somewhere in this for loop

        # Extract sensor data and populate sensor_dict with sensor data
        for location in locations:

            data = {'accel': pd.DataFrame(), 'gyro': pd.DataFrame(), 'elec': pd.DataFrame()}

            if not accel[location].empty:
                accelData = accel[location]
                data['accel'] = accelData[(accelData.index >= startTime) & (accelData.index <= endTime)]  
                print(location)
                print(data['accel'].head(1))

            if not gyro[location].empty:
                gyroData = gyro[location]
                data['gyro'] = gyroData[(gyroData.index >= startTime) & (gyroData.index <= endTime)]
                print(location)
                print(data['gyro'].head(1))

            if not elec[location].empty:
                elecData = elec[location]
                data['elec'] = elecData[(elecData.index >= startTime) & (elecData.index <= endTime)]
                print(location)
                print(data['elec'].head(1))

            sensor_dict[location] = data

        trial_dict[trials] = sensor_dict

    act_dict[activities] = trial_dict

tibialis_anterior_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
tibialis_anterior_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
gastrocnemius_right
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
gastrocnemius_right
Empty DataFrame
Columns: [Sample (V)]
Index: []
sacrum
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
sacrum
Empty DataFrame
Columns: [Gyro X (°/s), Gyro Y (°/s), Gyro Z (°/s)]
Index: []
distal_lateral_shank_right
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
distal_lateral_shank_right
Empty DataFrame
Columns: [Gyro X (°/s), Gyro Y (°/s), Gyro Z (°/s)]
Index: []
tibialis_anterior_right
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
tibialis_anterior_right
Empty DataFrame
Columns: [Sample (V)]
Index: []
posterior_forearm_right
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
posterior_forearm_right


bicep_right
Empty DataFrame
Columns: [Sample (V)]
Index: []
rectus_femoris_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
rectus_femoris_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
biceps_femoris_right
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
biceps_femoris_right
Empty DataFrame
Columns: [Sample (V)]
Index: []
posterior_forearm_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
posterior_forearm_left
Empty DataFrame
Columns: [Gyro X (°/s), Gyro Y (°/s), Gyro Z (°/s)]
Index: []
biceps_femoris_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
biceps_femoris_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
gastrocnemius_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
gastrocnemius_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
bicep_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
bic

bicep_right
Empty DataFrame
Columns: [Sample (V)]
Index: []
rectus_femoris_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
rectus_femoris_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
biceps_femoris_right
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
biceps_femoris_right
Empty DataFrame
Columns: [Sample (V)]
Index: []
posterior_forearm_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
posterior_forearm_left
Empty DataFrame
Columns: [Gyro X (°/s), Gyro Y (°/s), Gyro Z (°/s)]
Index: []
biceps_femoris_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
biceps_femoris_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
gastrocnemius_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
gastrocnemius_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
bicep_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
bic

Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
biceps_femoris_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
gastrocnemius_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
gastrocnemius_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
bicep_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
bicep_left
Empty DataFrame
Columns: [Sample (V)]
Index: []
medial_chest
                Accel X (g)  Accel Y (g)  Accel Z (g)
Timestamp (ms)                                       
1510002852170      -1.13814     0.177798     0.082582
medial_chest
                Sample (V)
Timestamp (ms)            
1510002852156      0.00428
distal_lateral_shank_left
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Accel Z (g)]
Index: []
distal_lateral_shank_left
Empty DataFrame
Columns: [Gyro X (°/s), Gyro Y (°/s), Gyro Z (°/s)]
Index: []
rectus_femoris_right
Empty DataFrame
Columns: [Accel X (g), Accel Y (g), Acce

In [231]:
accelData = accel['tibialis_anterior_left']
print(accelData.head(5))

                Accel X (g)  Accel Y (g)  Accel Z (g)
Timestamp (ms)                                       
1509387689614     -0.989212     0.068910    -0.059022
1509387689646     -0.988724     0.068910    -0.063905
1509387689678     -0.993606     0.060609    -0.050233
1509387689710     -0.990677     0.062074    -0.035584
1509387689742     -0.988724     0.059632    -0.065858


In [238]:
print(startTimestamp)
print(startTime)
print(endTimestamp)
print(endTime)

1510002568801
1510002568801
1510002581432
1510002581432


In [251]:
print(data['accel'].head(5))
print(data['accel'].tail(5))

                Accel X (g)  Accel Y (g)  Accel Z (g)
Timestamp (ms)                                       
1509387689614     -0.989212     0.068910    -0.059022
1509387689646     -0.988724     0.068910    -0.063905
1509387689678     -0.993606     0.060609    -0.050233
1509387689710     -0.990677     0.062074    -0.035584
1509387689742     -0.988724     0.059632    -0.065858
                Accel X (g)  Accel Y (g)  Accel Z (g)
Timestamp (ms)                                       
1509399289636      0.011780     0.032288     0.988846
1509399289667      0.008362     0.031312     1.012772
1509399289699      0.013245     0.020081     1.006912
1509399289731      0.008850     0.025452     0.994217
1509399289763      0.014221     0.023987     1.003983


In [253]:
# startTime filter is the problem!!!
data['accel'] = accelData[(accelData.index >= 1509399289763)]
data['accel'].head(5)

Unnamed: 0_level_0,Accel X (g),Accel Y (g),Accel Z (g)
Timestamp (ms),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1509399289763,0.014221,0.023987,1.003983


In [250]:
# endTime looks fine
data['accel'] = accelData[(accelData.index <= 1510002130051)]
data['accel'].head(5)

Unnamed: 0_level_0,Accel X (g),Accel Y (g),Accel Z (g)
Timestamp (ms),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1509387689614,-0.989212,0.06891,-0.059022
1509387689646,-0.988724,0.06891,-0.063905
1509387689678,-0.993606,0.060609,-0.050233
1509387689710,-0.990677,0.062074,-0.035584
1509387689742,-0.988724,0.059632,-0.065858


In [220]:
timestamps

Unnamed: 0_level_0,Start Timestamp (ms),Stop Timestamp (ms),Trial
Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LYING,1510002069639,1510002130051,1
SITTING,1510002153976,1510002214893,1
STANDING,1510002250787,1510002311224,1
WALKING,1510002352692,1510002384115,1
STANDING,1510002415166,1510002475751,2
STAIRS DOWN,1510002535559,1510002547458,1
STAIRS UP,1510002568801,1510002581432,1
STANDING,1510002620624,1510002651580,3
WALKING,1510002668365,1510002721676,2
WALKING,1510002852155,1510002863746,3


In [257]:
import datetime
#lying start time
print(
    datetime.datetime.fromtimestamp(
        int("1510002069639")
    ).strftime('%Y-%m-%d %H:%M:%S')
)
# lying stop time
print(
    datetime.datetime.fromtimestamp(
        int("1510002130051")
    ).strftime('%Y-%m-%d %H:%M:%S')
)

OSError: [Errno 22] Invalid argument

In [None]:
# start of data
print(
    datetime.datetime.fromtimestamp(
        int("1509387689614")
    ).strftime('%Y-%m-%d %H:%M:%S')
)
# end of data
print(
    datetime.datetime.fromtimestamp(
        int("1509399289763")
    ).strftime('%Y-%m-%d %H:%M:%S')
)

In [236]:
#lying
1510002130051 - 1510002069639

60412

In [237]:
# stairs up
1510002581432 - 1510002568801

12631

## Manually extract HC01 data 

HC01dict.pkl seems too small, so run by itself

In [None]:
dict_path2 = r'Y:\Inpatient Sensors -Stroke\Data\biostamp_data\HC01test'
SubID = 'HC01'
path2 = os.path.join(path, SubID)

In [None]:
# Plot Sacrum walking
rawdata = act_dict['WALKING']['sacrum']['accel']
rawdata.plot(figsize=(8,4))