In [11]:
import os
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import xlsxwriter as xw
import pathlib

%matplotlib inline

In [12]:
def process_activities(df):
# Helper function for process_annotations
# Sorts cycles after splitting raw annotations file into Day 1 and Day 2 data
#
# Inputs:  df - dataframe to sort annotations
#
# Outputs: df - processed dataframe
    complete = set(np.array(['Heart Rate Variability', 'MDS-UPDRS #1: Finger Tapping',
           'MDS-UPDRS #2: Hand Movements', 'MDS-UPDRS #3: Pronation-Supination',
           'MDS-UPDRS #4: Toe Tapping', 'MDS-UPDRS #5: Leg Agility',
           'MDS-UPDRS #6: Arising from Chair', 'MDS-UPDRS #7: Gait',
           'MDS-UPDRS #8: Postural Stability', 'MDS-UPDRS #9: Postural Hand Tremor',
           'MDS-UPDRS #10: Kinetic Hand Tremor', 'MDS-UPDRS #11: Rest Tremor',
           'Motor #1: Standing', 'Motor #2: Walking', 'Motor #3: Walking while Counting',
           'Motor #4: Finger to Nose', 'Motor #5: Alternating Hand Movements',
           'Motor #6: Sit to Stand', 'Motor #7: Drawing on Paper',
           'Motor #8: Typing on a Computer', 'Motor #9: Nuts and Bolts',
           'Motor #10: Drinking Water', 'Motor #11: Organizing Folder',
           'Motor #12: Folding Towels', 'Motor #13: Sitting']).flatten())

    sorter = set(df.EventType.unique().flatten())
    sorterIndex = dict(zip(sorter, range(len(sorter))))
    
    if (complete ^ sorter):
        print('Missing: ' + str(complete ^ sorter))
        
    df['EventType_Rank'] = df['EventType'].map(sorterIndex)
    df['Cycle'] = df.groupby('EventType')['Start Timestamp (ms)'].rank(ascending=True).astype(int)
    df[df['EventType'].str.contains('MDS-UPDRS')] = df[df['EventType'].str.contains('MDS-UPDRS')].replace(to_replace={'Cycle': {2: 3}})
    df[df['EventType'].str.contains('Heart')] = df[df['EventType'].str.contains('Heart')].replace(to_replace={'Cycle': {1: 'NaN', 2: 'NaN'}})
    df.sort_values(['EventType', 'EventType_Rank', 'Start Timestamp (ms)'], axis=0)
    
    return df


def process_annotations(path):
# Splits raw annotation file into Day 1 and Day 2 data by sheet in the .xlsx file
# 'RawAnnotations.xlsx'
#
# Inputs:  path - filepath of the subject folder containing annotations.csv
#
# Outputs: d1_df - dataframe containing all Day 1 activities and timestamps
#          d2_df - dataframe containing all Day 2 activities and timestamps
    df = pd.read_csv(os.path.join(path, 'annotations.csv'))
    del df['Timestamp (ms)']
    del df['AnnotationId']
    del df['AuthorId']
    
    df['Start Timestamp (ms)'] = pd.to_datetime(df['Start Timestamp (ms)'], unit='ms', utc=True).dt.tz_localize('UTC').dt.tz_convert('US/Central')
    df['Stop Timestamp (ms)'] = pd.to_datetime(df['Stop Timestamp (ms)'], unit='ms', utc=True).dt.tz_localize('UTC').dt.tz_convert('US/Central')
            
    testInfo = df[df.EventType == 'Testing Day'].dropna(how='any', axis=0)
    del testInfo['Stop Timestamp (ms)']
    del testInfo['EventType']
    del df['Value']
    
    testInfo = testInfo.rename(columns = {'Value':'Day', 'Start Timestamp (ms)':'Date'}).reset_index(drop=True)
    testInfo['Date'] = testInfo['Date'].dt.date
    
    df = df[(df.EventType != 'Testing Day')]

    Day1 = testInfo.loc[testInfo['Day'] == 'DAY 1', 'Date']
    Day2 = testInfo.loc[testInfo['Day'] == 'DAY 2', 'Date']

    d1_df = process_activities(df[df['Start Timestamp (ms)'].dt.date.isin(Day1)].reset_index(drop=True)).set_index('EventType')
    d2_df = process_activities(df[df['Start Timestamp (ms)'].dt.date.isin(Day2)].reset_index(drop=True)).set_index('EventType')

    file = os.path.join(path, 'RawAnnotations.xlsx')
    writer = pd.ExcelWriter(file, options={'remove_timezone': True})
    d1_df.to_excel(writer, sheet_name='Day 1')
    d2_df.to_excel(writer, sheet_name='Day 2')
    writer.save()
    
    return d1_df, d2_df

In [50]:
path = r'C:\Users\adai\Documents\PD Study Data\RawData\1020'

day1_df, day2_df = process_annotations(path)

print(day1_df)

accel = os.path.join(path, 'AccelData.xlsx')
gyro = os.path.join(path, 'GyroData.xlsx')
elec = os.path.join(path, 'ElecData.xlsx')

accel_writer = pd.ExcelWriter(accel, options={'remove_timezone': True})
gyro_writer = pd.ExcelWriter(gyro, options={'remove_timezone': True})
elec_writer = pd.ExcelWriter(elec, options={'remove_timezone': True})
temp = list()
for root, dirs, files in os.walk(path, topdown=True):
    for filenames in files:
        if filenames.endswith('accel.csv'):
            p = pathlib.Path(os.path.join(root, filenames))
            location = str(p.relative_to(path)).split("\\")[0]
            print(location)

                                                 Start Timestamp (ms)  \
EventType                                                               
Heart Rate Variability               2017-07-20 13:14:48.326000-05:00   
MDS-UPDRS #11: Rest Tremor           2017-07-20 13:36:55.425000-05:00   
MDS-UPDRS #9: Postural Hand Tremor   2017-07-20 13:37:17.851000-05:00   
MDS-UPDRS #10: Kinetic Hand Tremor   2017-07-20 13:37:25.201000-05:00   
MDS-UPDRS #1: Finger Tapping         2017-07-20 13:38:14.522000-05:00   
MDS-UPDRS #2: Hand Movements         2017-07-20 13:38:25.206000-05:00   
MDS-UPDRS #3: Pronation-Supination   2017-07-20 13:38:34.789000-05:00   
MDS-UPDRS #4: Toe Tapping            2017-07-20 13:38:42.755000-05:00   
MDS-UPDRS #5: Leg Agility            2017-07-20 13:38:51.011000-05:00   
MDS-UPDRS #6: Arising from Chair     2017-07-20 13:38:58.693000-05:00   
MDS-UPDRS #7: Gait                   2017-07-20 13:39:11.214000-05:00   
MDS-UPDRS #8: Postural Stability     2017-07-20 13: