In [1]:
import os
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import xlsxwriter as xw

%matplotlib inline

In [2]:
def process_activities(df):
    """Rank, cycle-number and sort one testing day's activity annotations.

    Helper function for process_annotations, called once per testing day
    after the raw annotations file has been split into Day 1 and Day 2 data.

    Prints 'All activities present' when the EventType labels found in df
    match the expected activity list exactly; otherwise prints the set of
    labels that are either expected but absent from df, or present in df but
    not expected.

    Inputs:  df - dataframe with at least the columns 'EventType' and
                  'Start Timestamp (ms)'; it is not modified

    Outputs: df - new dataframe, sorted by EventType and then by start time
                  (index reset to 0..n-1), with two added columns:
                  'EventType_Rank' - integer id of the label, numbered in
                                     order of first appearance in the input
                  'Cycle'          - 1-based occurrence number of the row
                                     within its EventType, ordered by start
                                     time (see the overrides in the body)
    """
    complete = {
        'Heart Rate Variability', 'MDS-UPDRS #1: Finger Tapping',
        'MDS-UPDRS #2: Hand Movements', 'MDS-UPDRS #3: Pronation-Supination',
        'MDS-UPDRS #4: Toe Tapping', 'MDS-UPDRS #5: Leg Agility',
        'MDS-UPDRS #6: Arising from Chair', 'MDS-UPDRS #7: Gait',
        'MDS-UPDRS #8: Postural Stability', 'MDS-UPDRS #9: Postural Hand Tremor',
        'MDS-UPDRS #10: Kinetic Hand Tremor', 'MDS-UPDRS #11: Rest Tremor',
        'Motor #1: Standing', 'Motor #2: Walking', 'Motor #3: Walking while Counting',
        'Motor #4: Finger to Nose', 'Motor #5: Alternating Hand Movements',
        'Motor #6: Sit to Stand', 'Motor #7: Drawing on Paper',
        'Motor #8: Typing on a Computer', 'Motor #9: Nuts and Bolts',
        'Motor #10: Drinking Water', 'Motor #11: Organizing Folder',
        'Motor #12: Folding Towels', 'Motor #13: Sitting',
    }

    # Number the labels in order of first appearance. Iterating a set (as the
    # previous version did) gives an arbitrary order that can differ between
    # runs, which made EventType_Rank irreproducible.
    observed = df['EventType'].unique()
    sorterIndex = {label: rank for rank, label in enumerate(observed)}
    sorter = set(observed)

    if not (complete ^ sorter):
        print('All activities present')
    else:
        print(complete ^ sorter)

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    df['EventType_Rank'] = df['EventType'].map(sorterIndex)

    # method='first' keeps the ranks whole numbers even when two rows share a
    # start time; the default 'average' yields e.g. 1.5, which astype(int)
    # would truncate into duplicate cycle numbers.
    df['Cycle'] = (df.groupby('EventType')['Start Timestamp (ms)']
                     .rank(method='first', ascending=True)
                     .astype(int))

    is_updrs = df['EventType'].str.contains('MDS-UPDRS')
    is_heart = df['EventType'].str.contains('Heart')
    second_updrs = is_updrs & (df['Cycle'] == 2)
    early_heart = is_heart & df['Cycle'].isin([1, 2])

    # The second occurrence of an MDS-UPDRS task is labelled cycle 3.
    # NOTE(review): presumably these tasks are only run in cycles 1 and 3 of
    # the protocol - confirm.
    df.loc[second_updrs, 'Cycle'] = 3

    # The first two 'Heart ...' rows get the literal string 'NaN' (not a real
    # missing value), exactly as before; the column is made object-typed first
    # so that mixing ints and strings is explicit.
    df['Cycle'] = df['Cycle'].astype(object)
    df.loc[early_heart, 'Cycle'] = 'NaN'

    # sort_values returns a new frame; the previous version discarded it, so
    # the data was never actually sorted.
    df = df.sort_values(['EventType', 'EventType_Rank', 'Start Timestamp (ms)'], axis=0)

    return df.reset_index(drop=True)


def _to_central_time(epoch_ms):
    # Convert a column of epoch-millisecond values to tz-aware US/Central timestamps.
    stamps = pd.to_datetime(epoch_ms, unit='ms', utc=True)
    # utc=True already yields UTC-aware values on current pandas, and calling
    # tz_localize on tz-aware data raises TypeError - so only localize when
    # the result is still naive.
    if stamps.dt.tz is None:
        stamps = stamps.dt.tz_localize('UTC')
    return stamps.dt.tz_convert('US/Central')


def _without_timezone(df):
    # Copy of df whose start/stop timestamp columns are tz-naive local wall-clock times.
    naive = df.copy()
    for column in ('Start Timestamp (ms)', 'Stop Timestamp (ms)'):
        naive[column] = naive[column].dt.tz_localize(None)
    return naive


def process_annotations(path):
    """Split a subject's raw annotation file into Day 1 and Day 2 data.

    Reads 'annotations.csv' from the subject folder, converts the start/stop
    timestamps from epoch milliseconds to US/Central time, uses the
    'Testing Day' rows (whose Value is 'DAY 1' or 'DAY 2') to find the
    calendar date of each testing day, and runs process_activities on the
    remaining rows of each date. Both results are also written to
    'RawAnnotations.xlsx' in the same folder, one sheet per day
    ('Day1', 'Day2').

    Inputs:  path - filepath of the subject folder containing annotations.csv

    Outputs: d1_df - dataframe containing all Day 1 activities and timestamps,
                     indexed by EventType
             d2_df - dataframe containing all Day 2 activities and timestamps,
                     indexed by EventType
    """
    df = pd.read_csv(os.path.join(path, 'annotations.csv'))
    df = df.drop(['Timestamp (ms)', 'AnnotationId', 'AuthorId'], axis=1)

    df['Start Timestamp (ms)'] = _to_central_time(df['Start Timestamp (ms)'])
    df['Stop Timestamp (ms)'] = _to_central_time(df['Stop Timestamp (ms)'])

    # 'Testing Day' rows carry the day label in Value and mark the date of
    # each testing day by their start timestamp.
    testInfo = df[df['EventType'] == 'Testing Day'].dropna(how='any', axis=0)
    testInfo = (testInfo.drop(['Stop Timestamp (ms)', 'EventType'], axis=1)
                        .rename(columns={'Value': 'Day', 'Start Timestamp (ms)': 'Date'})
                        .reset_index(drop=True))
    testInfo['Date'] = testInfo['Date'].dt.date

    # Everything else is an activity annotation; Value is not needed for those.
    df = df[df['EventType'] != 'Testing Day'].drop('Value', axis=1)

    Day1 = testInfo.loc[testInfo['Day'] == 'DAY 1', 'Date']
    Day2 = testInfo.loc[testInfo['Day'] == 'DAY 2', 'Date']

    started_on = df['Start Timestamp (ms)'].dt.date
    d1_df = process_activities(df[started_on.isin(Day1)].reset_index(drop=True)).set_index('EventType')
    d2_df = process_activities(df[started_on.isin(Day2)].reset_index(drop=True)).set_index('EventType')

    # Excel cannot store tz-aware datetimes, so the sheets get tz-naive local
    # times (same effect as xlsxwriter's remove_timezone option, without
    # depending on a particular writer engine or the ExcelWriter 'options'
    # argument removed in pandas 2.0). The context manager saves and closes
    # the workbook even if writing a sheet fails.
    file = os.path.join(path, 'RawAnnotations.xlsx')
    with pd.ExcelWriter(file) as writer:
        _without_timezone(d1_df).to_excel(writer, sheet_name='Day1')
        _without_timezone(d2_df).to_excel(writer, sheet_name='Day2')

    return d1_df, d2_df

In [3]:
# Subject folder to process.
path = r'C:\Users\adai\Documents\PD Study Data\RawData\1004'

# Split this subject's annotations into per-day frames; this also writes
# RawAnnotations.xlsx into the subject folder.
day1_df, day2_df = process_annotations(path)

# Walk the subject folder looking for the sensor recordings.
# TODO(review): placeholder only - matching files are detected but nothing is
# done with them yet. Earlier commented-out scratch code built
# os.path.join(root, filenames) at this point and opened AccelData.xlsx /
# GyroData.xlsx / ElecData.xlsx workbooks in `path`.
for root, dirs, files in os.walk(path, topdown=True):
    for filenames in files:
        if filenames.endswith(('accel.csv', 'elec.csv', 'gyro.csv')):
            i = 1

All activities present
All activities present


In [None]:
def merge_timestamps(df, merge):
    """Merge the timestamps of the given activities (not implemented yet).

    Inputs:  df - original annotation dataframe
             merge - dataframe of activities to merge

    Outputs: df_new - updated dataframe with merged timestamps per activity

    Raises:  NotImplementedError - always; the merging logic has not been
             written yet
    """
    # The previous version was a bare `for labels in merge:` with no body,
    # which is a SyntaxError and stopped the whole cell from being defined.
    # TODO: iterate over the labels in `merge` and build df_new.
    raise NotImplementedError('merge_timestamps has not been implemented yet')
            
        
    

In [None]:
def absent_timestamps(df, absent):
    """Add placeholder rows for absent activities (not implemented yet).

    Inputs:  df - original annotation dataframe
             absent - dataframe of absent (missing or skipped) activity labels

    Outputs: df_new - updated dataframe with placeholder activities to indicate
                      the activity was missing

    Raises:  NotImplementedError - always; the placeholder logic has not been
             written yet
    """
    # The previous version was a bare `for labels in missing:` with no body:
    # a SyntaxError, and `missing` is not a defined name - the parameter is
    # called `absent`.
    # TODO: iterate over the labels in `absent` and build df_new.
    raise NotImplementedError('absent_timestamps has not been implemented yet')
            

In [None]:
def split_timestamps(df, split):
# Inputs:  df - original annotation dataframe
#          split - dataframe of activity labels needed to be split
#
# Outputs: df_new - updated dataframe with split activities
    for labels in split:
        