In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pathlib
import pickle
from scipy.stats import skew, kurtosis, pearsonr
from scipy.signal import butter, welch, filtfilt, resample
import copy

from PreprocessFcns import *

%matplotlib inline

In [2]:
# load file with clinician scores of tasks
mot_scores = pd.read_excel('MotorTasks.xls')

In [3]:
def DataAggregator(subjDict,taskList_Abb,taskScores,sessionList,freq):
    
    Data = pd.DataFrame()

    #dataframe to track lost/missing data
    numSamples = pd.DataFrame()

    s = 0
    for subj in list(subjDict.keys()):

        s += 1
        print('Subject %d (%d of %d)'%(subj,s,len(subjDict)))

        for t in range(len(taskList_Abb)):

            task = taskList_Abb[t]
            task_score = taskScores[t]

            for trial in range(len(sessionList)):

                visit = sessionList[trial]

                try:
                    data = pd.read_csv('Z:CIS-PD Study\\MJFF Curation\\TaskAcc\\' + str(subj) + '_' + str(trial) + 
                                      '_' + task + '.csv',parse_dates=['timestamp'])[['timestamp','x','y','z']]

                except:
                    #print('No data found for %s trial %d'%(task,trial))
                    continue

                side = subjDict[subj]
                if side == 'left':
                    otherside = 'right'
                else:
                    otherside = 'left'

                notBrady = ['standing', 'sitting', 'sit to stand', 'alternating '+otherside, 'finger to nose '+otherside]

                if (task_score not in notBrady):
                    subj_score = mot_scores.loc[mot_scores['subject']==subj,['subject','visit',
                                            task_score+ ' ' + 'tremor ' + side + ' upper limb',
                                            task_score+ ' ' + 'bradykinesia ' + side + ' upper limb']]
                else:
                    subj_score = mot_scores.loc[mot_scores['subject']==subj,['subject','visit',
                                            task_score+ ' ' + 'tremor ' + side + ' upper limb']]
                    subj_score[task_score+ ' ' + 'bradykinesia ' + side + ' upper limb'] = np.nan

    #                 print(task_score)
                subj_score = subj_score.rename(index=str,columns={subj_score.columns[3]:'Bradykinesia',subj_score.columns[2]:'Tremor'})
                subj_score.index = range(len(subj_score))

                data['timestamp'] = (data.timestamp.values - data.timestamp.values[0]).astype('timedelta64[ms]').astype(int)
                data = data.set_index('timestamp')

                data = HPfilter(data)

                clip_data = gen_clips_mc10(data,downsample=freq,basefreq=50)

                _,_ = feature_extraction(clip_data)

                if 'features' in clip_data.keys():
                    D = clip_data['features']
                    featcols = list(D.columns)
                    D['Bradykinesia'] = subj_score['Bradykinesia'][trial]
                    D['Tremor'] = subj_score['Tremor'][trial]
                    D['Visit'] = visit
                    D['Task'] = task
                    D['Subject'] = subj
                    Data = pd.concat([Data,D])

                clip_lens = []    

                for c in range(len(clip_data['data'])):
                    clip_lens.append(len(clip_data['data'][c]))
                N = pd.DataFrame(data=np.asarray(clip_lens),columns=['Samples'],dtype='int')
                N['Subject'] = subj
                N['Task'] = task
                N['Visit'] = sessionList[trial]
                numSamples = pd.concat([numSamples,N])

    cols = ['Subject','Visit','Task','Bradykinesia','Tremor'] + featcols
    Data = Data[cols]

    numSamples = numSamples[['Subject','Visit','Task','Samples']]

    if freq==50:
        numSamples.to_csv('Z:CIS-PD Study\\Watch Sample Lengths.csv')

    return Data

    #Data.to_csv('Z:CIS-PD Study\\Smartwatch Data 50Hz.csv')

In [4]:
subjDict = {'Subject1':'right'}

taskList = ['Standing', 'Walking', 'Walking while counting', 'Finger to nose--right hand', 
            'Finger to nose--left hand', 'Alternating right hand movements', 'Alternating left hand movements', 
            'Sit to stand', 'Drawing on a paper', 'Typing on a computer keyboard', 'Assembling nuts and bolts', 
            'Taking a glass of water and drinking', 'Organizing sheets in a folder', 'Folding towels', 'Sitting']

taskScores = ['standing','walking','walking while counting','finger to nose right','finger to nose left',
                   'alternating right','alternating left','sit to stand','drawing on a paper',
                   'typing on a computer keyboard','assembling nuts and bolts','taking a glass of water and drinking',
                   'organizing sheets in a folder','folding towels','sitting']

taskList_Abb = ['Stndg', 'Wlkg', 'WlkgCnt', 'FtnR', 'FtnL', 'RamR', 'RamL', 'SitStand', 'Drwg', 'Typg', 'NtsBts',
                'Drnkg', 'Sheets', 'Fldg', 'Sitng']

sessionList = ['2 Weeks: Time 0', '2 Weeks: Time 30', '2 Weeks: Time 60', '2 Weeks: Time 90', '2 Weeks: Time 120', 
               '2 Weeks: Time 150', '4 Weeks']

In [5]:
for freq in [50,45,40,30,25,20,15,10,7.5,5,3]:
    print(type(freq))
    Data = DataAggregator(subjDict,taskList_Abb,taskScores,sessionList,freq)
    Data.to_csv('Watch Data ' + str(freq) +  'Hz.csv')

<class 'float'>
Subject 1004 (1 of 14)
Subject 1016 (2 of 14)
Subject 1018 (3 of 14)
Subject 1019 (4 of 14)
Subject 1020 (5 of 14)
Subject 1024 (6 of 14)
Subject 1029 (7 of 14)
Subject 1030 (8 of 14)
Subject 1032 (9 of 14)
Subject 1038 (10 of 14)
Subject 1044 (11 of 14)
Subject 1046 (12 of 14)
Subject 1049 (13 of 14)
Subject 1051 (14 of 14)
<class 'int'>
Subject 1004 (1 of 14)
Subject 1016 (2 of 14)
Subject 1018 (3 of 14)
Subject 1019 (4 of 14)
Subject 1020 (5 of 14)
Subject 1024 (6 of 14)
Subject 1029 (7 of 14)
Subject 1030 (8 of 14)
Subject 1032 (9 of 14)
Subject 1038 (10 of 14)
Subject 1044 (11 of 14)
Subject 1046 (12 of 14)
Subject 1049 (13 of 14)
Subject 1051 (14 of 14)
