In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pathlib
import pickle
from scipy.stats import skew, kurtosis, pearsonr
from scipy.signal import butter, welch, filtfilt, resample
import copy

from PreprocessFcns import *

%matplotlib inline

In [2]:
#save corrected version of motor scores.xls 
mot_scores = pd.read_excel('Z:CIS-PD Study\Scores\MotorTasks.xls')
#fix error in original cols labels
mot_scores.columns.values[75] = 'alternating left hand movements tremor right upper limb'
mot_scores.columns.values[150] = 'sitting tremor right upper limb'
#remove words:(Qxx) and 'rating' from each column for readability
cols= mot_scores.columns
cols = cols.tolist()
colsnew = [x.split('(')[0] for x in cols]
colsnew = [x.strip() for x in colsnew] #remove whitespace
colsnew = [x.split('rating')[0] for x in colsnew]
colsnew = [x.strip() for x in colsnew]
colsnew = [x.lower() for x in colsnew] #make all lower case 
colsnew = [x.replace('\x97',' ') for x in colsnew]
colsnew = [x.replace('—',' ') for x in colsnew]    
#simplify notation
for i in range(len(colsnew)):
    x = colsnew[i]
    if x.find('finger to nose')>-1:
        colsnew[i] = x.replace(' hand','')
    if x.find('alternating')>-1:
        colsnew[i] = x.replace(' hand movements','')
c = dict(zip(cols,colsnew))
mot_scores = mot_scores.rename(index=str, columns=c)
#change 1 month label to 4 wks for proper sorting (to incorporate in xls file)
mot_scores.loc[mot_scores['visit']=='1 Month','visit']='4 Weeks'
mot_scores.to_excel('Z:CIS-PD Study\Scores\MotorTasks.xls') #note that timestamps are not properly converted

In [3]:
def DataAggregator(subjDict,taskList_Abb,taskScores,sessionList,freq):
    
    Data = pd.DataFrame()
    
    #dataframe to track lost/missing data
    numSamples = pd.DataFrame()
    
    s = 0
    for subj in list(subjDict.keys()):
        
        s += 1
        print('Subject %d (%d of %d)'%(subj,s,len(subjDict)))
        
        for t in range(len(taskList_Abb)):
            
            task = taskList_Abb[t]
            task_score = taskScores[t]
            
            for trial in range(len(sessionList)):
                
                visit = sessionList[trial]
                
                try:
                    data = pd.read_csv('Z:CIS-PD Study\\MJFF Curation\\TaskAcc\\' + str(subj) + '_' + str(trial) + 
                                      '_' + task + '.csv',parse_dates=['timestamp'])[['timestamp','x','y','z']]
                
                except:
                    #print('No data found for %s trial %d'%(task,trial))
                    continue
                    
                side = subjDict[subj]
                
                subj_score = mot_scores.loc[mot_scores['subject']==subj,['subject','visit',
                                            task_score+ ' ' + 'bradykinesia ' + side + ' upper limb',
                                            task_score+ ' ' + 'tremor ' + side + ' upper limb']]
                subj_score = subj_score.rename(index=str,columns={subj_score.columns[2]:'Bradykinesia',subj_score.columns[3]:'Tremor'})
                subj_score.index = range(len(subj_score))
                    
                data['timestamp'] = (data.timestamp.values - data.timestamp.values[0]).astype('timedelta64[ms]').astype(int)
                data = data.set_index('timestamp')
                
                data = HPfilter(data)
                
                clip_data = gen_clips_mc10(data,downsample=freq,basefreq=50)
                
                feature_extraction(clip_data)
                
                if 'features' in clip_data.keys():
                    D = clip_data['features']
                    featcols = list(D.columns)
                    D['Bradykinesia'] = subj_score['Bradykinesia'][trial]
                    D['Tremor'] = subj_score['Tremor'][trial]
                    D['Visit'] = visit
                    D['Task'] = task
                    D['Subject'] = subj
                    Data = pd.concat([Data,D])
                    
                clip_lens = []    
                
                for c in range(len(clip_data['data'])):
                    clip_lens.append(len(clip_data['data'][c]))
                N = pd.DataFrame(data=np.asarray(clip_lens),columns=['Samples'],dtype='int')
                N['Subject'] = subj
                N['Task'] = task
                N['Visit'] = sessionList[trial]
                numSamples = pd.concat([numSamples,N])
                    
    cols = ['Subject','Visit','Task','Bradykinesia','Tremor'] + featcols
    Data = Data[cols]
    
    numSamples = numSamples[['Subject','Visit','Task','Samples']]
    
    if freq==50:
        numSamples.to_csv('Z:CIS-PD Study\\Watch Sample Lengths.csv')
    
    return Data
    
    #Data.to_csv('Z:CIS-PD Study\\Smartwatch Data 50Hz.csv')

In [4]:
subjDict = {1004:'right',1016:'left',1018:'left',1019:'left',1020:'right',1024:'left',1029:'left',1030:'left',1032:'left',
            1038:'left',1044:'right',1046:'right',1049:'left',1051:'left'}
#subjDict = {1004:'right'}
#1047

taskList = ['Standing', 'Walking', 'Walking while counting', 'Finger to nose--right hand', 
            'Finger to nose--left hand', 'Alternating right hand movements', 'Alternating left hand movements', 
            'Sit to stand', 'Drawing on a paper', 'Typing on a computer keyboard', 'Assembling nuts and bolts', 
            'Taking a glass of water and drinking', 'Organizing sheets in a folder', 'Folding towels', 'Sitting']

taskScores = ['standing','walking','walking while counting','finger to nose right','finger to nose left',
                   'alternating right','alternating left','sit to stand','drawing on a paper',
                   'typing on a computer keyboard','assembling nuts and bolts','taking a glass of water and drinking',
                   'organizing sheets in a folder','folding towels','sitting']

taskList_Abb = ['Stndg', 'Wlkg', 'WlkgCnt', 'FtnR', 'FtnL', 'RamR', 'RamL', 'SitStand', 'Drwg', 'Typg', 'NtsBts',
                'Drnkg', 'Sheets', 'Fldg', 'Sitng']

sessionList = ['2 Weeks: Time 0', '2 Weeks: Time 30', '2 Weeks: Time 60', '2 Weeks: Time 90', '2 Weeks: Time 120', 
               '2 Weeks: Time 150', '4 Weeks']

In [6]:
#for freq in [50,45,40,35,30,25,20,15,10,5]:
for freq in [7.5,3]:
    print(freq)
    Data = DataAggregator(subjDict,taskList_Abb,taskScores,sessionList,freq)
    Data.to_csv('Z:CIS-PD Study\\Downsample\\Watch Data ' + str(freq) +  'Hz.csv')

7.5
Subject 1004 (1 of 14)


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Subject 1016 (2 of 14)
Subject 1018 (3 of 14)
Subject 1019 (4 of 14)
Subject 1020 (5 of 14)
Subject 1024 (6 of 14)
Subject 1029 (7 of 14)
Subject 1030 (8 of 14)
Subject 1032 (9 of 14)
Subject 1038 (10 of 14)
Subject 1044 (11 of 14)
Subject 1046 (12 of 14)
Subject 1049 (13 of 14)
Subject 1051 (14 of 14)
3
Subject 1004 (1 of 14)
Subject 1016 (2 of 14)
Subject 1018 (3 of 14)
Subject 1019 (4 of 14)
Subject 1020 (5 of 14)
Subject 1024 (6 of 14)
Subject 1029 (7 of 14)
Subject 1030 (8 of 14)
Subject 1032 (9 of 14)
Subject 1038 (10 of 14)
Subject 1044 (11 of 14)
Subject 1046 (12 of 14)
Subject 1049 (13 of 14)
Subject 1051 (14 of 14)


In [8]:
freq = 50
print(freq)
Data = DataAggregator(subjDict,taskList_Abb,taskScores,sessionList,freq)
#Data.to_csv('Z:CIS-PD Study\\Watch Data No Resample.csv')

50
Subject 1004 (1 of 14)


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Subject 1016 (2 of 14)
Subject 1018 (3 of 14)
Subject 1019 (4 of 14)
Subject 1020 (5 of 14)
Subject 1024 (6 of 14)
Subject 1029 (7 of 14)
Subject 1030 (8 of 14)
Subject 1032 (9 of 14)
Subject 1038 (10 of 14)
Subject 1044 (11 of 14)
Subject 1046 (12 of 14)
Subject 1049 (13 of 14)
Subject 1051 (14 of 14)


In [7]:
pd.read_csv('Z:CIS-PD Study\\Watch Data Resample 50Hz.csv')

Unnamed: 0.1,Unnamed: 0,Subject,Visit,Task,Bradykinesia,Tremor,RMSX,RMSY,RMSZ,rangeX,...,PSD_std,PSD_skew,PSD_kur,jerk_mean,jerk_std,jerk_skew,jerk_kur,Sen_X,Sen_Y,Sen_Z
0,0,1004,2 Weeks: Time 0,Stndg,,0.0,0.000780,0.001280,0.001405,0.087995,...,3.922479e-05,1.065437,0.747391,-7.471415e-05,0.015488,0.226425,6.096825,0.758698,0.450873,0.344170
1,1,1004,2 Weeks: Time 0,Stndg,,0.0,0.001689,0.002396,0.001976,0.195682,...,6.693931e-05,1.666988,1.399002,1.582323e-04,0.022475,0.231573,4.518329,0.782544,0.942793,0.865658
2,2,1004,2 Weeks: Time 0,Stndg,,0.0,0.002901,0.003484,0.002438,0.445838,...,1.203385e-04,1.745853,2.820366,-3.076687e-05,0.040088,-1.501902,21.121690,0.751976,1.151830,1.131325
3,3,1004,2 Weeks: Time 0,Stndg,,0.0,0.003422,0.004915,0.003581,0.445838,...,6.114038e-04,3.455990,12.404553,-2.764315e-05,0.049812,-0.838492,8.821618,1.276515,1.416580,1.013713
4,4,1004,2 Weeks: Time 0,Stndg,,0.0,0.002381,0.004040,0.002984,0.276044,...,9.746099e-05,3.084093,10.118404,-1.696081e-05,0.034058,-0.128093,5.834667,0.299053,0.241497,0.224483
5,5,1004,2 Weeks: Time 0,Stndg,,0.0,0.000629,0.000817,0.000480,0.070470,...,4.025207e-06,2.051781,4.513603,-1.743248e-05,0.005317,-0.012691,4.288078,1.057950,1.249675,1.269945
6,6,1004,2 Weeks: Time 0,Stndg,,0.0,0.000636,0.000981,0.000634,0.070470,...,1.405297e-05,2.409392,6.363299,2.782102e-05,0.006500,0.027952,1.821336,1.263383,1.261274,1.234744
7,7,1004,2 Weeks: Time 0,Stndg,,0.0,0.000427,0.000767,0.000589,0.041631,...,1.149310e-05,2.546654,6.897709,-3.253388e-05,0.006468,-0.015431,0.914990,1.708321,1.506817,1.548179
8,8,1004,2 Weeks: Time 0,Stndg,,0.0,0.000312,0.000435,0.000352,0.033990,...,2.912761e-06,1.505773,2.288394,-1.981531e-05,0.004498,0.161719,0.440121,1.749548,1.863968,1.779220
9,9,1004,2 Weeks: Time 0,Stndg,,0.0,0.000221,0.000372,0.000230,0.026072,...,1.280592e-06,1.429812,1.524792,-2.336335e-05,0.003492,0.135922,0.545586,1.840172,1.869602,1.960095


In [14]:
pd.read_csv('Z:CIS-PD Study\\Watch Data No Resample.csv')

Unnamed: 0.1,Unnamed: 0,Subject,Visit,Task,Bradykinesia,Tremor,RMSX,RMSY,RMSZ,rangeX,...,PSD_std,PSD_skew,PSD_kur,jerk_mean,jerk_std,jerk_skew,jerk_kur,Sen_X,Sen_Y,Sen_Z
0,0,1004,2 Weeks: Time 0,Stndg,,0.0,0.000780,0.001280,0.001405,0.087995,...,3.920003e-05,1.065437,0.747391,-7.471415e-05,0.015488,0.226425,6.096825,0.758698,0.450873,0.344170
1,1,1004,2 Weeks: Time 0,Stndg,,0.0,0.001689,0.002396,0.001976,0.195682,...,6.721661e-05,1.641338,1.303822,1.582323e-04,0.022475,0.231573,4.518329,0.782544,0.942793,0.865658
2,2,1004,2 Weeks: Time 0,Stndg,,0.0,0.002901,0.003484,0.002438,0.445838,...,1.214838e-04,1.727136,2.702820,-3.076687e-05,0.040088,-1.501902,21.121690,0.751976,1.151830,1.131325
3,3,1004,2 Weeks: Time 0,Stndg,,0.0,0.003422,0.004915,0.003581,0.445838,...,6.163699e-04,3.415011,12.084589,-2.764315e-05,0.049812,-0.838492,8.821618,1.276515,1.416580,1.013713
4,4,1004,2 Weeks: Time 0,Stndg,,0.0,0.002381,0.004040,0.002984,0.276044,...,9.816264e-05,3.048408,9.862165,-1.696081e-05,0.034058,-0.128093,5.834667,0.299053,0.241497,0.224483
5,5,1004,2 Weeks: Time 0,Stndg,,0.0,0.000629,0.000817,0.000480,0.070470,...,4.053584e-06,2.019448,4.357605,-1.743248e-05,0.005317,-0.012691,4.288078,1.057950,1.249675,1.269945
6,6,1004,2 Weeks: Time 0,Stndg,,0.0,0.000636,0.000981,0.000634,0.070470,...,1.407210e-05,2.409392,6.363299,2.782102e-05,0.006500,0.027952,1.821336,1.263383,1.261274,1.234744
7,7,1004,2 Weeks: Time 0,Stndg,,0.0,0.000427,0.000767,0.000589,0.041631,...,1.150898e-05,2.546654,6.897709,-3.253388e-05,0.006468,-0.015431,0.914990,1.708321,1.506817,1.548179
8,8,1004,2 Weeks: Time 0,Stndg,,0.0,0.000312,0.000435,0.000352,0.033990,...,2.914189e-06,1.491873,2.244010,-1.981531e-05,0.004498,0.161719,0.440121,1.749548,1.863968,1.779220
9,9,1004,2 Weeks: Time 0,Stndg,,0.0,0.000221,0.000372,0.000230,0.026072,...,1.283591e-06,1.407830,1.458386,-2.336335e-05,0.003492,0.135922,0.545586,1.840172,1.869602,1.960095


In [9]:
pd.read_csv('Z:CIS-PD Study\\Watch Sample Lengths.csv',index_col=0)

Unnamed: 0,Subject,Visit,Task,Samples
0,1004,2 Weeks: Time 0,Stndg,251
1,1004,2 Weeks: Time 0,Stndg,250
2,1004,2 Weeks: Time 0,Stndg,250
3,1004,2 Weeks: Time 0,Stndg,250
4,1004,2 Weeks: Time 0,Stndg,250
5,1004,2 Weeks: Time 0,Stndg,250
6,1004,2 Weeks: Time 0,Stndg,249
7,1004,2 Weeks: Time 0,Stndg,249
8,1004,2 Weeks: Time 0,Stndg,250
9,1004,2 Weeks: Time 0,Stndg,250
