In [2]:
import datetime
from dateutil.parser import parse
import itertools
from itertools import product
import math
import matplotlib.pyplot as plt
from multiprocessing.dummy import Pool as ThreadPool
import nolds
import numpy as np
import os
import pandas as pd
import pathlib
from PreprocessFcns import *
import pywt
import random
import scipy
from scipy.fftpack import fft
from scipy.signal import butter, welch, filtfilt, resample
from scipy.stats import skew, kurtosis, entropy, pearsonr
import seaborn as sns
import sklearn
from sklearn import multiclass
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import LeaveOneGroupOut
import time
%matplotlib inline

In [3]:
# set path to folder containing clinic watch data
# (defaults to the study network share; set the CISPD_DATA_PATH environment variable to
#  point the notebook at a different copy of the dataset without editing this cell)
path = os.environ.get('CISPD_DATA_PATH',
                      r'//FS2.smpp.local\\RTO\\CIS-PD Study\MJFF Curation\Finalized Dataset')
# set path to destination folder
# (defaults to the study network share; override with CISPD_DEST_PATH)
dest = os.environ.get('CISPD_DEST_PATH',
                      r'//FS2.smpp.local\\RTO\\CIS-PD Study\Watch Features Data')
#---------------------------------------------------------------------------------------------------------

In [4]:
# Visit labels mapped to consecutive integer codes, numbered in the order listed below.
VisitNumber = {
    label: code
    for code, label in enumerate((
        '2 Weeks: Time 0',
        '2 Weeks: Time 30',
        '2 Weeks: Time 60',
        '2 Weeks: Time 90',
        '2 Weeks: Time 120',
        '2 Weeks: Time 150',
        '1 Month',
    ))
}

# Task abbreviation -> human-readable description of the clinic task.
ClinicTasks = dict(
    Stndg='Standing',
    Wlkg='Walking',
    WlkgCnt='Walking while counting',
    FtnR='Finger to nose--right hand',
    FtnL='Finger to nose--left hand',
    RamR='Alternating right hand movements',
    RamL='Alternating left hand movements',
    SitStand='Sit to stand',
    Drwg='Drawing on a paper',
    Typg='Typing on a computer keyboard',
    NtsBts='Assembling nuts and bolts',
    Drnkg='Taking a glass of water and drinking',
    Sheets='Organizing sheets in a folder',
    Fldg='Folding towels',
    Sitng='Sitting',
)

In [None]:
## Correlate task symptom scores to MEAN FFT power in frequency ranges
#
# For every clip listed in ClinicDataFeaturesX.csv, load its accelerometer file, take the
# single-sided FFT amplitude of the acceleration magnitude, and store the mean amplitude in
# each frequency band next to the clip's identifiers and symptom scores.
s_time = time.time()

# Clip identifiers and symptom scores copied through to the output table.
meta_cols = ['SubjID', 'Visit', 'TaskAbb', 'Clip',
             'Tremor', 'Bradykinesia', 'Dyskinesia',
             'Overall']
# Frequency bands in Hz; both edges are inclusive. 4-8 is thought to be optimal.
freq_bands = [(0, 2), (2, 4), (4, 6), (4, 8), (6, 8), (8, 10),
              (10, 12), (12, 14), (14, 16), (16, 18), (18, 20)]
band_labels = ['{}-{}'.format(lo, hi) for lo, hi in freq_bands]

ClipData = pd.read_csv(os.path.join(dest, 'Clinic Data', 'ClinicDataFeaturesX.csv'))[meta_cols]

# Collect one plain dict per clip and build the frame once at the end. The previous version
# reused a single one-row DataFrame and aliased it as the accumulator on the first pass, so
# the first clip was overwritten by the second (and the second appeared twice).
clip_records = []
for _, clip in ClipData.iterrows():
    clip_file = (str(int(clip['SubjID'])) + '_' + str(clip['Visit']) + '_' +
                 clip['TaskAbb'] + '_' + str(clip['Clip']) + '.csv')
    clipacc = pd.read_csv(os.path.join(dest, 'Clinic Data', 'Feature Clips', clip_file),
                          parse_dates = ['Timestamp'])

    clipacc_mag = np.sqrt((clipacc.X)**2 + (clipacc.Y)**2 + (clipacc.Z)**2)
    L = len(clipacc_mag)
    # Timestamps as integer milliseconds; sampling rate = samples / clip duration in seconds.
    t_ms = pd.to_datetime(clipacc.Timestamp).values.astype(np.int64) // 10**6
    Fs = L / ((t_ms[-1] - t_ms[0]) / 1000)
    # Single-sided amplitude spectrum and its matching frequency axis.
    P1 = np.abs(fft(clipacc_mag.values) / L)[:L // 2 + 1]
    f = Fs * np.arange(L // 2 + 1) / L
    clipFFT = pd.DataFrame({'f': f, 'P1': P1})

    record = {col: clip[col] for col in meta_cols}
    record['SubjID'] = int(clip['SubjID'])
    for (lo, hi), label in zip(freq_bands, band_labels):
        in_band = (clipFFT.f >= lo) & (clipFFT.f <= hi)
        # Reduce via pandas so a band with no FFT bins yields NaN rather than an error.
        record[label] = np.mean(clipFFT.P1[in_band])
    clip_records.append(record)

ClinicFreqData_mean = pd.DataFrame(clip_records, columns = meta_cols + band_labels)

elapsed = int(time.time() - s_time)
minutes, seconds = divmod(elapsed, 60)
hours, minutes = divmod(minutes, 60)
print(str(hours) + ' hours ' + str(minutes) + ' minutes ' + str(seconds) + ' seconds')

ClinicFreqData_mean.head()

In [None]:
## Correlate task symptom scores to MAX FFT power in frequency ranges
#
# For every clip listed in ClinicDataFeaturesX.csv, load its accelerometer file, take the
# single-sided FFT amplitude of the acceleration magnitude, and store the maximum amplitude in
# each frequency band next to the clip's identifiers and symptom scores.
s_time = time.time()

# Clip identifiers and symptom scores copied through to the output table.
meta_cols = ['SubjID', 'Visit', 'TaskAbb', 'Clip',
             'Tremor', 'Bradykinesia', 'Dyskinesia',
             'Overall']
# Frequency bands in Hz; both edges are inclusive. 4-8 is thought to be optimal.
freq_bands = [(0, 2), (2, 4), (4, 6), (4, 8), (6, 8), (8, 10),
              (10, 12), (12, 14), (14, 16), (16, 18), (18, 20)]
band_labels = ['{}-{}'.format(lo, hi) for lo, hi in freq_bands]

ClipData = pd.read_csv(os.path.join(dest, 'Clinic Data', 'ClinicDataFeaturesX.csv'))[meta_cols]

# Collect one plain dict per clip and build the frame once at the end. The previous version
# reused a single one-row DataFrame and aliased it as the accumulator on the first pass, so
# the first clip was overwritten by the second (and the second appeared twice).
clip_records = []
for _, clip in ClipData.iterrows():
    clip_file = (str(int(clip['SubjID'])) + '_' + str(clip['Visit']) + '_' +
                 clip['TaskAbb'] + '_' + str(clip['Clip']) + '.csv')
    clipacc = pd.read_csv(os.path.join(dest, 'Clinic Data', 'Feature Clips', clip_file),
                          parse_dates = ['Timestamp'])

    clipacc_mag = np.sqrt((clipacc.X)**2 + (clipacc.Y)**2 + (clipacc.Z)**2)
    L = len(clipacc_mag)
    # Timestamps as integer milliseconds; sampling rate = samples / clip duration in seconds.
    t_ms = pd.to_datetime(clipacc.Timestamp).values.astype(np.int64) // 10**6
    Fs = L / ((t_ms[-1] - t_ms[0]) / 1000)
    # Single-sided amplitude spectrum and its matching frequency axis.
    P1 = np.abs(fft(clipacc_mag.values) / L)[:L // 2 + 1]
    f = Fs * np.arange(L // 2 + 1) / L
    clipFFT = pd.DataFrame({'f': f, 'P1': P1})

    record = {col: clip[col] for col in meta_cols}
    record['SubjID'] = int(clip['SubjID'])
    for (lo, hi), label in zip(freq_bands, band_labels):
        in_band = (clipFFT.f >= lo) & (clipFFT.f <= hi)
        # Reduce via pandas so a band with no FFT bins yields NaN rather than an error.
        record[label] = np.max(clipFFT.P1[in_band])
    clip_records.append(record)

ClinicFreqData_max = pd.DataFrame(clip_records, columns = meta_cols + band_labels)

elapsed = int(time.time() - s_time)
minutes, seconds = divmod(elapsed, 60)
hours, minutes = divmod(minutes, 60)
print(str(hours) + ' hours ' + str(minutes) + ' minutes ' + str(seconds) + ' seconds')

ClinicFreqData_max.head()

In [11]:
def _plotROC(fpr, tpr, title):
    """Plot one ROC curve (false positive rate vs. true positive rate) and show it."""
    plt.plot(fpr, tpr)
    plt.xlabel('False Positive Rate', fontsize = 12)
    plt.ylabel('True Positive Rate', fontsize = 12)
    plt.title(title, fontsize = 15)
    plt.show()


def scoreTremFFT(FFTmetadata, task, freq,
                 clinicpath = r'//FS2.smpp.local\\RTO\\CIS-PD Study\MJFF Curation\Finalized Dataset'):
    """Threshold per-clip FFT power to predict tremor, then score whole tasks from the clips.

    Parameters
    ----------
    FFTmetadata : DataFrame
        Clip-specific FFT powers in frequency ranges (generated above). Must contain the
        columns SubjID, Visit, TaskAbb, Clip, Tremor and the column named by ``freq``.
    task : str
        Task abbreviation to analyse (Sitng or Stndg).
    freq : str
        Name of the frequency-band column to use: 0-2, 2-4, 4-6, 4-8, 6-8, 8-10, 10-12,
        12-14, 14-16, 16-18, 18-20; 4-8 thought to be optimal.
    clinicpath : str, optional
        Folder containing 'Metadata Tables/Table4.csv'. Defaults to the study network share.

    Returns
    -------
    CombTaskFreq : DataFrame
        Clips of ``task`` with a 0/1 ``TremorFreq`` prediction of tremor by clip.
    ThresholdPred : DataFrame
        One row per Table 4 task entry that has clips attached, with ``FFTscoreRatio`` =
        (clips predicted as tremor) / (total clips) for that subject and visit.

    Two ROC curves are plotted as a side effect: clip level and task level.
    """
    # isolate FFT data corresponding to task of interest; copy so that adding the
    # prediction column below never writes into a slice of the caller's frame
    CombTaskFreq = FFTmetadata[FFTmetadata.TaskAbb == task].copy()

    # generate and plot ROC using power in provided freq range as 'prediction'
    fpr, tpr, thresholds = sklearn.metrics.roc_curve(
        CombTaskFreq.Tremor.apply(lambda x: 1 if x > 0 else x).values, # must turn scores to bool for ROC
        CombTaskFreq[freq].values)
    _plotROC(fpr, tpr, 'ROC Curve Power ' + freq + ' Hz')

    # use the 'ideal' point of ROC (closest to fpr = 0, tpr = 1) to determine the threshold
    # power value in range for tremor determination
    ROCTremFFT = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
    ROCTremFFT['dist01'] = np.sqrt((1 - ROCTremFFT.tpr)**2 + ROCTremFFT.fpr**2)
    # idxmin picks the first closest point, so ties give one scalar threshold instead of
    # an array that float() cannot convert
    TremPowerThreshold = float(ROCTremFFT.loc[ROCTremFFT.dist01.idxmin(), 'thresholds'])
    # roc_curve counts a sample as positive when score >= threshold, so use >= here to
    # reproduce the operating point that was just selected
    CombTaskFreq['TremorFreq'] = (CombTaskFreq[freq] >= TremPowerThreshold).astype(int)
    CombTaskFreq = CombTaskFreq[['SubjID', 'Visit', 'TaskAbb', 'Clip',
                                 'Tremor', 'TremorFreq', freq]]

    # get and consolidate the condensed (by task - not clips) clinic metadata for correlation
    clinicData = pd.read_csv(os.path.join(clinicpath, 'Metadata Tables', 'Table4.csv'))
    clinicTask = clinicData[clinicData.TaskAbb == task]
    # Build plain records and construct the frame once: the previous one-row-DataFrame
    # accumulator was aliased on the first pass, which dropped the first row, duplicated
    # the second, and let skipped rows partially overwrite an already-stored row.
    task_records = []
    for _, clinic_row in clinicTask.iterrows():
        side = clinic_row['Side']
        if not isinstance(side, str): # no usable side label -> no side-specific tremor column
            continue
        task_records.append({
            'SubjID': int(clinic_row['SubjID']),
            'Visit': VisitNumber[clinic_row['Visit']],
            'TaskAbb': clinic_row['TaskAbb'],
            'Tremor': clinic_row['Tremor - ' + side],
        })
    TaskClinicData = pd.DataFrame(task_records, columns = ['SubjID', 'Visit', 'TaskAbb', 'Tremor'])

    # score each task in Table 4 by the ratio of clips scored for tremor with total clips for that task
    pred_records = []
    for _, score in TaskClinicData.iterrows():
        same_visit = ((CombTaskFreq.SubjID == score['SubjID']) &
                      (CombTaskFreq.Visit == score['Visit']))
        totClips = int(same_visit.sum())
        if totClips == 0: # some of the metadata from Table 4 does not have acc clips associated
            continue
        TremFreqClips = int((same_visit & (CombTaskFreq.TremorFreq == 1)).sum())
        pred_records.append({
            'SubjID': score['SubjID'],
            'Visit': score['Visit'],
            'TaskAbb': score['TaskAbb'],
            'Tremor': int(score['Tremor']),
            'FFTscoreRatio': TremFreqClips / totClips,
        })
    ThresholdPred = pd.DataFrame(pred_records,
                                 columns = ['SubjID', 'Visit', 'TaskAbb', 'Tremor', 'FFTscoreRatio'])

    # generate and plot ROC using ratio of power-predicted tremor clips to total clips as 'prediction'
    fpr, tpr, thresholds = sklearn.metrics.roc_curve(
        ThresholdPred.Tremor.apply(lambda x: 1 if x > 0 else x).values,
        ThresholdPred.FFTscoreRatio.values)
    _plotROC(fpr, tpr, 'ROC Curve Threshold Scores')

    # CombTaskFreq includes bool prediction for presence/absence of tremor by clip
    # ThresholdPred contains ratio of tremor pred clips to total clips for each task with attached acc data
    return CombTaskFreq, ThresholdPred

In [None]:
# Score standing-task clips using the max FFT power in the 4-8 Hz band.
StndgClipFreq, StndgTaskPred = scoreTremFFT(
    FFTmetadata = ClinicFreqData_max,
    task = 'Stndg',
    freq = '4-8',
)

In [291]:
# Count, per subject / visit / task, the clips whose Tremor score differs from the 0/1
# prediction obtained by thresholding the '4-8' column.
# NOTE(review): StndgTremFreq and StndgTremThreshold are not defined in any cell visible here
# (the call above produces StndgClipFreq and StndgTaskPred) -- confirm where they come from,
# otherwise this cell raises NameError on a fresh kernel.
# NOTE(review): Tremor is compared as-is against a 0/1 value, whereas scoreTremFFT binarizes
# it (> 0) before the ROC; any Tremor value other than 0 or 1 always counts as a mismatch --
# confirm this is intended.
StndgTremFreq[StndgTremFreq.Tremor != 
              StndgTremFreq['4-8'].apply(lambda x: 1 if x > float(StndgTremThreshold) else 0)].groupby(['SubjID', 
                                                                                                        'Visit', 
                                                                                                        'TaskAbb']).size()

SubjID  Visit  TaskAbb
1003    0      Stndg       6
        1      Stndg       4
        2      Stndg      10
        4      Stndg       8
        5      Stndg       8
1004    0      Stndg       5
        3      Stndg       9
        4      Stndg       4
        6      Stndg       7
1005    2      Stndg       2
1007    0      Stndg       3
        1      Stndg       2
        2      Stndg       1
1009    1      Stndg       6
        5      Stndg       1
        6      Stndg      11
1016    2      Stndg       7
        3      Stndg       3
        4      Stndg       2
        5      Stndg       4
        6      Stndg       4
1018    5      Stndg       2
1019    0      Stndg       1
1020    6      Stndg       9
1023    1      Stndg       2
        4      Stndg       1
        6      Stndg       1
1024    0      Stndg       2
        1      Stndg       5
        3      Stndg       9
                          ..
1029    6      Stndg       2
1032    1      Stndg       1
1038    0      Stndg