In [5]:
import os
import numpy as np
import pandas as pd
import pywt
import pathlib
import pickle
from itertools import product
from scipy.stats import skew, kurtosis, entropy
from scipy.signal import butter, welch, filtfilt, resample
import math
import nolds
import matplotlib.pyplot as plt
import time
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from PreprocessFcns import *
%matplotlib inline

In [6]:
# set path to folder containing clinic watch data
clinicpath = r'//FS2.smpp.local\\RTO\\CIS-PD Study\MJFF Curation\Finalized Dataset'
# set path to folder containing home watch data
homepath = r'//FS2.smpp.local\\RTO\\CIS-PD Study\Patient Record Correlation'
# set path to destination folder
dest = r'//FS2.smpp.local\\RTO\\CIS-PD Study\Watch Features Data'
#---------------------------------------------------------------------------------------------------------

In [7]:
# Medication Reports
table_med = 'Table10.csv'
dest_ext_med = 'Medication Reports'
file_name_med = 'med_timepoints.csv'

# Symptom Reports
table_symt = 'Table11.csv'
dest_ext_symt = 'Symptom Reports'
file_name_symt = 'symt_timepoints.csv'

# Diaries
table_diar = 'Table12.csv'
dest_ext_diar = 'Diaries'
file_name_diar = 'diar_timepoints.csv'

# Load Consolidated DataFrames
timepoints_med = pd.read_csv(os.path.join(homepath, dest_ext_med, file_name_med), parse_dates = [1])
timepoints_symt = pd.read_csv(os.path.join(homepath, dest_ext_symt, file_name_symt), parse_dates = [1])
timepoints_diar = pd.read_csv(os.path.join(homepath, dest_ext_diar, file_name_diar), parse_dates = [1])

In [8]:
# generate task abbreviation dictionary
ClinicTasks = {
    'Stndg'    : 'Standing',
    'Wlkg'     : 'Walking',
    'WlkgCnt'  : 'Walking while counting',
    'FtnR'     : 'Finger to nose--right hand',
    'FtnL'     : 'Finger to nose--left hand',
    'RamR'     : 'Alternating right hand movements',
    'RamL'     : 'Alternating left hand movements',
    'SitStand' : 'Sit to stand',
    'Drwg'     : 'Drawing on a paper',
    'Typg'     : 'Typing on a computer keyboard',
    'NtsBts'   : 'Assembling nuts and bolts',
    'Drnkg'    : 'Taking a glass of water and drinking',
    'Sheets'   : 'Organizing sheets in a folder',
    'Fldg'     : 'Folding towels',
    'Sitng'    : 'Sitting'
}

In [9]:
# generate visit number dictionary
VisitNumber = {
    '2 Weeks: Time 0'   : 0,
    '2 Weeks: Time 30'  : 1,
    '2 Weeks: Time 60'  : 2,
    '2 Weeks: Time 90'  : 3,
    '2 Weeks: Time 120' : 4,
    '2 Weeks: Time 150' : 5,
    '1 Month'           : 6
}

In [20]:
def getTimestampsPaths(timepoints_med, timepoints_symt, timepoints_diar):
    
    # Initialize Empty Lists for Each Necessary Piece of Information from Each Record
    StartTimestamps = []
    EndTimestamps = []
    SaveFilePaths = []
    SubjID = []

    # Symptom Reports
    Subj = list(timepoints_symt.SubjID)
    StartTimes = timepoints_symt.apply(lambda row: row.Timestamp + pd.Timedelta(unit = 'minute', value = -30), axis = 1)
    EndTimes = timepoints_symt.apply(lambda row: row.Timestamp, axis = 1)
    SavePaths = timepoints_symt.apply(lambda row: os.path.join(homepath, dest_ext_symt, str(row.SubjID), 
                                                                                        str(row.Timestamp)[:7], 
                                                                                        str(row.Timestamp)[8:10],
                                                                                        str(row.Timestamp)[11:13] +
                                                                                        str(row.Timestamp)[14:16] +
                                                                                        str(row.Timestamp)[17:] + '.csv'), 
                                      axis = 1)
    SubjID = SubjID + Subj
    StartTimestamps = StartTimestamps + list(StartTimes)
    EndTimestamps = EndTimestamps + list(EndTimes)
    SaveFilePaths = SaveFilePaths + list(SavePaths)
    
    # Diaries
    Subj = list(timepoints_diar.SubjID)
    StartTimes = timepoints_diar.apply(lambda row: row.Timestamp + pd.Timedelta(unit = 'minute', value = -30), axis = 1)
    EndTimes = timepoints_diar.apply(lambda row: row.Timestamp, axis = 1)
    SavePaths = timepoints_diar.apply(lambda row: os.path.join(homepath, dest_ext_diar, str(row.SubjID), 
                                                                                        str(row.Timestamp)[:7], 
                                                                                        str(row.Timestamp)[8:10],
                                                                                        str(row.Timestamp)[11:13] +
                                                                                        str(row.Timestamp)[14:16] +
                                                                                        str(row.Timestamp)[17:] + '.csv'), 
                                      axis = 1)
    SubjID = SubjID + Subj
    StartTimestamps = StartTimestamps + list(StartTimes)
    EndTimestamps = EndTimestamps + list(EndTimes)
    SaveFilePaths = SaveFilePaths + list(SavePaths)
    

    # Medication Reports
    Subj = list(timepoints_med.SubjID)
    StartTimes = timepoints_med.apply(lambda row: row.Timestamp + pd.Timedelta(unit = 'minute', value = -30), axis = 1)
    EndTimes = timepoints_med.apply(lambda row: row.Timestamp + pd.Timedelta(unit = 'minute', value = 30), axis = 1)
    SavePaths = timepoints_med.apply(lambda row: os.path.join(homepath, dest_ext_med, str(row.SubjID), 
                                                                                      str(row.Timestamp)[:7], 
                                                                                      str(row.Timestamp)[8:10],
                                                                                      str(row.Timestamp)[11:13] +
                                                                                      str(row.Timestamp)[14:16] +
                                                                                      str(row.Timestamp)[17:] + '.csv'), 
                                     axis = 1)
    SubjID = SubjID + Subj
    StartTimestamps = StartTimestamps + list(StartTimes)
    EndTimestamps = EndTimestamps + list(EndTimes)
    SaveFilePaths = SaveFilePaths + list(SavePaths)
    
    Records = pd.DataFrame(columns = ['SubjID', 'StartTimestamps', 'EndTimestamps', 'SaveFilePaths'])
    
    Records['SubjID'] = SubjID
    Records['StartTimestamps'] = StartTimestamps
    Records['EndTimestamps'] = EndTimestamps
    Records['SaveFilePaths'] = SaveFilePaths
    
    return Records

In [171]:
# get all home data features - less efficient version
# uses reduced list of features
def DataAggregator(file, data_type):
    '''generates feature metrics for 5-second intervals of apple watch data
       data_type: \'clinic\' or \'home\'
       file: clinic data in the form (SubjID)_(VisitNum)_(TaskAbb); home data as full record file path'''
    
    TaskFeatures = pd.DataFrame()
    
    if data_type == 'clinic':
        subject = file[:4]
        visit = file[5:6]
        task = file[7:]
    if data_type == 'home':
        designation = file[63:-4]
    print(file)
    
    features_list = ['RMSX','RMSY','RMSZ','rangeX','rangeY','rangeZ','meanX','meanY','meanZ','varX','varY','varZ',
                    'skewX','skewY','skewZ','kurtX','kurtY','kurtZ','xcor_peakXY','xcorr_peakXZ','xcorr_peakYZ',
                    'xcorr_lagXY','xcorr_lagXZ','xcorr_lagYZ','Dom_freq','Pdom_rel','PSD_mean','PSD_std','PSD_skew',
                    'PSD_kur','jerk_mean','jerk_std','jerk_skew','jerk_kur']
        
    # get acc data
    try:
        if data_type == 'clinic':
            data = (pd.read_csv(os.path.join(clinicpath, 'Table8', 'TaskAcc', file + '.csv'), parse_dates = ['timestamp'])
                    [['timestamp', 'x', 'y', 'z']])
            data.columns = ['Timestamp', 'X', 'Y', 'Z']
        if data_type == 'home':
            data = pd.read_csv(file, parse_dates = ['Timestamp'])[['Timestamp', 'X', 'Y', 'Z']]
    except(FileNotFoundError):
        if data_type == 'clinic':
            print('No data found for subject %s %s visit %s' % (subject, task, visit))
        if data_type == 'home':
            print('No data found for %s' % (designation))
        return TaskFeatures
        
    # organize data and make 5 second clips
    data = data.sort_values(by = 'Timestamp', axis = 0)
    data['Timestamp2'] = [(tm - datetime.timedelta(minutes = 0,
                                                   seconds = tm.second % 5,
                                                   microseconds = tm.microsecond)) 
                          for tm in data.Timestamp]
    
    data['Timestamp'] = (data.Timestamp.values - data.Timestamp.values[0]).astype('timedelta64[ms]').astype(int)
    data = data.set_index('Timestamp')
    data.loc[:, ['X', 'Y', 'Z']] = filterdata(data[['X', 'Y', 'Z']])
                
    # "clip" the data into 5 second chunks    
    five_sec_intervals = data.Timestamp2.unique()
        
    # calculate features
    F = []
    num_empty = 0
    times = []
    for t in five_sec_intervals:
        clip = data.loc[(data.Timestamp2 == t)]
        # length of 5 second chunk should be 250 for 5 seconds
        if (clip.empty or (len(clip.Timestamp2) < 200)):
            num_empty += 1
        else:
            F.append(reduced_feature_extraction_from_1_clip(clip[['X', 'Y', 'Z']]))
            times.append(t)

    success_info = [file, len(five_sec_intervals), (len(five_sec_intervals) - num_empty)]
    df = pd.DataFrame(data = [success_info], columns = ['File', 'Expected Clips', 'Actual Clips'])
    if data_type == 'clinic':
        dest_x = 'Clinic Data'
    if data_type == 'home':
        dest_x = 'Home Data'
    if os.path.isfile(os.path.join(dest, dest_x, 'Success Info.csv')):
        dfo = pd.read_csv(os.path.join(dest, dest_x, 'Success Info.csv'))
        df = pd.concat([dfo, df], ignore_index = False)
    df.to_csv(os.path.join(dest, dest_x, 'Success Info.csv'), index = False)
            
    # create features dataframe
    TaskFeatures = pd.DataFrame(data = F, columns = features_list, dtype = 'float32')    
    
    return TaskFeatures

In [120]:
### GENERATE FEATURES FROM CLINIC DATA USING REDUCED FEATURES

s_time = time.time()

clinicData = pd.read_csv(os.path.join(clinicpath, 'Table4.csv'))

for task in clinicData.iterrows():
    file = str(int(task[1]['SubjID'])) + '_' + str(VisitNumber[task[1]['Visit']]) + '_' + task[1]['TaskAbb']
    TaskFeatures = DataAggregator(file, 'clinic')
    if TaskFeatures.empty:
        continue
    featcols = list(TaskFeatures.columns)
    
    TaskFeatures['SubjID'] = task[1]['SubjID']
    TaskFeatures['Visit'] = VisitNumber[task[1]['Visit']]
    TaskFeatures['TaskAbb'] = task[1]['TaskAbb']
    TaskFeatures['Tremor'] = task[1]['Tremor - ' + task[1]['Side']]
    TaskFeatures['Bradykinesia'] = task[1]['Bradykinesia - ' + task[1]['Side']]
    TaskFeatures['Dyskinesia'] = task[1]['Dyskinesia - ' + task[1]['Side']]
    TaskFeatures['Overall'] = task[1]['Overall']
    
    cols = ['SubjID', 'Visit', 'TaskAbb', 'Tremor', 'Bradykinesia', 'Dyskinesia', 'Overall'] + featcols
    TaskFeatures = TaskFeatures[cols]
    
    if DataFeatures.empty:
        DataFeatures = TaskFeatures
        continue
    DataFeatures = pd.concat([DataFeatures, TaskFeatures], ignore_index = True)

DataFeatures.to_csv(os.path.join(dest, 'Clinic Data', 'ClinicDataFeatures.csv'), index = False)

print(str(int(((time.time() - s_time) / 60) / 60)) + ' hours ' + 
      str(int(((time.time() - s_time) / 60) % 60)) + ' minutes ' + 
      str(int((time.time() - s_time) % 60)) + ' seconds')

1003_0_Drnkg
1003_0_Drwg
1003_0_Fldg
1003_0_FtnL
1003_0_FtnR
1003_0_NtsBts
1003_0_RamL
1003_0_RamR
1003_0_Sheets
1003_0_Sitng
1003_0_SitStand
1003_0_Stndg
1003_0_Typg
1003_0_Wlkg
1003_0_WlkgCnt
1003_1_Drnkg
1003_1_Drwg
1003_1_Fldg
1003_1_FtnL
1003_1_FtnR
1003_1_NtsBts
1003_1_RamL
1003_1_RamR
1003_1_Sheets
1003_1_Sitng
1003_1_SitStand
1003_1_Stndg
1003_1_Typg
1003_1_Wlkg
1003_1_WlkgCnt
1003_2_Drnkg
1003_2_Drwg
1003_2_Fldg
1003_2_FtnL
1003_2_FtnR
1003_2_NtsBts
1003_2_RamL
1003_2_RamR
1003_2_Sheets
1003_2_Sitng
1003_2_SitStand
1003_2_Stndg
1003_2_Typg
1003_2_Wlkg
1003_2_WlkgCnt
1003_3_Drnkg
1003_3_Drwg
1003_3_Fldg
1003_3_FtnL
1003_3_FtnR
1003_3_NtsBts
1003_3_RamL
1003_3_RamR
1003_3_Sheets
1003_3_Sitng
1003_3_SitStand
1003_3_Stndg
1003_3_Typg
1003_3_Wlkg
1003_3_WlkgCnt
1003_4_Drnkg
1003_4_Drwg
1003_4_Fldg
1003_4_FtnL
1003_4_FtnR
1003_4_NtsBts
1003_4_RamL
1003_4_RamR
1003_4_Sheets
1003_4_Sitng
1003_4_SitStand
1003_4_Stndg
1003_4_Typg
1003_4_Wlkg
1003_4_WlkgCnt
1003_5_Drnkg
1003_5_Drwg
1003_

1007_2_Fldg
1007_2_FtnL
1007_2_FtnR
1007_2_NtsBts
1007_2_RamL
1007_2_RamR
1007_2_Sheets
1007_2_Sitng
1007_2_SitStand
1007_2_Stndg
1007_2_Typg
1007_2_Wlkg
1007_2_WlkgCnt
1007_3_Drnkg
1007_3_Drwg
1007_3_Fldg
1007_3_FtnL
1007_3_FtnR
1007_3_NtsBts
1007_3_RamL
1007_3_RamR
1007_3_Sheets
1007_3_Sitng
1007_3_SitStand
1007_3_Stndg
1007_3_Typg
1007_3_Wlkg
1007_3_WlkgCnt
1007_4_Drnkg
No data found for subject 1007 Drnkg visit 4
1007_4_Drwg
No data found for subject 1007 Drwg visit 4
1007_4_Fldg
No data found for subject 1007 Fldg visit 4
1007_4_FtnL
No data found for subject 1007 FtnL visit 4
1007_4_FtnR
No data found for subject 1007 FtnR visit 4
1007_4_NtsBts
No data found for subject 1007 NtsBts visit 4
1007_4_RamL
No data found for subject 1007 RamL visit 4
1007_4_RamR
No data found for subject 1007 RamR visit 4
1007_4_Sheets
No data found for subject 1007 Sheets visit 4
1007_4_Sitng
No data found for subject 1007 Sitng visit 4
1007_4_SitStand
No data found for subject 1007 SitStand visit 4
1

1019_6_Drwg
1019_6_Fldg
1019_6_FtnL
1019_6_FtnR
1019_6_NtsBts
1019_6_RamL
1019_6_RamR
1019_6_Sheets
1019_6_Sitng
1019_6_SitStand
1019_6_Stndg
1019_6_Typg
1019_6_Wlkg
1019_6_WlkgCnt
1020_0_Drnkg
No data found for subject 1020 Drnkg visit 0
1020_0_Drwg
No data found for subject 1020 Drwg visit 0
1020_0_Fldg
No data found for subject 1020 Fldg visit 0
1020_0_FtnL
No data found for subject 1020 FtnL visit 0
1020_0_FtnR
No data found for subject 1020 FtnR visit 0
1020_0_NtsBts
No data found for subject 1020 NtsBts visit 0
1020_0_RamL
No data found for subject 1020 RamL visit 0
1020_0_RamR
No data found for subject 1020 RamR visit 0
1020_0_Sheets
No data found for subject 1020 Sheets visit 0
1020_0_Sitng
No data found for subject 1020 Sitng visit 0
1020_0_SitStand
No data found for subject 1020 SitStand visit 0
1020_0_Stndg
No data found for subject 1020 Stndg visit 0
1020_0_Typg
No data found for subject 1020 Typg visit 0
1020_0_Wlkg
No data found for subject 1020 Wlkg visit 0
1020_0_WlkgCn

1024_3_RamL
1024_3_RamR
1024_3_Sheets
1024_3_Sitng
1024_3_SitStand
1024_3_Stndg
1024_3_Typg
1024_3_Wlkg
1024_3_WlkgCnt
1024_4_Drnkg
1024_4_Drwg
1024_4_Fldg
1024_4_FtnL
1024_4_FtnR
1024_4_NtsBts
1024_4_RamL
1024_4_RamR
1024_4_Sheets
1024_4_Sitng
1024_4_SitStand
1024_4_Stndg
1024_4_Typg
1024_4_Wlkg
1024_4_WlkgCnt
1024_5_Drnkg
1024_5_Drwg
1024_5_Fldg
1024_5_FtnL
1024_5_FtnR
1024_5_NtsBts
1024_5_RamL
1024_5_RamR
1024_5_Sheets
1024_5_Sitng
1024_5_SitStand
1024_5_Stndg
1024_5_Typg
1024_5_Wlkg
1024_5_WlkgCnt
1024_6_Drnkg
1024_6_Drwg
1024_6_Fldg
1024_6_FtnL
1024_6_FtnR
1024_6_NtsBts
1024_6_RamL
1024_6_RamR
1024_6_Sheets
1024_6_Sitng
1024_6_SitStand
1024_6_Stndg
1024_6_Typg
1024_6_Wlkg
1024_6_WlkgCnt
1029_0_Drnkg
1029_0_Drwg
1029_0_Fldg
1029_0_FtnL
1029_0_FtnR
1029_0_NtsBts
1029_0_RamL
1029_0_RamR
1029_0_Sheets
1029_0_Sitng
1029_0_SitStand
1029_0_Stndg
1029_0_Typg
1029_0_Wlkg
1029_0_WlkgCnt
1029_1_Drnkg
1029_1_Drwg
1029_1_Fldg
1029_1_FtnL
1029_1_FtnR
1029_1_NtsBts
1029_1_RamL
1029_1_RamR
1029_1

1032_3_Typg
1032_3_Wlkg
1032_3_WlkgCnt
1032_4_Drnkg
1032_4_Drwg
1032_4_Fldg
1032_4_FtnL
1032_4_FtnR
1032_4_NtsBts
1032_4_RamL
1032_4_RamR
1032_4_Sheets
1032_4_Sitng
1032_4_SitStand
1032_4_Stndg
1032_4_Typg
1032_4_Wlkg
1032_4_WlkgCnt
1032_5_Drnkg
1032_5_Drwg
1032_5_Fldg
1032_5_FtnL
1032_5_FtnR
1032_5_NtsBts
1032_5_RamL
1032_5_RamR
1032_5_Sheets
1032_5_Sitng
1032_5_SitStand
1032_5_Stndg
1032_5_Typg
1032_5_Wlkg
1032_5_WlkgCnt
1032_6_Drnkg
1032_6_Drwg
1032_6_Fldg
1032_6_FtnL
1032_6_FtnR
1032_6_NtsBts
1032_6_RamL
1032_6_RamR
1032_6_Sheets
1032_6_Sitng
1032_6_SitStand
1032_6_Stndg
1032_6_Typg
1032_6_Wlkg
1032_6_WlkgCnt
1038_0_Drnkg
1038_0_Drwg
1038_0_Fldg
1038_0_FtnL
1038_0_FtnR
1038_0_NtsBts
1038_0_RamL
1038_0_RamR
1038_0_Sheets
1038_0_Sitng
1038_0_SitStand
1038_0_Stndg
1038_0_Typg
1038_0_Wlkg
1038_0_WlkgCnt
1038_1_Drnkg
1038_1_Drwg
1038_1_Fldg
1038_1_FtnL
1038_1_FtnR
1038_1_NtsBts
1038_1_RamL
1038_1_RamR
1038_1_Sheets
1038_1_Sitng
1038_1_SitStand
1038_1_Stndg
1038_1_Typg
1038_1_Wlkg
1038_1

No data found for subject 1047 Wlkg visit 6
1047_6_WlkgCnt
No data found for subject 1047 WlkgCnt visit 6
1048_0_Drnkg
1048_0_Drwg
1048_0_Fldg
1048_0_FtnL
1048_0_FtnR
1048_0_NtsBts
1048_0_RamL
1048_0_RamR
1048_0_Sheets
1048_0_Sitng
1048_0_SitStand
1048_0_Stndg
1048_0_Typg
1048_0_Wlkg
1048_0_WlkgCnt
1048_1_Drnkg
1048_1_Drwg
1048_1_Fldg
1048_1_FtnL
1048_1_FtnR
1048_1_NtsBts
1048_1_RamL
1048_1_RamR
1048_1_Sheets
1048_1_Sitng
1048_1_SitStand
1048_1_Stndg
1048_1_Typg
1048_1_Wlkg
1048_1_WlkgCnt
1048_2_Drnkg
1048_2_Drwg
1048_2_Fldg
1048_2_FtnL
1048_2_FtnR
1048_2_NtsBts
1048_2_RamL
1048_2_RamR
1048_2_Sheets
1048_2_Sitng
1048_2_SitStand
1048_2_Stndg
1048_2_Typg
1048_2_Wlkg
1048_2_WlkgCnt
1048_3_Drnkg
1048_3_Drwg
1048_3_Fldg
1048_3_FtnL
1048_3_FtnR
1048_3_NtsBts
1048_3_RamL
1048_3_RamR
1048_3_Sheets
1048_3_Sitng
1048_3_SitStand
1048_3_Stndg
1048_3_Typg
1048_3_Wlkg
1048_3_WlkgCnt
1048_4_Drnkg
1048_4_Drwg
1048_4_Fldg
1048_4_FtnL
1048_4_FtnR
1048_4_NtsBts
1048_4_RamL
1048_4_RamR
1048_4_Sheets
1048_

No data found for subject 1052 FtnL visit 2
1052_2_FtnR
No data found for subject 1052 FtnR visit 2
1052_2_NtsBts
No data found for subject 1052 NtsBts visit 2
1052_2_RamL
No data found for subject 1052 RamL visit 2
1052_2_RamR
No data found for subject 1052 RamR visit 2
1052_2_Sheets
No data found for subject 1052 Sheets visit 2
1052_2_Sitng
No data found for subject 1052 Sitng visit 2
1052_2_SitStand
No data found for subject 1052 SitStand visit 2
1052_2_Stndg
No data found for subject 1052 Stndg visit 2
1052_2_Typg
No data found for subject 1052 Typg visit 2
1052_2_Wlkg
No data found for subject 1052 Wlkg visit 2
1052_2_WlkgCnt
No data found for subject 1052 WlkgCnt visit 2
1052_3_Drnkg
No data found for subject 1052 Drnkg visit 3
1052_3_Drwg
No data found for subject 1052 Drwg visit 3
1052_3_Fldg
No data found for subject 1052 Fldg visit 3
1052_3_FtnL
No data found for subject 1052 FtnL visit 3
1052_3_FtnR
No data found for subject 1052 FtnR visit 3
1052_3_NtsBts
No data found for 

No data found for subject 1054 RamL visit 1
1054_1_RamR
No data found for subject 1054 RamR visit 1
1054_1_Sheets
No data found for subject 1054 Sheets visit 1
1054_1_Sitng
No data found for subject 1054 Sitng visit 1
1054_1_SitStand
No data found for subject 1054 SitStand visit 1
1054_1_Stndg
No data found for subject 1054 Stndg visit 1
1054_1_Typg
No data found for subject 1054 Typg visit 1
1054_1_Wlkg
No data found for subject 1054 Wlkg visit 1
1054_1_WlkgCnt
No data found for subject 1054 WlkgCnt visit 1
1054_2_Drnkg
No data found for subject 1054 Drnkg visit 2
1054_2_Drwg
No data found for subject 1054 Drwg visit 2
1054_2_Fldg
No data found for subject 1054 Fldg visit 2
1054_2_FtnL
No data found for subject 1054 FtnL visit 2
1054_2_FtnR
No data found for subject 1054 FtnR visit 2
1054_2_NtsBts
No data found for subject 1054 NtsBts visit 2
1054_2_RamL
No data found for subject 1054 RamL visit 2
1054_2_RamR
No data found for subject 1054 RamR visit 2
1054_2_Sheets
No data found for 

No data found for subject 1056 WlkgCnt visit 2
1056_3_Drnkg
No data found for subject 1056 Drnkg visit 3
1056_3_Drwg
No data found for subject 1056 Drwg visit 3
1056_3_Fldg
No data found for subject 1056 Fldg visit 3
1056_3_FtnL
No data found for subject 1056 FtnL visit 3
1056_3_FtnR
No data found for subject 1056 FtnR visit 3
1056_3_NtsBts
No data found for subject 1056 NtsBts visit 3
1056_3_RamL
No data found for subject 1056 RamL visit 3
1056_3_RamR
No data found for subject 1056 RamR visit 3
1056_3_Sheets
No data found for subject 1056 Sheets visit 3
1056_3_Sitng
No data found for subject 1056 Sitng visit 3
1056_3_SitStand
No data found for subject 1056 SitStand visit 3
1056_3_Stndg
No data found for subject 1056 Stndg visit 3
1056_3_Typg
No data found for subject 1056 Typg visit 3
1056_3_Wlkg
No data found for subject 1056 Wlkg visit 3
1056_3_WlkgCnt
No data found for subject 1056 WlkgCnt visit 3
1056_4_Drnkg
No data found for subject 1056 Drnkg visit 4
1056_4_Drwg
No data found f

In [36]:
def feature_extraction(data_clip):
    
    features_list = ['RMSX', 'RMSY', 'RMSZ', 
                     'rangeX', 'rangeY', 'rangeZ', 
                     'meanX', 'meanY', 'meanZ', 
                     'varX', 'varY', 'varZ', 
                     'skewX', 'skewY', 'skewZ', 
                     'kurtX', 'kurtY', 'kurtZ', 
                     'xcorr_peakXY', 'xcorr_peakXZ', 'xcorr_peakYZ', 
                     'xcorr_lagXY', 'xcorr_lagXZ', 'xcorr_lagYZ', 
                     'freq1', 'freq2', 'freq3', # added
                     'P1_rel', 'P2_rel', 'P3_rel', # added
                     'PSD_mean', 'PSD_std', 'PSD_skew', 'PSD_kur', 
                     'jerk_mean', 'jerk_std', 'jerk_skew', 'jerk_kur', 
                     'Sen_X', 'Sen_Y', 'Sen_Z', # not included in reduced features
                     'RMS_mag', 'range_mag', 'mean_mag', 'var_mag', 'skew_mag', 'kurt_mag', 'Sen_mag'] # not included
    
    rawdata = data_clip
    rawdata_wmag = rawdata.copy()
    rawdata_wmag['Accel_Mag'] = np.sort((rawdata**2).sum(axis = 1))
    
    N = len(rawdata)
    min_xyz = np.min(rawdata, axis = 0)
    max_xyz = np.max(rawdata, axis = 0)
    xcorr_xy = np.correlate(rawdata.iloc[:, 0], rawdata.iloc[:, 1], mode = 'same')
    xcorr_xz = np.correlate(rawdata.iloc[:,0], rawdata.iloc[:, 2], mode = 'same')
    xcorr_yz = np.correlate(rawdata.iloc[:, 1], rawdata.iloc[:, 2], mode = 'same')
    Pxx = power_spectra_welch(rawdata_wmag, fm = 0, fM = 10)
    sH_raw = []; sH_fft = []

    
    # RMSX, RMSY, RMSZ
    # root mean square
    RMS = 1 / N * np.sqrt(np.asarray(np.sum(rawdata**2, axis = 0)))
    
    # rangeX, rangeY, rangeZ
    # range
    r = np.asarray(max_xyz - min_xyz)
    
    # meanX, meanY, meanZ
    # average
    mean = np.asarray(np.mean(rawdata, axis = 0))

    # varX, varY, varZ
    # standard deviation
    var = np.asarray(np.std(rawdata, axis = 0))

    # skewX, skewY, skewZ
    # skewness: measure of data symmetry
    sk = skew(rawdata)
    
    # kurtX, kurtY, kurtZ
    # kurtosis: measure of data tail weight compared to normal dist
    kurt = kurtosis(rawdata)

    # xcorr_peakXY, xcorr_peakXZ, xcorr_peakYZ
    # max correlation between data of paired axes
    xcorr_peak_xy = np.max(xcorr_xy)
    xcorr_peak_xz = np.max(xcorr_xz)
    xcorr_peak_yz = np.max(xcorr_yz)
    xcorr_peak = np.array([xcorr_peak_xy, xcorr_peak_xz, xcorr_peak_yz])

    # xcorr_lagXY, xcorr_lagXZ, xcorr_lagYZ
    # relative location of max correlation between data of paired axes
    xcorr_lag_xy = (np.argmax(xcorr_xy)) / len(xcorr_xy)
    xcorr_lag_xz = (np.argmax(xcorr_xz)) / len(xcorr_xz)
    xcorr_lag_yz = (np.argmax(xcorr_yz)) / len(xcorr_yz)
    xcorr_lag = np.array([xcorr_lag_xy, xcorr_lag_xz, xcorr_lag_yz])

    # freq1, freq2, freq3
    # frequency with the highest power density
    freq1 = Pxx.iloc[:, -1].index[-1]
    freq2 = Pxx.iloc[:, -1].index[-2]
    freq3 = Pxx.iloc[:, -1].index[-3]
    domfreq = np.array([freq1, freq2, freq3])

    # P1_rel, P2_rel, P3_rel
    # relative power of the dominant frequency within the signal
    P1_rel = Pxx.loc[freq1].values / Pxx.iloc[:, -1].sum()
    P2_rel = Pxx.loc[freq2].values / Pxx.iloc[:, -1].sum()
    P3_rel = Pxx.loc[freq3].values / Pxx.iloc[:, -1].sum()
    Pdom_rel = np.concatenate((P1_rel, P2_rel, P3_rel))

    # PDS_mean, PDS_std, PDS_skew, PDS_kur
    # power spectral density summary stats
    Pxx_moments = np.array([np.nanmean(Pxx.values), np.nanstd(Pxx.values), skew(Pxx.values), kurtosis(Pxx.values)])

    # jerk_mean, jerk_std, jerk_skew, jerk_kur
    jerk = rawdata_wmag['Accel_Mag'].diff().values
    jerk_moments = np.array([np.nanmean(jerk), np.nanstd(jerk), skew(jerk[~np.isnan(jerk)]), kurtosis(jerk[~np.isnan(jerk)])])

    # Sen_X, Sen_Y, Sen_Z
    # sample entropy
    for a in range(3):
        x = rawdata.iloc[:, a]
        n = len(x)
        Fs = np.mean(1 / (np.diff(x.index) / 1000))
        sH_raw.append(nolds.sampen(x))
    
    # features of the acceleration magnitude (as opposed to the axis values)
    RMS_mag = 1 / N * np.sqrt(np.sum(rawdata_wmag['Accel_Mag']**2, axis = 0))
    r_mag = np.max(rawdata_wmag['Accel_Mag']) - np.min(rawdata_wmag['Accel_Mag'])
    mean_mag = np.mean(rawdata_wmag['Accel_Mag'])
    var_mag = np.std(rawdata_wmag['Accel_Mag'])
    sk_mag = skew(rawdata_wmag['Accel_Mag'])
    kurt_mag = kurtosis(rawdata_wmag['Accel_Mag'])
    sH_mag = nolds.sampen(rawdata_wmag['Accel_Mag'])
    
    Y = np.array([RMS_mag, r_mag, mean_mag, var_mag, sk_mag, kurt_mag, sH_mag])
    X = np.concatenate((RMS, r, mean, var, sk, kurt, xcorr_peak, xcorr_lag, 
                        domfreq, Pdom_rel, Pxx_moments, jerk_moments, sH_raw, Y))
    
    return X

In [43]:
# get all home data features - less efficient version
# uses full list of features and three dominant frequencies as opposed to one
def DataAggregator2(file, data_type):
    '''generates feature metrics for 5-second intervals of apple watch data
       utilizes additional features not present in previous DataAggregator
       data_type: \'clinic\' or \'home\'
       file: clinic data in the form (SubjID)_(VisitNum)_(TaskAbb); home data as full record file path'''
    
    TaskFeatures = pd.DataFrame()
    
    if data_type == 'clinic':
        subject = file[:4]
        visit = file[5:6]
        task = file[7:]
    if data_type == 'home':
        designation = file[63:-4]
    print(file)
    
    features_list = ['RMSX', 'RMSY', 'RMSZ', 
                     'rangeX', 'rangeY', 'rangeZ', 
                     'meanX', 'meanY', 'meanZ', 
                     'varX', 'varY', 'varZ', 
                     'skewX', 'skewY', 'skewZ', 
                     'kurtX', 'kurtY', 'kurtZ', 
                     'xcorr_peakXY', 'xcorr_peakXZ', 'xcorr_peakYZ', 
                     'xcorr_lagXY', 'xcorr_lagXZ', 'xcorr_lagYZ', 
                     'freq1', 'freq2', 'freq3', # added
                     'P1_rel', 'P2_rel', 'P3_rel', # added
                     'PSD_mean', 'PSD_std', 'PSD_skew', 'PSD_kur', 
                     'jerk_mean', 'jerk_std', 'jerk_skew', 'jerk_kur', 
                     'Sen_X', 'Sen_Y', 'Sen_Z', # not included in reduced features
                     'RMS_mag', 'range_mag', 'mean_mag', 'var_mag', 'skew_mag', 'kurt_mag', 'Sen_mag'] # not included
        
    # get acc data
    try:
        if data_type == 'clinic':
            data = (pd.read_csv(os.path.join(clinicpath, 'Table8', 'TaskAcc', file + '.csv'), parse_dates = ['timestamp'])
                    [['timestamp', 'x', 'y', 'z']])
            data.columns = ['Timestamp', 'X', 'Y', 'Z']
        if data_type == 'home':
            data = pd.read_csv(file, parse_dates = ['Timestamp'])[['Timestamp', 'X', 'Y', 'Z']]
    except(FileNotFoundError):
        if data_type == 'clinic':
            print('No data found for subject %s %s visit %s' % (subject, task, visit))
        if data_type == 'home':
            print('No data found for %s' % (designation))
        return TaskFeatures
        
    # organize data and make 5 second clips
    data = data.sort_values(by = 'Timestamp', axis = 0)
    data['Timestamp2'] = [(tm - datetime.timedelta(minutes = 0,
                                                   seconds = tm.second % 5,
                                                   microseconds = tm.microsecond)) 
                          for tm in data.Timestamp]
    
    data['Timestamp'] = (data.Timestamp.values - data.Timestamp.values[0]).astype('timedelta64[ms]').astype(int)
    data = data.set_index('Timestamp')
    data.loc[:, ['X', 'Y', 'Z']] = filterdata(data[['X', 'Y', 'Z']])
                
    # "clip" the data into 5 second chunks    
    five_sec_intervals = data.Timestamp2.unique()
        
    # calculate features
    F = []
    num_empty = 0
    times = []
    for t in five_sec_intervals:
        clip = data.loc[(data.Timestamp2 == t)]
        # length of 5 second chunk should be 250 for 5 seconds
        if (clip.empty or (len(clip.Timestamp2) < 200)):
            num_empty += 1
        else:
            F.append(feature_extraction(clip[['X', 'Y', 'Z']]))
            times.append(t)

#     success_info = [file, len(five_sec_intervals), (len(five_sec_intervals) - num_empty)]
#     df = pd.DataFrame(data = [success_info], columns = ['File', 'Expected Clips', 'Actual Clips'])
#     if data_type == 'clinic':
#         dest_x = 'Clinic Data'
#     if data_type == 'home':
#         dest_x = 'Home Data'
#     if os.path.isfile(os.path.join(dest, dest_x, 'Success Info 2.csv')):
#         dfo = pd.read_csv(os.path.join(dest, dest_x, 'Success Info 2.csv'))
#         df = pd.concat([dfo, df], ignore_index = False)
#     df.to_csv(os.path.join(dest, dest_x, 'Success Info 2.csv'), index = False)
            
    # create features dataframe
    TaskFeatures = pd.DataFrame(data = F, columns = features_list, dtype = 'float32')    
    
    return TaskFeatures

In [38]:
### GENERATING FEATURES FROM CLINIC DATA USING EXPANDED FEATURES LIST
s_time = time.time()

clinicData = pd.read_csv(os.path.join(clinicpath, 'Table4.csv'))
DataFeatures = pd.DataFrame()

for task in clinicData.iterrows():
    file = str(int(task[1]['SubjID'])) + '_' + str(VisitNumber[task[1]['Visit']]) + '_' + task[1]['TaskAbb']
    TaskFeatures = DataAggregator2(file, 'clinic')
    if TaskFeatures.empty:
        continue
    featcols = list(TaskFeatures.columns)
    
    TaskFeatures['SubjID'] = task[1]['SubjID']
    TaskFeatures['Visit'] = VisitNumber[task[1]['Visit']]
    TaskFeatures['TaskAbb'] = task[1]['TaskAbb']
    TaskFeatures['Tremor'] = task[1]['Tremor - ' + task[1]['Side']]
    TaskFeatures['Bradykinesia'] = task[1]['Bradykinesia - ' + task[1]['Side']]
    TaskFeatures['Dyskinesia'] = task[1]['Dyskinesia - ' + task[1]['Side']]
    TaskFeatures['Overall'] = task[1]['Overall']
    
    cols = ['SubjID', 'Visit', 'TaskAbb', 'Tremor', 'Bradykinesia', 'Dyskinesia', 'Overall'] + featcols
    TaskFeatures = TaskFeatures[cols]
    
    if DataFeatures.empty:
        DataFeatures = TaskFeatures
        continue
    DataFeatures = pd.concat([DataFeatures, TaskFeatures], ignore_index = True)

DataFeatures.to_csv(os.path.join(dest, 'Clinic Data', 'ClinicDataFeatures2.csv'), index = False)

print(str(int(((time.time() - s_time) / 60) / 60)) + ' hours ' + 
      str(int(((time.time() - s_time) / 60) % 60)) + ' minutes ' + 
      str(int((time.time() - s_time) % 60)) + ' seconds')

1003_0_Drnkg
1003_0_Drwg
1003_0_Fldg
1003_0_FtnL
1003_0_FtnR
1003_0_NtsBts
1003_0_RamL
1003_0_RamR
1003_0_Sheets
1003_0_Sitng
1003_0_SitStand
1003_0_Stndg
1003_0_Typg
1003_0_Wlkg
1003_0_WlkgCnt
1003_1_Drnkg
1003_1_Drwg
1003_1_Fldg
1003_1_FtnL
1003_1_FtnR
1003_1_NtsBts
1003_1_RamL
1003_1_RamR
1003_1_Sheets
1003_1_Sitng
1003_1_SitStand
1003_1_Stndg
1003_1_Typg
1003_1_Wlkg
1003_1_WlkgCnt
1003_2_Drnkg
1003_2_Drwg
1003_2_Fldg
1003_2_FtnL
1003_2_FtnR
1003_2_NtsBts
1003_2_RamL
1003_2_RamR
1003_2_Sheets
1003_2_Sitng
1003_2_SitStand
1003_2_Stndg
1003_2_Typg
1003_2_Wlkg
1003_2_WlkgCnt
1003_3_Drnkg
1003_3_Drwg
1003_3_Fldg
1003_3_FtnL
1003_3_FtnR
1003_3_NtsBts
1003_3_RamL
1003_3_RamR
1003_3_Sheets
1003_3_Sitng
1003_3_SitStand
1003_3_Stndg
1003_3_Typg
1003_3_Wlkg
1003_3_WlkgCnt
1003_4_Drnkg
1003_4_Drwg
1003_4_Fldg
1003_4_FtnL
1003_4_FtnR
1003_4_NtsBts
1003_4_RamL
1003_4_RamR
1003_4_Sheets
1003_4_Sitng
1003_4_SitStand
1003_4_Stndg
1003_4_Typg
1003_4_Wlkg
1003_4_WlkgCnt
1003_5_Drnkg
1003_5_Drwg
1003_

1007_2_Drwg
1007_2_Fldg
1007_2_FtnL
1007_2_FtnR
1007_2_NtsBts
1007_2_RamL
1007_2_RamR
1007_2_Sheets
1007_2_Sitng
1007_2_SitStand
1007_2_Stndg
1007_2_Typg
1007_2_Wlkg
1007_2_WlkgCnt
1007_3_Drnkg
1007_3_Drwg
1007_3_Fldg
1007_3_FtnL
1007_3_FtnR
1007_3_NtsBts
1007_3_RamL
1007_3_RamR
1007_3_Sheets
1007_3_Sitng
1007_3_SitStand
1007_3_Stndg
1007_3_Typg
1007_3_Wlkg
1007_3_WlkgCnt
1007_4_Drnkg
No data found for subject 1007 Drnkg visit 4
1007_4_Drwg
No data found for subject 1007 Drwg visit 4
1007_4_Fldg
No data found for subject 1007 Fldg visit 4
1007_4_FtnL
No data found for subject 1007 FtnL visit 4
1007_4_FtnR
No data found for subject 1007 FtnR visit 4
1007_4_NtsBts
No data found for subject 1007 NtsBts visit 4
1007_4_RamL
No data found for subject 1007 RamL visit 4
1007_4_RamR
No data found for subject 1007 RamR visit 4
1007_4_Sheets
No data found for subject 1007 Sheets visit 4
1007_4_Sitng
No data found for subject 1007 Sitng visit 4
1007_4_SitStand
No data found for subject 1007 SitSta

1019_6_Drnkg
1019_6_Drwg
1019_6_Fldg
1019_6_FtnL
1019_6_FtnR
1019_6_NtsBts
1019_6_RamL
1019_6_RamR
1019_6_Sheets
1019_6_Sitng
1019_6_SitStand
1019_6_Stndg
1019_6_Typg
1019_6_Wlkg
1019_6_WlkgCnt
1020_0_Drnkg
No data found for subject 1020 Drnkg visit 0
1020_0_Drwg
No data found for subject 1020 Drwg visit 0
1020_0_Fldg
No data found for subject 1020 Fldg visit 0
1020_0_FtnL
No data found for subject 1020 FtnL visit 0
1020_0_FtnR
No data found for subject 1020 FtnR visit 0
1020_0_NtsBts
No data found for subject 1020 NtsBts visit 0
1020_0_RamL
No data found for subject 1020 RamL visit 0
1020_0_RamR
No data found for subject 1020 RamR visit 0
1020_0_Sheets
No data found for subject 1020 Sheets visit 0
1020_0_Sitng
No data found for subject 1020 Sitng visit 0
1020_0_SitStand
No data found for subject 1020 SitStand visit 0
1020_0_Stndg
No data found for subject 1020 Stndg visit 0
1020_0_Typg
No data found for subject 1020 Typg visit 0
1020_0_Wlkg
No data found for subject 1020 Wlkg visit 0


1024_3_FtnL
1024_3_FtnR
1024_3_NtsBts
1024_3_RamL
1024_3_RamR
1024_3_Sheets
1024_3_Sitng
1024_3_SitStand
1024_3_Stndg
1024_3_Typg
1024_3_Wlkg
1024_3_WlkgCnt
1024_4_Drnkg
1024_4_Drwg
1024_4_Fldg
1024_4_FtnL
1024_4_FtnR
1024_4_NtsBts
1024_4_RamL
1024_4_RamR
1024_4_Sheets
1024_4_Sitng
1024_4_SitStand
1024_4_Stndg
1024_4_Typg
1024_4_Wlkg
1024_4_WlkgCnt
1024_5_Drnkg
1024_5_Drwg
1024_5_Fldg
1024_5_FtnL
1024_5_FtnR
1024_5_NtsBts
1024_5_RamL
1024_5_RamR
1024_5_Sheets
1024_5_Sitng
1024_5_SitStand
1024_5_Stndg
1024_5_Typg
1024_5_Wlkg
1024_5_WlkgCnt
1024_6_Drnkg
1024_6_Drwg
1024_6_Fldg
1024_6_FtnL
1024_6_FtnR
1024_6_NtsBts
1024_6_RamL
1024_6_RamR
1024_6_Sheets
1024_6_Sitng
1024_6_SitStand
1024_6_Stndg
1024_6_Typg
1024_6_Wlkg
1024_6_WlkgCnt
1029_0_Drnkg
1029_0_Drwg
1029_0_Fldg
1029_0_FtnL
1029_0_FtnR
1029_0_NtsBts
1029_0_RamL
1029_0_RamR
1029_0_Sheets
1029_0_Sitng
1029_0_SitStand
1029_0_Stndg
1029_0_Typg
1029_0_Wlkg
1029_0_WlkgCnt
1029_1_Drnkg
1029_1_Drwg
1029_1_Fldg
1029_1_FtnL
1029_1_FtnR
1029_1

1032_3_Sitng
1032_3_SitStand
1032_3_Stndg
1032_3_Typg
1032_3_Wlkg
1032_3_WlkgCnt
1032_4_Drnkg
1032_4_Drwg
1032_4_Fldg
1032_4_FtnL
1032_4_FtnR
1032_4_NtsBts
1032_4_RamL
1032_4_RamR
1032_4_Sheets
1032_4_Sitng
1032_4_SitStand
1032_4_Stndg
1032_4_Typg
1032_4_Wlkg
1032_4_WlkgCnt
1032_5_Drnkg
1032_5_Drwg
1032_5_Fldg
1032_5_FtnL
1032_5_FtnR
1032_5_NtsBts
1032_5_RamL
1032_5_RamR
1032_5_Sheets
1032_5_Sitng
1032_5_SitStand
1032_5_Stndg
1032_5_Typg
1032_5_Wlkg
1032_5_WlkgCnt
1032_6_Drnkg
1032_6_Drwg
1032_6_Fldg
1032_6_FtnL
1032_6_FtnR
1032_6_NtsBts
1032_6_RamL
1032_6_RamR
1032_6_Sheets
1032_6_Sitng
1032_6_SitStand
1032_6_Stndg
1032_6_Typg
1032_6_Wlkg
1032_6_WlkgCnt
1038_0_Drnkg
1038_0_Drwg
1038_0_Fldg
1038_0_FtnL
1038_0_FtnR
1038_0_NtsBts
1038_0_RamL
1038_0_RamR
1038_0_Sheets
1038_0_Sitng
1038_0_SitStand
1038_0_Stndg
1038_0_Typg
1038_0_Wlkg
1038_0_WlkgCnt
1038_1_Drnkg
1038_1_Drwg
1038_1_Fldg
1038_1_FtnL
1038_1_FtnR
1038_1_NtsBts
1038_1_RamL
1038_1_RamR
1038_1_Sheets
1038_1_Sitng
1038_1_SitStand
1

1048_0_Drwg
1048_0_Fldg
1048_0_FtnL
1048_0_FtnR
1048_0_NtsBts
1048_0_RamL
1048_0_RamR
1048_0_Sheets
1048_0_Sitng
1048_0_SitStand
1048_0_Stndg
1048_0_Typg
1048_0_Wlkg
1048_0_WlkgCnt
1048_1_Drnkg
1048_1_Drwg
1048_1_Fldg
1048_1_FtnL
1048_1_FtnR
1048_1_NtsBts
1048_1_RamL
1048_1_RamR
1048_1_Sheets
1048_1_Sitng
1048_1_SitStand
1048_1_Stndg
1048_1_Typg
1048_1_Wlkg
1048_1_WlkgCnt
1048_2_Drnkg
1048_2_Drwg
1048_2_Fldg
1048_2_FtnL
1048_2_FtnR
1048_2_NtsBts
1048_2_RamL
1048_2_RamR
1048_2_Sheets
1048_2_Sitng
1048_2_SitStand
1048_2_Stndg
1048_2_Typg
1048_2_Wlkg
1048_2_WlkgCnt
1048_3_Drnkg
1048_3_Drwg
1048_3_Fldg
1048_3_FtnL
1048_3_FtnR
1048_3_NtsBts
1048_3_RamL
1048_3_RamR
1048_3_Sheets
1048_3_Sitng
1048_3_SitStand
1048_3_Stndg
1048_3_Typg
1048_3_Wlkg
1048_3_WlkgCnt
1048_4_Drnkg
1048_4_Drwg
1048_4_Fldg
1048_4_FtnL
1048_4_FtnR
1048_4_NtsBts
1048_4_RamL
1048_4_RamR
1048_4_Sheets
1048_4_Sitng
1048_4_SitStand
1048_4_Stndg
1048_4_Typg
1048_4_Wlkg
1048_4_WlkgCnt
1048_5_Drnkg
No data found for subject 1048

No data found for subject 1053 FtnR visit 1
1053_1_NtsBts
No data found for subject 1053 NtsBts visit 1
1053_1_RamL
No data found for subject 1053 RamL visit 1
1053_1_RamR
No data found for subject 1053 RamR visit 1
1053_1_Sheets
No data found for subject 1053 Sheets visit 1
1053_1_Sitng
No data found for subject 1053 Sitng visit 1
1053_1_SitStand
No data found for subject 1053 SitStand visit 1
1053_1_Stndg
No data found for subject 1053 Stndg visit 1
1053_1_Typg
No data found for subject 1053 Typg visit 1
1053_1_Wlkg
No data found for subject 1053 Wlkg visit 1
1053_1_WlkgCnt
No data found for subject 1053 WlkgCnt visit 1
1053_2_Drnkg
No data found for subject 1053 Drnkg visit 2
1053_2_Drwg
No data found for subject 1053 Drwg visit 2
1053_2_Fldg
No data found for subject 1053 Fldg visit 2
1053_2_FtnL
No data found for subject 1053 FtnL visit 2
1053_2_FtnR
No data found for subject 1053 FtnR visit 2
1053_2_NtsBts
No data found for subject 1053 NtsBts visit 2
1053_2_RamL
No data found fo

No data found for subject 1055 Typg visit 3
1055_3_Wlkg
No data found for subject 1055 Wlkg visit 3
1055_3_WlkgCnt
No data found for subject 1055 WlkgCnt visit 3
1055_4_Drnkg
No data found for subject 1055 Drnkg visit 4
1055_4_Drwg
No data found for subject 1055 Drwg visit 4
1055_4_Fldg
No data found for subject 1055 Fldg visit 4
1055_4_FtnL
No data found for subject 1055 FtnL visit 4
1055_4_FtnR
No data found for subject 1055 FtnR visit 4
1055_4_NtsBts
No data found for subject 1055 NtsBts visit 4
1055_4_RamL
No data found for subject 1055 RamL visit 4
1055_4_RamR
No data found for subject 1055 RamR visit 4
1055_4_Sheets
No data found for subject 1055 Sheets visit 4
1055_4_Sitng
No data found for subject 1055 Sitng visit 4
1055_4_SitStand
No data found for subject 1055 SitStand visit 4
1055_4_Stndg
No data found for subject 1055 Stndg visit 4
1055_4_Typg
No data found for subject 1055 Typg visit 4
1055_4_Wlkg
No data found for subject 1055 Wlkg visit 4
1055_4_WlkgCnt
No data found for

In [44]:
### GENERATING FEATURES FROM CLINIC DATA USING EXPANDED FEATURES LIST
## ISOLATE NORTHWESTERN SITE SUBJECT DATA FEATURES
s_time = time.time()

clinicData = pd.read_csv(os.path.join(clinicpath, 'Table4.csv'))
siteInfo = pd.read_csv(os.path.join(clinicpath, 'Table1.csv'))
NWsubj = siteInfo.SubjectID[siteInfo.Site == 1332].values
DataFeatures = pd.DataFrame()

for task in clinicData.iterrows():
    if not task[1]['SubjID'] in NWsubj:
        continue
    file = str(int(task[1]['SubjID'])) + '_' + str(VisitNumber[task[1]['Visit']]) + '_' + task[1]['TaskAbb']
    TaskFeatures = DataAggregator2(file, 'clinic')
    if TaskFeatures.empty:
        continue
    featcols = list(TaskFeatures.columns)
    
    TaskFeatures['SubjID'] = task[1]['SubjID']
    TaskFeatures['Visit'] = VisitNumber[task[1]['Visit']]
    TaskFeatures['TaskAbb'] = task[1]['TaskAbb']
    TaskFeatures['Tremor'] = task[1]['Tremor - ' + task[1]['Side']]
    TaskFeatures['Bradykinesia'] = task[1]['Bradykinesia - ' + task[1]['Side']]
    TaskFeatures['Dyskinesia'] = task[1]['Dyskinesia - ' + task[1]['Side']]
    TaskFeatures['Overall'] = task[1]['Overall']
    
    cols = ['SubjID', 'Visit', 'TaskAbb', 'Tremor', 'Bradykinesia', 'Dyskinesia', 'Overall'] + featcols
    TaskFeatures = TaskFeatures[cols]
    
    if DataFeatures.empty:
        DataFeatures = TaskFeatures
        continue
    DataFeatures = pd.concat([DataFeatures, TaskFeatures], ignore_index = True)

DataFeatures.to_csv(os.path.join(dest, 'Clinic Data', 'ClinicDataFeatures3.csv'), index = False)

print(str(int(((time.time() - s_time) / 60) / 60)) + ' hours ' + 
      str(int(((time.time() - s_time) / 60) % 60)) + ' minutes ' + 
      str(int((time.time() - s_time) % 60)) + ' seconds')

1004_0_Drnkg
1004_0_Drwg
1004_0_Fldg
1004_0_FtnL
1004_0_FtnR
1004_0_NtsBts
1004_0_RamL
1004_0_RamR
1004_0_Sheets
1004_0_Sitng
1004_0_SitStand
1004_0_Stndg
1004_0_Typg
1004_0_Wlkg
1004_0_WlkgCnt
1004_1_Drnkg
1004_1_Drwg
1004_1_Fldg
1004_1_FtnL
1004_1_FtnR
1004_1_NtsBts
1004_1_RamL
1004_1_RamR
1004_1_Sheets
1004_1_Sitng
1004_1_SitStand
1004_1_Stndg
1004_1_Typg
1004_1_Wlkg
1004_1_WlkgCnt
1004_2_Drnkg
1004_2_Drwg
1004_2_Fldg
1004_2_FtnL
1004_2_FtnR
1004_2_NtsBts
1004_2_RamL
1004_2_RamR
1004_2_Sheets
1004_2_Sitng
1004_2_SitStand
1004_2_Stndg
1004_2_Typg
1004_2_Wlkg
1004_2_WlkgCnt
1004_3_Drnkg
1004_3_Drwg
1004_3_Fldg
1004_3_FtnL
1004_3_FtnR
1004_3_NtsBts
1004_3_RamL
1004_3_RamR
1004_3_Sheets
1004_3_Sitng
1004_3_SitStand
1004_3_Stndg
1004_3_Typg
1004_3_Wlkg
1004_3_WlkgCnt
1004_4_Drnkg
1004_4_Drwg
1004_4_Fldg
1004_4_FtnL
1004_4_FtnR
1004_4_NtsBts
1004_4_RamL
1004_4_RamR
1004_4_Sheets
1004_4_Sitng
1004_4_SitStand
1004_4_Stndg
1004_4_Typg
1004_4_Wlkg
1004_4_WlkgCnt
1004_5_Drnkg
No data found for

1020_6_Drwg
1020_6_Fldg
1020_6_FtnL
1020_6_FtnR
1020_6_NtsBts
1020_6_RamL
1020_6_RamR
1020_6_Sheets
1020_6_Sitng
1020_6_SitStand
1020_6_Stndg
1020_6_Typg
1020_6_Wlkg
1020_6_WlkgCnt
1024_0_Drnkg
1024_0_Drwg
1024_0_Fldg
1024_0_FtnL
1024_0_FtnR
1024_0_NtsBts
1024_0_RamL
1024_0_RamR
1024_0_Sheets
1024_0_Sitng
1024_0_SitStand
1024_0_Stndg
1024_0_Typg
1024_0_Wlkg
1024_0_WlkgCnt
1024_1_Drnkg
1024_1_Drwg
1024_1_Fldg
1024_1_FtnL
1024_1_FtnR
1024_1_NtsBts
1024_1_RamL
1024_1_RamR
1024_1_Sheets
1024_1_Sitng
1024_1_SitStand
1024_1_Stndg
1024_1_Typg
1024_1_Wlkg
1024_1_WlkgCnt
1024_2_Drnkg
No data found for subject 1024 Drnkg visit 2
1024_2_Drwg
No data found for subject 1024 Drwg visit 2
1024_2_Fldg
No data found for subject 1024 Fldg visit 2
1024_2_FtnL
No data found for subject 1024 FtnL visit 2
1024_2_FtnR
No data found for subject 1024 FtnR visit 2
1024_2_NtsBts
No data found for subject 1024 NtsBts visit 2
1024_2_RamL
No data found for subject 1024 RamL visit 2
1024_2_RamR
No data found for sub

1030_6_Drwg
1030_6_Fldg
1030_6_FtnL
1030_6_FtnR
1030_6_NtsBts
1030_6_RamL
1030_6_RamR
1030_6_Sheets
1030_6_Sitng
1030_6_SitStand
1030_6_Stndg
1030_6_Typg
1030_6_Wlkg
1030_6_WlkgCnt
1032_0_Drnkg
1032_0_Drwg
1032_0_Fldg
1032_0_FtnL
1032_0_FtnR
1032_0_NtsBts
1032_0_RamL
1032_0_RamR
1032_0_Sheets
1032_0_Sitng
1032_0_SitStand
1032_0_Stndg
1032_0_Typg
1032_0_Wlkg
1032_0_WlkgCnt
1032_1_Drnkg
1032_1_Drwg
1032_1_Fldg
1032_1_FtnL
1032_1_FtnR
1032_1_NtsBts
1032_1_RamL
1032_1_RamR
1032_1_Sheets
1032_1_Sitng
1032_1_SitStand
1032_1_Stndg
1032_1_Typg
1032_1_Wlkg
1032_1_WlkgCnt
1032_2_Drnkg
1032_2_Drwg
1032_2_Fldg
1032_2_FtnL
1032_2_FtnR
1032_2_NtsBts
1032_2_RamL
1032_2_RamR
1032_2_Sheets
1032_2_Sitng
1032_2_SitStand
1032_2_Stndg
1032_2_Typg
1032_2_Wlkg
1032_2_WlkgCnt
1032_3_Drnkg
1032_3_Drwg
1032_3_Fldg
1032_3_FtnL
1032_3_FtnR
1032_3_NtsBts
1032_3_RamL
1032_3_RamR
1032_3_Sheets
1032_3_Sitng
1032_3_SitStand
1032_3_Stndg
1032_3_Typg
1032_3_Wlkg
1032_3_WlkgCnt
1032_4_Drnkg
1032_4_Drwg
1032_4_Fldg
1032_4

No data found for subject 1047 Fldg visit 6
1047_6_FtnL
No data found for subject 1047 FtnL visit 6
1047_6_FtnR
No data found for subject 1047 FtnR visit 6
1047_6_NtsBts
No data found for subject 1047 NtsBts visit 6
1047_6_RamL
No data found for subject 1047 RamL visit 6
1047_6_RamR
No data found for subject 1047 RamR visit 6
1047_6_Sheets
No data found for subject 1047 Sheets visit 6
1047_6_Sitng
No data found for subject 1047 Sitng visit 6
1047_6_SitStand
No data found for subject 1047 SitStand visit 6
1047_6_Stndg
No data found for subject 1047 Stndg visit 6
1047_6_Typg
No data found for subject 1047 Typg visit 6
1047_6_Wlkg
No data found for subject 1047 Wlkg visit 6
1047_6_WlkgCnt
No data found for subject 1047 WlkgCnt visit 6
1049_0_Drnkg
1049_0_Drwg
1049_0_Fldg
1049_0_FtnL
1049_0_FtnR
1049_0_NtsBts
1049_0_RamL
1049_0_RamR
1049_0_Sheets
1049_0_Sitng
1049_0_SitStand
1049_0_Stndg
1049_0_Typg
1049_0_Wlkg
1049_0_WlkgCnt
1049_1_Drnkg
1049_1_Drwg
1049_1_Fldg
1049_1_FtnL
1049_1_FtnR
104

No data found for subject 1053 Wlkg visit 1
1053_1_WlkgCnt
No data found for subject 1053 WlkgCnt visit 1
1053_2_Drnkg
No data found for subject 1053 Drnkg visit 2
1053_2_Drwg
No data found for subject 1053 Drwg visit 2
1053_2_Fldg
No data found for subject 1053 Fldg visit 2
1053_2_FtnL
No data found for subject 1053 FtnL visit 2
1053_2_FtnR
No data found for subject 1053 FtnR visit 2
1053_2_NtsBts
No data found for subject 1053 NtsBts visit 2
1053_2_RamL
No data found for subject 1053 RamL visit 2
1053_2_RamR
No data found for subject 1053 RamR visit 2
1053_2_Sheets
No data found for subject 1053 Sheets visit 2
1053_2_Sitng
No data found for subject 1053 Sitng visit 2
1053_2_SitStand
No data found for subject 1053 SitStand visit 2
1053_2_Stndg
No data found for subject 1053 Stndg visit 2
1053_2_Typg
No data found for subject 1053 Typg visit 2
1053_2_Wlkg
No data found for subject 1053 Wlkg visit 2
1053_2_WlkgCnt
No data found for subject 1053 WlkgCnt visit 2
1053_3_Drnkg
No data found

No data found for subject 1055 Wlkg visit 4
1055_4_WlkgCnt
No data found for subject 1055 WlkgCnt visit 4
1055_5_Drnkg
No data found for subject 1055 Drnkg visit 5
1055_5_Drwg
No data found for subject 1055 Drwg visit 5
1055_5_Fldg
No data found for subject 1055 Fldg visit 5
1055_5_FtnL
No data found for subject 1055 FtnL visit 5
1055_5_FtnR
No data found for subject 1055 FtnR visit 5
1055_5_NtsBts
No data found for subject 1055 NtsBts visit 5
1055_5_RamL
No data found for subject 1055 RamL visit 5
1055_5_RamR
No data found for subject 1055 RamR visit 5
1055_5_Sheets
No data found for subject 1055 Sheets visit 5
1055_5_Sitng
No data found for subject 1055 Sitng visit 5
1055_5_SitStand
No data found for subject 1055 SitStand visit 5
1055_5_Stndg
No data found for subject 1055 Stndg visit 5
1055_5_Typg
No data found for subject 1055 Typg visit 5
1055_5_Wlkg
No data found for subject 1055 Wlkg visit 5
1055_5_WlkgCnt
No data found for subject 1055 WlkgCnt visit 5
1056_0_Drnkg
No data found

In [26]:
### GENERATING FEATURES FROM HOME DATA

s_time = time.time()

Records = getTimestampsPaths(timepoints_med, timepoints_symt, timepoints_diar)
DataFeatures = pd.DataFrame()

for record in Records.iterrows():
#     file = record[1]['SaveFilePaths']
    file = '//FS2.smpp.local\\\\RTO\\\\CIS-PD Study\\Patient Record Correlation\\Symptom Reports\\1048\\2017-11\\05\\015645.csv'
    TaskFeatures = DataAggregator2(file, 'home')
    if TaskFeatures.empty:
        continue
    featcols = list(TaskFeatures.columns)
    
    TaskFeatures['SubjID'] = record[1]['SubjID']
    if file[:81] == '//FS2.smpp.local\\\\RTO\\\\CIS-PD Study\\Patient Record Correlation\\Medication Reports':
        TaskFeatures['Report'] = 'Medication Report'
    if file[:78] == '//FS2.smpp.local\\\\RTO\\\\CIS-PD Study\\Patient Record Correlation\\Symptom Reports':
        TaskFeatures['Report'] = 'Symptom Report'
    if file[:70] == '//FS2.smpp.local\\\\RTO\\\\CIS-PD Study\\Patient Record Correlation\\Diaries':
        TaskFeatures['Report'] = 'Diary'
    
    cols = ['SubjID', 'Report'] + featcols
    TaskFeatures = TaskFeatures[cols]
    
    if DataFeatures.empty:
        DataFeatures = TaskFeatures
#         continue
#     DataFeatures = pd.concat([DataFeatures, TaskFeatures], ignore_index = True)
    
    break

DataFeatures.to_csv(os.path.join(dest, 'Home Data', 'HomeDataFeatures.csv'), index = False)
    
print(str(int(((time.time() - s_time) / 60) / 60)) + ' hours ' + 
      str(int(((time.time() - s_time) / 60) % 60)) + ' minutes ' + 
      str(int((time.time() - s_time) % 60)) + ' seconds')

//FS2.smpp.local\\RTO\\CIS-PD Study\Patient Record Correlation\Symptom Reports\1048\2017-11\05\015645.csv
0 hours 0 minutes 20 seconds


In [42]:
siteInfo = pd.read_csv(os.path.join(clinicpath, 'Table1.csv'))
NWsubj = siteInfo.SubjectID[siteInfo.Site == 1332].values

yes
