# Assemble data for CIS-PD Curation

1. Create table/file (task_timestamp.csv) with subjid, visit, task, start timestamp, stop timestamp
2. Create table/file for sensor info
3. Create table/file for sensor serial number info for each subject

Project status:
- Complete: Table 1, 2, 3
- In progress: Create summary table

# Import packages

In [2]:
# import packages
import os
import pandas as pd
import numpy as np
import re
import pathlib

# Table 1 - task and timestamp

In [316]:
# Root folder that is walked for the per-task CSV recordings used to build Table 1.
path = r'//FS2.smpp.local\RTO\CIS-PD Study\MJFF Curation\TaskAcc'
# Unused leftovers from earlier exploration (kept for reference):
# filename = r'//FS2.smpp.local\RTO\CIS-PD MUSC\codelist.sas7bdat'
# data124 = pd.read_hdf(os.path.join(path,'updrs_124.h5'))

## Skip: Functions - if time

In [41]:
def extract_data(path):
    """Collect the top-level entry name of every ``.csv`` file found under ``path``.

    Walks ``path`` recursively. For each file whose name ends in ``.csv`` the
    first component of its location relative to ``path`` is recorded: the file
    name itself when the file sits directly in ``path``, otherwise the name of
    the top-level sub-folder that contains it.

    Parsing metadata out of the file name and reading the first/last
    timestamps are not implemented here yet.

    Parameters
    ----------
    path : str or os.PathLike
        Folder to search.

    Returns
    -------
    pandas.DataFrame
        One row per CSV file found, with a single ``subjid`` column holding
        the recorded name. Empty (but with the column present) when no CSV
        file exists under ``path``.
    """
    names = []

    for root, _dirs, files in os.walk(path, topdown=True):
        for filename in files:
            if filename.endswith('.csv'):
                relative = pathlib.Path(root, filename).relative_to(path)
                # .parts is separator-agnostic, unlike splitting on "\\",
                # which only works for Windows-style paths.
                names.append(relative.parts[0])

    # Previously the frame was built and then dropped; hand it to the caller.
    return pd.DataFrame(names, columns=['subjid'])


## Get all filenames

In [317]:
# Collect one entry per CSV recording found under `path`.
# The accumulator is deliberately not called `list`: rebinding that name at
# notebook level hides the builtin for every later cell.
csv_names = []

for root, dirs, files in os.walk(path, topdown=True):
    for filenames in files:
        if filenames.endswith('.csv'):
            p = pathlib.Path(os.path.join(root, filenames))
            # First component of the location relative to `path`. Using .parts
            # instead of splitting on "\\" keeps this independent of the path
            # separator. For files directly inside `path` this is the file name.
            name = p.relative_to(path).parts[0]
            csv_names.append(name)

df = pd.DataFrame({'filename': csv_names})

# Next step: open each file named in df.filename, extract its start and stop
# timestamps, and add them to df as two columns.

In [318]:
# Preview the first three collected file names.
df.iloc[:3]

Unnamed: 0,filename
0,1003_0_Drnkg.csv
1,1003_0_Drwg.csv
2,1003_0_Fldg.csv


In [319]:
# Look at a single file name (row label 0) to see the naming pattern.
df.loc[0, 'filename']

'1003_0_Drnkg.csv'

In [320]:
# Scratch check of the filename column: take the first two rows, then slice again.
# NOTE(review): the second slice [0:4] selects rows again, not characters. If the
# intent was the leading subject-id characters of each name, `.str[0:4]` would be
# needed instead — confirm.
df.filename[0:2][0:4]

0    1003_0_Drnkg.csv
1     1003_0_Drwg.csv
Name: filename, dtype: object

## Split filename into separate columns

In [321]:
# Strip the trailing ".csv" so only the "<subjid>_<visit>_<task>" stem remains.
# The pattern is a raw string ('\.' is an invalid escape in a normal string) and
# regex=True is explicit: pandas >= 2.0 treats the pattern literally by default,
# which would leave the extension in place and break the task-name mapping.
temp = pd.DataFrame(df.filename.str.replace(r'\.csv$', '', regex=True))

In [322]:
# Turn each file-name stem into the list of its underscore-separated pieces.
temp['filename'] = temp['filename'].str.split('_')

In [323]:
# Every entry of temp.filename must unpack into exactly three pieces, in the
# order subject id, visit code, task code; anything else raises ValueError.
id_list = [subj for subj, _, _ in temp.filename]
time_list = [visit for _, visit, _ in temp.filename]
task_list = [task_code for _, _, task_code in temp.filename]

In [324]:
# Convert the three lists to arrays and attach them to df as new columns.
subjid, time, task = (np.asarray(values) for values in (id_list, time_list, task_list))
df['subjid'], df['time'], df['task'] = subjid, time, task

## Extract start/stop timestamps

In [326]:
# First and last timestamp of every recording, in the same order as df.filename.
start, stop = [], []

# Each file holds one task, so its first/last rows bound that task in time.
for csv_name in df.filename:
    recording = pd.read_csv(os.path.join(path, csv_name))
    timestamps = recording.timestamp
    start.append(timestamps.iloc[0])
    stop.append(timestamps.iloc[-1])

In [327]:
# Attach the collected first/last timestamps to df as two new columns.
start, stop = np.asarray(start), np.asarray(stop)
df['start timestamp'], df['stop timestamp'] = start, stop

## Change time to full name

In [329]:
# Full visit labels, listed in the order of the numeric visit codes ('0' .. '6')
# used in the file names.
newname = [
    '2 Weeks: Time 0',
    '2 Weeks: Time 30',
    '2 Weeks: Time 60',
    '2 Weeks: Time 90',
    '2 Weeks: Time 120',
    '2 Weeks: Time 150',
    '1 Month',
]
oldname = [str(code) for code in range(len(newname))]
namechange = {code: label for code, label in zip(oldname, newname)}

# Only values in the 'time' column are translated.
df = df.replace({'time': namechange})

## Convert task short name to full name

In [330]:
# (short task code used in the file names, full task description)
task_names = [
    ('Shaking', 'Shaking'),
    ('Stndg', 'Standing'),
    ('Wlkg', 'Walking'),
    ('WlkgCnt', 'Walking while counting'),
    ('FtnR', 'Finger to nose--right hand'),
    ('FtnL', 'Finger to nose--left hand'),
    ('RamR', 'Alternating right hand movements'),
    ('RamL', 'Alternating left hand movements'),
    ('SitStand', 'Sit to stand'),
    ('Drwg', 'Drawing on a paper'),
    ('Typg', 'Typing on a computer keyboard'),
    ('NtsBts', 'Assembling nuts and bolts'),
    ('Drnkg', 'Taking a glass of water and drinking'),
    ('Sheets', 'Organizing sheets in a folder'),
    ('Fldg', 'Folding towels'),
    ('Sitng', 'Sitting'),
]
key = [short for short, _ in task_names]
value = [full for _, full in task_names]
name_dict = dict(task_names)

# Codes missing from name_dict come out of .map as NaN.
df['task'] = df['task'].map(name_dict)

## Delete filename column

In [331]:
# The file name has been split into subjid/time/task and is no longer needed.
# Removes the column from df in place.
del df['filename']

## Change column names

In [332]:
# Rename by column name rather than by position, so a change in column order or
# an extra column upstream cannot silently attach a label to the wrong data.
df = df.rename(columns={
    'subjid': 'SubjID',
    'time': 'Visit',
    'task': 'Task',
    'start timestamp': 'Start Timestamp (UTC)',
    'stop timestamp': 'Stop Timestamp (UTC)',
})

In [333]:
# Show the first row of the finished table as a sanity check.
df.iloc[:1]

Unnamed: 0,SubjID,Visit,Task,Start Timestamp (UTC),Stop Timestamp (UTC)
0,1003,2 Weeks: Time 0,Taking a glass of water and drinking,2017-07-03 15:52:57.090,2017-07-03 15:53:32.788


## Save as csv file

In [338]:
# Write Table 1 to the curation folder (the index is written as the first column).
savepath = r'//FS2.smpp.local\RTO\CIS-PD Study\MJFF Curation'
finalname = os.path.join(savepath,'task_timestamp.csv')
# Pass the path straight to pandas: to_csv opens, writes and closes the file
# itself. The former surrounding open(finalname, 'wb') handle was never written
# to and only opened the same file a second time.
df.to_csv(finalname, sep=',')

# Notes for Table 2 and 3
Table 2 - sensor info
- Sensor locations - 10
- Sensor types (at each location) - ECG, Gyro, Accel
- sampling frequency (for each sensor type)
- ranges?
- Description of landmarks for sensor placement

Table 3
- Sensor location and serial # for each participant

# Table 2 - sensor info

Notes

MC10 sensor locations
Medial Chest
Right Arm
Left Arm
Right Thigh
Left Thigh
Right Shank
Left Shank
Back
Right Hand
Left Hand
array(['anterior_thigh_left', 'anterior_thigh_right',
       'distal_lateral_shank_left', 'distal_lateral_shank_right',
       'dorsal_hand_left', 'dorsal_hand_right', 'flexor_digitorum_left',
       'flexor_digitorum_right', 'medial_chest', 'sacrum'], dtype=object)

Description of landmarks
Medial Chest - Halfway between base of throat and bottom of sternum (xiphoid process)
- Readable Left to Right
Right Arm (Wrist Flexors)
- Thick end proximal
- Patient: Flex Wrist
- On top of Wrist Flexors
Left Arm (Wrist Flexors)
Right Thigh (proximal of Epicondyles)
- Thick end proximal
- Patient: Stand Up
- Proximal of Femur Epicondyles
Left Thigh (proximal of Epicondyles)
Right Shank (along fibula, proximal of Lateral Malleolus)
- Thick end proximal
- Patient: Sit Down
- Proximal of Fibular Lateral Malleolus
Left Shank (along fibula, proximal of Lateral Malleolus)
Back - (slightly superior of Posterior Superior Iliac Spine (PSIS))
- Readable Left to Right
- Superior of Posterior Superior Iliac Spine (PSIS)
Right Hand - thick end away from thumb
- Thick end medial
- Pointing away from thumb
- Parallel to wrist joint
Left Hand - thick end away from thumb


Refer to PD Study Sensor Guide for info
Path: CIS-PD Study/PD Sensor Guides

Sampling frequency for each sensor location

Medial Chest
- ECG 1 kHz
- Accel 31.25 Hz
Right Arm (Wrist Flexors)
- ECG 1 kHz
- Accel 31.25 Hz
Left Arm (Wrist Flexors)
- ECG 1 kHz
- Accel 31.25 Hz
Right Thigh (proximal of Epicondyles)
- Gyro 62.5 Hz
- Accel 62.5 Hz
Left Thigh (proximal of Epicondyles)
- Gyro 62.5 Hz
- Accel 62.5 Hz
Right Shank (along fibula, proximal of Lateral Malleolus)
- Gyro 62.5 Hz
- Accel 62.5 Hz
Left Shank (along fibula, proximal of Lateral Malleolus)
- Gyro 62.5 Hz
- Accel 62.5 Hz
Back - (slightly superior of Posterior Superior Iliac Spine (PSIS))
- Gyro 62.5 Hz
- Accel 62.5 Hz
Right Hand - thick end away from thumb
- Gyro 62.5 Hz
- Accel 62.5 Hz
Left Hand - thick end away from thumb
- Gyro 62.5 Hz
- Accel 62.5 Hz

In [311]:
# Sensor locations, spelled as the recording identifiers so all tables stay
# consistent. Trailing comments give the readable name of each location.
ecg_locs = ['medial_chest',              # Medial Chest
            'flexor_digitorum_right',    # Right Arm
            'flexor_digitorum_left']     # Left Arm
gyro_locs = ['anterior_thigh_right',         # Right Thigh
             'anterior_thigh_left',          # Left Thigh
             'distal_lateral_shank_right',   # Right Shank
             'distal_lateral_shank_left',    # Left Shank
             'sacrum',                       # Back
             'dorsal_hand_right',            # Right Hand
             'dorsal_hand_left']             # Left Hand

# Every location contributes two rows (one per sensor type), so each location
# name appears twice in a row.
sensorlocs = [loc for loc in ecg_locs + gyro_locs for _ in range(2)]

# sensor type: ECG + Accel on the first three locations, Gyro + Accel elsewhere
type1 = ['ECG', 'Accel'] * len(ecg_locs)
type2 = ['Gyro', 'Accel'] * len(gyro_locs)
sensortype = type1 + type2

# sampling frequency in Hz, aligned row-for-row with sensortype
freq1 = [1000, 31.25] * len(ecg_locs)
freq2 = [62.5] * (2 * len(gyro_locs))
freq = freq1 + freq2

# placement landmark for each location
landmark_by_loc = {
    'medial_chest': 'Halfway between base of throat and bottom of sternum (xiphoid process)',
    'flexor_digitorum_right': 'On top of Wrist Flexors',
    'flexor_digitorum_left': 'On top of Wrist Flexors',
    'anterior_thigh_right': 'Proximal of Femur Epicondyles',
    'anterior_thigh_left': 'Proximal of Femur Epicondyles',
    'distal_lateral_shank_right': 'Proximal of Fibular Lateral Malleolus',
    'distal_lateral_shank_left': 'Proximal of Fibular Lateral Malleolus',
    'sacrum': 'Superior of Posterior Superior Iliac Spine (PSIS)',
    'dorsal_hand_right': 'Pointing away from thumb, Parallel to wrist joint',
    'dorsal_hand_left': 'Pointing away from thumb, Parallel to wrist joint',
}
# one landmark per row, repeated for both sensor types at a location
landmark = [landmark_by_loc[loc] for loc in sensorlocs]

# column headers of the sensor info table
colnames = ['Sensor Location',
            'Sensor Type',
            'Frequency (Hz)',
            'Description of Landmarks']



In [313]:
# Assemble the sensor info table: one column per list, one row per list entry.
sensor_columns = {
    'Sensor Location': sensorlocs,
    'Sensor Type': sensortype,
    'Frequency (Hz)': freq,
    'Description of Landmarks': landmark,
}
sensordf = pd.DataFrame(data=sensor_columns)

In [314]:
# Write Table 2 to the curation folder (the index is written as the first column).
savepath = r'//FS2.smpp.local\RTO\CIS-PD Study\MJFF Curation'
finalname = os.path.join(savepath,'sensor_info.csv')
# Pass the path straight to pandas: to_csv opens, writes and closes the file
# itself. The former surrounding open(finalname, 'wb') handle was never written
# to and only opened the same file a second time.
sensordf.to_csv(finalname, sep=',')

In [None]:
# check output: display the assembled sensor info table
sensordf

# Table 3 - sensor location, sensor serial #
- save the date info (remove later if not needed)

In [241]:
# walk path down and grab subjid, sensor location, and serial #
table3path = r'//FS2.smpp.local\RTO\CIS-PD Study\Subjects'

list = []

for root, dirs, files in os.walk(table3path, topdown=True):
    for d in dirs:
        p = pathlib.Path(os.path.join(root, d))
        list.append(str(p))


In [276]:
# Break every collected path into its backslash-separated folder names.
splitlist = [entry.split("\\") for entry in list]

In [279]:
# Keep only the entries made of exactly four path components.
templist = [parts for parts in splitlist if len(parts) == 4]

In [285]:
# One column per path component of the retained entries.
colnames = [
    'SubjID',
    'Sensor Location',
    'Serial Number',
    'Timestamp (UTC)',
]
df = pd.DataFrame(data=templist, columns=colnames)

In [None]:
# check df: preview the first rows instead of rendering the whole table, which
# has one row per subject/sensor/serial/timestamp folder
df.head()

In [288]:
# Optional: preview the first sensor location with underscores swapped for spaces.
# The result is only displayed; df itself is not modified.
df.loc[0, 'Sensor Location'].replace('_', ' ')

'anterior thigh left'

In [289]:
# Write Table 3 to the curation folder (the index is written as the first column).
savepath = r'//FS2.smpp.local\RTO\CIS-PD Study\MJFF Curation'
finalname = os.path.join(savepath,'sensor_serialnum.csv')
# Pass the path straight to pandas: to_csv opens, writes and closes the file
# itself. The former surrounding open(finalname, 'wb') handle was never written
# to and only opened the same file a second time.
df.to_csv(finalname, sep=',')