# CIS-PD Form Data

Use the data dictionary and the codelist to convert the coded scores on each form into their labels. Save each decoded form as an HDF5 (`.h5`) file.

In [1]:
# import packages
import os
import platform # don't need
import pandas as pd
import re
import h5py #save hdf5 but pandas has a function...
import numpy as np

  from ._conv import register_converters as _register_converters


## Clean up data dictionary and codelist

In [2]:
# Load the SAS data dictionary. The path is a raw string so that the
# backslashes are taken literally (a plain '...' literal would contain the
# invalid escape sequences \C and \d, which newer Pythons warn about).
dd = pd.read_sas(r'Y:\CIS-PD MUSC\datadictionary.sas7bdat')
# Remove every 'z' character from the column names.
dd.columns = dd.columns.str.replace('z', '')
# Drop the columns that the rest of this notebook does not reference.
dd = dd.drop(['CRFNb', 'FieldNm', 'SASNm', 'SASLabel',
              'FieldTypeNm', 'Length', 'Unit', 'Required'], axis=1)
# Keep only the rows that have no missing value in the remaining columns.
dd = dd.dropna()
# Turn any bytes values into str; everything else is left untouched.
dd = dd.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)

In [3]:
# Load the SAS codelist. Raw string: the backslashes in the Windows path must
# not be read as (invalid) escape sequences.
codelist = pd.read_sas(r'Y:\CIS-PD MUSC\codelist.sas7bdat')
# Remove every 'z' character from the column names.
codelist.columns = codelist.columns.str.replace('z', '')
# Use the same key name as the data dictionary so the two tables can be merged.
codelist = codelist.rename(columns={'GroupID': 'CodeGroup'})
# Turn any bytes values into str; everything else is left untouched.
codelist = codelist.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)

In [4]:
# Merge data dictionary and codelist
# Left-join the codelist onto the data dictionary via the shared 'CodeGroup'
# column, then strip every 'z' character from the field numbers.
result = pd.merge(dd, codelist, how='left', on='CodeGroup')
result['FieldNb'] = result['FieldNb'].str.replace('z', '')

In [2]:
# Folder holding the SAS exports (.sas7bdat) that this notebook reads.
path = r'Y:\CIS-PD MUSC'
# Folder that receives the decoded forms (.h5) and the CSV files written below.
path2 = r'Y:\CIS-PD MUSC\decoded_forms'

## Function to convert codes to values

In [6]:
def CodetoValue(FormNb, FormSeries, QNm, codebook=None):
    """Replace the numeric codes of one form question with their labels.

    Parameters
    ----------
    FormNb
        Form identifier, compared against the ``FormID`` column of the codebook.
    FormSeries
        Iterable of numeric codes (one per record) for the question.
    QNm
        Question identifier, compared against the ``FieldNb`` column.
    codebook : pandas.DataFrame, optional
        Table with ``FormID``, ``FieldNb``, ``ItemNb`` and ``ItemNm`` columns.
        Defaults to the module-level ``result`` table.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(FormSeries), 1)``. Codes ``>= 0`` that have a
        label are replaced by that label. Negative codes, NaN and codes with
        no label in the codebook are passed through unchanged.

    Raises
    ------
    ValueError
        If a code that occurs in ``FormSeries`` has more than one label.
    """
    if codebook is None:
        codebook = result  # merged data dictionary / codelist built earlier

    rows = codebook[(codebook.FormID == FormNb) & (codebook.FieldNb == QNm)]

    # Build the code -> labels lookup once instead of filtering the table for
    # every element. Each value keeps the dtype of the ItemNm column.
    lookup = {code: labels.values
              for code, labels in rows.groupby('ItemNb')['ItemNm']}

    decoded = []
    for code in FormSeries:
        # NaN fails this comparison, so missing values are passed through too.
        labels = lookup.get(code) if code >= 0.0 else None
        if labels is None:
            decoded.append(code)
        elif len(labels) > 1:
            raise ValueError(
                'code %r of form %r, field %r has %d labels'
                % (code, FormNb, QNm, len(labels)))
        else:
            decoded.append(labels)

    if not decoded:
        # np.vstack refuses an empty sequence; keep the (n, 1) contract.
        return np.empty((0, 1), dtype=object)
    return np.vstack(decoded)

In [7]:
# Names of the form files to decode, built from their form numbers.
formlist = ['form' + str(number) for number in (
    101, 126, 238, 239, 240, 241, 242, 243,
    501, 502, 503, 504, 505, 506, 508, 509,
    510, 515, 518,
)]

In [None]:
# Decode every form listed in `formlist` and store the result as HDF5.
for f in formlist:
    # Read the raw SAS export of this form.
    temporiginal = pd.read_sas(os.path.join(path, f + '.sas7bdat'))

    # 'form101' -> 'F101'; this prefix, like every 'z' character, is removed
    # from the column names.
    f2 = f.replace('orm', '').capitalize()
    temporiginal.columns = temporiginal.columns.str.replace('z', '')
    temporiginal.columns = temporiginal.columns.str.replace(f2, '')

    # Candidate columns for decoding: float64 columns whose name contains
    # 'Q' or 'DataCollected'. Work on a copy so the raw frame stays intact
    # until update() below.
    temp = temporiginal.filter(regex='Q|DataCollected')
    temp = temp.select_dtypes(include=['float64']).copy()
    colnames = list(temp)

    # Numeric form id as compared against result.FormID: 'form101' -> 101.0
    form = np.float64(f.replace('form', ''))

    # Fields of this form that have an entry in the merged codebook. Computed
    # once per form instead of re-scanning the whole table for every column.
    coded_fields = set(result.loc[result.FormID == form, 'FieldNb'])

    for c in colnames:
        if c not in coded_fields:
            continue
        temp[c] = CodetoValue(form, temp[c], c)

    # Write the decoded values back, then turn any bytes values into str.
    temporiginal.update(temp)
    temporiginal = temporiginal.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)

    # save file
    filename = os.path.join(path2, f + '.h5')
    # `key` is passed by keyword: positional use is deprecated in newer pandas.
    temporiginal.to_hdf(filename, key='data')

In [None]:
# Show the codebook rows of form 238, field 'Q02'.
is_form238_q02 = (result['FormID'] == 238.0) & (result['FieldNb'] == 'Q02')
result[is_form238_q02]

## Check form509 for motor assessment
### Check times for watch

In [13]:
# Location of the decoded form509 file.
filename2 = os.path.join(path2, 'form509.h5')

In [15]:
# Load the decoded form509 table stored under the 'data' key.
form509 = pd.read_hdf(filename2, key='data')

In [33]:
# Preview the first five rows.
form509.iloc[:5]

Unnamed: 0,SubjectCode,SiteID,VisitNm,FormDate,Q10,Q11,Q12,Q13,Q14,Q18,...,Q100,Q101,Q110,Q111,Q120,Q121,Q130,Q131,Q140,Q141
0,1003.0,1313.0,2 Weeks: Time 0,2017-07-03,2.0,0.0,0.0,1.0,0.0,3.0,...,00:15:50,00:16:20,00:17:42,00:18:08,00:19:34,00:20:28,00:21:30,00:22:15,00:22:45,00:23:15
1,1003.0,1313.0,2 Weeks: Time 30,2017-07-03,1.0,0.0,0.0,0.0,0.0,1.0,...,00:15:10,00:15:40,00:16:45,00:17:08,00:17:39,00:18:22,00:18:54,00:19:23,00:19:44,00:20:14
2,1003.0,1313.0,2 Weeks: Time 60,2017-07-03,1.0,2.0,0.0,0.0,0.0,0.0,...,00:14:40,00:15:10,00:16:16,00:16:40,00:17:23,00:18:08,00:18:16,00:18:46,00:19:07,00:19:37
3,1003.0,1313.0,2 Weeks: Time 90,2017-07-03,0.0,0.0,0.0,0.0,0.0,0.0,...,00:20:36,00:21:06,00:21:50,00:22:12,00:23:02,00:23:42,00:23:59,00:24:29,00:23:40,00:24:10
4,1003.0,1313.0,2 Weeks: Time 120,2017-07-03,1.0,0.0,0.0,1.0,0.0,0.0,...,00:10:00,00:10:30,00:10:50,00:11:14,00:12:18,00:12:45,00:13:20,00:14:00,00:14:15,00:14:45


In [37]:
# Preview the first five rows of a selection of form509 columns.
preview_cols = ['Q04', 'Q146_UTC', 'Q05', 'Q06', 'Q07',
                'Q08', 'Q09', 'Q10', 'Q16', 'Q17']
form509[preview_cols].head(5)

Unnamed: 0,Q04,Q146_UTC,Q05,Q06,Q07,Q08,Q09,Q10,Q16,Q17
0,Yes,2017-07-03 15:35:00,00:00:18,00:00:45,Yes,00:01:50,00:02:20,2.0,00:02:55,00:03:25
1,Yes,2017-07-03 16:49:00,00:00:44,00:00:48,Yes,00:02:25,00:02:55,1.0,00:03:20,00:03:50
2,Yes,2017-07-03 17:09:00,00:00:32,00:00:42,Yes,00:01:10,00:01:40,1.0,00:02:07,00:03:37
3,Yes,2017-07-03 17:38:00,00:00:18,00:00:29,Yes,00:01:03,00:01:33,0.0,00:02:00,00:02:24
4,Yes,2017-07-03 18:14:00,00:00:34,00:00:40,Yes,00:01:12,00:01:42,1.0,00:02:12,00:02:42


In [None]:
# List every column name of form509.
form509.columns.tolist()

## Create a file with the 4-digit SubjectCode and the video ID

### TODO: merge in the 6-digit SubjectID next (waiting for Nick)

In [36]:
# FoxInsightID is the video ID
# Raw string: the backslashes in the Windows path must not be read as
# (invalid) escape sequences.
subjenroll = pd.read_sas(r'Y:\CIS-PD MUSC\subjenrollment.sas7bdat')
# Remove every 'z' character from the column names.
subjenroll.columns = subjenroll.columns.str.replace('z', '')
# Keep only the subject code and the video ID.
subjenroll = subjenroll[['SubjectCode', 'FoxInsightID']]
# Turn any bytes values into str; everything else is left untouched.
subjenroll = subjenroll.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
subjenroll.head(10)

# Write the SubjectCode / video ID table as a comma-separated file.
filename = os.path.join(path2, 'videoID' + '.csv')
subjenroll.to_csv(filename, sep=',')

## Check form101 for the 6-digit and 4-digit subject IDs and save them to a CSV file

In [None]:
# check form101: load the decoded table and preview its first five rows
filename3 = os.path.join(path2, 'form101.h5')
form101 = pd.read_hdf(filename3, key='data')
form101.head()

In [None]:
# check form126: load the decoded table and preview its first five rows
filename3 = os.path.join(path2, 'form126.h5')
form126 = pd.read_hdf(filename3, key='data')
form126.head()

In [None]:
# Write the subject ID table as a comma-separated file.
filename = os.path.join(path2, 'subjID' + '.csv')
# NOTE(review): `subj` is not defined in any cell visible in this notebook;
# it has to be built (presumably from form101) before this cell can run.
# The second positional argument of to_csv is `sep`, so the former 'data'
# argument was an invalid multi-character delimiter; use ',' as for videoID.csv.
subj.to_csv(filename, sep=',')