In [89]:
import pandas
import numpy

# Read the three inputs: the MDS-UPDRS Part III export, the PATNO file used to
# filter exam rows, and the table that supplies SIGFALL / PREV_SIGFALL.
df_updrs_iii = pandas.read_csv(
    '../source_data/MDS_UPDRS_Part_III.csv',
    low_memory=False,
)
df_patnos = pandas.read_csv('../source_data/non_hc_patnos.csv')
df_sigfall = pandas.read_csv('../source_data/sigfall_main.csv')

# Flatten every cell of the PATNO file into one Python list.
non_hc_patnos = [patno for patno in df_patnos.values.flat]

In [90]:
# Keep only the rows for which `PDSTATE != "OFF"` holds; the filtered frame
# replaces df_updrs_iii, so later cells never see the "OFF" rows.
df_updrs_iii = df_updrs_iii.query('PDSTATE != "OFF" ')
# Bare last expression: the notebook displays (rows, columns) of the filtered frame.
df_updrs_iii.shape

(15357, 55)

In [91]:
# --- Create dataframe that combines UPDRSiii data with SIGFALL & PREV_SIGFALL ---

def lookup_value(dframe,row,value):
    """Fetch column ``value`` from the ``dframe`` row that matches ``row``.

    A match is a row of ``dframe`` whose INFODT and PATNO both equal the
    INFODT and PATNO of ``row``.  When several rows match, the first one is
    used; when none does, ``numpy.nan`` is returned.
    """
    patno, infodt = row['PATNO'], row['INFODT']
    same_visit = (dframe.INFODT == infodt) & (dframe.PATNO == patno)
    matches = dframe.loc[same_visit, value]
    return numpy.nan if matches.empty else matches.values[0]
    
# Restrict the exam rows to the PATNOs in non_hc_patnos, then drop the columns
# listed below.
df_updrs_iii_sigfall_base = df_updrs_iii[df_updrs_iii['PATNO'].isin(non_hc_patnos)].drop(
    [
        'REC_ID', 'PAG_NAME', 'PDMEDDT', 'PDMEDTM', 'PDSTATE', 'EXAMTM',
        'DBS_STATUS', 'DYSKPRES', 'DYSKIRAT', 'PDTRTMNT', 'DBSONTM', 'DBSOFFTM',
        'HRPOSTMED', 'HRDBSOFF', 'HRDBSON', 'ORIG_ENTRY', 'LAST_UPDATE',
    ],
    axis=1,
)

# Attach SIGFALL, then PREV_SIGFALL, each looked up row by row in df_sigfall
# through lookup_value (matching on PATNO and INFODT).
for sigfall_col in ('SIGFALL', 'PREV_SIGFALL'):
    df_updrs_iii_sigfall_base[sigfall_col] = df_updrs_iii_sigfall_base.apply(
        lambda row, col=sigfall_col: lookup_value(df_sigfall, row, col),
        axis=1,
    )

# Keep rows whose SIGFALL is 0.0 or 1.0, then discard every row that still
# contains a missing value in any column.
df_updrs_iii_sigfall_base = (
    df_updrs_iii_sigfall_base
    .query('SIGFALL == 0.0 or SIGFALL == 1.0')
    .dropna(axis=0)
)


In [92]:
# Cast every score column named in updrs_qs to float and print how often each
# value occurs in that column.

updrs_qs = [
    'NP3SPCH', 'NP3FACXP', 'NP3RIGN', 'NP3RIGRU', 'NP3RIGLU',
    'NP3RIGRL', 'NP3RIGLL', 'NP3FTAPR', 'NP3FTAPL', 'NP3HMOVR',
    'NP3HMOVL', 'NP3PRSPR', 'NP3PRSPL', 'NP3TTAPR', 'NP3TTAPL',
    'NP3LGAGR', 'NP3LGAGL', 'NP3RISNG', 'NP3GAIT', 'NP3FRZGT',
    'NP3PSTBL', 'NP3POSTR', 'NP3BRADY', 'NP3PTRMR', 'NP3PTRML',
    'NP3KTRMR', 'NP3KTRML', 'NP3RTARU', 'NP3RTALU', 'NP3RTARL',
    'NP3RTALL', 'NP3RTALJ', 'NP3RTCON', 'NP3TOT', 'NHY',
]

for score_col in updrs_qs:
    as_float = df_updrs_iii_sigfall_base[score_col].astype(float)
    df_updrs_iii_sigfall_base[score_col] = as_float
    print(df_updrs_iii_sigfall_base[score_col].value_counts())

0.0    647
1.0    352
2.0     87
3.0     11
4.0      4
Name: NP3SPCH, dtype: int64
0.0    432
1.0    411
2.0    219
3.0     39
Name: NP3FACXP, dtype: int64
0.0    638
1.0    255
2.0    165
3.0     41
4.0      2
Name: NP3RIGN, dtype: int64
0.0    550
1.0    304
2.0    221
3.0     25
4.0      1
Name: NP3RIGRU, dtype: int64
0.0    611
1.0    282
2.0    186
3.0     22
Name: NP3RIGLU, dtype: int64
0.0    712
1.0    222
2.0    137
3.0     29
4.0      1
Name: NP3RIGRL, dtype: int64
0.0    731
1.0    194
2.0    146
3.0     30
Name: NP3RIGLL, dtype: int64
0.0    496
1.0    358
2.0    191
3.0     53
4.0      3
Name: NP3FTAPR, dtype: int64
0.0    473
1.0    356
2.0    207
3.0     64
4.0      1
Name: NP3FTAPL, dtype: int64
0.0    578
1.0    334
2.0    160
3.0     29
Name: NP3HMOVR, dtype: int64
0.0    527
1.0    346
2.0    191
3.0     37
Name: NP3HMOVL, dtype: int64
0.0    621
1.0    311
2.0    155
3.0     14
Name: NP3PRSPR, dtype: int64
0.0    559
1.0    361
2.0    151
3.0     30
Name: NP3PRSPL, 

In [93]:
# Compute delta values: each score minus the same PATNO's score at the previous INFODT

def get_infodts(dframe,patno):
    """Return, as a list, the INFODT entry of every ``dframe`` row whose PATNO equals ``patno``."""
    patient_rows = dframe.loc[dframe.PATNO == patno]
    return [infodt for infodt in patient_rows['INFODT'].values]

def date_to_tuple(date):
    """Split ``date`` into a ``(month, year, date)`` tuple.

    The month is the first two characters of ``date`` and the year its last
    four; the unmodified input is carried along as the third element.
    """
    month, year = date[:2], date[-4:]
    return month, year, date

def date_sort(dates):
    """Return the ``date_to_tuple`` form of every entry in ``dates``, ordered by year and then month.

    The ordering compares the year and month slices produced by
    ``date_to_tuple``; entries with equal year and month keep their input order.
    """
    as_tuples = [date_to_tuple(entry) for entry in dates]
    as_tuples.sort(key=lambda parts: (parts[1], parts[0]))
    return as_tuples

def get_index(date,dates):
    """Locate ``date`` among ``dates`` by comparing it with each entry's third element.

    Returns ``(True, position)`` for the last entry that matches, or
    ``(False, 999)`` when no entry matches.
    """
    hits = [position for position, entry in enumerate(dates) if entry[2] == date]
    if not hits:
        return (False,999)
    return (True,hits[-1])

def prev_value(dframe,row,q):
    """Return column ``q`` at the date that precedes ``row``'s INFODT for the same PATNO.

    The INFODT entries of all ``dframe`` rows sharing ``row``'s PATNO are
    collected, de-duplicated, and ordered with ``date_sort``.  The value of
    ``q`` is then read from the first ``dframe`` row at the date immediately
    before ``row``'s INFODT in that ordering.

    Parameters
    ----------
    dframe : frame with PATNO, INFODT and ``q`` columns.
    row : mapping-like object providing 'PATNO' and 'INFODT'.
    q : name of the column to read.

    Returns
    -------
    The earlier value, or ``numpy.nan`` when ``row``'s INFODT is not found for
    that PATNO, when it is the earliest date, or when the lookup fails.
    """
    patno = row['PATNO']
    infodt = row['INFODT']

    # De-duplicate while keeping first-seen order.  Without this, a PATNO that
    # has two rows with the same INFODT gets that date twice in the sorted
    # list, get_index returns the last copy, and the "previous" date resolves
    # to the very same date instead of the earlier one.
    unique_dates = list(dict.fromkeys(get_infodts(dframe,patno)))
    sorted_dates = date_sort(unique_dates)
    found, position = get_index(infodt,sorted_dates)

    # Unknown date, or already the earliest date: nothing precedes it.
    if not found or position == 0:
        return numpy.nan

    prev_date = sorted_dates[position - 1][2]
    try:
        return dframe[(dframe.INFODT == prev_date ) & (dframe.PATNO == patno)][q].values[0]
    except (KeyError, IndexError):
        # Best effort: a missing column or an empty match yields NaN.  Only
        # these lookup errors are swallowed, so KeyboardInterrupt (e.g. when
        # stopping a long apply loop) and other failures still propagate.
        return numpy.nan

def q_delta_actual(dframe,row,q):
    """Return the change in column ``q`` between ``row``'s date and the preceding one.

    The current value is read from the first ``dframe`` row whose INFODT and
    PATNO equal those of ``row``; the earlier value comes from ``prev_value``.

    Parameters
    ----------
    dframe : frame with PATNO, INFODT and ``q`` columns.
    row : mapping-like object providing 'PATNO' and 'INFODT'.
    q : name of the column to difference.

    Returns
    -------
    ``current - previous``.  The result is ``numpy.nan`` when the two values
    cannot be subtracted; a NaN returned by ``prev_value`` propagates through
    the subtraction for float values.

    Raises
    ------
    IndexError
        If ``dframe`` holds no row matching ``row``'s PATNO and INFODT.
    """
    patno = row['PATNO']
    infodt = row['INFODT']

    current = dframe[(dframe.INFODT == infodt ) & (dframe.PATNO == patno)][q].values[0]
    prev = prev_value(dframe,row,q)

    try:
        return current - prev
    except TypeError:
        # Values that do not support subtraction (e.g. strings) give a missing
        # delta.  Catching only TypeError keeps KeyboardInterrupt and other
        # unexpected errors visible instead of silently turning them into NaN.
        return numpy.nan

# Score columns to difference; each one gets a companion column prefixed "D_".
updrs_qs = [
    'NP3SPCH', 'NP3FACXP', 'NP3RIGN', 'NP3RIGRU', 'NP3RIGLU',
    'NP3RIGRL', 'NP3RIGLL', 'NP3FTAPR', 'NP3FTAPL', 'NP3HMOVR',
    'NP3HMOVL', 'NP3PRSPR', 'NP3PRSPL', 'NP3TTAPR', 'NP3TTAPL',
    'NP3LGAGR', 'NP3LGAGL', 'NP3RISNG', 'NP3GAIT', 'NP3FRZGT',
    'NP3PSTBL', 'NP3POSTR', 'NP3BRADY', 'NP3PTRMR', 'NP3PTRML',
    'NP3KTRMR', 'NP3KTRML', 'NP3RTARU', 'NP3RTALU', 'NP3RTARL',
    'NP3RTALL', 'NP3RTALJ', 'NP3RTCON', 'NP3TOT', 'NHY',
]
dq_pairs = [(score_col, 'D_' + score_col) for score_col in updrs_qs]


# For every (score, delta) pair, fill the delta column row by row with
# q_delta_actual evaluated against the frame itself.
for score_name, delta_name in dq_pairs:
    df_updrs_iii_sigfall_base[delta_name] = df_updrs_iii_sigfall_base.apply(
        lambda row, q=score_name: q_delta_actual(df_updrs_iii_sigfall_base, row, q),
        axis=1,
    )


In [84]:
# Bare expression: the notebook displays the frame's column labels as an Index.
# NOTE(review): this cell's execution count (84) is lower than its neighbours'
# (93 and 94), so the displayed output may be stale; confirm by re-running in order.
df_updrs_iii_sigfall_base.columns

Index(['PATNO', 'EVENT_ID', 'INFODT', 'NP3SPCH', 'NP3FACXP', 'NP3RIGN',
       'NP3RIGRU', 'NP3RIGLU', 'NP3RIGRL', 'NP3RIGLL', 'NP3FTAPR', 'NP3FTAPL',
       'NP3HMOVR', 'NP3HMOVL', 'NP3PRSPR', 'NP3PRSPL', 'NP3TTAPR', 'NP3TTAPL',
       'NP3LGAGR', 'NP3LGAGL', 'NP3RISNG', 'NP3GAIT', 'NP3FRZGT', 'NP3PSTBL',
       'NP3POSTR', 'NP3BRADY', 'NP3PTRMR', 'NP3PTRML', 'NP3KTRMR', 'NP3KTRML',
       'NP3RTARU', 'NP3RTALU', 'NP3RTARL', 'NP3RTALL', 'NP3RTALJ', 'NP3RTCON',
       'NP3TOT', 'NHY', 'SIGFALL', 'PREV_SIGFALL', 'D_NP3SPCH', 'D_NP3FACXP',
       'D_NP3RIGN', 'D_NP3RIGRU', 'D_NP3RIGLU', 'D_NP3RIGRL', 'D_NP3RIGLL',
       'D_NP3FTAPR', 'D_NP3FTAPL', 'D_NP3HMOVR', 'D_NP3HMOVL', 'D_NP3PRSPR',
       'D_NP3PRSPL', 'D_NP3TTAPR', 'D_NP3TTAPL', 'D_NP3LGAGR', 'D_NP3LGAGL',
       'D_NP3RISNG', 'D_NP3GAIT', 'D_NP3FRZGT', 'D_NP3PSTBL', 'D_NP3POSTR',
       'D_NP3BRADY', 'D_NP3PTRMR', 'D_NP3PTRML', 'D_NP3KTRMR', 'D_NP3KTRML',
       'D_NP3RTARU', 'D_NP3RTALU', 'D_NP3RTARL', 'D_NP3RTALL', 'D_NP3RTAL

In [94]:
# Select every delta column plus PREV_SIGFALL and SIGFALL, discard rows with
# any missing value, and write the result to CSV without the index.
df_qs = df_updrs_iii_sigfall_base.loc[:, [
    'D_NP3SPCH', 'D_NP3FACXP', 'D_NP3RIGN', 'D_NP3RIGRU', 'D_NP3RIGLU',
    'D_NP3RIGRL', 'D_NP3RIGLL', 'D_NP3FTAPR', 'D_NP3FTAPL', 'D_NP3HMOVR',
    'D_NP3HMOVL', 'D_NP3PRSPR', 'D_NP3PRSPL', 'D_NP3TTAPR', 'D_NP3TTAPL',
    'D_NP3LGAGR', 'D_NP3LGAGL', 'D_NP3RISNG', 'D_NP3GAIT', 'D_NP3FRZGT',
    'D_NP3PSTBL', 'D_NP3POSTR', 'D_NP3BRADY', 'D_NP3PTRMR', 'D_NP3PTRML',
    'D_NP3KTRMR', 'D_NP3KTRML', 'D_NP3RTARU', 'D_NP3RTALU', 'D_NP3RTARL',
    'D_NP3RTALL', 'D_NP3RTALJ', 'D_NP3RTCON', 'D_NP3TOT', 'D_NHY',
    'PREV_SIGFALL', 'SIGFALL',
]].dropna()
df_qs.to_csv('../working_data/updrsiii_all_qs.csv', index=False)

# print(df_qs.query('SIGFALL == 1.0').describe())
# print(df_qs.query('SIGFALL == 0.0').describe())