In [1]:
import pandas
import numpy

# PATNO filter list: every value found in non_hc_patnos.csv, flattened to a plain list.
df_patnos = pandas.read_csv('../source_data/non_hc_patnos.csv')
non_hc_patnos = list(df_patnos.values.flat)

# Table holding the SIGFALL / PREV_SIGFALL values that are looked up per PATNO and INFODT.
df_sigfall = pandas.read_csv('../source_data/sigfall_main.csv')

# MDS-UPDRS Part IV export. low_memory=False makes pandas infer each column's
# dtype from the whole file instead of chunk by chunk.
df_updrs_iv = pandas.read_csv(
    '../source_data/MDS-UPDRS_Part_IV__Motor_Complications.csv',
    low_memory=False,
)

In [16]:
# Preview the first rows of the combined frame.
# NOTE(review): this cell is placed above the cells that create
# df_updrs_iv_sigfall_base and add its D_* columns, so it raises NameError on a
# fresh "Restart & Run All"; it has to be executed after those cells.
df_updrs_iv_sigfall_base.head()

Unnamed: 0,PATNO,EVENT_ID,INFODT,NP4WDYSK,NP4WDYSKDEN,NP4WDYSKNUM,NP4WDYSKPCT,NP4DYSKI,NP4OFF,NP4OFFDEN,...,D_NP4OFFDEN,D_NP4OFFNUM,D_NP4OFFPCT,D_NP4FLCTI,D_NP4FLCTX,D_NP4DYSTN,D_NP4DYSTNDEN,D_NP4DYSTNNUM,D_NP4DYSTNPCT,D_NP4TOT
6,3001,V17,09/2021,1.0,2.0,14.0,4.0,0.0,0.0,0.0,...,,,,,,,,,,
7,3001,V18,07/2022,1.0,4.0,16.0,25.0,0.0,0.0,0.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20,3002,V17,09/2021,0.0,1.0,15.0,7.0,0.0,1.0,1.0,...,,,,,,,,,,
21,3002,V18,03/2022,2.0,5.0,16.0,31.0,0.0,1.0,2.0,...,1.0,1.0,6.0,3.0,1.0,0.0,0.0,1.0,0.0,6.0
28,3003,V18,04/2022,1.0,2.0,18.0,11.0,0.0,1.0,2.0,...,,,,,,,,,,


In [8]:
# --- Create dataframe that combines UPDRS Part IV data with SIGFALL & PREV_SIGFALL ---

def lookup_value(dframe,row,value):
    """Fetch column ``value`` from the ``dframe`` row matching ``row``'s visit.

    A match is a row of ``dframe`` whose PATNO and INFODT both equal those of
    ``row``.

    Args:
        dframe: DataFrame with PATNO and INFODT columns plus the ``value`` column.
        row: Mapping/Series providing 'PATNO' and 'INFODT'.
        value: Name of the column to read from ``dframe``.

    Returns:
        The value from the first matching row, or ``numpy.nan`` when no row
        matches.
    """
    same_visit = (dframe.PATNO == row['PATNO']) & (dframe.INFODT == row['INFODT'])
    matches = dframe[same_visit][value]
    # With several matches the first one wins; with none the result is NaN.
    return numpy.nan if matches.empty else matches.values[0]
    
# Keep only the listed participants and drop the bookkeeping columns.
df_updrs_iv_sigfall_base = (
    df_updrs_iv[df_updrs_iv['PATNO'].isin(non_hc_patnos)]
    .drop(['REC_ID','PAG_NAME','ORIG_ENTRY', 'LAST_UPDATE'], axis=1)
)

# Attach the fall labels recorded for the same PATNO / INFODT (NaN when absent).
for sigfall_col in ('SIGFALL', 'PREV_SIGFALL'):
    df_updrs_iv_sigfall_base[sigfall_col] = df_updrs_iv_sigfall_base.apply(
        lambda row: lookup_value(df_sigfall, row, sigfall_col), axis=1
    )

# Keep rows with a 0/1 SIGFALL label and no missing value in any column.
# The trailing .copy() makes the result an independent frame: later cells add
# and overwrite columns on it, which would otherwise trigger pandas'
# SettingWithCopyWarning because query()/dropna() return filtered selections.
df_updrs_iv_sigfall_base = (
    df_updrs_iv_sigfall_base
    .query('SIGFALL == 0.0 or SIGFALL == 1.0')
    .dropna(axis=0)
    .copy()
)


In [13]:
# MDS-UPDRS Part IV columns used in the rest of the notebook.
updrs_qs = [
    'NP4WDYSK', 'NP4WDYSKDEN', 'NP4WDYSKNUM', 'NP4WDYSKPCT',
    'NP4DYSKI',
    'NP4OFF', 'NP4OFFDEN', 'NP4OFFNUM', 'NP4OFFPCT',
    'NP4FLCTI', 'NP4FLCTX',
    'NP4DYSTN', 'NP4DYSTNDEN', 'NP4DYSTNNUM', 'NP4DYSTNPCT',
    'NP4TOT',
]

# Cast each of those columns to float, one column at a time.
for score_col in updrs_qs:
    df_updrs_iv_sigfall_base[score_col] = df_updrs_iv_sigfall_base[score_col].astype(float)

In [15]:
# Compute Delta values, from last sample event

def get_infodts(dframe,patno):
    """Return the INFODT values of all ``dframe`` rows whose PATNO equals ``patno``.

    The values are returned as a list, in the order the rows appear in the
    frame; the list is empty when the patient has no rows.
    """
    patient_rows = dframe[dframe.PATNO == patno]
    infodt_values = patient_rows['INFODT'].values
    return list(infodt_values)

def date_to_tuple(date):
    """Split a date string into ``(month, year, date)``.

    The month is the first two characters and the year the last four, which
    fits the 'MM/YYYY' strings stored in INFODT (e.g. '09/2021'). The original
    string is carried along as the third element.
    """
    return (date[:2], date[-4:], date)

def date_sort(dates):
    """Return ``date_to_tuple`` results for ``dates`` in chronological order.

    Each element of the returned list is a ``(month, year, date)`` tuple. The
    sort key is (year, month), compared as strings.
    """
    decorated = [date_to_tuple(d) for d in dates]
    decorated.sort(key=lambda parts: (parts[1], parts[0]))
    return decorated

def get_index(date,dates):
    """Find ``date`` among ``(month, year, date)`` tuples.

    Args:
        date: Date string to look for.
        dates: Sequence of tuples whose third element is a date string.

    Returns:
        ``(True, idx)`` where ``idx`` is the position of the last tuple whose
        third element equals ``date``, or ``(False, 999)`` when none matches.
    """
    hits = [pos for pos, entry in enumerate(dates) if entry[2] == date]
    if not hits:
        return (False,999)
    # When the date occurs more than once, the last occurrence is reported.
    return (True,hits[-1])

def prev_value(dframe,row,q):
    """Return column ``q`` from the same patient's chronologically previous visit.

    The patient's INFODT values are collected from ``dframe`` and ordered with
    ``date_sort``; the entry just before ``row``'s INFODT identifies the
    previous visit, and ``q`` is read from the first ``dframe`` row matching
    that date and PATNO.

    Args:
        dframe: DataFrame with PATNO, INFODT and ``q`` columns.
        row: Mapping/Series providing 'PATNO' and 'INFODT'.
        q: Name of the column to read.

    Returns:
        The previous visit's value, or ``numpy.nan`` when ``row``'s date is not
        found for the patient, when it is the patient's earliest date, or when
        the lookup finds no column/row.
    """
    patno = row['PATNO']
    infodt = row['INFODT']
    sorted_dates = date_sort(get_infodts(dframe,patno))
    found, position = get_index(infodt,sorted_dates)

    # Nothing to look back to: unknown visit, or already the earliest one.
    if not found or position < 1:
        return numpy.nan

    prev_date = sorted_dates[position - 1][2]
    try:
        return dframe[(dframe.INFODT == prev_date) & (dframe.PATNO == patno)][q].values[0]
    except (KeyError, IndexError):
        # Missing column or no matching row: stay best-effort and report NaN.
        # Unrelated errors (and KeyboardInterrupt) are no longer swallowed.
        return numpy.nan

def q_delta_actual(dframe,row,q):
    """Return the change in column ``q`` since the patient's previous visit.

    The current value is read from the first ``dframe`` row matching ``row``'s
    PATNO and INFODT; the previous value comes from ``prev_value``.

    Args:
        dframe: DataFrame with PATNO, INFODT and ``q`` columns.
        row: Mapping/Series providing 'PATNO' and 'INFODT'.
        q: Name of the column to difference.

    Returns:
        ``current - previous``. The result is NaN when there is no previous
        visit (``prev_value`` returned NaN) or when the two values cannot be
        subtracted.

    Raises:
        IndexError: if ``dframe`` has no row for ``row``'s PATNO and INFODT.
    """
    patno = row['PATNO']
    infodt = row['INFODT']

    current = dframe[(dframe.INFODT == infodt) & (dframe.PATNO == patno)][q].values[0]
    prev = prev_value(dframe,row,q)

    try:
        return current - prev
    except TypeError:
        # Non-numeric values cannot be differenced; report NaN instead of
        # hiding every possible error behind a bare except.
        return numpy.nan

# Pair every score column with the name of its delta column: ('NP4OFF', 'D_NP4OFF'), ...
dq_pairs = [(score_col, 'D_' + score_col) for score_col in updrs_qs]

# Add one D_* column per score, computed row by row against the previous visit.
for source_col, delta_col in dq_pairs:
    df_updrs_iv_sigfall_base[delta_col] = df_updrs_iv_sigfall_base.apply(
        lambda row: q_delta_actual(df_updrs_iv_sigfall_base, row, source_col),
        axis=1,
    )


In [18]:
# Display the frame's column index to check that SIGFALL, PREV_SIGFALL and the
# D_* delta columns are present alongside the original score columns.
df_updrs_iv_sigfall_base.columns

Index(['PATNO', 'EVENT_ID', 'INFODT', 'NP4WDYSK', 'NP4WDYSKDEN', 'NP4WDYSKNUM',
       'NP4WDYSKPCT', 'NP4DYSKI', 'NP4OFF', 'NP4OFFDEN', 'NP4OFFNUM',
       'NP4OFFPCT', 'NP4FLCTI', 'NP4FLCTX', 'NP4DYSTN', 'NP4DYSTNDEN',
       'NP4DYSTNNUM', 'NP4DYSTNPCT', 'NP4TOT', 'SIGFALL', 'PREV_SIGFALL',
       'D_NP4WDYSK', 'D_NP4WDYSKDEN', 'D_NP4WDYSKNUM', 'D_NP4WDYSKPCT',
       'D_NP4DYSKI', 'D_NP4OFF', 'D_NP4OFFDEN', 'D_NP4OFFNUM', 'D_NP4OFFPCT',
       'D_NP4FLCTI', 'D_NP4FLCTX', 'D_NP4DYSTN', 'D_NP4DYSTNDEN',
       'D_NP4DYSTNNUM', 'D_NP4DYSTNPCT', 'D_NP4TOT'],
      dtype='object')

In [19]:
# Export table: every D_* delta column plus the two fall labels, complete rows only.
export_cols = [
    'D_NP4WDYSK', 'D_NP4WDYSKDEN', 'D_NP4WDYSKNUM', 'D_NP4WDYSKPCT',
    'D_NP4DYSKI',
    'D_NP4OFF', 'D_NP4OFFDEN', 'D_NP4OFFNUM', 'D_NP4OFFPCT',
    'D_NP4FLCTI', 'D_NP4FLCTX',
    'D_NP4DYSTN', 'D_NP4DYSTNDEN', 'D_NP4DYSTNNUM', 'D_NP4DYSTNPCT',
    'D_NP4TOT',
    'PREV_SIGFALL', 'SIGFALL',
]
df_qs = df_updrs_iv_sigfall_base[export_cols].dropna()
df_qs.to_csv('../working_data/updrsiv_all_q_p_s.csv', index=False)

# Second export: the same rows without the PREV_SIGFALL column.
df_q = df_qs.drop(columns='PREV_SIGFALL')
df_q.to_csv('../working_data/updrsiv_all_q_s.csv', index=False)

In [21]:
# Extract used PATNOS for population analysis: the identifying columns of every
# row that has no missing value in any column.
df_pop = df_updrs_iv_sigfall_base.dropna()[['PATNO', 'EVENT_ID', 'INFODT']]
# Unlike the other exports, the frame's index is written out as the first CSV column.
df_pop.to_csv('../source_data/u_iv_records.csv')