In [9]:
import pandas
import numpy

# Read the four input tables used by this notebook:
#   df_updrs_i   - MDS-UPDRS Part I
#   df_updrs_i_p - MDS-UPDRS Part I patient questionnaire
#   df_patnos    - PATNOs to keep (presumably the non-healthy-control cohort - confirm)
#   df_sigfall   - per-event SIGFALL / PREV_SIGFALL table
df_updrs_i = pandas.read_csv('../source_data/MDS-UPDRS_Part_I.csv')
df_updrs_i_p = pandas.read_csv('../source_data/MDS-UPDRS_Part_I_Patient_Questionnaire.csv')
df_patnos = pandas.read_csv('../source_data/non_hc_patnos.csv')
df_sigfall = pandas.read_csv('../source_data/sigfall_main.csv')
# Flatten every cell of df_patnos into one plain list, used later with .isin().
non_hc_patnos = list(df_patnos.values.flat)

# Preview the SIGFALL table.
df_sigfall.head()

Unnamed: 0,PATNO,EVENT_ID,INFODT,AGE_AT_EVENT,PREV_SIGFALL,SIGFALL
0,3001,V15,03/2019,73.2,0,0.0
1,3001,V17,09/2021,75.7,0,1.0
2,3001,V18,07/2022,76.5,1,0.0
3,3002,V15,03/2019,75.6,0,0.0
4,3002,V17,09/2021,78.1,0,0.0


In [10]:
# Combine the relevant data from the 2 spreadsheets into a single dataframe

def lookup_value(dframe, row, value):
    """Fetch one column value from ``dframe`` for the patient/date of ``row``.

    Parameters
    ----------
    dframe : DataFrame with ``PATNO`` and ``INFODT`` columns plus ``value``.
    row    : mapping-like object providing ``row['PATNO']`` and ``row['INFODT']``.
    value  : name of the column to read from ``dframe``.

    Returns
    -------
    The ``value`` entry of the first row of ``dframe`` whose ``PATNO`` and
    ``INFODT`` both equal those of ``row``, or ``numpy.nan`` when there is
    no such row.

    NOTE(review): rows are matched on INFODT, not EVENT_ID; if a patient has
    several rows with the same INFODT only the first one is used - confirm
    this is intended.
    """
    same_date = dframe.INFODT == row['INFODT']
    same_patient = dframe.PATNO == row['PATNO']
    hits = dframe[same_date & same_patient][value]
    return numpy.nan if hits.empty else hits.values[0]
    
# Keep only the Part I rows whose PATNO is in non_hc_patnos, then remove the
# listed columns.
df_updrs_i_sigfall_base = (
    df_updrs_i
    .loc[df_updrs_i['PATNO'].isin(non_hc_patnos)]
    .drop(columns=['REC_ID', 'PAG_NAME', 'NUPSOURC', 'ORIG_ENTRY', 'LAST_UPDATE'])
)

# Columns to copy across from the patient-questionnaire table.
p_questions = ['NP1SLPN', 'NP1SLPD', 'NP1PAIN', 'NP1URIN', 'NP1CNST', 'NP1LTHD', 'NP1FATG', 'NP1PTOT']

# For every base row, look the questionnaire value up by (PATNO, INFODT);
# rows without a questionnaire match receive NaN.
for p in p_questions:
    df_updrs_i_sigfall_base[p] = df_updrs_i_sigfall_base.apply(
        lambda row: lookup_value(df_updrs_i_p, row, p),
        axis=1,
    )



In [11]:
# Show the column index of the combined frame (Part I columns plus the
# questionnaire columns added by the preceding cell).
df_updrs_i_sigfall_base.columns

Index(['PATNO', 'EVENT_ID', 'INFODT', 'NP1COG', 'NP1HALL', 'NP1DPRS',
       'NP1ANXS', 'NP1APAT', 'NP1DDS', 'NP1RTOT', 'NP1SLPN', 'NP1SLPD',
       'NP1PAIN', 'NP1URIN', 'NP1CNST', 'NP1LTHD', 'NP1FATG', 'NP1PTOT'],
      dtype='object')

In [12]:
# Create new total: NP1TOT = NP1RTOT + NP1PTOT (NaN if either part is NaN),
# then remove the two partial totals.
df_updrs_i_sigfall_base = (
    df_updrs_i_sigfall_base
    .assign(NP1TOT=lambda frame: frame['NP1RTOT'] + frame['NP1PTOT'])
    .drop(columns=['NP1RTOT', 'NP1PTOT'])
)
df_updrs_i_sigfall_base.head()


Unnamed: 0,PATNO,EVENT_ID,INFODT,NP1COG,NP1HALL,NP1DPRS,NP1ANXS,NP1APAT,NP1DDS,NP1SLPN,NP1SLPD,NP1PAIN,NP1URIN,NP1CNST,NP1LTHD,NP1FATG,NP1TOT
9,3001,BL,03/2011,0,0,0,0,0,0,1.0,2.0,0.0,4.0,0.0,0.0,1.0,8.0
10,3001,R17,11/2021,2,1,2,2,2,0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,20.0
11,3001,SC,02/2011,0,0,0,1,0,0,0.0,1.0,0.0,4.0,0.0,0.0,1.0,7.0
12,3001,V01,05/2011,0,0,0,0,0,0,1.0,2.0,0.0,3.0,0.0,0.0,1.0,7.0
13,3001,V02,08/2011,0,0,0,0,0,0,0.0,2.0,0.0,4.0,0.0,0.0,1.0,7.0


In [13]:
# --- Create dataframe that combines UPDRSi data with SIGFALL & PREV_SIGFALL ---

# Both columns are looked up in df_sigfall by (PATNO, INFODT). The final
# dropna() discards every row that has a NaN in any column, which includes
# all rows with no matching df_sigfall record.
df_updrs_i_sigfall_base = (
    df_updrs_i_sigfall_base
    .assign(
        SIGFALL=lambda frame: frame.apply(
            lambda row: lookup_value(df_sigfall, row, 'SIGFALL'), axis=1),
        PREV_SIGFALL=lambda frame: frame.apply(
            lambda row: lookup_value(df_sigfall, row, 'PREV_SIGFALL'), axis=1),
    )
    .dropna()
)
df_updrs_i_sigfall_base.head()

Unnamed: 0,PATNO,EVENT_ID,INFODT,NP1COG,NP1HALL,NP1DPRS,NP1ANXS,NP1APAT,NP1DDS,NP1SLPN,NP1SLPD,NP1PAIN,NP1URIN,NP1CNST,NP1LTHD,NP1FATG,NP1TOT,SIGFALL,PREV_SIGFALL
25,3001,V15,03/2019,1,0,1,1,1,1,1.0,2.0,0.0,3.0,0.0,0.0,2.0,13.0,0.0,0.0
26,3001,V17,09/2021,2,1,2,2,1,0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,21.0,1.0,0.0
27,3001,V18,07/2022,2,1,3,0,2,0,1.0,2.0,1.0,4.0,1.0,1.0,4.0,22.0,0.0,1.0
44,3002,V15,03/2019,1,0,1,0,1,0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,11.0,0.0,0.0
45,3002,V17,09/2021,3,1,1,2,0,0,2.0,2.0,2.0,2.0,3.0,2.0,1.0,21.0,0.0,0.0


In [14]:
# Compute Delta values, from last sample event

def get_infodts(dframe, patno):
    """Return, as a list, the INFODT values of every row of ``dframe`` whose
    PATNO equals ``patno`` (in row order; empty list when none match)."""
    patient_rows = dframe.PATNO == patno
    infodt_values = dframe.loc[patient_rows, 'INFODT'].values
    return list(infodt_values)

def date_to_tuple(date):
    """Split a date string into ``(month, year, date)``.

    ``month`` is the first two characters, ``year`` the last four, and the
    third element is the unmodified input (e.g. '03/2019' ->
    ('03', '2019', '03/2019')).
    """
    month, year = date[:2], date[-4:]
    return (month, year, date)

def date_sort(dates):
    """Convert each date string with ``date_to_tuple`` and return the tuples
    as a list ordered by year, then month (both compared as strings)."""
    as_tuples = [date_to_tuple(entry) for entry in dates]
    as_tuples.sort(key=lambda parts: (parts[1], parts[0]))
    return as_tuples

def get_index(date, dates):
    """Locate ``date`` among the ``(month, year, date)`` tuples in ``dates``.

    Returns ``(True, idx)`` where ``idx`` is the position of the LAST tuple
    whose third element equals ``date``, or ``(False, 999)`` when no tuple
    matches.
    """
    positions = [pos for pos, entry in enumerate(dates) if entry[2] == date]
    if not positions:
        return (False, 999)
    return (True, positions[-1])

def prev_value(dframe,row,q):
    """Return column ``q`` at the patient's previous date in ``dframe``.

    The patient's INFODT strings are collected from ``dframe``, ordered by
    (year, month), and the entry immediately before ``row['INFODT']`` is
    taken as the previous date. The ``q`` value of the first ``dframe`` row
    for that patient and date is returned.

    Parameters
    ----------
    dframe : DataFrame with ``PATNO``, ``INFODT`` and ``q`` columns.
    row    : mapping-like object providing ``row['PATNO']`` and ``row['INFODT']``.
    q      : name of the column to read.

    Returns
    -------
    The previous value, or ``numpy.nan`` when ``row``'s date is not found for
    the patient, when it is the earliest date, or when the lookup fails.

    NOTE(review): if a patient has two rows with the same INFODT, the
    "previous" date can equal the current one - confirm whether such
    duplicates occur in the data.
    """
    output = numpy.nan
    patno = row['PATNO']
    infodt = row['INFODT']
    sorted_dates = date_sort(get_infodts(dframe,patno))
    found, position = get_index(infodt,sorted_dates)

    # Position 0 is the earliest date, so there is nothing to look back to.
    if found and position > 0:
        prev_date = sorted_dates[position - 1][2]
        try:
            output = dframe[(dframe.INFODT == prev_date ) & (dframe.PATNO == patno)][q].values[0]
        except (KeyError, IndexError):
            # Best-effort lookup: a missing column or an empty selection
            # yields NaN. Anything else (e.g. KeyboardInterrupt during a long
            # apply) is no longer silently swallowed.
            pass
    return output

def q_delta_actual(dframe,row,q):
    """Return the change in column ``q`` since the patient's previous date.

    The current value is the ``q`` entry of the first ``dframe`` row matching
    ``row``'s PATNO and INFODT; the previous value comes from ``prev_value``.

    Parameters
    ----------
    dframe : DataFrame with ``PATNO``, ``INFODT`` and ``q`` columns.
    row    : mapping-like object providing ``row['PATNO']`` and ``row['INFODT']``.
    q      : name of the column to difference.

    Returns
    -------
    ``float(current) - float(previous)``; ``numpy.nan`` when there is no
    previous value or when either value cannot be converted to float.

    Raises
    ------
    IndexError if ``dframe`` has no row for ``row``'s PATNO and INFODT.
    """
    patno = row['PATNO']
    infodt = row['INFODT']

    current = dframe[(dframe.INFODT == infodt ) & (dframe.PATNO == patno)][q].values[0]
    prev = prev_value(dframe,row,q)

    try:
        # A NaN previous value propagates through the subtraction as NaN.
        return float(current) - float(prev)
    except (TypeError, ValueError):
        # Non-numeric entries give NaN; unrelated exceptions (including
        # KeyboardInterrupt) are no longer hidden by a bare except.
        return numpy.nan

# Columns for which a delta against the previous retained event is computed.
updrs_qs = [
    'NP1COG', 'NP1HALL', 'NP1DPRS', 'NP1ANXS', 'NP1APAT', 'NP1DDS',
    'NP1SLPN', 'NP1SLPD', 'NP1PAIN', 'NP1URIN', 'NP1CNST', 'NP1LTHD',
    'NP1FATG', 'NP1TOT',
]
# (source column, delta column) pairs; the delta column is the source name
# prefixed with 'D_'.
dq_pairs = [(question, 'D_' + question) for question in updrs_qs]

# Deltas are taken within df_updrs_i_sigfall_base itself, i.e. relative to the
# previous row that is still present in this frame for the same patient.
for x in dq_pairs:
    df_updrs_i_sigfall_base[x[1]] = df_updrs_i_sigfall_base.apply(
        lambda row: q_delta_actual(df_updrs_i_sigfall_base, row, x[0]),
        axis=1,
    )


In [17]:
# TODO: CHECKS REQUIRED - the value-count sanity check below is still disabled.
# NOTE(review): the disabled snippet refers to df_updrs_iii_sigfall_base, which
# is not defined in the visible cells of this notebook; it would presumably
# need to use df_updrs_i_sigfall_base before being re-enabled - confirm.

# for x in updrs_qs:
#     df_updrs_iii_sigfall_base[x] = df_updrs_iii_sigfall_base[x].astype(float)
#     print(df_updrs_iii_sigfall_base[x].value_counts())
# Print the delta-column name (second element) of every dq_pairs tuple.
for d in dq_pairs:
    _,a = d
    print(a)

D_NP1COG
D_NP1HALL
D_NP1DPRS
D_NP1ANXS
D_NP1APAT
D_NP1DDS
D_NP1SLPN
D_NP1SLPD
D_NP1PAIN
D_NP1URIN
D_NP1CNST
D_NP1LTHD
D_NP1FATG
D_NP1TOT


In [18]:
# Data for Univariate Analysis

# Select the delta columns plus the two fall indicators, drop rows that have a
# NaN in any of them, and write the result without the index.
df_qs = df_updrs_i_sigfall_base[[
    'D_NP1COG',
    'D_NP1HALL',
    'D_NP1DPRS',
    'D_NP1ANXS',
    'D_NP1APAT',
    'D_NP1DDS',
    'D_NP1SLPN',
    'D_NP1SLPD',
    'D_NP1PAIN',
    'D_NP1URIN',
    'D_NP1CNST',
    'D_NP1LTHD',
    'D_NP1FATG',
    'D_NP1TOT',
    'PREV_SIGFALL',
    'SIGFALL',
]].dropna()
df_qs.to_csv('../working_data/updrsi_all_q_p_s.csv', index=False)

In [19]:
# Same export as df_qs but without the PREV_SIGFALL column.
df_q = df_qs.drop(columns='PREV_SIGFALL')
df_q.to_csv('../working_data/updrsi_all_q_s.csv', index=False)

In [27]:
# Extract used PATNOS for population analysis

# Rows with a NaN in any column are removed first, then only the record
# identifiers are kept.
# NOTE(review): unlike the other exports in this notebook, this file is written
# with the DataFrame index and goes into ../source_data - confirm both are
# intended.
df_pop = df_updrs_i_sigfall_base.dropna()[['PATNO', 'EVENT_ID', 'INFODT']]
df_pop.to_csv('../source_data/u_i_records.csv')