In [1]:
import pandas
import numpy

# Raw MDS-UPDRS Part II (patient questionnaire) export.
updrs_ii = pandas.read_csv(
    filepath_or_buffer='../source_data/MDS_UPDRS_Part_II__Patient_Questionnaire.csv',
)
# Raw "Determination of Freezing and Falls" export.
falls = pandas.read_csv(
    filepath_or_buffer='../source_data/Determination_of_Freezing_and_Falls.csv',
)


### Create updrsii_sigfall
Concatenation of UPDRS II data and SIGFALL, a summary statistic derived from the falls data

In [2]:
# --- Summarise significant falls into SIGFALL binary flag ---

def sigfall(row):
    """Collapse the fall-outcome columns of one record into a binary flag.

    Returns 1.0 when any of FLLDRVIS, FLLERVIS, FLLHOSP, FLLSURG or FLLINST
    equals 1.0 in ``row`` and 0.0 otherwise. A missing (NaN) value never
    compares equal to 1.0, so it counts as "no".
    """
    outcome_columns = ('FLLDRVIS', 'FLLERVIS', 'FLLHOSP', 'FLLSURG', 'FLLINST')
    # any() short-circuits, so the columns are inspected in the listed order
    # and the remaining ones are skipped after the first hit.
    flagged = any(row[column] == 1.0 for column in outcome_columns)
    return 1.0 if flagged else 0.0
    
    
# Row-wise (axis=1) evaluation: one SIGFALL flag per falls record.
falls['SIGFALL'] = falls.apply(sigfall, axis=1)

In [4]:
# --- Create dataframe that combines UPDRSii data with SIGFALL binary flag ---

def sig_get(row):
    """Look up the SIGFALL flag stored in ``falls`` for this row's visit.

    A visit is identified by the row's PATNO and INFODT. Returns the first
    matching SIGFALL value, or NaN when ``falls`` holds no matching record.
    """
    same_visit = (falls.PATNO == row['PATNO']) & (falls.INFODT == row['INFODT'])
    flags = falls.loc[same_visit, 'SIGFALL']
    return numpy.nan if flags.empty else flags.values[0]


# Drop the listed columns from the questionnaire data, then attach the SIGFALL
# flag of the matching falls record (NaN when there is none) and save the result.
updrs_sigfall_base = updrs_ii.drop(
    labels=['REC_ID', 'EVENT_ID', 'PAG_NAME', 'NUPSOURC', 'ORIG_ENTRY', 'LAST_UPDATE'],
    axis=1,
)
updrs_sigfall_base['SIGFALL'] = updrs_sigfall_base.apply(sig_get, axis=1)
updrs_sigfall_base.to_csv('../working_data/updrs_sigfall_base.csv', index=False)

In [5]:
# Keep complete rows only. dropna() discards a row when ANY column is missing,
# so this removes the visits without SIGFALL data (and any other incomplete row).

updrs_sigfall = updrs_sigfall_base.dropna(axis=0, how='any')
updrs_sigfall.to_csv('../working_data/updrs_sigfall.csv', index=False)

In [6]:
# Variant without the UPDRS total column (NP2PTOT)

updrs_sigfall_raw = updrs_sigfall.drop(labels='NP2PTOT', axis=1)
updrs_sigfall_raw.to_csv('../working_data/updrs_sigfall_raw.csv', index=False)

### Create updrsii_future_sigfall
Concatenation of updrsII data with SIGFALL from the next recorded entry for that patient (chronologically speaking)

In [7]:
def get_infodts(patno):
    """Return every INFODT value stored for ``patno`` in ``updrs_sigfall_base``.

    The result is a plain list in frame order; it is empty when the patient
    has no rows.
    """
    patient_rows = updrs_sigfall_base.loc[updrs_sigfall_base.PATNO == patno]
    return list(patient_rows['INFODT'].values)

def date_to_tuple(date):
    """Split a date string into ``(month, year, date)``.

    The month is taken from the first two characters and the year from the
    last four; the untouched input is kept as the third element.
    NOTE(review): this presumes strings shaped like ``MM/YYYY`` - confirm
    against the INFODT format of the source data.
    """
    return (date[:2], date[-4:], date)

def date_sort(dates):
    """Return the ``date_to_tuple`` form of ``dates`` ordered by year, then month.

    Year and month are compared as strings. The sort is stable, so entries
    with the same year and month keep their input order.
    """
    ordered = [date_to_tuple(entry) for entry in dates]
    ordered.sort(key=lambda parts: (parts[1], parts[0]))
    return ordered

def get_index(date,dates):
    """Locate ``date`` among tuples whose third element is a date string.

    Returns ``(True, idx)`` where ``idx`` is the position of the LAST entry
    whose third element equals ``date``, or ``(False, 999)`` when no entry
    matches.
    """
    hits = [position for position, entry in enumerate(dates) if entry[2] == date]
    if not hits:
        return (False, 999)
    return (True, hits[-1])

def next_sigfall(row):
    """Return the SIGFALL value recorded at the patient's next visit.

    The row's PATNO selects the patient and its INFODT the current visit.
    The patient's visit dates in ``updrs_sigfall_base`` are ordered with
    ``date_sort`` and the SIGFALL of the entry following the current one is
    returned.

    Returns NaN when the current date is not found among the patient's
    dates, when it is the last one, or when no row matches the next date.

    Unlike a blanket ``except: pass``, the "no next visit" cases are checked
    explicitly, so genuine problems (for example a missing SIGFALL column)
    raise instead of silently turning into NaN.
    """
    patno = row['PATNO']
    infodt = row['INFODT']
    sorted_dates = date_sort(get_infodts(patno))
    found, position = get_index(infodt, sorted_dates)

    if not found:
        return numpy.nan

    next_position = position + 1
    if next_position >= len(sorted_dates):
        # The current visit is the patient's latest one.
        return numpy.nan

    next_date = sorted_dates[next_position][2]
    same_visit = (updrs_sigfall_base.INFODT == next_date) & (updrs_sigfall_base.PATNO == patno)
    flags = updrs_sigfall_base[same_visit]['SIGFALL']
    if flags.empty:
        return numpy.nan
    # May itself be NaN when the next visit has no SIGFALL record.
    return flags.values[0]


# Replace each visit's own SIGFALL with the SIGFALL of the same patient's next
# visit, then keep complete rows only (dropna() removes every row that has a
# missing value, including visits for which no next SIGFALL is available).
updrs_future_sigfall = updrs_sigfall_base.drop(labels='SIGFALL', axis=1)
updrs_future_sigfall['SIGFALL_NEXT'] = updrs_future_sigfall.apply(next_sigfall, axis=1)
updrs_future_sigfall = updrs_future_sigfall.dropna(axis=0, how='any')
updrs_future_sigfall.to_csv('../working_data/updrs_future_sigfall.csv', index=False)

In [8]:
# Variant without the NP2PTOT column.
updrs_future_sigfall_raw = updrs_future_sigfall.drop(labels='NP2PTOT', axis=1)
updrs_future_sigfall_raw.to_csv('../working_data/updrs_future_sigfall_raw.csv', index=False)

### Create increase_updrs_sigfall
Concatenation of SIGFALL and the increase (if any; decreases are recorded as 0) from the UPDRS scores of the previous visit to the current UPDRS scores

In [9]:
def prev_value(row,q):
    """Return column ``q`` as recorded at the patient's previous visit.

    The row's PATNO selects the patient and its INFODT the current visit.
    The patient's visit dates in ``updrs_sigfall_base`` are ordered with
    ``date_sort`` and the value of ``q`` at the entry preceding the current
    one is returned.

    Returns NaN when the current date is not found among the patient's
    dates, when it is the first one, or when no row matches the previous
    date.

    The "no previous visit" cases are checked explicitly rather than hidden
    behind a blanket ``except: pass``, so an unknown column name ``q`` raises
    ``KeyError`` instead of silently producing NaN.
    """
    patno = row['PATNO']
    infodt = row['INFODT']
    sorted_dates = date_sort(get_infodts(patno))
    found, position = get_index(infodt, sorted_dates)

    # Position 0 is the patient's earliest visit: nothing precedes it.
    if not found or position == 0:
        return numpy.nan

    prev_date = sorted_dates[position - 1][2]
    same_visit = (updrs_sigfall_base.INFODT == prev_date) & (updrs_sigfall_base.PATNO == patno)
    values = updrs_sigfall_base[same_visit][q]
    if values.empty:
        return numpy.nan
    return values.values[0]

def q_delta(row,q):
    """Return how much column ``q`` increased since the patient's previous visit.

    The current value is read from ``updrs_sigfall_base`` for the row's
    PATNO/INFODT and the previous value comes from ``prev_value``.

    Returns:
        * ``current - previous`` when that difference is positive,
        * 0.0 when the value stayed the same or decreased,
        * NaN when the difference cannot be computed: there is no previous
          value, either value is missing, or the values are not numeric.

    The NaN case matters: ``NaN > 0`` is False, so without the explicit
    check a visit with no previous visit would be reported as "no increase"
    (0.0) and would survive a later ``dropna()``. Returning NaN keeps this
    function consistent with ``q_delta_actual``.
    """
    patno = row['PATNO']
    infodt = row['INFODT']

    same_visit = (updrs_sigfall_base.INFODT == infodt) & (updrs_sigfall_base.PATNO == patno)
    current = updrs_sigfall_base[same_visit][q].values[0]
    prev = prev_value(row, q)

    try:
        change = current - prev
        if pandas.isnull(change):
            # Undefined change (e.g. first visit): do not report it as 0.
            return numpy.nan
        return change if change > 0 else 0.0
    except TypeError:
        # Non-numeric values cannot be subtracted or compared; stay best-effort.
        return numpy.nan
    
# Score columns to process (NP2PTOT included), each paired with the name of
# the derived "increase" column: (source, 'I_' + source).
updrs_qs = ['NP2SPCH','NP2SALV','NP2SWAL','NP2EAT','NP2DRES','NP2HYGN','NP2HWRT','NP2HOBB','NP2TURN','NP2TRMR','NP2RISE','NP2WALK','NP2FREZ','NP2PTOT']
q_pairs = [(question, 'I_' + question) for question in updrs_qs]

# Start from the base frame without the score columns and SIGFALL; only the
# per-column increases are stored in this data set.
increase_updrs_sigfall = updrs_sigfall_base.drop(updrs_qs + ['SIGFALL'], axis=1)

for question, increase_column in q_pairs:
    increase_updrs_sigfall[increase_column] = increase_updrs_sigfall.apply(q_delta, axis=1, args=(question,))

# Re-attach SIGFALL for the same visit, keep complete rows only, and save.
increase_updrs_sigfall['SIGFALL'] = increase_updrs_sigfall.apply(sig_get, axis=1)
increase_updrs_sigfall = increase_updrs_sigfall.dropna(axis=0, how='any')
increase_updrs_sigfall.to_csv('../working_data/increased_updrs_sigfall.csv', index=False)

In [10]:
# Variant without the increase of the NP2PTOT column.
increase_updrs_sigfall_raw = increase_updrs_sigfall.drop(labels='I_NP2PTOT', axis=1)
increase_updrs_sigfall_raw.to_csv('../working_data/increased_updrs_sigfall_raw.csv', index=False)

### Create delta_updrs_sigfall
Like above, but the raw (signed) delta is preserved instead of being floored at zero

In [11]:
def q_delta_actual(row,q):
    """Return the signed change of column ``q`` since the patient's previous visit.

    The current value is read from ``updrs_sigfall_base`` for the row's
    PATNO/INFODT and the previous value comes from ``prev_value``. The
    result is ``current - previous``; it is NaN when either value is NaN
    (for instance when ``prev_value`` finds no previous visit) or when the
    values cannot be subtracted.

    Only ``TypeError`` is treated as "cannot be subtracted"; a bare
    ``except:`` would also swallow unrelated errors and interrupts.
    """
    patno = row['PATNO']
    infodt = row['INFODT']

    same_visit = (updrs_sigfall_base.INFODT == infodt) & (updrs_sigfall_base.PATNO == patno)
    current = updrs_sigfall_base[same_visit][q].values[0]
    prev = prev_value(row, q)

    try:
        return current - prev
    except TypeError:
        # Non-numeric values: keep the best-effort NaN result.
        return numpy.nan


# Each score column paired with the name of its derived delta column:
# (source, 'D_' + source).
dq_pairs = [(question, 'D_' + question) for question in updrs_qs]

# Start from the base frame without the score columns and SIGFALL; only the
# per-column deltas are stored in this data set.
delta_updrs_sigfall = updrs_sigfall_base.drop(updrs_qs + ['SIGFALL'], axis=1)

for question, delta_column in dq_pairs:
    delta_updrs_sigfall[delta_column] = delta_updrs_sigfall.apply(q_delta_actual, axis=1, args=(question,))

# Re-attach SIGFALL for the same visit, keep complete rows only, and save.
delta_updrs_sigfall['SIGFALL'] = delta_updrs_sigfall.apply(sig_get, axis=1)
delta_updrs_sigfall = delta_updrs_sigfall.dropna(axis=0, how='any')
delta_updrs_sigfall.to_csv('../working_data/delta_updrs_sigfall.csv', index=False)

# Variant without the delta of the NP2PTOT column.
delta_updrs_sigfall_raw = delta_updrs_sigfall.drop(labels='D_NP2PTOT', axis=1)
delta_updrs_sigfall_raw.to_csv('../working_data/delta_updrs_sigfall_raw.csv', index=False)