In [1]:
import pandas
import numpy

df_falls = pandas.read_csv('../source_data/Determination_of_Freezing_and_Falls.csv')
df_participant_data = pandas.read_csv('../source_data/participant_main.csv')
df_patnos = pandas.read_csv('../source_data/non_hc_patnos.csv')
non_hc_patnos = list(df_patnos.values.flat)
df_age_at_visit = pandas.read_csv('../source_data/Age_at_visit.csv')


In [2]:
def age_at_visit(patno,event_id):
    patno = int(patno)
    try:
        return df_age_at_visit[(df_age_at_visit['PATNO'] == patno) & (df_age_at_visit['EVENT_ID'] == event_id)]['AGE_AT_VISIT'].values[0]
    except:
        print(f'{patno}:{event_id} not found')
        
def add_age(row):
    p = row['PATNO']
    e = row['EVENT_ID']
    return age_at_visit(p,e)

df_falls['AGE_AT_EVENT'] = df_falls.apply (lambda row: add_age(row), axis=1)

3776:V15 not found
3961:V14 not found
102529:BL not found


In [3]:
# --- Summarise significant falls into SIGFALL binary flag ---

def sigfall(row):
    if row['FLLDRVIS'] == 1.0:
        return 1.0
    if row['FLLERVIS'] == 1.0:
        return 1.0
    if row['FLLHOSP'] == 1.0:
        return 1.0
    if row['FLLSURG'] == 1.0:
        return 1.0
    if row['FLLINST'] == 1.0:
        return 1.0
    else:
        return 0.0
    
df_falls['SIGFALL'] = df_falls.apply (lambda row: sigfall(row), axis=1)

In [4]:
# Create PREV_SIGFALL bool, 1 if any other SIGFALL records exist for that PATNO

def get_infodts(patno):
    return list(df_falls[df_falls.PATNO == patno]['INFODT'].values)

def date_to_tuple(date):
    year = date[-4:]
    month = date[:2]
    return (month,year,date)

def date_sort(dates):
    tup_list = map(date_to_tuple,dates)
    return sorted(tup_list, key = lambda x: (x[1],x[0]))

def get_index(date,dates):
    output = (False,999)    
    for idx, val in enumerate(dates):
        if val[2] == date:
            output = (True,idx)
    return output

def prev_sigfall(row):
    output = 0
    patno = row['PATNO']
    infodt = row['INFODT']    
    dates = get_infodts(patno)
    sorted_dates = date_sort(dates)
    i = get_index(infodt,sorted_dates)
    older_dates = sorted_dates[0:i[1]]
    
    for od in older_dates:
        try:
            sf = df_falls[(df_falls.INFODT == f'{od[0]}/{od[1]}' ) & (df_falls.PATNO == patno)]['SIGFALL'].values[0]
            if sf == 1:
                output = 1
        except:
            pass
    
    return output

# Generate PREV_SIGFALL

df_falls['PREV_SIGFALL'] = df_falls.apply (lambda row: prev_sigfall(row), axis=1)

In [5]:
# df_falls.shape
# df_falls.head(20)
# df_falls.PREV_SIGFALL.value_counts()
df_falls.query('PATNO == 3434').head(10)
# df_falls.query('PREV_SIGFALL == 1').head(10)

Unnamed: 0,REC_ID,PATNO,EVENT_ID,PAG_NAME,INFODT,PTCGBOTH,FRZGT1W,FLNFR1W,FRZGT12M,FLNFR12M,...,FLLDRVIS,FLLERVIS,FLLHOSP,FLLSURG,FLLINST,ORIG_ENTRY,LAST_UPDATE,AGE_AT_EVENT,SIGFALL,PREV_SIGFALL
295,727139401,3434,V14,DTRMFALL,01/2019,1,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,02/2019,2020-06-30 09:07:36.0,61.1,0.0,0
296,759879201,3434,V15,DTRMFALL,10/2019,3,2.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11/2019,2020-06-30 09:07:37.0,61.8,0.0,0
297,6f41783a-f791-4137-913d-2f1ae806ca69,3434,V16,DTRMFALL,10/2020,3,1.0,0.0,1.0,1.0,...,1.0,0.0,0.0,0.0,0.0,11/2020,2020-11-03 00:00:00.0,62.8,1.0,0
298,ee1b18c4-6770-4629-baa4-97b035975ea7,3434,V17,DTRMFALL,10/2021,3,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,11/2021,2021-11-01 00:00:00.0,63.8,0.0,1


In [6]:
# Remove healthy cohort patients

df_falls_pd = df_falls[df_falls['PATNO'].isin(non_hc_patnos)]
df_falls_pd.columns

Index(['REC_ID', 'PATNO', 'EVENT_ID', 'PAG_NAME', 'INFODT', 'PTCGBOTH',
       'FRZGT1W', 'FLNFR1W', 'FRZGT12M', 'FLNFR12M', 'INJFRHIP', 'INJFRUE',
       'INJFRSKL', 'INJFROTH', 'HINJNOLC', 'HINJLOC2', 'INJSTCH', 'INJOTH',
       'FLLDRVIS', 'FLLERVIS', 'FLLHOSP', 'FLLSURG', 'FLLINST', 'ORIG_ENTRY',
       'LAST_UPDATE', 'AGE_AT_EVENT', 'SIGFALL', 'PREV_SIGFALL'],
      dtype='object')

In [7]:
df_falls_output = df_falls_pd[['PATNO', 'EVENT_ID','INFODT', 'AGE_AT_EVENT','PREV_SIGFALL','SIGFALL']]
df_falls_output.head()

Unnamed: 0,PATNO,EVENT_ID,INFODT,AGE_AT_EVENT,PREV_SIGFALL,SIGFALL
2,3001,V15,03/2019,73.2,0,0.0
3,3001,V17,09/2021,75.7,0,1.0
4,3002,V15,03/2019,75.6,0,0.0
5,3002,V17,09/2021,78.1,0,0.0
6,3003,V15,03/2019,64.7,0,0.0


In [8]:
df_falls_output.to_csv('../source_data/sigfall_main.csv', index=False)

In [126]:
#--------------- SIGFALL general analysis --------------------

# The SIGFALLS only occur in PD patients

# P(SIGFALL) - non-hc
print(df_falls_ouput.shape)
print(df_falls_output.query('SIGFALL == 1.0').shape)
a,_ = df_falls_ouput.shape
sf,_ = df_falls_output.query('SIGFALL == 1.0').shape
p_sf = sf/a
print(p_sf)

# P(SIGFALL) - All cohorts
b,_ = df_falls_pd.shape
sf2,_ = df_falls_pd.query('SIGFALL == 1.0').shape
p_sf2 = sf2/b
print(p_sf2)

# SIGFALL count comparison
sf == sf2
# repeat fallers
# df_falls_output.query('SIGFALL == 1.0').PATNO.value_counts()

(1357, 5)
(124, 6)
0.09137803979366249
0.09137803979366249


True

In [131]:
# The influence of having had a previous fall SIGFALL_PREV
c,_ = sf2,_ = df_falls_pd.query('PREV_SIGFALL == 1.0').shape
sf3,_ = df_falls_pd.query('SIGFALL == 1.0 & PREV_SIGFALL == 1.0').shape
p_sf3 = sf3/c
print(c)
print(sf3)
print(p_sf3)
# How much more likely are you to experience SIGFALL if you have had PREV_SIGFALL
p_sf3/p_sf

52
20
0.38461538461538464


4.209057071960298

In [137]:
# Age Demographics
print(df_falls_ouput.AGE_AT_EVENT.describe())
df_falls_ouput.query('SIGFALL == 1.0').AGE_AT_EVENT.describe()

count    1355.000000
mean       67.106790
std         8.542152
min        30.600000
25%        61.250000
50%        67.700000
75%        73.300000
max        90.600000
Name: AGE_AT_EVENT, dtype: float64


count    124.000000
mean      70.629839
std        7.970382
min       48.300000
25%       66.100000
50%       72.150000
75%       75.625000
max       88.500000
Name: AGE_AT_EVENT, dtype: float64