In [85]:
import os
import glob
import pandas as pd

csvs_dir = "/home/olle/PycharmProjects/LODE/workspace/sequence_data"

medications_table = "medications_anonymized.csv"
events = "longitudinal_events.csv"

In [86]:
### load tables
med_pd = pd.read_csv(os.path.join(csvs_dir, medications_table))
events_pd = pd.read_csv(os.path.join(csvs_dir, events))

print("Event records: ", events_pd.shape[0])
print("MED records: ", med_pd.shape[0])

med_pd = med_pd[~med_pd.study_date.isin([-1])]

### pre process date columns
med_pd.loc[:, "study_date"] = pd.to_datetime(med_pd.study_date.astype(int).astype(str), format="%Y%m%d")
med_pd[["pseudo_id"]] = med_pd["pseudo_id"].astype(int)

Event records:  33182
MED records:  2909033


### Merge tables, reformat and save new table

In [87]:
events_med_pd = pd.merge(events_pd, med_pd[["pseudo_id", "laterality", "DAT", "MED"]], 
         left_on=["patient_id", "laterality", "study_date"], 
         right_on=["pseudo_id", "laterality", "DAT"], 
         how="left")

columns_final = ["patient_id", "laterality", "study_date", "ICPML", "injection?", "iol?", "MED_y"]

events_med_pd = events_med_pd[columns_final].drop_duplicates()
events_med_pd.rename(columns={"MED_y": "MED"}, inplace=True)
print("Event & MED records: ", events_med_pd.shape[0])

Event & MED records:  33360


In [88]:
events_med_pd.to_csv(os.path.join(csvs_dir, "longitudinal_events_med.csv"))

In [89]:
events_med_pd.MED.unique()

array(['Lucentis', 'Eylea', nan, 'Ozurdex', 'Triamcinolon', 'Avastin',
       'Jetrea', 'Dexamethason', 'Iluvien'], dtype=object)

## Join with export 2 dir

In [91]:
exp2_dir = "/home/olle/PycharmProjects/LODE/workspace/export2"

oct_meta = pd.read_csv(os.path.join(exp2_dir, "oct_meta_longitudinal_naive_information.csv"))
oct_meta.rename(columns={"PATNR": "pseudo_id"}, inplace=True)

oct_meta.loc[:, "study_date"] = pd.to_datetime(oct_meta.study_date, yearfirst=True, format='%Y%m%d')

# format med date column
med_pd.loc[:, "DAT"] = pd.to_datetime(med_pd.DAT, yearfirst=True, format="%Y-%m-%d")


In [92]:
## join medications table
oct_meta_pd = pd.merge(oct_meta, med_pd, left_on=["pseudo_id", "laterality", "study_date"], 
         right_on=["pseudo_id", "laterality", "DAT"], how="left")

oct_meta_pd.drop(["AUGE", "LFDNR", "DAT", "ICPML", "TXT"], axis=1, inplace=True)

In [96]:
oct_meta_pd

Unnamed: 0,pseudo_id,laterality,study_date_x,birthdate,image_type,gender,modality,study_description,img_shape,age,...,INJECTION_first_date,LONGITUDINAL_LISTS_1,LONGITUDINAL_LISTS_2,Naive_x,Naive_y,study_date_dt,time_range,oct_after_injections,MED,study_date_y
0,367711,L,2018-11-13,19640101,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",57,...,2018-11-13,False,False,,,2018-11-13,907,False,Ozurdex,2020-07-06
1,367711,L,2018-11-13,19640101,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",57,...,2018-11-13,False,False,,,2018-11-13,907,False,Ozurdex,2020-08-10
2,367711,L,2018-11-13,19640101,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",57,...,2018-11-13,False,False,,,2018-11-13,907,False,Ozurdex,2020-10-19
3,367711,L,2018-11-13,19640101,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",57,...,2018-11-13,False,False,,,2018-11-13,907,False,Ozurdex,2018-04-26
4,367711,L,2018-11-13,19640101,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",57,...,2018-11-13,False,False,,,2018-11-13,907,False,Ozurdex,2018-08-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395734,301443,L,2020-09-04,19471201,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",73,...,2020-07-29,False,False,,,2020-09-04,1120,False,Iluvien,2020-09-04
395735,301443,L,2020-09-04,19471201,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",73,...,2020-07-29,False,False,,,2020-09-04,1120,False,Iluvien,2017-08-11
395736,301443,L,2020-09-04,19471201,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",73,...,2020-07-29,False,False,,,2020-09-04,1120,False,Iluvien,2019-06-17
395737,301443,L,2020-09-04,19471201,"['ORIGINAL', 'PRIMARY']",M,OPT,Makula (OCT),"(49, 496, 512)",73,...,2020-07-29,False,False,,,2020-09-04,1120,False,Iluvien,2019-01-23


In [95]:
oct_meta_pd = oct_meta_pd.drop_duplicates()
oct_meta_pd.to_csv(os.path.join(exp2_dir, "oct_meta_longitudinal_naive_information_med.csv"))

## Filter table for correct medications

In [153]:
drugs = ['Jetrea', 'Ozurdex', 'Illuvien', 'Dexamthason', 'Triamcinolon']

# filter away irrelevant injections
oct_meta_filtered_pd = oct_meta_pd[~oct_meta_pd.MED.isin(drugs)]

# filter away records with no injections
oct_meta_filtered_pd = oct_meta_filtered_pd[~oct_meta_filtered_pd.MED.isna()]

# filter any records wihtout oct path (should be 0 but for safety done anyway)
oct_meta_filtered_pd = oct_meta_filtered_pd[~oct_meta_filtered_pd.oct_path.isna()]

### Sort dates and filter for follow ups

In [156]:
import numpy as np

# sort date within groups
oct_meta_filtered_pd = oct_meta_filtered_pd.sort_values(['pseudo_id','laterality', "study_date_dt"])

oct_meta_filtered_pd = oct_meta_filtered_pd[["pseudo_id", "laterality", "study_date_dt"]].drop_duplicates()
oct_meta_filtered_pd.loc[:, "study_date_dt"] = pd.to_datetime(oct_meta_filtered_pd.study_date_dt, format="%Y-%m-%d")

# get cum sum
oct_meta_filtered_pd['diff'] = oct_meta_filtered_pd.groupby(['pseudo_id','laterality'])['study_date_dt'].diff() / np.timedelta64(1, 'D')
oct_meta_filtered_pd['sum'] = oct_meta_filtered_pd.groupby(['pseudo_id','laterality'])['diff'].cumsum()

## Determine which patients have follow up

In [158]:
oct_meta_filtered_pd["three_month_followup"] = oct_meta_filtered_pd["sum"].between(30,150)
oct_meta_filtered_pd["six_month_followup"] = oct_meta_filtered_pd["sum"].between(120,240)
oct_meta_filtered_pd["twelwe_month_followup"] = oct_meta_filtered_pd["sum"].between(305,425)

oct_meta_filtered_pd["follow_up"] = oct_meta_filtered_pd["three_month_followup"] | oct_meta_filtered_pd["six_month_followup"] | oct_meta_filtered_pd["twelwe_month_followup"]

# filter df for follow ups
patient_with_med_follow_up = oct_meta_filtered_pd[oct_meta_filtered_pd["follow_up"] == True]["pseudo_id"].drop_duplicates().values

# exlude all patients wihtout follow up
oct_meta_followup_med_pd = oct_meta_pd[oct_meta_pd.pseudo_id.isin(patient_with_med_follow_up.tolist())].drop_duplicates()

In [168]:
### Save filtered list

In [167]:
oct_meta_followup_med_pd.to_csv(os.path.join(exp2_dir, "oct_meta_longitudinal_naive_info_med_followup.csv"))

In [171]:
oct_meta_followup_med_pd.pseudo_id.drop_duplicates()

71        199994
1273      168416
1323      328744
1456      187567
1627       59622
           ...  
383199    113059
384422     46183
390269     68705
391723    148663
395087    327192
Name: pseudo_id, Length: 391, dtype: int64