In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pprint
import os

# Effectively disable truncation of long cell values when frames are displayed.
# https://stackoverflow.com/questions/54692405/output-truncation-in-google-colab
pd.options.display.max_colwidth = 100000

# The imports below are used to render pandas DataFrames as formatted output
from IPython.display import display, HTML



In [None]:
from datetime import datetime
def convert_time(x):
  """Parse a '%Y-%m-%d %H:%M:%S' timestamp string into a datetime.

  A string that ends in ':' has '00' appended before it is parsed.
  """
  # A trailing colon means the last field is missing; complete it with zeros.
  padded = (x + '00') if x[-1] == ':' else x
  return datetime.strptime(padded, '%Y-%m-%d %H:%M:%S')

In [None]:
# Load the MV table and preview its first rows.
MV = pd.read_csv('Downloads/derived_MV.csv')
MV.head()

In [None]:
# Count of distinct stay_id values in MV.
print(MV.stay_id.nunique())

In [None]:
# Load the pO2 table and turn its charttime strings into datetimes.
pO2 = pd.read_csv('Downloads/derived_pO2.csv')
pO2['charttime'] = pO2['charttime'].apply(convert_time)
pO2.head()

In [None]:
# Count of distinct subject_id values in pO2.
print(pO2.subject_id.nunique())

In [None]:
# Load the fiO2 table and turn its charttime strings into datetimes.
fiO2 = pd.read_csv('Downloads/derived_fiO2.csv')
fiO2['charttime'] = fiO2['charttime'].apply(convert_time)
fiO2.head()

In [None]:
# Count of distinct subject_id values in fiO2.
print(fiO2.subject_id.nunique())

In [None]:
# Pair every pO2 row with every fiO2 row of the same subject. Column names that
# occur in both tables receive the '_pO2' / '_fiO2' suffixes; the leftover index
# columns and 'fio2_chartevents' are then discarded.
merged_tables = pd.merge(
    pO2, fiO2, how='inner', on='subject_id', suffixes=('_pO2', '_fiO2')
).drop(columns=['Unnamed: 0_pO2', 'Unnamed: 0_fiO2', 'fio2_chartevents'])
merged_tables.head()

In [None]:
# Elapsed time from each fiO2 reading to each pO2 reading of the same subject.
merged_tables['time_diff'] = merged_tables.charttime_pO2 - merged_tables.charttime_fiO2
# Keep only pairs where the pO2 reading was charted strictly after the fiO2 reading.
# .copy() gives an independent frame so the column assignments below do not raise
# SettingWithCopyWarning.
# NOTE(review): simultaneous readings (time_diff == 0) are excluded - confirm intended.
merged_tables = merged_tables[merged_tables['time_diff'] > pd.Timedelta(0)].copy()

# Express the gap in minutes and find the smallest gap per subject.
merged_tables['time_diff'] = merged_tables['time_diff'] / pd.Timedelta(minutes=1)
merged_tables['min_time_diff'] = merged_tables.groupby('subject_id')['time_diff'].transform('min')

# Select the closest pO2/fiO2 pair(s) per subject with a boolean mask.
# DataFrame.where would keep every row and blank the non-matching ones to NaN,
# which also upcasts integer columns such as subject_id to float.
is_closest_pair = merged_tables['time_diff'] == merged_tables['min_time_diff']
PF_table = merged_tables.loc[is_closest_pair].copy()

PF_table['PF_ratio'] = PF_table.po2 / PF_table.converted_fiO2

# Discard pairs for which the ratio could not be computed.
PF_table = PF_table[PF_table['PF_ratio'].notna()]

PF_table.head(20)

In [None]:
# Count of distinct subject_id values in PF_table.
print(PF_table.subject_id.nunique())

In [None]:
# Keep the rows whose PF_ratio is below 300.
# Boolean indexing removes the other rows directly (a NaN ratio compares False),
# whereas DataFrame.where would blank them to NaN and upcast integer columns.
PF_ARDS = PF_table[PF_table['PF_ratio'] < 300]
PF_ARDS.head(5)

In [None]:
# Count of distinct subject_id values in PF_ARDS.
print(PF_ARDS.subject_id.nunique())

In [None]:
# Inner-join the PF_ARDS rows with MV on subject_id. Clashing MV column names get
# the '_MV' suffix; the 'Unnamed: 0' column is dropped afterwards.
ARDS_patients = pd.merge(
    PF_ARDS, MV, how='inner', on='subject_id', suffixes=(None, '_MV')
).drop(columns=['Unnamed: 0'])
ARDS_patients.head()

In [None]:
# Count of distinct subject_id values in ARDS_patients.
print(ARDS_patients.subject_id.nunique())

In [None]:
# Load the CXR metadata and rename its patient key to 'subject_id' so it can be
# joined with the other tables.
mimic_cxr = (
    pd.read_csv('Downloads/mimic_cxr.csv', low_memory=False)
    .rename(columns={"PatientID": "subject_id"})
)

In [None]:
# Show every column when a frame is displayed.
pd.options.display.max_columns = None
# Inner-join the ARDS_patients rows with the CXR metadata on subject_id.
ARDS_Xray = pd.merge(ARDS_patients, mimic_cxr, how='inner', on='subject_id')
ARDS_Xray.head()

In [None]:
# Distinct subject_id and dicom counts in ARDS_Xray, one per line.
print(
    ARDS_Xray['subject_id'].nunique(),
    ARDS_Xray['dicom'].nunique(),
    sep='\n',
)

In [None]:
# Encode the calendar date of each pO2 reading as a YYYYMMDD integer:
# date -> 'YYYY-MM-DD' string -> strip the dashes -> int.
ARDS_Xray['pO2_date'] = (
    ARDS_Xray['charttime_pO2']
    .dt.date
    .astype(str)
    .str.replace('-', '')
    .astype(int)
)

In [None]:
# Day gap between the pO2 reading and the X-ray study, computed on real dates.
# Subtracting YYYYMMDD integers is wrong across month/year boundaries
# (e.g. 20200201 - 20200131 == 70 although the two days are adjacent), so such
# studies would be dropped by the <= 1 filter below.
pO2_day = ARDS_Xray['charttime_pO2'].dt.normalize()
# StudyDate is read as a YYYYMMDD number; go through nullable Int64 so that a
# float-typed column (e.g. 20200131.0) still formats as '20200131'. Values that
# cannot be parsed become NaT and fail the filter below.
study_day = pd.to_datetime(
    pd.to_numeric(ARDS_Xray['StudyDate'], errors='coerce').astype('Int64').astype(str),
    format='%Y%m%d',
    errors='coerce',
)
ARDS_Xray['Xray_day_diff'] = (pO2_day - study_day).dt.days.abs()

# Keep studies taken within one calendar day of the pO2 reading that have a dicom id.
# A boolean mask removes the other rows outright; DataFrame.where would blank them
# to NaN and upcast integer columns to float.
within_one_day = ARDS_Xray['Xray_day_diff'] <= 1
has_dicom = ARDS_Xray['dicom'].notna()
final_Xrays = ARDS_Xray.loc[within_one_day & has_dicom].drop(columns=['Unnamed: 0'])

final_Xrays.head(5)

In [None]:
# Distinct dicom, StudyID and subject_id counts in final_Xrays, one per line.
print(
    final_Xrays['dicom'].nunique(),
    final_Xrays['StudyID'].nunique(),
    final_Xrays['subject_id'].nunique(),
    sep='\n',
)

In [None]:
# Attach the age table to the selected X-ray rows and keep patients aged 18 or over.
age = pd.read_csv('Downloads/age.csv')
final_patients = final_Xrays.merge(age, how='inner', on='subject_id')
# Boolean indexing removes the under-18 rows. DataFrame.where would keep them as
# all-NaN rows, and those blank rows would be written to the exported CSV files.
final_adult_patients = final_patients[final_patients['age'] >= 18]

In [None]:
# Distinct dicom, StudyID and subject_id counts in final_adult_patients, one per line.
print(
    final_adult_patients['dicom'].nunique(),
    final_adult_patients['StudyID'].nunique(),
    final_adult_patients['subject_id'].nunique(),
    sep='\n',
)

In [None]:
# Unique dicom ids of the selected rows. dropna() keeps a missing id (for example
# from rows that were blanked rather than removed upstream) out of the exported list.
final_dicom = final_adult_patients['dicom'].dropna().drop_duplicates()

In [None]:
# Write the dicom id list to disk (the index is written as well).
final_dicom.to_csv(path_or_buf='Downloads/final_dicom.csv')

In [None]:
# Count of distinct dicom ids. nunique must be called; without the parentheses
# the bound method's repr is printed instead of the number.
print(final_dicom.nunique())

In [None]:
# Write the final adult-patient table to disk (the index is written as well).
final_adult_patients.to_csv(path_or_buf='Downloads/final_adult_patients.csv')