# Store the ids of patients eligible as model input, according to the criteria below


Criteria:
1. An admission is only eligible if there are diagnoses associated with it
2. Patient must have more than 1 (eligible) admission to the hospital

In [2]:
import os

# Move the working directory one level up, exactly once.
# The notebook is expected to start in a direct sub-folder of the
# 'master-thesis' directory; after the chdir below the parent of the cwd is
# no longer 'master-thesis', so re-running this cell trips the assert instead
# of climbing a second level.
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd) # parent directory

# protection against running this cell multiple times
# (os.path.basename honours the platform's path separator, whereas splitting
# on '/' never matches on Windows-style paths)
assert os.path.basename(parent_dir) == 'master-thesis','Oops, directory already changed previously as indended. Ignoring...'

# change working directory (if assert passed)
new_cwd = parent_dir
os.chdir(new_cwd)

In [3]:
# show all outputs: with ast_node_interactivity set to "all", IPython displays
# the result of every top-level expression in a cell instead of only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [4]:
import os
from MimicIII import MimicIII
from MimicIV import MimicIV
from ICDCodesGrouper import ICDCodesGrouper
import pandas as pd
import numpy as np

from config import Settings

# Single project-wide settings object; the cells below pass it to the ICD
# grouper and the dataset readers and read the output locations from it.
settings = Settings()

In [7]:
# ICD code grouper built from the project settings; the same instance is
# handed to both the MIMIC-III and the MIMIC-IV readers below.
grouper = ICDCodesGrouper(settings)

# Mimic-III

In [4]:
# MIMIC-III reader (used below via read_admissions / read_diagnoses),
# configured with the project settings and the shared ICD grouper.
mimiciii = MimicIII(settings,grouper=grouper)

In [5]:
# Select the MIMIC-III patients that satisfy both eligibility criteria.
admissions = mimiciii.read_admissions()
diagnoses = mimiciii.read_diagnoses()

# 1. remove admissions without record of diagnoses:
#    any HADM_ID that owns a diagnosis row whose ICD9_CODE is missing is dropped
missing_code = diagnoses.ICD9_CODE.isna()
admissions_without_records = diagnoses.loc[missing_code, 'HADM_ID'].unique()
has_records = ~admissions.HADM_ID.isin(admissions_without_records)
admissions = admissions[has_records]

# 2. Only interested in patients with multiple admissions:
#    count the remaining admissions per patient and keep ids with more than one
admissions_per_patient = admissions.groupby('SUBJECT_ID').size()
mult_adm_patients = admissions_per_patient[admissions_per_patient > 1].index.tolist()

# report how many patients are eligible
print('nº patients eligible:', len(mult_adm_patients))

# kept under a dataset-specific name so the MIMIC-IV cell can reuse the
# generic variable names without losing this result
mimicIII_patients = mult_adm_patients

nº patients eligible: 7499


# Mimic-IV (icd9-only)

In [8]:
# MIMIC-IV reader (used below via read_admissions / read_diagnoses),
# configured with the project settings and the shared ICD grouper.
mimiciv = MimicIV(settings,grouper=grouper)

In [9]:
# Select the MIMIC-IV patients that satisfy the eligibility criteria,
# restricted to admissions coded with ICD-9.
admissions = mimiciv.read_admissions()
diagnoses = mimiciv.read_diagnoses()

# hadm_ids owning at least one diagnosis row with icd_version == 9
# NOTE(review): despite the name, an admission that also has rows with another
# icd_version would still be listed here — confirm such admissions do not exist
is_icd9_row = diagnoses.icd_version == 9
admissions_only_icd9 = diagnoses.loc[is_icd9_row, 'hadm_id'].unique()

# hadm_ids of the admissions that appear in the diagnoses table at all
has_diagnoses = admissions.hadm_id.isin(diagnoses.hadm_id.unique())
admissions_with_diagnoses = admissions.loc[has_diagnoses, 'hadm_id']

# an admission is eligible when it passes both membership checks
in_icd9 = admissions.hadm_id.isin(admissions_only_icd9)
in_diagnosed = admissions.hadm_id.isin(admissions_with_diagnoses)
eligible_admissions = admissions[in_icd9 & in_diagnosed]

# keep only patients with more than one eligible admission
admissions_per_patient = eligible_admissions.groupby('subject_id').size()
mult_adm_patients = admissions_per_patient[admissions_per_patient > 1].index.tolist()

print('nº patients eligible:', len(mult_adm_patients))
mimicIV_patients = mult_adm_patients

nº patients eligible: 55483


# Save eligible patients

In [8]:
# Folder that receives the eligible-patient lists, as configured in settings
eligible_base_path = os.path.join(
    settings.data_base,
    settings.eligible_patients_folder,
)

# File stems (no extension; it is appended when the lists are written).
# MIMIC-III list
mimicIII_filename = 'mimicIII_eligible_patients_exc.nodiag_single.adm'
mimicIII_filepath = os.path.join(eligible_base_path, mimicIII_filename)

# MIMIC-IV list
mimicIV_filename = 'mimicIV_eligible_patients_exc.nodiag_single.adm_no.icd10'
mimicIV_filepath = os.path.join(eligible_base_path, mimicIV_filename)

In [10]:
# Write each list of patient ids to "<filepath>.txt" as integers
outputs = (
    (mimicIII_filepath, mimicIII_patients),
    (mimicIV_filepath, mimicIV_patients),
)
for filepath, patients in outputs:
    np.savetxt(filepath + '.txt', patients, fmt='%d', delimiter=',')