# Store the patient IDs and admission IDs that are eligible as model input, according to the criteria below


Criteria:
1. An admission is only eligible if there are diagnoses associated with it.
2. A patient must have more than one (eligible) admission to the hospital.

In [1]:
import os

# Name of the repository root folder; the project modules imported by this
# notebook are resolved relative to it.
PROJECT_ROOT_NAME = 'master-thesis'

cwd = os.getcwd()

# Make the project root the working directory. The cell is idempotent:
# when it has already been run (cwd is the root) nothing happens, so
# re-running it or doing "Run All" twice no longer raises.
if os.path.basename(cwd) == PROJECT_ROOT_NAME:
    new_cwd = cwd
else:
    new_cwd = os.path.dirname(cwd) # parent directory
    # os.path.basename is used instead of splitting on '/' so the check
    # also works with Windows path separators.
    assert os.path.basename(new_cwd) == PROJECT_ROOT_NAME, (
        f"Expected the notebook to run from a direct sub-folder of "
        f"'{PROJECT_ROOT_NAME}', but the working directory is {cwd!r}"
    )
    os.chdir(new_cwd)

In [2]:
# Display the result of every top-level expression in a cell, not only the
# last one (IPython's default is "last_expr").
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
import os
from MimicIII import MimicIII
from MimicIV import MimicIV
from ICDCodesGrouper import ICDCodesGrouper
import pandas as pd
import numpy as np
from math import ceil
from datetime import timedelta

from config import Settings; settings = Settings()

In [4]:
# ICD code grouper built from the project settings; the same instance is
# passed to both the MIMIC-III and the MIMIC-IV readers created below.
grouper = ICDCodesGrouper(settings)

# Mimic-III

In [5]:
# MIMIC-III reader configured with the project settings and the shared ICD
# grouper; used below through read_admissions() and read_diagnoses().
mimiciii = MimicIII(settings,grouper=grouper)

In [6]:
# Load the MIMIC-III admissions and diagnoses tables.
admissions = mimiciii.read_admissions()
diagnoses = mimiciii.read_diagnoses()

# Criterion 1: drop every admission that owns a diagnosis row whose ICD-9
# code is missing.
missing_code_rows = diagnoses.ICD9_CODE.isna()
admissions_without_records = diagnoses.loc[missing_code_rows, 'HADM_ID'].unique()
has_diagnoses = ~admissions.HADM_ID.isin(admissions_without_records)
admissions = admissions[has_diagnoses]

# Criterion 2: keep the patients that still have more than one admission.
admissions_per_patient = admissions.groupby('SUBJECT_ID').size()
mult_adm_patients = admissions_per_patient[admissions_per_patient > 1].index.tolist()

# Report how many patients satisfy both criteria.
print('nº patients eligible:', len(mult_adm_patients))

mimicIII_patients = mult_adm_patients

nº patients eligible: 7499


# Mimic-IV (icd9-only)

In [7]:
# MIMIC-IV reader configured with the project settings and the shared ICD
# grouper; used below through read_admissions() and read_diagnoses().
mimiciv = MimicIV(settings,grouper=grouper)

In [8]:
# Load the MIMIC-IV admissions and diagnoses tables.
admissions = mimiciv.read_admissions()
diagnoses = mimiciv.read_diagnoses()

# Admissions that own at least one diagnosis row coded with ICD version 9.
# NOTE(review): despite the name, an admission mixing ICD-9 and ICD-10 rows
# would also be selected here - confirm such admissions do not exist.
is_icd9_row = diagnoses.icd_version == 9
admissions_only_icd9 = diagnoses.loc[is_icd9_row, 'hadm_id'].unique()

# Admissions that appear in the diagnoses table at all.
is_diagnosed = admissions.hadm_id.isin(diagnoses.hadm_id.unique())
admissions_with_diagnoses = admissions.loc[is_diagnosed, 'hadm_id']

# An admission is eligible when it satisfies both conditions.
in_icd9 = admissions.hadm_id.isin(admissions_only_icd9)
in_diagnosed = admissions.hadm_id.isin(admissions_with_diagnoses)
eligible_admissions = admissions[in_icd9 & in_diagnosed]

# Keep the patients with more than one eligible admission.
admissions_per_patient = eligible_admissions.groupby('subject_id').size()
mult_adm_patients = admissions_per_patient[admissions_per_patient > 1].index.tolist()

print('nº patients eligible:', len(mult_adm_patients))
mimicIV_patients = mult_adm_patients

nº patients eligible: 55483


# Save eligible patients

In [7]:
# Folder that receives the eligibility lists.
eligible_base_path = os.path.join(
    settings.data_base,
    settings.eligible_patients_folder,
)

# Base file names, without extension.
mimicIII_filename, mimicIV_filename = (
    'mimicIII_eligible_patients_exc.nodiag_single.adm',
    'mimicIV_eligible_patients_exc.nodiag_single.adm_no.icd10',
)

# Full output paths, still without extension.
mimicIII_filepath, mimicIV_filepath = (
    os.path.join(eligible_base_path, filename)
    for filename in (mimicIII_filename, mimicIV_filename)
)

In [8]:
# Write each eligible-patient list to "<filepath>.txt" as integers
# (np.savetxt writes a 1-D sequence one value per line).
for filepath, patient_ids in (
    (mimicIII_filepath, mimicIII_patients),
    (mimicIV_filepath, mimicIV_patients),
):
    # np.savetxt does not create missing folders, so make sure the target
    # folder exists before writing.
    os.makedirs(os.path.dirname(filepath) or '.', exist_ok=True)
    np.savetxt(filepath + '.txt', patient_ids, fmt='%d', delimiter=',')

NameError: name 'mimicIV_patients' is not defined

# Now for timewindows

## Mimic-III

Criteria:
1. No admissions without diagnoses.
2. Patients whose first and last admissions are separated by at least m months (m = 1, 3, 6, 12; one month is counted as 30 days).

In [7]:
# Reload the MIMIC-III tables: when the notebook is run top to bottom,
# `admissions` and `diagnoses` hold the MIMIC-IV data at this point.
admissions = mimiciii.read_admissions()
diagnoses = mimiciii.read_diagnoses()

In [12]:
eligible_base_path = os.path.join(settings.data_base,settings.eligible_patients_folder)
# np.savetxt does not create missing folders.
os.makedirs(eligible_base_path, exist_ok=True)

# A "month" is approximated as 30 days when measuring the time between visits.
DAYS_PER_MONTH = 30

# Everything up to the per-patient time span does not depend on the window
# size, so it is computed once instead of once per window.
admissions = mimiciii.read_admissions()

# 1. remove admissions without record of diagnoses
admissions_without_records = diagnoses.loc[pd.isna(diagnoses.ICD9_CODE),'HADM_ID'].unique()
admissions = admissions[~admissions.HADM_ID.isin(admissions_without_records)]

# months elapsed between the first and the last admission of every patient
res = admissions.groupby('SUBJECT_ID').ADMITTIME.apply(
    lambda subseries: (subseries.max() - subseries.min()).days / DAYS_PER_MONTH
)

for m in [1,3,6,12]:
    # 2. patients with at least m months between first and last visit.
    # ">=" (not ">") so that a span of exactly m months is eligible, as the
    # criterion "at least m months" states.
    at_least_m_month_diff_patients = res[res >= m].index.values
    eligible_admissions = admissions[admissions.SUBJECT_ID.isin(at_least_m_month_diff_patients)]

    # save patients and admissions ids (one file each per window size).
    for kind, ids in (
        ('patients', at_least_m_month_diff_patients),
        ('admissions', eligible_admissions.HADM_ID),
    ):
        mimicIII_window_filename = f'mimicIII_eligible_{kind}_time_window_{m}m_exc.nodiag_single.window'
        mimicIII_window_filepath = os.path.join(eligible_base_path, mimicIII_window_filename)
        np.savetxt(mimicIII_window_filepath+'.txt', ids, fmt='%d',delimiter=',')
        print('Saved ',mimicIII_window_filepath+'.txt')
    # blank line between windows in the log
    print()

Saved  data/eligible_patients/mimicIII_eligible_patients_time_window_1m_exc.nodiag_single.windowtxt
Saved  data/eligible_patients/mimicIII_eligible_admissions_time_window_1m_exc.nodiag_single.windowtxt
Saved  data/eligible_patients/mimicIII_eligible_patients_time_window_3m_exc.nodiag_single.windowtxt
Saved  data/eligible_patients/mimicIII_eligible_admissions_time_window_3m_exc.nodiag_single.windowtxt
Saved  data/eligible_patients/mimicIII_eligible_patients_time_window_6m_exc.nodiag_single.windowtxt
Saved  data/eligible_patients/mimicIII_eligible_admissions_time_window_6m_exc.nodiag_single.windowtxt
Saved  data/eligible_patients/mimicIII_eligible_patients_time_window_12m_exc.nodiag_single.windowtxt
Saved  data/eligible_patients/mimicIII_eligible_admissions_time_window_12m_exc.nodiag_single.windowtxt
