### Antibiotics

A number of quantities relevant to patient treatment are not included in the MIMIC-Code output. In this notebook we therefore extract these quantities manually from the raw MIMIC-III data files.

In [1]:
import os
from collections import defaultdict

import numpy as np
import pandas as pd
from tqdm import tqdm

# Default number of rows per chunk when streaming a CSV file with `read_csv`
CHUNK_SIZE = 10_000

### Convenience functions

As the files are on the order of gigabytes in size, we read and process them incrementally, one chunk at a time.

In [2]:
def read_csv(path, usecols, chunksize=CHUNK_SIZE):
    """Stream a CSV file piece by piece instead of loading it all at once.

    Parameters:
        path: location of the CSV file.
        usecols: columns to load from the file.
        chunksize: number of rows per yielded chunk.

    Yields:
        Tuples `(chunk_number, chunk)`, where `chunk_number` counts up from 0
        and `chunk` is a DataFrame whose row index has been reset.
    """
    reader = pd.read_csv(path, usecols=usecols, encoding='latin1', engine='c', chunksize=chunksize)
    chunk_number = 0
    for frame in reader:
        # drop the file-wide row index so every chunk is indexed from 0 again
        yield chunk_number, frame.reset_index(drop=True)
        chunk_number += 1

### Patients

As the raw MIMIC-III database does not identify admissions by `ICUSTAY_ID` but rather by `SUBJECT_ID` (or, more specifically, `HADM_ID`), and subjects can be re-admitted several times, we need to determine for each measurement which ICU stay it actually belongs to, by checking for which patient and at what time it was recorded.

In [3]:
# Load the ICU stays (one row per stay) and parse the admission/discharge
# timestamps so they can be compared against event times later on.
icustay_cols = ['ICUSTAY_ID', 'HADM_ID', 'INTIME', 'OUTTIME']
icustays = (
    pd.read_csv(r"D:/mimic-iii-clinical-database-1.4/ICUSTAYS.csv", usecols=icustay_cols)
    .assign(
        INTIME=lambda stays: pd.to_datetime(stays.INTIME),
        OUTTIME=lambda stays: pd.to_datetime(stays.OUTTIME),
    )
)
icustays.head()

Unnamed: 0,HADM_ID,ICUSTAY_ID,INTIME,OUTTIME
0,110404,280836,2198-02-14 23:27:38,2198-02-18 05:26:11
1,106296,206613,2170-11-05 11:05:29,2170-11-08 17:46:57
2,188028,220345,2128-06-24 15:05:20,2128-06-27 12:32:29
3,173727,249196,2120-08-07 23:12:42,2120-08-10 00:39:04
4,164716,210407,2186-12-25 21:08:04,2186-12-27 12:01:13


### Antibiotics

Source: [reason_for_admission.sql](https://github.com/AmsterdamUMC/AmsterdamUMCdb/blob/master/amsterdamumcdb/sql/diagnosis/reason_for_admission.sql)

In [4]:
# Search the MIMIC-III item dictionary by (partial, case-insensitive) label to
# find the itemid matching an antibiotic listed in AmsterdamUMCdb
dct = pd.read_csv('D:/mimic-iii-clinical-database-1.4/D_ITEMS.csv')
dct.loc[lambda items: items.LABEL.fillna('').str.contains('fosca', case=False)]

Unnamed: 0,ROW_ID,ITEMID,LABEL,ABBREVIATION,DBSOURCE,LINKSTO,CATEGORY,UNITNAME,PARAM_TYPE,CONCEPTID
9873,14033,225871,Foscarnet,Foscarnet,metavision,inputevents_mv,Antibiotics,dose,Solution,


In [5]:
# ORDERCATEGORYNAME values under which antibiotic orders are selected
ANTIBIOTICS_CATEGORIES = ['08-Antibiotics (IV)', '09-Antibiotics (Non IV)']

# ORDERCATEGORYNAME values under which prophylaxis orders are selected
PROFYLAXIS_CATEGORIES = ['10-Prophylaxis (IV)', '11-Prophylaxis (Non IV)']

# ITEMIDs of antibiotics that are flagged as prophylactic (`profyl_antibiotics`).
# `None` is a placeholder for a drug for which no ITEMID was filled in; entries
# whose comment starts with `--` presumably carry the item name from the
# AmsterdamUMCdb source query -- TODO confirm.
PROFYL_ANTIBIOTICS = [
    225899, # Co-Trimoxazole (Bactrimel) / Bactrim (SMX/TMP) -> often prophylactic (unless high dose)
    225798, # Vancomycin -> prophylaxis for valve surgery
    225884, # Metronidazole -> often used for GI surgical prophylaxis
    225842, # Ampicillin: wikipedia
    225885, # Micafungin -- NOTE(review): also listed in SEPSIS_ANTIBIOTICS, so these rows get both flags; confirm intended
    7612,  # Amoxicillin/Clavulanic acid (Augmentin) -> often used for ENT surgical prophylaxis -- NOTE(review): not in the 2258xx range of the other IDs; verify it occurs in INPUTEVENTS_MV
    None,  # --Cefuroxime (Zinacef) -> often used for GI surgical prophylaxis
    225850, # Cefazolin -> prophylaxis
    225866, # Erythromycin -> often used for gastroparesis
]

# ITEMIDs of prophylactic medication flagged as `profyl_other`; per the notes
# below most of these are not antibiotics. `None` is a placeholder for a drug
# for which no ITEMID was filled in.
OTHER_PROFYL = [
    225975, # Heparin Sodium (Prophylaxis)
    225906, # Enoxaparin (Lovenox) -> not antibiotic!
    225911, # Ranitidine (Prophylaxis) -> not antibiotic
    225913, # Coumadin (Warfarin) -> prevents blood clots (so not antibiotic)
    225908, # Fondaparinux -> prevents blood clots
    None,  # --Tetanus immunoglobulin -> prophylaxis (not antibiotic)
    None,  # --Fusidic acid (Fucidin) -> prophylaxis
    None,  # --Immunoglobulin (Nanogam) -> not antibiotic
    None,  # --Cefotaxime (Claforan) -> prophylaxis
]

# ITEMIDs of antimicrobials flagged as `sepsis_antibiotics`.
# `None` is a placeholder for a drug for which no ITEMID was filled in; entries
# whose comment starts with `--` presumably carry the item name from the
# AmsterdamUMCdb source query -- TODO confirm. Some IDs occur twice; this has
# no effect on the `isin` membership test the list is used for.
SEPSIS_ANTIBIOTICS = [
    225845, # Azithromycin: use -> wikipedia
    225851, # Cefepime
    227691, # Keflex
    225871, # Foscarnet
    225881, # Linezolid
    225873, # Gancyclovir
    225885, # Micafungin -- NOTE(review): also listed in PROFYL_ANTIBIOTICS
    225840, # Amikacin
    None, # --Amoxicillin (Clamoxyl/Flemoxin)
    225843, # Ampicillin/Sulbactam (Unasyn)
    225888, # Nafcillin
    225890, # Penicillin G potassium
    225853, # Ceftazidime
    225859, # Ciprofloxacin
    225898, # Rifampin
    225860, # Clindamycin
    225905, # Voriconazole
    225902, # Tobramycin
    225876, # Imipenem/Cilastatin
    225865, # Doxycycline
    None, # --Flucloxacillin (Stafoxil/Floxapen)
    225869, # Fluconazole (Diflucan)
    None, # --Ganciclovir (Cymevene)
    None, # --Flucytosine (Ancotil)
    225875, # Gentamicin
    225871, # --Foscarnet trisodium (Foscavir) (same ID as Foscarnet above)
    None, # --Amphotericin B (Fungizone)
    225883, # Meropenem
    225868, # --Myambutol (ethambutol)
    225896, # Quinine
    225847, # Aztreonam
    None, # --Chloramphenicol
    225893, # Piperacillin/Tazobactam (Zosyn)
    225892, # Piperacillin
    225855, # Ceftriaxone
    225848, # Caspofungin
    None, # --Itraconazole (Trisporal)
    225879, # Levofloxacin
    None, # --Amphotericin B lipid complex  (Abelcet)
    None, # --Ecalta (Anidulafungin)
    None, # --Research Anidulafungin/placebo
    None, # --Research Caspofungin/placebo
    225838, # Ambisome
    225881, # --Linezolid (Zyvoxid) (same ID as Linezolid above)
    None, # --Tigecycline (Tygacil)
    225863, # Daptomycin
    225862, # Colistin
]

In [6]:
antibiotics_dfs = []
inputevents_chunks = read_csv(
    'D:/mimic-iii-clinical-database-1.4/INPUTEVENTS_MV.csv',
    usecols=['HADM_ID', 'ITEMID', 'ORDERCATEGORYNAME', 'STARTTIME', 'ENDTIME'],
    chunksize=50000,
)
for _, events in tqdm(inputevents_chunks):
    # keep only orders filed under an antibiotics or prophylaxis category
    in_category = events.ORDERCATEGORYNAME.isin(ANTIBIOTICS_CATEGORIES + PROFYLAXIS_CATEGORIES)
    events = events[in_category].copy()

    # parse the administration window so it can be compared with the ICU stay
    events['STARTTIME'] = pd.to_datetime(events['STARTTIME'])
    events['ENDTIME'] = pd.to_datetime(events['ENDTIME'])

    # pair each record with the ICU stays of the same hospital admission and
    # keep only the pairs where the administration overlaps the stay
    events = icustays.merge(events, on='HADM_ID', how='inner')
    during_stay = (events.INTIME < events.ENDTIME) & (events.OUTTIME > events.STARTTIME) & events.ICUSTAY_ID.notna()
    events = events[during_stay].copy()

    # classify each record by the list its ITEMID appears in
    is_other_profyl = events.ITEMID.isin(OTHER_PROFYL)
    is_profyl_antibiotic = events.ITEMID.isin(PROFYL_ANTIBIOTICS)
    is_sepsis_antibiotic = events.ITEMID.isin(SEPSIS_ANTIBIOTICS)
    in_any_list = is_sepsis_antibiotic | is_profyl_antibiotic | is_other_profyl

    # one output row per matched record, with a 0/1 flag per list
    matched = events[in_any_list]
    antibiotics_dfs.append(pd.DataFrame({
        'icustay_id': matched.ICUSTAY_ID,
        'starttime': matched.STARTTIME,
        'endtime': matched.ENDTIME,
        'sepsis_antibiotics': is_sepsis_antibiotic[in_any_list].astype(int),
        'profyl_antibiotics': is_profyl_antibiotic[in_any_list].astype(int),
        'profyl_other': is_other_profyl[in_any_list].astype(int),
    }))

# Stack the per-chunk antibiotics DataFrames into a single frame
antibiotics_dfs = pd.concat(antibiotics_dfs, ignore_index=True)
antibiotics_dfs.head()

73it [00:14,  4.88it/s]


Unnamed: 0,icustay_id,starttime,endtime,sepsis_antibiotics,profyl_antibiotics,profyl_other
0,253139,2141-06-11 18:22:00,2141-06-11 18:23:00,1,0,0
1,253139,2141-06-12 18:00:00,2141-06-12 18:01:00,1,0,0
2,237466,2121-05-16 01:04:00,2121-05-16 01:05:00,0,1,0
3,237466,2121-05-16 02:08:00,2121-05-16 02:09:00,1,0,0
4,237466,2121-05-16 08:00:00,2121-05-16 08:01:00,0,0,1


In [7]:
# Write the extracted antibiotic/prophylaxis records to disk.
# `to_csv` does not create missing parent folders, so make sure the output
# directory exists first; otherwise a fresh run fails on this last cell.
os.makedirs('final', exist_ok=True)
antibiotics_dfs.to_csv('final/antibiotics_cohort.csv', index=False)