In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math, copy, time
from torch.autograd import Variable
from torch.utils import data
import matplotlib.pyplot as plt
import pandas as pd
import pickle
from torchtext import data, datasets

from google.colab import auth

# Run on the GPU when PyTorch reports one is available, otherwise on the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

In [0]:
# Recursively download the PIC 1.0.0 files from PhysioNet into ./physionet.org/...
# (-r recurse, -N skip files that are not newer, -c resume partial downloads,
# -np never ascend to the parent directory). The password is prompted for
# interactively (--ask-password), so it is not stored in the notebook.
!wget -r -N -c -np --user kyleliu --ask-password https://physionet.org/files/picdb/1.0.0/

In [0]:
def _read_gzip_csv(path):
  """Load one gzip-compressed CSV file into a DataFrame."""
  return pd.read_csv(path, compression='gzip')


# One DataFrame per downloaded table; every file is a gzip-compressed CSV.
admissions = _read_gzip_csv('physionet.org/files/picdb/1.0.0/ADMISSIONS.csv.gz')
chartevents = _read_gzip_csv('physionet.org/files/picdb/1.0.0/CHARTEVENTS.csv.gz')
diagnoses_icd = _read_gzip_csv('physionet.org/files/picdb/1.0.0/DIAGNOSES_ICD.csv.gz')
d_icd_diagnoses = _read_gzip_csv('physionet.org/files/picdb/1.0.0/D_ICD_DIAGNOSES.csv.gz')
d_items = _read_gzip_csv('physionet.org/files/picdb/1.0.0/D_ITEMS.csv.gz')
d_labitems = _read_gzip_csv('physionet.org/files/picdb/1.0.0/D_LABITEMS.csv.gz')
emr_symptoms = _read_gzip_csv('physionet.org/files/picdb/1.0.0/EMR_SYMPTOMS.csv.gz')
icu_stays = _read_gzip_csv('physionet.org/files/picdb/1.0.0/ICUSTAYS.csv.gz')
input_events = _read_gzip_csv('physionet.org/files/picdb/1.0.0/INPUTEVENTS.csv.gz')
lab_events = _read_gzip_csv('physionet.org/files/picdb/1.0.0/LABEVENTS.csv.gz')
patients = _read_gzip_csv('physionet.org/files/picdb/1.0.0/PATIENTS.csv.gz')
prescriptions = _read_gzip_csv('physionet.org/files/picdb/1.0.0/PRESCRIPTIONS.csv.gz')
surgery_vital_signs = _read_gzip_csv('physionet.org/files/picdb/1.0.0/SURGERY_VITAL_SIGNS.csv.gz')
microbiology_events = _read_gzip_csv('physionet.org/files/picdb/1.0.0/MICROBIOLOGYEVENTS.csv.gz')

In [0]:
# Plain-dict lookups built from the dictionary tables, so later cells can
# translate an id/code with a single dict access.

# chart ITEMID -> LABEL
item_dict = dict(zip(d_items['ITEMID'], d_items['LABEL']))

# lab ITEMID -> LABEL
lab_item_dict = dict(zip(d_labitems['ITEMID'], d_labitems['LABEL']))

# ICD10_CODE_CN -> ICD10_CODE
ICD_CN_TO_ICD = dict(zip(d_icd_diagnoses['ICD10_CODE_CN'], d_icd_diagnoses['ICD10_CODE']))


In [0]:
# Clean: keep only each patient's first admission.

# Order everything chronologically first; the scan below relies on
# admissions being sorted by ADMITTIME so that "first seen" means "earliest".
admissions = admissions.sort_values(by='ADMITTIME')
chartevents = chartevents.sort_values(by='CHARTTIME')
lab_events = lab_events.sort_values(by='CHARTTIME')

admits_to_keep = []      # HADM_ID of the first admission of every patient
seen_patients = set()    # SUBJECT_IDs that already contributed an admission

for subject_id, hadm_id in zip(admissions['SUBJECT_ID'], admissions['HADM_ID']):
  if subject_id in seen_patients:
    continue
  seen_patients.add(subject_id)
  admits_to_keep.append(hadm_id)

In [0]:
def remove_admits(df, hadm_ids=None):
  """Return the rows of ``df`` whose HADM_ID is one of the admissions to keep.

  Args:
    df: DataFrame with a 'HADM_ID' column.
    hadm_ids: admission ids to keep. Defaults to the notebook-level
      ``admits_to_keep`` list when omitted.

  Returns:
    A new, independent DataFrame (explicit copy). Later cells add columns
    such as 'HOURS_IN' to the filtered frames; returning a copy keeps those
    assignments from being made on a slice of the original frame
    (SettingWithCopyWarning).
  """
  if hadm_ids is None:
    hadm_ids = admits_to_keep
  return df[df['HADM_ID'].isin(hadm_ids)].copy()

# Restrict every admission-keyed table to the retained admissions.
# NOTE(review): microbiology_events is not filtered here although it gets an
# HOURS_IN column later -- confirm whether that is intentional.
(
  admissions,
  chartevents,
  diagnoses_icd,
  emr_symptoms,
  icu_stays,
  input_events,
  lab_events,
  prescriptions,
  surgery_vital_signs,
) = [
  remove_admits(table)
  for table in (
    admissions,
    chartevents,
    diagnoses_icd,
    emr_symptoms,
    icu_stays,
    input_events,
    lab_events,
    prescriptions,
    surgery_vital_signs,
  )
]

In [0]:
from datetime import date, timedelta, time, datetime

def to_datetime(x):
  """Parse a 'YYYY-MM-DD HH:MM:SS' string into a ``datetime``.

  The string is split on whitespace into a date token and a time token;
  the first three '-'-separated and ':'-separated fields are converted
  with ``int`` and passed to ``datetime`` in order.
  """
  tokens = x.split()
  ymd = tokens[0].split("-")
  hms = tokens[1].split(":")

  fields = [int(ymd[i]) for i in range(3)] + [int(hms[i]) for i in range(3)]
  return datetime(*fields)

# Per-patient lookups keyed by SUBJECT_ID:
#   birth_date       -> datetime parsed from PATIENTS.DOB
#   admit_date       -> datetime parsed from ADMISSIONS.ADMITTIME
#   age_at_admission -> timedelta (admit_date - birth_date)
age_at_admission = {}
birth_date = {}
admit_date = {}

for subject_id, dob in zip(patients['SUBJECT_ID'], patients['DOB']):
  birth_date[subject_id] = to_datetime(dob)

for subject_id, admit_time in zip(admissions['SUBJECT_ID'], admissions['ADMITTIME']):
  admitted_at = to_datetime(admit_time)
  admit_date[subject_id] = admitted_at
  age_at_admission[subject_id] = admitted_at - birth_date[subject_id]

In [0]:
# Time since admission (hours)

def isFloat(string):
    """Return True when ``float(string)`` succeeds, False otherwise.

    Accepts any object, not only strings: numbers (including NaN) return
    True, while values ``float`` cannot convert at all -- e.g. ``None`` --
    return False instead of raising.
    """
    try:
        float(string)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number (e.g. a timestamp).
        # TypeError: an object float() does not accept, such as None.
        return False
    return True

def normalize_time(patient_id, x):
  """Return the hours elapsed between the patient's admission and ``x``.

  Args:
    patient_id: key into ``admit_date``.
    x: timestamp string understood by ``to_datetime``.

  Returns:
    Hours (float) from ``admit_date[patient_id]`` to ``x``. When ``x``
    passes ``isFloat`` (i.e. it is numeric rather than a timestamp string,
    presumably a missing value read as NaN) the constant 100000 is returned
    instead.
  """
  if not isFloat(x):
    elapsed = to_datetime(x) - admit_date[patient_id]
    return elapsed.total_seconds() / 3600.0
  return 100000

In [0]:
# Every SUBJECT_ID in the patients table, in table order (a list, despite the name).
patient_set = patients['SUBJECT_ID'].tolist()

In [0]:
# Add an HOURS_IN column (hours since the patient's admission) to every event
# table; the second element names the timestamp column that table uses.
for frame, time_col in (
  (chartevents, 'CHARTTIME'),
  (lab_events, 'CHARTTIME'),
  (surgery_vital_signs, 'MONITORTIME'),
  (microbiology_events, 'CHARTTIME'),
  (prescriptions, 'STARTDATE'),
):
  frame['HOURS_IN'] = frame.apply(
    lambda row: normalize_time(row.SUBJECT_ID, row[time_col]), axis=1
  )

In [0]:
# SIRS score
# Possibly mask by time later

# Upper bound of each age bracket, expressed in hours:
# 1 week, 1 month, 1 year, 5 years, 12 years, 18 years.
# Every *_thresh list below has one entry per bracket, in the same order.
age_thresh = [7 * 24, 30 * 24, 365 * 24, 5 * 365 * 24, 12 * 365 * 24, 18 * 365 * 24]

# Core temperature -- Item 1001 / SV1 (same limits for every bracket)
temp_hi = 38.5
temp_lo = 36

# Mean heart rate -- Item 1003 / SV2
tachycardia_thresh = [180, 180, 180, 140, 130, 110]
bradycardia_thresh = [100, 100, 90, 0, 0, 0]

# Mean respiratory rate -- Item 1004 / SV6
resp_thresh = [50, 40, 34, 22, 18, 14]

# White blood cell count -- Lab Item 5141
hiwbc_thresh = [34, 19.5, 17.5, 15.5, 13.5, 11]
lowbc_thresh = [0, 5, 5, 6, 4.5, 4.5]

# Per-patient time (hours since admission) at which each criterion was first
# met; -1 means "not met so far".
temp_dict = dict.fromkeys(patient_set, -1)
hr_dict = dict.fromkeys(patient_set, -1)
rr_dict = dict.fromkeys(patient_set, -1)
wbc_dict = dict.fromkeys(patient_set, -1)

def _age_bracket(age_hours):
  """Return the index into the per-bracket threshold lists for an age in hours.

  The result is the first bracket whose upper bound in ``age_thresh`` is not
  exceeded. Ages beyond the last bound are clamped to the last bracket, so
  the lookup can no longer run off the end of the lists (previously an
  IndexError for anyone older than the final threshold).
  """
  for bracket, upper_bound in enumerate(age_thresh):
    if not age_hours > upper_bound:
      return bracket
  return len(age_thresh) - 1


def _mark_first_onset(onset_by_patient, patient_id, hours_in):
  """Record ``hours_in`` for the patient unless a time is already stored.

  NOTE(review): -1 doubles as the "not set" marker, so a measurement taken
  an hour or more before admission (hours_in <= -1) is indistinguishable
  from "not set": it can be overwritten and is not counted by checks of the
  form ``> -1``.
  """
  if onset_by_patient[patient_id] <= -1:
    onset_by_patient[patient_id] = hours_in


# Vital signs: temperature (1001), heart rate (1003), respiratory rate (1004).
# Rows are visited in frame order, so the first abnormal row per patient wins.
for _, row in chartevents[chartevents['ITEMID'].isin([1001, 1003, 1004])].iterrows():
  time_since = normalize_time(row.SUBJECT_ID, row.CHARTTIME)
  my_age = age_at_admission[row.SUBJECT_ID].total_seconds() / 3600.0
  my_index = _age_bracket(my_age)
  if row.ITEMID == 1001:
    if (row.VALUENUM > temp_hi) or (row.VALUENUM < temp_lo):
      _mark_first_onset(temp_dict, row.SUBJECT_ID, time_since)
  elif row.ITEMID == 1003:
    if (row.VALUENUM > tachycardia_thresh[my_index]) or (row.VALUENUM < bradycardia_thresh[my_index]):
      _mark_first_onset(hr_dict, row.SUBJECT_ID, time_since)
  elif row.ITEMID == 1004:
    if row.VALUENUM > resp_thresh[my_index]:
      _mark_first_onset(rr_dict, row.SUBJECT_ID, time_since)

# White blood cell count (lab item 5141), same first-abnormal-row rule.
for _, row in lab_events[lab_events['ITEMID'] == 5141].iterrows():
  time_since = normalize_time(row.SUBJECT_ID, row.CHARTTIME)
  my_age = age_at_admission[row.SUBJECT_ID].total_seconds() / 3600.0
  my_index = _age_bracket(my_age)
  if (row.VALUENUM > hiwbc_thresh[my_index]) or (row.VALUENUM < lowbc_thresh[my_index]):
    _mark_first_onset(wbc_dict, row.SUBJECT_ID, time_since)


# A patient meets SIRS when at least two of the four criteria were met and one
# of them is temperature or white blood cell count. The stored value is the
# latest of the recorded criterion times (unmet criteria contribute -1).
MEETS_SIRS = dict()
for p in patient_set:
  onset_times = [temp_dict[p], hr_dict[p], rr_dict[p], wbc_dict[p]]
  # Named `criteria_met` rather than `sum` so the builtin sum() stays usable
  # in later cells.
  criteria_met = len([t for t in onset_times if t > -1])
  if ((temp_dict[p] > -1) or (wbc_dict[p] > -1)) and (criteria_met >= 2):
    MEETS_SIRS[p] = max(onset_times)

print(MEETS_SIRS)
print(len(MEETS_SIRS))


In [0]:
# Infection

# ICD-10 codes that are treated as evidence of infection.
infection_codes = [
  'A02.1', 'A40.0', 'A40.1', 'A40.3', 'A40.8', 'A40.9', 'A41.2', 'A41.01', 'A41.02', 'A41.1', 'A40.3', 'A41.4', 'A41.50', 'A41.3', 
  'A41.51', 'A41.52', 'A41.53', 'A41.59', 'A41.81', 'A42.7', 'A41.89', 'A41.9', 'A22.7', 'A26.7', 'A32.7', 
  'B00.7', 
  'I26.01', 'I26.90', 'I40.0', 'I76', 
  'O85', 'O86.81', 'O88.311', 'O88.312', 'O88.313', 'O88.32', 
  'P36.0', 'P36.10', 'P36.19', 'P36.2', 'P36.30', 'P36.39', 'P36.4', 'P36.5', 'P36.8', 'P36.9', 
  'R65.21', 'R65.10', 'R65.20', 'R65.11',
  'T81.12XD', 'T81.12XS', 'T81.12XA'
]

# SUBJECT_ID -> 1 when evidence of infection was found, -1 otherwise.
infect_dict = dict()
for p in patient_set:
  infect_dict[p] = -1

# Translate each diagnosis to its ICD10_CODE via the lookup table. Codes that
# are absent from the table map to NaN and simply never match, instead of
# aborting the whole scan with a KeyError.
mapped_codes = diagnoses_icd['ICD10_CODE_CN'].map(ICD_CN_TO_ICD)
infected_subjects = diagnoses_icd.loc[mapped_codes.isin(infection_codes), 'SUBJECT_ID']

for p in infected_subjects:
  infect_dict[p] = 1

# Lower-case name fragments searched for in prescription drug names.
# Order and duplicates are kept exactly as originally listed.
antibiotics = [
  'adoxa', 'ala-tet', 'alodox', 'amikacin', 'amikin', 'amoxicillin', 'amoxicillin%clavulanate', 'clavulanate', 'ampicillin', 'augmentin',
  'avelox', 'avidoxy', 'azactam', 'azithromycin', 'aztreonam', 'axetil', 'bactocill', 'bactrim', 'bethkis', 'biaxin',
  'bicillin l-a', 'cayston', 'cefazolin', 'cedax', 'cefoxitin', 'ceftazidime', 'cefaclor', 'cefadroxil', 'cefdinir', 'cefditoren',
  'cefepime', 'cefotetan', 'cefotaxime', 'cefpodoxime', 'cefprozil', 'ceftibuten', 'ceftin', 'cefuroxime ', 'cefuroxime', 'cephalexin',
  'chloramphenicol', 'cipro', 'ciprofloxacin', 'claforan', 'clarithromycin', 'cleocin', 'clindamycin', 'cubicin', 'dicloxacillin', 'doryx',
  'doxycycline', 'duricef', 'dynacin', 'ery-tab', 'eryped', 'eryc', 'erythrocin', 'erythromycin', 'factive', 'flagyl',
  'fortaz', 'furadantin', 'garamycin', 'gentamicin', 'kanamycin', 'keflex', 'ketek', 'levaquin', 'levofloxacin', 'lincocin',
  'macrobid', 'macrodantin', 'maxipime', 'mefoxin', 'metronidazole', 'minocin', 'minocycline', 'monodox', 'monurol', 'morgidox',
  'moxatag', 'moxifloxacin', 'myrac', 'nafcillin sodium', 'nicazel doxy 30', 'nitrofurantoin', 'noroxin', 'ocudox', 'ofloxacin', 'omnicef',
  'oracea', 'oraxyl', 'oxacillin', 'pc pen vk', 'pce dispertab', 'panixine', 'pediazole', 'penicillin', 'periostat', 'pfizerpen',
  'piperacillin', 'tazobactam', 'primsol', 'proquin', 'raniclor', 'rifadin', 'rifampin', 'rocephin', 'smz-tmp', 'septra',
  'septra ds', 'septra', 'solodyn', 'spectracef', 'streptomycin sulfate', 'sulfadiazine', 'sulfamethoxazole', 'trimethoprim', 'sulfatrim', 'sulfisoxazole',
  'suprax', 'synercid', 'tazicef', 'tetracycline', 'timentin', 'tobi', 'tobramycin', 'trimethoprim', 'unasyn', 'vancocin',
  'vancomycin', 'vantin', 'vibativ', 'vibra-tabs', 'vibramycin', 'zinacef', 'zithromax', 'zmax', 'zosyn', 'zyvox',
]

# Any prescription whose drug name contains one of the antibiotic fragments
# also counts as evidence of infection.
# Lower-case the names once instead of once per antibiotic.
drug_names_lower = prescriptions['DRUG_NAME_EN'].str.lower()

for x in antibiotics:
  # regex=False: the fragments are literal text, not patterns.
  # na=False: rows without a drug name count as "no match"; otherwise the
  # NaN they produce makes the boolean mask unusable for indexing.
  is_match = drug_names_lower.str.contains(x, regex=False, na=False)
  for p in prescriptions[is_match].SUBJECT_ID:
    infect_dict[p] = 1

# Septic = meets SIRS and has evidence of infection; the value carried over
# is the patient's MEETS_SIRS time.
SEPTIC = dict()
for p in MEETS_SIRS:
  if infect_dict[p] > -1:
    SEPTIC[p] = MEETS_SIRS[p]

print(len(SEPTIC))
