# Prediction Change of Antibiotic Prescription 

# 배경
- Sepsis환자가 중환자실에 있는 동안 항생제를 유지할것인가? 바꿀것인가?


# 문제 종류
- binary classification: 항생제 처방을 바꿨는가 유지했는가

# 실험
- 코호트

<img src="images/pnp_fig1.png">

- 라벨링
    - TBD
<img src="images/ab-ex1.png">
<img src="images/ab-ex2.png">

- 피처 전처리/엔지니어링
    - TBD
- 모델
    - LR
    - RF
    - GB
    - (R)NN

# 교차검증
- 5-folds CV

# 평가
- AUROC
- variable importance: 어떤 변수가 다음 처방에 영향을 미쳤을까?


# 예측값의 활용
- 처방 실수의 예방

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns; sns.set(rc={'figure.figsize':(15,15)})
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:mimic@127.0.0.1:5555/mimic')

In [3]:
pd.read_sql("""
select * from abx_poe_list limit 10
""", engine)

Unnamed: 0,drug,numobs
0,Vancomycin,42633
1,Vancomycin HCl,22291
2,Levofloxacin,20867
3,MetRONIDAZOLE (FLagyl),9985
4,Piperacillin-Tazobactam Na,9275
5,CefazoLIN,8986
6,CefePIME,8627
7,Piperacillin-Tazobactam,8482
8,Metronidazole,8302
9,Ciprofloxacin HCl,7559


In [23]:
pd.read_sql("""
select * from abx_micro_poe limit 10
""", engine)

Unnamed: 0,icustay_id,antibiotic_name,antibiotic_time,antibiotic_endtime,last72_charttime,next24_charttime,suspected_infection_time,specimen,positiveculture
0,200001,CefTAZidime,2181-11-18,2181-11-20,NaT,2181-11-18 11:45:00,2181-11-18 11:45:00,BLOOD CULTURE,0
1,200001,CefTAZidime,2181-11-18,2181-11-20,NaT,2181-11-18 11:10:00,2181-11-18 11:10:00,BLOOD CULTURE,0
2,200001,Vancomycin,2181-11-19,2181-11-28,2181-11-18 11:10:00,2181-11-19 06:00:00,2181-11-18 11:10:00,BLOOD CULTURE,0
3,200001,Vancomycin,2181-11-19,2181-11-28,2181-11-18 11:45:00,2181-11-19 04:18:00,2181-11-18 11:45:00,BLOOD CULTURE,0
4,200001,Vancomycin,2181-11-19,2181-11-28,2181-11-18 11:10:00,2181-11-19 04:18:00,2181-11-18 11:10:00,BLOOD CULTURE,0
5,200001,Vancomycin,2181-11-19,2181-11-19,2181-11-18 11:10:00,2181-11-19 13:00:00,2181-11-18 11:10:00,BLOOD CULTURE,0
6,200001,Vancomycin,2181-11-19,2181-11-19,2181-11-18 11:45:00,2181-11-19 13:00:00,2181-11-18 11:45:00,BLOOD CULTURE,0
7,200001,Vancomycin,2181-11-19,2181-11-19,2181-11-18 11:10:00,2181-11-19 04:18:00,2181-11-18 11:10:00,BLOOD CULTURE,0
8,200001,Vancomycin,2181-11-19,2181-11-19,2181-11-18 11:45:00,2181-11-19 04:18:00,2181-11-18 11:45:00,BLOOD CULTURE,0
9,200001,Vancomycin,2181-11-19,2181-11-19,2181-11-18 11:10:00,2181-11-19 06:00:00,2181-11-18 11:10:00,BLOOD CULTURE,0


In [50]:
sql = """
select a.subject_id, s.* 
from sepsis3_cohort s 
    left join icustays a 
    on s.hadm_id=a.hadm_id and s.icustay_id=a.icustay_id
where s.excluded = 0
"""
label = pd.read_sql(sql, engine)
label.head().T

Unnamed: 0,0,1,2,3,4
subject_id,165,266,422,671,2457
hadm_id,170252,186251,117029,126769,135882
icustay_id,247247,293876,299666,246119,298039
intime,2170-10-03 17:17:53,2168-07-10 08:02:23,2173-04-03 11:36:29,2195-12-31 04:45:35,2139-10-29 18:14:01
outtime,2170-10-04 16:24:15,2168-07-11 17:40:38,2173-04-08 14:20:31,2196-01-07 17:35:50,2139-10-30 18:26:57
age,86.4824,77.5623,57.6467,47.7629,79.6978
gender,M,F,M,M,F
ethnicity,BLACK/AFRICAN AMERICAN,BLACK/AFRICAN AMERICAN,WHITE,WHITE,HISPANIC OR LATINO
first_service,MED,NMED,CMED,NSURG,GYN
dbsource,metavision,metavision,metavision,metavision,metavision


In [29]:
label.groupby('excluded').size()

excluded
0    11791
1    49741
dtype: int64

In [30]:
label.subject_id.nunique()

46476

In [31]:
label[label.excluded==0].subject_id.nunique()

11791

In [32]:
label[label.excluded==1].subject_id.nunique()

36839

In [33]:
sql = """
select count(distinct subject_id)
from icustays
"""
pd.read_sql(sql, engine)

Unnamed: 0,count
0,46476


- icu의 총 환자 46476명 중에 sepsis3 조건에 맞는 icustay는 11791 건
    - 해당 조건의 고유 환자수도 11791건으로 icustays:subejct_id = 1:1
    - 즉, 각 환자가 한번만 icu에 입원했고, sepsis 진단을 받았다. 여러번 입원하고 진단받은 사례는 없다.
    - 따라서, 환자 수준의 모델링만 가능, 방문 수준의 모델링은 불가능

<hr>

In [40]:
label[(label.excluded==0) & (label.antibiotic_time_poe.notnull())]['icustay_id'].count()

7588

In [41]:
label[(label.excluded==0) & (label.antibiotic_time_poe.isnull())]['icustay_id'].count()

4203

In [43]:
label[(label.excluded==0) & (label.antibiotic_time_poe.notnull())].subject_id.nunique()

7588

In [44]:
label[(label.excluded==0) & (label.antibiotic_time_poe.isnull())].subject_id.nunique()

4203

- icu의 총 환자 46476명 중에 sepsis3 조건에 맞는 icustay는 11791 건
    - 항생제 처방을 받은 환자수 7588, 건수 7588, 
    - 항생제 처방을 받지 않은 환자수 4203, 건수는 4203

<hr>

In [47]:
sql = """
select i.subject_id, s.*, a.antibiotic_time_poe
from sepsis3 s 
    join sepsis3_cohort a 
        on s.icustay_id=a.icustay_id
    join icustays i
        on s.icustay_id=i.icustay_id
where a.excluded = 0
"""
features = pd.read_sql(sql, engine)

In [48]:
features.head().T

Unnamed: 0,0,1,2,3,4
subject_id,61691,41710,56369,74282,67800
icustay_id,200021,200028,200033,200061,200075
hadm_id,109307,181955,198650,121149,132255
excluded,0,0,0,0,0
intime,2114-12-26 19:45:12,2133-10-29 17:13:50,2198-08-07 17:56:17,2134-01-23 16:38:46,2159-09-23 00:13:20
outtime,2114-12-27 22:46:28,2133-11-01 14:55:14,2198-08-21 14:59:18,2134-01-25 16:59:14,2159-09-25 01:55:17
dbsource,metavision,metavision,metavision,metavision,metavision
suspected_infection_time_poe,NaT,NaT,2198-08-08 01:00:00,2134-01-24 00:30:00,2159-09-23 03:33:00
suspected_infection_time_poe_days,,,-0.294248,-0.327245,-0.138657
specimen_poe,,,MRSA SCREEN,URINE,MRSA SCREEN


# TBD

- sepsis3_cohort의 주요 피처들을 좀더 세밀한 타임 윈도우로 추출해야함
     - antibiotic_time_poe: 첫 처방인지 마지막 처방인지
     - vent 등 스코어나 체온 등을 세밀하게 뽑을 수 있는지 


In [5]:
df = pd.read_sql("""
with abx as
(
  select pr.hadm_id
  , pr.drug as antibiotic_name
  , pr.startdate as antibiotic_time
  , pr.enddate as antibiotic_endtime
  from prescriptions pr
  -- inner join to subselect to only antibiotic prescriptions
  inner join abx_poe_list ab
      on pr.drug = ab.drug
) select * from abx;
""", engine)

In [6]:
df.shape

(207154, 4)

In [7]:
df.head()

Unnamed: 0,hadm_id,antibiotic_name,antibiotic_time,antibiotic_endtime
0,143045,Vancomycin HCl,2167-01-09,2167-01-11
1,159514,NEO*IV*Gentamicin,2117-11-20,2117-11-24
2,159514,NEO*IV*Ampicillin Sodium,2117-11-20,2117-11-24
3,150750,Levofloxacin,2149-11-10,2149-11-11
4,163353,NEO*IV*Gentamicin,2138-07-18,2138-07-20


In [8]:
ab_per_visit = df.groupby('hadm_id').agg('min')['antibiotic_time']

In [9]:
ab_per_visit.head()

hadm_id
100003   2150-04-17
100006   2108-04-06
100007   2145-03-31
100009   2162-05-17
100011   2177-08-29
Name: antibiotic_time, dtype: datetime64[ns]

In [24]:
df = pd.read_sql("""
select * from abx_micro_poe
""", engine)

In [25]:
df.shape

(1268126, 9)

In [26]:
df.head()

Unnamed: 0,icustay_id,antibiotic_name,antibiotic_time,antibiotic_endtime,last72_charttime,next24_charttime,suspected_infection_time,specimen,positiveculture
0,200001,CefTAZidime,2181-11-18,2181-11-20,NaT,2181-11-18 11:45:00,2181-11-18 11:45:00,BLOOD CULTURE,0.0
1,200001,CefTAZidime,2181-11-18,2181-11-20,NaT,2181-11-18 11:10:00,2181-11-18 11:10:00,BLOOD CULTURE,0.0
2,200001,Vancomycin,2181-11-19,2181-11-28,2181-11-18 11:10:00,2181-11-19 06:00:00,2181-11-18 11:10:00,BLOOD CULTURE,0.0
3,200001,Vancomycin,2181-11-19,2181-11-28,2181-11-18 11:45:00,2181-11-19 04:18:00,2181-11-18 11:45:00,BLOOD CULTURE,0.0
4,200001,Vancomycin,2181-11-19,2181-11-28,2181-11-18 11:10:00,2181-11-19 04:18:00,2181-11-18 11:10:00,BLOOD CULTURE,0.0


In [49]:
tmp = df[['icustay_id', 'antibiotic_name', 'positiveculture']].copy()
tmp['col'] = 1
target_abs = tmp.groupby(['icustay_id', 'positiveculture']).agg({'antibiotic_name': lambda x: tuple(set(x)), 'col': 'sum'})

target_abs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,antibiotic_name,col
icustay_id,positiveculture,Unnamed: 2_level_1,Unnamed: 3_level_1
200001,0.0,"(Vancomycin, CefTAZidime)",77
200001,1.0,"(Vancomycin,)",7
200003,0.0,"(Sulfameth/Trimethoprim DS, *NF* Timentin, Van...",65
200003,1.0,"(Sulfameth/Trimethoprim DS, Vancomycin, Ciprof...",22
200011,0.0,"(Sulfameth/Trimethoprim DS,)",1


In [56]:
icustay_id = 200003

In [61]:
pd.read_sql("select * from icustays where icustay_id=200003", engine)

Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,intime,outtime,los
0,34913,27513,163557,200003,carevue,SICU,SICU,57,57,2199-08-02 19:50:04,2199-08-08 17:09:18,5.8884


In [57]:
icustay1 = df[df.icustay_id == icustay_id].copy()

In [58]:
icustay1[icustay1.positiveculture == 1]

Unnamed: 0,icustay_id,antibiotic_name,antibiotic_time,antibiotic_endtime,last72_charttime,next24_charttime,suspected_infection_time,specimen,positiveculture
89,200003,Piperacillin-Tazobactam Na,2199-08-02,2199-08-08,NaT,2199-08-02 21:02:00,2199-08-02 21:02:00,BLOOD CULTURE,1.0
91,200003,Piperacillin-Tazobactam Na,2199-08-02,2199-08-08,NaT,2199-08-02 21:29:00,2199-08-02 21:29:00,BLOOD CULTURE,1.0
92,200003,Vancomycin,2199-08-02,2199-08-04,NaT,2199-08-02 21:29:00,2199-08-02 21:29:00,BLOOD CULTURE,1.0
93,200003,Vancomycin,2199-08-02,2199-08-04,NaT,2199-08-02 21:02:00,2199-08-02 21:02:00,BLOOD CULTURE,1.0
96,200003,Vancomycin,2199-08-04,2199-08-06,2199-08-02 21:02:00,2199-08-04 12:30:00,2199-08-02 21:02:00,BLOOD CULTURE,1.0
97,200003,Vancomycin,2199-08-04,2199-08-06,2199-08-02 21:29:00,2199-08-04 12:30:00,2199-08-02 21:29:00,BLOOD CULTURE,1.0
100,200003,Ciprofloxacin,2199-08-04,2199-08-08,2199-08-02 21:02:00,2199-08-04 09:53:00,2199-08-02 21:02:00,BLOOD CULTURE,1.0
101,200003,Ciprofloxacin,2199-08-04,2199-08-08,2199-08-02 21:29:00,2199-08-04 09:53:00,2199-08-02 21:29:00,BLOOD CULTURE,1.0
102,200003,Vancomycin,2199-08-04,2199-08-06,2199-08-02 21:02:00,2199-08-04 09:53:00,2199-08-02 21:02:00,BLOOD CULTURE,1.0
105,200003,Ciprofloxacin,2199-08-04,2199-08-08,2199-08-02 21:02:00,2199-08-04 12:30:00,2199-08-02 21:02:00,BLOOD CULTURE,1.0


In [53]:
icustay1.antibiotic_name.nunique(), icustay1.antibiotic_name.unique()

(8, array(['Vancomycin', 'Piperacillin-Tazobactam Na', 'Ciprofloxacin',
        'Ampicillin-Sulbactam', 'Ciprofloxacin HCl',
        'Sulfameth/Trimethoprim', '*NF* Timentin',
        'Sulfameth/Trimethoprim DS'], dtype=object))

In [54]:
icustay1.groupby('antibiotic_name').agg(min)['antibiotic_time']

antibiotic_name
*NF* Timentin                2199-08-16
Ampicillin-Sulbactam         2199-08-08
Ciprofloxacin                2199-08-04
Ciprofloxacin HCl            2199-08-08
Piperacillin-Tazobactam Na   2199-08-02
Sulfameth/Trimethoprim       2199-08-16
Sulfameth/Trimethoprim DS    2199-08-22
Vancomycin                   2199-08-02
Name: antibiotic_time, dtype: datetime64[ns]

In [55]:
icustay1.groupby('antibiotic_name').agg(max)['antibiotic_endtime']

antibiotic_name
*NF* Timentin                2199-08-16
Ampicillin-Sulbactam         2199-08-09
Ciprofloxacin                2199-08-08
Ciprofloxacin HCl            2199-08-20
Piperacillin-Tazobactam Na   2199-08-15
Sulfameth/Trimethoprim       2199-08-22
Sulfameth/Trimethoprim DS    2199-08-22
Vancomycin                   2199-08-20
Name: antibiotic_endtime, dtype: datetime64[ns]

# Reference 

- [1] Sepsis-3 in MIMIC-III, https://github.com/alistairewj/sepsis3-mimic
- [2] Antibiotic prescription of Sepsis-3 in MIMIC-III, https://github.com/alistairewj/sepsis3-mimic/blob/master/query/tbls/abx-micro-prescription.sql