# Matched Study
This script pulls and processes the data to perform analysis on the effect of vasopressin on serum lactate levels. This particular study looks at entry time to the ICU as the index time.

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import math as mt
import matplotlib.pyplot as plt
import psycopg2
from scipy.stats import ks_2samp
from scipy.stats import linregress
from scipy.stats import ttest_ind
from scipy.stats import chisquare
from scipy.stats import wilcoxon
from collections import defaultdict
%matplotlib inline
plt.style.use('ggplot')

In [None]:
# Connection settings for the local MIMIC database; the connection itself is
# opened later with psycopg2.connect().
sqluser = 'KSeverson' # database user passed to psycopg2.connect
dbname = 'mimic' # database name passed to psycopg2.connect
schema_name = 'mimiciii' # schema put on the search_path after connecting

In [None]:
# Change the default figure size used by the matplotlib figures created below
plt.rcParams["figure.figsize"] = (10.0, 5.0)

# Query
Query pulls the data from the MIMIC database. This query does not retrieve information about the covariates and only looks at the inclusion criteria: adult (>18 years) patients with sepsis in the MICU or SICU with lactate monitoring during the ICU stay.

In [None]:
# Connect to a local postgres version of mimic
con = psycopg2.connect(dbname=dbname, user = sqluser)
cur = con.cursor()
# Make unqualified table names in the query resolve against schema_name
cur.execute('SET search_path TO ' + schema_name)

# One row per lactate lab event (itemid 50813) that falls inside an ICU stay
# (or up to 6 hours before it) for Angus-sepsis admissions whose first and last
# care unit are MICU or SICU. Times are returned in hours relative to ICU
# admission; age is returned in years.
query = \
"""
SELECT a.hadm_id, a.subject_id, i.icustay_id, ang.angus, i.los, l.valuenum, l.valueuom, a.hospital_expire_flag
	, EXTRACT (EPOCH FROM (i.intime - p.dob)/60.0/60.0/24.0/365.242) AS age_on_admit -- patient age in years
	, EXTRACT (EPOCH FROM (l.charttime - i.intime)/60.0/60.0) AS time_since_admit -- time in hours
    , i.first_careunit, i.last_careunit
    , EXTRACT (EPOCH FROM (p.dod - i.intime)/60.0/60.0) AS time_of_death -- time in hours
FROM admissions a
INNER JOIN patients p
ON a.subject_id = p.subject_id
INNER JOIN labevents l
ON a.hadm_id = l.hadm_id
LEFT JOIN icustays i
ON a.hadm_id = i.hadm_id AND l.charttime BETWEEN (i.intime - '06:00:00'::interval hour) AND i.outtime -- include labs drawn up to 6 hours before icu admit
LEFT JOIN angus_sepsis ang
ON i.hadm_id = ang.hadm_id
WHERE i.first_careunit IN ('MICU', 'SICU')
AND i.last_careunit IN ('MICU', 'SICU')
AND ang.angus = 1
AND l.itemid = 50813
ORDER BY hadm_id, time_since_admit;
"""

data = pd.read_sql_query(query,con)
data

# Data processing
This section refines the inclusion criteria. Patients are required to have a first lactate measurement between hours -6 and 6 and a follow-up measurement between hours 21 and 27. If there are multiple measurements in a window, the lactate values recorded closest to hour 0 and hour 24 are preferred.

In [None]:
i_id = data.icustay_id #put all of the icu_stay IDs into a variable
# np.unique returns the sorted unique IDs, the row position of the first
# occurrence of each ID, and the number of rows carrying each ID
iID_list, iID_ind, iID_count = np.unique(i_id, return_index = True, return_counts = True) 

In [None]:
len(iID_list) #total number of distinct icustay IDs returned by the initial query

The below section sets the levels for various tuning parameters.

In [None]:
time_thres = 24.0 #target time (hours since ICU admission) of the follow-up lactate value; centre of the follow-up window

In [None]:
buffer_time = 3.0 #half-width in hours of the follow-up window around time_thres (wiggle room)

In [None]:
start_monitor = 6.0 #first lactate measurement must be taken no later than this many hours after ICU admission

In [None]:
#initialize variables
# ml: one row per included ICU stay; the inclusion loop also writes a 'dod'
# key, which is not among the columns declared here
ml = pd.DataFrame(columns = ['icu_stayID','first_lact_time','final_lact_time','initial_lact','final_lact','vasopressin','vaso_time','vaso_dose','hosp_exp'])
remove_id = [] #list of icustays for patients who don't have the correct timing of lactate measurements
short_lact = [] #list of icustays for patients who have only 1 lactate value
lact_traject_time = {} #dictionary of lactate measurement times, keyed by icustay ID
lact_traject_val = {} #dictionary of lactate values, keyed by icustay ID
one_val = pd.DataFrame(columns = ['icu_stayID', 'lact_val']) #patients with only one lactate value
wrong_time = pd.DataFrame(columns = ['icu_stayID', 'last_time', 'last_lact','vasopressin', 'vaso_time', 'vaso_dose','dod']) #patients who do not meet the timing requirements

In [None]:
# Check inclusion criteria and populate new dataframe
# Rows are selected per ICU stay with groupby instead of the positional slice
# iID_ind[i]:iID_ind[i]+n. The query orders rows by hadm_id and then
# time_since_admit, so two ICU stays belonging to the same admission can be
# interleaved and a positional slice would mix their measurements. groupby
# visits the IDs in sorted order and keeps each stay's rows in query order,
# i.e. sorted by time.
for ids, stay in data.groupby('icustay_id', sort = True): #loop through each ICU stay
    time = stay.time_since_admit #lactate measurement times (hours since ICU admit)
    lac = stay.valuenum #lactate measurement values
    dod = stay.time_of_death.iloc[0] #pull the dod (hours since ICU admit)
    hosp_exp = stay.hospital_expire_flag.iloc[0] #pull the expiry flag
    age = stay.age_on_admit.iloc[0] #age in years at ICU admission
    if len(lac) >= 2 and np.min(lac) > 0 and age >= 18.0 and time.iloc[0] <= start_monitor: #inclusion criteria
        #the above statement excludes anyone who is not an adult, doesn't have at least two values,
        #doesn't have a measurement by hour start_monitor, or has a non-positive lactate value
        #window contains all measurements strictly inside time_thres +/- buffer_time
        in_window = (time > (time_thres - buffer_time)) & (time < (time_thres + buffer_time))
        window = time.loc[in_window]
        if window.empty:
            #patient does not meet timing requirements; record the last measurement for review
            remove_id.append(ids)
            wrong_time = wrong_time.append({'icu_stayID': ids, 'last_time': time.iloc[-1], 'last_lact': lac.iloc[-1],'vasopressin':0, 'vaso_time':0, 'vaso_dose':0, 'dod':dod},ignore_index = True)
        else:
            #idxmin must be CALLED: it returns the index label of the smallest value
            t0 = abs(time).idxmin() #label of the measurement closest to hour 0
            tf = abs(window - time_thres).idxmin() #label of the measurement closest to time_thres
            lact_traject_time[ids] = time
            lact_traject_val[ids] = lac
            ml = ml.append({'icu_stayID': ids, 'first_lact_time': time.loc[t0], 'final_lact_time': time.loc[tf], 'initial_lact': lac.loc[t0], 'final_lact':lac.loc[tf], 'vasopressin':0, 'vaso_time':0, 'vaso_dose':0, 'dod':dod,'hosp_exp':hosp_exp},ignore_index = True)

    elif len(lac) < 2:
        short_lact.append(ids)
        one_val = one_val.append({'icu_stayID': ids, 'lact_val': lac.iloc[0]},ignore_index = True)

#end loop through icustay IDs


In [None]:
# Cohort bookkeeping after the inclusion loop
n_included = len(ml) #patients who met inclusion criteria
n_wrong_timing = len(remove_id) #patients who do not meet timing requirements
n_short = len(short_lact) #patients who have less than 2 lactate measurements
print(n_included)
print(n_wrong_timing)
print(n_short)

In [None]:
# Left panel: time of the last lactate value for patients excluded on timing
plt.subplot(1,2,1)
hour_bins = np.arange(0,48)
plt.hist(wrong_time.last_time, bins = hour_bins)
plt.title('Last Lact. Time')

# Right panel: the corresponding last lactate values, NaN entries removed
plt.subplot(1,2,2)
has_value = ~np.isnan(wrong_time.last_lact)
plt.hist(wrong_time.loc[has_value].last_lact)
plt.title('Last Lact Value')

In [None]:
# Distribution of the lactate value for patients with a single measurement,
# and how many of those values are at or below 2.0
plt.hist(one_val.lact_val)
low_single = one_val.loc[one_val.lact_val <= 2.0]
len(low_single)

In [None]:
# Earliest first and follow-up lactate times in the cohort
print(min(ml.first_lact_time))
print(min(ml.final_lact_time))

# Side-by-side histograms of the two measurement times
for panel, (times, title) in enumerate([
        (ml.first_lact_time, 'Distribution of first lactate measurement timing'),
        (ml.final_lact_time, 'Distribution of final lactate measurement timing')], 1):
    plt.subplot(1,2,panel)
    plt.hist(times)
    plt.title(title)

# Query for vasopressin information
The query below pulls information from MIMIC concerning vasopressin administration.

In [None]:
# query the metavision table for the medication information
# A new connection is opened and the search_path is set so that unqualified
# table names resolve against schema_name.
con = psycopg2.connect(dbname=dbname, user = sqluser)
cur = con.cursor()
cur.execute('SET search_path TO ' + schema_name)

# One row per inputevents_mv record with itemid 222315, with the start time
# expressed in hours since ICU admission. "ie.rate = ie.rate" is presumably
# meant to drop records without a rate (the comparison is not true for NULL).
query = \
"""
SELECT icu.icustay_id, ie.itemid, d.label, ie.starttime, ie.rate, ie.rateuom, ie.amount, ie.amountuom
     , EXTRACT (EPOCH FROM (ie.starttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
FROM icustays icu
LEFT JOIN inputevents_mv ie
ON icu.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    222315 -- Vasopressin
    ) 
AND ie.rate = ie.rate
ORDER BY icu.icustay_id, ie.starttime
;"""

mvdata = pd.read_sql_query(query,con)
#mvdata.loc[mvdata.itemid != 222315]
# Display the records whose rate unit is not 'units/hour'
mvdata.loc[mvdata.rateuom != 'units/hour']

In [None]:
# Express every Metavision vasopressin rate per minute: rates recorded as
# 'units/hour' are divided by 60, all other rates are kept as recorded.
# The column is computed in one vectorised step; the previous per-row
# `frame.column.iloc[i] = value` form is chained assignment, which pandas does
# not guarantee will write back to the frame.
vaso_dose_mv = mvdata[['rate', 'rateuom']].copy()
is_hourly = vaso_dose_mv.rateuom == 'units/hour'
vaso_dose_mv['rate_fix'] = np.where(is_hourly, vaso_dose_mv.rate/60.0, vaso_dose_mv.rate)

In [None]:
# Histogram of the normalised Metavision doses in the plausible range,
# followed by the largest and the median dose
dose_bins = np.arange(-0.005,0.11,0.01)
plt.hist(vaso_dose_mv.rate_fix, bins = dose_bins)
plt.title('Vasopressin does in units/min in Metavision')
print(max(vaso_dose_mv.rate_fix))
print(np.median(vaso_dose_mv.rate_fix))

In [None]:
# Same dose column, binned from 0.1 to 7 to show the implausibly large values
plt.hist(vaso_dose_mv.rate_fix, bins = np.arange(0.1,7,0.5))
plt.title('Vasopressin erroneous dose in units/min in Metavision')

In [None]:
# Treatment window, in hours since ICU admission: patients whose first
# vasopressin record falls outside it are dropped from the cohort
start_vaso = 3.0 #starting time for vasopressin window
end_vaso = 18.0 #ending time for vasopressin window

In [None]:
# Classify every cohort patient by the time of the first Metavision
# vasopressin record: before start_vaso or after end_vaso -> dropped from ml,
# otherwise flagged as treated with the time and per-minute dose recorded.
# THIS ONE IS FOR METAVISION
early_sum = 0 #number of patients dropped because vasopressin started before start_vaso
late_sum = 0 #number of patients dropped because vasopressin started after end_vaso
for stay_id in ml.icu_stayID: #loop through each icustay_id
    meds = mvdata.loc[mvdata.icustay_id == stay_id] #vasopressin records of this patient
    if meds.empty:
        continue #no Metavision vasopressin record, nothing to update
    first_time = meds['time_since_admit'].iloc[0]
    if first_time < start_vaso:
        ml = ml[ml.icu_stayID != stay_id] #drop patients who start vasopressin before the window
        early_sum += 1
    elif first_time > end_vaso:
        ml = ml[ml.icu_stayID != stay_id] #drop patients who start vasopressin after the window
        late_sum += 1
    else:
        this_stay = ml.icu_stayID == stay_id
        first_dose = meds['rate'].iloc[0]
        if meds['rateuom'].iloc[0] == 'units/hour':
            first_dose = first_dose/60 #convert to units per minute
        ml.loc[this_stay, 'vaso_time'] = first_time
        ml.loc[this_stay, 'vaso_dose'] = first_dose
        ml.loc[this_stay, 'vasopressin'] = 1 #populate the data table
              

In [None]:
# this is the carevue version of the query
# A new connection is opened and the search_path is set so that unqualified
# table names resolve against schema_name.
con = psycopg2.connect(dbname=dbname, user = sqluser)
cur = con.cursor()
cur.execute('SET search_path TO ' + schema_name)

# One row per inputevents_cv record for the listed item IDs, with the chart
# time expressed in hours since ICU admission. "ie.rate = ie.rate" is
# presumably meant to drop records without a rate (not true for NULL).
query = \
"""
SELECT icu.icustay_id, ie.itemid, d.label, ie.charttime, ie.rate, ie.rateuom
     , EXTRACT (EPOCH FROM (ie.charttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
FROM icustays icu
LEFT JOIN inputevents_cv ie
ON icu.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    1136, 1327, 2334, 42273, 1222, 2248, 2561, 2765, 42802, 2445, 7341, 30051, 222315 -- Vasopressin
    ) 
AND ie.rate = ie.rate
ORDER BY icu.icustay_id, ie.charttime
;"""

cvdata = pd.read_sql_query(query,con)
# Display the distinct rate units present in the result
np.unique(cvdata.rateuom)
#cvdata.loc[cvdata.itemid != 30051] #note that only 30051 returns vasopressin results

In [None]:
# Express every Carevue vasopressin rate per minute: rates recorded as 'Uhr'
# are divided by 60, all other rates are kept as recorded.
# The column is computed in one vectorised step; the previous per-row
# `frame.column.iloc[i] = value` form is chained assignment, which pandas does
# not guarantee will write back to the frame.
vaso_dose_cv = cvdata[['rate', 'rateuom']].copy()
is_hourly = vaso_dose_cv.rateuom == 'Uhr'
vaso_dose_cv['rate_fix'] = np.where(is_hourly, vaso_dose_cv.rate/60.0, vaso_dose_cv.rate)

In [None]:
# Number of Carevue records whose normalised dose exceeds 6
len(vaso_dose_cv.loc[vaso_dose_cv.rate_fix > 6].rate_fix)
#vaso_dose_cv.loc[vaso_dose_cv.rate_fix > 2.45].rate_fix

In [None]:
# Histogram of the normalised Carevue doses in the plausible range; the cell
# value is the largest normalised dose
dose_bins = np.arange(-0.005,0.11,0.01)
plt.hist(vaso_dose_cv.rate_fix, bins = dose_bins)
plt.title('Vasopressin does in units/min in Carevue')
max(vaso_dose_cv.rate_fix)

In [None]:
# Two views of the implausibly large Carevue doses: 0.1-10 and 10-75
doses = vaso_dose_cv.rate_fix

plt.subplot(1,2,1)
plt.hist(doses, bins = np.arange(0.1,10,0.5))
plt.title('Vasopressin erroneous dose in units/min in Carevue')

plt.subplot(1,2,2)
plt.hist(doses, bins = np.arange(10,75,5))

In [None]:
# Classify every cohort patient by the time of the first non-zero Carevue
# vasopressin record: before start_vaso or after end_vaso -> dropped from ml,
# otherwise flagged as treated with the time and per-minute dose recorded.
# THIS ONE IS FOR CAREVUE
cvdata = cvdata[cvdata.rate != 0] #first remove entries where no vasopressin is administered
for stay_id in ml.icu_stayID: #loop through each icustay_id
    meds = cvdata.loc[cvdata.icustay_id == stay_id] #vasopressin records of this patient
    if meds.empty:
        continue #no Carevue vasopressin record, nothing to update
    first_time = meds['time_since_admit'].iloc[0]
    if first_time < start_vaso:
        ml = ml[ml.icu_stayID != stay_id] #drop patients who start vasopressin before the window
        early_sum += 1
    elif first_time > end_vaso:
        ml = ml[ml.icu_stayID != stay_id] #drop patients who start vasopressin after the window
        late_sum += 1
    else:
        this_stay = ml.icu_stayID == stay_id
        first_dose = meds['rate'].iloc[0]
        if meds['rateuom'].iloc[0] == 'Uhr':
            first_dose = first_dose/60 #convert to units per minute
        ml.loc[this_stay, 'vaso_time'] = first_time
        ml.loc[this_stay, 'vaso_dose'] = first_dose
        ml.loc[this_stay, 'vasopressin'] = 1 #populate the data table
            

In [None]:
# Summary of the vasopressin timing filter
for count in (early_sum, #patients who receive vasopressin before hour 3
              late_sum, #patients who receive vasopressin after hour 18
              len(ml)): #number of patients who meet inclusion criteria
    print(count)

In [None]:
# Treated patients only: histogram of the first vasopressin time, saved to
# disk, followed by the group size and the earliest/latest start time
check = ml[ml.vasopressin == 1]
binwidth = 1
hour_bins = np.arange(0,20,binwidth)
plt.hist(check.vaso_time, bins = hour_bins)
plt.title('Time Patient First Recevies Vasopressin')
plt.xlabel('Time since admission')
plt.savefig('VasoTime.png')
print(len(check))
print(min(check.vaso_time))
print(max(check.vaso_time))

In [None]:
# First vasopressin time (treated patients) next to first lactate time (all
# cohort patients); the figure is saved to disk
panels = [
    (check.vaso_time, np.arange(3,22,3), 'Time of 1st Vasopressin admin.'),
    (ml.first_lact_time, np.arange(-6,7,0.5), 'Time of 1st lactate measurement'),
]
for position, (values, edges, title) in enumerate(panels, 1):
    plt.subplot(1,2,position)
    plt.hist(values, bins = edges)
    plt.title(title)
    plt.xlabel('Time since admit')
plt.savefig('VasoAndLactTime.png')

In [None]:
# Scatter of first lactate time against first vasopressin time for every
# patient with a non-zero vaso_time (0 is the "not set" initial value)
has_vaso_time = ml.vaso_time != 0
ml_plot = ml.loc[has_vaso_time]
plt.plot(ml_plot.first_lact_time, ml_plot.vaso_time, 'o')
#plt.plot(np.arange(0,13),np.arange(0,13),'k')
#plt.plot(np.arange(0,13),np.arange(0,13)+12.0,'k--')
#plt.plot(np.arange(0,13),np.arange(0,13)+24.0,'k--')
#plt.plot(np.arange(0,13),15.0*np.ones(13),'k')
plt.xlabel('Time since admit of first lactate measurement')
plt.ylabel('Time since admit of first vasopressin adminstration')
#plt.ylim(-5,40)
plt.savefig('VasoLactateTimeCorr.png')

In [None]:
# Earliest recorded vasopressin start among the plotted patients
ml_plot['vaso_time'].min()

In [None]:
# Dose at first administration for treated patients, binned on two scales
treated_dose = ml.loc[ml.vasopressin == 1].vaso_dose
plt.subplot(1,2,1)
plt.hist(treated_dose, bins = np.arange(0.005, 0.1, 0.01))
plt.subplot(1,2,2)
plt.hist(ml.loc[ml.vasopressin == 1].vaso_dose, bins = np.arange(0.5, 3, 0.25))

In [None]:
# icustay IDs whose recorded first vasopressin dose is above 0.07
bad_Id = ml.loc[ml.vaso_dose> 0.07].icu_stayID

In [None]:
# Carevue vasopressin rate over time for two individual ICU stays
#plt.plot(cvdata.loc[cvdata.icustay_id == 217156].time_since_admit,cvdata.loc[cvdata.icustay_id == 217156].rate,'o-')
for stay_id in (224389, 246522):
    trace = cvdata.loc[cvdata.icustay_id == stay_id]
    plt.plot(trace.time_since_admit, trace.rate, 'o-')
plt.xlabel('Time since admission')
plt.ylabel('Vasopressin dose (u/min)')

In [None]:
# Timing of the follow-up lactate value across the cohort, with its range,
# mean and the cohort size
final_times = ml.final_lact_time
plt.hist(final_times, bins = np.arange(21,28,1))
plt.xlabel('Time since admit')
plt.title('Time of lactate check point')
print(min(final_times))
print(max(final_times))
print(np.mean(final_times))
print(len(ml))

In [None]:
# Hours between the first vasopressin administration and the follow-up
# lactate value, for treated patients
treated = ml.loc[ml.vasopressin == 1]
delta = treated.final_lact_time - treated.vaso_time
plt.hist(delta, bins = np.arange(3,30,1))
plt.xlabel('Time delta (hr)')
plt.title('Time since vasopressin admin. to lactate check point')
print(max(delta))
print(min(delta))
print(np.mean(delta))
print(np.median(delta))

In [None]:
# Write the cohort table (including the row index) to CSV
ml.to_csv('matched_total_SICU_updatedTime.csv')

# Covariate Queries
The queries below gather the covariate information for the study. There is an intermediate step to build the 'matched_cohort_SICU' table, which is performed in a separate script.

In [None]:
# Open a new connection and point the search_path at schema_name
con = psycopg2.connect(dbname=dbname, user = sqluser)
cur = con.cursor()
cur.execute('SET search_path TO ' + schema_name)

# One row per matched_cohort_SICU entry that has an icustays record, with the
# covariate tables attached by LEFT JOIN on icustay_id / hadm_id / subject_id.
# NOTE(review): this replaces the `data` frame holding the lactate query result.
query = \
"""
SELECT l.icustay_id, i.first_careunit, aps.apsiii, l.first_lact_time, l.first_lact, l.last_lact, a.admission_type, a.admission_location
    , l.vaso_time, p.gender, a.ethnicity, a.hospital_expire_flag
    , EXTRACT (EPOCH FROM (i.intime - p.dob)/60.0/60.0/24.0/365.242) AS age_on_admit -- patient age in years
    , e.renal_failure, e.liver_disease, e.congestive_heart_failure, lb.creatinine_mean, vt.heartrate_mean
    , vt.meanbp_mean, vt.spo2_mean, vn.mechvent, ang.angus, ang.explicit_sepsis, l.vaso, det.los_hospital 
FROM matched_cohort_SICU l
INNER JOIN icustays i
ON l.icustay_id = i.icustay_id
LEFT JOIN apsiii aps
ON l.icustay_id = aps.icustay_id
LEFT JOIN admissions a
ON i.hadm_id = a.hadm_id
LEFT JOIN icustay_detail det
ON i.icustay_id = det.icustay_id
LEFT JOIN patients p
ON i.subject_id = p.subject_id
LEFT JOIN elixhauser_ahrq e
ON i.hadm_id = e.hadm_id
LEFT JOIN angus_sepsis ang
ON i.hadm_id = ang.hadm_id
LEFT JOIN labsfirst3hours lb
ON l.icustay_id = lb.icustay_id
LEFT JOIN vitalsfirst3hours vt
ON l.icustay_id = vt.icustay_id
LEFT JOIN ventfirst3hours vn
ON l.icustay_id = vn.icustay_id;"""


data = pd.read_sql_query(query,con)
data #expect 1310 rows

In [None]:
# Add the covariate columns that the loops below fill in; 0 means
# "nothing recorded yet"
for new_column in ['Levophed', 'Levophed_time', 'LR', 'NS', 'Albumin', 'fio2']:
    data[new_column] = 0

In [None]:
# this is the carevue version of the query

# Open a new connection and point the search_path at schema_name
con = psycopg2.connect(dbname=dbname, user = sqluser)
cur = con.cursor()
cur.execute('SET search_path TO ' + schema_name)

# One row per inputevents_cv record for the listed item IDs among the matched
# cohort, with the chart time in hours since ICU admission.
# "ie.rate = ie.rate" is presumably meant to drop records without a rate.
# NOTE: the result replaces the vasopressin `cvdata` frame.
query = \
"""
SELECT c.icustay_id, ie.itemid, d.label, ie.charttime, ie.rate, ie.rateuom
     , EXTRACT (EPOCH FROM (ie.charttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
FROM matched_cohort_SICU c 
LEFT JOIN icustays icu
ON c.icustay_id = icu.icustay_id
LEFT JOIN inputevents_cv ie
ON c.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    221906, 30047, 30120  -- Norepinephrin / Levophed
    ) 
AND ie.rate = ie.rate
ORDER BY c.icustay_id, ie.charttime
;"""

cvdata = pd.read_sql_query(query,con)

In [None]:
# For every patient with Carevue Levophed records: store the time of the
# first record, and flag Levophed = 1 when any record is within 3 hours
# of arrival
for stay_id in data.icustay_id: #loop through each icustay_id
    levo = cvdata.loc[cvdata.icustay_id == stay_id]
    if levo.empty:
        continue #no Carevue Levophed record for this patient
    this_stay = data.icustay_id == stay_id
    data.loc[this_stay, 'Levophed_time'] = levo['time_since_admit'].iloc[0]
    if (levo.time_since_admit < 3.0).any():
        data.loc[this_stay, 'Levophed'] = 1 #populate the data table

In [None]:
# query the metavision table for the medication information
# Uses the connection opened earlier. One row per inputevents_mv record for
# the listed item IDs among the matched cohort, with the start time in hours
# since ICU admission. "ie.rate = ie.rate" is presumably meant to drop records
# without a rate. NOTE: the result replaces the vasopressin `mvdata` frame.
query = \
"""
SELECT c.icustay_id, ie.itemid, d.label, ie.starttime, ie.rate, ie.rateuom
     , EXTRACT (EPOCH FROM (ie.starttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
FROM matched_cohort_SICU c 
LEFT JOIN icustays icu
ON c.icustay_id = icu.icustay_id
LEFT JOIN inputevents_mv ie
ON c.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    221906, 30047, 30120  -- Norepinephrin / Levophed
    ) 
AND ie.rate = ie.rate
ORDER BY c.icustay_id, ie.starttime
;"""

mvdata = pd.read_sql_query(query,con)

In [None]:
# For every patient with Metavision Levophed records: store the time of the
# first record, and flag Levophed = 1 when any record is within 3 hours
# of arrival
for stay_id in data.icustay_id: #loop through each icustay_id
    levo = mvdata.loc[mvdata.icustay_id == stay_id]
    if levo.empty:
        continue #no Metavision Levophed record for this patient
    this_stay = data.icustay_id == stay_id
    data.loc[this_stay, 'Levophed_time'] = levo['time_since_admit'].iloc[0]
    if (levo.time_since_admit < 3.0).any():
        data.loc[this_stay, 'Levophed'] = 1 #populate the data table

In [None]:
#look at Levophed vs. vasopressin times, Levophed should be administered first.
#There are a handful of data points that appear erroneous but it is possible that they received Levophed in the ED
got_levophed = data.Levophed_time != 0
time_analysis = data.loc[got_levophed]
time_analysis = time_analysis.loc[time_analysis.vaso == 1]
plt.plot(time_analysis.Levophed_time, time_analysis.vaso_time,'o')
# reference lines: the diagonal and a vertical line at hour 3
axis_hours = np.arange(0,21)
plt.plot(axis_hours,np.arange(0,21),'k')
plt.plot(3.0*np.ones(21),axis_hours,'k')
plt.xlabel('Levophed')
plt.ylabel('Vasopressin')
plt.savefig('LevoVasoTime.png')

In [None]:
# Counts quoted in the comments are from the original run
print(len(time_analysis)) #163 patients receive Levophed and vasopressin
n_levophed = len(data.loc[data.Levophed == 1])
print(n_levophed) #300 patients receive Levophed
check = data.loc[data.vaso == 1]
no_levophed = check.loc[check.Levophed_time == 0]
print(no_levophed) #7 patients receive vasopressin and not Levophed

In [None]:
#metavision query for lactate ringers
# One row per inputevents_mv record with itemid 225828 among the matched
# cohort, with the start time in hours since ICU admission and the amount given.
query = \
"""
SELECT c.icustay_id, ie.itemid, d.label
    , ie.starttime, EXTRACT (EPOCH FROM (ie.starttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
    , ie.amount, ie.amountuom
FROM matched_cohort_SICU c 
LEFT JOIN icustays icu
ON c.icustay_id = icu.icustay_id
INNER JOIN inputevents_mv ie
ON c.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    225828) -- Metavision
ORDER BY c.icustay_id, ie.starttime;"""

mvfluidLR = pd.read_sql_query(query,con)
mvfluidLR.loc[mvfluidLR.amountuom != 'ml'] #check that the units are consistent

In [None]:
# Total lactated Ringer's amount given in the first 3 hours, per patient
# THIS ONE IS FOR METAVISION
for stay_id in data.icustay_id: #loop through each icustay_id
    fluids = mvfluidLR.loc[mvfluidLR.icustay_id == stay_id] #records of this patient
    if fluids.empty:
        continue
    early = fluids.loc[fluids.time_since_admit < 3.0]
    data.loc[data.icustay_id == stay_id,'LR'] = sum(early.amount)

In [None]:
#lactate ringers query for carevue
# One row per inputevents_cv record for the listed item IDs among the matched
# cohort, with the chart time in hours since ICU admission. Records with a
# zero amount are excluded; "ie.amount = ie.amount" is presumably meant to
# drop records without an amount.
query = \
"""
SELECT c.icustay_id, ie.itemid, d.label
    , ie.charttime, EXTRACT (EPOCH FROM (ie.charttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
    , ie.amount, ie.amountuom 
FROM matched_cohort_SICU c 
LEFT JOIN icustays icu
ON c.icustay_id = icu.icustay_id
INNER JOIN inputevents_cv ie
ON c.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    2971, 44184, 44367, 44521, 44815, 30021, 1634 -- Carevue
    ) 
AND ie.amount = ie.amount
AND ie.amount <> 0.0
ORDER BY c.icustay_id, ie.charttime
;"""

cvfluidLR = pd.read_sql_query(query,con)
# Display the records whose amount unit is not 'ml'
cvfluidLR.loc[cvfluidLR.amountuom != 'ml']

In [None]:
# Total lactated Ringer's amount given in the first 3 hours, per patient
# THIS ONE IS FOR CAREVUE
# NOTE(review): the value is assigned, not added, so a patient with records in
# both Metavision and Carevue keeps only the Carevue total - confirm intended.
for stay_id in data.icustay_id: #loop through each icustay_id
    fluids = cvfluidLR.loc[cvfluidLR.icustay_id == stay_id] #records of this patient
    if fluids.empty:
        continue
    early = fluids.loc[fluids.time_since_admit < 3.0]
    data.loc[data.icustay_id == stay_id,'LR'] = sum(early.amount)

In [None]:
# metavision query for albumin
# One row per inputevents_mv record for the listed item IDs among the matched
# cohort, with the start time in hours since ICU admission and the amount given.
query = \
"""
SELECT c.icustay_id, ie.itemid, d.label
    , ie.starttime, EXTRACT (EPOCH FROM (ie.starttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
    , ie.amount, ie.amountuom
FROM matched_cohort_SICU c 
LEFT JOIN icustays icu
ON c.icustay_id = icu.icustay_id
LEFT JOIN inputevents_mv ie
ON c.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    227456, 220861, 220862, 220863, 220864 )-- Metavision
ORDER BY c.icustay_id, ie.starttime;"""

mvfluidAlb = pd.read_sql_query(query,con)
# Display the records whose amount unit is not 'ml'
mvfluidAlb.loc[mvfluidAlb.amountuom != 'ml']

In [None]:
# Total albumin amount given in the first 3 hours, per patient
# THIS ONE IS FOR METAVISION (reads mvfluidAlb)
for stay_id in data.icustay_id: #loop through each icustay_id
    fluids = mvfluidAlb.loc[mvfluidAlb.icustay_id == stay_id] #records of this patient
    if fluids.empty:
        continue
    early = fluids.loc[fluids.time_since_admit < 3.0]
    data.loc[data.icustay_id == stay_id,'Albumin'] = sum(early.amount)

In [None]:
# carevue query for albumin
# One row per inputevents_cv record for the listed item IDs among the matched
# cohort, with the chart time in hours since ICU admission. Records with a
# zero amount are excluded; "ie.amount = ie.amount" is presumably meant to
# drop records without an amount.
query = \
"""
SELECT c.icustay_id, ie.itemid, d.label
    , ie.charttime, EXTRACT (EPOCH FROM (ie.charttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
    , ie.amount, ie.amountuom
FROM matched_cohort_SICU c 
LEFT JOIN icustays icu
ON c.icustay_id = icu.icustay_id
LEFT JOIN inputevents_cv ie
ON c.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    1521, 46564, 43353, 30181, 30008, 30009, 44203, 43237, 3066, 42832, 45403  -- Carevue
    ) 
AND ie.amount = ie.amount
AND ie.amount <> 0.0
ORDER BY c.icustay_id, ie.charttime
;"""

cvfluidAlb = pd.read_sql_query(query,con)
# Display the records whose amount unit is not 'ml'
cvfluidAlb.loc[cvfluidAlb.amountuom != 'ml']

In [None]:
# Total albumin amount given in the first 3 hours, per patient
# THIS ONE IS FOR CAREVUE
for stay_id in data.icustay_id: #loop through each icustay_id
    fluids = cvfluidAlb.loc[cvfluidAlb.icustay_id == stay_id] #records of this patient
    if fluids.empty:
        continue
    early = fluids.loc[fluids.time_since_admit < 3.0]
    data.loc[data.icustay_id == stay_id,'Albumin'] = sum(early.amount)

In [None]:
# metavision query for normal saline
# One row per inputevents_mv record with itemid 225158 among the matched
# cohort, with the start time in hours since ICU admission and the amount given.
query = \
"""
SELECT c.icustay_id, ie.itemid, d.label
    , ie.starttime, EXTRACT (EPOCH FROM (ie.starttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
    , ie.amount, ie.amountuom
FROM matched_cohort_SICU c 
LEFT JOIN icustays icu
ON c.icustay_id = icu.icustay_id
LEFT JOIN inputevents_mv ie
ON c.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    225158 )-- Metavision
ORDER BY c.icustay_id, ie.starttime;"""

mvfluidNS = pd.read_sql_query(query,con)
# Display the records whose amount unit is not 'ml'
mvfluidNS.loc[mvfluidNS.amountuom != 'ml']

In [None]:
# Total normal saline amount given in the first 3 hours, per patient
# THIS ONE IS FOR metavision
for stay_id in data.icustay_id: #loop through each icustay_id
    fluids = mvfluidNS.loc[mvfluidNS.icustay_id == stay_id] #records of this patient
    if fluids.empty:
        continue
    early = fluids.loc[fluids.time_since_admit < 3.0]
    data.loc[data.icustay_id == stay_id,'NS'] = sum(early.amount)

In [None]:
#carevue query for normal saline
# One row per inputevents_cv record for the listed item IDs among the matched
# cohort, with the chart time in hours since ICU admission. Records with a
# zero amount are excluded; "ie.amount = ie.amount" is presumably meant to
# drop records without an amount.
query = \
"""
SELECT c.icustay_id, ie.itemid, d.label
    , ie.charttime, EXTRACT (EPOCH FROM (ie.charttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
    , ie.amount, ie.amountuom
FROM matched_cohort_SICU c 
LEFT JOIN icustays icu
ON c.icustay_id = icu.icustay_id
LEFT JOIN inputevents_cv ie
ON c.icustay_id = ie.icustay_id
LEFT JOIN d_items d
ON ie.itemid = d.itemid
WHERE ie.itemid IN (
    44633, 5321, 41237, 41371, 41428, 40850, 30352, 30018, 4647, 5333, 6190, 4970, 
    5199, 41913, 40865, 44983, 30190, 44741, 41467, 45989, 41490, 44491, 41695, 
    42548, 42844, 45137, 44440 -- Carevue
    ) 
AND ie.amount = ie.amount
AND ie.amount <> 0.0
ORDER BY c.icustay_id, ie.charttime
;"""

cvfluidNS = pd.read_sql_query(query,con)
# Display the records whose amount unit is not 'ml'
cvfluidNS.loc[cvfluidNS.amountuom != 'ml']

In [None]:
# Total normal saline amount given in the first 3 hours, per patient
# THIS ONE IS FOR CAREVUE
for stay_id in data.icustay_id: #loop through each icustay_id
    fluids = cvfluidNS.loc[cvfluidNS.icustay_id == stay_id] #records of this patient
    if fluids.empty:
        continue
    early = fluids.loc[fluids.time_since_admit < 3.0]
    data.loc[data.icustay_id == stay_id,'NS'] = sum(early.amount)

In [None]:
# query for fi02
# One row per bloodgasfirstday record with a non-zero fio2 among the matched
# cohort, with the chart time in hours since ICU admission.
query = \
"""
SELECT m.icustay_id, bg.charttime, bg.fio2,
    EXTRACT (EPOCH FROM (bg.charttime - icu.intime)/60.0/60.0) AS time_since_admit -- time in hours
FROM matched_cohort_SICU m
INNER JOIN bloodgasfirstday bg
ON m.icustay_id = bg.icustay_id
INNER JOIN icustays icu
ON m.icustay_id = icu.icustay_id
WHERE bg.fio2 <> 0.0
ORDER BY m.icustay_id, bg.charttime
;"""
fi02data = pd.read_sql_query(query,con)

In [None]:
# Mean fio2 over the blood gas records of the first 3 hours, per patient;
# patients without such a record keep the initial value
for stay_id in data.icustay_id: #loop through each icustay_id
    gases = fi02data.loc[fi02data.icustay_id == stay_id] #records of this patient
    early = gases.loc[gases.time_since_admit < 3.0]
    if early.empty:
        continue
    data.loc[data.icustay_id == stay_id,'fio2'] = gases.loc[gases.time_since_admit < 3.0].fio2.mean()

In [None]:
# Write the covariate table (including the row index) to CSV
data.to_csv('basic_set_SICU.csv')

In [None]:
# Load the matched cohort. The path is passed directly so pandas opens and
# closes the workbook itself; wrapping it in open() left the handle unclosed.
data = pd.read_excel('matched_lact_all.xlsx')

In [None]:
# Add a measurement-count column initialised to 0
data['number_of_measurements'] = 0

In [None]:
# Build one row per control patient (vaso == 0) holding the change and slope
# between the last two lactate values measured before hour 3. -100 marks
# patients with fewer than two such values. match_num numbers the controls in
# the order they appear in `data`.
dataTraject = pd.DataFrame(columns = ['match_num','icu_stayID_vaso','icu_stayID_control','lact_change_vaso','lact_change_control','slope_vaso','slope_control'])
match_num = 0
for match_num, k in enumerate(data.icustay_id[data.vaso == 0], 1):
    is_early = lact_traject_time[k] < 3.0
    early_traject = lact_traject_val[k].loc[is_early]
    early_time = lact_traject_time[k].loc[is_early]
    if len(early_time) >= 2:
        lact_change = early_traject.iloc[-1] - early_traject.iloc[-2]
        elapsed = early_time.iloc[-1] - early_time.iloc[-2]
        slope = (early_traject.iloc[-1] - early_traject.iloc[-2])/elapsed
    else:
        lact_change = -100
        slope = -100
    control_row = {'match_num': match_num, 'icu_stayID_control': k, 'lact_change_control': lact_change, 'slope_control':slope }
    dataTraject = dataTraject.append(control_row,ignore_index = True)


In [None]:
# Fill in the treated half of each row: the n-th treated patient (vaso == 1)
# in `data` is written to the row with match_num == n. -100 marks patients
# with fewer than two lactate values before hour 3.
match_num = 0
for match_num, k in enumerate(data.icustay_id[data.vaso == 1], 1):
    is_early = lact_traject_time[k] < 3.0
    early_traject = lact_traject_val[k].loc[is_early]
    early_time = lact_traject_time[k].loc[is_early]
    if len(early_time) >= 2:
        lact_change = early_traject.iloc[-1] - early_traject.iloc[-2]
        elapsed = early_time.iloc[-1] - early_time.iloc[-2]
        slope = (early_traject.iloc[-1] - early_traject.iloc[-2])/elapsed
    else:
        lact_change = -100
        slope = -100
    matched_row = dataTraject.match_num == match_num
    dataTraject.loc[matched_row,'slope_vaso'] = slope
    dataTraject.loc[matched_row,'lact_change_vaso'] = lact_change
    dataTraject.loc[matched_row,'icu_stayID_vaso'] = k

In [None]:
# Write the matched trajectory table (including the row index) to CSV
dataTraject.to_csv('Trajectories.csv')

In [None]:
# Lactate trajectories of every patient: controls in the top panel,
# treated patients in the bottom panel
for panel, vaso_flag in ((1, 0), (2, 1)):
    for k in data.icustay_id[data.vaso == vaso_flag]:
        plt.subplot(2,1,panel)
        plt.plot(lact_traject_time[k],lact_traject_val[k],'-')
        plt.xlim(-6,27)
        plt.ylim(0,15)

In [None]:
# Early part of the trajectories (hours -6 to 6) for the first 8 controls
# (top panel) and the first 8 treated patients (bottom panel)
for panel, vaso_flag in ((1, 0), (2, 1)):
    for k in data.icustay_id[data.vaso == vaso_flag].iloc[:8]:
        plt.subplot(2,1,panel)
        plt.plot(lact_traject_time[k],lact_traject_val[k],'o-')
        plt.xlim(-6,6)
        plt.ylim(0,10)

In [None]:
# Trajectories of the first 14 controls (top) and first 14 treated patients
# (bottom), one colour per patient; for treated patients a square at y = 1
# marks the vasopressin start time
line_colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k','r', 'g', 'b', 'c', 'm', 'y', 'k']
controls = data.icustay_id[data.vaso == 0].iloc[:14]
for k, colour in zip(controls, line_colors):
    plt.subplot(2,1,1)
    plt.plot(lact_traject_time[k],lact_traject_val[k],'o-',c = colour)
    plt.xlim(-6,30)
    plt.ylim(0,10)
treated = data.icustay_id[data.vaso == 1].iloc[:14]
for k, colour in zip(treated, line_colors):
    plt.subplot(2,1,2)
    plt.plot(lact_traject_time[k],lact_traject_val[k],'o-',c = colour)
    plt.plot(data.loc[data.icustay_id == k].vaso_time,1,'s',c = colour)
    plt.xlim(-6,30)
    plt.ylim(0,10)

In [None]:
# Per-patient summary of lactate monitoring between hours -6 and 27: number of
# measurements and the median/mean gap between consecutive measurements. The
# gaps are also pooled per group in interval_list_tre / interval_list_con.
skipid = []
# Each pooled array starts with ONE NaN placeholder, which the summary cell
# skips by reading [1:]. Seeding with `np.empty` (the function object, never
# called) produced object-dtype arrays holding a function as first element.
interval_list_tre = np.array([np.nan])
interval_list_con = np.array([np.nan])
dataCheck = pd.DataFrame(columns = ['icu_stayID','number_of_measurements','median_timing','average_timing','vaso','clear_type'])
for k in data.icustay_id:
    times_k = lact_traject_time[k]
    vals = times_k.loc[times_k < 27.0]
    vals = vals[vals >= -6.0]
    tot = len(vals)
    gaps = np.diff(vals.values) #hours between consecutive measurements
    patient = data.loc[data.icustay_id == k]
    # Scalars (not 1-element arrays) are stored so that comparisons such as
    # dataCheck.vaso == 1 operate on plain values
    vaso = int(patient.vaso.iloc[0])
    if vaso == 1:
        interval_list_tre = np.append(interval_list_tre,gaps)
    else:
        interval_list_con = np.append(interval_list_con,gaps)
    med = np.median(gaps)
    avgt = np.mean(gaps)
    #clear_type is True when the last lactate is higher than the first
    lact_delta = bool((patient.last_lact.iloc[0] - patient.first_lact.iloc[0]) > 0.0)
    dataCheck = dataCheck.append({'icu_stayID': k, 'number_of_measurements': tot, 'median_timing': med, 'average_timing': avgt, 'vaso': vaso, 'clear_type': lact_delta},ignore_index = True)

In [None]:
# Histograms of the number of lactate measurements per patient: treated on
# the left, control on the right.
# NOTE(review): the rcParams assignments are interleaved with the plotting
# calls, so which axes pick up each setting depends on statement order - the
# order is therefore left untouched.
plt.subplot(1,2,1)
plt.hist(dataCheck.loc[dataCheck.vaso == 1].number_of_measurements,bins = np.arange(0,27,3))
plt.rcParams['xtick.color'] = 'k'
plt.rcParams['ytick.color'] = 'k'
plt.rcParams['axes.facecolor']='w'
plt.xlabel('Number of measurements')
plt.rcParams['axes.labelcolor'] = 'k'
plt.rcParams['axes.edgecolor'] = 'k'
plt.title('Treated')
plt.ylim([0,40])


plt.subplot(1,2,2)
plt.hist(dataCheck.loc[dataCheck.vaso == 0].number_of_measurements,bins = np.arange(0,27,3))
plt.title('Control')
plt.rcParams['xtick.color'] = 'k'
plt.rcParams['ytick.color'] = 'k'
plt.xlabel('Number of measurements')
plt.rcParams['axes.facecolor']='w'
plt.rcParams['axes.labelcolor'] = 'k'
plt.rcParams['axes.edgecolor'] = 'k'

In [None]:
# Histograms of the per-patient median time between lactate measurements:
# treated on the left, control on the right.
# NOTE(review): the rcParams assignments are interleaved with the plotting
# calls, so which axes pick up each setting depends on statement order - the
# order is therefore left untouched.
plt.subplot(1,2,1)
plt.hist(dataCheck.loc[dataCheck.vaso == 1].median_timing,bins = np.arange(0,27,3))
plt.rcParams['xtick.color'] = 'k'
plt.rcParams['ytick.color'] = 'k'
plt.rcParams['axes.facecolor']='w'
plt.xlabel('Median time between measurements per patient')
plt.rcParams['axes.labelcolor'] = 'k'
plt.rcParams['axes.edgecolor'] = 'k'
plt.title('Treated')
#plt.ylim([0,40])


plt.subplot(1,2,2)
plt.hist(dataCheck.loc[dataCheck.vaso == 0].median_timing,bins = np.arange(0,27,3))
plt.title('Control')
plt.rcParams['xtick.color'] = 'k'
plt.rcParams['ytick.color'] = 'k'
plt.xlabel('Median time between measurements per patient')
plt.rcParams['axes.facecolor']='w'
plt.rcParams['axes.labelcolor'] = 'k'
plt.rcParams['axes.edgecolor'] = 'k'

In [None]:
# Print summary statistics comparing lactate sampling between the control
# (vaso == 0) and treated (vaso == 1) rows of dataCheck.
# Every value is printed with single-argument print(...), which produces the
# same output under Python 2 and also runs under Python 3.
dataCheck_con = dataCheck.loc[dataCheck.vaso == 0]
dataCheck_tre = dataCheck.loc[dataCheck.vaso == 1]

# Mean, then median, number of measurements (control first, treated second).
print(np.mean(dataCheck_con.number_of_measurements))
print(np.mean(dataCheck_tre.number_of_measurements))

print(np.median(dataCheck_con.number_of_measurements))
print(np.median(dataCheck_tre.number_of_measurements))

# NOTE(review): scipy's wilcoxon is the paired signed-rank test; it needs both
# arrays to have the same length and pairs them by position. Confirm the
# treated/control rows are in matched-pair order.
print(wilcoxon(np.array(dataCheck_tre.number_of_measurements),
               np.array(dataCheck_con.number_of_measurements)))
print(ttest_ind(np.array(dataCheck_tre.number_of_measurements),
                np.array(dataCheck_con.number_of_measurements),
                equal_var=False))

# Median of the per-patient median gap between measurements.
print(np.median(dataCheck_con.median_timing))
print(np.median(dataCheck_tre.median_timing))

print(wilcoxon(dataCheck_con.median_timing, dataCheck_tre.median_timing))
# NOTE(review): unlike the test on measurement counts above, this t-test uses
# the default equal_var=True; confirm that is intended.
print(ttest_ind(dataCheck_con.median_timing, dataCheck_tre.median_timing))

# Mean position-wise difference in median timing (control minus treated).
print(np.mean(np.array(dataCheck_con.median_timing) - np.array(dataCheck_tre.median_timing)))

# Median of the pooled gaps per arm. The first element of each list is
# skipped; presumably it is a placeholder from initialisation -- confirm.
print(np.median(interval_list_tre[1:]))
print(np.median(interval_list_con[1:]))

In [None]:
# Histograms of position-wise differences between the treated (vaso == 1) and
# control (vaso == 0) rows of dataCheck.
# Left: number of measurements, treated minus control.
# Right: median time between measurements, control minus treated (note the
# opposite sign convention).
diff_rows_tre = dataCheck.loc[dataCheck.vaso == 1]
diff_rows_con = dataCheck.loc[dataCheck.vaso == 0]

plt.subplot(1, 2, 1)
count_diff = (np.array(diff_rows_tre.number_of_measurements)
              - np.array(diff_rows_con.number_of_measurements))
plt.hist(count_diff, bins=np.arange(-15, 15, 2.5))

plt.subplot(1, 2, 2)
timing_diff = (np.array(diff_rows_con.median_timing)
               - np.array(diff_rows_tre.median_timing))
plt.hist(timing_diff, bins=np.arange(-25, 25, 5))

# Trailing bare expression: the notebook shows the mean count difference as
# the cell's output.
np.mean(count_diff)

In [None]:
# Histograms of the pooled gaps between consecutive measurements: treated arm
# on the left, control arm on the right. The first element of each pooled
# list is skipped in both panels.
interval_hist_bins = np.arange(0, 25, 2.5)
for interval_panel_no, pooled_gaps in enumerate((interval_list_tre, interval_list_con), 1):
    plt.subplot(1, 2, interval_panel_no)
    plt.hist(pooled_gaps[1:], bins=interval_hist_bins)
# Applies to the current axes only, i.e. the right-hand (control) panel.
plt.ylim([0, 400])

In [None]:
# Histograms of the first (left) and final (right) lactate measurement times
# for the rows of `ml` whose icu_stayID appears in data.icustay_id.
cohort_lact_times = ml.loc[ml.icu_stayID.isin(data.icustay_id)]

plt.subplot(1, 2, 1)
plt.hist(cohort_lact_times.first_lact_time, bins=np.arange(-6, 6, 1))

plt.subplot(1, 2, 2)
plt.hist(cohort_lact_times.final_lact_time, bins=np.arange(21, 28))

In [None]:
# 2x2 grid of first / final lactate measurement times, split by arm.
# Columns: treated (vasopressin == 1) on the left, control (vasopressin == 0)
# on the right. Rows: first lactate time on top, final lactate time below.
newframe = ml.loc[ml.icu_stayID.isin(data.icustay_id)]
newframe_tre = newframe.loc[newframe.vasopressin == 1]
newframe_con = newframe.loc[newframe.vasopressin == 0]
first_time_bins = np.arange(-6, 6, 1)
final_time_bins = np.arange(21, 28)

# Treated column (panels 1 and 3).
plt.subplot(2, 2, 1)
plt.hist(newframe_tre.first_lact_time, bins=first_time_bins)
plt.title('Treated')

plt.subplot(2, 2, 3)
plt.hist(newframe_tre.final_lact_time, bins=final_time_bins)
plt.xlabel('Time since ICU admission (hrs)')

# Control column (panels 2 and 4).
plt.subplot(2, 2, 2)
plt.hist(newframe_con.first_lact_time, bins=first_time_bins)
plt.title('Control')

plt.subplot(2, 2, 4)
plt.hist(newframe_con.final_lact_time, bins=final_time_bins)
# The y-limit is set on this last panel only.
plt.ylim([0, 30])
plt.xlabel('Time since ICU admission (hrs)')