### First lab figures

In [1]:
import functools
import numpy as np
import pandas as pd
from scipy.stats import kstest
import matplotlib.pyplot as plt
import pylab as pl
import psycopg2
%matplotlib inline
plt.style.use('ggplot')

  """)


In [5]:
# create a database connection
sqluser = ''
dbname = ''
schema_name = ''

## Lab ranges

'Normal' ranges for lab values:

Lab Value	|  Lower limit	|  Upper Limit	|  Units
--- | --- | --- | ---
Bicarbonate	| 22	| 32	| mEq/L
BUN	| 6	| 20 | mEq/L
Calcium	| 8.4	| 10.3	| mg/dL
Chloride	| 96	| 108	| mEq/L
Creatinine	| 0.4	| 1.1	| mEq/L
Hemoglobin	| 11.2	 | 15.7	| g/dL
Lactate	| 0.5	| 2	| mmol/L
Magnesium	| 1.6	| 2.6	| mg/dL
Phosphate	| 2.7	| 4.5	| mg/dL
Platelet count	| 150	| 400	| K/uL
Potassium	| 3.3	| 5.1	| mEq/L
Sodium	| 133	| 145	| mEq/L

In [6]:
# Create dictionary of ranges
# Keys should match the lab names in the query below
# Are these values correct given the units? Need to check.

lab_ranges = {'BICARBONATE': [22,32],
              'BUN': [6,20],
              'CALCIUM': [8.4,10.3],
              'CHLORIDE': [96,108],
              'CREATININE': [0.4,1.1],
              'HEMOGLOBIN': [11.2,15.7],
              'LACTATE': [0.5,2.0],
              'MAGNESIUM': [1.6,2.6],
              'PHOSPHATE': [2.7,4.5],
              'PLATELET': [150,400],
              'POTASSIUM': [3.3,5.1],
              'SODIUM': [133,145],
              'FREECALCIUM': [4.64, 5.28]
             }

### SQL: get first laboratory measurements

In [7]:
con = psycopg2.connect(dbname=dbname, user=sqluser, password='mimic')
cur = con.cursor()
cur.execute('SET search_path to ' + schema_name)

query = \
"""
WITH pvt AS (
  SELECT ie.subject_id, ie.hadm_id, ie.outtime, ie.icustay_id, le.charttime, ad.deathtime, ie.los
  , ROUND((cast(ad.admittime as date) - cast(p.dob as date)) / 365.242, 2) as first_admit_age
  , CASE when ad.deathtime between ie.intime and ie.outtime THEN 1 ELSE 0 END AS mort_icu
  , CASE when ad.deathtime between ad.admittime and ad.dischtime THEN 1 ELSE 0 END AS mort_hosp
  -- here we assign labels to ITEMIDs
  -- this also fuses together multiple ITEMIDs containing the same data
  , CASE
        when le.itemid = 50868 then 'ANION GAP'
        when le.itemid = 50862 then 'ALBUMIN'
        when le.itemid = 50882 then 'BICARBONATE'
        when le.itemid = 50885 then 'BILIRUBIN'
        when le.itemid = 50912 then 'CREATININE'
        when le.itemid = 50806 then 'CHLORIDE'
        when le.itemid = 50902 then 'CHLORIDE'
        when itemid = 50809 then 'GLUCOSE'
        when itemid = 50931 then 'GLUCOSE'
        when itemid = 50810 then 'HEMATOCRIT'
        when itemid = 51221 then 'HEMATOCRIT'
        when itemid = 50811 then 'HEMOGLOBIN'
        when itemid = 51222 then 'HEMOGLOBIN'
        when itemid = 50813 then 'LACTATE'
        when itemid = 50960 then 'MAGNESIUM'
        when itemid = 50970 then 'PHOSPHATE'
        when itemid = 51265 then 'PLATELET'
        when itemid = 50822 then 'POTASSIUM'
        when itemid = 50971 then 'POTASSIUM'
        when itemid = 51275 then 'PTT'
        when itemid = 51237 then 'INR'
        when itemid = 51274 then 'PT'
        when itemid = 50824 then 'SODIUM'
        when itemid = 50983 then 'SODIUM'
        when itemid = 51006 then 'BUN'
        when itemid = 51300 then 'WBC'
        when itemid = 51301 then 'WBC'
        -- Calcium
        when itemid = 50893 then 'CALCIUM'
        -- Free calcium
        when itemid = 50808 then 'FREECALCIUM'
      ELSE null
      END AS label
  , -- add in some sanity checks on the values
    -- the where clause below requires all valuenum to be > 0,
    -- so these are only upper limit checks
    CASE
      when le.itemid = 50862 and le.valuenum >    10 then null -- g/dL 'ALBUMIN'
      when le.itemid = 50868 and le.valuenum > 10000 then null -- mEq/L 'ANION GAP'
      when le.itemid = 50882 and le.valuenum > 10000 then null -- mEq/L 'BICARBONATE'
      when le.itemid = 50885 and le.valuenum >   150 then null -- mg/dL 'BILIRUBIN'
      when le.itemid = 50806 and le.valuenum > 10000 then null -- mEq/L 'CHLORIDE'
      when le.itemid = 50902 and le.valuenum > 10000 then null -- mEq/L 'CHLORIDE'
      when le.itemid = 50912 and le.valuenum >   150 then null -- mg/dL 'CREATININE'
      when le.itemid = 50809 and le.valuenum > 10000 then null -- mg/dL 'GLUCOSE'
      when le.itemid = 50931 and le.valuenum > 10000 then null -- mg/dL 'GLUCOSE'
      when le.itemid = 50810 and le.valuenum >   100 then null -- % 'HEMATOCRIT'
      when le.itemid = 51221 and le.valuenum >   100 then null -- % 'HEMATOCRIT'
      when le.itemid = 50811 and le.valuenum >    50 then null -- g/dL 'HEMOGLOBIN'
      when le.itemid = 51222 and le.valuenum >    50 then null -- g/dL 'HEMOGLOBIN'
      when le.itemid = 50813 and le.valuenum >    50 then null -- mmol/L 'LACTATE'
      when le.itemid = 50960 and le.valuenum >    60 then null -- mmol/L 'MAGNESIUM'
      when le.itemid = 50970 and le.valuenum >    60 then null -- mg/dL 'PHOSPHATE'
      when le.itemid = 51265 and le.valuenum > 10000 then null -- K/uL 'PLATELET'
      when le.itemid = 50822 and le.valuenum >    30 then null -- mEq/L 'POTASSIUM'
      when le.itemid = 50971 and le.valuenum >    30 then null -- mEq/L 'POTASSIUM'
      when le.itemid = 51275 and le.valuenum >   150 then null -- sec 'PTT'
      when le.itemid = 51237 and le.valuenum >    50 then null -- 'INR'
      when le.itemid = 51274 and le.valuenum >   150 then null -- sec 'PT'
      when le.itemid = 50824 and le.valuenum >   200 then null -- mEq/L == mmol/L 'SODIUM'
      when le.itemid = 50983 and le.valuenum >   200 then null -- mEq/L == mmol/L 'SODIUM'
      when le.itemid = 51006 and le.valuenum >   300 then null -- 'BUN'
      when le.itemid = 51300 and le.valuenum >  1000 then null -- 'WBC'
      when le.itemid = 51301 and le.valuenum >  1000 then null -- 'WBC'
      -- Calcium
      when le.itemid = 50893 and le.valuenum > 300 then null
      -- Free Calcium
      when le.itemid = 50808 and le.valuenum > 500 then null
    ELSE le.valuenum
    END AS valuenum
  FROM icustays ie

  LEFT JOIN labevents le
    ON le.subject_id = ie.subject_id
    AND le.hadm_id = ie.hadm_id
    AND le.charttime between (ie.intime - interval '24' hour)
    AND (ie.intime + interval '24' hour)
    AND le.itemid IN
    (
      -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS
      50868, -- ANION GAP | CHEMISTRY | BLOOD | 769895
      50862, -- ALBUMIN | CHEMISTRY | BLOOD | 146697
      50882, -- BICARBONATE | CHEMISTRY | BLOOD | 780733
      50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277
      50912, -- CREATININE | CHEMISTRY | BLOOD | 797476
      50902, -- CHLORIDE | CHEMISTRY | BLOOD | 795568
      50806, -- CHLORIDE, WHOLE BLOOD | BLOOD GAS | BLOOD | 48187
      50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981
      50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734
      51221, -- HEMATOCRIT | HEMATOLOGY | BLOOD | 881846
      50810, -- HEMATOCRIT, CALCULATED | BLOOD GAS | BLOOD | 89715
      51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523
      50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712
      50813, -- LACTATE | BLOOD GAS | BLOOD | 187124
      50960, -- MAGNESIUM | CHEMISTRY | BLOOD | 664191
      50970, -- PHOSPHATE | CHEMISTRY | BLOOD | 590524
      51265, -- PLATELET COUNT | HEMATOLOGY | BLOOD | 778444
      50971, -- POTASSIUM | CHEMISTRY | BLOOD | 845825
      50822, -- POTASSIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 192946
      51275, -- PTT | HEMATOLOGY | BLOOD | 474937
      51237, -- INR(PT) | HEMATOLOGY | BLOOD | 471183
      51274, -- PT | HEMATOLOGY | BLOOD | 469090
      50983, -- SODIUM | CHEMISTRY | BLOOD | 808489
      50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503
      51006, -- UREA NITROGEN | CHEMISTRY | BLOOD | 791925
      51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301
      51300,  -- WBC COUNT | HEMATOLOGY | BLOOD | 2371
      -- calcium total
      50893, -- CALCIUM TOTAL | NA | NA | NA
      -- Free calcium
      50808  --FREE CALCIUM | NA | NA | NA
    )
    AND le.valuenum IS NOT null
    AND le.valuenum > 0 -- lab values cannot be 0 and cannot be negative

    LEFT JOIN admissions ad
    ON ie.subject_id = ad.subject_id
    AND ie.hadm_id = ad.hadm_id

    INNER JOIN patients p
    ON ie.subject_id = p.subject_id
    WHERE ROUND((cast(ad.admittime as date) - cast(p.dob as date)) / 365.242, 2) > 15
    -- WHERE ie.subject_id < 10000

),
ranked AS (
SELECT pvt.*, DENSE_RANK() OVER (PARTITION BY
    pvt.subject_id, pvt.hadm_id,pvt.icustay_id,pvt.label ORDER BY cast(pvt.charttime as date)) as drank
FROM pvt
)
SELECT r.subject_id, r.hadm_id, r.icustay_id, r.mort_icu, r.mort_hosp
  -- , max(r.hadm_id) as HADM_ID
  -- , max(r.icustay_id) as ICUSTAY_ID
  , max(r.los) as LOS
  , max(r.mort_icu) as MORT_ICU
  , max(r.mort_hosp) as MORT_HOSP
  , max(r.first_admit_age) as FIRST_ADMIT_AGE
  , max(r.charttime) as CHARTTIME
  , max(case when label = 'ANION GAP' then valuenum else null end) as ANIONGAP_1st
  , max(case when label = 'ALBUMIN' then valuenum else null end) as ALBUMIN_1st
  , max(case when label = 'BICARBONATE' then valuenum else null end) as BICARBONATE_1st
  , max(case when label = 'BILIRUBIN' then valuenum else null end) as BILIRUBIN_1st
  , max(case when label = 'CREATININE' then valuenum else null end) as CREATININE_1st
  , max(case when label = 'CHLORIDE' then valuenum else null end) as CHLORIDE_1st
  , max(case when label = 'GLUCOSE' then valuenum else null end) as GLUCOSE_1st
  , max(case when label = 'HEMATOCRIT' then valuenum else null end) as HEMATOCRIT_1st
  , max(case when label = 'HEMOGLOBIN' then valuenum else null end) as HEMOGLOBIN_1st
  , max(case when label = 'LACTATE' then valuenum else null end) as LACTATE_1st
  , max(case when label = 'MAGNESIUM' then valuenum else null end) as MAGNESIUM_1st
  , max(case when label = 'PHOSPHATE' then valuenum else null end) as PHOSPHATE_1st
  , max(case when label = 'PLATELET' then valuenum else null end) as PLATELET_1st
  , max(case when label = 'POTASSIUM' then valuenum else null end) as POTASSIUM_1st
  , max(case when label = 'PTT' then valuenum else null end) as PTT_1st
  , max(case when label = 'INR' then valuenum else null end) as INR_1st
  , max(case when label = 'PT' then valuenum else null end) as PT_1st
  , max(case when label = 'SODIUM' then valuenum else null end) as SODIUM_1st
  , max(case when label = 'BUN' then valuenum else null end) as BUN_1st
  , max(case when label = 'WBC' then valuenum else null end) as WBC_1st
  -- Calcium
  , max(case when label = 'CALCIUM' then valuenum else null end) as CALCIUM_1st
  , max(case when label = 'FREECALCIUM' then valuenum else null end) as FREECALCIUM_1st
FROM ranked r
WHERE r.drank = 1
GROUP BY r.subject_id, r.hadm_id, r.icustay_id, r.mort_icu, r.mort_hosp, r.drank
ORDER BY r.subject_id, r.hadm_id, r.icustay_id, r.mort_icu, r.mort_hosp, r.drank;
"""

data = pd.read_sql_query(query,con)

In [None]:
# print data

In [8]:
data.to_csv('all_data_initial.csv')

#### From initial to first lab measurements (to be updated)

In [9]:
data_initial = pd.read_csv("all_data_initial.csv", index_col=0)

In [10]:
data_final = data_initial.groupby('subject_id').apply(lambda x: x.sort_values('charttime')).reset_index(drop=True)

In [11]:
data_final.head()

Unnamed: 0,subject_id,hadm_id,icustay_id,mort_icu,mort_hosp,los,mort_icu.1,mort_hosp.1,first_admit_age,charttime,...,platelet_1st,potassium_1st,ptt_1st,inr_1st,pt_1st,sodium_1st,bun_1st,wbc_1st,calcium_1st,freecalcium_1st
0,3,145834,211552,0,0,6.0646,0,0,76.52,2101-10-20 21:51:00,...,282.0,5.4,58.3,1.7,15.7,153.0,53.0,19.1,8.2,1.09
1,4,185777,294638,0,0,1.6785,0,0,47.84,2191-03-16 05:42:00,...,207.0,3.1,31.3,1.0,12.3,135.0,9.0,9.7,8.9,
2,6,107064,228232,0,0,3.6729,0,0,65.94,2175-05-30 22:56:00,...,315.0,5.4,139.0,1.4,14.6,138.0,62.0,10.6,8.6,1.27
3,9,150750,220597,1,1,5.3231,1,1,41.79,2149-11-10 09:40:00,...,258.0,2.9,21.7,1.1,12.7,140.0,16.0,7.5,9.2,0.99
4,11,194540,229441,0,0,1.5844,0,0,50.15,2178-04-17 02:35:00,...,229.0,3.8,28.3,1.1,13.0,138.0,12.0,8.5,9.6,


In [12]:
data_final = data_final.groupby('subject_id').first().reset_index()

In [13]:
data_final.head()

Unnamed: 0,subject_id,hadm_id,icustay_id,mort_icu,mort_hosp,los,mort_icu.1,mort_hosp.1,first_admit_age,charttime,...,platelet_1st,potassium_1st,ptt_1st,inr_1st,pt_1st,sodium_1st,bun_1st,wbc_1st,calcium_1st,freecalcium_1st
0,3,145834,211552,0,0,6.0646,0,0,76.52,2101-10-20 21:51:00,...,282.0,5.4,58.3,1.7,15.7,153.0,53.0,19.1,8.2,1.09
1,4,185777,294638,0,0,1.6785,0,0,47.84,2191-03-16 05:42:00,...,207.0,3.1,31.3,1.0,12.3,135.0,9.0,9.7,8.9,
2,6,107064,228232,0,0,3.6729,0,0,65.94,2175-05-30 22:56:00,...,315.0,5.4,139.0,1.4,14.6,138.0,62.0,10.6,8.6,1.27
3,9,150750,220597,1,1,5.3231,1,1,41.79,2149-11-10 09:40:00,...,258.0,2.9,21.7,1.1,12.7,140.0,16.0,7.5,9.2,0.99
4,11,194540,229441,0,0,1.5844,0,0,50.15,2178-04-17 02:35:00,...,229.0,3.8,28.3,1.1,13.0,138.0,12.0,8.5,9.6,


In [21]:
data_final.to_csv('all_data_final.csv')

In [14]:
con = psycopg2.connect(dbname=dbname, user=sqluser, password='mimic')
cur = con.cursor()
cur.execute('SET search_path to ' + schema_name)

query = \
"""
SELECT VALUEUOM FROM labevents le
WHERE le.itemid = 50808
"""

units = pd.read_sql_query(query,con)
print(units.head())

  valueuom
0   mmol/L
1   mmol/L
2   mmol/L
3   mmol/L
4   mmol/L
