## Import Libraries

In [1]:
import os
import pandas as pd

import psycopg2

## Connect to the DB

In [2]:
# Settings used to create the database connection
sqluser = 'postgres'
dbname = 'mimic4'
hostname = 'localhost'
port_number = 5434
schema_name = 'omop_cdm'

# Connect to the Postgres copy of the MIMIC-IV database (dbname 'mimic4').
# The password is deliberately not stored in the notebook: it is read from the
# PGPASSWORD environment variable. When the variable is unset, None is passed,
# psycopg2 omits the parameter and libpq falls back to its own lookup
# (for example a ~/.pgpass file).
con = psycopg2.connect(
    dbname=dbname,
    user=sqluser,
    host=hostname,
    port=port_number,
    password=os.environ.get('PGPASSWORD'),
)

# Statement that can be prepended to a query so it selects from the schema above by default
query_schema = 'set search_path to ' + schema_name + ';'

## Static data

In [4]:
# Static (per-visit) attributes.
# Rows come from omop_cdm.visit_occurrence inner-joined to omop_cdm.concept (twice:
# once for visit_concept_id, once for visit_source_concept_id), mimiciv.admissions
# and mimiciv.patients.
# - visit_duration_hrs is built from the day and hour fields of
#   (visit_end_datetime - visit_start_datetime); minutes and seconds are not included.
# - The hadm_id used to join mimiciv.admissions is the second '|'-separated field of
#   visit_source_value, cast to int.
# - Rows whose visit_source_value contains '-' are excluded.
# - The last predicate keeps rows where anchor_age plus the whole number of 365-day
#   periods between Jan 1 of anchor_year and admittime is greater than 18.
#   NOTE(review): the strict '>' drops a computed value of exactly 18 - confirm intended.
# NOTE(review): the '::int' cast sits in the JOIN condition while the '-' filter sits in
# WHERE; presumably every remaining visit_source_value has a numeric second field - verify.
staticQuery = """
    SELECT
    vo.visit_occurrence_id AS visit_occurrence_id,
    con_vo.concept_name AS visit_occurrence_concept_name,
    (DATE_PART('day', (vo.visit_end_datetime - vo.visit_start_datetime)) * 24) + DATE_PART('hour', (vo.visit_end_datetime - vo.visit_start_datetime)) AS visit_duration_hrs,
    con_src.concept_name AS visit_source_concept_name,
    vo.admitting_source_value AS admitting_source_value
    FROM
    omop_cdm.visit_occurrence vo
    INNER JOIN omop_cdm.concept con_vo
    ON con_vo.concept_id = vo.visit_concept_id
    INNER JOIN omop_cdm.concept con_src
    ON con_src.concept_id = vo.visit_source_concept_id
    INNER JOIN mimiciv.admissions adm
    ON adm.hadm_id = split_part(vo.visit_source_value, '|', 2)::int
    INNER JOIN mimiciv.patients pat
    ON pat.subject_id = adm.subject_id
    WHERE visit_source_value NOT LIKE '%-%'
    AND (FLOOR(DATE_PART('day', adm.admittime - make_timestamp(pat.anchor_year, 1, 1, 0, 0, 0))/365.0) + pat.anchor_age) > 18
    ;
    """
# Run the query over the open connection and preview the first rows.
staticDf = pd.read_sql_query(staticQuery, con)
staticDf.head()

Unnamed: 0,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value
0,-1665952063,Emergency Room and Inpatient Visit,89.0,URGENT,PHYSICIAN REFERRAL
1,-1274029970,Emergency Room and Inpatient Visit,142.0,URGENT,INTERNAL TRANSFER TO OR FROM PSYCH
2,-675666536,Emergency Room and Inpatient Visit,49.0,URGENT,PHYSICIAN REFERRAL
3,380358250,Emergency Room and Inpatient Visit,51.0,URGENT,PHYSICIAN REFERRAL
4,-59688556,Emergency Room and Inpatient Visit,208.0,URGENT,TRANSFER FROM HOSPITAL


In [14]:
# Create the output folder first so the export also works on a fresh checkout
# (to_csv raises OSError when the target directory does not exist).
os.makedirs('data', exist_ok=True)
staticDf.to_csv('data/static_data_v_1.0', index=False)

## Vitals data

In [8]:
# Vital-sign measurements in long format (one row per measurement).
# vitals_stg_1: rows of etl_dataset_temp.measurement whose measurement_concept_id is in
#   the list below (labels are given in the inline SQL comments) and whose
#   value_as_number is not NULL, joined to omop_cdm.concept to get concept_name.
# vitals_stg_2: adds rn, a 1-based sequence number within each (person_id, concept_name)
#   group, ordered by measurement_datetime.
# NOTE(review): measurements are read from etl_dataset_temp while concepts come from
# omop_cdm - confirm this is the intended source schema.
# NOTE(review): rows sharing the same measurement_datetime within a group have no
# tie-breaker, so their relative rn may differ between runs.
vitalsQuery = """
    WITH vitals_stg_1 AS
    (
        SELECT
        person_id AS person_id,
        measurement_datetime AS measurement_datetime,
        unit_source_value AS unit_source_value,
        value_as_number AS value_as_number,
        cpt.concept_name AS concept_name
        FROM
        etl_dataset_temp.measurement mmt
        INNER JOIN omop_cdm.concept cpt
        ON cpt.concept_id = mmt.measurement_concept_id
        WHERE
        measurement_concept_id IN (
        3027018 -- Heart rate
        , 21492239, 3004249 -- Systolic blood pressure
        , 21492240, 3012888 -- Diastolic blood pressure
        , 3027598, 21492241 -- Mean blood pressure
        , 1175625, 3024171, 3007469 -- Respiratory rate
        , 3020891 -- Body temperature
        , 40762499 -- Oxygen saturation in Arterial blood by Pulse oximetry
        , 3016335 -- Glasgow coma score eye opening
        , 3009094 -- Glasgow coma score verbal
        , 3008223 -- Glasgow coma score motor
        )
        AND value_as_number IS NOT NULL
    )
    , vitals_stg_2 AS
    (
      SELECT
        person_id,
        measurement_datetime,
        unit_source_value,
        value_as_number,
        concept_name,
        ROW_NUMBER() OVER (PARTITION BY person_id, concept_name ORDER BY measurement_datetime) AS rn
      FROM vitals_stg_1
    )
    SELECT * FROM vitals_stg_2
    """
# Run the query over the open connection and preview the first rows.
vitalsDf = pd.read_sql_query(vitalsQuery, con)
vitalsDf.head()

Unnamed: 0,person_id,measurement_datetime,unit_source_value,value_as_number,concept_name,rn
0,-2147469031,2144-12-31 18:48:00,°F,98.9,Body temperature,1
1,-2147469031,2144-12-31 20:00:00,°F,99.4,Body temperature,2
2,-2147469031,2145-01-01 00:00:00,°F,100.8,Body temperature,3
3,-2147469031,2145-01-01 04:00:00,°F,100.5,Body temperature,4
4,-2147469031,2145-01-01 05:00:00,°F,100.0,Body temperature,5


In [15]:
# Create the output folder first so the export also works on a fresh checkout
# (to_csv raises OSError when the target directory does not exist).
os.makedirs('data', exist_ok=True)
vitalsDf.to_csv('data/vitals_data_v_1.0', index=False)

## Lab results data

In [10]:
# Lab-result measurements in long format (one row per measurement).
# labs_stg_1: rows of etl_dataset_temp.measurement whose measurement_concept_id is in
#   the list below (labels are given in the inline SQL comments) and whose
#   value_as_number is not NULL, joined to omop_cdm.concept to get concept_name.
# labs_stg_2: adds rn, a 1-based sequence number within each (person_id, concept_name)
#   group, ordered by measurement_datetime.
# NOTE(review): concept 3000963 (Hemoglobin in Blood) used to be listed twice; the
# redundant entry was removed, which does not change the result. If a different
# concept was meant in its place, add it to the list.
labsQuery = """
WITH labs_stg_1 AS
    (
        SELECT
        person_id AS person_id,
        measurement_datetime AS measurement_datetime,
        unit_source_value AS unit_source_value,
        value_as_number AS value_as_number,
        cpt.concept_name AS concept_name
        FROM
        etl_dataset_temp.measurement mmt
        INNER JOIN omop_cdm.concept cpt
        ON cpt.concept_id = mmt.measurement_concept_id
        WHERE
        measurement_concept_id IN (
        3047181	-- Lactate [Moles/volume] in Blood
		, 3013290	-- Carbon dioxide [Partial pressure] in Blood
		, 3024561	-- Albumin [Mass/volume] in Serum or Plasma
		, 3024629	-- Glucose [Mass/volume] in Urine by Test strip
		, 3008939	-- Band form neutrophils [#/volume] in Blood by Manual count
		, 3012501	-- Base excess in Blood by calculation
		, 3005456	-- Potassium [Moles/volume] in Blood
		, 3010421	-- pH of Blood
		, 3014576	-- Chloride [Moles/volume] in Serum or Plasma
		, 3031147	-- Carbon dioxide, total [Moles/volume] in Blood by calculation
		, 3024128	-- Bilirubin.total [Mass/volume] in Serum or Plasma
		, 3000905	-- Leukocytes [#/volume] in Blood by Automated count
		, 3016723	-- Creatinine [Mass/volume] in Serum or Plasma
		, 3022217	-- INR in Platelet poor plasma by Coagulation assay
		, 3019550	-- Sodium [Moles/volume] in Serum or Plasma
		, 3000285	-- Sodium [Moles/volume] in Blood
		, 3000963	-- Hemoglobin [Mass/volume] in Blood
		, 3018672	-- pH of Body fluid
		, 3024929	-- Platelets [#/volume] in Blood by Automated count
		, 3013682	-- Urea nitrogen [Mass/volume] in Serum or Plasma
		, 3004501	-- Glucose [Mass/volume] in Serum or Plasma
		, 3018572	-- Chloride [Moles/volume] in Blood
		, 3027315	-- Oxygen [Partial pressure] in Blood
		, 3016293	-- Bicarbonate [Moles/volume] in Serum or Plasma
		, 3023103	-- Potassium [Moles/volume] in Serum or Plasma
		, 3037278	-- Anion gap 4 in Serum or Plasma
		, 3003282	-- Leukocytes [#/volume] in Blood by Manual count
		, 3023314	-- Hematocrit [Volume Fraction] of Blood by Automated count
		, 3013466	-- aPTT in Blood by Coagulation assay
        )
        AND value_as_number IS NOT NULL
    )
    , labs_stg_2 AS
    (
      SELECT
        person_id,
        measurement_datetime,
        unit_source_value,
        value_as_number,
        concept_name,
        ROW_NUMBER() OVER (PARTITION BY person_id, concept_name ORDER BY measurement_datetime) AS rn
      FROM labs_stg_1
    )
    SELECT * FROM labs_stg_2
    """
# Run the query over the open connection and preview the first rows.
labsDf = pd.read_sql_query(labsQuery, con)
labsDf.head()

Unnamed: 0,person_id,measurement_datetime,unit_source_value,value_as_number,concept_name,rn
0,-2147469031,2144-12-31 16:00:00,mEq/L,20.0,Bicarbonate [Moles/volume] in Serum or Plasma,1
1,-2147469031,2145-01-01 01:47:00,mEq/L,19.0,Bicarbonate [Moles/volume] in Serum or Plasma,2
2,-2147469031,2144-12-31 16:00:00,mEq/L,102.0,Chloride [Moles/volume] in Serum or Plasma,1
3,-2147469031,2145-01-01 01:47:00,mEq/L,104.0,Chloride [Moles/volume] in Serum or Plasma,2
4,-2147469031,2144-12-31 16:00:00,mg/dL,3.2,Creatinine [Mass/volume] in Serum or Plasma,1


In [16]:
# Create the output folder first so the export also works on a fresh checkout
# (to_csv raises OSError when the target directory does not exist).
os.makedirs('data', exist_ok=True)
labsDf.to_csv('data/labs_data_v_1.0', index=False)

## Mortality Data

In [12]:
# Mortality flags, one row per (visit_occurrence, death) pair produced by the joins.
# - discharge_mortality: death_datetime is exactly equal to visit_end_datetime.
# - N-day flags: death_datetime is no later than visit_end_datetime + N days.
# The INNER JOIN on omop_cdm.death means only persons that have a death row appear.
# NOTE(review): no visit or person identifier is selected, so these rows cannot be
# keyed back to the other extracts - confirm downstream code does not need one.
# NOTE(review): the N-day comparisons have no lower bound, so a death_datetime earlier
# than visit_end_datetime also yields True while discharge_mortality stays False.
# NOTE(review): if persons without a death row should be labelled False rather than
# omitted, this would need a LEFT JOIN - verify the intended cohort.
mortalityQuery = """
SELECT
(vo.visit_end_datetime = dth.death_datetime) AS discharge_mortality,
(vo.visit_end_datetime + interval '1 day' >= dth.death_datetime) AS one_day_mortality,
(vo.visit_end_datetime + interval '2 day' >= dth.death_datetime) AS two_day_mortality,
(vo.visit_end_datetime + interval '30 day' >= dth.death_datetime) AS thirty_day_mortality,
(vo.visit_end_datetime + interval '60 day' >= dth.death_datetime) AS sixty_day_mortality,
(vo.visit_end_datetime + interval '90 day' >= dth.death_datetime) AS ninety_day_mortality
FROM
omop_cdm.visit_occurrence vo
INNER JOIN omop_cdm.person per
ON per.person_id = vo.person_id
INNER JOIN omop_cdm.death dth
ON dth.person_id = per.person_id
;
    """
# Run the query over the open connection and preview the first rows.
mortalityDf = pd.read_sql_query(mortalityQuery, con)
mortalityDf.head()

Unnamed: 0,discharge_mortality,one_day_mortality,two_day_mortality,thirty_day_mortality,sixty_day_mortality,ninety_day_mortality
0,True,True,True,True,True,True
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,True,True,True,True,True,True


In [17]:
# Create the output folder first so the export also works on a fresh checkout
# (to_csv raises OSError when the target directory does not exist).
os.makedirs('data', exist_ok=True)
mortalityDf.to_csv('data/mortality_data_v_1.0', index=False)