## Import Libraries

In [1]:
import os
import pandas as pd

import psycopg2

## Connect to the DB

In [2]:
# Connection settings for the local PostgreSQL instance that holds the data.
sqluser = 'postgres'
dbname = 'mimic4'
hostname = 'localhost'
port_number = 5434
schema_name = 'omop_cdm'

# Take the password from the environment so the credential does not have to be
# committed with the notebook. The previous literal is kept only as a fallback
# so existing local development setups keep working unchanged.
sqlpassword = os.environ.get('PGPASSWORD', 'mysecretpassword')

# Connect to postgres with a copy of the MIMIC-IV database
con = psycopg2.connect(dbname=dbname, user=sqluser, host=hostname, port=port_number, password=sqlpassword)

# Statement that can be prepended to a query so unqualified table names resolve
# against `schema_name`. The cells visible in this notebook use fully qualified
# table names and do not reference it.
query_schema = 'set search_path to ' + schema_name + ';'

## Create Schema

In [3]:
# Rebuild the sepsis_micro schema from scratch. The drop uses `cascade`, so any
# object already inside the schema is removed together with it.
dropSchemaQuery = "drop schema if exists sepsis_micro cascade"
createSchemaQuery = "create schema if not exists sepsis_micro"

# Both statements run on one cursor inside a single connection context.
with con, con.cursor() as cursor:
    cursor.execute(dropSchemaQuery)
    cursor.execute(createSchemaQuery)

## Cohort Selection

In [4]:
# Cohort table: one row per (subject, blood-culture specimen) that has an
# organism name recorded and whose org_itemid is not 90760, restricted to
# patients whose computed age at the culture date is above 18.
# chart_time is the earliest charttime of the specimen, falling back to the
# earliest chartdate when no charttime is available.
# NOTE(review): the age filter is strictly `> 18`, so a computed age of exactly
# 18 is excluded -- confirm this is the intended cohort definition.
dropCohortQuery = "drop table if exists sepsis_micro.cohort cascade"
cohortQuery = """
        create table sepsis_micro.cohort as
        with stg1 as
        (
            select
            micro_specimen_id
            , subject_id
            , min(chartdate) as chartdate
            , min(charttime) as charttime
            from
            mimiciv.microbiologyevents
            where
            spec_type_desc = 'BLOOD CULTURE'
            and org_itemid != 90760
            and org_name is not null
            group by subject_id, micro_specimen_id
        )
        select
        stg1.micro_specimen_id
        , per.person_id
        , coalesce(stg1.charttime, stg1.chartdate) as chart_time
        from stg1
        inner join mimiciv.patients pat
        on stg1.subject_id = pat.subject_id
        inner join omop_cdm.person per
        on per.person_source_value::int = pat.subject_id
        where (floor(date_part('day', stg1.chartdate - make_timestamp(pat.anchor_year, 1, 1, 0, 0, 0))/365.0) + pat.anchor_age) > 18
        ;
    """

# Drop any previous version of the table, then materialise it again.
with con, con.cursor() as cursor:
    cursor.execute(dropCohortQuery)
    cursor.execute(cohortQuery)

In [5]:
# Row count of sepsis_micro.cohort; the bare name on the last line displays it.
cohortCountQuery = "select count(*) from sepsis_micro.cohort;"
cohortCountDf = pd.read_sql_query(sql=cohortCountQuery, con=con)
cohortCount = cohortCountDf.loc[0, 'count']
cohortCount

25784

## Static Data

In [6]:
# Static (per-specimen) attributes: gender, the visit that strictly contains
# chart_time (its concept names, admitting source and duration in hours), and
# the age computed from the patient's anchor year / anchor age.
# Every join is an inner join, so a specimen without a matching visit or
# concept row does not appear in the resulting table.
dropStaticQuery = "drop table if exists sepsis_micro.static cascade"
staticQuery = """
    create table sepsis_micro.static as
    select
    coh.micro_specimen_id as micro_specimen_id,
    coh.person_id as person_id,
    per.gender_source_value as gender_source_value,
    con_vo.concept_name AS visit_occurrence_concept_name,
    (DATE_PART('day', (vo.visit_end_datetime - vo.visit_start_datetime)) * 24) + DATE_PART('hour', (vo.visit_end_datetime - vo.visit_start_datetime)) AS visit_duration_hrs,
    con_src.concept_name AS visit_source_concept_name,
    vo.admitting_source_value AS admitting_source_value,
    (floor(date_part('day', coh.chart_time - make_timestamp(pat.anchor_year, 1, 1, 0, 0, 0))/365.0) + pat.anchor_age) as age
    from
    sepsis_micro.cohort coh
    inner join omop_cdm.person per
    on coh.person_id = per.person_id
    inner join mimiciv.patients pat
    on pat.subject_id = per.person_source_value::int
    inner join omop_cdm.visit_occurrence vo
    on vo.person_id = per.person_id
    and (vo.visit_start_datetime < coh.chart_time) and (vo.visit_end_datetime > coh.chart_time)
    inner join omop_cdm.concept con_vo
    on con_vo.concept_id = vo.visit_concept_id
    inner join omop_cdm.concept con_src
    on con_src.concept_id = vo.visit_source_concept_id
    ;
    """

# Drop any previous version of the table, then materialise it again.
with con, con.cursor() as cursor:
    cursor.execute(dropStaticQuery)
    cursor.execute(staticQuery)

In [7]:
# Row count of sepsis_micro.static; the bare name on the last line displays it.
staticCountQuery = "select count(*) from sepsis_micro.static;"
staticCountDf = pd.read_sql_query(sql=staticCountQuery, con=con)
staticCount = staticCountDf.loc[0, 'count']
staticCount

24272

## Vitals data

In [8]:
# Vitals table: every non-null numeric measurement of the listed vital-sign
# concepts that falls strictly within 48 hours before or after a specimen's
# chart_time. Measurements are joined to the cohort on person_id, so one
# measurement is repeated for each of the person's specimens whose window
# contains it.
#
# `rn` numbers the measurements of one concept in time order. The cohort can
# hold several specimens for the same person, so the window is partitioned by
# micro_specimen_id as well; partitioning by person alone would interleave the
# rows of different specimens and `rn` would not be a per-specimen ordinal.
dropVitalsQuery = """drop table if exists sepsis_micro.vitals cascade"""
vitalsQuery = """
    create table sepsis_micro.vitals as
    with vitals_stg_1 as
    (
        select
        coh.micro_specimen_id as micro_specimen_id,
        mmt.person_id as person_id,
        mmt.measurement_datetime as measurement_datetime,
        mmt.unit_source_value as unit_source_value,
        mmt.value_as_number as value_as_number,
        cpt.concept_name as concept_name
        from
        omop_cdm.measurement mmt
        inner join omop_cdm.concept cpt
        on cpt.concept_id = mmt.measurement_concept_id
        inner join sepsis_micro.cohort coh
        on coh.person_id = mmt.person_id
        where
        mmt.measurement_concept_id in (
        3027018 -- Heart rate
        , 21492239, 3004249 -- Systolic blood pressure
        , 21492240, 3012888 -- Diastolic blood pressure
        , 3027598, 21492241 -- Mean blood pressure
        , 1175625, 3024171, 3007469 -- Respiratory rate
        , 3020891 -- Body temperature
        , 40762499 -- Oxygen saturation in Arterial blood by Pulse oximetry
        , 3016335 -- Glasgow coma score eye opening
        , 3009094 -- Glasgow coma score verbal
        , 3008223 -- Glasgow coma score motor
        )
        and mmt.value_as_number is not null
        and (mmt.measurement_datetime > coh.chart_time - interval '48 hour')
        and (mmt.measurement_datetime < coh.chart_time + interval '48 hour')
    )
    , vitals_stg_2 AS
    (
      select
        micro_specimen_id,
        person_id,
        measurement_datetime,
        unit_source_value,
        value_as_number,
        concept_name,
        -- ordinal of the measurement within its own specimen and concept
        row_number() over (partition by micro_specimen_id, person_id, concept_name order by measurement_datetime) as rn
      from vitals_stg_1
    )
    select * from vitals_stg_2
    ;
    """

# Drop any previous version of the table, then materialise it again.
with con:
    with con.cursor() as cursor:
        cursor.execute(dropVitalsQuery)
        cursor.execute(vitalsQuery)

In [9]:
# Row count of sepsis_micro.vitals; the bare name on the last line displays it.
vitalsCountQuery = "select count(*) from sepsis_micro.vitals;"
vitalsCountDf = pd.read_sql_query(sql=vitalsCountQuery, con=con)
vitalsCount = vitalsCountDf.loc[0, 'count']
vitalsCount

4205220

## Lab Measurements Data

In [10]:
# Lab measurements table: every non-null numeric measurement of the listed lab
# concepts that falls strictly within 72 hours before or after a specimen's
# chart_time. Measurements are joined to the cohort on person_id, so one
# measurement is repeated for each of the person's specimens whose window
# contains it.
#
# `rn` numbers the measurements of one concept in time order. The cohort can
# hold several specimens for the same person, so the window is partitioned by
# micro_specimen_id as well; partitioning by person alone would interleave the
# rows of different specimens and `rn` would not be a per-specimen ordinal.
#
# NOTE(review): this reads from etl_dataset_temp.measurement while the vitals
# query reads from omop_cdm.measurement -- confirm the staging schema is the
# intended source for labs.
dropLabsQuery = """drop table if exists sepsis_micro.lab_measurements cascade"""
labsQuery = """
    create table sepsis_micro.lab_measurements as
    with labs_stg_1 as
        (
            select
            coh.micro_specimen_id as micro_specimen_id,
            mmt.person_id as person_id,
            mmt.measurement_datetime as measurement_datetime,
            mmt.unit_source_value as unit_source_value,
            mmt.value_as_number as value_as_number,
            cpt.concept_name as concept_name
            from
            etl_dataset_temp.measurement mmt
            inner join omop_cdm.concept cpt
            on cpt.concept_id = mmt.measurement_concept_id
            inner join sepsis_micro.cohort coh
            on coh.person_id = mmt.person_id
            where
            mmt.measurement_concept_id in (
            3047181 -- Lactate [Moles/volume] in Blood
            , 3013290 -- Carbon dioxide [Partial pressure] in Blood
            , 3024561 -- Albumin [Mass/volume] in Serum or Plasma
            , 3024629 -- Glucose [Mass/volume] in Urine by Test strip
            , 3008939 -- Band form neutrophils [#/volume] in Blood by Manual count
            , 3012501 -- Base excess in Blood by calculation
            , 3005456 -- Potassium [Moles/volume] in Blood
            , 3010421 -- pH of Blood
            , 3014576 -- Chloride [Moles/volume] in Serum or Plasma
            , 3031147 -- Carbon dioxide, total [Moles/volume] in Blood by calculation
            , 3024128 -- Bilirubin.total [Mass/volume] in Serum or Plasma
            , 3000905 -- Leukocytes [#/volume] in Blood by Automated count
            , 3016723 -- Creatinine [Mass/volume] in Serum or Plasma
            , 3022217 -- INR in Platelet poor plasma by Coagulation assay
            , 3019550 -- Sodium [Moles/volume] in Serum or Plasma
            , 3000285 -- Sodium [Moles/volume] in Blood
            , 3000963 -- Hemoglobin [Mass/volume] in Blood
            , 3018672 -- pH of Body fluid
            , 3024929 -- Platelets [#/volume] in Blood by Automated count
            , 3013682 -- Urea nitrogen [Mass/volume] in Serum or Plasma
            , 3004501 -- Glucose [Mass/volume] in Serum or Plasma
            , 3018572 -- Chloride [Moles/volume] in Blood
            , 3027315 -- Oxygen [Partial pressure] in Blood
            , 3016293 -- Bicarbonate [Moles/volume] in Serum or Plasma
            , 3023103 -- Potassium [Moles/volume] in Serum or Plasma
            , 3037278 -- Anion gap 4 in Serum or Plasma
            , 3003282 -- Leukocytes [#/volume] in Blood by Manual count
            , 3023314 -- Hematocrit [Volume Fraction] of Blood by Automated count
            , 3013466 -- aPTT in Blood by Coagulation assay
            )
            and mmt.value_as_number is not null
            and (mmt.measurement_datetime > coh.chart_time - interval '72 hour')
            and (mmt.measurement_datetime < coh.chart_time + interval '72 hour')
        )
        , labs_stg_2 as
        (
          select
            micro_specimen_id,
            person_id,
            measurement_datetime,
            unit_source_value,
            value_as_number,
            concept_name,
            -- ordinal of the measurement within its own specimen and concept
            row_number() over (partition by micro_specimen_id, person_id, concept_name order by measurement_datetime) as rn
          from labs_stg_1
        )
        select * from labs_stg_2
    ;
    """

# Drop any previous version of the table, then materialise it again.
with con:
    with con.cursor() as cursor:
        cursor.execute(dropLabsQuery)
        cursor.execute(labsQuery)

In [11]:
# Row count of sepsis_micro.lab_measurements; the last line displays it.
labsCountQuery = "select count(*) from sepsis_micro.lab_measurements;"
labsCountDf = pd.read_sql_query(sql=labsCountQuery, con=con)
labsCount = labsCountDf.loc[0, 'count']
labsCount

378863

## Mortality Data

In [12]:
# Mortality table: one row per cohort row with boolean flags for death within
# 7, 14, 21, 28, 60, 90 and 120 days of chart_time. A flag is true when the
# recorded time of death is at or before chart_time + N days (a death recorded
# before chart_time therefore also yields true) and false when the person has
# no row in omop_cdm.death.
#
# The time of death is death_datetime, falling back to death_date when
# death_datetime is NULL. Without the fallback a death row that carries only a
# date would make every comparison, and so every flag, NULL instead of a boolean.
dropMortalityQuery = """drop table if exists sepsis_micro.mortality cascade"""
mortalityQuery = """
    create table sepsis_micro.mortality as
    select
    coh.micro_specimen_id as micro_specimen_id,
    coh.person_id as person_id
    , case when dth.person_id is null then false else (coh.chart_time + interval '7 day' >= dth.death_ts) end as seven_day_mortality
    , case when dth.person_id is null then false else (coh.chart_time + interval '14 day' >= dth.death_ts) end as fourteen_day_mortality
    , case when dth.person_id is null then false else (coh.chart_time + interval '21 day' >= dth.death_ts) end as twentyone_day_mortality
    , case when dth.person_id is null then false else (coh.chart_time + interval '28 day' >= dth.death_ts) end as twentyeight_day_mortality
    , case when dth.person_id is null then false else (coh.chart_time + interval '60 day' >= dth.death_ts) end as sixty_day_mortality
    , case when dth.person_id is null then false else (coh.chart_time + interval '90 day' >= dth.death_ts) end as ninety_day_mortality
    , case when dth.person_id is null then false else (coh.chart_time + interval '120 day' >= dth.death_ts) end as onetwenty_day_mortality
    from
    sepsis_micro.cohort coh
    left join
    (
        select
        person_id
        , coalesce(death_datetime, death_date::timestamp) as death_ts
        from
        omop_cdm.death
    ) dth
    on dth.person_id = coh.person_id
    ;
    """

# Drop any previous version of the table, then materialise it again.
with con:
    with con.cursor() as cursor:
        cursor.execute(dropMortalityQuery)
        cursor.execute(mortalityQuery)

In [13]:
# Row count of sepsis_micro.mortality; the bare name on the last line displays it.
mortalityCountQuery = "select count(*) from sepsis_micro.mortality;"
mortalityCountDf = pd.read_sql_query(sql=mortalityCountQuery, con=con)
mortalityCount = mortalityCountDf.loc[0, 'count']
mortalityCount

25784