## Install Libraries

In [1]:
import os
import pandas as pd

import psycopg2

## Connect to the DB

In [2]:
# information used to create a database connection
sqluser = 'postgres'
dbname = 'mimic4'
hostname = 'localhost'
port_number = 5434
schema_name = 'omop_cdm'

# Connect to postgres with a copy of the MIMIC-III database
con = psycopg2.connect(dbname=dbname, user=sqluser, host=hostname, port=port_number, password='mysecretpassword')

# the below statement is prepended to queries to ensure they select from the right schema
query_schema = 'set search_path to ' + schema_name + ';'

## Static data (Version 1) - [Micro Results + Antibiotic prescription]

In [11]:
staticQueryPos = """
    SELECT
    vo.person_id AS person_id,
    vo.visit_start_datetime AS visit_start_datetime,
    vo.visit_end_datetime AS visit_end_datetime,
    vo.visit_occurrence_id AS visit_occurrence_id,
    con_vo.concept_name AS visit_occurrence_concept_name,
    (DATE_PART('day', (vo.visit_end_datetime - vo.visit_start_datetime)) * 24) + DATE_PART('hour', (vo.visit_end_datetime - vo.visit_start_datetime)) AS visit_duration_hrs,
    con_src.concept_name AS visit_source_concept_name,
    vo.admitting_source_value AS admitting_source_value,
    1 AS sepsis
    FROM
    omop_cdm.visit_occurrence vo
    INNER JOIN omop_cdm.concept con_vo
    ON con_vo.concept_id = vo.visit_concept_id
    INNER JOIN omop_cdm.concept con_src
    ON con_src.concept_id = vo.visit_source_concept_id
    INNER JOIN mimiciv.admissions adm
    ON adm.hadm_id = split_part(vo.visit_source_value, '|', 2)::int
    INNER JOIN mimiciv.patients pat
    ON pat.subject_id = adm.subject_id
    INNER JOIN mimiciv.icustays icu
    ON icu.hadm_id = adm.hadm_id
    INNER JOIN sepsis.suspinfect_poe sus
    ON sus.stay_id = icu.stay_id
    WHERE visit_source_value NOT LIKE '%-%'
    AND (FLOOR(DATE_PART('day', adm.admittime - make_timestamp(pat.anchor_year, 1, 1, 0, 0, 0))/365.0) + pat.anchor_age) > 18
    AND sus.positiveculture = 1
    ;
    """
staticDfPos = pd.read_sql_query(staticQueryPos, con)
staticDfPos

Unnamed: 0,person_id,visit_start_datetime,visit_end_datetime,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value,sepsis
0,1914479621,2134-01-15 02:40:00,2134-01-30 05:00:00,847586405,Emergency Room and Inpatient Visit,362.0,URGENT,TRANSFER FROM HOSPITAL,1
1,-1338579469,2133-11-20 19:18:00,2133-12-31 17:22:00,-165416090,Emergency Room and Inpatient Visit,982.0,URGENT,TRANSFER FROM HOSPITAL,1
2,-1415432419,2187-10-25 13:07:00,2187-11-01 14:30:00,-572431579,Emergency Room and Inpatient Visit,169.0,URGENT,TRANSFER FROM HOSPITAL,1
3,1992624961,2129-05-07 00:00:00,2129-05-17 15:45:00,1059424698,Emergency Room and Inpatient Visit,255.0,URGENT,TRANSFER FROM HOSPITAL,1
4,-893011074,2133-05-06 11:38:00,2133-05-26 15:23:00,-1690463012,Emergency Room and Inpatient Visit,483.0,URGENT,TRANSFER FROM SKILLED NURSING FACILITY,1
...,...,...,...,...,...,...,...,...,...
6837,1673575901,2172-11-09 13:15:00,2172-11-13 01:00:00,414731374,Observation Room,83.0,OBSERVATION ADMIT,EMERGENCY ROOM,1
6838,-833079733,2188-02-28 14:49:00,2188-03-10 08:50:00,-793085031,Observation Room,258.0,OBSERVATION ADMIT,CLINIC REFERRAL,1
6839,1717614676,2168-09-28 23:16:00,2168-10-07 19:44:00,229166149,Observation Room,212.0,OBSERVATION ADMIT,EMERGENCY ROOM,1
6840,923804058,2128-09-27 00:00:00,2128-10-07 13:04:00,325699064,Observation Room,253.0,OBSERVATION ADMIT,TRANSFER FROM HOSPITAL,1


In [18]:
staticQueryNeg = """
    SELECT
    vo.person_id AS person_id,
    vo.visit_start_datetime AS visit_start_datetime,
    vo.visit_end_datetime AS visit_end_datetime,
    vo.visit_occurrence_id AS visit_occurrence_id,
    con_vo.concept_name AS visit_occurrence_concept_name,
    (DATE_PART('day', (vo.visit_end_datetime - vo.visit_start_datetime)) * 24) + DATE_PART('hour', (vo.visit_end_datetime - vo.visit_start_datetime)) AS visit_duration_hrs,
    con_src.concept_name AS visit_source_concept_name,
    vo.admitting_source_value AS admitting_source_value,
    0 AS sepsis
    FROM
    omop_cdm.visit_occurrence vo
    INNER JOIN omop_cdm.concept con_vo
    ON con_vo.concept_id = vo.visit_concept_id
    INNER JOIN omop_cdm.concept con_src
    ON con_src.concept_id = vo.visit_source_concept_id
    INNER JOIN mimiciv.admissions adm
    ON adm.hadm_id = split_part(vo.visit_source_value, '|', 2)::int
    INNER JOIN mimiciv.patients pat
    ON pat.subject_id = adm.subject_id
    INNER JOIN mimiciv.icustays icu
    ON icu.hadm_id = adm.hadm_id
    INNER JOIN sepsis.suspinfect_poe sus
    ON sus.stay_id = icu.stay_id
    WHERE visit_source_value NOT LIKE '%-%'
    AND (FLOOR(DATE_PART('day', adm.admittime - make_timestamp(pat.anchor_year, 1, 1, 0, 0, 0))/365.0) + pat.anchor_age) > 18
    AND sus.positiveculture = 0
    ;
    """
staticDfNeg = pd.read_sql_query(staticQueryNeg, con)
staticDfNeg

Unnamed: 0,person_id,visit_start_datetime,visit_end_datetime,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value,sepsis
0,2013404679,2135-12-31 11:12:00,2136-01-04 13:07:00,1628107643,Emergency Room and Inpatient Visit,97.0,URGENT,TRANSFER FROM HOSPITAL,0
1,-1477460813,2125-02-15 20:59:00,2125-02-23 14:58:00,-1998665121,Emergency Room and Inpatient Visit,185.0,URGENT,TRANSFER FROM HOSPITAL,0
2,1759815571,2154-02-02 17:40:00,2154-02-15 13:14:00,1459990499,Emergency Room and Inpatient Visit,307.0,URGENT,TRANSFER FROM HOSPITAL,0
3,1439307144,2161-08-17 23:21:00,2161-08-30 14:55:00,-1786406903,Emergency Room and Inpatient Visit,303.0,URGENT,TRANSFER FROM HOSPITAL,0
4,988083818,2113-10-20 19:39:00,2113-10-27 15:05:00,407189930,Emergency Room and Inpatient Visit,163.0,URGENT,TRANSFER FROM HOSPITAL,0
...,...,...,...,...,...,...,...,...,...
38073,-279080179,2146-06-25 23:25:00,2146-07-07 15:20:00,994799340,Observation Room,279.0,OBSERVATION ADMIT,EMERGENCY ROOM,0
38074,-279080179,2146-05-09 20:47:00,2146-05-14 12:25:00,1782457345,Observation Room,111.0,OBSERVATION ADMIT,TRANSFER FROM HOSPITAL,0
38075,1617441915,2168-02-01 07:11:00,2168-02-09 20:41:00,-1302077585,Observation Room,205.0,OBSERVATION ADMIT,EMERGENCY ROOM,0
38076,-587149924,2182-11-18 15:02:00,2182-11-30 21:30:00,-352782592,Observation Room,294.0,OBSERVATION ADMIT,EMERGENCY ROOM,0


In [19]:
staticDfNeg = staticDfNeg.sample(n=staticDfPos.shape[0], random_state=2518)
staticDfNeg

Unnamed: 0,person_id,visit_start_datetime,visit_end_datetime,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value,sepsis
12086,-1063424496,2157-01-12 00:09:00,2157-01-26 15:45:00,979894731,Emergency Room - Hospital,351.0,EW EMER.,PHYSICIAN REFERRAL,0
14830,-1842056937,2132-01-01 00:00:00,2132-01-04 11:10:00,728992356,Emergency Room - Hospital,83.0,EW EMER.,EMERGENCY ROOM,0
10525,-1350563358,2135-04-27 01:12:00,2135-05-08 17:45:00,-2119596572,Emergency Room - Hospital,280.0,EW EMER.,EMERGENCY ROOM,0
6322,1297417538,2172-11-09 22:57:00,2172-11-22 11:30:00,-805060413,Emergency Room and Inpatient Visit,300.0,URGENT,TRANSFER FROM HOSPITAL,0
25199,1909531407,2161-12-24 16:56:00,2161-12-31 14:10:00,-1774716857,Emergency Room - Hospital,165.0,EW EMER.,EMERGENCY ROOM,0
...,...,...,...,...,...,...,...,...,...
6070,377467962,2181-05-11 00:48:00,2181-05-20 15:44:00,-1769828987,Emergency Room and Inpatient Visit,230.0,URGENT,TRANSFER FROM HOSPITAL,0
5948,1856055933,2127-03-15 12:38:00,2127-03-20 14:04:00,1553456598,Emergency Room and Inpatient Visit,121.0,URGENT,PHYSICIAN REFERRAL,0
28151,-697207771,2142-07-15 20:10:00,2142-07-22 17:20:00,445525616,Emergency Room - Hospital,165.0,EW EMER.,EMERGENCY ROOM,0
25614,1133671855,2119-12-03 16:56:00,2120-02-18 18:00:00,-736325490,Emergency Room - Hospital,1849.0,EW EMER.,EMERGENCY ROOM,0


In [20]:
staticDf = pd.concat([staticDfPos, staticDfNeg])
staticDf

Unnamed: 0,person_id,visit_start_datetime,visit_end_datetime,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value,sepsis
0,1914479621,2134-01-15 02:40:00,2134-01-30 05:00:00,847586405,Emergency Room and Inpatient Visit,362.0,URGENT,TRANSFER FROM HOSPITAL,1
1,-1338579469,2133-11-20 19:18:00,2133-12-31 17:22:00,-165416090,Emergency Room and Inpatient Visit,982.0,URGENT,TRANSFER FROM HOSPITAL,1
2,-1415432419,2187-10-25 13:07:00,2187-11-01 14:30:00,-572431579,Emergency Room and Inpatient Visit,169.0,URGENT,TRANSFER FROM HOSPITAL,1
3,1992624961,2129-05-07 00:00:00,2129-05-17 15:45:00,1059424698,Emergency Room and Inpatient Visit,255.0,URGENT,TRANSFER FROM HOSPITAL,1
4,-893011074,2133-05-06 11:38:00,2133-05-26 15:23:00,-1690463012,Emergency Room and Inpatient Visit,483.0,URGENT,TRANSFER FROM SKILLED NURSING FACILITY,1
...,...,...,...,...,...,...,...,...,...
6070,377467962,2181-05-11 00:48:00,2181-05-20 15:44:00,-1769828987,Emergency Room and Inpatient Visit,230.0,URGENT,TRANSFER FROM HOSPITAL,0
5948,1856055933,2127-03-15 12:38:00,2127-03-20 14:04:00,1553456598,Emergency Room and Inpatient Visit,121.0,URGENT,PHYSICIAN REFERRAL,0
28151,-697207771,2142-07-15 20:10:00,2142-07-22 17:20:00,445525616,Emergency Room - Hospital,165.0,EW EMER.,EMERGENCY ROOM,0
25614,1133671855,2119-12-03 16:56:00,2120-02-18 18:00:00,-736325490,Emergency Room - Hospital,1849.0,EW EMER.,EMERGENCY ROOM,0


In [21]:
staticDf.to_csv('data/static_data_v_1.0', index=False)

## Static Data (Version 2) - ICD codes

In [33]:
staticPosQuery2 = """
    SELECT
    vo.person_id AS person_id,
    vo.visit_start_datetime AS visit_start_datetime,
    vo.visit_end_datetime AS visit_end_datetime,
    vo.visit_occurrence_id AS visit_occurrence_id,
    con_vo.concept_name AS visit_occurrence_concept_name,
    (DATE_PART('day', (vo.visit_end_datetime - vo.visit_start_datetime)) * 24) + DATE_PART('hour', (vo.visit_end_datetime - vo.visit_start_datetime)) AS visit_duration_hrs,
    con_src.concept_name AS visit_source_concept_name,
    vo.admitting_source_value AS admitting_source_value,
    1 AS sepsis
    FROM
    omop_cdm.visit_occurrence vo
    INNER JOIN omop_cdm.concept con_vo
    ON con_vo.concept_id = vo.visit_concept_id
    INNER JOIN omop_cdm.concept con_src
    ON con_src.concept_id = vo.visit_source_concept_id
    INNER JOIN mimiciv.admissions adm
    ON adm.hadm_id = split_part(vo.visit_source_value, '|', 2)::int
    INNER JOIN mimiciv.patients pat
    ON pat.subject_id = adm.subject_id
    INNER JOIN mimiciv.diagnoses_icd icd
    ON icd.hadm_id = adm.hadm_id
    WHERE visit_source_value NOT LIKE '%-%'
    AND (FLOOR(DATE_PART('day', adm.admittime - make_timestamp(pat.anchor_year, 1, 1, 0, 0, 0))/365.0) + pat.anchor_age) > 18
    AND (icd.icd_code = '99591' OR icd.icd_code = '99592' or icd.icd_code = '78552' OR icd.icd_code = 'A419' OR icd.icd_code = 'R6520' OR icd.icd_code = 'R6521')
    ;
    """
staticPosDf2 = pd.read_sql_query(staticPosQuery2, con)
staticPosDf2

Unnamed: 0,person_id,visit_start_datetime,visit_end_datetime,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value,sepsis
0,706761387,2188-03-21 20:22:00,2188-04-25 16:22:00,433162270,Emergency Room and Inpatient Visit,836.0,URGENT,TRANSFER FROM HOSPITAL,1
1,1683786691,2133-03-24 04:43:00,2133-04-01 21:33:00,-59688556,Emergency Room and Inpatient Visit,208.0,URGENT,TRANSFER FROM HOSPITAL,1
2,1225572303,2157-11-01 22:30:00,2157-11-02 14:35:00,-211754743,Emergency Room and Inpatient Visit,16.0,URGENT,TRANSFER FROM HOSPITAL,1
3,1225572303,2157-11-01 22:30:00,2157-11-02 14:35:00,-211754743,Emergency Room and Inpatient Visit,16.0,URGENT,TRANSFER FROM HOSPITAL,1
4,1930817712,2111-01-01 19:46:00,2111-01-03 04:24:00,919173749,Emergency Room and Inpatient Visit,32.0,URGENT,TRANSFER FROM HOSPITAL,1
...,...,...,...,...,...,...,...,...,...
20790,-837347565,2169-05-04 01:05:00,2169-05-12 00:15:00,-665461723,Observation Room,191.0,OBSERVATION ADMIT,EMERGENCY ROOM,1
20791,-837347565,2169-01-18 21:49:00,2169-01-23 15:50:00,56037357,Observation Room,114.0,OBSERVATION ADMIT,EMERGENCY ROOM,1
20792,1310134507,2120-02-26 14:26:00,2120-03-06 18:35:00,-568394395,Observation Room,220.0,OBSERVATION ADMIT,EMERGENCY ROOM,1
20793,383317919,2111-12-14 16:59:00,2111-12-22 15:50:00,-1464262786,Observation Room,190.0,OBSERVATION ADMIT,TRANSFER FROM HOSPITAL,1


In [39]:
staticNegQuery2 = """
    SELECT
    vo.person_id AS person_id,
    vo.visit_start_datetime AS visit_start_datetime,
    vo.visit_end_datetime AS visit_end_datetime,
    vo.visit_occurrence_id AS visit_occurrence_id,
    con_vo.concept_name AS visit_occurrence_concept_name,
    (DATE_PART('day', (vo.visit_end_datetime - vo.visit_start_datetime)) * 24) + DATE_PART('hour', (vo.visit_end_datetime - vo.visit_start_datetime)) AS visit_duration_hrs,
    con_src.concept_name AS visit_source_concept_name,
    vo.admitting_source_value AS admitting_source_value,
    0 AS sepsis
    FROM
    omop_cdm.visit_occurrence vo
    INNER JOIN omop_cdm.concept con_vo
    ON con_vo.concept_id = vo.visit_concept_id
    INNER JOIN omop_cdm.concept con_src
    ON con_src.concept_id = vo.visit_source_concept_id
    INNER JOIN mimiciv.admissions adm
    ON adm.hadm_id = split_part(vo.visit_source_value, '|', 2)::int
    INNER JOIN mimiciv.patients pat
    ON pat.subject_id = adm.subject_id
    INNER JOIN mimiciv.diagnoses_icd icd
    ON icd.hadm_id = adm.hadm_id
    WHERE visit_source_value NOT LIKE '%-%'
    AND (FLOOR(DATE_PART('day', adm.admittime - make_timestamp(pat.anchor_year, 1, 1, 0, 0, 0))/365.0) + pat.anchor_age) > 18
    AND adm.subject_id NOT IN (
        SELECT subject_id
        FROM mimiciv.diagnoses_icd
        WHERE icd_code IN ('99591', '99592', '78552', 'A419', 'R6520', 'R6521')
    )
    ;
    """
staticNegDf2 = pd.read_sql_query(staticNegQuery2, con)
staticNegDf2

Unnamed: 0,person_id,visit_start_datetime,visit_end_datetime,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value,sepsis
0,-1801782296,2149-12-10 18:49:00,2149-12-18 15:41:00,288731249,Emergency Room and Inpatient Visit,188.0,URGENT,PHYSICIAN REFERRAL,0
1,-1801782296,2149-12-10 18:49:00,2149-12-18 15:41:00,288731249,Emergency Room and Inpatient Visit,188.0,URGENT,PHYSICIAN REFERRAL,0
2,-1801782296,2149-12-10 18:49:00,2149-12-18 15:41:00,288731249,Emergency Room and Inpatient Visit,188.0,URGENT,PHYSICIAN REFERRAL,0
3,-1801782296,2149-12-10 18:49:00,2149-12-18 15:41:00,288731249,Emergency Room and Inpatient Visit,188.0,URGENT,PHYSICIAN REFERRAL,0
4,-1801782296,2149-12-10 18:49:00,2149-12-18 15:41:00,288731249,Emergency Room and Inpatient Visit,188.0,URGENT,PHYSICIAN REFERRAL,0
...,...,...,...,...,...,...,...,...,...
3976284,-459485199,2184-03-03 23:12:00,2184-03-09 15:25:00,-147850093,Emergency Room and Inpatient Visit,136.0,URGENT,TRANSFER FROM HOSPITAL,0
3976285,-459485199,2184-03-03 23:12:00,2184-03-09 15:25:00,-147850093,Emergency Room and Inpatient Visit,136.0,URGENT,TRANSFER FROM HOSPITAL,0
3976286,23270304,2113-04-20 18:14:00,2113-04-23 13:56:00,1569776977,Emergency Room and Inpatient Visit,67.0,URGENT,PHYSICIAN REFERRAL,0
3976287,23270304,2113-04-20 18:14:00,2113-04-23 13:56:00,1569776977,Emergency Room and Inpatient Visit,67.0,URGENT,PHYSICIAN REFERRAL,0


In [41]:
staticNegDf2 = staticNegDf2.sample(n=staticPosDf2.shape[0], random_state=2518)
staticNegDf2

Unnamed: 0,person_id,visit_start_datetime,visit_end_datetime,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value,sepsis
2537278,1431861522,2133-11-18 15:39:00,2133-11-20 16:42:00,-1975115552,Observation Room,49.0,EU OBSERVATION,PHYSICIAN REFERRAL,0
17445,1510829215,2141-06-20 12:48:00,2141-07-06 18:18:00,-911534824,Emergency Room and Inpatient Visit,389.0,URGENT,TRANSFER FROM HOSPITAL,0
60425,-1839483425,2144-08-23 00:00:00,2144-08-28 11:25:00,1926566599,Emergency Room - Hospital,131.0,DIRECT EMER.,PHYSICIAN REFERRAL,0
3448693,700757124,2164-10-04 21:13:00,2164-10-13 14:45:00,1524310797,Inpatient Visit,209.0,ELECTIVE,PHYSICIAN REFERRAL,0
3038837,1657379429,2191-12-23 19:40:00,2191-12-26 18:40:00,-2137215013,Observation Room,71.0,OBSERVATION ADMIT,EMERGENCY ROOM,0
...,...,...,...,...,...,...,...,...,...
554302,1420004681,2128-10-02 10:51:00,2128-10-05 10:25:00,353761385,Emergency Room - Hospital,71.0,EW EMER.,EMERGENCY ROOM,0
1852377,-552742661,2135-12-15 23:05:00,2135-12-16 23:30:00,329916201,Emergency Room - Hospital,24.0,EW EMER.,PHYSICIAN REFERRAL,0
3113754,-24694497,2141-03-17 10:27:00,2141-03-23 20:09:00,-1382104285,Observation Room,153.0,OBSERVATION ADMIT,TRANSFER FROM HOSPITAL,0
652940,352023899,2150-08-10 06:14:00,2150-08-21 17:09:00,681179376,Emergency Room - Hospital,274.0,EW EMER.,EMERGENCY ROOM,0


In [45]:
staticDf2 = pd.concat([staticPosDf2, staticNegDf2])
staticDf2

Unnamed: 0,person_id,visit_start_datetime,visit_end_datetime,visit_occurrence_id,visit_occurrence_concept_name,visit_duration_hrs,visit_source_concept_name,admitting_source_value,sepsis
0,706761387,2188-03-21 20:22:00,2188-04-25 16:22:00,433162270,Emergency Room and Inpatient Visit,836.0,URGENT,TRANSFER FROM HOSPITAL,1
1,1683786691,2133-03-24 04:43:00,2133-04-01 21:33:00,-59688556,Emergency Room and Inpatient Visit,208.0,URGENT,TRANSFER FROM HOSPITAL,1
2,1225572303,2157-11-01 22:30:00,2157-11-02 14:35:00,-211754743,Emergency Room and Inpatient Visit,16.0,URGENT,TRANSFER FROM HOSPITAL,1
3,1225572303,2157-11-01 22:30:00,2157-11-02 14:35:00,-211754743,Emergency Room and Inpatient Visit,16.0,URGENT,TRANSFER FROM HOSPITAL,1
4,1930817712,2111-01-01 19:46:00,2111-01-03 04:24:00,919173749,Emergency Room and Inpatient Visit,32.0,URGENT,TRANSFER FROM HOSPITAL,1
...,...,...,...,...,...,...,...,...,...
554302,1420004681,2128-10-02 10:51:00,2128-10-05 10:25:00,353761385,Emergency Room - Hospital,71.0,EW EMER.,EMERGENCY ROOM,0
1852377,-552742661,2135-12-15 23:05:00,2135-12-16 23:30:00,329916201,Emergency Room - Hospital,24.0,EW EMER.,PHYSICIAN REFERRAL,0
3113754,-24694497,2141-03-17 10:27:00,2141-03-23 20:09:00,-1382104285,Observation Room,153.0,OBSERVATION ADMIT,TRANSFER FROM HOSPITAL,0
652940,352023899,2150-08-10 06:14:00,2150-08-21 17:09:00,681179376,Emergency Room - Hospital,274.0,EW EMER.,EMERGENCY ROOM,0


In [46]:
staticDf2.to_csv('data/static_data_v_2.0', index=False)

## Vitals data

In [8]:
vitalsQuery = """
    WITH vitals_stg_1 AS
    (
        SELECT
        person_id AS person_id,
        measurement_datetime AS measurement_datetime,
        unit_source_value AS unit_source_value,
        value_as_number AS value_as_number,
        cpt.concept_name AS concept_name
        FROM
        etl_dataset_temp.measurement mmt
        INNER JOIN omop_cdm.concept cpt
        ON cpt.concept_id = mmt.measurement_concept_id
        WHERE
        measurement_concept_id IN (
        3027018 -- Heart rate
        , 21492239, 3004249 -- Systolic blood pressure
        , 21492240, 3012888 -- Diastolic blood pressure
        , 3027598, 21492241 -- Mean blood pressure
        , 1175625, 3024171, 3007469 -- Respiratory rate
        , 3020891 -- Body temperature
        , 40762499 -- Oxygen saturation in Arterial blood by Pulse oximetry
        , 3016335 -- Glasgow coma score eye opening
        , 3009094 -- Glasgow coma score verbal
        , 3008223 -- Glasgow coma score motor
        )
        AND value_as_number IS NOT NULL
    )
    , vitals_stg_2 AS
    (
      SELECT
        person_id,
        measurement_datetime,
        unit_source_value,
        value_as_number,
        concept_name,
        ROW_NUMBER() OVER (PARTITION BY person_id, concept_name ORDER BY measurement_datetime) AS rn
      FROM vitals_stg_1
    )
    SELECT * FROM vitals_stg_2
    """
vitalsDf = pd.read_sql_query(vitalsQuery, con)
vitalsDf.head()

Unnamed: 0,person_id,measurement_datetime,unit_source_value,value_as_number,concept_name,rn
0,-2147469031,2144-12-31 18:48:00,°F,98.9,Body temperature,1
1,-2147469031,2144-12-31 20:00:00,°F,99.4,Body temperature,2
2,-2147469031,2145-01-01 00:00:00,°F,100.8,Body temperature,3
3,-2147469031,2145-01-01 04:00:00,°F,100.5,Body temperature,4
4,-2147469031,2145-01-01 05:00:00,°F,100.0,Body temperature,5


In [15]:
vitalsDf.to_csv('data/vitals_data_v_1.0', index=False)

## Lab results data

In [10]:
labsQuery = """
WITH labs_stg_1 AS
    (
        SELECT
        person_id AS person_id,
        measurement_datetime AS measurement_datetime,
        unit_source_value AS unit_source_value,
        value_as_number AS value_as_number,
        cpt.concept_name AS concept_name
        FROM
        etl_dataset_temp.measurement mmt
        INNER JOIN omop_cdm.concept cpt
        ON cpt.concept_id = mmt.measurement_concept_id
        WHERE
        measurement_concept_id IN (
        3047181	-- Lactate [Moles/volume] in Blood
		, 3013290	-- Carbon dioxide [Partial pressure] in Blood
		, 3024561	-- Albumin [Mass/volume] in Serum or Plasma
		, 3024629	-- Glucose [Mass/volume] in Urine by Test strip
		, 3008939	-- Band form neutrophils [#/volume] in Blood by Manual count
		, 3012501	-- Base excess in Blood by calculation
		, 3005456	-- Potassium [Moles/volume] in Blood
		, 3010421	-- pH of Blood
		, 3014576	-- Chloride [Moles/volume] in Serum or Plasma
		, 3031147	-- Carbon dioxide, total [Moles/volume] in Blood by calculation
		, 3024128	-- Bilirubin.total [Mass/volume] in Serum or Plasma
		, 3000905	-- Leukocytes [#/volume] in Blood by Automated count
		, 3016723	-- Creatinine [Mass/volume] in Serum or Plasma
		, 3022217	-- INR in Platelet poor plasma by Coagulation assay
		, 3019550	-- Sodium [Moles/volume] in Serum or Plasma
		, 3000285	-- Sodium [Moles/volume] in Blood
		, 3000963	-- Hemoglobin [Mass/volume] in Blood
		, 3000963	-- Hemoglobin [Mass/volume] in Blood
		, 3018672	-- pH of Body fluid
		, 3024929	-- Platelets [#/volume] in Blood by Automated count
		, 3013682	-- Urea nitrogen [Mass/volume] in Serum or Plasma
		, 3004501	-- Glucose [Mass/volume] in Serum or Plasma
		, 3018572	-- Chloride [Moles/volume] in Blood
		, 3027315	-- Oxygen [Partial pressure] in Blood
		, 3016293	-- Bicarbonate [Moles/volume] in Serum or Plasma
		, 3023103	-- Potassium [Moles/volume] in Serum or Plasma
		, 3037278	-- Anion gap 4 in Serum or Plasma
		, 3003282	-- Leukocytes [#/volume] in Blood by Manual count
		, 3023314	-- Hematocrit [Volume Fraction] of Blood by Automated count
		, 3013466	-- aPTT in Blood by Coagulation assay
        )
        AND value_as_number IS NOT NULL
    )
    , labs_stg_2 AS
    (
      SELECT
        person_id,
        measurement_datetime,
        unit_source_value,
        value_as_number,
        concept_name,
        ROW_NUMBER() OVER (PARTITION BY person_id, concept_name ORDER BY measurement_datetime) AS rn
      FROM labs_stg_1
    )
    SELECT * FROM labs_stg_2
    """
labsDf = pd.read_sql_query(labsQuery, con)
labsDf.head()

Unnamed: 0,person_id,measurement_datetime,unit_source_value,value_as_number,concept_name,rn
0,-2147469031,2144-12-31 16:00:00,mEq/L,20.0,Bicarbonate [Moles/volume] in Serum or Plasma,1
1,-2147469031,2145-01-01 01:47:00,mEq/L,19.0,Bicarbonate [Moles/volume] in Serum or Plasma,2
2,-2147469031,2144-12-31 16:00:00,mEq/L,102.0,Chloride [Moles/volume] in Serum or Plasma,1
3,-2147469031,2145-01-01 01:47:00,mEq/L,104.0,Chloride [Moles/volume] in Serum or Plasma,2
4,-2147469031,2144-12-31 16:00:00,mg/dL,3.2,Creatinine [Mass/volume] in Serum or Plasma,1


In [16]:
labsDf.to_csv('data/labs_data_v_1.0', index=False)

## Mortality Data

In [5]:
mortalityQuery = """
SELECT
vo.visit_occurrence_id AS visit_occurrence_id,
(vo.visit_end_datetime = dth.death_datetime) AS discharge_mortality,
(vo.visit_end_datetime + interval '1 day' >= dth.death_datetime) AS one_day_mortality,
(vo.visit_end_datetime + interval '2 day' >= dth.death_datetime) AS two_day_mortality,
(vo.visit_end_datetime + interval '30 day' >= dth.death_datetime) AS thirty_day_mortality,
(vo.visit_end_datetime + interval '60 day' >= dth.death_datetime) AS sixty_day_mortality,
(vo.visit_end_datetime + interval '90 day' >= dth.death_datetime) AS ninety_day_mortality
FROM
omop_cdm.visit_occurrence vo
INNER JOIN omop_cdm.person per
ON per.person_id = vo.person_id
INNER JOIN omop_cdm.death dth
ON dth.person_id = per.person_id
;
"""
mortalityDf = pd.read_sql_query(mortalityQuery, con)
mortalityDf.head()

Unnamed: 0,visit_occurrence_id,discharge_mortality,one_day_mortality,two_day_mortality,thirty_day_mortality,sixty_day_mortality,ninety_day_mortality
0,-1003022587,False,False,False,False,False,False
1,1282824216,False,False,False,False,False,False
2,1473749252,False,False,False,False,False,False
3,1125153184,False,False,False,False,False,False
4,-931918731,False,False,False,False,False,False


In [6]:
mortalityDf.to_csv('data/mortality_data_v_1.0', index=False)