## Import Library

In [1]:
import pandas as pd

from scipy import stats

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2

# below imports are used to print out pretty pandas dataframes
from IPython.display import display, HTML

%matplotlib inline
plt.style.use('ggplot')

## Read Data

In [2]:
%%time
DATA_DIR = '../../data/'
DATA_FILE = DATA_DIR + 'all_hourly_data.h5'
vitals_labs = pd.read_hdf(DATA_FILE, 'vitals_labs')
vitals_labs_mean = pd.read_hdf(DATA_FILE, 'vitals_labs_mean')
interventions = pd.read_hdf(DATA_FILE, 'interventions')
patients = pd.read_hdf(DATA_FILE, 'patients')

CPU times: user 7.56 s, sys: 5.25 s, total: 12.8 s
Wall time: 12.8 s


In [3]:
# information used to create a database connection
sqluser = 'postgres'
dbname = 'mimic'
hostname = 'localhost'
port_number = 5434

# Connect to postgres with a copy of the MIMIC-III database
con = psycopg2.connect(dbname=dbname, user=sqluser, host=hostname, port=port_number, password='mysecretpassword')

# the below statement is prepended to queries to ensure they select from the right schema
# query_schema = 'set search_path to ' + schema_name + ';'

In [4]:
sepsis_query = \
"""
SELECT
DISTINCT
PAT.subject_id,
ADM.hadm_id,
ICU.icustay_id,
1 as "sepsis"
FROM patients PAT
INNER JOIN admissions ADM
ON PAT.subject_id = ADM.subject_id
LEFT JOIN icustays ICU
ON ICU.subject_id = ADM.subject_id AND ICU.hadm_id = ADM.hadm_id
INNER JOIN diagnoses_icd DIA_ICD
ON DIA_ICD.subject_id = ADM.subject_id AND DIA_ICD.hadm_id = ADM.hadm_id AND DIA_ICD.icd9_code in ('99591', '99592', '78552')
WHERE DATE_PART('year', AGE(ADM.admittime, PAT.dob)) > 18
;
"""

sepsis_patients = pd.read_sql_query(sepsis_query, con)

In [5]:
sepsis_patients.shape

(6130, 4)

In [6]:
vitals_labs.shape

(2200954, 312)

In [7]:
vitals_labs_mean.shape

(2200954, 104)

In [8]:
interventions.shape

(2200954, 14)

In [9]:
patients.shape

(34472, 28)

## Vitals

In [10]:
vitals_labs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,LEVEL2,alanine aminotransferase,alanine aminotransferase,alanine aminotransferase,albumin,albumin,albumin,albumin ascites,albumin ascites,albumin ascites,albumin pleural,...,white blood cell count,white blood cell count urine,white blood cell count urine,white blood cell count urine,ph,ph,ph,ph urine,ph urine,ph urine
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Aggregation Function,count,mean,std,count,mean,std,count,mean,std,count,...,std,count,mean,std,count,mean,std,count,mean,std
subject_id,hadm_id,icustay_id,hours_in,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
3,145834,211552,0,2.0,25.0,0.0,2.0,1.8,0.0,0.0,,,0.0,...,4.012837,0.0,,,9.0,7.4,0.147733,1.0,5.0,
3,145834,211552,1,0.0,,,0.0,,,0.0,,,0.0,...,,0.0,,,0.0,,,0.0,,
3,145834,211552,2,0.0,,,0.0,,,0.0,,,0.0,...,,0.0,,,3.0,7.26,0.0,0.0,,
3,145834,211552,3,0.0,,,0.0,,,0.0,,,0.0,...,,0.0,,,0.0,,,0.0,,
3,145834,211552,4,0.0,,,0.0,,,0.0,,,0.0,...,,0.0,,,0.0,,,0.0,,


In [11]:
vitals_labs_mean.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,LEVEL2,alanine aminotransferase,albumin,albumin ascites,albumin pleural,albumin urine,alkaline phosphate,anion gap,asparate aminotransferase,basophils,bicarbonate,...,total protein,total protein urine,troponin-i,troponin-t,venous pvo2,weight,white blood cell count,white blood cell count urine,ph,ph urine
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Aggregation Function,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,...,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
subject_id,hadm_id,icustay_id,hours_in,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
3,145834,211552,0,25.0,1.8,,,,73.0,20.666667,69.0,,16.333333,...,,,,,,,14.842857,,7.4,5.0
3,145834,211552,1,,,,,,,,,,,...,,,,,,,,,,
3,145834,211552,2,,,,,,,,,,,...,,,,,,,,,7.26,
3,145834,211552,3,,,,,,,,,,,...,,,,,,,,,,
3,145834,211552,4,,,,,,,,,,,...,,,,,,,,,,


In [12]:
vitals_labs.columns

MultiIndex([(    'alanine aminotransferase', 'count'),
            (    'alanine aminotransferase',  'mean'),
            (    'alanine aminotransferase',   'std'),
            (                     'albumin', 'count'),
            (                     'albumin',  'mean'),
            (                     'albumin',   'std'),
            (             'albumin ascites', 'count'),
            (             'albumin ascites',  'mean'),
            (             'albumin ascites',   'std'),
            (             'albumin pleural', 'count'),
            ...
            (      'white blood cell count',   'std'),
            ('white blood cell count urine', 'count'),
            ('white blood cell count urine',  'mean'),
            ('white blood cell count urine',   'std'),
            (                          'ph', 'count'),
            (                          'ph',  'mean'),
            (                          'ph',   'std'),
            (                    'ph urine', 'cou

In [13]:
vitals_labs_mean.columns

MultiIndex([(    'alanine aminotransferase', 'mean'),
            (                     'albumin', 'mean'),
            (             'albumin ascites', 'mean'),
            (             'albumin pleural', 'mean'),
            (               'albumin urine', 'mean'),
            (          'alkaline phosphate', 'mean'),
            (                   'anion gap', 'mean'),
            (   'asparate aminotransferase', 'mean'),
            (                   'basophils', 'mean'),
            (                 'bicarbonate', 'mean'),
            ...
            (               'total protein', 'mean'),
            (         'total protein urine', 'mean'),
            (                  'troponin-i', 'mean'),
            (                  'troponin-t', 'mean'),
            (                 'venous pvo2', 'mean'),
            (                      'weight', 'mean'),
            (      'white blood cell count', 'mean'),
            ('white blood cell count urine', 'mean'),
            

## Interventions

In [14]:
interventions.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,vent,vaso,adenosine,dobutamine,dopamine,epinephrine,isuprel,milrinone,norepinephrine,phenylephrine,vasopressin,colloid_bolus,crystalloid_bolus,nivdurations
subject_id,hadm_id,icustay_id,hours_in,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
3,145834,211552,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,145834,211552,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0
3,145834,211552,2,1,1,0,0,1,0,0,0,0,1,0,0,0,0
3,145834,211552,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0
3,145834,211552,4,1,1,0,0,0,0,0,0,1,1,0,0,0,0


In [15]:
interventions.columns

Index(['vent', 'vaso', 'adenosine', 'dobutamine', 'dopamine', 'epinephrine',
       'isuprel', 'milrinone', 'norepinephrine', 'phenylephrine',
       'vasopressin', 'colloid_bolus', 'crystalloid_bolus', 'nivdurations'],
      dtype='object')

## Patients

In [16]:
patients.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gender,ethnicity,age,insurance,admittime,diagnosis_at_admission,dischtime,discharge_location,fullcode_first,dnr_first,...,intime,outtime,los_icu,admission_type,first_careunit,mort_icu,mort_hosp,hospital_expire_flag,hospstay_seq,readmission_30
subject_id,hadm_id,icustay_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
3,145834,211552,M,WHITE,76.526792,Medicare,2101-10-20 19:08:00,HYPOTENSION,2101-10-31 13:58:00,SNF,1.0,0.0,...,2101-10-20 19:10:11,2101-10-26 20:43:09,6.06456,EMERGENCY,MICU,0,0,0,1,0
4,185777,294638,F,WHITE,47.845047,Private,2191-03-16 00:28:00,"FEVER,DEHYDRATION,FAILURE TO THRIVE",2191-03-23 18:41:00,HOME WITH HOME IV PROVIDR,1.0,0.0,...,2191-03-16 00:29:31,2191-03-17 16:46:31,1.678472,EMERGENCY,MICU,0,0,0,1,0
6,107064,228232,F,WHITE,65.942297,Medicare,2175-05-30 07:15:00,CHRONIC RENAL FAILURE/SDA,2175-06-15 16:00:00,HOME HEALTH CARE,1.0,0.0,...,2175-05-30 21:30:54,2175-06-03 13:39:54,3.672917,ELECTIVE,SICU,0,0,0,1,0
9,150750,220597,M,UNKNOWN/NOT SPECIFIED,41.790228,Medicaid,2149-11-09 13:06:00,HEMORRHAGIC CVA,2149-11-14 10:15:00,DEAD/EXPIRED,1.0,0.0,...,2149-11-09 13:07:02,2149-11-14 20:52:14,5.323056,EMERGENCY,MICU,1,1,1,1,0
11,194540,229441,F,WHITE,50.148295,Private,2178-04-16 06:18:00,BRAIN MASS,2178-05-11 19:00:00,HOME HEALTH CARE,1.0,0.0,...,2178-04-16 06:19:32,2178-04-17 20:21:05,1.58441,EMERGENCY,SICU,0,0,0,1,0


In [17]:
patients.columns

Index(['gender', 'ethnicity', 'age', 'insurance', 'admittime',
       'diagnosis_at_admission', 'dischtime', 'discharge_location',
       'fullcode_first', 'dnr_first', 'fullcode', 'dnr', 'dnr_first_charttime',
       'timecmo_chart', 'cmo_first', 'cmo_last', 'cmo', 'deathtime', 'intime',
       'outtime', 'los_icu', 'admission_type', 'first_careunit', 'mort_icu',
       'mort_hosp', 'hospital_expire_flag', 'hospstay_seq', 'readmission_30'],
      dtype='object')

## Sepsis Patients

In [18]:
sepsis_patients.head()

Unnamed: 0,subject_id,hadm_id,icustay_id,sepsis
0,21,111970,216859.0,1
1,38,185910,248910.0,1
2,61,189535,217135.0,1
3,62,116009,216609.0,1
4,64,172056,232593.0,1


## Univariate Analysis

In [19]:
summary_data = []

### Age

In [20]:
sepsis_query = \
"""
SELECT
DISTINCT
PAT.subject_id,
ADM.hadm_id,
ICU.icustay_id,
PAT.gender,
DATE_PART('year', AGE(ADM.admittime, PAT.dob)) as "age"
FROM patients PAT
INNER JOIN admissions ADM
ON PAT.subject_id = ADM.subject_id
LEFT JOIN icustays ICU
ON ICU.subject_id = ADM.subject_id AND ICU.hadm_id = ADM.hadm_id
INNER JOIN diagnoses_icd DIA_ICD
ON DIA_ICD.subject_id = ADM.subject_id AND DIA_ICD.hadm_id = ADM.hadm_id AND DIA_ICD.icd9_code in ('99591', '99592', '78552')
WHERE DATE_PART('year', AGE(ADM.admittime, PAT.dob)) > 18
;
"""

sepsis_patients = pd.read_sql_query(sepsis_query, con)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,age
0,21,111970,216859.0,M,87.0
1,38,185910,248910.0,M,75.0
2,61,189535,217135.0,M,55.0
3,62,116009,216609.0,M,68.0
4,64,172056,232593.0,F,26.0
...,...,...,...,...,...
6125,99865,189346,259297.0,F,77.0
6126,99865,189346,284772.0,F,77.0
6127,99912,189380,236183.0,M,84.0
6128,99985,176670,279638.0,M,53.0


In [21]:
for (age_lower, age_upper) in [(0, 45), (45, 65), (65, 90), (90, 500)]:
    male_patients = [1 if (age > age_lower) and (age < age_upper) else 0 for age in sepsis_patients[sepsis_patients.gender=='M'].age]
    female_patients = [1 if (age > age_lower) and (age < age_upper) else 0 for age in sepsis_patients[sepsis_patients.gender=='F'].age]
    t_value, p_value = stats.ttest_ind(
        male_patients,
        female_patients
    )

    summary_data.append(
        [
            str(age_lower) + ' < Age < ' + str(age_upper),
            sum(male_patients),
            sum(female_patients),
            t_value,
            p_value
        ]
    )
summary_data

[['0 < Age < 45', 336, 268, -0.3654716886674092, 0.7147719504287686],
 ['45 < Age < 65', 1175, 832, 2.4409561288957695, 0.014676550005472406],
 ['65 < Age < 90', 1647, 1256, 0.6061316111905368, 0.5444498120696595],
 ['90 < Age < 500', 181, 249, -6.190866135828064, 6.371023383169715e-10]]

### Ethnicity

In [22]:
def categorize_ethnicity(ethnicity):
    if 'ASIAN' in ethnicity:
        ethnicity = 'ASIAN'
    elif 'WHITE' in ethnicity:
        ethnicity = 'WHITE'
    elif 'HISPANIC' in ethnicity:
        ethnicity = 'HISPANIC/LATINO'
    elif 'BLACK' in ethnicity:
        ethnicity = 'BLACK'
    else:
        ethnicity = 'OTHER'
    return ethnicity

In [23]:
sepsis_query = \
"""
SELECT
DISTINCT
PAT.subject_id,
ADM.hadm_id,
ICU.icustay_id,
PAT.gender,
ADM.ethnicity as "ethnicity"
FROM patients PAT
INNER JOIN admissions ADM
ON PAT.subject_id = ADM.subject_id
LEFT JOIN icustays ICU
ON ICU.subject_id = ADM.subject_id AND ICU.hadm_id = ADM.hadm_id
INNER JOIN diagnoses_icd DIA_ICD
ON DIA_ICD.subject_id = ADM.subject_id AND DIA_ICD.hadm_id = ADM.hadm_id AND DIA_ICD.icd9_code in ('99591', '99592', '78552')
WHERE DATE_PART('year', AGE(ADM.admittime, PAT.dob)) > 18
;
"""

sepsis_patients = pd.read_sql_query(sepsis_query, con)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,ethnicity
0,21,111970,216859.0,M,WHITE
1,38,185910,248910.0,M,WHITE
2,61,189535,217135.0,M,WHITE
3,62,116009,216609.0,M,PATIENT DECLINED TO ANSWER
4,64,172056,232593.0,F,BLACK/AFRICAN AMERICAN
...,...,...,...,...,...
6125,99865,189346,259297.0,F,WHITE
6126,99865,189346,284772.0,F,WHITE
6127,99912,189380,236183.0,M,WHITE
6128,99985,176670,279638.0,M,WHITE


In [24]:
sepsis_patients['ethnicity'] = sepsis_patients.ethnicity.apply(categorize_ethnicity)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,ethnicity
0,21,111970,216859.0,M,WHITE
1,38,185910,248910.0,M,WHITE
2,61,189535,217135.0,M,WHITE
3,62,116009,216609.0,M,OTHER
4,64,172056,232593.0,F,BLACK
...,...,...,...,...,...
6125,99865,189346,259297.0,F,WHITE
6126,99865,189346,284772.0,F,WHITE
6127,99912,189380,236183.0,M,WHITE
6128,99985,176670,279638.0,M,WHITE


In [25]:
for ethnicity in ['ASIAN', 'WHITE', 'HISPANIC/LATINO', 'BLACK', 'OTHER']:
    male_patients = [1 if (e == ethnicity) else 0 for e in sepsis_patients[sepsis_patients.gender=='M'].ethnicity]
    female_patients = [1 if (e == ethnicity) else 0 for e in sepsis_patients[sepsis_patients.gender=='F'].ethnicity]
    t_value, p_value = stats.ttest_ind(
        male_patients,
        female_patients
    )

    summary_data.append(
        [
            'Ethnicity = ' + str(ethnicity),
            sum(male_patients),
            sum(female_patients),
            t_value,
            p_value
        ]
    )
summary_data

[['0 < Age < 45', 336, 268, -0.3654716886674092, 0.7147719504287686],
 ['45 < Age < 65', 1175, 832, 2.4409561288957695, 0.014676550005472406],
 ['65 < Age < 90', 1647, 1256, 0.6061316111905368, 0.5444498120696595],
 ['90 < Age < 500', 181, 249, -6.190866135828064, 6.371023383169715e-10],
 ['Ethnicity = ASIAN', 112, 82, 0.400164663975862, 0.6890491875454969],
 ['Ethnicity = WHITE', 2592, 1915, 3.108697572684383, 0.0018877515737622708],
 ['Ethnicity = HISPANIC/LATINO',
  134,
  65,
  3.184888527719864,
  0.001455353245638791],
 ['Ethnicity = BLACK', 271, 369, -7.571989888199857, 4.218962978887739e-14],
 ['Ethnicity = OTHER', 344, 246, 1.017641810500703, 0.3088883847666546]]

### First Careunit

In [26]:
sepsis_query = \
"""
SELECT
DISTINCT
PAT.subject_id,
ADM.hadm_id,
ICU.icustay_id,
PAT.gender,
ICU.first_careunit as "first_careunit"
FROM patients PAT
INNER JOIN admissions ADM
ON PAT.subject_id = ADM.subject_id
LEFT JOIN icustays ICU
ON ICU.subject_id = ADM.subject_id AND ICU.hadm_id = ADM.hadm_id
INNER JOIN diagnoses_icd DIA_ICD
ON DIA_ICD.subject_id = ADM.subject_id AND DIA_ICD.hadm_id = ADM.hadm_id AND DIA_ICD.icd9_code in ('99591', '99592', '78552')
WHERE DATE_PART('year', AGE(ADM.admittime, PAT.dob)) > 18
;
"""

sepsis_patients = pd.read_sql_query(sepsis_query, con)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,first_careunit
0,21,111970,216859.0,M,MICU
1,38,185910,248910.0,M,CCU
2,61,189535,217135.0,M,MICU
3,62,116009,216609.0,M,CCU
4,64,172056,232593.0,F,MICU
...,...,...,...,...,...
6125,99865,189346,259297.0,F,SICU
6126,99865,189346,284772.0,F,SICU
6127,99912,189380,236183.0,M,MICU
6128,99985,176670,279638.0,M,MICU


In [27]:
for careunit in ['CCU', 'MICU', 'CSRU', 'SICU']:
    male_patients = [1 if (fcu == careunit) else 0 for fcu in sepsis_patients[sepsis_patients.gender=='M'].first_careunit]
    female_patients = [1 if (fcu == careunit) else 0 for fcu in sepsis_patients[sepsis_patients.gender=='F'].first_careunit]
    t_value, p_value = stats.ttest_ind(
        male_patients,
        female_patients
    )

    summary_data.append(
        [
            'Careunit = ' + str(careunit),
            sum(male_patients),
            sum(female_patients),
            t_value,
            p_value
        ]
    )
summary_data

[['0 < Age < 45', 336, 268, -0.3654716886674092, 0.7147719504287686],
 ['45 < Age < 65', 1175, 832, 2.4409561288957695, 0.014676550005472406],
 ['65 < Age < 90', 1647, 1256, 0.6061316111905368, 0.5444498120696595],
 ['90 < Age < 500', 181, 249, -6.190866135828064, 6.371023383169715e-10],
 ['Ethnicity = ASIAN', 112, 82, 0.400164663975862, 0.6890491875454969],
 ['Ethnicity = WHITE', 2592, 1915, 3.108697572684383, 0.0018877515737622708],
 ['Ethnicity = HISPANIC/LATINO',
  134,
  65,
  3.184888527719864,
  0.001455353245638791],
 ['Ethnicity = BLACK', 271, 369, -7.571989888199857, 4.218962978887739e-14],
 ['Ethnicity = OTHER', 344, 246, 1.017641810500703, 0.3088883847666546],
 ['Careunit = CCU', 318, 238, 0.4310545345989765, 0.6664438798443675],
 ['Careunit = MICU', 2224, 1851, -3.900640956378968, 9.696478539271773e-05],
 ['Careunit = CSRU', 151, 78, 2.989938940590666, 0.002801419232908467],
 ['Careunit = SICU', 504, 349, 1.7493406347323013, 0.08028221369021499]]

### Marital Status

In [62]:
def categorize_maritalstatus(maritalstatus):
    if maritalstatus:
        if 'DIVORCED' in maritalstatus:
            maritalstatus = 'SINGLE/SEPARATED/DIVORCED/WIDOW'
        elif 'SEPARATED' in maritalstatus:
            maritalstatus = 'SINGLE/SEPARATED/DIVORCED/WIDOW'
        elif 'SINGLE' in maritalstatus:
            maritalstatus = 'SINGLE/SEPARATED/DIVORCED/WIDOW'
        elif 'WIDOWED' in maritalstatus:
            maritalstatus = 'SINGLE/SEPARATED/DIVORCED/WIDOW'
        elif 'LIFE PARTNER' in maritalstatus:
            maritalstatus = 'MARRIED'
        elif 'MARRIED' in maritalstatus:
            maritalstatus = 'MARRIED'
        else:
            maritalstatus = 'OTHER'
    else:
        maritalstatus = 'OTHER'
    return maritalstatus

In [63]:
sepsis_query = \
"""
SELECT
DISTINCT
PAT.subject_id,
ADM.hadm_id,
ICU.icustay_id,
PAT.gender,
ADM.marital_status as "marital_status"
FROM patients PAT
INNER JOIN admissions ADM
ON PAT.subject_id = ADM.subject_id
LEFT JOIN icustays ICU
ON ICU.subject_id = ADM.subject_id AND ICU.hadm_id = ADM.hadm_id
INNER JOIN diagnoses_icd DIA_ICD
ON DIA_ICD.subject_id = ADM.subject_id AND DIA_ICD.hadm_id = ADM.hadm_id AND DIA_ICD.icd9_code in ('99591', '99592', '78552')
WHERE DATE_PART('year', AGE(ADM.admittime, PAT.dob)) > 18
;
"""

sepsis_patients = pd.read_sql_query(sepsis_query, con)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,marital_status
0,21,111970,216859.0,M,MARRIED
1,38,185910,248910.0,M,WIDOWED
2,61,189535,217135.0,M,MARRIED
3,62,116009,216609.0,M,MARRIED
4,64,172056,232593.0,F,SINGLE
...,...,...,...,...,...
6125,99865,189346,259297.0,F,DIVORCED
6126,99865,189346,284772.0,F,DIVORCED
6127,99912,189380,236183.0,M,MARRIED
6128,99985,176670,279638.0,M,MARRIED


In [66]:
sepsis_patients['marital_status'] = sepsis_patients.marital_status.apply(categorize_maritalstatus)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,marital_status
0,21,111970,216859.0,M,MARRIED
1,38,185910,248910.0,M,SINGLE/SEPARATED/DIVORCED/WIDOW
2,61,189535,217135.0,M,MARRIED
3,62,116009,216609.0,M,MARRIED
4,64,172056,232593.0,F,SINGLE/SEPARATED/DIVORCED/WIDOW
...,...,...,...,...,...
6125,99865,189346,259297.0,F,SINGLE/SEPARATED/DIVORCED/WIDOW
6126,99865,189346,284772.0,F,SINGLE/SEPARATED/DIVORCED/WIDOW
6127,99912,189380,236183.0,M,MARRIED
6128,99985,176670,279638.0,M,MARRIED


In [67]:
for marital_status in ['SINGLE/SEPARATED/DIVORCED/WIDOW', 'MARRIED', 'OTHER']:
    male_patients = [1 if (ms == marital_status) else 0 for ms in sepsis_patients[sepsis_patients.gender=='M'].marital_status]
    female_patients = [1 if (ms == marital_status) else 0 for ms in sepsis_patients[sepsis_patients.gender=='F'].marital_status]
    t_value, p_value = stats.ttest_ind(
        male_patients,
        female_patients
    )

    summary_data.append(
        [
            'Maritalstatus = ' + str(marital_status),
            sum(male_patients),
            sum(female_patients),
            t_value,
            p_value
        ]
    )
summary_data

[['0 < Age < 45', 336, 268, -0.3654716886674092, 0.7147719504287686],
 ['45 < Age < 65', 1175, 832, 2.4409561288957695, 0.014676550005472406],
 ['65 < Age < 90', 1647, 1256, 0.6061316111905368, 0.5444498120696595],
 ['90 < Age < 500', 181, 249, -6.190866135828064, 6.371023383169715e-10],
 ['Ethnicity = ASIAN', 112, 82, 0.400164663975862, 0.6890491875454969],
 ['Ethnicity = WHITE', 2592, 1915, 3.108697572684383, 0.0018877515737622708],
 ['Ethnicity = HISPANIC/LATINO',
  134,
  65,
  3.184888527719864,
  0.001455353245638791],
 ['Ethnicity = BLACK', 271, 369, -7.571989888199857, 4.218962978887739e-14],
 ['Ethnicity = OTHER', 344, 246, 1.017641810500703, 0.3088883847666546],
 ['Careunit = CCU', 318, 238, 0.4310545345989765, 0.6664438798443675],
 ['Careunit = MICU', 2224, 1851, -3.900640956378968, 9.696478539271773e-05],
 ['Careunit = CSRU', 151, 78, 2.989938940590666, 0.002801419232908467],
 ['Careunit = SICU', 504, 349, 1.7493406347323013, 0.08028221369021499],
 ['Maritalstatus = SINGLE/

### Insurance

In [68]:
def categorize_insurance(insurance):
    if insurance:
        if 'Government' in insurance:
            insurance = 'GOVERNMENT'
        elif 'Private' in insurance:
            insurance = 'PRIVATE'
        elif 'Self Pay' in insurance:
            insurance = 'SELF PAY'
        elif 'Medicare' in insurance:
            insurance = 'MEDICARE/MEDICAID'
        elif 'Medicaid' in insurance:
            insurance = 'MEDICARE/MEDICAID'
        else:
            insurance = 'OTHER'
    else:
        insurance = 'OTHER'
    return insurance

In [69]:
sepsis_query = \
"""
SELECT
DISTINCT
PAT.subject_id,
ADM.hadm_id,
ICU.icustay_id,
PAT.gender,
ADM.insurance as "insurance"
FROM patients PAT
INNER JOIN admissions ADM
ON PAT.subject_id = ADM.subject_id
LEFT JOIN icustays ICU
ON ICU.subject_id = ADM.subject_id AND ICU.hadm_id = ADM.hadm_id
INNER JOIN diagnoses_icd DIA_ICD
ON DIA_ICD.subject_id = ADM.subject_id AND DIA_ICD.hadm_id = ADM.hadm_id AND DIA_ICD.icd9_code in ('99591', '99592', '78552')
WHERE DATE_PART('year', AGE(ADM.admittime, PAT.dob)) > 18
;
"""

sepsis_patients = pd.read_sql_query(sepsis_query, con)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,insurance
0,21,111970,216859.0,M,Medicare
1,38,185910,248910.0,M,Medicare
2,61,189535,217135.0,M,Private
3,62,116009,216609.0,M,Medicare
4,64,172056,232593.0,F,Medicaid
...,...,...,...,...,...
6125,99865,189346,259297.0,F,Medicare
6126,99865,189346,284772.0,F,Medicare
6127,99912,189380,236183.0,M,Medicare
6128,99985,176670,279638.0,M,Private


In [71]:
sepsis_patients['insurance'] = sepsis_patients.insurance.apply(categorize_insurance)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,insurance
0,21,111970,216859.0,M,MEDICARE/MEDICAID
1,38,185910,248910.0,M,MEDICARE/MEDICAID
2,61,189535,217135.0,M,PRIVATE
3,62,116009,216609.0,M,MEDICARE/MEDICAID
4,64,172056,232593.0,F,MEDICARE/MEDICAID
...,...,...,...,...,...
6125,99865,189346,259297.0,F,MEDICARE/MEDICAID
6126,99865,189346,284772.0,F,MEDICARE/MEDICAID
6127,99912,189380,236183.0,M,MEDICARE/MEDICAID
6128,99985,176670,279638.0,M,PRIVATE


In [74]:
for insurance in ['MEDICARE/MEDICAID', 'PRIVATE', 'GOVERNMENT', 'SELF PAY']:
    male_patients = [1 if (i == insurance) else 0 for i in sepsis_patients[sepsis_patients.gender=='M'].insurance]
    female_patients = [1 if (i == insurance) else 0 for i in sepsis_patients[sepsis_patients.gender=='F'].insurance]
    t_value, p_value = stats.ttest_ind(
        male_patients,
        female_patients
    )

    summary_data.append(
        [
            'Insurance = ' + str(insurance),
            sum(male_patients),
            sum(female_patients),
            t_value,
            p_value
        ]
    )
summary_data

[['0 < Age < 45', 336, 268, -0.3654716886674092, 0.7147719504287686],
 ['45 < Age < 65', 1175, 832, 2.4409561288957695, 0.014676550005472406],
 ['65 < Age < 90', 1647, 1256, 0.6061316111905368, 0.5444498120696595],
 ['90 < Age < 500', 181, 249, -6.190866135828064, 6.371023383169715e-10],
 ['Ethnicity = ASIAN', 112, 82, 0.400164663975862, 0.6890491875454969],
 ['Ethnicity = WHITE', 2592, 1915, 3.108697572684383, 0.0018877515737622708],
 ['Ethnicity = HISPANIC/LATINO',
  134,
  65,
  3.184888527719864,
  0.001455353245638791],
 ['Ethnicity = BLACK', 271, 369, -7.571989888199857, 4.218962978887739e-14],
 ['Ethnicity = OTHER', 344, 246, 1.017641810500703, 0.3088883847666546],
 ['Careunit = CCU', 318, 238, 0.4310545345989765, 0.6664438798443675],
 ['Careunit = MICU', 2224, 1851, -3.900640956378968, 9.696478539271773e-05],
 ['Careunit = CSRU', 151, 78, 2.989938940590666, 0.002801419232908467],
 ['Careunit = SICU', 504, 349, 1.7493406347323013, 0.08028221369021499],
 ['Maritalstatus = SINGLE/

### Admission Type

In [75]:
sepsis_query = \
"""
SELECT
DISTINCT
PAT.subject_id,
ADM.hadm_id,
ICU.icustay_id,
PAT.gender,
ADM.admission_type as "admission_type"
FROM patients PAT
INNER JOIN admissions ADM
ON PAT.subject_id = ADM.subject_id
LEFT JOIN icustays ICU
ON ICU.subject_id = ADM.subject_id AND ICU.hadm_id = ADM.hadm_id
INNER JOIN diagnoses_icd DIA_ICD
ON DIA_ICD.subject_id = ADM.subject_id AND DIA_ICD.hadm_id = ADM.hadm_id AND DIA_ICD.icd9_code in ('99591', '99592', '78552')
WHERE DATE_PART('year', AGE(ADM.admittime, PAT.dob)) > 18
;
"""

sepsis_patients = pd.read_sql_query(sepsis_query, con)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,admission_type
0,21,111970,216859.0,M,EMERGENCY
1,38,185910,248910.0,M,EMERGENCY
2,61,189535,217135.0,M,EMERGENCY
3,62,116009,216609.0,M,EMERGENCY
4,64,172056,232593.0,F,EMERGENCY
...,...,...,...,...,...
6125,99865,189346,259297.0,F,ELECTIVE
6126,99865,189346,284772.0,F,ELECTIVE
6127,99912,189380,236183.0,M,EMERGENCY
6128,99985,176670,279638.0,M,EMERGENCY


In [76]:
for admission_type in ['URGENT', 'ELECTIVE', 'EMERGENCY']:
    male_patients = [1 if (at == admission_type) else 0 for at in sepsis_patients[sepsis_patients.gender=='M'].admission_type]
    female_patients = [1 if (at == admission_type) else 0 for at in sepsis_patients[sepsis_patients.gender=='F'].admission_type]
    t_value, p_value = stats.ttest_ind(
        male_patients,
        female_patients
    )

    summary_data.append(
        [
            'Admission Type = ' + str(admission_type),
            sum(male_patients),
            sum(female_patients),
            t_value,
            p_value
        ]
    )
summary_data

[['0 < Age < 45', 336, 268, -0.3654716886674092, 0.7147719504287686],
 ['45 < Age < 65', 1175, 832, 2.4409561288957695, 0.014676550005472406],
 ['65 < Age < 90', 1647, 1256, 0.6061316111905368, 0.5444498120696595],
 ['90 < Age < 500', 181, 249, -6.190866135828064, 6.371023383169715e-10],
 ['Ethnicity = ASIAN', 112, 82, 0.400164663975862, 0.6890491875454969],
 ['Ethnicity = WHITE', 2592, 1915, 3.108697572684383, 0.0018877515737622708],
 ['Ethnicity = HISPANIC/LATINO',
  134,
  65,
  3.184888527719864,
  0.001455353245638791],
 ['Ethnicity = BLACK', 271, 369, -7.571989888199857, 4.218962978887739e-14],
 ['Ethnicity = OTHER', 344, 246, 1.017641810500703, 0.3088883847666546],
 ['Careunit = CCU', 318, 238, 0.4310545345989765, 0.6664438798443675],
 ['Careunit = MICU', 2224, 1851, -3.900640956378968, 9.696478539271773e-05],
 ['Careunit = CSRU', 151, 78, 2.989938940590666, 0.002801419232908467],
 ['Careunit = SICU', 504, 349, 1.7493406347323013, 0.08028221369021499],
 ['Maritalstatus = SINGLE/

### Admission Location

In [77]:
def categorize_admission_location(admission_location):
    if admission_location:
        if 'REFERRAL' in admission_location:
            admission_location = 'REFERRAL'
        elif 'EMERGENCY' in admission_location:
            admission_location = 'EMERGENCY'
        elif 'TRANSFER' in admission_location:
            admission_location = 'TRANSFER'
        elif 'TRSF' in admission_location:
            admission_location = 'TRANSFER'
        else:
            admission_location = 'OTHER'
    else:
        admission_location = 'OTHER'
    return admission_location

In [78]:
sepsis_query = \
"""
SELECT
DISTINCT
PAT.subject_id,
ADM.hadm_id,
ICU.icustay_id,
PAT.gender,
ADM.admission_location as "admission_location"
FROM patients PAT
INNER JOIN admissions ADM
ON PAT.subject_id = ADM.subject_id
LEFT JOIN icustays ICU
ON ICU.subject_id = ADM.subject_id AND ICU.hadm_id = ADM.hadm_id
INNER JOIN diagnoses_icd DIA_ICD
ON DIA_ICD.subject_id = ADM.subject_id AND DIA_ICD.hadm_id = ADM.hadm_id AND DIA_ICD.icd9_code in ('99591', '99592', '78552')
WHERE DATE_PART('year', AGE(ADM.admittime, PAT.dob)) > 18
;
"""

sepsis_patients = pd.read_sql_query(sepsis_query, con)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,admission_location
0,21,111970,216859.0,M,EMERGENCY ROOM ADMIT
1,38,185910,248910.0,M,TRANSFER FROM HOSP/EXTRAM
2,61,189535,217135.0,M,CLINIC REFERRAL/PREMATURE
3,62,116009,216609.0,M,EMERGENCY ROOM ADMIT
4,64,172056,232593.0,F,EMERGENCY ROOM ADMIT
...,...,...,...,...,...
6125,99865,189346,259297.0,F,PHYS REFERRAL/NORMAL DELI
6126,99865,189346,284772.0,F,PHYS REFERRAL/NORMAL DELI
6127,99912,189380,236183.0,M,EMERGENCY ROOM ADMIT
6128,99985,176670,279638.0,M,EMERGENCY ROOM ADMIT


In [79]:
sepsis_patients['admission_location'] = sepsis_patients.admission_location.apply(categorize_admission_location)
sepsis_patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,admission_location
0,21,111970,216859.0,M,EMERGENCY
1,38,185910,248910.0,M,TRANSFER
2,61,189535,217135.0,M,REFERRAL
3,62,116009,216609.0,M,EMERGENCY
4,64,172056,232593.0,F,EMERGENCY
...,...,...,...,...,...
6125,99865,189346,259297.0,F,REFERRAL
6126,99865,189346,284772.0,F,REFERRAL
6127,99912,189380,236183.0,M,EMERGENCY
6128,99985,176670,279638.0,M,EMERGENCY


In [80]:
for admission_location in ['REFERRAL', 'EMERGENCY', 'TRANSFER']:
    male_patients = [1 if (al == admission_location) else 0 for al in sepsis_patients[sepsis_patients.gender=='M'].admission_location]
    female_patients = [1 if (al == admission_location) else 0 for al in sepsis_patients[sepsis_patients.gender=='F'].admission_location]
    t_value, p_value = stats.ttest_ind(
        male_patients,
        female_patients
    )

    summary_data.append(
        [
            'Admission Location = ' + str(admission_location),
            sum(male_patients),
            sum(female_patients),
            t_value,
            p_value
        ]
    )
summary_data

[['0 < Age < 45', 336, 268, -0.3654716886674092, 0.7147719504287686],
 ['45 < Age < 65', 1175, 832, 2.4409561288957695, 0.014676550005472406],
 ['65 < Age < 90', 1647, 1256, 0.6061316111905368, 0.5444498120696595],
 ['90 < Age < 500', 181, 249, -6.190866135828064, 6.371023383169715e-10],
 ['Ethnicity = ASIAN', 112, 82, 0.400164663975862, 0.6890491875454969],
 ['Ethnicity = WHITE', 2592, 1915, 3.108697572684383, 0.0018877515737622708],
 ['Ethnicity = HISPANIC/LATINO',
  134,
  65,
  3.184888527719864,
  0.001455353245638791],
 ['Ethnicity = BLACK', 271, 369, -7.571989888199857, 4.218962978887739e-14],
 ['Ethnicity = OTHER', 344, 246, 1.017641810500703, 0.3088883847666546],
 ['Careunit = CCU', 318, 238, 0.4310545345989765, 0.6664438798443675],
 ['Careunit = MICU', 2224, 1851, -3.900640956378968, 9.696478539271773e-05],
 ['Careunit = CSRU', 151, 78, 2.989938940590666, 0.002801419232908467],
 ['Careunit = SICU', 504, 349, 1.7493406347323013, 0.08028221369021499],
 ['Maritalstatus = SINGLE/

## Summary

In [28]:
summary_df = pd.DataFrame(summary_data)
summary_df

Unnamed: 0,0,1,2,3,4
0,0 < Age < 45,336,268,-0.365472,0.714772
1,45 < Age < 65,1175,832,2.440956,0.01467655
2,65 < Age < 90,1647,1256,0.606132,0.5444498
3,90 < Age < 500,181,249,-6.190866,6.371023e-10
4,Ethnicity = ASIAN,112,82,0.400165,0.6890492
5,Ethnicity = WHITE,2592,1915,3.108698,0.001887752
6,Ethnicity = HISPANIC/LATINO,134,65,3.184889,0.001455353
7,Ethnicity = BLACK,271,369,-7.57199,4.218963e-14
8,Ethnicity = OTHER,344,246,1.017642,0.3088884
9,Careunit = CCU,318,238,0.431055,0.6664439
