## Supplementary Figure 1 Recreation

#### Loading the subset from a local Postgres server

In [None]:
!pip install SQLAlchemy
!pip install psycopg2
!pip install pandas

In [None]:
import os

from sqlalchemy import create_engine
from sqlalchemy import inspect
# Connection URL for the Postgres database holding the data. Set the
# MIMIC_DATABASE_URL environment variable to supply your own host/credentials
# without editing the notebook; the fallback is the original local default.
engine = create_engine(
    os.environ.get(
        'MIMIC_DATABASE_URL',
        'postgresql+psycopg2://postgres:postgres@localhost:5432/mimic',
    )
)

In [None]:
inspector = inspect(engine)

# Read the subset SQL from disk; the context manager guarantees the file
# handle is closed once the text has been read.
with open('data_subset_query_v1.sql', 'r') as query_file:
    subset_query = query_file.read()

In [None]:
import pandas as pd

In [None]:
# Execute the subset query through the SQLAlchemy engine and load the result into a DataFrame.
df = pd.read_sql_query(sql=subset_query, con=engine)

In [None]:
# Quick look at the loaded data: first rows, dimensions, and column labels.
display(df.head(), df.shape, df.columns)

In [None]:
# Drop extraneous columns: keep only the first occurrence of each column
# label; any later column that repeats a label is removed.
is_first_occurrence = ~df.columns.duplicated()
df = df.loc[:, is_first_occurrence]
df.columns

In [None]:
# Distinct values found in the admission_type column.
df.loc[:, 'admission_type'].unique()

In [None]:
# Names of the morbidity columns that are counted per patient.
morbidities = {
    'congestive_heart_failure',
    'cardiac_arrhythmias',
    'valvular_disease',
    'pulmonary_circulation',
    'peripheral_vascular',
    'hypertension',
    'paralysis',
    'other_neurological',
    'chronic_pulmonary',
    'diabetes_uncomplicated',
    'diabetes_complicated',
    'hypothyroidism',
    'renal_failure',
    'liver_disease',
    'peptic_ulcer',
    'aids',
    'lymphoma',
    'metastatic_cancer',
    'solid_tumor',
    'rheumatoid_arthritis',
    'coagulopathy',
    'obesity',
    'weight_loss',
    'fluid_electrolyte',
    'blood_loss_anemia',
    'deficiency_anemias',
    'alcohol_abuse',
    'drug_abuse',
    'psychoses',
    'depression',
}

# Creating Morbidity Count Column
def count_morbidities(row, morbidities):
    """Count how many of the given morbidity fields equal 1 in ``row``.

    Args:
        row: Mapping-like object indexable by morbidity name (for example a
            DataFrame row passed in by ``DataFrame.apply(..., axis=1)``).
        morbidities: Iterable of field names to inspect.

    Returns:
        int: Number of names in ``morbidities`` for which ``row[name] == 1``.
    """
    return sum(1 for name in morbidities if row[name] == 1)


# Per-patient morbidity count: how many of the morbidity columns equal 1.
# A vectorized comparison + row sum gives the same values as applying
# count_morbidities row by row, without a Python-level loop over every row.
df['num_morbidity'] = df[sorted(morbidities)].eq(1).sum(axis=1)
df.head()

In [None]:
## All patient statistics
# Every "±" value below is a half-width computed with the 1.96 multiplier.
num_patients = len(df)
root_num_patients = num_patients ** 0.5

# Morbidity count: median and quartiles
morbidity_counts = df['num_morbidity']
median_morbidities = morbidity_counts.median()
morbidities_Q1, morbidities_Q3 = (morbidity_counts.quantile(q) for q in (0.25, 0.75))

# Share of patients with more than one morbidity
prop_with_multiple_morbidities = len(df[morbidity_counts > 1]) / num_patients
multimorbidity_variance = prop_with_multiple_morbidities * (1 - prop_with_multiple_morbidities) / num_patients
CI_percent_with_multiple_morbidities = 1.96 * multimorbidity_variance ** 0.5

# SOFA score
sofa_score = df['sofa'].mean()
CI_sofa_score = 1.96 * df['sofa'].std() / root_num_patients

# Length of stay, ICU and hospital
LOS_score_icu = df['los_icu'].mean()
CI_LOS_score_icu = 1.96 * df['los_icu'].std() / root_num_patients

LOS_score_hospital = df['los_hospital'].mean()
CI_LOS_score_hospital = 1.96 * df['los_hospital'].std() / root_num_patients

# In-hospital mortality proportion
prop_mortality = df['hospital_expire_flag'].mean()
mortality_variance = prop_mortality * (1 - prop_mortality) / num_patients
CI_mortality = 1.96 * mortality_variance ** 0.5

print(f'Number of patients: {num_patients}')
print(f'Median number of morbidities: {median_morbidities} (Q1: {morbidities_Q1}, Q3: {morbidities_Q3})')
print(f'Percent of patients with multiple morbidities: {prop_with_multiple_morbidities * 100:.2f}% ± {CI_percent_with_multiple_morbidities * 100:.2f}')
print(f'Mean SOFA score: {sofa_score:.2f} ± {CI_sofa_score:.2f}')
print(f'Mean Length of Stay (ICU): {LOS_score_icu:.2f} ± {CI_LOS_score_icu:.2f}')
print(f'Mean Length of Stay (Hospital): {LOS_score_hospital:.2f} ± {CI_LOS_score_hospital:.2f}')
print(f'Percent Mortality: {prop_mortality * 100:.2f}% ± {CI_mortality * 100:.2f}')

In [None]:
## Creating a function for each statistic

def get_num_patients(df):
    """Return the number of rows in ``df``."""
    return len(df)

def get_median_morbidities(df):
    """Return the median of the ``num_morbidity`` column of ``df``."""
    morbidity_counts = df['num_morbidity']
    return morbidity_counts.median()

def get_morbidities_CI(df):
    """Return the first and third quartiles of ``num_morbidity``.

    Despite the ``CI`` in the name, the values returned are the 0.25 and
    0.75 quantiles of the column, not a confidence interval.

    Returns:
        tuple: ``(Q1, Q3)``.
    """
    morbidity_counts = df['num_morbidity']
    return tuple(morbidity_counts.quantile(q) for q in (0.25, 0.75))

def get_prop_with_multiple_morbidities(df):
    """Return the proportion of rows with more than one morbidity and its CI half-width.

    The half-width is ``1.96 * sqrt(p * (1 - p) / n)`` where ``p`` is the
    proportion and ``n`` the number of rows in ``df``.

    Args:
        df: DataFrame with a ``num_morbidity`` column.

    Returns:
        tuple: ``(proportion, half_width)``. For a ``df`` with no rows both
        values are NaN instead of raising ``ZeroDivisionError``, which matches
        how the mean/std based statistics in this notebook behave on an empty
        subgroup.
    """
    # Same value get_num_patients(df) yields; computed directly from the frame.
    num_patients = df.shape[0]
    if num_patients == 0:
        # An empty subgroup has no defined proportion.
        return float('nan'), float('nan')

    prop_with_multiple_morbidities = (df[df['num_morbidity'] > 1].shape[0] / num_patients)
    CI_percent_with_multiple_morbidities = 1.96 * ((prop_with_multiple_morbidities) * (1 - prop_with_multiple_morbidities) / num_patients) ** 0.5
    return prop_with_multiple_morbidities, CI_percent_with_multiple_morbidities

def get_sofa_score(df):
    """Return the mean of the ``sofa`` column of ``df``."""
    sofa_values = df['sofa']
    return sofa_values.mean()

def get_sofa_CI(df):
    """Return the CI half-width for the mean of the ``sofa`` column.

    Computed as ``1.96 * std / sqrt(n)``, where ``std`` comes from
    ``Series.std()`` and ``n`` is the row count from ``get_num_patients``.
    """
    root_n = get_num_patients(df) ** 0.5
    sofa_std = df['sofa'].std()
    return 1.96 * sofa_std / root_n

def get_LOS_icu(df):
    """Return the mean of the ``los_icu`` column of ``df``."""
    icu_stays = df['los_icu']
    return icu_stays.mean()

def get_LOS_icu_CI(df):
    """Return the CI half-width for the mean of the ``los_icu`` column.

    Computed as ``1.96 * std / sqrt(n)``, where ``std`` comes from
    ``Series.std()`` and ``n`` is the row count from ``get_num_patients``.
    """
    root_n = get_num_patients(df) ** 0.5
    icu_std = df['los_icu'].std()
    return 1.96 * icu_std / root_n

def get_LOS_hospital(df):
    """Return the mean of the ``los_hospital`` column of ``df``."""
    hospital_stays = df['los_hospital']
    return hospital_stays.mean()

def get_LOS_hospital_CI(df):
    """Return the CI half-width for the mean of the ``los_hospital`` column.

    Computed as ``1.96 * std / sqrt(n)``, where ``std`` comes from
    ``Series.std()`` and ``n`` is the row count from ``get_num_patients``.
    """
    root_n = get_num_patients(df) ** 0.5
    hospital_std = df['los_hospital'].std()
    return 1.96 * hospital_std / root_n

def get_mortality(df):
    """Return the mean of the ``hospital_expire_flag`` column of ``df``.

    NOTE(review): presumably the flag is 0/1, making this the in-hospital
    mortality proportion; confirm against the subset query.
    """
    expire_flags = df['hospital_expire_flag']
    return expire_flags.mean()

def get_mortality_CI(df):
    """Return the CI half-width for the mean of ``hospital_expire_flag``.

    Computed as ``1.96 * sqrt(p * (1 - p) / n)``, where ``p`` is the column
    mean and ``n`` is the row count from ``get_num_patients``.
    """
    n_rows = get_num_patients(df)
    expire_rate = df['hospital_expire_flag'].mean()
    variance = expire_rate * (1 - expire_rate) / n_rows
    return 1.96 * variance ** 0.5

In [None]:
# Creating a dictionary of statistics
# First table row: statistics over the full DataFrame.
# NOTE(review): the column labels mention IQR / 95% CI, but most values here
# are point estimates only; the multimorbidity entry is the
# (proportion, half-width) tuple returned by its helper.
all_patients_row = {
    'Group': 'All Patients',
    'Number of Patients (%)': get_num_patients(df),
    'Median Morbidity Count (IQR)': get_median_morbidities(df),
    'Percent (95% CI) with multimorbidity': get_prop_with_multiple_morbidities(df),
    'SOFA Score (95% CI)': get_sofa_score(df),
    'LOS ICU (95% CI)': get_LOS_icu(df),
    'LOS Hospital (95% CI)': get_LOS_hospital(df),
    'Percent Mortality (95% CI)': get_mortality(df),
}
rows = [all_patients_row]

# Labels for each grouping variable used to split the table rows.
gender = ['M', 'F']
age_groups = ['16-24', '25-44', '45-64', '65-84', '85-95']
# NOTE(review): labels run 0-7 and then '>8'; confirm whether the last
# bucket is meant to include exactly 8.
number_of_disorders = list(range(8)) + ['>8']
admission_type = ['Elective', 'Non-elective']

In [None]:
for grp in gender:
    gender_df = df[df['gender'] == grp]
    
    num_patients = get_num_patients(gender_df)
    median_morbidities = get_median_morbidities(gender_df)
    morbidities_CI = get_morbidities_CI(gender_df)
    prop_with_multiple_morbidities, CI_percent_with_multiple_morbidities = get_prop_with_multiple_morbidities()