In [372]:
import duckdb
import os
import tqdm
import pandas as pd
import string

# Store MIMIC III data as Parquet files

In [4]:
#per chatgpt, used for progress bar estimations below
mimic_table_row_counts = { 
    'ADMISSIONS': 58976,
    'CALLOUT': 34499,
    'CAREGIVERS': 7567,
    'CHARTEVENTS': 330712483,
    'CPTEVENTS': 573146,
    'D_CPT': 134,
    'D_ICD_DIAGNOSES': 14710,
    'D_ICD_PROCEDURES': 3898,
    'D_ITEMS': 12487,
    'D_LABITEMS': 753,
    'DATETIMEEVENTS': 4485937,
    'DIAGNOSES_ICD': 651047,
    'DRGCODES': 125557,
    'ICUSTAYS': 61532,
    'INPUTEVENTS_CV': 17527935,
    'INPUTEVENTS_MV': 3618991,
    'LABEVENTS': 27854055,
    'MICROBIOLOGYEVENTS': 631726,
    'NOTEEVENTS': 2083180,
    'OUTPUTEVENTS': 4349218,
    'PATIENTS': 46520,
    'PRESCRIPTIONS': 4157756,
    'PROCEDUREEVENTS_MV': 258066,
    'PROCEDURES_ICD': 240095,
    'SERVICES': 733241,
    'TRANSFERS': 261897
}


In [8]:
# build column list for preprocessing into parquet file
datetime_columns_translation = [
    'ADMITTIME',  # ADMISSIONS
    'DISCHTIME',  # ADMISSIONS
    'DEATHTIME',  # ADMISSIONS
    'EDREGTIME',  # ADMISSIONS
    'EDOUTTIME',  # ADMISSIONS
    'CHARTTIME',  # CHARTEVENTS, DATETIMEEVENTS, LABEVENTS, OUTPUTEVENTS, NOTEEVENTS, MICROBIOLOGYEVENTS
    'STORETIME',  # CHARTEVENTS, DATETIMEEVENTS, OUTPUTEVENTS
    'STARTTIME',  # INPUTEVENTS_MV, PROCEDUREEVENTS_MV
    'ENDTIME',  # INPUTEVENTS_MV, PROCEDUREEVENTS_MV
    'INTIME',  # ICUSTAYS
    'OUTTIME',  # ICUSTAYS
    'DOB',  # PATIENTS 
    'DOD',  # PATIENTS 
    'DOD_HOSP',  # PATIENTS 
    'DOD_SSN',  # PATIENTS 
    'CHARTDATE',  # NOTEEVENTS, MICROBIOLOGYEVENTS
    'STARTDATE',  # PRESCRIPTIONS
    'ENDDATE',  # PRESCRIPTIONS
]

numeric_columns_translation = [
    'VALUENUM',  # Common in multiple tables
    'VALUE',  # CHARTEVENTS, LABEVENTS, DATETIMEEVENTS, etc.
    'AMOUNT',  # INPUTEVENTS_CV, INPUTEVENTS_MV
    'RATE',  # INPUTEVENTS_CV, INPUTEVENTS_MV
    'ORIGINALAMOUNT',  # INPUTEVENTS_CV, INPUTEVENTS_MV
    'ORIGINALRATE',  # INPUTEVENTS_CV, INPUTEVENTS_MV
    'DILUTION_TEXT',  # MICROBIOLOGYEVENTS
    'DILUTION_COMMENTS',  # MICROBIOLOGYEVENTS
    'DOSE_VAL_RX',  # PRESCRIPTIONS
    'FORM_VAL_DISP',  # PRESCRIPTIONS
    'CPT_CD' 
]


In [10]:
# process the extracted files and turn them into parquet files for dask or duckDB
location = "./mimicIII/mimic-iii-clinical-database-1.4"
files_to_process = os.listdir("./mimicIII/mimic-iii-clinical-database-1.4")
#files_to_process = ["/".join([location,file]) for file in files_to_process]

for i, src_file in enumerate(files_to_process[25:]):
    print(f"Processing {src_file} / {1+i} of {len(files_to_process)}")
    if not src_file.endswith(".csv.gz"):
        print(f"\t Skipping since not proper type")
        continue
    if src_file.startswith("CHARTEVENTS"):
        print(f"\t Already loaded CHARTEVENTS")
        continue
    
    base_fn = src_file.replace(".csv.gz", "")

    #print(f"{base_fn} = {mimic_table_row_counts.get(base_fn)}")

    chunksize = 10e6
    reader = pd.read_csv("/".join([location,src_file]), chunksize=chunksize, compression='gzip')  

    
    for i, chunk in enumerate(tqdm.tqdm(reader, total=1+mimic_table_row_counts.get(base_fn)//chunksize, desc="Processing Chunks")):
        # convert numerical values
        num_columns = list(set(numeric_columns_translation).intersection(chunk.columns))
        dt_columns = list(set(datetime_columns_translation).intersection(chunk.columns))
        for c, col in enumerate(num_columns): 
            #print(f"\t\tChanging numeric in {c+1} of {len(num_columns)}")               
            chunk[col] = pd.to_numeric(chunk[col], errors="coerce")
        
        for c, col in enumerate(dt_columns): 
            #print(f"\t\tChanging dt in {c+1} of {len(dt_columns)}")               
            chunk[col] = pd.to_datetime(chunk[col], errors="coerce")
        
        if "GSN" in chunk.columns:
            chunk["GSN"] = chunk["GSN"].fillna("").astype(str)

        # Save each chunk as a separate Parquet file
        chunk.to_parquet(f"parquet/{base_fn}_{i}.parquet", engine="pyarrow", index=False)

Processing PRESCRIPTIONS.csv.gz / 1 of 32


  for obj in iterable:
Processing Chunks: 100%|██████████| 1/1.0 [00:13<00:00, 13.09s/it]


Processing PROCEDUREEVENTS_MV.csv.gz / 2 of 32


Processing Chunks: 100%|██████████| 1/1.0 [00:01<00:00,  1.08s/it]


Processing PROCEDURES_ICD.csv.gz / 3 of 32


Processing Chunks: 100%|██████████| 1/1.0 [00:00<00:00,  8.93it/s]


Processing README.md / 4 of 32
	 Skipping since not proper type
Processing SERVICES.csv.gz / 5 of 32


Processing Chunks: 100%|██████████| 1/1.0 [00:00<00:00, 10.98it/s]


Processing SHA256SUMS.txt / 6 of 32
	 Skipping since not proper type
Processing TRANSFERS.csv.gz / 7 of 32


Processing Chunks: 100%|██████████| 1/1.0 [00:00<00:00,  1.73it/s]


# Set up DuckDB helpers

In [6]:
# change working directory to where parquet files are
os.chdir("./parquet")

In [7]:
# column look ups for convenience when wanting to see what columns are available in what tables
mimic_columns = dict()
for tbl in mimic_table_row_counts.keys():
    df = pd.read_parquet(f"{tbl}_0.parquet")
    mimic_columns[tbl] = df.columns

del df
# reformat index objects to lists 
mimic_columns = {col: list(mimic_columns[col]) for col in mimic_columns}
# Setup DuckDB helper functions

In [15]:
def table_finder(columns):
    if type(columns) is str:
        columns = [columns]
    for column in columns:
        for table in mimic_columns.keys():
            if column in mimic_columns[table]:
                print(f"{column} found in {table}")
    return None

In [18]:
def table_check():
    def decorator(fnc):
        def wrapper(*args, **kwargs):
            for arg in args:
                if arg not in mimic_columns.keys():
                    raise ValueError(f"'{arg}' is not in the list of MIMIC III tables")
            return fnc(*args, **kwargs)
        return wrapper
    return decorator

In [19]:
# decorator to change anything I am passing a query to be updated with the parquet files
def duckify():
    def decorator(fnc):
        def wrapper(*args, **kwargs):
            #print(*args)
            new_args = []
            for arg in args:
                if type(arg)==str:
                # print(arg)
                    found=False
                    for tbl in mimic_columns.keys():
                        if arg.find(tbl)>-1:
                            #print(f"before {arg=}")
                            arg = arg.replace(tbl, f"'{tbl}*.parquet'")
                            #print(f"after {arg=}")
                            found=True
                    if not found:
                        raise ValueError("Found no table name in the query string")
                new_args.append(arg)
                #print(args)
            return fnc(*new_args, **kwargs)
        return wrapper
    return decorator


In [197]:
@duckify()
def run_query(qry, print_qry=False):
    """
    Run query and return dataframe, or simply print resulting query
    """
    if print_qry:
        print(qry)

    return duckdb.query(qry).to_df()

# Assignment

## 1.  Who are the most recorded patients in the database? 
Find the patients that have the most records across all databases but also the highest average rank

In [None]:
@table_check()
def recon_query(tbl):
    qry = f""" 
    select 
    '{tbl.capitalize()}' as Table,
    subject_id,
    count(*) Count_,
    max()
    row_number() over (partition by null order by Count_ desc, subject_id) Record_Rank
    from {tbl}
    group by 2
    """
    return qry

In [240]:
run_query(recon_query("ADMISSIONS")).head(20)

Unnamed: 0,Table,SUBJECT_ID,Count_,Record_Rank
0,Admissions,13033,42,1
1,Admissions,109,34,2
2,Admissions,11861,34,3
3,Admissions,5060,31,4
4,Admissions,20643,24,5
5,Admissions,19213,23,6
6,Admissions,7809,22,7
7,Admissions,5727,21,8
8,Admissions,23657,20,9
9,Admissions,11318,19,10


In [245]:
run_query("""    select 
    'Admissions' as Table,
    subject_id,
    count(*) Count_,
    row_number() over (partition by null order by Count_ desc, subject_id) Record_Rank
    from ADMISSIONS
    group by 2
       order by 3    desc
          """).head(10)

Unnamed: 0,Table,SUBJECT_ID,Count_,Record_Rank
0,Admissions,13033,42,1
1,Admissions,109,34,2
2,Admissions,11861,34,3
3,Admissions,5060,31,4
4,Admissions,20643,24,5
5,Admissions,19213,23,6
6,Admissions,7809,22,7
7,Admissions,5727,21,8
8,Admissions,23657,20,9
9,Admissions,11318,19,10


In [342]:
# build the recon query for every table in MIMIC iii and get the statistics
qry = "union all".join([recon_query(table) for table, columns in mimic_columns.items() if 'SUBJECT_ID' in columns])
recon_df = run_query(
f"""
select 
src.subject_id, 
datediff('day', subj.dob, cast(adm.admittime as date))/365 as age_at_admission,
avg(record_rank), 
min(record_rank), 
max(record_rank), 
sum(Count_) 
from ({qry}) src 
inner join PATIENTS subj
  on src.subject_id = subj.subject_id
inner join (
  select
  subject_id,
  max(admittime) as admittime
  from ADMISSIONS
  group by 1
) adm
 on src.subject_id = adm.subject_id
where age_at_admission > 30
group by 1, 2
order by 4,3, 2
""", True)


select 
src.subject_id, 
datediff('day', subj.dob, cast(adm.admittime as date))/365 as age_at_admission,
avg(record_rank), 
min(record_rank), 
max(record_rank), 
sum(Count_) 
from ( 
    select 
    'Admissions' as Table,
    subject_id,
    count(*) Count_,
    row_number() over (partition by null order by Count_ desc, subject_id) Record_Rank
    from 'ADMISSIONS*.parquet'
    group by 2
    union all 
    select 
    'Callout' as Table,
    subject_id,
    count(*) Count_,
    row_number() over (partition by null order by Count_ desc, subject_id) Record_Rank
    from 'CALLOUT*.parquet'
    group by 2
    union all 
    select 
    'Chartevents' as Table,
    subject_id,
    count(*) Count_,
    row_number() over (partition by null order by Count_ desc, subject_id) Record_Rank
    from 'CHARTEVENTS*.parquet'
    group by 2
    union all 
    select 
    'Cptevents' as Table,
    subject_id,
    count(*) Count_,
    row_number() over (partition by null order by Count_ desc, subject_id

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [343]:
recon_df.head(15)

Unnamed: 0,SUBJECT_ID,age_at_admission,avg(record_rank),min(record_rank),max(record_rank),sum(Count_)
0,17891,46.775342,1519.055556,1,16915,56384.0
1,27427,75.887671,1519.25,1,25872,201833.0
2,29035,66.873973,1529.95,1,27349,129820.0
3,25256,39.586301,1600.35,1,23838,203090.0
4,12613,61.364384,2176.705882,1,11933,429868.0
5,11923,49.79726,2207.352941,1,11284,344043.0
6,13033,39.676712,2303.65,1,16033,129104.0
7,55672,58.394521,2526.526316,1,34942,139529.0
8,78076,71.646575,2646.947368,1,40818,198711.0
9,55639,77.246575,13885.777778,1,40737,779432.0


In [344]:
subjects = recon_df.iloc[:3]["SUBJECT_ID"].values
subject_qry_clause = f"subject_id in ({','.join([str(s) for s in subjects])})"

In [345]:
subject_qry_clause

'subject_id in (17891,27427,29035)'

## 2.  What are diseases that they have in common

Write intersection query for diseases 

In [None]:
table_finder("SUBJECT_ID")

SUBJECT_ID found in ADMISSIONS
SUBJECT_ID found in CALLOUT
SUBJECT_ID found in CHARTEVENTS
SUBJECT_ID found in CPTEVENTS
SUBJECT_ID found in DATETIMEEVENTS
SUBJECT_ID found in DIAGNOSES_ICD
SUBJECT_ID found in DRGCODES
SUBJECT_ID found in ICUSTAYS
SUBJECT_ID found in INPUTEVENTS_CV
SUBJECT_ID found in INPUTEVENTS_MV
SUBJECT_ID found in LABEVENTS
SUBJECT_ID found in MICROBIOLOGYEVENTS
SUBJECT_ID found in NOTEEVENTS
SUBJECT_ID found in OUTPUTEVENTS
SUBJECT_ID found in PATIENTS
SUBJECT_ID found in PRESCRIPTIONS
SUBJECT_ID found in PROCEDUREEVENTS_MV
SUBJECT_ID found in PROCEDURES_ICD
SUBJECT_ID found in SERVICES
SUBJECT_ID found in TRANSFERS


In [346]:
common_diseases_qry = \
f"""
select 
diag.*
from DIAGNOSES_ICD subj_diag
inner join D_ICD_DIAGNOSES diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = {subjects[0]}
intersect
select 
diag.*
from DIAGNOSES_ICD subj_diag
inner join D_ICD_DIAGNOSES diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = {subjects[1]}
intersect
select 
diag.*
from DIAGNOSES_ICD subj_diag
inner join D_ICD_DIAGNOSES diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = {subjects[2]}
order by 1
"""

common_diseases_df = run_query(common_diseases_qry ,True)


select 
diag.*
from 'DIAGNOSES_ICD*.parquet' subj_diag
inner join 'D_ICD_DIAGNOSES*.parquet' diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 17891
intersect
select 
diag.*
from 'DIAGNOSES_ICD*.parquet' subj_diag
inner join 'D_ICD_DIAGNOSES*.parquet' diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 27427
intersect
select 
diag.*
from 'DIAGNOSES_ICD*.parquet' subj_diag
inner join 'D_ICD_DIAGNOSES*.parquet' diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 29035
order by 1



In [347]:
common_diseases_df

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
0,4473,4280,CHF NOS,"Congestive heart failure, unspecified"
1,5908,5849,Acute kidney failure NOS,"Acute kidney failure, unspecified"


## 3. What medicines do they have in common?

Write intersection query for medicines

In [142]:
run_query("select * from PRESCRIPTIONS limit 10")

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTDATE,ENDDATE,DRUG_TYPE,DRUG,DRUG_NAME_POE,DRUG_NAME_GENERIC,FORMULARY_DRUG_CD,GSN,NDC,PROD_STRENGTH,DOSE_VAL_RX,DOSE_UNIT_RX,FORM_VAL_DISP,FORM_UNIT_DISP,ROUTE
0,2214776,6,107064,,2175-06-11,2175-06-12,MAIN,Tacrolimus,Tacrolimus,Tacrolimus,TACR1,21796.0,469061711.0,1mg Capsule,2.0,mg,2.0,CAP,PO
1,2214775,6,107064,,2175-06-11,2175-06-12,MAIN,Warfarin,Warfarin,Warfarin,WARF5,6562.0,56017275.0,5mg Tablet,5.0,mg,1.0,TAB,PO
2,2215524,6,107064,,2175-06-11,2175-06-12,MAIN,Heparin Sodium,,,HEPAPREMIX,6522.0,338055002.0,"25,000 unit Premix Bag",,UNIT,1.0,BAG,IV
3,2216265,6,107064,,2175-06-11,2175-06-12,BASE,D5W,,,HEPBASE,,0.0,HEPARIN BASE,250.0,ml,250.0,ml,IV
4,2214773,6,107064,,2175-06-11,2175-06-12,MAIN,Furosemide,Furosemide,Furosemide,FURO20,8208.0,54829725.0,20mg Tablet,20.0,mg,1.0,TAB,PO
5,2214774,6,107064,,2175-06-11,2175-06-15,MAIN,Warfarin,Warfarin,Warfarin,WARF0,14198.0,56016975.0,Check with MD for Dose,1.0,dose,1.0,dose,PO
6,2215525,6,107064,,2175-06-12,2175-06-12,MAIN,Heparin Sodium,,,HEPAPREMIX,6522.0,338055002.0,"25,000 unit Premix Bag",,UNIT,1.0,BAG,IV
7,2216266,6,107064,,2175-06-12,2175-06-12,BASE,D5W,,,HEPBASE,,0.0,HEPARIN BASE,250.0,ml,250.0,ml,IV
8,2215526,6,107064,,2175-06-12,2175-06-13,MAIN,Heparin Sodium,,,HEPAPREMIX,6522.0,338055002.0,"25,000 unit Premix Bag",,UNIT,1.0,BAG,IV
9,2214778,6,107064,,2175-06-12,2175-06-13,MAIN,Warfarin,Warfarin,Warfarin,WARF2,6561.0,56017075.0,2mg Tab,2.0,mg,1.0,TAB,PO


In [435]:
common_rx_qry = \
f"""
select 
drug,
sum(count_) as usage 
from (
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from PRESCRIPTIONS 
    where SUBJECT_ID = {subjects[0]}
    group by 1,2
    union
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from PRESCRIPTIONS 
    where SUBJECT_ID = {subjects[1]}
    group by 1,2
    union
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from PRESCRIPTIONS 
    where SUBJECT_ID = {subjects[2]}
    group by 1,2
) rx
group by 1
having count(distinct subject_id) = 3
order by 2 desc
limit 10
"""
common_rx_df = run_query(common_rx_qry, True)
common_rx_df 


select 
drug,
sum(count_) as usage 
from (
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from 'PRESCRIPTIONS*.parquet' 
    where SUBJECT_ID = 17891
    group by 1,2
    union
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from 'PRESCRIPTIONS*.parquet' 
    where SUBJECT_ID = 27427
    group by 1,2
    union
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from 'PRESCRIPTIONS*.parquet' 
    where SUBJECT_ID = 29035
    group by 1,2
) rx
group by 1
having count(distinct subject_id) = 3
order by 2 desc
limit 10



Unnamed: 0,DRUG,usage
0,Insulin,23.0
1,Acetaminophen,22.0
2,Sodium Chloride 0.9% Flush,21.0
3,Heparin,21.0
4,Iso-Osmotic Dextrose,16.0
5,Furosemide,16.0
6,D5W,16.0
7,Lorazepam,15.0
8,D5 1/2NS,15.0
9,Pantoprazole,15.0


## 4.  What types of ICU initial admissions have they had?

Have subject ID for columns and number of ICU stays by ICU type as columns

In [53]:
run_query("""select * from TRANSFERS limit 10""")

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,DBSOURCE,EVENTTYPE,PREV_CAREUNIT,CURR_CAREUNIT,PREV_WARDID,CURR_WARDID,INTIME,OUTTIME,LOS
0,657,111,192123,254245.0,carevue,transfer,CCU,MICU,7.0,23.0,2142-04-29 15:27:11,2142-05-04 20:38:33,125.19
1,658,111,192123,,carevue,transfer,MICU,,23.0,45.0,2142-05-04 20:38:33,2142-05-05 11:46:32,15.13
2,659,111,192123,,carevue,discharge,,,45.0,,2142-05-05 11:46:32,NaT,
3,660,111,155897,249202.0,metavision,admit,,MICU,,52.0,2144-07-01 04:13:59,2144-07-01 05:19:39,1.09
4,661,111,155897,,metavision,transfer,MICU,,52.0,32.0,2144-07-01 05:19:39,2144-07-01 06:28:29,1.15
5,662,111,155897,249202.0,metavision,transfer,,MICU,32.0,52.0,2144-07-01 06:28:29,2144-07-01 08:07:16,1.65
6,663,111,155897,,metavision,transfer,MICU,,52.0,32.0,2144-07-01 08:07:16,2144-07-01 08:13:51,0.11
7,664,111,155897,249202.0,metavision,transfer,,MICU,32.0,23.0,2144-07-01 08:13:51,2144-07-01 17:56:31,9.71
8,665,111,155897,,metavision,discharge,MICU,,23.0,,2144-07-01 17:56:31,NaT,
9,666,112,174105,289222.0,carevue,admit,,MICU,,12.0,2194-06-13 18:41:27,2194-06-14 14:51:17,20.16


In [441]:
icu_profile_df = run_query(f"""
          select 
          coalesce(CURR_CAREUNIT, cast(CURR_WARDID as varchar(3))) as Ward,
          sum(case when subject_id = {subjects[0]} then 1 else 0 end) as S1_Stays_in_Ward,
          sum(case when subject_id = {subjects[1]} then 1 else 0 end) as S2_Stays_in_Ward,
          sum(case when subject_id = {subjects[2]} then 1 else 0 end) as S3_Stays_in_Ward,
          count(distinct Case when subject_id = {subjects[0]} then hadm_id end) as S1_Stays_by_Adm,
          count(distinct Case when subject_id = {subjects[1]} then hadm_id end) as S2_Stays_by_Adm,
          count(distinct Case when subject_id = {subjects[2]} then hadm_id end) as S3_Stays_by_Adm,
          S1_Stays_in_Ward + S2_Stays_in_Ward + S3_Stays_in_Ward as Total_Stays_In_Ward,
          S1_Stays_by_Adm + S2_Stays_by_Adm + S3_Stays_by_Adm as Total_Admissions_In_Ward
          from TRANSFERS 
          where WARD is not null
          group by 1 
          order by case when regexp_matches(WARD, '^[A-Z]+$') then 1 else 0 end desc, Total_Stays_In_Ward desc
          limit 10
          """, True)

icu_profile_df


          select 
          coalesce(CURR_CAREUNIT, cast(CURR_WARDID as varchar(3))) as Ward,
          sum(case when subject_id = 17891 then 1 else 0 end) as S1_Stays_in_Ward,
          sum(case when subject_id = 27427 then 1 else 0 end) as S2_Stays_in_Ward,
          sum(case when subject_id = 29035 then 1 else 0 end) as S3_Stays_in_Ward,
          count(distinct Case when subject_id = 17891 then hadm_id end) as S1_Stays_by_Adm,
          count(distinct Case when subject_id = 27427 then hadm_id end) as S2_Stays_by_Adm,
          count(distinct Case when subject_id = 29035 then hadm_id end) as S3_Stays_by_Adm,
          S1_Stays_in_Ward + S2_Stays_in_Ward + S3_Stays_in_Ward as Total_Stays_In_Ward,
          S1_Stays_by_Adm + S2_Stays_by_Adm + S3_Stays_by_Adm as Total_Admissions_In_Ward
          from 'TRANSFERS*.parquet' 
          where WARD is not null
          group by 1 
          order by case when regexp_matches(WARD, '^[A-Z]+$') then 1 else 0 end desc, Total_Stays_In_Ward des

Unnamed: 0,Ward,S1_Stays_in_Ward,S2_Stays_in_Ward,S3_Stays_in_Ward,S1_Stays_by_Adm,S2_Stays_by_Adm,S3_Stays_by_Adm,Total_Stays_In_Ward,Total_Admissions_In_Ward
0,SICU,8.0,3.0,12.0,5,2,5,23.0,12
1,MICU,1.0,12.0,7.0,1,4,4,20.0,9
2,TSICU,0.0,0.0,3.0,0,0,2,3.0,2
3,CCU,0.0,0.0,3.0,0,0,3,3.0,3
4,NICU,0.0,0.0,0.0,0,0,0,0.0,0
5,NWARD,0.0,0.0,0.0,0,0,0,0.0,0
6,CSRU,0.0,0.0,0.0,0,0,0,0.0,0
7,54.0,16.0,0.0,21.0,5,0,12,37.0,17
8,18.0,0.0,7.0,0.0,0,1,0,7.0,1
9,49.0,3.0,0.0,3.0,2,0,2,6.0,4


## 5.  In their stays in ICU, what kind of chart events have they had?

In [None]:
run_query("""
          select
          *
          from CHARTEVENTS
          where subject_id = 7666
          limit 10
          """)

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,ITEMID,CHARTTIME,STORETIME,CGID,VALUE,VALUENUM,VALUEUOM,WARNING,ERROR,RESULTSTATUS,STOPPED
0,646224,7666,119064,239385.0,225677,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,2.9,2.9,mg/dL,0,0,,
1,646225,7666,119064,239385.0,227073,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,16.0,16.0,mEq/L,0,0,,
2,646226,7666,119064,239385.0,227442,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,4.5,4.5,mEq/L,0,0,,
3,646227,7666,119064,239385.0,227443,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,19.0,19.0,mEq/L,1,0,,
4,646228,7666,119064,239385.0,227456,2159-05-26 10:14:00,2159-05-26 20:46:00,20889.0,3.4,3.4,g/dL,0,0,,
5,646229,7666,119064,239385.0,227457,2159-05-26 10:14:00,2159-05-26 11:15:00,20889.0,68.0,68.0,K/uL,1,0,,
6,646230,7666,119064,239385.0,227465,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,20.8,20.8,sec,1,0,,
7,646231,7666,119064,239385.0,227466,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,34.4,34.4,sec,0,0,,
8,646232,7666,119064,239385.0,227467,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,2.0,2.0,,1,0,,
9,646233,7666,119064,239385.0,225668,2159-05-26 11:57:00,2159-05-26 11:58:00,20889.0,1.8,1.8,mmol/L,0,0,,


In [442]:
chart_events_df = run_query(f"""
          select 
          --cv.ITEMID,
          items.LABEL,
          count(distinct cv.itemid) as Item_Code_Count,
          count(distinct case when subject_id = {subjects[0]} then charttime else null end) Chart_Event_Subj_1st,  
          count(distinct case when subject_id = {subjects[1]} then charttime else null end) Chart_Event_Subj_2nd, 
          count(distinct case when subject_id = {subjects[2]} then charttime else null end) Chart_Event_Subj_3rd
          from CHARTEVENTS cv
          inner join D_ITEMS items
           on cv.ITEMID = items.ITEMID
          where {subject_qry_clause} 
          and cv.value is not null
          group by 1
          having least(Chart_Event_Subj_1st, Chart_Event_Subj_2nd, Chart_Event_Subj_3rd) > 0
          order by chart_event_Subj_1st + chart_event_Subj_2nd + chart_event_Subj_3rd desc
          limit 20
          """, True)

chart_events_df


          select 
          --cv.ITEMID,
          items.LABEL,
          count(distinct cv.itemid) as Item_Code_Count,
          count(distinct case when subject_id = 17891 then charttime else null end) Chart_Event_Subj_1st,  
          count(distinct case when subject_id = 27427 then charttime else null end) Chart_Event_Subj_2nd, 
          count(distinct case when subject_id = 29035 then charttime else null end) Chart_Event_Subj_3rd
          from 'CHARTEVENTS*.parquet' cv
          inner join 'D_ITEMS*.parquet' items
           on cv.ITEMID = items.ITEMID
          where subject_id in (17891,27427,29035) 
          and cv.value is not null
          group by 1
          having least(Chart_Event_Subj_1st, Chart_Event_Subj_2nd, Chart_Event_Subj_3rd) > 0
          order by chart_event_Subj_1st + chart_event_Subj_2nd + chart_event_Subj_3rd desc
          limit 20
          


Unnamed: 0,LABEL,Item_Code_Count,Chart_Event_Subj_1st,Chart_Event_Subj_2nd,Chart_Event_Subj_3rd
0,Heart Rate,2,670,3277,2379
1,Respiratory Rate,2,655,3256,2391
2,SpO2,1,658,1167,301
3,calprevflg,1,660,1122,290
4,Arterial BP [Systolic],1,510,1121,180
5,Arterial BP [Diastolic],1,510,1121,180
6,Arterial BP Mean,1,503,1117,174
7,Resp Alarm [Low],1,564,937,272
8,HR Alarm [High],1,564,937,272
9,HR Alarm [Low],1,564,937,272


## 6.  Let's compare their average most common vitals by visit

In [358]:
top_chart_events = chart_events_df.iloc[:6]["LABEL"].values

'Heart_Rate'

In [382]:
def rename_chart_event(chart_event):
    return "Avg_"+"".join([char if char in string.ascii_letters or char in string.digits else "_" for char in chart_event if char not in ["[","]"]])


avg(case when label = 'Heart Rate' then valuenum end) as Avg_Heart_Rate,
avg(case when label = 'Respiratory Rate' then valuenum end) as Avg_Respiratory_Rate,
avg(case when label = 'SpO2' then valuenum end) as Avg_SpO2,
avg(case when label = 'calprevflg' then valuenum end) as Avg_calprevflg,
avg(case when label = 'Arterial BP [Diastolic]' then valuenum end) as Avg_Arterial_BP_Diastolic,
avg(case when label = 'Arterial BP [Systolic]' then valuenum end) as Avg_Arterial_BP_Systolic


In [394]:
"""
  avg(case when label = 'SpO2' then valuenum else null end) as Avg_SpO2,
  avg(case when label = 'Heart Rate' then valuenum else null end) as Avg_Heart_Rate,
  avg(case when label = 'calprevflg' then valuenum else null end) as Avg_calprevflg,
  avg(case when label = 'Respiratory Rate' then valuenum else null end) as Avg_Respiratory_Rate,
  avg(case when label = 'Arterial BP [Diastolic]' then valuenum else null end) as Avg_Art_BP_Diastolic,
  avg(case when label = 'Arterial BP [Systolic]' then valuenum else null end) as Avg_Art_BP_Systolic,
"""

get_chart_data_snippet = ",\n".join([f"avg(case when label = '{event}' then valuenum end) as {rename_chart_event(event)}" for event in top_chart_events])+",\n"

In [395]:
chart_names_in = ",".join([f"'{event}'" for event in top_chart_events])

In [None]:
subject_chart_data_snippet = ",\n".join([f"{subj}.{rename_chart_event(event)} as {rename_chart_event(event)}_{subj}" for event in top_chart_events for subj in ["Subj1", "Subj2", "Subj3"]])

In [444]:
run_query(
f"""
select 
coalesce(subj1.admission_number, subj2.admission_number, subj3.admission_number) as Admission_Number_for_Patient,
{subject_chart_data_snippet}
from (
  select 
  subject_id,
  hadm_id,
  min(charttime) as First_Chart_Event,
  {get_chart_data_snippet}
  Row_Number() over (partition by subject_id order by min(charttime)) as Admission_Number
  from CHARTEVENTS cv
  inner join D_ITEMS items
    on cv.ITEMID = items.ITEMID
  where items.LABEL in ({chart_names_in})
  and subject_id = {subjects[0]}
  group by 1,2
) Subj1
full outer join (
  select 
  subject_id,
  hadm_id,
  min(charttime) as First_Chart_Event,
  {get_chart_data_snippet}
  Row_Number() over (partition by subject_id order by min(charttime)) as Admission_Number
  from CHARTEVENTS cv
  inner join D_ITEMS items
    on cv.ITEMID = items.ITEMID
  where items.LABEL in ({chart_names_in})
  and subject_id = {subjects[1]}
  group by 1,2
) Subj2
 on Subj1.admission_number = subj2.admission_number
full outer join (
  select 
  subject_id,
  hadm_id,
  min(charttime) as First_Chart_Event,
  {get_chart_data_snippet}
  Row_Number() over (partition by subject_id order by min(charttime)) as Admission_Number
  from CHARTEVENTS cv
  inner join D_ITEMS items
    on cv.ITEMID = items.ITEMID
  where items.LABEL in ({chart_names_in})
  and subject_id = {subjects[2]}
  group by 1,2
) Subj3
 on subj1.admission_number = subj3.admission_number
 and subj2.admission_number = subj3.admission_number
order by 1
""", True)


select 
coalesce(subj1.admission_number, subj2.admission_number, subj3.admission_number) as Admission_Number_for_Patient,
Subj1.Avg_Heart_Rate as Avg_Heart_Rate_Subj1,
Subj2.Avg_Heart_Rate as Avg_Heart_Rate_Subj2,
Subj3.Avg_Heart_Rate as Avg_Heart_Rate_Subj3,
Subj1.Avg_Respiratory_Rate as Avg_Respiratory_Rate_Subj1,
Subj2.Avg_Respiratory_Rate as Avg_Respiratory_Rate_Subj2,
Subj3.Avg_Respiratory_Rate as Avg_Respiratory_Rate_Subj3,
Subj1.Avg_SpO2 as Avg_SpO2_Subj1,
Subj2.Avg_SpO2 as Avg_SpO2_Subj2,
Subj3.Avg_SpO2 as Avg_SpO2_Subj3,
Subj1.Avg_calprevflg as Avg_calprevflg_Subj1,
Subj2.Avg_calprevflg as Avg_calprevflg_Subj2,
Subj3.Avg_calprevflg as Avg_calprevflg_Subj3,
Subj1.Avg_Arterial_BP_Diastolic as Avg_Arterial_BP_Diastolic_Subj1,
Subj2.Avg_Arterial_BP_Diastolic as Avg_Arterial_BP_Diastolic_Subj2,
Subj3.Avg_Arterial_BP_Diastolic as Avg_Arterial_BP_Diastolic_Subj3,
Subj1.Avg_Arterial_BP_Systolic as Avg_Arterial_BP_Systolic_Subj1,
Subj2.Avg_Arterial_BP_Systolic as Avg_Arterial_BP_Systo

Unnamed: 0,Admission_Number_for_Patient,Avg_Heart_Rate_Subj1,Avg_Heart_Rate_Subj2,Avg_Heart_Rate_Subj3,Avg_Respiratory_Rate_Subj1,Avg_Respiratory_Rate_Subj2,Avg_Respiratory_Rate_Subj3,Avg_SpO2_Subj1,Avg_SpO2_Subj2,Avg_SpO2_Subj3,Avg_calprevflg_Subj1,Avg_calprevflg_Subj2,Avg_calprevflg_Subj3,Avg_Arterial_BP_Diastolic_Subj1,Avg_Arterial_BP_Diastolic_Subj2,Avg_Arterial_BP_Diastolic_Subj3,Avg_Arterial_BP_Systolic_Subj1,Avg_Arterial_BP_Systolic_Subj2,Avg_Arterial_BP_Systolic_Subj3
0,1,65.292929,98.962963,69.290323,14.573477,19.728972,13.806452,96.598639,96.379913,97.555556,1.0,1.0,1.0,61.89441,52.830986,,125.378882,124.849765,
1,2,89.333333,89.285135,68.732955,13.677083,17.451962,21.551247,98.204301,97.381699,95.021898,1.0,1.0,1.0,66.457447,43.843455,40.505556,129.478723,104.283401,124.138889
2,3,107.635922,92.66092,65.285714,17.253589,18.202312,17.914286,98.868293,97.901734,,1.0,1.0,,62.227723,51.622754,,105.717822,109.730539,
3,4,93.352113,79.583333,84.053892,22.774648,15.041667,19.284294,98.106061,,,1.0,,,55.584906,,,118.509434,,
4,5,,86.787565,,,20.736942,,,,,,,,,,,,,
5,5,,,73.473684,,,20.789474,,,,,,,,,,,,
6,6,,,67.572165,,,20.984615,,,,,,,,,,,,
7,7,,,65.02521,,,18.966102,,,,,,,,,,,,
8,8,,,64.906667,,,18.986667,,,,,,,,,,,,
9,9,,,73.90566,,,22.003431,,,,,,,,,,,,


In [408]:
run_query(
f"""
select 
subject_id,
count(distinct hadm_id) as Admissions
from CHARTEVENTS
where {subject_qry_clause}
group by 1
order by 1
"""
)

Unnamed: 0,SUBJECT_ID,Admissions
0,17891,4
1,27427,5
2,29035,13


## 7. How many other people have the same diseases? 

In [134]:
common_diseases_df["ICD9_CODE"].values

array(['0389', '5849', '2859', '51881', '4019'], dtype=object)

In [409]:
print(common_diseases_qry)


select 
diag.*
from DIAGNOSES_ICD subj_diag
inner join D_ICD_DIAGNOSES diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 17891
intersect
select 
diag.*
from DIAGNOSES_ICD subj_diag
inner join D_ICD_DIAGNOSES diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 27427
intersect
select 
diag.*
from DIAGNOSES_ICD subj_diag
inner join D_ICD_DIAGNOSES diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 29035
order by 1



In [161]:
mimic_columns["DIAGNOSES_ICD"]

['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'SEQ_NUM', 'ICD9_CODE']

In [421]:
run_query(
f"""
select 
subj_diag.ICD9_CODE,
diag.SHORT_TITLE,
diag.LONG_TITLE,
count(distinct subj_diag.subject_id) as Patients_Affected
from DIAGNOSES_ICD subj_diag
left join D_ICD_DIAGNOSES diag
 on subj_diag.ICD9_CODE = diag.ICD9_CODE
where subj_diag.ICD9_CODE in (
select 
    ICD9_CODE 
    from (
        {common_diseases_qry}
    ) 
)
group by 1,2,3
order by 2
""", True)


select 
subj_diag.ICD9_CODE,
diag.SHORT_TITLE,
diag.LONG_TITLE,
count(distinct subj_diag.subject_id) as Patients_Affected
from 'DIAGNOSES_ICD*.parquet' subj_diag
left join 'D_ICD_DIAGNOSES*.parquet' diag
 on subj_diag.ICD9_CODE = diag.ICD9_CODE
where subj_diag.ICD9_CODE in (
select 
    ICD9_CODE 
    from (
        
select 
diag.*
from 'DIAGNOSES_ICD*.parquet' subj_diag
inner join 'D_ICD_DIAGNOSES*.parquet' diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 17891
intersect
select 
diag.*
from 'DIAGNOSES_ICD*.parquet' subj_diag
inner join 'D_ICD_DIAGNOSES*.parquet' diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 27427
intersect
select 
diag.*
from 'DIAGNOSES_ICD*.parquet' subj_diag
inner join 'D_ICD_DIAGNOSES*.parquet' diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 29035
order by 1

    ) 
)
group by 1,2,3
order by 2



Unnamed: 0,ICD9_CODE,SHORT_TITLE,LONG_TITLE,Patients_Affected
0,5849,Acute kidney failure NOS,"Acute kidney failure, unspecified",7687
1,4280,CHF NOS,"Congestive heart failure, unspecified",9843


## 8. How many other people use the same meds? 

In [447]:
run_query(
f"""
select 
drug,
count(distinct subject_id) as Patients_Administered_Rx
from PRESCRIPTIONS
where drug in (
select 
    DRUG 
    from (
        {common_rx_qry}
    ) 
)
group by 1
order by 2 desc
limit 10
""", True)


select 
drug,
count(distinct subject_id) as Patients_Administered_Rx
from 'PRESCRIPTIONS*.parquet'
where drug in (
select 
    DRUG 
    from (
        
select 
drug,
sum(count_) as usage 
from (
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from 'PRESCRIPTIONS*.parquet' 
    where SUBJECT_ID = 17891
    group by 1,2
    union
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from 'PRESCRIPTIONS*.parquet' 
    where SUBJECT_ID = 27427
    group by 1,2
    union
    select 
    subject_id,
    drug,
    count(distinct hadm_id) as count_
    from 'PRESCRIPTIONS*.parquet' 
    where SUBJECT_ID = 29035
    group by 1,2
) rx
group by 1
having count(distinct subject_id) = 3
order by 2 desc
limit 10

    ) 
)
group by 1
order by 2 desc
limit 10



Unnamed: 0,DRUG,Patients_Administered_Rx
0,Sodium Chloride 0.9% Flush,29387
1,Acetaminophen,28821
2,Insulin,25235
3,Heparin,24444
4,D5W,21279
5,Iso-Osmotic Dextrose,21098
6,Furosemide,19738
7,Pantoprazole,17074
8,Lorazepam,15449
9,D5 1/2NS,7944


## 9. How many people have the same diseases AND meds? 

In [420]:
run_query(
f"""
select
count(distinct rx.subject_id)
from (
    select 
    distinct
    subject_id,
    drug
    from PRESCRIPTIONS
) rx
inner join (
    select 
    distinct
    subject_id,
    icd9_code
    from DIAGNOSES_ICD
) diag
on rx.subject_id = diag.subject_id
where drug in (
select 
    DRUG 
    from (
        {common_rx_qry}
    ) 
)
and icd9_code in (
select 
    ICD9_CODE 
    from (
        {common_diseases_qry}
    ) 
)
and rx.{subject_qry_clause.replace('in', 'not in')}
"""
, True)


select
count(distinct rx.subject_id)
from (
    select 
    distinct
    subject_id,
    drug
    from 'PRESCRIPTIONS*.parquet'
) rx
inner join (
    select 
    distinct
    subject_id,
    icd9_code
    from 'DIAGNOSES_ICD*.parquet'
) diag
on rx.subject_id = diag.subject_id
where drug in (
select 
    DRUG 
    from (
        
select 
drug
from 'PRESCRIPTIONS*.parquet' 
where SUBJECT_ID = 17891
intersect
select 
drug
from 'PRESCRIPTIONS*.parquet' 
where SUBJECT_ID = 27427
intersect
select 
drug
from 'PRESCRIPTIONS*.parquet' 
where SUBJECT_ID = 29035

    ) 
)
and icd9_code in (
select 
    ICD9_CODE 
    from (
        
select 
diag.*
from 'DIAGNOSES_ICD*.parquet' subj_diag
inner join 'D_ICD_DIAGNOSES*.parquet' diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 17891
intersect
select 
diag.*
from 'DIAGNOSES_ICD*.parquet' subj_diag
inner join 'D_ICD_DIAGNOSES*.parquet' diag
on subj_diag.ICD9_CODE = diag.ICD9_CODE

where subj_diag.SUBJECT_ID = 27427
intersect
s

Unnamed: 0,count(DISTINCT rx.subject_id)
0,13199


## 10.  What are the most rare diagnoses and meds recorded latest in the dataset?

In [425]:
mimic_columns["ADMISSIONS"]

['ROW_ID',
 'SUBJECT_ID',
 'HADM_ID',
 'ADMITTIME',
 'DISCHTIME',
 'DEATHTIME',
 'ADMISSION_TYPE',
 'ADMISSION_LOCATION',
 'DISCHARGE_LOCATION',
 'INSURANCE',
 'LANGUAGE',
 'RELIGION',
 'MARITAL_STATUS',
 'ETHNICITY',
 'EDREGTIME',
 'EDOUTTIME',
 'DIAGNOSIS',
 'HOSPITAL_EXPIRE_FLAG',
 'HAS_CHARTEVENTS_DATA']

In [428]:
run_query(
f"""
select 
subj_diag.icd9_code,
diag.short_title,
diag.long_title,
rx.drug,
max(dischtime) as Discharge_Date,
count(distinct subj_diag.subject_id) as Patients_Affected
from DIAGNOSES_ICD subj_diag
inner join D_ICD_DIAGNOSES diag
 on subj_diag.ICD9_CODE = diag.ICD9_CODE
inner join PRESCRIPTIONS rx
 on subj_diag.subject_id = rx.subject_id
 and subj_diag.hadm_id = rx.hadm_id
inner join ADMISSIONS adm
 on adm.subject_id = subj_diag.subject_id
 and adm.hadm_id = subj_diag.hadm_id
--where subj_diag.subject_id in (3386, 2187, 3417)
group by 1,2,3,4
order by  5 desc, 6
limit 10
"""
)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,ICD9_CODE,SHORT_TITLE,LONG_TITLE,DRUG,Discharge_Date,Patients_Affected
0,4829,Bacterial pneumonia NOS,"Bacterial pneumonia, unspecified",scopolamine base,2210-08-24 19:43:00,1
1,V463,Wheelchair dependence,Wheelchair dependence,Silver Sulfadiazine 1% Cream,2210-08-24 19:43:00,1
2,34540,Psymotr epil w/o int epi,Localization-related (focal) (partial) epileps...,budesonide,2210-08-24 19:43:00,1
3,V463,Wheelchair dependence,Wheelchair dependence,Nystatin Cream,2210-08-24 19:43:00,1
4,3341,Hered spastic paraplegia,Hereditary spastic paraplegia,Ranitidine,2210-08-24 19:43:00,1
5,3694,Legal blindness-usa def,"Legal blindness, as defined in U.S.A.",Ascorbic Acid (Liquid),2210-08-24 19:43:00,1
6,34540,Psymotr epil w/o int epi,Localization-related (focal) (partial) epileps...,Silver Sulfadiazine 1% Cream,2210-08-24 19:43:00,1
7,V441,Gastrostomy status,Gastrostomy status,budesonide,2210-08-24 19:43:00,1
8,9341,Foreign body bronchus,Foreign body in main bronchus,Zonisamide,2210-08-24 19:43:00,1
9,51881,Acute respiratry failure,Acute respiratory failure,scopolamine base,2210-08-24 19:43:00,1


## 11.  Bonus:  What are the most frequent diagnoses and meds combinations?  

In [192]:
common_combo_df = run_query(
"""
select 
diag.icd9_code,
diag_text.short_title,
diag_text.long_title,
rx.drug,
count(distinct diag.hadm_id) as Admissions_with_Combo,
count(distinct diag.subject_id) as Patients_with_Combo
from DIAGNOSES_ICD diag
inner join D_ICD_DIAGNOSES diag_text
    on diag.icd9_code = diag_text.icd9_code
inner join PRESCRIPTIONS rx
    on diag.hadm_id = rx.hadm_id
group by 1,2,3,4
order by 5 desc
"""
)


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [193]:
common_combo_df.head(20)

Unnamed: 0,ICD9_CODE,SHORT_TITLE,LONG_TITLE,DRUG,Admissions_with_Combo,Patients_with_Combo
0,4019,Hypertension NOS,Unspecified essential hypertension,Sodium Chloride 0.9% Flush,15941,13798
1,4019,Hypertension NOS,Unspecified essential hypertension,Acetaminophen,15020,13370
2,4019,Hypertension NOS,Unspecified essential hypertension,Potassium Chloride,14927,13232
3,4019,Hypertension NOS,Unspecified essential hypertension,Magnesium Sulfate,13838,12320
4,4019,Hypertension NOS,Unspecified essential hypertension,Insulin,13632,12036
5,4019,Hypertension NOS,Unspecified essential hypertension,Docusate Sodium,12427,11142
6,4019,Hypertension NOS,Unspecified essential hypertension,Heparin,12033,10433
7,4019,Hypertension NOS,Unspecified essential hypertension,Furosemide,10814,9747
8,4019,Hypertension NOS,Unspecified essential hypertension,D5W,10780,9841
9,4019,Hypertension NOS,Unspecified essential hypertension,Morphine Sulfate,10270,9589
