In [2]:
import duckdb
import os
import tqdm
import pandas as pd

# Store MIMIC III data as Parquet files

In [4]:
#per chatgpt, used for progress bar estimations below
mimic_table_row_counts = { 
    'ADMISSIONS': 58976,
    'CALLOUT': 34499,
    'CAREGIVERS': 7567,
    'CHARTEVENTS': 330712483,
    'CPTEVENTS': 573146,
    'D_CPT': 134,
    'D_ICD_DIAGNOSES': 14710,
    'D_ICD_PROCEDURES': 3898,
    'D_ITEMS': 12487,
    'D_LABITEMS': 753,
    'DATETIMEEVENTS': 4485937,
    'DIAGNOSES_ICD': 651047,
    'DRGCODES': 125557,
    'ICUSTAYS': 61532,
    'INPUTEVENTS_CV': 17527935,
    'INPUTEVENTS_MV': 3618991,
    'LABEVENTS': 27854055,
    'MICROBIOLOGYEVENTS': 631726,
    'NOTEEVENTS': 2083180,
    'OUTPUTEVENTS': 4349218,
    'PATIENTS': 46520,
    'PRESCRIPTIONS': 4157756,
    'PROCEDUREEVENTS_MV': 258066,
    'PROCEDURES_ICD': 240095,
    'SERVICES': 733241,
    'TRANSFERS': 261897
}


In [8]:
# build column list for preprocessing into parquet file
datetime_columns_translation = [
    'ADMITTIME',  # ADMISSIONS
    'DISCHTIME',  # ADMISSIONS
    'DEATHTIME',  # ADMISSIONS
    'EDREGTIME',  # ADMISSIONS
    'EDOUTTIME',  # ADMISSIONS
    'CHARTTIME',  # CHARTEVENTS, DATETIMEEVENTS, LABEVENTS, OUTPUTEVENTS, NOTEEVENTS, MICROBIOLOGYEVENTS
    'STORETIME',  # CHARTEVENTS, DATETIMEEVENTS, OUTPUTEVENTS
    'STARTTIME',  # INPUTEVENTS_MV, PROCEDUREEVENTS_MV
    'ENDTIME',  # INPUTEVENTS_MV, PROCEDUREEVENTS_MV
    'INTIME',  # ICUSTAYS
    'OUTTIME',  # ICUSTAYS
    'DOB',  # PATIENTS 
    'DOD',  # PATIENTS 
    'DOD_HOSP',  # PATIENTS 
    'DOD_SSN',  # PATIENTS 
    'CHARTDATE',  # NOTEEVENTS, MICROBIOLOGYEVENTS
    'STARTDATE',  # PRESCRIPTIONS
    'ENDDATE',  # PRESCRIPTIONS
]

numeric_columns_translation = [
    'VALUENUM',  # Common in multiple tables
    'VALUE',  # CHARTEVENTS, LABEVENTS, DATETIMEEVENTS, etc.
    'AMOUNT',  # INPUTEVENTS_CV, INPUTEVENTS_MV
    'RATE',  # INPUTEVENTS_CV, INPUTEVENTS_MV
    'ORIGINALAMOUNT',  # INPUTEVENTS_CV, INPUTEVENTS_MV
    'ORIGINALRATE',  # INPUTEVENTS_CV, INPUTEVENTS_MV
    'DILUTION_TEXT',  # MICROBIOLOGYEVENTS
    'DILUTION_COMMENTS',  # MICROBIOLOGYEVENTS
    'DOSE_VAL_RX',  # PRESCRIPTIONS
    'FORM_VAL_DISP',  # PRESCRIPTIONS
    'CPT_CD' 
]


In [10]:
# process the extracted files and turn them into parquet files for dask or duckDB
location = "./mimicIII/mimic-iii-clinical-database-1.4"
files_to_process = os.listdir("./mimicIII/mimic-iii-clinical-database-1.4")
#files_to_process = ["/".join([location,file]) for file in files_to_process]

for i, src_file in enumerate(files_to_process[25:]):
    print(f"Processing {src_file} / {1+i} of {len(files_to_process)}")
    if not src_file.endswith(".csv.gz"):
        print(f"\t Skipping since not proper type")
        continue
    if src_file.startswith("CHARTEVENTS"):
        print(f"\t Already loaded CHARTEVENTS")
        continue
    
    base_fn = src_file.replace(".csv.gz", "")

    #print(f"{base_fn} = {mimic_table_row_counts.get(base_fn)}")

    chunksize = 10e6
    reader = pd.read_csv("/".join([location,src_file]), chunksize=chunksize, compression='gzip')  

    
    for i, chunk in enumerate(tqdm.tqdm(reader, total=1+mimic_table_row_counts.get(base_fn)//chunksize, desc="Processing Chunks")):
        # convert numerical values
        num_columns = list(set(numeric_columns_translation).intersection(chunk.columns))
        dt_columns = list(set(datetime_columns_translation).intersection(chunk.columns))
        for c, col in enumerate(num_columns): 
            #print(f"\t\tChanging numeric in {c+1} of {len(num_columns)}")               
            chunk[col] = pd.to_numeric(chunk[col], errors="coerce")
        
        for c, col in enumerate(dt_columns): 
            #print(f"\t\tChanging dt in {c+1} of {len(dt_columns)}")               
            chunk[col] = pd.to_datetime(chunk[col], errors="coerce")
        
        if "GSN" in chunk.columns:
            chunk["GSN"] = chunk["GSN"].fillna("").astype(str)

        # Save each chunk as a separate Parquet file
        chunk.to_parquet(f"parquet/{base_fn}_{i}.parquet", engine="pyarrow", index=False)

Processing PRESCRIPTIONS.csv.gz / 1 of 32


  for obj in iterable:
Processing Chunks: 100%|██████████| 1/1.0 [00:13<00:00, 13.09s/it]


Processing PROCEDUREEVENTS_MV.csv.gz / 2 of 32


Processing Chunks: 100%|██████████| 1/1.0 [00:01<00:00,  1.08s/it]


Processing PROCEDURES_ICD.csv.gz / 3 of 32


Processing Chunks: 100%|██████████| 1/1.0 [00:00<00:00,  8.93it/s]


Processing README.md / 4 of 32
	 Skipping since not proper type
Processing SERVICES.csv.gz / 5 of 32


Processing Chunks: 100%|██████████| 1/1.0 [00:00<00:00, 10.98it/s]


Processing SHA256SUMS.txt / 6 of 32
	 Skipping since not proper type
Processing TRANSFERS.csv.gz / 7 of 32


Processing Chunks: 100%|██████████| 1/1.0 [00:00<00:00,  1.73it/s]


# Set up DuckDB helpers

In [6]:
# change working directory to where parquet files are
os.chdir("./parquet")

In [7]:
# column look ups for convenience when wanting to see what columns are available in what tables
mimic_columns = dict()
for tbl in mimic_table_row_counts.keys():
    df = pd.read_parquet(f"{tbl}_0.parquet")
    mimic_columns[tbl] = df.columns

del df
# reformat index objects to lists 
mimic_columns = {col: list(mimic_columns[col]) for col in mimic_columns}
# Setup DuckDB helper functions

In [15]:
def table_finder(columns):
    if type(columns) is str:
        columns = [columns]
    for column in columns:
        for table in mimic_columns.keys():
            if column in mimic_columns[table]:
                print(f"{column} found in {table}")
    return None

In [18]:
def table_check():
    def decorator(fnc):
        def wrapper(*args, **kwargs):
            for arg in args:
                if arg not in mimic_columns.keys():
                    raise ValueError(f"'{arg}' is not in the list of MIMIC III tables")
            return fnc(*args, **kwargs)
        return wrapper
    return decorator

In [19]:
# decorator to change anything I am passing a query to be updated with the parquet files
def duckify():
    def decorator(fnc):
        def wrapper(*args, **kwargs):
            #print(*args)
            new_args = []
            for arg in args:
                if type(arg)==str:
                # print(arg)
                    found=False
                    for tbl in mimic_columns.keys():
                        if arg.find(tbl)>-1:
                            #print(f"before {arg=}")
                            arg = arg.replace(tbl, f"'{tbl}*.parquet'")
                            #print(f"after {arg=}")
                            found=True
                    if not found:
                        raise ValueError("Found no table name in the query string")
                new_args.append(arg)
                #print(args)
            return fnc(*new_args, **kwargs)
        return wrapper
    return decorator


In [20]:
@duckify()
def run_query(qry, print_qry=False):
    """
    Run query and return dataframe, or simply print resulting query
    """
    if print_qry:
        print(qry)
        return None
    else:
        return duckdb.query(qry).to_df()

# Assignment

## 1.  Who are the most recorded patients in the database? 
Find the patients that have the most records across all databases but also the highest average rank

In [23]:
@table_check()
def recon_query(tbl):
    qry = f""" 
    select 
    '{tbl.capitalize()}' as Table,
    subject_id,
    count(*) Count_,
    row_number() over (partition by null order by Count_ desc) Record_Rank
    from {tbl}
    group by 2
    """
    return qry

In [24]:
run_query(recon_query("ADMISSIONS")).head(20)

Unnamed: 0,Table,SUBJECT_ID,Count_,Record_Rank
0,Admissions,13033,42,1
1,Admissions,11861,34,2
2,Admissions,109,34,3
3,Admissions,5060,31,4
4,Admissions,20643,24,5
5,Admissions,19213,23,6
6,Admissions,7809,22,7
7,Admissions,5727,21,8
8,Admissions,23657,20,9
9,Admissions,11318,19,10


In [None]:
# build the recon query for every table in MIMIC iii and get the statistics
qry = "union all".join([recon_query(table) for table, columns in mimic_columns.items() if 'SUBJECT_ID' in columns])
recon_df = run_query(f"select subject_id, avg(record_rank), min(record_rank), max(record_rank), sum(Count_) from ({qry}) src group by 1 order by 2")


In [27]:
recon_df.head(15)

Unnamed: 0,SUBJECT_ID,avg(record_rank),min(record_rank),max(record_rank),sum(Count_)
0,7666,208.0,21,1963,153790.0
1,27800,334.631579,14,2877,98502.0
2,15046,399.05,36,2046,85878.0
3,7809,520.0,4,2245,61514.0
4,55337,523.052632,12,3542,131359.0
5,73713,528.473684,6,8286,86271.0
6,13599,529.0,22,6028,111196.0
7,46251,563.315789,71,4389,51246.0
8,24900,571.944444,115,1318,88979.0
9,19213,638.888889,5,3953,74122.0


In [34]:
subjects = recon_df.iloc[:3]["SUBJECT_ID"].values
subject_qry_clause = f"subject_id in ({','.join([str(s) for s in subjects])})"

In [35]:
subject_qry_clause

'subject_id in (7666,27800,15046)'

## 2.  What are diseases that they have in common

Write intersection query for diseases 

In [None]:
table_finder("SUBJECT_ID")

SUBJECT_ID found in ADMISSIONS
SUBJECT_ID found in CALLOUT
SUBJECT_ID found in CHARTEVENTS
SUBJECT_ID found in CPTEVENTS
SUBJECT_ID found in DATETIMEEVENTS
SUBJECT_ID found in DIAGNOSES_ICD
SUBJECT_ID found in DRGCODES
SUBJECT_ID found in ICUSTAYS
SUBJECT_ID found in INPUTEVENTS_CV
SUBJECT_ID found in INPUTEVENTS_MV
SUBJECT_ID found in LABEVENTS
SUBJECT_ID found in MICROBIOLOGYEVENTS
SUBJECT_ID found in NOTEEVENTS
SUBJECT_ID found in OUTPUTEVENTS
SUBJECT_ID found in PATIENTS
SUBJECT_ID found in PRESCRIPTIONS
SUBJECT_ID found in PROCEDUREEVENTS_MV
SUBJECT_ID found in PROCEDURES_ICD
SUBJECT_ID found in SERVICES
SUBJECT_ID found in TRANSFERS


In [None]:
run_query("""
          select 
          diag.*
          from DIAGNOSES_ICD subj_diag
          left join D_ICD_DIAGNOSES diag
           on subj_diag.ICD9_CODE = diag.ICD9_CODE
          
          where subj_diag.SUBJECT_ID = 7666
          intersect
          select 
          diag.*
          from DIAGNOSES_ICD subj_diag
          left join D_ICD_DIAGNOSES diag
           on subj_diag.ICD9_CODE = diag.ICD9_CODE
          
          where subj_diag.SUBJECT_ID = 27800
          intersect
          select 
          diag.*
          from DIAGNOSES_ICD subj_diag
          left join D_ICD_DIAGNOSES diag
           on subj_diag.ICD9_CODE = diag.ICD9_CODE
          
          where subj_diag.SUBJECT_ID = 15046
          """)

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
0,660,389,Septicemia NOS,Unspecified septicemia
1,5279,51881,Acute respiratry failure,Acute respiratory failure
2,4304,4019,Hypertension NOS,Unspecified essential hypertension
3,5908,5849,Acute kidney failure NOS,"Acute kidney failure, unspecified"
4,3147,2859,Anemia NOS,"Anemia, unspecified"


## 3. What medicines do they have in common?

Write intersection query for medicines

In [None]:
run_query("select * from PRESCRIPTIONS")

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTDATE,ENDDATE,DRUG_TYPE,DRUG,DRUG_NAME_POE,DRUG_NAME_GENERIC,FORMULARY_DRUG_CD,GSN,NDC,PROD_STRENGTH,DOSE_VAL_RX,DOSE_UNIT_RX,FORM_VAL_DISP,FORM_UNIT_DISP,ROUTE
0,875962,7666,178499,262757.0,2157-12-15,2157-12-20,MAIN,Sirolimus,Sirolimus,Sirolimus,SIRO1L,043458,8.103006e+06,1mg/1mL Liquid,1.0,mg,1.0,SYR,PO
1,875960,7666,178499,262757.0,2157-12-15,2157-12-20,MAIN,Pravastatin,Pravastatin,Pravastatin,PRAV20,016367,9.372020e+07,20mg Tablet,40.0,mg,2.0,TAB,PO
2,875959,7666,178499,262757.0,2157-12-15,2157-12-20,MAIN,Sulfameth/Trimethoprim SS,Sulfameth/Trimethoprim SS,Sulfameth/Trimethoprim SS,BACTSS,009395,9.300880e+07,1 Tab,1.0,TAB,1.0,TAB,PO
3,875958,7666,178499,262757.0,2157-12-15,2157-12-20,MAIN,Dextrose 50%,Dextrose 50%,Dextrose 50%,DEX50SY,001989,4.094902e+08,50mL Syringe,12.5,gm,0.5,SYR,IV
4,875956,7666,178499,262757.0,2157-12-15,2157-12-20,MAIN,Docusate Sodium,Docusate Sodium,Docusate Sodium,DOCU100,003009,6.373901e+10,100mg Capsule,100.0,mg,1.0,CAP,PO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,878196,7666,138444,237726.0,2158-06-15,2158-06-15,MAIN,Acetaminophen,Acetaminophen,Acetaminophen,ACET325,004489,1.828448e+08,325mg Tablet,325.0,mg,1.0,TAB,PO
1021,878194,7666,138444,237726.0,2158-06-15,2158-06-15,MAIN,LaMIVudine,LaMIVudine,LaMIVudine,LAMI100,041033,1.730662e+08,100mg Tab,50.0,mg,0.5,TAB,PO
1022,878219,7666,138444,237726.0,2158-06-15,2158-06-16,BASE,Iso-Osmotic Dextrose,,,FRBD100,,0.000000e+00,100ml Bag,100.0,ml,100.0,ml,IV
1023,878227,7666,138444,237726.0,2158-06-15,2158-06-16,MAIN,Piperacillin-Tazobactam Na,,,ZOSY4FPB,021187,2.068855e+08,4.5g Frozen Bag,4.5,gm,1.0,BAG,IV


In [48]:
run_query("""
          select 
          drug
          from PRESCRIPTIONS 
          where SUBJECT_ID = 7666
          intersect
          select 
          drug
          from PRESCRIPTIONS 
          where SUBJECT_ID =  27800
          intersect
          select 
          drug
          from PRESCRIPTIONS 
          where SUBJECT_ID =  15046
          """)

Unnamed: 0,DRUG
0,LR
1,Heparin
2,Pantoprazole
3,Lorazepam
4,Fentanyl Citrate
5,Midazolam
6,MetRONIDAZOLE (FLagyl)
7,Levofloxacin
8,D5W
9,Furosemide


## 4.  What types of ICU initial admissions have they had?

Have subject ID for columns and number of ICU stays by ICU type as columns

In [53]:
run_query("""select * from TRANSFERS limit 10""")

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,DBSOURCE,EVENTTYPE,PREV_CAREUNIT,CURR_CAREUNIT,PREV_WARDID,CURR_WARDID,INTIME,OUTTIME,LOS
0,657,111,192123,254245.0,carevue,transfer,CCU,MICU,7.0,23.0,2142-04-29 15:27:11,2142-05-04 20:38:33,125.19
1,658,111,192123,,carevue,transfer,MICU,,23.0,45.0,2142-05-04 20:38:33,2142-05-05 11:46:32,15.13
2,659,111,192123,,carevue,discharge,,,45.0,,2142-05-05 11:46:32,NaT,
3,660,111,155897,249202.0,metavision,admit,,MICU,,52.0,2144-07-01 04:13:59,2144-07-01 05:19:39,1.09
4,661,111,155897,,metavision,transfer,MICU,,52.0,32.0,2144-07-01 05:19:39,2144-07-01 06:28:29,1.15
5,662,111,155897,249202.0,metavision,transfer,,MICU,32.0,52.0,2144-07-01 06:28:29,2144-07-01 08:07:16,1.65
6,663,111,155897,,metavision,transfer,MICU,,52.0,32.0,2144-07-01 08:07:16,2144-07-01 08:13:51,0.11
7,664,111,155897,249202.0,metavision,transfer,,MICU,32.0,23.0,2144-07-01 08:13:51,2144-07-01 17:56:31,9.71
8,665,111,155897,,metavision,discharge,MICU,,23.0,,2144-07-01 17:56:31,NaT,
9,666,112,174105,289222.0,carevue,admit,,MICU,,12.0,2194-06-13 18:41:27,2194-06-14 14:51:17,20.16


In [96]:
run_query("""
          select 
          coalesce(CURR_CAREUNIT, cast(CURR_WARDID as varchar(3))) as Ward,
          sum(case when subject_id = 7666 then 1 else 0 end) as Subject_7666_Stays_in_Ward,
          sum(case when subject_id = 27800 then 1 else 0 end) as Subject_27800_Stays_in_Ward,
          sum(case when subject_id = 15046 then 1 else 0 end) as Subject_15046_Stays_in_Ward,
          count(distinct Case when subject_id = 7666 then hadm_id end) as Subject_7666_Stays_by_Admission,
          count(distinct Case when subject_id = 27800 then hadm_id end) as Subject_27800_Stays_by_Admission,
          count(distinct Case when subject_id = 15046 then hadm_id end) as Subject_15046_Stays_by_Admission,
          subject_7666_stays_in_ward + subject_27800_stays_in_ward + subject_15046_stays_in_ward as Total_Stays_In_Ward,
          subject_7666_stays_by_admission + subject_27800_stays_by_admission + subject_15046_stays_by_admission as Total_Admissions_In_Ward
          from TRANSFERS 
          where WARD is not null
          group by 1 
          order by case when regexp_matches(WARD, '^[A-Z]+$') then 1 else 0 end desc, Total_Stays_In_Ward desc
          limit 20
          """)

Unnamed: 0,Ward,Subject_7666_Stays_in_Ward,Subject_27800_Stays_in_Ward,Subject_15046_Stays_in_Ward,Subject_7666_Stays_by_Admission,Subject_27800_Stays_by_Admission,Subject_15046_Stays_by_Admission,Total_Stays_In_Ward,Total_Admissions_In_Ward
0,MICU,7.0,16.0,12.0,6,13,9,35.0,28
1,CCU,1.0,1.0,0.0,1,1,0,2.0,2
2,CSRU,1.0,0.0,0.0,1,0,0,1.0,1
3,NICU,0.0,0.0,0.0,0,0,0,0.0,0
4,TSICU,0.0,0.0,0.0,0,0,0,0.0,0
5,NWARD,0.0,0.0,0.0,0,0,0,0.0,0
6,SICU,0.0,0.0,0.0,0,0,0,0.0,0
7,18.0,0.0,5.0,3.0,0,3,3,8.0,6
8,45.0,5.0,1.0,0.0,4,1,0,6.0,5
9,55.0,1.0,5.0,0.0,1,4,0,6.0,5


## 5.  In their stays in ICU, what kind of chart events have they had?

In [83]:
run_query("""
          select
          *
          from CHARTEVENTS
          where subject_id = 7666
          limit 10
          """)

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,ITEMID,CHARTTIME,STORETIME,CGID,VALUE,VALUENUM,VALUEUOM,WARNING,ERROR,RESULTSTATUS,STOPPED
0,646224,7666,119064,239385.0,225677,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,2.9,2.9,mg/dL,0,0,,
1,646225,7666,119064,239385.0,227073,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,16.0,16.0,mEq/L,0,0,,
2,646226,7666,119064,239385.0,227442,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,4.5,4.5,mEq/L,0,0,,
3,646227,7666,119064,239385.0,227443,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,19.0,19.0,mEq/L,1,0,,
4,646228,7666,119064,239385.0,227456,2159-05-26 10:14:00,2159-05-26 20:46:00,20889.0,3.4,3.4,g/dL,0,0,,
5,646229,7666,119064,239385.0,227457,2159-05-26 10:14:00,2159-05-26 11:15:00,20889.0,68.0,68.0,K/uL,1,0,,
6,646230,7666,119064,239385.0,227465,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,20.8,20.8,sec,1,0,,
7,646231,7666,119064,239385.0,227466,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,34.4,34.4,sec,0,0,,
8,646232,7666,119064,239385.0,227467,2159-05-26 10:14:00,2159-05-26 12:07:00,20889.0,2.0,2.0,,1,0,,
9,646233,7666,119064,239385.0,225668,2159-05-26 11:57:00,2159-05-26 11:58:00,20889.0,1.8,1.8,mmol/L,0,0,,


In [84]:
subjects

array([ 7666, 27800, 15046], dtype=int64)

In [115]:
run_query(f"""
          select 
          --cv.ITEMID,
          items.LABEL,
          count(distinct cv.itemid) as Item_Code_Count,
          count(distinct case when subject_id = 7666 then charttime else null end) Chart_Event_7666,  
          count(distinct case when subject_id = 27800 then charttime else null end) Chart_Event_27800, 
          count(distinct case when subject_id = 15046 then charttime else null end) Chart_Event_15046
          from CHARTEVENTS cv
          inner join D_ITEMS items
           on cv.ITEMID = items.ITEMID
          where {subject_qry_clause} 
          and cv.value is not null
          group by 1
          having least(Chart_Event_7666, Chart_Event_27800, Chart_Event_15046) > 0
          order by chart_event_7666 + chart_event_27800 + chart_event_15046 desc
          limit 20
          """)

Unnamed: 0,LABEL,Item_Code_Count,Chart_Event_7666,Chart_Event_27800,Chart_Event_15046
0,Heart Rate,2,2146,1838,1243
1,Respiratory Rate,2,2131,1838,1243
2,Mean Airway Pressure,2,207,323,225
3,Peak Insp. Pressure,2,123,305,165
4,Arterial Base Excess,2,364,148,58
5,BUN,2,276,92,72
6,Creatinine,2,276,92,72
7,Minute Volume,2,1,336,88
8,Magnesium,2,249,80,66
9,Phosphorous,2,251,77,64


## 6.  Let's compare their vitals

In [110]:
mimic_columns["CHARTEVENTS"]

['ROW_ID',
 'SUBJECT_ID',
 'HADM_ID',
 'ICUSTAY_ID',
 'ITEMID',
 'CHARTTIME',
 'STORETIME',
 'CGID',
 'VALUE',
 'VALUENUM',
 'VALUEUOM',
 'ERROR',
 'RESULTSTATUS',
 'STOPPED']

In [121]:
run_query(
f"""
select 
coalesce(s7666.admission_number, s27800.admission_number, s15046.admission_number) as Admission_Number_for_Patient,
s7666.Avg_Heart_Rate as Avg_Heart_Rate_7666,
s27800.Avg_Heart_Rate as Avg_Heart_Rate_27800,
s15046.Avg_Heart_Rate as Avg_Heart_Rate_15046,
s7666.Avg_Respiratory_Rate as Avg_Respiratory_Rate_7666,
s27800.Avg_Respiratory_Rate as Avg_Respiratory_Rate_27800,
s15046.Avg_Respiratory_Rate as Avg_Respiratory_Rate_15046,
s7666.Avg_Mean_Airway_Pressure as Avg_Mean_Airway_Pressure_7666,
s27800.Avg_Mean_Airway_Pressure as Avg_Mean_Airway_Pressure_27800,
s15046.Avg_Mean_Airway_Pressure as Avg_Mean_Airway_Pressure_15046,
s7666.Avg_Peak_Insp_Pressure as Avg_Peak_Insp_Pressure_7666,
s27800.Avg_Peak_Insp_Pressure as Avg_Peak_Insp_Pressure_27800,
s15046.Avg_Peak_Insp_Pressure as Avg_Peak_Insp_Pressure_15046,
s7666.Avg_Arterial_Base_Excess as Avg_Arterial_Base_Excess_7666,
s27800.Avg_Arterial_Base_Excess as Avg_Arterial_Base_Excess_27800,
s15046.Avg_Arterial_Base_Excess as Avg_Arterial_Base_Excess_15046
from (
  select 
  subject_id,
  hadm_id,
  min(charttime) as First_Chart_Event,
  avg(case when label = 'Heart Rate' then valuenum else null end) as Avg_Heart_Rate,
  avg(case when label = 'Respiratory Rate' then valuenum else null end) as Avg_Respiratory_Rate,
  avg(case when label = 'Mean Airway Pressure' then valuenum else null end) as Avg_Mean_Airway_Pressure,
  avg(case when label = 'Peak Insp. Pressure' then valuenum else null end) as Avg_Peak_Insp_Pressure,
  avg(case when label = 'Arterial Base Excess' then valuenum else null end) as Avg_Arterial_Base_Excess,
  Row_Number() over (partition by subject_id order by min(charttime)) as Admission_Number
  from CHARTEVENTS cv
  inner join D_ITEMS items
    on cv.ITEMID = items.ITEMID
  where items.LABEL in ('Heart Rate', 'Respiratory Rate', 'Mean Airway Pressure', 'Peak Insp. Pressure', 'Arterial Base Excess')
  and subject_id = 7666
  group by 1,2
) s7666
full outer join (
  select 
  subject_id,
  hadm_id,
  min(charttime) as First_Chart_Event,
  avg(case when label = 'Heart Rate' then valuenum else null end) as Avg_Heart_Rate,
  avg(case when label = 'Respiratory Rate' then valuenum else null end) as Avg_Respiratory_Rate,
  avg(case when label = 'Mean Airway Pressure' then valuenum else null end) as Avg_Mean_Airway_Pressure,
  avg(case when label = 'Peak Insp. Pressure' then valuenum else null end) as Avg_Peak_Insp_Pressure,
  avg(case when label = 'Arterial Base Excess' then valuenum else null end) as Avg_Arterial_Base_Excess,
  Row_Number() over (partition by subject_id order by min(charttime)) as Admission_Number
  from CHARTEVENTS cv
  inner join D_ITEMS items
    on cv.ITEMID = items.ITEMID
  where items.LABEL in ('Heart Rate', 'Respiratory Rate', 'Mean Airway Pressure', 'Peak Insp. Pressure', 'Arterial Base Excess')
  and subject_id = 27800
  group by 1,2
) s27800
 on s7666.admission_number = s27800.admission_number
full outer join (
  select 
  subject_id,
  hadm_id,
  min(charttime) as First_Chart_Event,
  avg(case when label = 'Heart Rate' then valuenum else null end) as Avg_Heart_Rate,
  avg(case when label = 'Respiratory Rate' then valuenum else null end) as Avg_Respiratory_Rate,
  avg(case when label = 'Mean Airway Pressure' then valuenum else null end) as Avg_Mean_Airway_Pressure,
  avg(case when label = 'Peak Insp. Pressure' then valuenum else null end) as Avg_Peak_Insp_Pressure,
  avg(case when label = 'Arterial Base Excess' then valuenum else null end) as Avg_Arterial_Base_Excess,
  Row_Number() over (partition by subject_id order by min(charttime)) as Admission_Number
  from CHARTEVENTS cv
  inner join D_ITEMS items
    on cv.ITEMID = items.ITEMID
  where items.LABEL in ('Heart Rate', 'Respiratory Rate', 'Mean Airway Pressure', 'Peak Insp. Pressure', 'Arterial Base Excess')
  and subject_id = 15046
  group by 1,2
) s15046
 on s27800.admission_number = s15046.admission_number
order by 1
""")

Unnamed: 0,Admission_Number_for_Patient,Avg_Heart_Rate_7666,Avg_Heart_Rate_27800,Avg_Heart_Rate_15046,Avg_Respiratory_Rate_7666,Avg_Respiratory_Rate_27800,Avg_Respiratory_Rate_15046,Avg_Mean_Airway_Pressure_7666,Avg_Mean_Airway_Pressure_27800,Avg_Mean_Airway_Pressure_15046,Avg_Peak_Insp_Pressure_7666,Avg_Peak_Insp_Pressure_27800,Avg_Peak_Insp_Pressure_15046,Avg_Arterial_Base_Excess_7666,Avg_Arterial_Base_Excess_27800,Avg_Arterial_Base_Excess_15046
0,1,83.0,69.770115,64.293907,19.324324,22.988506,17.946237,,8.363636,10.081967,,24.125,24.96,1.0,10.625,0.277778
1,2,82.826829,72.25,67.174699,21.645161,28.178571,18.51506,13.246154,,10.025714,26.0375,,21.391373,1.047619,,-0.862069
2,3,78.384615,78.208333,77.544304,22.269231,32.166667,21.772152,,7.5,9.5,,,23.166667,,12.0,-1.666667
3,4,80.714286,69.7,78.25,17.888889,27.7,17.972973,16.3375,,,28.5375,,,-1.0,3.0,1.0
4,5,83.652174,60.769231,83.931818,20.028986,18.381526,20.022727,8.5,9.769231,,22.5,26.849057,,-0.583333,1.913043,
5,6,73.969231,61.751701,56.627907,19.830769,21.387755,16.883721,,12.807692,8.307692,,31.413043,18.166667,,6.478261,-0.5
6,7,74.493631,75.333333,72.017544,24.585987,29.666667,18.175439,12.583333,,,18.25,,,-4.269231,7.0,
7,8,83.722266,57.771552,64.337979,22.94164,21.508621,17.325175,11.310811,10.018519,6.435484,20.886598,22.265306,12.542373,2.208633,8.272727,3.4
8,9,,72.984456,,,26.984456,,,8.777778,,,25.0,,,6.285714,
9,10,,60.75,,,19.861386,,,9.432432,,,23.583333,,,11.666667,


## 7. How many other people have the same diseases? 

## 8. How many other people use the same meds? 

## 9. How many ICU visits do patients have who had the same use of meds and with same diseases?

## 10.  What are the rarest diagnoses and meds7?