# Additional Analysis

In [1]:
"""
Author: Katy Mombourquette, Denis Ouellette
Date: Dec 15, 2023

Dependencies:
Pandas
Mlxtend

Install instructions
1. Install Pandas: !pip install pandas
2. Install mlxtend: !pip install mlxtend

"""

'\nAuthor: Katy Mombourquette, Denis Ouellette\nDate: Dec 15, 2023\n\nDependencies:\nPandas\nMlxtend\n\nInstall instructions\n1. Install Pandas: !pip install pandas\n2. Install mlxtend: !pip install mlxtend\n\n'

# Setup

In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

data = pd.read_pickle("original_dataset_clean.pkl")
data = data[1:]
data.head()

Unnamed: 0.1,Unnamed: 0,patient_id,dep_name,esi,age,gender,ethnicity,race,lang,religion,...,cc_vaginaldischarge,cc_vaginalpain,cc_weakness,cc_wheezing,cc_withdrawal-alcohol,cc_woundcheck,cc_woundinfection,cc_woundre-evaluation,cc_wristinjury,cc_wristpain
1,1,2,B,4.0,66.0,Male,Hispanic or Latino,Native Hawaiian or Other Pacific Islander,English,Pentecostal,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,3,B,2.0,66.0,Male,Hispanic or Latino,Native Hawaiian or Other Pacific Islander,English,Pentecostal,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,4,A,2.0,66.0,Male,Hispanic or Latino,Native Hawaiian or Other Pacific Islander,English,Pentecostal,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,5,A,3.0,84.0,Female,Hispanic or Latino,Other,Other,Pentecostal,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,6,A,3.0,86.0,Female,Hispanic or Latino,Other,Other,Pentecostal,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# list different column types

demographic_cols = ["patient_id", "disposition", "age", "gender", "lang", "ethnicity", "race", 
                    "employstatus", "insurance_status", "maritalstatus", "religion"]
triage_eval_cols = ["patient_id","dep_name", 'arrivalmode','arrivalmonth','arrivalday', 
                    'arrivalhour_bin', 'esi', 'triage_vital_hr', 'triage_vital_sbp', 
                    'triage_vital_dbp','triage_vital_rr','triage_vital_o2',
                    'triage_vital_o2_device','triage_vital_temp']
chief_complaint_cols = ["patient_id", "disposition",
    'cc_abdominalcramping', 'cc_abdominaldistention', 'cc_abdominalpain', 'cc_abdominalpainpregnant',
    'cc_abnormallab', 'cc_abscess', 'cc_addictionproblem', 'cc_agitation',
    'cc_alcoholintoxication', 'cc_alcoholproblem', 'cc_allergicreaction', 'cc_alteredmentalstatus',
    'cc_animalbite', 'cc_ankleinjury', 'cc_anklepain', 'cc_anxiety',
    'cc_arminjury', 'cc_armpain', 'cc_armswelling', 'cc_assaultvictim',
    'cc_asthma', 'cc_backpain', 'cc_bleeding/bruising', 'cc_blurredvision',
    'cc_bodyfluidexposure', 'cc_breastpain', 'cc_breathingdifficulty', 'cc_breathingproblem',
    'cc_burn', 'cc_cardiacarrest', 'cc_cellulitis', 'cc_chestpain',
    'cc_chesttightness', 'cc_chills', 'cc_coldlikesymptoms', 'cc_confusion',
    'cc_conjunctivitis', 'cc_constipation', 'cc_cough', 'cc_cyst',
    'cc_decreasedbloodsugar-symptomatic', 'cc_dehydration', 'cc_dentalpain', 'cc_depression',
    'cc_detoxevaluation', 'cc_diarrhea', 'cc_dizziness', 'cc_drug/alcoholassessment',
    'cc_drugproblem', 'cc_dyspnea', 'cc_dysuria', 'cc_earpain',
    'cc_earproblem', 'cc_edema', 'cc_elbowpain', 'cc_elevatedbloodsugar-nosymptoms',
    'cc_elevatedbloodsugar-symptomatic', 'cc_emesis', 'cc_epigastricpain', 'cc_epistaxis',
    'cc_exposuretostd', 'cc_extremitylaceration', 'cc_extremityweakness', 'cc_eyeinjury',
    'cc_eyepain', 'cc_eyeproblem', 'cc_eyeredness', 'cc_facialinjury',
    'cc_faciallaceration', 'cc_facialpain', 'cc_facialswelling', 'cc_fall',
    'cc_fall>65', 'cc_fatigue', 'cc_femaleguproblem', 'cc_fever',
    'cc_fever-75yearsorolder', 'cc_fever-9weeksto74years', 'cc_feverimmunocompromised', 'cc_fingerinjury',
    'cc_fingerpain', 'cc_fingerswelling', 'cc_flankpain', 'cc_follow-upcellulitis',
    'cc_footinjury', 'cc_footpain', 'cc_footswelling', 'cc_foreignbodyineye',
    'cc_fulltrauma', 'cc_generalizedbodyaches', 'cc_gibleeding', 'cc_giproblem',
    'cc_groinpain', 'cc_hallucinations', 'cc_handinjury', 'cc_handpain',
    'cc_headache', 'cc_headache-newonsetornewsymptoms', 'cc_headache-recurrentorknowndxmigraines', 'cc_headachere-evaluation',
    'cc_headinjury', 'cc_headlaceration', 'cc_hematuria', 'cc_hemoptysis',
    'cc_hippain', 'cc_homicidal', 'cc_hyperglycemia', 'cc_hypertension',
    'cc_hypotension', 'cc_influenza', 'cc_ingestion', 'cc_insectbite',
    'cc_irregularheartbeat', 'cc_jawpain', 'cc_jointswelling', 'cc_kneeinjury',
    'cc_kneepain', 'cc_laceration', 'cc_leginjury', 'cc_legpain',
    'cc_legswelling', 'cc_lethargy', 'cc_lossofconsciousness', 'cc_maleguproblem',
    'cc_mass', 'cc_medicalproblem', 'cc_medicalscreening', 'cc_medicationproblem',
    'cc_medicationrefill', 'cc_migraine', 'cc_modifiedtrauma', 'cc_motorcyclecrash',
    'cc_motorvehiclecrash', 'cc_multiplefalls', 'cc_nasalcongestion', 'cc_nausea',
    'cc_nearsyncope', 'cc_neckpain', 'cc_neurologicproblem', 'cc_numbness',
    'cc_oralswelling', 'cc_otalgia', 'cc_other', 'cc_overdose-accidental',
    'cc_overdose-intentional', 'cc_pain', 'cc_palpitations', 'cc_panicattack',
    'cc_pelvicpain', 'cc_poisoning', 'cc_post-opproblem', 'cc_psychiatricevaluation',
    'cc_psychoticsymptoms', 'cc_rapidheartrate', 'cc_rash', 'cc_rectalbleeding',
    'cc_rectalpain', 'cc_respiratorydistress', 'cc_ribinjury', 'cc_ribpain',
    'cc_seizure-newonset', 'cc_seizure-priorhxof', 'cc_seizures', 'cc_shortnessofbreath',
    'cc_shoulderinjury', 'cc_shoulderpain', 'cc_sicklecellpain', 'cc_sinusproblem',
    'cc_skinirritation', 'cc_skinproblem', 'cc_sorethroat', 'cc_stdcheck',
    'cc_strokealert', 'cc_suicidal', 'cc_suture/stapleremoval', 'cc_swallowedforeignbody',
    'cc_syncope', 'cc_tachycardia', 'cc_testiclepain', 'cc_thumbinjury',
    'cc_tickremoval', 'cc_toeinjury', 'cc_toepain', 'cc_trauma',
    'cc_unresponsive', 'cc_uri', 'cc_urinaryfrequency', 'cc_urinaryretention',
    'cc_urinarytractinfection', 'cc_vaginalbleeding', 'cc_vaginaldischarge', 'cc_vaginalpain',
    'cc_weakness', 'cc_wheezing', 'cc_withdrawal-alcohol', 'cc_woundcheck',
    'cc_woundinfection', 'cc_woundre-evaluation', 'cc_wristinjury', 'cc_wristpain']
hosp_use_cols = ["patient_id", "disposition", 'n_edvisits', 'n_admissions', 'n_surgeries', 'previousdispo']
med_hist_cols = ["patient_id", "disposition", '2ndarymalig', 'abdomhernia', 'abdomnlpain', 'abortcompl',
    'acqfootdef', 'acrenlfail', 'acutecvd', 'acutemi',
    'acutphanm', 'adjustmentdisorders', 'adltrespfl', 'alcoholrelateddisorders',
    'allergy', 'amniosdx', 'analrectal', 'anemia',
    'aneurysm', 'anxietydisorders', 'appendicitis', 'artembolism',
    'asppneumon', 'asthma', 'attentiondeficitconductdisruptivebeha', 'backproblem',
    'biliarydx', 'birthasphyx', 'birthtrauma', 'bladdercncr',
    'blindness', 'bnignutneo', 'bonectcncr', 'bph',
    'brainnscan', 'breastcancr', 'breastdx', 'brnchlngca',
    'bronchitis', 'burns', 'cardiaarrst', 'cardiacanom',
    'carditis', 'cataract', 'cervixcancr', 'chestpain',
    'chfnonhp', 'chrkidneydisease', 'coaghemrdx', 'coloncancer',
    'comabrndmg', 'complicdevi', 'complicproc', 'conduction',
    'contraceptiv', 'copd', 'coronathero', 'crushinjury',
    'cysticfibro', 'deliriumdementiaamnesticothercognitiv', 'developmentaldisorders', 'diabmelnoc',
    'diabmelwcm', 'disordersusuallydiagnosedininfancych', 'diverticulos', 'dizziness',
    'dminpreg', 'dysrhythmia', 'earlylabor', 'ecodesadverseeffectsofmedicalcare',
    'ecodesadverseeffectsofmedicaldrugs', 'ecodescutpierce', 'ecodesdrowningsubmersion', 'ecodesfall',
    'ecodesfirearm', 'ecodesfireburn', 'ecodesmachinery', 'ecodesmotorvehicletrafficmvt',
    'ecodesnaturalenvironment', 'ecodesotherspecifiedandclassifiable', 'ecodesotherspecifiednec', 'ecodespedalcyclistnotmvt',
    'ecodesplaceofoccurrence', 'ecodespoisoning', 'ecodessuffocation', 'ecodestransportnotmvt',
    'ecodesunspecified', 'ectopicpreg', 'encephalitis', 'endometrios',
    'epilepsycnv', 'esophcancer', 'esophgealdx', 'exameval',
    'eyeinfectn', 'fatigue', 'femgenitca', 'feminfertil',
    'fetaldistrs', 'fluidelcdx', 'fuo', 'fxarm',
    'fxhip', 'fxleg', 'fxskullfac', 'gangrene',
    'gasduoulcer', 'gastritis', 'gastroent', 'giconganom',
    'gihemorrhag', 'giperitcan', 'glaucoma', 'goutotcrys',
    'guconganom', 'hdnckcancr', 'headachemig', 'hemmorhoids',
    'hemorrpreg', 'hepatitis', 'hivinfectn', 'hodgkinsds',
    'hrtvalvedx', 'htn', 'htncomplicn', 'htninpreg',
    'hyperlipidem', 'immunitydx', 'immunizscrn', 'impulsecontroldisordersnec',
    'inducabortn', 'infectarth', 'influenza', 'infmalegen',
    'intestinfct', 'intobstruct', 'intracrninj', 'jointinjury',
    'kidnyrnlca', 'lateeffcvd', 'leukemias', 'liveborn',
    'liveribdca', 'longpregncy', 'lowbirthwt', 'lungexternl',
    'lymphenlarg', 'maintchemr', 'malgenitca', 'maligneopls',
    'malposition', 'meningitis', 'menopausldx', 'menstrualdx',
    'miscellaneousmentalhealthdisorders', 'mooddisorders', 'mouthdx', 'ms',
    'multmyeloma', 'mycoses', 'nauseavomit', 'neoplsmunsp',
    'nephritis', 'nervcongan', 'nonepithca', 'nonhodglym',
    'nutritdefic', 'obrelatedperintrauma', 'opnwndextr', 'opnwndhead',
    'osteoarthros', 'osteoporosis', 'otacqdefor', 'otaftercare',
    'otbnignneo', 'otbonedx', 'otcirculdx', 'otcomplbir',
    'otconganom', 'otconntiss', 'otdxbladdr', 'otdxkidney',
    'otdxstomch', 'otendodsor', 'otfemalgen', 'othbactinf',
    'othcnsinfx', 'othematldx', 'othercvd', 'othereardx',
    'otheredcns', 'othereyedx', 'othergidx', 'othergudx',
    'otherinjury', 'otherpregnancyanddeliveryincludingnormal', 'otherscreen', 'othfracture',
    'othheartdx', 'othinfectns', 'othliverdx', 'othlowresp',
    'othmalegen', 'othnervdx', 'othskindx', 'othveindx',
    'otinflskin', 'otitismedia', 'otjointdx', 'otnutritdx',
    'otperintdx', 'otpregcomp', 'otprimryca', 'otrespirca',
    'otupprresp', 'otuprspin', 'ovariancyst', 'ovarycancer',
    'pancreascan', 'pancreasdx', 'paralysis', 'parkinsons',
    'pathologfx', 'pelvicobstr', 'perintjaund', 'peripathero',
    'peritonitis', 'personalitydisorders', 'phlebitis', 'pid',
    'pleurisy', 'pneumonia', 'poisnnonmed', 'poisnotmed',
    'poisonpsych', 'precereoccl', 'prevcsectn', 'prolapse',
    'prostatecan', 'pulmhartdx', 'rctmanusca', 'rehab',
    'respdistres', 'retinaldx', 'rheumarth', 'schizophreniaandotherpsychoticdisorde',
    'screeningandhistoryofmentalhealthan', 'septicemia', 'septicemiaexceptinlabor', 'sexualinfxs',
    'shock', 'sicklecell', 'skininfectn', 'skinmelanom',
    'sle', 'socialadmin', 'spincorinj', 'spontabortn',
    'sprain', 'stomchcancr', 'substancerelateddisorders', 'suicideandintentionalselfinflictedin',
    'superficinj', 'syncope', 'teethdx', 'testiscancr',
    'thyroidcncr', 'thyroiddsor', 'tia', 'tonsillitis',
    'tuberculosis', 'ulceratcol', 'ulcerskin', 'umbilcord',
    'unclassified', 'urinstone', 'urinyorgca', 'uteruscancr',
    'uti', 'varicosevn', 'viralinfect', 'whtblooddx']
meds_cols = ["patient_id", "disposition", 'meds_analgesicandantihistaminecombination',
 'meds_analgesics','meds_anesthetics','meds_anti-obesitydrugs','meds_antiallergy','meds_antiarthritics',
 'meds_antiasthmatics','meds_antibiotics','meds_anticoagulants','meds_antidotes','meds_antifungals',
 'meds_antihistamineanddecongestantcombination','meds_antihistamines','meds_antihyperglycemics',
 'meds_antiinfectives','meds_antiinfectives/miscellaneous','meds_antineoplastics','meds_antiparkinsondrugs',
 'meds_antiplateletdrugs','meds_antivirals','meds_autonomicdrugs','meds_biologicals','meds_blood',
 'meds_cardiacdrugs','meds_cardiovascular','meds_cnsdrugs','meds_colonystimulatingfactors','meds_contraceptives',
 'meds_cough/coldpreparations','meds_diagnostic','meds_diuretics','meds_eentpreps','meds_elect/caloric/h2o',
 'meds_gastrointestinal','meds_herbals','meds_hormones','meds_immunosuppressants','meds_investigational',
 'meds_miscellaneousmedicalsupplies,devices,non-drug','meds_musclerelaxants','meds_pre-natalvitamins',
 'meds_psychotherapeuticdrugs','meds_sedative/hypnotics','meds_skinpreps','meds_smokingdeterrents',
 'meds_thyroidpreps','meds_unclassifieddrugproducts','meds_vitamins']
hist_vitals_cols = ["patient_id", "disposition", 'pulse_last','resp_last','spo2_last','temp_last','sbp_last','dbp_last','o2_device_last',
                    'pulse_min','resp_min','spo2_min','temp_min','sbp_min','dbp_min','o2_device_min','pulse_max',
                    'resp_max','spo2_max','temp_max','sbp_max','dbp_max','o2_device_max','pulse_median','resp_median',
                    'spo2_median','temp_median','sbp_median','dbp_median','o2_device_median']
hist_labs_cols = ["patient_id", "disposition", 
    'absolutelymphocytecount_last', 'acetonebld_last', 'alanineaminotransferase(alt)_last', 'albumin_last',
    'alkphos_last', 'anc(absneutrophilcount)_last', 'aniongap_last', 'aspartateaminotransferase(ast)_last',
    'b-typenatriureticpeptide,pro(probnp)_last', 'baseexcess(poc)_last', 'baseexcess,venous(poc)_last', 'basos_last',
    'basosabs_last', 'benzodiazepinesscreen,urine,noconf._last', 'bilirubindirect_last', 'bilirubintotal_last',
    'bun_last', 'bun/creatratio_last', 'calcium_last', 'calculatedco2(poc)_last', 'calculatedhco3(poc)i_last',
    'calculatedo2saturation(poc)_last', 'chloride_last', 'cktotal_last', 'co2_last', 'co2calculated,venous(poc)_last',
    'co2,poc_last', 'creatinine_last', 'd-dimer_last', 'egfr_last', 'egfr(nonafricanamerican)_last', 'egfr(aframer)_last',
    'eos_last', 'eosinoabs_last', 'epithelialcells_last', 'globulin_last', 'glucose_last', 'glucose,meter_last',
    'hco3calculated,venous(poc)_last', 'hematocrit_last', 'hemoglobin_last', 'immaturegrans(abs)_last',
    'immaturegranulocytes_last', 'inr_last', 'lactate,poc_last', 'lipase_last', 'lymphs_last', 'magnesium_last',
    'mch_last', 'mchc_last', 'mcv_last', 'monocytes_last', 'monosabs_last', 'mpv_last', 'neutrophils_last',
    'nrbc_last', 'nrbcabsolute_last', 'o2satcalculated,venous(poc)_last', 'pco2(poc)_last', 'pco2,venous(poc)_last',
    'ph,venous(poc)_last', 'phencyclidine(pcp)screen,urine,noconf._last', 'phosphorus_last', 'platelets_last',
    'po2(poc)_last', 'po2,venous(poc)_last', 'pocbun_last', 'poccreatinine_last', 'pocglucose_last',
    'pochematocrit_last', 'pocionizedcalcium_last', 'pocph_last', 'pocpotassium_last', 'pocsodium_last',
    'poctroponini._last', 'potassium_last', 'proteintotal_last', 'prothrombintime_last', 'ptt_last', 'rbc_last',
    'rbc/hpf_last', 'rdw_last', 'sodium_last', 'troponini(poc)_last', 'troponint_last', 'tsh_last', 'wbc_last',
    'wbc/hpf_last', 'absolutelymphocytecount_min', 'acetonebld_min', 'alanineaminotransferase(alt)_min', 'albumin_min',
    'alkphos_min', 'anc(absneutrophilcount)_min', 'aniongap_min', 'aspartateaminotransferase(ast)_min',
    'b-typenatriureticpeptide,pro(probnp)_min', 'baseexcess(poc)_min', 'baseexcess,venous(poc)_min', 'basos_min',
    'basosabs_min', 'benzodiazepinesscreen,urine,noconf._min', 'bilirubindirect_min', 'bilirubintotal_min',
    'bun_min', 'bun/creatratio_min', 'calcium_min', 'calculatedco2(poc)_min', 'calculatedhco3(poc)i_min',
    'calculatedo2saturation(poc)_min', 'chloride_min', 'cktotal_min', 'co2_min', 'co2calculated,venous(poc)_min',
    'co2,poc_min', 'creatinine_min', 'd-dimer_min', 'egfr_min', 'egfr(nonafricanamerican)_min', 'egfr(aframer)_min',
    'eos_min', 'eosinoabs_min', 'epithelialcells_min', 'globulin_min', 'glucose_min', 'glucose,meter_min',
    'hco3calculated,venous(poc)_min', 'hematocrit_min', 'hemoglobin_min', 'immaturegrans(abs)_min',
    'immaturegranulocytes_min', 'inr_min', 'lactate,poc_min', 'lipase_min', 'lymphs_min', 'magnesium_min',
    'mch_min', 'mchc_min', 'mcv_min', 'monocytes_min', 'monosabs_min', 'mpv_min', 'neutrophils_min',
    'nrbc_min', 'nrbcabsolute_min', 'o2satcalculated,venous(poc)_min', 'pco2(poc)_min', 'pco2,venous(poc)_min',
    'ph,venous(poc)_min', 'phencyclidine(pcp)screen,urine,noconf._min', 'phosphorus_min', 'platelets_min',
    'po2(poc)_min', 'po2,venous(poc)_min', 'pocbun_min', 'poccreatinine_min', 'pocglucose_min',
    'pochematocrit_min', 'pocionizedcalcium_min', 'pocph_min', 'pocpotassium_min', 'pocsodium_min',
    'poctroponini._min', 'potassium_min', 'proteintotal_min', 'prothrombintime_min', 'ptt_min', 'rbc_min',
    'rbc/hpf_min', 'rdw_min', 'sodium_min', 'troponini(poc)_min', 'troponint_min', 'tsh_min', 'wbc_min',
    'wbc/hpf_min', 'absolutelymphocytecount_max', 'acetonebld_max', 'alanineaminotransferase(alt)_max',
    'albumin_max', 'alkphos_max', 'anc(absneutrophilcount)_max', 'aniongap_max', 'aspartateaminotransferase(ast)_max',
    'b-typenatriureticpeptide,pro(probnp)_max', 'baseexcess(poc)_max', 'baseexcess,venous(poc)_max', 'basos_max',
    'basosabs_max', 'benzodiazepinesscreen,urine,noconf._max', 'bilirubindirect_max', 'bilirubintotal_max',
    'bun_max', 'bun/creatratio_max', 'calcium_max', 'calculatedco2(poc)_max', 'calculatedhco3(poc)i_max',
    'calculatedo2saturation(poc)_max', 'chloride_max', 'cktotal_max', 'co2_max', 'co2calculated,venous(poc)_max',
    'co2,poc_max', 'creatinine_max', 'd-dimer_max', 'egfr_max', 'egfr(nonafricanamerican)_max', 'egfr(aframer)_max',
    'eos_max', 'eosinoabs_max', 'epithelialcells_max', 'globulin_max', 'glucose_max', 'glucose,meter_max',
    'hco3calculated,venous(poc)_max', 'hematocrit_max', 'hemoglobin_max', 'immaturegrans(abs)_max',
    'immaturegranulocytes_max', 'inr_max', 'lactate,poc_max', 'lipase_max', 'lymphs_max', 'magnesium_max',
    'mch_max', 'mchc_max', 'mcv_max', 'monocytes_max', 'monosabs_max', 'mpv_max', 'neutrophils_max',
    'nrbc_max', 'nrbcabsolute_max', 'o2satcalculated,venous(poc)_max', 'pco2(poc)_max', 'pco2,venous(poc)_max',
    'ph,venous(poc)_max', 'phencyclidine(pcp)screen,urine,noconf._max', 'phosphorus_max', 'platelets_max',
    'po2(poc)_max', 'po2,venous(poc)_max', 'pocbun_max', 'poccreatinine_max', 'pocglucose_max',
    'pochematocrit_max', 'pocionizedcalcium_max', 'pocph_max', 'pocpotassium_max', 'pocsodium_max',
    'poctroponini._max', 'potassium_max', 'proteintotal_max', 'prothrombintime_max', 'ptt_max', 'rbc_max',
    'rbc/hpf_max', 'rdw_max', 'sodium_max', 'troponini(poc)_max', 'troponint_max', 'tsh_max', 'wbc_max',
    'wbc/hpf_max', 'absolutelymphocytecount_median', 'acetonebld_median', 'alanineaminotransferase(alt)_median',
    'albumin_median', 'alkphos_median', 'anc(absneutrophilcount)_median', 'aniongap_median',
    'aspartateaminotransferase(ast)_median', 'b-typenatriureticpeptide,pro(probnp)_median', 'baseexcess(poc)_median',
    'baseexcess,venous(poc)_median', 'basos_median', 'basosabs_median', 'benzodiazepinesscreen,urine,noconf._median',
    'bilirubindirect_median', 'bilirubintotal_median', 'bun_median', 'bun/creatratio_median', 'calcium_median',
    'calculatedco2(poc)_median', 'calculatedhco3(poc)i_median', 'calculatedo2saturation(poc)_median',
    'chloride_median', 'cktotal_median', 'co2_median', 'co2calculated,venous(poc)_median', 'co2,poc_median',
    'creatinine_median', 'd-dimer_median', 'egfr_median', 'egfr(nonafricanamerican)_median', 'egfr(aframer)_median',
    'eos_median', 'eosinoabs_median', 'epithelialcells_median', 'globulin_median', 'glucose_median',
    'glucose,meter_median', 'hco3calculated,venous(poc)_median', 'hematocrit_median', 'hemoglobin_median',
    'immaturegrans(abs)_median', 'immaturegranulocytes_median', 'inr_median', 'lactate,poc_median', 'lipase_median',
    'lymphs_median', 'magnesium_median', 'mch_median', 'mchc_median', 'mcv_median', 'monocytes_median',
    'monosabs_median', 'mpv_median', 'neutrophils_median', 'nrbc_median', 'nrbcabsolute_median',
    'o2satcalculated,venous(poc)_median', 'pco2(poc)_median', 'pco2,venous(poc)_median', 'ph,venous(poc)_median',
    'phencyclidine(pcp)screen,urine,noconf._median', 'phosphorus_median', 'platelets_median', 'po2(poc)_median',
    'po2,venous(poc)_median', 'pocbun_median', 'poccreatinine_median', 'pocglucose_median', 'pochematocrit_median',
    'pocionizedcalcium_median', 'pocph_median', 'pocpotassium_median', 'pocsodium_median', 'poctroponini._median',
    'potassium_median', 'proteintotal_median', 'prothrombintime_median', 'ptt_median', 'rbc_median', 'rbc/hpf_median',
    'rdw_median', 'sodium_median', 'troponini(poc)_median', 'troponint_median', 'tsh_median', 'wbc_median',
    'wbc/hpf_median', 'bloodua_last', 'glucoseua_last', 'ketonesua_last', 'leukocytesua_last', 'nitriteua_last',
    'pregtestur_last', 'proteinua_last', 'bloodculture,routine_last', 'urineculture,routine_last', 'bloodua_npos',
    'glucoseua_npos', 'ketonesua_npos', 'leukocytesua_npos', 'nitriteua_npos', 'pregtestur_npos', 'proteinua_npos',
    'bloodculture,routine_npos', 'urineculture,routine_npos', 'bloodua_count', 'glucoseua_count', 'ketonesua_count',
    'leukocytesua_count', 'nitriteua_count', 'pregtestur_count', 'proteinua_count', 'bloodculture,routine_count',
    'urineculture,routine_count']
imag_EKG_cols = ["patient_id", "disposition", 'cxr_count','echo_count','ekg_count','headct_count','mri_count',
                 'otherct_count','otherimg_count','otherus_count','otherxr_count']

# Association Analysis - Complaints

In [4]:
# Create list of complaints
data['ComplaintList'] = data.apply(lambda row: [col for col in chief_complaint_cols if row[col] == 1], axis=1)
data[['patient_id', 'ComplaintList']]

Unnamed: 0,patient_id,ComplaintList
1,2,[cc_motorvehiclecrash]
2,3,[cc_dizziness]
3,4,[cc_headachere-evaluation]
4,5,[cc_emesis]
5,6,[cc_dizziness]
...,...,...
440255,440256,[cc_diarrhea]
440256,440257,[cc_alteredmentalstatus]
440257,440258,[cc_alteredmentalstatus]
440258,440259,[cc_shortnessofbreath]


In [5]:
# Create TransactionEncoder()
te_cc = TransactionEncoder()

# Fit and transform the TransactionEncoder
te_ary_cc = te_cc.fit_transform(data['ComplaintList'].tolist())

# Convert to df
df_cc = pd.DataFrame(te_ary_cc, columns=te_cc.columns_)
df_cc

Unnamed: 0,cc_abdominalcramping,cc_abdominaldistention,cc_abdominalpain,cc_abdominalpainpregnant,cc_abnormallab,cc_abscess,cc_addictionproblem,cc_agitation,cc_alcoholintoxication,cc_alcoholproblem,...,cc_vaginaldischarge,cc_vaginalpain,cc_weakness,cc_wheezing,cc_withdrawal-alcohol,cc_woundcheck,cc_woundinfection,cc_woundre-evaluation,cc_wristinjury,cc_wristpain
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410626,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
410627,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
410628,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
410629,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [6]:
# Get frequent itemsets
# Minimum support was set as low as possible to see as many sets as possible
frequent_itemsets_cc = apriori(df_cc, min_support=0.0002, use_colnames=True)
frequent_itemsets_cc

Unnamed: 0,support,itemsets
0,0.001086,(cc_abdominalcramping)
1,0.001171,(cc_abdominaldistention)
2,0.100212,(cc_abdominalpain)
3,0.003763,(cc_abdominalpainpregnant)
4,0.007128,(cc_abnormallab)
...,...,...
326,0.000295,"(cc_weakness, cc_shortnessofbreath)"
327,0.000453,"(cc_diarrhea, cc_emesis, cc_abdominalpain)"
328,0.000205,"(cc_diarrhea, cc_abdominalpain, cc_nausea)"
329,0.000604,"(cc_emesis, cc_abdominalpain, cc_nausea)"


In [7]:
# Get association rules
# Minimum threshold was set similarly to min. support
rules_cc = association_rules(frequent_itemsets_cc, min_threshold=0.00000000000000000001)
rules_cc

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(cc_alcoholintoxication),(cc_abdominalpain),0.022146,0.100212,0.000331,0.014955,0.149233,-0.001888,0.913449,-0.853588
1,(cc_abdominalpain),(cc_alcoholintoxication),0.100212,0.022146,0.000331,0.003305,0.149233,-0.001888,0.981096,-0.863683
2,(cc_abdominalpain),(cc_backpain),0.100212,0.037250,0.001018,0.010158,0.272697,-0.002715,0.972630,-0.747737
3,(cc_backpain),(cc_abdominalpain),0.037250,0.100212,0.001018,0.027327,0.272697,-0.002715,0.925068,-0.734767
4,(cc_abdominalpain),(cc_chestpain),0.100212,0.067482,0.001388,0.013852,0.205268,-0.005374,0.945617,-0.811424
...,...,...,...,...,...,...,...,...,...,...
275,"(cc_emesis, cc_nausea)",(cc_diarrhea),0.001814,0.007905,0.000329,0.181208,22.923489,0.000314,1.211657,0.958115
276,"(cc_diarrhea, cc_nausea)",(cc_emesis),0.000684,0.021187,0.000329,0.480427,22.675660,0.000314,1.883880,0.956554
277,(cc_emesis),"(cc_diarrhea, cc_nausea)",0.021187,0.000684,0.000329,0.015517,22.675660,0.000314,1.015067,0.976591
278,(cc_diarrhea),"(cc_emesis, cc_nausea)",0.007905,0.001814,0.000329,0.041590,22.923489,0.000314,1.041501,0.963997


In [8]:
# Cast antecedents and consequents columns to strings instead of frozen sets
rules_cc['antecedents'] = rules_cc['antecedents'].astype('string').apply(lambda x: x.replace("frozenset({'", '').replace("})", '').replace("'", ''))
rules_cc['consequents'] = rules_cc['consequents'].astype('string').apply(lambda x: x.replace("frozenset({'", '').replace("})", '').replace("'", ''))

rules_cc

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,cc_alcoholintoxication,cc_abdominalpain,0.022146,0.100212,0.000331,0.014955,0.149233,-0.001888,0.913449,-0.853588
1,cc_abdominalpain,cc_alcoholintoxication,0.100212,0.022146,0.000331,0.003305,0.149233,-0.001888,0.981096,-0.863683
2,cc_abdominalpain,cc_backpain,0.100212,0.037250,0.001018,0.010158,0.272697,-0.002715,0.972630,-0.747737
3,cc_backpain,cc_abdominalpain,0.037250,0.100212,0.001018,0.027327,0.272697,-0.002715,0.925068,-0.734767
4,cc_abdominalpain,cc_chestpain,0.100212,0.067482,0.001388,0.013852,0.205268,-0.005374,0.945617,-0.811424
...,...,...,...,...,...,...,...,...,...,...
275,"cc_emesis, cc_nausea",cc_diarrhea,0.001814,0.007905,0.000329,0.181208,22.923489,0.000314,1.211657,0.958115
276,"cc_diarrhea, cc_nausea",cc_emesis,0.000684,0.021187,0.000329,0.480427,22.675660,0.000314,1.883880,0.956554
277,cc_emesis,"cc_diarrhea, cc_nausea",0.021187,0.000684,0.000329,0.015517,22.675660,0.000314,1.015067,0.976591
278,cc_diarrhea,"cc_emesis, cc_nausea",0.007905,0.001814,0.000329,0.041590,22.923489,0.000314,1.041501,0.963997


In [9]:
# # Write to csv
# rules_cc.to_csv("apriori_rules_cc.csv")

In [10]:
# View top 10 rules
rules_cc.sort_values("confidence", ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
276,"cc_diarrhea, cc_nausea",cc_emesis,0.000684,0.021187,0.000329,0.480427,22.67566,0.000314,1.88388,0.956554
270,"cc_abdominalpain, cc_nausea",cc_emesis,0.001615,0.021187,0.000604,0.374057,17.655118,0.00057,1.563742,0.944885
219,cc_homicidal,cc_suicidal,0.000774,0.014865,0.000273,0.352201,23.69344,0.000261,1.520742,0.958537
257,"cc_abdominalpain, cc_diarrhea",cc_emesis,0.001291,0.021187,0.000453,0.350943,16.564165,0.000426,1.508055,0.940843
269,"cc_emesis, cc_nausea",cc_abdominalpain,0.001814,0.100212,0.000604,0.332886,3.321829,0.000422,1.348777,0.700231
263,"cc_diarrhea, cc_nausea",cc_abdominalpain,0.000684,0.100212,0.000205,0.298932,2.983011,0.000136,1.283454,0.665223
256,"cc_emesis, cc_diarrhea",cc_abdominalpain,0.001559,0.100212,0.000453,0.290625,2.900113,0.000297,1.268424,0.656209
274,"cc_emesis, cc_diarrhea",cc_nausea,0.001559,0.009644,0.000329,0.210938,21.8731,0.000314,1.255105,0.955771
141,cc_diarrhea,cc_emesis,0.007905,0.021187,0.001559,0.197166,9.306019,0.001391,1.219197,0.899654
124,cc_nasalcongestion,cc_cough,0.001856,0.021457,0.000351,0.188976,8.807123,0.000311,1.206553,0.888104


# Association Analysis - Past Medical History

In [11]:
# Create list of conditions
data['UnderLyingConditionList'] = data.apply(lambda row: [col for col in med_hist_cols if row[col] == 1], axis=1)
data[['patient_id', 'UnderLyingConditionList']]

Unnamed: 0,patient_id,UnderLyingConditionList
1,2,"[esophgealdx, hdnckcancr, maligneopls, viralin..."
2,3,"[esophgealdx, hdnckcancr, maligneopls, viralin..."
3,4,"[esophgealdx, hdnckcancr, maligneopls, viralin..."
4,5,"[esophgealdx, htn, hyperlipidem, otjointdx]"
5,6,"[anemia, cataract, deliriumdementiaamnesticoth..."
...,...,...
440255,440256,"[anemia, coaghemrdx, copd, nonhodglym, retinaldx]"
440256,440257,"[anemia, coaghemrdx, copd, nonhodglym, retinaldx]"
440257,440258,"[anemia, coaghemrdx, copd, nonhodglym, retinaldx]"
440258,440259,"[anemia, coaghemrdx, copd, nonhodglym, retinaldx]"


In [12]:
# Create TransactionEncoder()
te_pmh = TransactionEncoder()

# Fit and transform the TransactionEncoder
te_ary_pmh = te_pmh.fit_transform(data['ComplaintList'].tolist())

# Conver to df
df_pmh = pd.DataFrame(te_ary_pmh, columns=te_pmh.columns_)
df_pmh

Unnamed: 0,cc_abdominalcramping,cc_abdominaldistention,cc_abdominalpain,cc_abdominalpainpregnant,cc_abnormallab,cc_abscess,cc_addictionproblem,cc_agitation,cc_alcoholintoxication,cc_alcoholproblem,...,cc_vaginaldischarge,cc_vaginalpain,cc_weakness,cc_wheezing,cc_withdrawal-alcohol,cc_woundcheck,cc_woundinfection,cc_woundre-evaluation,cc_wristinjury,cc_wristpain
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410626,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
410627,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
410628,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
410629,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [13]:
# Get frequent itemsets
frequent_itemsets_pmh = apriori(df_pmh, min_support=0.0002, use_colnames=True)
frequent_itemsets_pmh

Unnamed: 0,support,itemsets
0,0.001086,(cc_abdominalcramping)
1,0.001171,(cc_abdominaldistention)
2,0.100212,(cc_abdominalpain)
3,0.003763,(cc_abdominalpainpregnant)
4,0.007128,(cc_abnormallab)
...,...,...
326,0.000295,"(cc_weakness, cc_shortnessofbreath)"
327,0.000453,"(cc_diarrhea, cc_emesis, cc_abdominalpain)"
328,0.000205,"(cc_diarrhea, cc_abdominalpain, cc_nausea)"
329,0.000604,"(cc_emesis, cc_abdominalpain, cc_nausea)"


In [14]:
# Get association rules
rules_pmh = association_rules(frequent_itemsets_pmh, min_threshold=0.00000000000000000001)
rules_pmh

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(cc_alcoholintoxication),(cc_abdominalpain),0.022146,0.100212,0.000331,0.014955,0.149233,-0.001888,0.913449,-0.853588
1,(cc_abdominalpain),(cc_alcoholintoxication),0.100212,0.022146,0.000331,0.003305,0.149233,-0.001888,0.981096,-0.863683
2,(cc_abdominalpain),(cc_backpain),0.100212,0.037250,0.001018,0.010158,0.272697,-0.002715,0.972630,-0.747737
3,(cc_backpain),(cc_abdominalpain),0.037250,0.100212,0.001018,0.027327,0.272697,-0.002715,0.925068,-0.734767
4,(cc_abdominalpain),(cc_chestpain),0.100212,0.067482,0.001388,0.013852,0.205268,-0.005374,0.945617,-0.811424
...,...,...,...,...,...,...,...,...,...,...
275,"(cc_emesis, cc_nausea)",(cc_diarrhea),0.001814,0.007905,0.000329,0.181208,22.923489,0.000314,1.211657,0.958115
276,"(cc_diarrhea, cc_nausea)",(cc_emesis),0.000684,0.021187,0.000329,0.480427,22.675660,0.000314,1.883880,0.956554
277,(cc_emesis),"(cc_diarrhea, cc_nausea)",0.021187,0.000684,0.000329,0.015517,22.675660,0.000314,1.015067,0.976591
278,(cc_diarrhea),"(cc_emesis, cc_nausea)",0.007905,0.001814,0.000329,0.041590,22.923489,0.000314,1.041501,0.963997


In [15]:
# Cast antecedents and consequents data types
rules_pmh['antecedents'] = rules_pmh['antecedents'].astype('string').apply(lambda x: x.replace("frozenset({'", '').replace("})", '').replace("'", ''))
rules_pmh['consequents'] = rules_pmh['consequents'].astype('string').apply(lambda x: x.replace("frozenset({'", '').replace("})", '').replace("'", ''))

rules_pmh

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,cc_alcoholintoxication,cc_abdominalpain,0.022146,0.100212,0.000331,0.014955,0.149233,-0.001888,0.913449,-0.853588
1,cc_abdominalpain,cc_alcoholintoxication,0.100212,0.022146,0.000331,0.003305,0.149233,-0.001888,0.981096,-0.863683
2,cc_abdominalpain,cc_backpain,0.100212,0.037250,0.001018,0.010158,0.272697,-0.002715,0.972630,-0.747737
3,cc_backpain,cc_abdominalpain,0.037250,0.100212,0.001018,0.027327,0.272697,-0.002715,0.925068,-0.734767
4,cc_abdominalpain,cc_chestpain,0.100212,0.067482,0.001388,0.013852,0.205268,-0.005374,0.945617,-0.811424
...,...,...,...,...,...,...,...,...,...,...
275,"cc_emesis, cc_nausea",cc_diarrhea,0.001814,0.007905,0.000329,0.181208,22.923489,0.000314,1.211657,0.958115
276,"cc_diarrhea, cc_nausea",cc_emesis,0.000684,0.021187,0.000329,0.480427,22.675660,0.000314,1.883880,0.956554
277,cc_emesis,"cc_diarrhea, cc_nausea",0.021187,0.000684,0.000329,0.015517,22.675660,0.000314,1.015067,0.976591
278,cc_diarrhea,"cc_emesis, cc_nausea",0.007905,0.001814,0.000329,0.041590,22.923489,0.000314,1.041501,0.963997


In [16]:
# # Write to csv
# rules_pmh.to_csv("apriori_rules_pmh.csv")

In [17]:
# View top 10 rules
rules_pmh.sort_values("confidence", ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
276,"cc_diarrhea, cc_nausea",cc_emesis,0.000684,0.021187,0.000329,0.480427,22.67566,0.000314,1.88388,0.956554
270,"cc_abdominalpain, cc_nausea",cc_emesis,0.001615,0.021187,0.000604,0.374057,17.655118,0.00057,1.563742,0.944885
219,cc_homicidal,cc_suicidal,0.000774,0.014865,0.000273,0.352201,23.69344,0.000261,1.520742,0.958537
257,"cc_abdominalpain, cc_diarrhea",cc_emesis,0.001291,0.021187,0.000453,0.350943,16.564165,0.000426,1.508055,0.940843
269,"cc_emesis, cc_nausea",cc_abdominalpain,0.001814,0.100212,0.000604,0.332886,3.321829,0.000422,1.348777,0.700231
263,"cc_diarrhea, cc_nausea",cc_abdominalpain,0.000684,0.100212,0.000205,0.298932,2.983011,0.000136,1.283454,0.665223
256,"cc_emesis, cc_diarrhea",cc_abdominalpain,0.001559,0.100212,0.000453,0.290625,2.900113,0.000297,1.268424,0.656209
274,"cc_emesis, cc_diarrhea",cc_nausea,0.001559,0.009644,0.000329,0.210938,21.8731,0.000314,1.255105,0.955771
141,cc_diarrhea,cc_emesis,0.007905,0.021187,0.001559,0.197166,9.306019,0.001391,1.219197,0.899654
124,cc_nasalcongestion,cc_cough,0.001856,0.021457,0.000351,0.188976,8.807123,0.000311,1.206553,0.888104


# Counting Chief Complaints

In [18]:
df = pd.DataFrame(data['cc_abdominalcramping'].value_counts())

# Count complaints and add to final_df
final_df = df
for col in chief_complaint_cols[3:]:
    current_df = pd.DataFrame(data[col].value_counts())
    final_df = pd.concat([final_df, current_df], axis=1)

# Clean up the dataframe
chief_complaint_counts = final_df[:1].T.reset_index().rename(columns={0.0:"No", 1.0:"Yes",'index': 'complaint'})

chief_complaint_counts

Unnamed: 0,complaint,No,Yes
0,cc_abdominalcramping,410185.0,446.0
1,cc_abdominaldistention,410149.0,481.0
2,cc_abdominalpain,369472.0,41150.0
3,cc_abdominalpainpregnant,409083.0,1545.0
4,cc_abnormallab,407701.0,2927.0
...,...,...,...
195,cc_woundcheck,408880.0,1751.0
196,cc_woundinfection,409435.0,1196.0
197,cc_woundre-evaluation,410105.0,526.0
198,cc_wristinjury,410087.0,544.0


In [19]:
# Remove 'cc_' prefix from values
chief_complaint_counts['complaint'] = chief_complaint_counts['complaint'].apply(lambda x: x.replace('cc_', ''))

In [20]:
# # Write to csv
# chief_complaint_counts.to_csv("chief_complaint_counts.csv")

# Counting Past Medical History

In [21]:
df = pd.DataFrame(data['2ndarymalig'].value_counts())

# Count pmh and add to final_df
final_df = df
for col in med_hist_cols[3:]:
    current_df = pd.DataFrame(data[col].value_counts())
    
    final_df = pd.concat([final_df, current_df], axis=1)

# Clean up the dataframe
patient_history_counts = final_df.T.reset_index().rename(columns={0.0:"No", 1.0:"Yes", "index":"underlying condition"})

patient_history_counts

Unnamed: 0,underlying condition,No,Yes
0,2ndarymalig,409672.0,959.0
1,abdomhernia,396883.0,13748.0
2,abdomnlpain,404174.0,6457.0
3,abortcompl,410618.0,13.0
4,acqfootdef,408245.0,2386.0
...,...,...,...
275,uteruscancr,408896.0,1735.0
276,uti,393530.0,17101.0
277,varicosevn,408289.0,2342.0
278,viralinfect,392196.0,18435.0


In [22]:
# # Write to csv
# patient_history_counts.to_csv("patient_history_counts.csv")