# PROJ 406 Capstone Project
## Predicting Hospital Admission For Non-English Speaking Patients


### Import Required Libraries

In [None]:
"""
Author: Katy Mombourquette, Denis Ouellette
Date: Dec 15, 2023

Dependencies:
Pandas
Numpy
Sklearn
Imblearn
Matplotlib
Seaborn
Scipy
Pyro
Torch

Install instructions
1. Install Pandas: !pip install pandas
2. Install Numpy: !pip install numpy
3. Install Sklearn: !pip install scikit-learn
4. Install Imblearn: !pip install imbalanced-learn
5. Install Matplotlib: !pip install matplotlib
6. Install Seaborn: !pip install seaborn
7. Install Scipy: !pip install scipy
8. Install Pyro: !pip install pyro-ppl
9. Install Torch: !pip install torch torchvision torchaudio

"""

In [None]:
# main libraries
import pandas as pd
import numpy as np


# sklearn
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import classification_report, confusion_matrix

# visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# other
from scipy.stats import randint, loguniform
from sklearn.preprocessing import FunctionTransformer
from functions import calculate_stats, set_threshold
import pyro
import torch
from pyro.infer.mcmc import HMC, MCMC

# ignore warnings
import warnings
warnings.filterwarnings("ignore")

### Prepare the data for the model

#### read in the cleaned dataset

Please note that the dataset read in below is the already cleaned dataset, which includes the resampled language variable. Refer to the "data_cleaning.ipynb" notebook to see the process.

In [None]:
# Load the already-cleaned dataset from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# from a trusted source.
data = pd.read_pickle("balanced_nonEng_clean.pkl")

#### drop irrelevant or inappropriate columns

In [None]:
# Remove columns that must not be used as predictors.
data = data.drop(columns=['race', 'Unnamed: 0', 'patient_id'])

# Work on a subsample of at most 1000 rows.
# - random_state makes the subsample (and every number computed downstream)
#   repeatable, consistent with the seeded split and oversampler used later.
# - min(...) keeps the cell from raising when fewer than 1000 rows are loaded.
# NOTE(review): the surrounding text refers to ~64000 rows; confirm whether
# this down-sampling is meant to stay for the final run.
data = data.sample(n=min(1000, len(data)), random_state=127)

#### filter to pre-determined top 400 features

In [None]:
# Columns kept for modelling. The list includes the target column
# ('disposition') alongside the predictor columns.
top_400_features = ['disposition','breastcancr','burns','anemia','backproblem','cc_dentalpain','cardiaarrst',
                    'attentiondeficitconductdisruptivebeha','asthma','acutphanm','bladdercncr','biliarydx','asppneumon',
                    'lang','meds_cough/coldpreparations','cc_diarrhea','dep_name','ecodesstruckbyagainst','complicproc',
                    'birthasphyx','esi','allergy','bph','acrenlfail','alcoholrelateddisorders','complicdevi','artembolism',
                    'age','gender','brnchlngca','arrivalday','birthtrauma','breastdx','cervixcancr','adjustmentdisorders',
                    'triage_vital_temp','cc_femaleguproblem','triage_vital_dbp','cc_alteredmentalstatus','meds_cardiacdrugs',
                    'cc_wheezing','religion','meds_herbals','poisnnonmed','cc_medicationproblem','previousdispo',
                    'meds_antiinfectives','abdomnlpain','otinflskin','otitismedia','analrectal','cc_medicationrefill',
                    'echo_count','deliriumdementiaamnesticothercognitiv','cc_urinaryfrequency','adltrespfl','triage_vital_hr',
                    'meds_blood','coaghemrdx','ecodesfirearm','cc_lossofconsciousness','arrivalhour_bin','urinstone',
                    'cc_shoulderinjury','cc_confusion','viralinfect','othergidx','diabmelwcm','respdistres',
                    'meds_antihyperglycemics','cc_withdrawal-alcohol','cc_insectbite','othergudx','fetaldistrs','cc_legpain',
                    'cc_alcoholproblem','cc_cough','urinyorgca','cc_thumbinjury','cc_faciallaceration','ethnicity',
                    'htninpreg','whtblooddx','cc_shoulderpain','hyperlipidem','cxr_count',
                    'cc_headache-recurrentorknowndxmigraines','cc_dehydration','arrivalmonth','cc_legswelling','cc_jawpain',
                    'gihemorrhag','pancreascan','2ndarymalig','mooddisorders','giconganom','cc_hallucinations','pancreasdx',
                    'cc_irregularheartbeat','abortcompl','cc_wristpain','cc_maleguproblem','cc_panicattack',
                    'cc_woundre-evaluation','cc_multiplefalls','leukemias','infmalegen','aneurysm','tuberculosis',
                    'cc_generalizedbodyaches','meds_eentpreps','epilepsycnv','cc_footpain','cc_neurologicproblem',
                    'cc_handpain','meds_colonystimulatingfactors','blindness','cc_leginjury','ekg_count','meds_diagnostic',
                    'cc_hypotension','cc_ribinjury','meds_antifungals','suicideandintentionalselfinflictedin',
                    'cc_overdose-intentional','cc_chesttightness','cc_gibleeding','cc_fingerinjury','cc_handinjury',
                    'cc_hyperglycemia','cc_drug/alcoholassessment','meds_elect/caloric/h2o','othliverdx','otconganom',
                    'immunizscrn','meds_autonomicdrugs','cc_post-opproblem','meds_anesthetics','othlowresp',
                    'meds_analgesicandantihistaminecombination','otcomplbir','meds_antihistamines','anxietydisorders',
                    'ulceratcol','cc_chestpain','cc_hematuria','insurance_status','othbactinf','cc_breastpain',
                    'cc_medicalproblem','cc_stdcheck','cc_cyst','cc_suture/stapleremoval','copd','cc_hemoptysis',
                    'meds_antidotes','employstatus','chrkidneydisease','schizophreniaandotherpsychoticdisorde',
                    'otherus_count','cc_hippain','cc_laceration','cc_headinjury','nonhodglym','cc_breathingdifficulty',
                    'cc_cardiacarrest','meds_antiinfectives/miscellaneous','liveborn','cc_skinirritation',
                    'cc_feverimmunocompromised','cc_seizures','cc_lethargy','cc_anklepain','othfracture',
                    'cc_vaginaldischarge','cc_urinaryretention','maritalstatus','otdxbladdr','cc_fall>65',
                    'cc_follow-upcellulitis','otbnignneo','cc_shortnessofbreath','cc_ingestion','cc_armswelling',
                    'cc_conjunctivitis','feminfertil','cc_nausea','cc_kneepain','cc_nasalcongestion',
                    'impulsecontroldisordersnec','otherimg_count','otrespirca','cc_fatigue','meds_contraceptives',
                    'rehab','cc_kneeinjury','triage_vital_sbp','pleurisy','cc_headlaceration','cc_tickremoval',
                    'meds_anti-obesitydrugs','cc_suicidal','unclassified','sprain','fatigue','ecodesdrowningsubmersion',
                    'gastroent','cc_asthma','cc_toepain','maligneopls','ecodesadverseeffectsofmedicaldrugs',
                    'cc_headachere-evaluation','meds_anticoagulants','cc_elevatedbloodsugar-nosymptoms','goutotcrys',
                    'ms','cc_oralswelling','carditis','cc_influenza','cc_breathingproblem','cc_vaginalpain','malgenitca',
                    'cc_alcoholintoxication','pulmhartdx','bronchitis','cc_respiratorydistress',
                    'ecodesotherspecifiedandclassifiable','maintchemr','acutemi','cc_bleeding/bruising','cc_fulltrauma',
                    'arrivalmode','diabmelnoc','cc_elbowpain','cc_fingerpain','glaucoma','acutecvd','cc_wristinjury',
                    'coronathero','meds_antiallergy','meds_antineoplastics','pelvicobstr','otdxstomch','gastritis',
                    'poisonpsych','longpregncy','othinfectns','endometrios','cc_mass','cc_pelvicpain','cc_groinpain',
                    'nephritis','cc_headache','cc_sorethroat','developmentaldisorders','umbilcord','cc_chills','mouthdx',
                    'fxarm','otheredcns','othnervdx','cc_vaginalbleeding','meds_smokingdeterrents','otendodsor',
                    'meds_gastrointestinal','cc_poisoning','earlylabor','cc_headache-newonsetornewsymptoms',
                    'meds_antiplateletdrugs','cc_hypertension','testiscancr','dysrhythmia','cc_edema','cc_fingerswelling',
                    'othheartdx','cc_palpitations','cc_dysuria','cc_footinjury','cardiacanom','cc_detoxevaluation',
                    'multmyeloma','contraceptiv','cc_ankleinjury','cc_rectalpain','cc_depression','inducabortn',
                    'brainnscan','cc_fever-75yearsorolder','lowbirthwt','cc_toeinjury','spontabortn','cc_dizziness',
                    'cc_rash','fuo','cc_woundinfection','meds_antihistamineanddecongestantcombination','cc_trauma',
                    'cc_sicklecellpain','ecodesunspecified','bonectcncr','cc_fall','cc_uri','cc_modifiedtrauma',
                    'comabrndmg','cc_numbness','meds_pre-natalvitamins','meds_diuretics','tonsillitis','othereardx',
                    'cc_elevatedbloodsugar-symptomatic','meds_antivirals','lymphenlarg','meds_biologicals','cc_backpain',
                    'cc_emesis','otherct_count','cc_tachycardia','cc_addictionproblem','prostatecan','skinmelanom',
                    'cc_epistaxis','n_admissions','nauseavomit','otbonedx','miscellaneousmentalhealthdisorders',
                    'menopausldx','teethdx','cc_burn','parkinsons','hepatitis','rheumarth','cc_strokealert','malposition',
                    'ectopicpreg','cc_coldlikesymptoms','cc_medicalscreening','femgenitca',
                    'screeningandhistoryofmentalhealthan','cc_fever','cc_facialswelling','varicosevn',
                    'cc_nearsyncope','ecodessuffocation','menstrualdx','cc_blurredvision','cc_jointswelling',
                    'cc_animalbite','ovariancyst','ecodesplaceofoccurrence','spincorinj','nutritdefic','ecodescutpierce',
                    'septicemia','neoplsmunsp','htncomplicn','cc_extremitylaceration','cc_ribpain','osteoarthros',
                    'nervcongan','otupprresp','cc_giproblem','kidnyrnlca','otpregcomp','peripathero','ovarycancer',
                    'skininfectn','lateeffcvd','exameval','ecodesfall','influenza','otdxkidney','diverticulos','eyeinfectn',
                    'cysticfibro','intracrninj','disordersusuallydiagnosedininfancych','superficinj','syncope','cc_syncope',
                    'fxleg','othereyedx','sle','triage_vital_o2_device','cc_epigastricpain','cc_earproblem','pathologfx',
                    'hemorrpreg','intobstruct','cc_foreignbodyineye','meds_antibiotics','triage_vital_rr','otjointdx',
                    'othercvd','dizziness','cc_eyeproblem','tia','hdnckcancr','esophcancer','htn','appendicitis',
                    'hemmorhoids']


# Keep only the selected columns (raises KeyError if any listed column is absent)
data = data[top_400_features]
# Print a summary of the reduced frame as a sanity check
data.info()

#### Split the data into train and test data & oversample to balance target variable

Note that random oversampling balances the target by adding duplicated minority-class rows to the training data. The original number of rows was ~64000, so only about 2000 additional rows were introduced.

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
df_train, df_test = train_test_split(data, test_size=0.2, random_state=127)

# Predictors: every column except the target
X_train = df_train.drop(columns="disposition")
X_test = df_test.drop(columns="disposition")

# Target: 1 where the disposition is 'Admit' (the event being predicted), else 0
y_train = df_train['disposition'].eq('Admit').astype('int')
y_test = df_test['disposition'].eq('Admit').astype('int')

In [None]:
# Balance the target (disposition) in the training data by random oversampling;
# the sampler is seeded so the resampled set is repeatable.
oversampler = RandomOverSampler(random_state=17)
resampled_pair = oversampler.fit_resample(X_train, y_train)
X_resampled, y_resampled = resampled_pair

# Show the class counts after resampling
y_resampled.value_counts()

#### split features into types

In [None]:
# Columns with an inherent order.
# NOTE: the preprocessor cell names these three columns individually
# instead of referring to ordinal_cols.
ordinal_cols = ['arrivalmonth', 'arrivalday', 'arrivalhour_bin']
# Nominal columns; routed to the plain one-hot encoder in the preprocessor.
cat_cols = ['gender', 'ethnicity', 'religion', 'maritalstatus', 'employstatus', 'insurance_status', 
            'dep_name', 'arrivalmode', 'lang', 'previousdispo']
# Flag-style columns (chief complaints, diagnoses, ...); routed to the
# one-hot encoder configured with drop='if_binary' in the preprocessor.
binary_cols = [
     'triage_vital_o2_device', 'cc_addictionproblem', 'cc_alcoholintoxication', 'cc_alcoholproblem', 
     'cc_alteredmentalstatus', 'cc_animalbite', 'cc_ankleinjury', 'cc_anklepain', 'cc_armswelling', 
     'cc_asthma', 'cc_backpain', 'cc_bleeding/bruising', 'cc_blurredvision', 'cc_breastpain', 
     'cc_breathingdifficulty', 'cc_breathingproblem', 'cc_burn', 'cc_cardiacarrest', 'cc_chestpain', 
     'cc_chesttightness', 'cc_chills', 'cc_coldlikesymptoms', 'cc_confusion', 'cc_conjunctivitis', 'cc_cough', 
     'cc_cyst', 'cc_dehydration', 'cc_dentalpain', 'cc_depression', 'cc_detoxevaluation', 'cc_diarrhea', 'cc_dizziness', 
     'cc_drug/alcoholassessment', 'cc_dysuria', 'cc_earproblem', 'cc_edema', 'cc_elbowpain', 
     'cc_elevatedbloodsugar-nosymptoms', 'cc_elevatedbloodsugar-symptomatic', 'cc_emesis', 'cc_epigastricpain', 
     'cc_epistaxis', 'cc_extremitylaceration', 'cc_eyeproblem', 'cc_faciallaceration', 'cc_facialswelling', 'cc_fall', 
     'cc_fall>65', 'cc_fatigue', 'cc_femaleguproblem', 'cc_fever', 'cc_fever-75yearsorolder', 'cc_feverimmunocompromised', 
     'cc_fingerinjury', 'cc_fingerpain', 'cc_fingerswelling', 'cc_follow-upcellulitis', 'cc_footinjury', 'cc_footpain', 
     'cc_foreignbodyineye', 'cc_fulltrauma', 'cc_generalizedbodyaches', 'cc_gibleeding', 'cc_giproblem', 'cc_groinpain', 
     'cc_hallucinations', 'cc_handinjury', 'cc_handpain', 'cc_headache', 'cc_headache-newonsetornewsymptoms', 
     'cc_headache-recurrentorknowndxmigraines', 'cc_headachere-evaluation', 'cc_headinjury', 'cc_headlaceration', 
     'cc_hematuria', 'cc_hemoptysis', 'cc_hippain', 'cc_hyperglycemia', 'cc_hypertension', 'cc_hypotension', 'cc_influenza',
     'cc_ingestion', 'cc_insectbite', 'cc_irregularheartbeat', 'cc_jawpain', 'cc_jointswelling', 'cc_kneeinjury', 
     'cc_kneepain', 'cc_laceration', 'cc_leginjury', 'cc_legpain', 'cc_legswelling', 'cc_lethargy', 
     'cc_lossofconsciousness', 'cc_maleguproblem', 'cc_mass', 'cc_medicalproblem', 'cc_medicalscreening', 
     'cc_medicationproblem', 'cc_medicationrefill', 'cc_modifiedtrauma', 'cc_multiplefalls', 'cc_nasalcongestion', 
     'cc_nausea', 'cc_nearsyncope', 'cc_neurologicproblem', 'cc_numbness', 'cc_oralswelling', 'cc_overdose-intentional', 
     'cc_palpitations', 'cc_panicattack', 'cc_pelvicpain', 'cc_poisoning', 'cc_post-opproblem', 'cc_rash', 'cc_rectalpain',
     'cc_respiratorydistress', 'cc_ribinjury', 'cc_ribpain', 'cc_seizures', 'cc_shortnessofbreath', 'cc_shoulderinjury', 
     'cc_shoulderpain', 'cc_sicklecellpain', 'cc_skinirritation', 'cc_sorethroat', 'cc_stdcheck', 'cc_strokealert', 
     'cc_suicidal', 'cc_suture/stapleremoval', 'cc_syncope', 'cc_tachycardia', 'cc_thumbinjury', 'cc_tickremoval', 
     'cc_toeinjury', 'cc_toepain', 'cc_trauma', 'cc_uri', 'cc_urinaryfrequency', 'cc_urinaryretention', 'cc_vaginalbleeding',
     'cc_vaginaldischarge', 'cc_vaginalpain', 'cc_wheezing', 'cc_withdrawal-alcohol', 'cc_woundinfection', 
     'cc_woundre-evaluation', 'cc_wristinjury', 'cc_wristpain', '2ndarymalig', 'abdomnlpain', 'abortcompl', 'acrenlfail', 
     'acutecvd', 'acutemi', 'acutphanm', 'adjustmentdisorders', 'adltrespfl', 'alcoholrelateddisorders', 'allergy', 
     'analrectal', 'anemia', 'aneurysm', 'anxietydisorders', 'appendicitis', 'artembolism', 'asppneumon', 'asthma', 
     'attentiondeficitconductdisruptivebeha', 'backproblem', 'biliarydx', 'birthasphyx', 'birthtrauma', 'bladdercncr', 
     'blindness', 'bonectcncr', 'bph', 'brainnscan', 'breastcancr', 'breastdx', 'brnchlngca', 'bronchitis', 'burns', 
     'cardiaarrst', 'cardiacanom', 'carditis', 'cervixcancr', 'chrkidneydisease', 'coaghemrdx', 'comabrndmg', 'complicdevi',
     'complicproc', 'contraceptiv', 'copd', 'coronathero', 'cysticfibro', 'deliriumdementiaamnesticothercognitiv', 
     'developmentaldisorders', 'diabmelnoc', 'diabmelwcm', 'disordersusuallydiagnosedininfancych', 'diverticulos', 
     'dizziness', 'dysrhythmia', 'earlylabor', 'ecodesadverseeffectsofmedicaldrugs', 'ecodescutpierce', 
     'ecodesdrowningsubmersion', 'ecodesfall', 'ecodesfirearm', 'ecodesotherspecifiedandclassifiable', 
     'ecodesplaceofoccurrence', 'ecodesstruckbyagainst', 'ecodessuffocation', 'ecodesunspecified', 'ectopicpreg', 
     'endometrios', 'epilepsycnv', 'esophcancer', 'exameval', 'eyeinfectn', 'fatigue', 'femgenitca', 'feminfertil', 
     'fetaldistrs', 'fuo', 'fxarm', 'fxleg', 'gastritis', 'gastroent', 'giconganom', 'gihemorrhag', 'glaucoma', 'goutotcrys',
     'hdnckcancr', 'hemmorhoids', 'hemorrpreg', 'hepatitis', 'htn', 'htncomplicn', 'htninpreg', 'hyperlipidem', 
     'immunizscrn', 'impulsecontroldisordersnec', 'inducabortn', 'influenza', 'infmalegen', 'intobstruct', 'intracrninj',
     'kidnyrnlca', 'lateeffcvd', 'leukemias', 'liveborn', 'longpregncy', 'lowbirthwt', 'lymphenlarg', 'maintchemr',
     'malgenitca', 'maligneopls', 'malposition', 'menopausldx', 'menstrualdx', 'miscellaneousmentalhealthdisorders',
     'mooddisorders', 'mouthdx', 'ms', 'multmyeloma', 'nauseavomit', 'neoplsmunsp', 'nephritis', 'nervcongan', 'nonhodglym',
     'nutritdefic', 'osteoarthros', 'otbnignneo', 'otbonedx', 'otcomplbir', 'otconganom', 'otdxbladdr', 'otdxkidney', 
     'otdxstomch', 'otendodsor', 'othbactinf', 'othercvd', 'othereardx', 'otheredcns', 'othereyedx', 'othergidx', 
     'othergudx', 'othfracture', 'othheartdx', 'othinfectns', 'othliverdx', 'othlowresp', 'othnervdx', 'otinflskin', 
     'otitismedia', 'otjointdx', 'otpregcomp', 'otrespirca', 'otupprresp', 'ovariancyst', 'ovarycancer', 'pancreascan', 
     'pancreasdx', 'parkinsons', 'pathologfx', 'pelvicobstr', 'peripathero', 'pleurisy', 'poisnnonmed', 'poisonpsych', 
     'prostatecan', 'pulmhartdx', 'rehab', 'respdistres', 'rheumarth', 'schizophreniaandotherpsychoticdisorde', 
     'screeningandhistoryofmentalhealthan', 'septicemia', 'skininfectn', 'skinmelanom', 'sle', 'spincorinj', 
     'spontabortn', 'sprain', 'suicideandintentionalselfinflictedin', 'superficinj', 'syncope', 'teethdx', 'testiscancr',
     'tia', 'tonsillitis', 'tuberculosis', 'ulceratcol', 'umbilcord', 'unclassified', 'urinstone', 'urinyorgca', 
     'varicosevn', 'viralinfect', 'whtblooddx']

# Numeric columns (triage vitals, counts, medication columns); routed to
# StandardScaler in the preprocessor.
numeric_cols = ['esi', 'age', 'n_admissions', 'triage_vital_hr', 
                'triage_vital_sbp', 'triage_vital_dbp', 'triage_vital_rr', 
                'triage_vital_temp', 'cxr_count', 'echo_count', 'ekg_count', 
                'otherct_count', 'otherimg_count', 'otherus_count', 
                'meds_analgesicandantihistaminecombination', 'meds_anesthetics', 
                'meds_anti-obesitydrugs', 'meds_antiallergy', 'meds_antibiotics', 
                'meds_anticoagulants', 'meds_antidotes', 'meds_antifungals', 
                'meds_antihistamineanddecongestantcombination', 'meds_antihistamines', 
                'meds_antihyperglycemics', 'meds_antiinfectives', 'meds_antiinfectives/miscellaneous', 
                'meds_antineoplastics', 'meds_antiplateletdrugs', 'meds_antivirals', 'meds_autonomicdrugs', 
                'meds_biologicals', 'meds_blood', 'meds_cardiacdrugs', 'meds_colonystimulatingfactors', 
                'meds_contraceptives', 'meds_cough/coldpreparations', 'meds_diagnostic', 'meds_diuretics', 
                'meds_eentpreps', 'meds_elect/caloric/h2o', 'meds_gastrointestinal', 'meds_herbals', 
                'meds_pre-natalvitamins', 'meds_smokingdeterrents']

#### set the order of ordinal variables

In [None]:
# Explicit category order for each ordinal column, lowest to highest.
month_order = ['January', 'February', 'March', 'April', 'May',
               'June', 'July', 'August', 'September', 'October', 'November', 'December']
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
hour_order = ['23-02', '03-06', '07-10', '11-14', '15-18', '19-22']

# Validate that every value seen in the training data has a place in its order.
# A subset check is used rather than set equality: a category that happens to be
# absent from the training rows (likely once the data is sub-sampled) is harmless
# for an encoder with a fixed category list, whereas an unlisted value is not.
# An explicit raise is used because `assert` is skipped under `python -O`.
for column, order in (('arrivalmonth', month_order),
                      ('arrivalday', day_order),
                      ('arrivalhour_bin', hour_order)):
    unexpected = set(df_train[column].unique()) - set(order)
    if unexpected:
        raise ValueError(f"Unexpected values in '{column}': {unexpected!r}")

#### create column transformers

In [None]:
# One single-step pipeline per column family.
numeric_transformer = make_pipeline(StandardScaler())
cat_transformer = make_pipeline(
    OneHotEncoder(handle_unknown='ignore', dtype=int)
)
binary_transformer = make_pipeline(
    OneHotEncoder(handle_unknown='ignore', drop='if_binary', dtype=int)
)

# The three ordinal encoders differ only in the category order they are given.
ordinal_transformer1, ordinal_transformer2, ordinal_transformer3 = (
    make_pipeline(OrdinalEncoder(categories=[level_order], dtype=int))
    for level_order in (month_order, day_order, hour_order)
)

# (transformer, columns) pairs, in the order their outputs are concatenated.
column_assignments = [
    (cat_transformer, cat_cols),
    (binary_transformer, binary_cols),
    (numeric_transformer, numeric_cols),
    (ordinal_transformer1, ['arrivalmonth']),
    (ordinal_transformer2, ['arrivalday']),
    (ordinal_transformer3, ['arrivalhour_bin']),
]
preprocessor = make_column_transformer(*column_assignments)

### Train the model

#### cross validate to find general accuracy score

The RFC's parameters were determined in a prior hyperparameter tuning process

In [None]:
# fit & score the pipeline
# The imblearn pipeline applies samplers during fit only, which lets the
# oversampling happen inside each cross-validation fold.
from imblearn.pipeline import make_pipeline as make_resampling_pipeline

rfc_pipe = make_pipeline(preprocessor, RandomForestClassifier(class_weight='balanced',
                                                              n_estimators=200,
                                                              max_depth=None))

# Cross-validating on data that was oversampled beforehand leaks information:
# copies of the same row end up in both the training and the validation fold,
# which inflates the scores. Resampling inside the pipeline means only each
# fold's training part is oversampled and validation rows stay untouched.
rfc_cv_pipe = make_resampling_pipeline(RandomOverSampler(random_state=17), rfc_pipe)
rfc_scores = pd.DataFrame(cross_validate(rfc_cv_pipe, X_train, y_train, cv=5, return_train_score=True))
rfc_scores

#### set a custom threshold and refit.

In [None]:
# Wrap set_threshold so it can be applied to predicted probabilities with a 0.25 cut-off
threshold_transformer = FunctionTransformer(set_threshold, kw_args={'threshold': 0.25})

# Rebuild the preprocessing + random-forest pipeline with the same settings used for scoring
forest = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    class_weight='balanced',
)
rfc_pipe = make_pipeline(preprocessor, forest)

# NOTE(review): the fit uses the original training split, not X_resampled /
# y_resampled; confirm this is intended.
rfc_pipe.fit(X_train, y_train)

#### predict on the test set

In [None]:
# Per-class predicted probabilities for the held-out test rows
y_pred_proba = rfc_pipe.predict_proba(X_test)

# Apply set_threshold (threshold=0.25) to the probabilities.
# Presumably this yields hard 0/1 predictions; verify against functions.set_threshold.
y_pred_threshold = threshold_transformer.transform(y_pred_proba)

## Scoring

#### Confusion Matrix

In [None]:
# labels=[0, 1] pins the matrix to 2x2 in (Discharged, Admit) order so it always
# matches the tick labels below, even if one class is missing from a small test set.
conf_matrix = confusion_matrix(y_test, y_pred_threshold, labels=[0, 1])

# Plot Confusion Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=['Discharged', 'Admit'],
            yticklabels=['Discharged', 'Admit'],
            linewidths=1,
            linecolor='white')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
# Save BEFORE showing: once plt.show() returns, the current figure is typically
# released, so a later savefig would write an empty image.
plt.savefig('confusion_matrix.png')
plt.show()

#### Classification Report

In [None]:
# Build the per-class precision / recall / F1 summary and print it under a heading
class_report = classification_report(y_test, y_pred_threshold)
print("\nClassification Report:", class_report, sep="\n")

#### False Negative / False Positive %

Note that the numbers input below may differ slightly depending on when the confusion matrix was last run

In [None]:
# Hard-coded counts passed to calculate_stats.
# NOTE(review): presumably copied by hand from a confusion-matrix run; they are
# not recomputed here, so confirm they match the current conf_matrix.
confusion_counts = (6491, 1897, 526, 3049)
calculate_stats(*confusion_counts)

# Results

In [None]:
# One row per test observation: class probabilities, thresholded prediction,
# true label, and whether the two agree.
predictions = pd.DataFrame(y_pred_proba)
predictions.columns = ['Discharge Probability', 'Admit Probability']
predictions['Prediction'] = y_pred_threshold
# y_test still carries the row labels of the original (shuffled) data, while
# `predictions` has a fresh 0..n-1 index. Assigning the Series directly would
# align on index and produce NaN or mismatched labels, so assign by position.
predictions['Actual'] = y_test.to_numpy()
# Build the label column in one step rather than with chained in-place
# replace calls, which are unreliable under pandas copy-on-write.
predictions['Correct'] = np.where(
    predictions['Prediction'] == predictions['Actual'], 'Correct', 'Incorrect'
)
predictions

In [None]:
# # Write predictions to csv
# predictions.to_csv("predictions.csv")

#### Feature Importances

In [None]:
# Get Feature Importances
feature_importances = rfc_pipe.named_steps['randomforestclassifier'].feature_importances_

# The forest is trained on the preprocessor's OUTPUT columns (one-hot expanded
# and ordered by transformer), not on the raw input columns. Take the names from
# the fitted ColumnTransformer so each importance is paired with the right feature.
feature_names = rfc_pipe.named_steps['columntransformer'].get_feature_names_out()
if len(feature_names) != len(feature_importances):
    raise ValueError(
        f"Got {len(feature_names)} feature names for "
        f"{len(feature_importances)} importances"
    )

# Convert to dataframe
feature_importance_dict = dict(zip(feature_names.tolist(), feature_importances))
sorted_feature_importance = sorted(feature_importance_dict.items(), key=lambda x: x[1], reverse=True)
feature_importance_df = pd.DataFrame(sorted_feature_importance, columns=['Feature', 'Importance'])

# Display top n features
n = int(input("Please input the number of features to view: "))
feature_importance_df[:n]

In [None]:
# # Write to csv

# feature_importance_df.to_csv("feature_importances.csv")