### Multi-Output Model Using scikit-learn's `MultiOutputClassifier` Wrapper

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.multioutput import MultiOutputClassifier
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
from skmultilearn.model_selection.iterative_stratification import iterative_train_test_split

In [None]:
# Collect every partition CSV. glob() yields paths in arbitrary, filesystem-dependent
# order, so sort them to make the file order (and the row order of anything built
# from it) deterministic across runs. Note the sort is lexicographic, so e.g.
# "..._10.csv" comes before "..._2.csv".
csv_files = sorted(glob('/content/drive/MyDrive/CSE6242 Clean Partitions/*.csv'))
# Preview the first few paths as a sanity check; an empty list means nothing matched.
csv_files[:5]

['/content/drive/MyDrive/CSE6242 Clean Partitions/df_partition_0.csv',
 '/content/drive/MyDrive/CSE6242 Clean Partitions/df_partition_1.csv',
 '/content/drive/MyDrive/CSE6242 Clean Partitions/df_partition_2.csv',
 '/content/drive/MyDrive/CSE6242 Clean Partitions/df_partition_3.csv',
 '/content/drive/MyDrive/CSE6242 Clean Partitions/df_partition_4.csv']

In [None]:
# Load every partition, drop rows with any missing value, and stack them row-wise.
# ignore_index=True replaces the per-file indexes (which would otherwise repeat
# across partitions) with a single unique RangeIndex on the combined frame.
df = pd.concat([pd.read_csv(file).dropna() for file in csv_files], axis=0, ignore_index=True)

In [None]:
# Outcome columns used as the prediction targets.
targets = ['hosp_yn', 'icu_yn', 'death_yn']
# Report each outcome's class balance as proportions rather than raw counts.
for target in targets:
  class_shares = df[target].value_counts(normalize=True)
  print(target, class_shares, sep='\n')

hosp_yn
0.0    0.854746
1.0    0.145254
Name: hosp_yn, dtype: float64
icu_yn
0.0    0.951343
1.0    0.048657
Name: icu_yn, dtype: float64
death_yn
0.0    0.954915
1.0    0.045085
Name: death_yn, dtype: float64


In [None]:
# Hold out 30% of the rows for evaluation; features are every non-target column.
# A fixed random_state makes the shuffle repeatable, so the same rows land in
# train/test on every Restart & Run All.
# NOTE(review): the split is not stratified even though the targets are imbalanced;
# consider a multi-label stratified split if per-class proportions matter.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(targets, axis=1), df[targets],
    test_size=0.3, random_state=42)


In [None]:
# Wrap a gradient-boosting classifier so that one classifier is fitted per target
# column; n_jobs=-1 lets the per-target fits run in parallel.
# random_state pins the estimator's internal randomness so refits are repeatable.
mogb = MultiOutputClassifier(GradientBoostingClassifier(random_state=42), n_jobs=-1)
# fit() returns the fitted wrapper, which the notebook displays as the cell output.
mogb.fit(X_train, y_train)

MultiOutputClassifier(estimator=GradientBoostingClassifier(), n_jobs=-1)

In [None]:
# Predict all targets at once; column i of each prediction array lines up with
# targets[i] because the model was fitted on df[targets].
train_preds = mogb.predict(X_train)
test_preds = mogb.predict(X_test)

# Per-target accuracy and F1 on both splits, printed train first, then test.
for i, target in enumerate(targets):
  label = target.upper()
  print(f'{label} classifications')

  train_acc = accuracy_score(y_train[target], train_preds[:, i])
  train_f1 = f1_score(y_train[target], train_preds[:, i])
  print(f'{label} Train Accuracy: {train_acc}, F1 score: {train_f1}')

  test_acc = accuracy_score(y_test[target], test_preds[:, i])
  test_f1 = f1_score(y_test[target], test_preds[:, i])
  print(f'{label} Test Accuracy: {test_acc}, F1 score: {test_f1} \n')

HOSP_YN classifications
HOSP_YN Train Accuracy: 0.9317938315909918, F1 score: 0.6977335006530591
HOSP_YN Test Accuracy: 0.9321884430444949, F1 score: 0.6999367534218786 

ICU_YN classifications
ICU_YN Train Accuracy: 0.9669388533485896, F1 score: 0.5305499483038748
ICU_YN Test Accuracy: 0.9667751687535564, F1 score: 0.5273173726212401 

DEATH_YN classifications
DEATH_YN Train Accuracy: 0.9804222071970551, F1 score: 0.7317953671010339
DEATH_YN Test Accuracy: 0.9807339127902116, F1 score: 0.733154041535933 

