# Build ML Models

## Initialise logger

In [1]:

import logging
import sys
import warnings

# Notebook-wide logger. The name "EHR-ML" is rendered in every record through
# the %(name)s field of the formatter below.
log = logging.getLogger("EHR-ML")
log.setLevel(logging.INFO)

# Named `formatter` rather than `format` so the builtin format() is not
# shadowed for the rest of the kernel session.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# logging.getLogger returns the same logger object on every call, so re-running
# this cell would otherwise stack another stdout handler and print each record
# once per run. Drop any stdout handler left by a previous run first.
for staleHandler in list(log.handlers):
    if getattr(staleHandler, "stream", None) is sys.stdout:
        log.removeHandler(staleHandler)

# Send records to stdout so they show up in the cell output area.
ch = logging.StreamHandler(sys.stdout)
ch.setFormatter(formatter)
log.addHandler(ch)

# Silence every warning category for the rest of the session (Warning is the
# base class of all warning categories).
warnings.simplefilter(action='ignore', category=Warning)

In [2]:
import os
import sys

# Base directory taken from the environment; a KeyError is raised here if
# EICU_EHR_PIPELINE_BASE is not set.
pipelineBase = os.environ['EICU_EHR_PIPELINE_BASE']

# Extend the import path so that the `ehrml` package imported below resolves
# (presumably it lives under <base>/EHR-ML — confirm against the checkout).
sys.path.append(pipelineBase + "/EHR-ML")

from ehrml.utils import DataUtils

# readEicuData is unpacked into twelve values. Judging by the names these are
# the full feature matrix, five vitals aggregates, five labs aggregates and
# the target — TODO confirm against DataUtils.
(
    X,
    XVitalsAvg,
    XVitalsMin,
    XVitalsMax,
    XVitalsFirst,
    XVitalsLast,
    XLabsAvg,
    XLabsMin,
    XLabsMax,
    XLabsFirst,
    XLabsLast,
    y,
) = DataUtils.readEicuData(dirPath=pipelineBase + '/data/final/data_matrix.csv')

In [3]:
# Build the LR model on the full feature matrix X and target y loaded by the
# data-loading cell. The recorded run logs hyperparameter optimisation, model
# building and cross-validation, and the result is a dict of per-fold arrays
# (fit/score times plus test_* metrics, five entries each).
from ehrml.utils import MlUtils


lrScores = MlUtils.buildLRModel(X, y)
# Bare expression so the notebook renders the scores dict as the cell output.
# NOTE(review): the recorded output has test_mccf1_score = nan in every fold —
# that scorer may be failing silently; confirm inside MlUtils.
lrScores

2023-11-14 16:11:52,144 - EHR-ML - INFO - Performing Hyperparameter optimisation
2023-11-14 16:13:39,994 - EHR-ML - INFO - Building the model
2023-11-14 16:13:39,996 - EHR-ML - INFO - Performing cross-validation


{'fit_time': array([1.44842362, 1.58930182, 1.52776456, 1.43548298, 1.60096693]),
 'score_time': array([0.02950621, 0.02920747, 0.03007889, 0.02933598, 0.02961135]),
 'test_accuracy': array([0.85147679, 0.86751055, 0.8649789 , 0.86582278, 0.85991561]),
 'test_balanced_accuracy': array([0.50314502, 0.52027429, 0.51619213, 0.52744289, 0.50583795]),
 'test_average_precision': array([0.19583174, 0.28268851, 0.28298281, 0.36013281, 0.25866975]),
 'test_f1': array([0.04347826, 0.08187135, 0.06976744, 0.11173184, 0.03488372]),
 'test_roc_auc': array([0.64038844, 0.70014679, 0.69622841, 0.76150393, 0.68208368]),
 'test_mccf1_score': array([nan, nan, nan, nan, nan])}

In [4]:
# Build the XGBoost ensemble from the ten per-aggregate feature groups
# (vitals and labs: Avg/Min/Max/First/Last) and the target y. The recorded run
# logs a train/test split followed by a staged hyperparameter search.
from ehrml.utils import MlUtils


xgbEnsembleScores = MlUtils.buildEnsembleXGBoostModel(XVitalsAvg, XVitalsMin, XVitalsMax, XVitalsFirst, XVitalsLast, XLabsAvg, XLabsMin, XLabsMax, XLabsFirst, XLabsLast, y)
# Bare expression so the notebook renders the scores dict as the cell output.
# NOTE(review): the recorded output shows test_f1 = 0 and
# test_balanced_accuracy = 0.5 in every fold, which looks like the positive
# class is never predicted. The logged search grid only tries scale_pos_weight
# values below 1 (0.1-0.4), which would down-weight the positive class —
# confirm whether that grid is intended.
xgbEnsembleScores

2023-11-14 16:28:05,078 - EHR-ML - INFO - Split data to test and train sets
2023-11-14 16:28:05,368 - EHR-ML - INFO - Performing Hyperparameter optimisation for XGBoost
2023-11-14 16:28:05,369 - EHR-ML - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-11-14 16:28:39,196 - EHR-ML - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-11-14 16:28:55,488 - EHR-ML - INFO - Hyperparameter optimisation for: {'min_child_weight': range(1, 10)}
2023-11-14 16:28:59,596 - EHR-ML - INFO - Hyperparameter optimisation for: {'gamma': [0.0, 0.1, 0.2, 0.3, 0.4]}
2023-11-14 16:29:01,090 - EHR-ML - INFO - Hyperparameter optimisation for: {'subsample': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], 'colsample_bytree': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}
2023-11-14 16:29:37,428 - EHR-ML - INFO - Hyperparameter optimisation for: {'reg_alpha': [0, 1e-05, 0.001, 0.1, 10]}
2023-11-14 16:

{'fit_time': array([0.04235792, 0.04047656, 0.04251337, 0.04025888, 0.04115963]),
 'score_time': array([0.03345919, 0.03206754, 0.03253698, 0.03202438, 0.03243256]),
 'test_accuracy': array([0.86846543, 0.87015177, 0.87015177, 0.86993243, 0.86993243]),
 'test_balanced_accuracy': array([0.5, 0.5, 0.5, 0.5, 0.5]),
 'test_average_precision': array([0.29098879, 0.32337649, 0.20941848, 0.25931348, 0.20447345]),
 'test_f1': array([0., 0., 0., 0., 0.]),
 'test_roc_auc': array([0.6976475 , 0.6794775 , 0.63054968, 0.68442819, 0.63008448]),
 'test_mccf1_score': array([nan, nan, nan, nan, nan])}