# Build ML Models

## Initialise logger

In [1]:

import logging
import sys
import warnings

# Notebook-wide logger. The outputs of the model-building cells show records
# emitted under this same "EHR-ML" name, so configuring it here controls how
# those records are rendered.
log = logging.getLogger("EHR-ML")
log.setLevel(logging.INFO)

# Deliberately not called `format`: that name would shadow the builtin
# format() for every later cell in the notebook.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# Reuse an existing stdout handler if this cell has already been run in the
# current kernel; blindly calling addHandler() again would make every log
# record print once per execution of this cell.
ch = next(
    (
        handler
        for handler in log.handlers
        if isinstance(handler, logging.StreamHandler) and handler.stream is sys.stdout
    ),
    None,
)
if ch is None:
    ch = logging.StreamHandler(sys.stdout)
    log.addHandler(ch)
ch.setFormatter(formatter)

# NOTE: `Warning` is the base class of all warning categories, so this
# silences every warning raised for the rest of the session.
warnings.simplefilter(action='ignore', category=Warning)

In [2]:
import os
import sys

# Root directory of the eICU pipeline, read once from the environment.
# A missing variable raises KeyError here, before any import or file access.
pipelineBase = os.environ['EICU_EHR_PIPELINE_BASE']

# Make the EHR-ML checkout importable. The membership check keeps the cell
# idempotent: re-running it does not keep appending the same entry to sys.path.
ehrMlPath = pipelineBase + "/EHR-ML"
if ehrMlPath not in sys.path:
    sys.path.append(ehrMlPath)

from ehrml.utils import DataUtils

# readEicuData is unpacked into twelve values. Going by the names, X is
# presumably the combined feature matrix, the XVitals*/XLabs* values are
# per-aggregation subsets and y is the target -- TODO confirm against DataUtils.
# NOTE(review): the keyword is `dirPath` but the value is a CSV file path.
(
    X,
    XVitalsAvg,
    XVitalsMin,
    XVitalsMax,
    XVitalsFirst,
    XVitalsLast,
    XLabsAvg,
    XLabsMin,
    XLabsMax,
    XLabsFirst,
    XLabsLast,
    y,
) = DataUtils.readEicuData(dirPath=pipelineBase + '/data/final/data_matrix.csv')

In [5]:
from ehrml.utils import MlUtils

# Logistic regression on the XLabsAvg features only.
# Stored under its own name: later cells assign `lrScores`, which would
# otherwise overwrite this result and make the runs impossible to compare.
lrLabsAvgScores = MlUtils.buildLRModel(XLabsAvg, y)
# Bare expression so the notebook renders the returned scores as cell output.
lrLabsAvgScores

2023-11-15 00:06:06,669 - EHR-ML - INFO - Performing Hyperparameter optimisation
2023-11-15 00:06:18,002 - EHR-ML - INFO - Building the model
2023-11-15 00:06:18,004 - EHR-ML - INFO - Performing cross-validation


{'fit_time': array([0.09037375, 0.08481932, 0.08304834, 0.09835219, 0.10028124]),
 'score_time': array([0.02797961, 0.02732158, 0.02829027, 0.02814627, 0.02827334]),
 'test_accuracy': array([0.88423423, 0.88378378, 0.88328076, 0.8819288 , 0.8828301 ]),
 'test_balanced_accuracy': array([0.51634379, 0.53419042, 0.5108956 , 0.51671079, 0.51393101]),
 'test_average_precision': array([0.24540675, 0.26052408, 0.30309769, 0.28186759, 0.28013946]),
 'test_f1': array([0.06545455, 0.13422819, 0.04428044, 0.07092199, 0.05797101]),
 'test_roc_auc': array([0.66013006, 0.70151411, 0.70950842, 0.71569782, 0.69648036]),
 'test_mccf1_score': array([nan, nan, nan, nan, nan])}

In [5]:
from ehrml.utils import MlUtils

# Logistic regression on the XVitalsAvg features only.
# Stored under its own name: a later cell assigns `lrScores`, which would
# otherwise overwrite this result and make the runs impossible to compare.
lrVitalsAvgScores = MlUtils.buildLRModel(XVitalsAvg, y)
# Bare expression so the notebook renders the returned scores as cell output.
lrVitalsAvgScores

{'fit_time': array([0.14808893, 0.12459564, 0.11545753, 0.13739347, 0.13573694]),
 'score_time': array([0.0274148 , 0.02594209, 0.02639365, 0.02707648, 0.02686477]),
 'test_accuracy': array([0.88153153, 0.88153153, 0.8828301 , 0.8819288 , 0.8819288 ]),
 'test_balanced_accuracy': array([0.51151953, 0.50493694, 0.50899446, 0.50519217, 0.50354666]),
 'test_average_precision': array([0.27119531, 0.27119152, 0.26529958, 0.25180596, 0.27646488]),
 'test_f1': array([0.05054152, 0.02230483, 0.03703704, 0.02238806, 0.01503759]),
 'test_roc_auc': array([0.68180909, 0.6920735 , 0.67646396, 0.68151034, 0.6532265 ]),
 'test_mccf1_score': array([nan, nan, nan, nan, nan])}

In [6]:
from ehrml.utils import MlUtils


# Logistic regression on X (presumably the full feature matrix -- TODO confirm
# against DataUtils.readEicuData). The result is a dict of per-fold arrays, as
# the rendered output below this cell shows.
lrScores = MlUtils.buildLRModel(X, y)
# Bare expression so the notebook renders the scores dict as the cell output.
lrScores

{'fit_time': array([3.66100478, 3.14904356, 2.79392457, 2.73756933, 3.02086067]),
 'score_time': array([0.03015971, 0.03062677, 0.0322907 , 0.03085637, 0.02987194]),
 'test_accuracy': array([0.88693694, 0.88783784, 0.89184317, 0.88733664, 0.89274448]),
 'test_balanced_accuracy': array([0.59686783, 0.60560705, 0.58321864, 0.59053551, 0.57879334]),
 'test_average_precision': array([0.40106576, 0.39260343, 0.42619618, 0.40643092, 0.44219159]),
 'test_f1': array([0.31232877, 0.33243968, 0.28143713, 0.29775281, 0.26993865]),
 'test_roc_auc': array([0.77682726, 0.80451378, 0.81244411, 0.80200728, 0.79120693]),
 'test_mccf1_score': array([nan, nan, nan, nan, nan])}

In [3]:
from ehrml.utils import MlUtils

# The ensemble builder takes the ten per-aggregation feature sets positionally,
# vitals first and then labs, followed by the target. Grouping them keeps the
# call readable; star-unpacking passes them in exactly the original order.
vitalsFeatureSets = (XVitalsAvg, XVitalsMin, XVitalsMax, XVitalsFirst, XVitalsLast)
labsFeatureSets = (XLabsAvg, XLabsMin, XLabsMax, XLabsFirst, XLabsLast)

xgbEnsembleScores = MlUtils.buildEnsembleXGBoostModel(
    *vitalsFeatureSets,
    *labsFeatureSets,
    y,
)
# Bare expression so the notebook renders the scores dict as the cell output.
xgbEnsembleScores

2023-11-15 00:13:08,946 - EHR-ML - INFO - Split data to test and train sets
2023-11-15 00:13:09,510 - EHR-ML - INFO - Performing Hyperparameter optimisation for XGBoost
2023-11-15 00:13:09,512 - EHR-ML - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-11-15 00:13:41,940 - EHR-ML - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-11-15 00:13:51,576 - EHR-ML - INFO - Hyperparameter optimisation for: {'min_child_weight': range(1, 10)}
2023-11-15 00:13:57,198 - EHR-ML - INFO - Hyperparameter optimisation for: {'gamma': [0.0, 0.1, 0.2, 0.3, 0.4]}
2023-11-15 00:13:59,505 - EHR-ML - INFO - Hyperparameter optimisation for: {'subsample': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], 'colsample_bytree': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}
2023-11-15 00:14:47,652 - EHR-ML - INFO - Hyperparameter optimisation for: {'reg_alpha': [0, 1e-05, 0.001, 0.1, 10]}
2023-11-15 00:

{'fit_time': array([0.34140635, 0.28912258, 0.30054426, 0.29913259, 0.29670596]),
 'score_time': array([0.03859925, 0.03659058, 0.03638148, 0.03576899, 0.03736639]),
 'test_accuracy': array([0.88738739, 0.88288288, 0.88108108, 0.88918919, 0.88187556]),
 'test_balanced_accuracy': array([0.59860104, 0.58621491, 0.56871195, 0.59605128, 0.58896669]),
 'test_average_precision': array([0.36049144, 0.33352291, 0.29843757, 0.40719216, 0.32933688]),
 'test_f1': array([0.31693989, 0.28571429, 0.24137931, 0.31284916, 0.29189189]),
 'test_roc_auc': array([0.75198302, 0.76252556, 0.73658814, 0.79605359, 0.74167209])}