# Build ML Models

## Initialise logger

In [4]:
import logging
import sys
import warnings

# Shared "EHR-ML" logger: INFO and above is echoed to the notebook's stdout.
log = logging.getLogger("EHR-ML")
log.setLevel(logging.INFO)

# logging.getLogger returns the same object on every call, so re-running this
# cell would otherwise stack a second handler and print each message twice.
if not log.handlers:
    ch = logging.StreamHandler(sys.stdout)
    # The formatter is passed inline instead of being bound to a variable
    # called `format`, which would shadow the builtin for the whole session.
    ch.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    log.addHandler(ch)

# NOTE(review): this silences *every* warning category (Warning is the base
# class), including deprecation and convergence warnings — narrow the category
# if only specific noisy warnings should be hidden.
warnings.simplefilter(action='ignore', category=Warning)

In [5]:
import os
import sys

# Root directory of the pipeline; raises KeyError if the variable is not set.
pipelineBase = os.environ['EICU_EHR_PIPELINE_BASE']

# Make the EHR-ML checkout importable. The membership test keeps the cell
# idempotent: re-running it no longer appends the same entry again.
ehrMlPath = pipelineBase + "/EHR-ML"
if ehrMlPath not in sys.path:
    sys.path.append(ehrMlPath)

# Must come after the sys.path change above, otherwise `ehrml` is not found.
from ehrml.utils import DataUtils

# readEicuData returns twelve objects, unpacked in the order it yields them.
# NOTE(review): the keyword is called `dirPath` but receives a CSV file path —
# confirm against the helper's signature.
(
    X,
    XVitalsAvg,
    XVitalsMin,
    XVitalsMax,
    XVitalsFirst,
    XVitalsLast,
    XLabsAvg,
    XLabsMin,
    XLabsMax,
    XLabsFirst,
    XLabsLast,
    y,
) = DataUtils.readEicuData(dirPath=pipelineBase + '/data/final/data_matrix.csv')

In [6]:
# Sanity check: show the shape of the XVitalsAvg object returned by readEicuData.
XVitalsAvg.shape

(11097, 6)

In [3]:
# Build an LR model (presumably logistic regression — confirm in MlUtils) using
# only the XLabsAvg features against the target y.
from ehrml.utils import MlUtils

# Per the recorded log, buildLRModel runs hyperparameter optimisation, builds
# the model and cross-validates it; the result is a dict of per-fold score arrays.
# NOTE(review): `lrScores` is reassigned by the later LR cells, so only the most
# recently executed result survives in the kernel.
lrScores = MlUtils.buildLRModel(XLabsAvg, y)
lrScores

2023-11-15 11:07:49,689 - EHR-ML - INFO - Performing Hyperparameter optimisation
2023-11-15 11:08:01,069 - EHR-ML - INFO - Building the model
2023-11-15 11:08:01,071 - EHR-ML - INFO - Performing cross-validation


{'fit_time': array([0.08584809, 0.07926106, 0.08050442, 0.083534  , 0.09469962]),
 'score_time': array([0.01054335, 0.01011515, 0.01016164, 0.00991106, 0.01022744]),
 'test_accuracy': array([0.88423423, 0.88378378, 0.88328076, 0.8819288 , 0.8828301 ]),
 'test_balanced_accuracy': array([0.51634379, 0.53419042, 0.5108956 , 0.51671079, 0.51393101]),
 'test_average_precision': array([0.24540675, 0.26052408, 0.30309769, 0.28186759, 0.28013946]),
 'test_f1': array([0.06545455, 0.13422819, 0.04428044, 0.07092199, 0.05797101]),
 'test_roc_auc': array([0.66013006, 0.70151411, 0.70950842, 0.71569782, 0.69648036]),
 'test_mccf1_score': array([0.27318147, 0.32223549, 0.25565682, 0.2726764 , 0.26450785])}

In [3]:
# Same LR pipeline as the other buildLRModel cells, restricted to the
# XVitalsAvg features.
from ehrml.utils import MlUtils

# NOTE(review): in the recorded output every fold's test_mccf1_score is nan and
# test_balanced_accuracy is close to 0.5 — worth investigating before relying
# on this run.
lrScores = MlUtils.buildLRModel(XVitalsAvg, y)
lrScores

2023-11-15 09:33:11,416 - EHR-ML - INFO - Performing Hyperparameter optimisation
2023-11-15 09:33:16,354 - EHR-ML - INFO - Building the model
2023-11-15 09:33:16,355 - EHR-ML - INFO - Performing cross-validation


{'fit_time': array([0.13916707, 0.11742806, 0.1088655 , 0.12665391, 0.12547636]),
 'score_time': array([0.02637315, 0.02582145, 0.02622485, 0.02617121, 0.0252161 ]),
 'test_accuracy': array([0.88153153, 0.88153153, 0.8828301 , 0.8819288 , 0.8819288 ]),
 'test_balanced_accuracy': array([0.51151953, 0.50493694, 0.50899446, 0.50519217, 0.50354666]),
 'test_average_precision': array([0.27119531, 0.27119152, 0.26529958, 0.25180596, 0.27646488]),
 'test_f1': array([0.05054152, 0.02230483, 0.03703704, 0.02238806, 0.01503759]),
 'test_roc_auc': array([0.68180909, 0.6920735 , 0.67646396, 0.68151034, 0.6532265 ]),
 'test_mccf1_score': array([nan, nan, nan, nan, nan])}

In [5]:
# LR pipeline on the full feature matrix X (the first object returned by
# readEicuData) rather than a single feature subset.
from ehrml.utils import MlUtils


# The recorded log shows hyperparameter optimisation taking roughly 2.5 minutes
# for this input, far longer than for the single-subset cells.
lrScores = MlUtils.buildLRModel(X, y)
lrScores

2023-11-15 11:16:12,093 - EHR-ML - INFO - Performing Hyperparameter optimisation
2023-11-15 11:18:36,173 - EHR-ML - INFO - Building the model
2023-11-15 11:18:36,175 - EHR-ML - INFO - Performing cross-validation


{'fit_time': array([3.0500493 , 3.14974451, 2.6080153 , 3.10153484, 3.75684047]),
 'score_time': array([0.01347136, 0.01285028, 0.01343441, 0.01270795, 0.01310468]),
 'test_accuracy': array([0.88693694, 0.88783784, 0.89184317, 0.88733664, 0.89274448]),
 'test_balanced_accuracy': array([0.59686783, 0.60560705, 0.58321864, 0.59053551, 0.57879334]),
 'test_average_precision': array([0.40106576, 0.39260343, 0.42619618, 0.40643092, 0.44219159]),
 'test_f1': array([0.31232877, 0.33243968, 0.28143713, 0.29775281, 0.26993865]),
 'test_roc_auc': array([0.77682726, 0.80451378, 0.81244411, 0.80200728, 0.79120693]),
 'test_mccf1_score': array([0.45422979, 0.46938483, 0.43570718, 0.44392354, 0.4285272 ])}

In [4]:
# Train and cross-validate the XGBoost ensemble, passing the full matrix X plus
# each of the ten vitals/labs feature subsets and the target y.
from ehrml.utils import MlUtils


# Per the recorded log, the helper splits the data into train and test sets and
# then runs staged hyperparameter searches (max_depth/scale_pos_weight,
# n_estimators, min_child_weight, gamma, subsample/colsample_bytree, reg_alpha).
# The displayed result is a dict of per-fold score arrays.
xgbEnsembleScores = MlUtils.buildEnsembleXGBoostModel(X, XVitalsAvg, XVitalsMin, XVitalsMax, XVitalsFirst, XVitalsLast, XLabsAvg, XLabsMin, XLabsMax, XLabsFirst, XLabsLast, y)
xgbEnsembleScores

2023-11-15 11:37:27,738 - EHR-ML - INFO - Split data to test and train sets
2023-11-15 11:37:28,244 - EHR-ML - INFO - Performing Hyperparameter optimisation for XGBoost smaller models
2023-11-15 11:37:28,245 - EHR-ML - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-11-15 11:38:01,140 - EHR-ML - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-11-15 11:38:09,612 - EHR-ML - INFO - Hyperparameter optimisation for: {'min_child_weight': range(1, 10)}
2023-11-15 11:38:14,722 - EHR-ML - INFO - Hyperparameter optimisation for: {'gamma': [0.0, 0.1, 0.2, 0.3, 0.4]}
2023-11-15 11:38:16,747 - EHR-ML - INFO - Hyperparameter optimisation for: {'subsample': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], 'colsample_bytree': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}
2023-11-15 11:39:00,473 - EHR-ML - INFO - Hyperparameter optimisation for: {'reg_alpha': [0, 1e-05, 0.001, 0.1, 10]}

{'fit_time': array([0.45201397, 0.45436692, 0.45428658, 0.44965506, 0.44824982]),
 'score_time': array([0.02246952, 0.02125525, 0.02094316, 0.02063608, 0.02125502]),
 'test_accuracy': array([0.89459459, 0.88918919, 0.88108108, 0.88468468, 0.88548242]),
 'test_balanced_accuracy': array([0.61252091, 0.6061768 , 0.56546433, 0.56426378, 0.58773766]),
 'test_average_precision': array([0.41081057, 0.37638874, 0.3662031 , 0.3752453 , 0.34244457]),
 'test_f1': array([0.35359116, 0.33513514, 0.23255814, 0.22891566, 0.29050279]),
 'test_roc_auc': array([0.73860538, 0.78283603, 0.78801918, 0.77135777, 0.7434788 ]),
 'test_mccf1_score': array([0.4891316 , 0.47234271, 0.39302632, 0.39304367, 0.4377517 ])}

In [1]:
# Hard-coded snapshot of the per-fold scores printed by the
# buildEnsembleXGBoostModel cell, stored as plain Python lists (one value per
# fold) so the scores are available without re-running the model cell.
xgbEnsembleScores = dict(
    fit_time=[0.45201397, 0.45436692, 0.45428658, 0.44965506, 0.44824982],
    score_time=[0.02246952, 0.02125525, 0.02094316, 0.02063608, 0.02125502],
    test_accuracy=[0.89459459, 0.88918919, 0.88108108, 0.88468468, 0.88548242],
    test_balanced_accuracy=[0.61252091, 0.6061768, 0.56546433, 0.56426378, 0.58773766],
    test_average_precision=[0.41081057, 0.37638874, 0.3662031, 0.3752453, 0.34244457],
    test_f1=[0.35359116, 0.33513514, 0.23255814, 0.22891566, 0.29050279],
    test_roc_auc=[0.73860538, 0.78283603, 0.78801918, 0.77135777, 0.7434788],
    test_mccf1_score=[0.4891316, 0.47234271, 0.39302632, 0.39304367, 0.4377517],
)

In [2]:
# Display the score dict to confirm its contents before it is saved.
xgbEnsembleScores

{'fit_time': [0.45201397, 0.45436692, 0.45428658, 0.44965506, 0.44824982],
 'score_time': [0.02246952, 0.02125525, 0.02094316, 0.02063608, 0.02125502],
 'test_accuracy': [0.89459459, 0.88918919, 0.88108108, 0.88468468, 0.88548242],
 'test_balanced_accuracy': [0.61252091,
  0.6061768,
  0.56546433,
  0.56426378,
  0.58773766],
 'test_average_precision': [0.41081057,
  0.37638874,
  0.3662031,
  0.3752453,
  0.34244457],
 'test_f1': [0.35359116, 0.33513514, 0.23255814, 0.22891566, 0.29050279],
 'test_roc_auc': [0.73860538, 0.78283603, 0.78801918, 0.77135777, 0.7434788],
 'test_mccf1_score': [0.4891316,
  0.47234271,
  0.39302632,
  0.39304367,
  0.4377517]}

In [7]:
from ehrml.utils import DataUtils

# Output directory for this experiment, located under the pipeline base directory.
experimentDir = os.environ['EICU_EHR_PIPELINE_BASE'] + '/data/experiments/01_time_window_analysis'

# The file name encodes the two settings of this run.
# NOTE(review): "wb"/"wa" presumably stand for window-before / window-after —
# confirm against the experiment design.
wb, wa = 0, 3
scoresFileName = f"wb_{wb}_wa_{wa}.json"

# Write the cross-validation scores held in xgbEnsembleScores to that file.
DataUtils.saveCvScores(
    scores_dict=xgbEnsembleScores,
    dirPath=experimentDir,
    fileName=scoresFileName,
)