# Perform prediction

## Import libraries

In [1]:
import logging
import os
import sys
import warnings

# Notebook-wide logger. The prediction runs further down emit their progress
# records under this same "Pipeline" name (see the recorded cell outputs).
log = logging.getLogger("Pipeline")
log.setLevel(logging.INFO)

# Named `logFormatter` instead of `format` so the builtin format() is not
# shadowed for the remainder of the kernel session.
logFormatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

ch = logging.StreamHandler(sys.stdout)
ch.setFormatter(logFormatter)

# logging.getLogger returns the same logger object on every call, so
# re-running this cell would otherwise attach one more stdout handler each
# time and every record would be printed multiple times.
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in log.handlers
):
    log.addHandler(ch)

# NOTE(review): this silences every warning category (Warning is the base
# class), including deprecation and convergence warnings - confirm that this
# is intended for the final experiment runs.
warnings.simplefilter(action='ignore', category=Warning)

# Make the parent directory importable so that predict_mortality can be
# found; the guard keeps sys.path free of duplicates when the cell is re-run.
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

import predict_mortality as pm

## Define data directory

The top-level directory that stores all the data for this experiment.

In [2]:
# Top-level data directory for this experiment. The MORTALITY_DATA_DIR
# environment variable (optional) overrides the default location so the
# notebook can be run on another machine without editing this cell.
# os.path.join(..., '') guarantees a trailing path separator, which the
# string concatenations in later cells rely on.
dataDirName = os.path.join(
    os.environ.get(
        'MORTALITY_DATA_DIR',
        '/home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/',
    ),
    '',
)

## Perform prediction on raw dataset

### Copy the raw file to working directory

In [3]:
import os
import subprocess

# Stage the raw data matrix as data_matrix.csv in the working directory
# (presumably the file the prediction call in the next cell reads - verify
# against predict_mortality).
sourcePath = os.path.join(dataDirName, 'data_matrix', 'data_matrix_raw.csv')
workingPath = os.path.join(dataDirName, 'data_matrix.csv')

# An argument list (no shell) keeps paths containing spaces or shell
# metacharacters intact, and check=True raises CalledProcessError when the
# copy fails instead of letting later cells run on a stale or missing file.
# The exit status is the cell's displayed value (0 on success).
subprocess.run(['cp', sourcePath, workingPath], check=True).returncode

0

### Perform prediction

In [4]:
# Run the prediction pipeline on the raw data matrix. Per the recorded log
# output, the label prefixes the results directory name and the run covers
# reading/formatting the data, SFS, and XGBoost hyperparameter optimisation.
# The before/after values delimit the vitals and labs windows (presumably in
# hours - TODO confirm against predict_mortality).
# Only target 7 is run here (logged as targetStart 0, targetEnd 7); the full
# experiment list would be:
#   [7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)]
pm.runPredictionsForAllTargets(
    label='raw',
    dirPath=dataDirName,
    vitalsBefore=0,
    vitalsAfter=72,
    labsBefore=0,
    labsAfter=72,
    targetList=[7],
)

2023-10-10 10:13:42,227 - Pipeline - INFO - dirName: raw_icd_vb_0_va_72_lb_0_la_72
2023-10-10 10:13:42,227 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-10-10 10:13:42,228 - Pipeline - INFO - Reading data
2023-10-10 10:13:43,025 - Pipeline - INFO - Formatting data
2023-10-10 10:13:43,032 - Pipeline - INFO - Performing SFS
2023-10-10 10:17:58,126 - Pipeline - INFO - Building XGBoost model with all the features
2023-10-10 10:17:58,127 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-10-10 10:17:58,152 - Pipeline - INFO - Building the model
2023-10-10 10:17:58,153 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-10-10 10:19:26,804 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-10-10 10:19:58,224 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight': range(1

### Remove the data matrix file from working directory

In [5]:
import os
import subprocess

# Remove the staged copy so the next experiment cannot silently reuse the
# raw data matrix.
workingPath = os.path.join(dataDirName, 'data_matrix.csv')

# An argument list (no shell) keeps unusual path characters intact, and
# check=True raises CalledProcessError if the removal fails rather than
# only displaying a non-zero status. The exit status is the cell's
# displayed value (0 on success).
subprocess.run(['rm', workingPath], check=True).returncode

0

## Perform prediction on standardised dataset

### Copy the standardised file to working directory

In [6]:
import os
import subprocess

# Stage the standardised data matrix as data_matrix.csv in the working
# directory (presumably the file the prediction call in the next cell
# reads - verify against predict_mortality).
sourcePath = os.path.join(dataDirName, 'data_matrix', 'data_matrix_standardised.csv')
workingPath = os.path.join(dataDirName, 'data_matrix.csv')

# An argument list (no shell) keeps paths containing spaces or shell
# metacharacters intact, and check=True raises CalledProcessError when the
# copy fails instead of letting later cells run on a stale or missing file.
# The exit status is the cell's displayed value (0 on success).
subprocess.run(['cp', sourcePath, workingPath], check=True).returncode

0

### Perform prediction

In [7]:
# Run the prediction pipeline on the standardised data matrix. Per the
# recorded log output, the label prefixes the results directory name and the
# run covers reading/formatting the data, SFS, and XGBoost hyperparameter
# optimisation.
# The before/after values delimit the vitals and labs windows (presumably in
# hours - TODO confirm against predict_mortality).
# Only target 7 is run here (logged as targetStart 0, targetEnd 7); the full
# experiment list would be:
#   [7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)]
pm.runPredictionsForAllTargets(
    label='standardised',
    dirPath=dataDirName,
    vitalsBefore=0,
    vitalsAfter=72,
    labsBefore=0,
    labsAfter=72,
    targetList=[7],
)

2023-10-10 10:29:11,652 - Pipeline - INFO - dirName: standardised_icd_vb_0_va_72_lb_0_la_72
2023-10-10 10:29:11,653 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-10-10 10:29:11,653 - Pipeline - INFO - Reading data


2023-10-10 10:29:12,256 - Pipeline - INFO - Formatting data
2023-10-10 10:29:12,263 - Pipeline - INFO - Performing SFS
2023-10-10 10:33:27,785 - Pipeline - INFO - Building XGBoost model with all the features
2023-10-10 10:33:27,787 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-10-10 10:33:27,787 - Pipeline - INFO - Building the model
2023-10-10 10:33:27,788 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-10-10 10:34:57,355 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-10-10 10:35:28,570 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight': range(1, 10)}
2023-10-10 10:35:38,602 - Pipeline - INFO - Hyperparameter optimisation for: {'gamma': [0.0, 0.1, 0.2, 0.3, 0.4]}
2023-10-10 10:35:44,905 - Pipeline - INFO - Hyperparameter optimisation for: {'subsample': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], 'co

### Remove the data matrix file from working directory

In [8]:
import os
import subprocess

# Remove the staged copy so the next experiment cannot silently reuse the
# standardised data matrix.
workingPath = os.path.join(dataDirName, 'data_matrix.csv')

# An argument list (no shell) keeps unusual path characters intact, and
# check=True raises CalledProcessError if the removal fails rather than
# only displaying a non-zero status. The exit status is the cell's
# displayed value (0 on success).
subprocess.run(['rm', workingPath], check=True).returncode

0

## Perform prediction on rescaled dataset

### Copy the rescaled file to working directory

In [9]:
import os
import subprocess

# Stage the rescaled data matrix as data_matrix.csv in the working
# directory (presumably the file the prediction call in the next cell
# reads - verify against predict_mortality).
sourcePath = os.path.join(dataDirName, 'data_matrix', 'data_matrix_rescaled.csv')
workingPath = os.path.join(dataDirName, 'data_matrix.csv')

# An argument list (no shell) keeps paths containing spaces or shell
# metacharacters intact, and check=True raises CalledProcessError when the
# copy fails instead of letting later cells run on a stale or missing file.
# The exit status is the cell's displayed value (0 on success).
subprocess.run(['cp', sourcePath, workingPath], check=True).returncode

0

### Perform prediction

In [10]:
# Run the prediction pipeline on the rescaled data matrix. Per the recorded
# log output, the label prefixes the results directory name and the run
# covers reading/formatting the data, SFS, and XGBoost hyperparameter
# optimisation.
# The before/after values delimit the vitals and labs windows (presumably in
# hours - TODO confirm against predict_mortality).
# Only target 7 is run here (logged as targetStart 0, targetEnd 7); the full
# experiment list would be:
#   [7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)]
pm.runPredictionsForAllTargets(
    label='rescaled',
    dirPath=dataDirName,
    vitalsBefore=0,
    vitalsAfter=72,
    labsBefore=0,
    labsAfter=72,
    targetList=[7],
)

2023-10-10 10:46:30,623 - Pipeline - INFO - dirName: rescaled_icd_vb_0_va_72_lb_0_la_72
2023-10-10 10:46:30,624 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-10-10 10:46:30,624 - Pipeline - INFO - Reading data
2023-10-10 10:46:31,217 - Pipeline - INFO - Formatting data
2023-10-10 10:46:31,224 - Pipeline - INFO - Performing SFS
2023-10-10 10:51:10,452 - Pipeline - INFO - Building XGBoost model with all the features
2023-10-10 10:51:10,454 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-10-10 10:51:10,455 - Pipeline - INFO - Building the model
2023-10-10 10:51:10,455 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-10-10 10:53:04,010 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-10-10 10:53:35,965 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight': ra

### Remove the data matrix file from working directory

In [11]:
import os
import subprocess

# Remove the staged copy so a later run cannot silently reuse the rescaled
# data matrix.
workingPath = os.path.join(dataDirName, 'data_matrix.csv')

# An argument list (no shell) keeps unusual path characters intact, and
# check=True raises CalledProcessError if the removal fails rather than
# only displaying a non-zero status. The exit status is the cell's
# displayed value (0 on success).
subprocess.run(['rm', workingPath], check=True).returncode

0