# Perform prediction

## Import libraries

In [1]:
import logging
import os
import sys
import warnings

# Logger named "Pipeline", echoed to stdout so INFO messages show up in the
# cell outputs (the recorded prediction outputs in this notebook carry this name).
log = logging.getLogger("Pipeline")
log.setLevel(logging.INFO)

# Named `formatter` rather than `format` so the built-in format() stays usable
# in later cells.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

ch = logging.StreamHandler(sys.stdout)
ch.setFormatter(formatter)

# logging.getLogger() hands back the same logger object on every call, so
# re-running this cell would otherwise stack one more handler each time and
# print every message repeatedly. Attach only if no stdout handler is present.
stdout_handler_attached = any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in log.handlers
)
if not stdout_handler_attached:
    log.addHandler(ch)

# Silence every warning category for the rest of the session.
warnings.simplefilter(action='ignore', category=Warning)

# Put the parent directory on the import path so `predict_mortality` can be
# imported from there.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import predict_mortality as pm

## Define data directory

A top-level directory that stores all the data for this experiment.

In [2]:
import os

# Root directory for all data used by this experiment.
# Set the MORTALITY_DATA_DIR environment variable to run the notebook against
# a different location; when it is unset or empty the original path is used.
# os.path.join(..., '') guarantees a trailing separator, so code that builds
# paths by plain string concatenation with dataDirName keeps working.
dataDirName = os.path.join(
    os.environ.get('MORTALITY_DATA_DIR')
    or '/home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/',
    '',
)

## Perform prediction on raw dataset

### Copy the raw file to working directory

In [3]:
import os
import subprocess


# Stage the raw data matrix as <dataDirName>/data_matrix.csv (presumably the
# file pm.runPredictionsForAllTargets picks up from dirPath - TODO confirm).
# Passing an argument list (no shell) keeps paths containing spaces or shell
# metacharacters intact, and check=True raises CalledProcessError if the copy
# fails, so the prediction cell cannot silently run on a stale data_matrix.csv.
# The return code (0 on success) is still the cell's displayed value.
subprocess.run(
    [
        'cp',
        os.path.join(dataDirName, 'data_matrix', 'data_matrix_raw.csv'),
        os.path.join(dataDirName, 'data_matrix.csv'),
    ],
    check=True,
).returncode

0

### Perform prediction

In [4]:
# Run the prediction pipeline with the 'raw' label on the data under dataDirName.
# In the recorded run the pipeline logged its directory name as
# raw_icd_vb_0_va_72_lb_0_la_72, i.e. the label plus the four window values.
pm.runPredictionsForAllTargets(
    label='raw',
    dirPath=dataDirName,
    # Observation windows for vitals and labs
    # (units not visible here; presumably hours - TODO confirm).
    vitalsBefore=0,
    vitalsAfter=72,
    labsBefore=0,
    labsAfter=72,
    # Only target 7 is run here (logged as targetStart 0, targetEnd 7).
    # The longer list below is kept for reference but is disabled:
    # targetList=[7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)],
    targetList=[7],
)

2023-09-24 20:33:02,687 - Pipeline - INFO - dirName: raw_icd_vb_0_va_72_lb_0_la_72
2023-09-24 20:33:02,688 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-09-24 20:33:02,688 - Pipeline - INFO - Reading data
2023-09-24 20:33:03,467 - Pipeline - INFO - Formatting data
2023-09-24 20:33:03,475 - Pipeline - INFO - Performing SFS
2023-09-24 20:37:21,033 - Pipeline - INFO - Building XGBoost model with all the features
2023-09-24 20:37:21,034 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-09-24 20:37:21,054 - Pipeline - INFO - Building the model
2023-09-24 20:37:21,055 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-09-24 20:38:50,014 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-09-24 20:39:21,507 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight': range(1

### Remove the data matrix file from working directory

In [5]:
import os
import subprocess


# Remove the staged data_matrix.csv so the next variant starts from a clean
# working directory. An argument list (no shell) is safe for paths with spaces
# or shell metacharacters, and check=True raises CalledProcessError if the
# removal fails instead of carrying on with a leftover file.
# The return code (0 on success) is still the cell's displayed value.
subprocess.run(
    ['rm', os.path.join(dataDirName, 'data_matrix.csv')],
    check=True,
).returncode

0

## Perform prediction on standardised dataset

### Copy the standardised file to working directory

In [6]:
import os
import subprocess


# Stage the standardised data matrix as <dataDirName>/data_matrix.csv
# (presumably the file pm.runPredictionsForAllTargets picks up from dirPath -
# TODO confirm).
# Passing an argument list (no shell) keeps paths containing spaces or shell
# metacharacters intact, and check=True raises CalledProcessError if the copy
# fails, so the prediction cell cannot silently run on a stale data_matrix.csv.
# The return code (0 on success) is still the cell's displayed value.
subprocess.run(
    [
        'cp',
        os.path.join(dataDirName, 'data_matrix', 'data_matrix_standardised.csv'),
        os.path.join(dataDirName, 'data_matrix.csv'),
    ],
    check=True,
).returncode

0

### Perform prediction

In [7]:
# Run the prediction pipeline with the 'standardised' label on the data under
# dataDirName. In the recorded run the pipeline logged its directory name as
# standardised_icd_vb_0_va_72_lb_0_la_72, i.e. the label plus the four window values.
pm.runPredictionsForAllTargets(
    label='standardised',
    dirPath=dataDirName,
    # Observation windows for vitals and labs
    # (units not visible here; presumably hours - TODO confirm).
    vitalsBefore=0,
    vitalsAfter=72,
    labsBefore=0,
    labsAfter=72,
    # Only target 7 is run here (logged as targetStart 0, targetEnd 7).
    # The longer list below is kept for reference but is disabled:
    # targetList=[7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)],
    targetList=[7],
)

2023-09-24 20:48:25,342 - Pipeline - INFO - dirName: standardised_icd_vb_0_va_72_lb_0_la_72
2023-09-24 20:48:25,344 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-09-24 20:48:25,344 - Pipeline - INFO - Reading data
2023-09-24 20:48:25,944 - Pipeline - INFO - Formatting data
2023-09-24 20:48:25,951 - Pipeline - INFO - Performing SFS
2023-09-24 20:52:08,329 - Pipeline - INFO - Building XGBoost model with all the features
2023-09-24 20:52:08,331 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-09-24 20:52:08,331 - Pipeline - INFO - Building the model
2023-09-24 20:52:08,332 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-09-24 20:53:37,505 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-09-24 20:54:08,569 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight'

### Remove the data matrix file from working directory

In [8]:
import os
import subprocess


# Remove the staged data_matrix.csv so the next variant starts from a clean
# working directory. An argument list (no shell) is safe for paths with spaces
# or shell metacharacters, and check=True raises CalledProcessError if the
# removal fails instead of carrying on with a leftover file.
# The return code (0 on success) is still the cell's displayed value.
subprocess.run(
    ['rm', os.path.join(dataDirName, 'data_matrix.csv')],
    check=True,
).returncode

0

## Perform prediction on rescaled dataset

### Copy the rescaled file to working directory

In [9]:
import os
import subprocess


# Stage the rescaled data matrix as <dataDirName>/data_matrix.csv (presumably
# the file pm.runPredictionsForAllTargets picks up from dirPath - TODO confirm).
# Passing an argument list (no shell) keeps paths containing spaces or shell
# metacharacters intact, and check=True raises CalledProcessError if the copy
# fails, so the prediction cell cannot silently run on a stale data_matrix.csv.
# The return code (0 on success) is still the cell's displayed value.
subprocess.run(
    [
        'cp',
        os.path.join(dataDirName, 'data_matrix', 'data_matrix_rescaled.csv'),
        os.path.join(dataDirName, 'data_matrix.csv'),
    ],
    check=True,
).returncode

0

### Perform prediction

In [10]:
# Run the prediction pipeline with the 'rescaled' label on the data under
# dataDirName. In the recorded run the pipeline logged its directory name as
# rescaled_icd_vb_0_va_72_lb_0_la_72, i.e. the label plus the four window values.
pm.runPredictionsForAllTargets(
    label='rescaled',
    dirPath=dataDirName,
    # Observation windows for vitals and labs
    # (units not visible here; presumably hours - TODO confirm).
    vitalsBefore=0,
    vitalsAfter=72,
    labsBefore=0,
    labsAfter=72,
    # Only target 7 is run here (logged as targetStart 0, targetEnd 7).
    # The longer list below is kept for reference but is disabled:
    # targetList=[7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)],
    targetList=[7],
)

2023-09-24 21:04:58,211 - Pipeline - INFO - dirName: rescaled_icd_vb_0_va_72_lb_0_la_72
2023-09-24 21:04:58,212 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-09-24 21:04:58,212 - Pipeline - INFO - Reading data
2023-09-24 21:04:58,803 - Pipeline - INFO - Formatting data
2023-09-24 21:04:58,811 - Pipeline - INFO - Performing SFS
2023-09-24 21:08:52,708 - Pipeline - INFO - Building XGBoost model with all the features
2023-09-24 21:08:52,710 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-09-24 21:08:52,710 - Pipeline - INFO - Building the model
2023-09-24 21:08:52,711 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-09-24 21:10:22,473 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-09-24 21:10:53,791 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight': ra

### Remove the data matrix file from working directory

In [11]:
import os
import subprocess


# Remove the staged data_matrix.csv so the working directory is left clean
# after the last variant. An argument list (no shell) is safe for paths with
# spaces or shell metacharacters, and check=True raises CalledProcessError if
# the removal fails instead of passing silently.
# The return code (0 on success) is still the cell's displayed value.
subprocess.run(
    ['rm', os.path.join(dataDirName, 'data_matrix.csv')],
    check=True,
).returncode

0