# Perform prediction

## Import libraries

In [1]:
import logging
import os
import sys
import warnings

# Logger named "Pipeline", set to INFO and echoed to stdout so that log
# records show up in the cell output.
log = logging.getLogger("Pipeline")
log.setLevel(logging.INFO)

# Called `formatter` (not `format`) so the built-in format() is not shadowed
# for every later cell of the notebook.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

ch = logging.StreamHandler(sys.stdout)
ch.setFormatter(formatter)

# logging.getLogger() hands back the same logger object on every call, so
# attaching the handler unconditionally would add one more handler each time
# this cell is re-run and every message would be printed several times.
if not log.handlers:
    log.addHandler(ch)

# NOTE: this silences every warning category for the rest of the session.
warnings.simplefilter(action='ignore', category=Warning)

# Make the parent directory importable before importing predict_mortality
# (presumably the module lives there -- confirm against the repository layout).
module_path = os.path.abspath(os.path.join('../'))
if module_path not in sys.path:
    sys.path.append(module_path)

import predict_mortality as pm

## Define data directory

A top-level directory that stores all the data for this experiment.

In [2]:
import os

# Top-level directory for this experiment's data. The location can be
# overridden through the MORTALITY_DATA_DIR environment variable so the
# notebook is not tied to one user's home directory; when the variable is
# unset or empty the original path is used.
# os.path.join(..., '') guarantees a trailing separator, which the cells that
# build paths by plain string concatenation rely on.
dataDirName = os.path.join(
    os.environ.get('MORTALITY_DATA_DIR')
    or '/home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/',
    '',
)

## Perform prediction on raw dataset

### Copy the raw file to working directory

In [3]:
import os
import subprocess

# Stage the raw data matrix as `data_matrix.csv` in the experiment directory
# (presumably the file name the prediction pipeline reads -- confirm in
# predict_mortality).
rawMatrixPath = os.path.join(dataDirName, 'data_matrix', 'data_matrix_raw.csv')
workingMatrixPath = os.path.join(dataDirName, 'data_matrix.csv')

# The command is passed as an argument list, so no shell parses the paths,
# and check=True raises CalledProcessError when the copy fails instead of
# only returning a non-zero status that is easy to overlook.
# The trailing expression displays the exit status (0 on success).
subprocess.run(['cp', rawMatrixPath, workingMatrixPath], check=True).returncode

0

### Perform prediction

In [4]:
# Run the prediction pipeline on the data staged in `dataDirName`, tagging the
# run with the label 'raw'.
# Only target 7 is run here; the log output reports it as targetStart 0,
# targetEnd 7. The full target list used elsewhere is
#   [7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)]
# (tuples presumably denote (start, end) windows -- confirm in predict_mortality).
pm.runPredictionsForAllTargets(
    label='raw',
    dirPath=dataDirName,
    vitalsBefore=0, vitalsAfter=72,  # vitals window; units not visible here
    labsBefore=0, labsAfter=72,      # labs window; units not visible here
    targetList=[7],
)

2023-09-21 20:47:27,385 - Pipeline - INFO - dirName: raw_icd_vb_0_va_72_lb_0_la_72
2023-09-21 20:47:27,386 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-09-21 20:47:27,386 - Pipeline - INFO - Reading data
2023-09-21 20:47:28,130 - Pipeline - INFO - Formatting data
2023-09-21 20:47:28,138 - Pipeline - INFO - Performing SFS
2023-09-21 20:52:21,295 - Pipeline - INFO - Building XGBoost model with all the features
2023-09-21 20:52:21,297 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-09-21 20:52:21,319 - Pipeline - INFO - Building the model
2023-09-21 20:52:21,320 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-09-21 20:53:51,108 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-09-21 20:54:35,148 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight': range(1

### Remove the data matrix file from working directory

In [5]:
import os
import subprocess

# Remove the staged `data_matrix.csv` so the next experiment cannot pick up
# this run's data by accident.
workingMatrixPath = os.path.join(dataDirName, 'data_matrix.csv')

# Argument list instead of a concatenated shell string; check=True raises
# CalledProcessError if the removal fails rather than silently leaving a
# stale file behind. The trailing expression displays the exit status.
subprocess.run(['rm', workingMatrixPath], check=True).returncode

0

## Perform prediction on standardised dataset

### Copy the standardised file to working directory

In [6]:
import os
import subprocess

# Stage the standardised data matrix as `data_matrix.csv` in the experiment
# directory (presumably the file name the prediction pipeline reads -- confirm
# in predict_mortality).
standardisedMatrixPath = os.path.join(dataDirName, 'data_matrix', 'data_matrix_standardised.csv')
workingMatrixPath = os.path.join(dataDirName, 'data_matrix.csv')

# The command is passed as an argument list, so no shell parses the paths,
# and check=True raises CalledProcessError when the copy fails instead of
# only returning a non-zero status that is easy to overlook.
# The trailing expression displays the exit status (0 on success).
subprocess.run(['cp', standardisedMatrixPath, workingMatrixPath], check=True).returncode

0

### Perform prediction

In [7]:
# Run the prediction pipeline on the data staged in `dataDirName`, tagging the
# run with the label 'standardised'.
# Only target 7 is run here; the log output reports it as targetStart 0,
# targetEnd 7. The full target list used elsewhere is
#   [7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)]
# (tuples presumably denote (start, end) windows -- confirm in predict_mortality).
pm.runPredictionsForAllTargets(
    label='standardised',
    dirPath=dataDirName,
    vitalsBefore=0, vitalsAfter=72,  # vitals window; units not visible here
    labsBefore=0, labsAfter=72,      # labs window; units not visible here
    targetList=[7],
)

2023-09-21 21:06:13,540 - Pipeline - INFO - dirName: standardised_icd_vb_0_va_72_lb_0_la_72
2023-09-21 21:06:13,541 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-09-21 21:06:13,541 - Pipeline - INFO - Reading data
2023-09-21 21:06:14,131 - Pipeline - INFO - Formatting data
2023-09-21 21:06:14,138 - Pipeline - INFO - Performing SFS
2023-09-21 21:10:52,277 - Pipeline - INFO - Building XGBoost model with all the features
2023-09-21 21:10:52,279 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-09-21 21:10:52,279 - Pipeline - INFO - Building the model
2023-09-21 21:10:52,280 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-09-21 21:12:22,109 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-09-21 21:13:06,383 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight'

### Remove the data matrix file from working directory

In [8]:
import os
import subprocess

# Remove the staged `data_matrix.csv` so the next experiment cannot pick up
# this run's data by accident.
workingMatrixPath = os.path.join(dataDirName, 'data_matrix.csv')

# Argument list instead of a concatenated shell string; check=True raises
# CalledProcessError if the removal fails rather than silently leaving a
# stale file behind. The trailing expression displays the exit status.
subprocess.run(['rm', workingMatrixPath], check=True).returncode

0

## Perform prediction on rescaled dataset

### Copy the rescaled file to working directory

In [9]:
import os
import subprocess

# Stage the rescaled data matrix as `data_matrix.csv` in the experiment
# directory (presumably the file name the prediction pipeline reads -- confirm
# in predict_mortality).
rescaledMatrixPath = os.path.join(dataDirName, 'data_matrix', 'data_matrix_rescaled.csv')
workingMatrixPath = os.path.join(dataDirName, 'data_matrix.csv')

# The command is passed as an argument list, so no shell parses the paths,
# and check=True raises CalledProcessError when the copy fails instead of
# only returning a non-zero status that is easy to overlook.
# The trailing expression displays the exit status (0 on success).
subprocess.run(['cp', rescaledMatrixPath, workingMatrixPath], check=True).returncode

0

### Perform prediction

In [10]:
# Run the prediction pipeline on the data staged in `dataDirName`, tagging the
# run with the label 'rescaled'.
# Only target 7 is run here; the log output reports it as targetStart 0,
# targetEnd 7. The full target list used elsewhere is
#   [7, 14, 21, 30, 60, 90, 120, (7, 14), (14, 21), (21, 30), (30, 60), (60, 90), (90, 120)]
# (tuples presumably denote (start, end) windows -- confirm in predict_mortality).
pm.runPredictionsForAllTargets(
    label='rescaled',
    dirPath=dataDirName,
    vitalsBefore=0, vitalsAfter=72,  # vitals window; units not visible here
    labsBefore=0, labsAfter=72,      # labs window; units not visible here
    targetList=[7],
)

2023-09-21 21:51:05,653 - Pipeline - INFO - dirName: rescaled_icd_vb_0_va_72_lb_0_la_72
2023-09-21 21:51:05,654 - Pipeline - INFO - Running Predictions for vb_0_va_72_lb_0_la_72, targetStart : 0, targetEnd : 7
2023-09-21 21:51:05,655 - Pipeline - INFO - Reading data
2023-09-21 21:51:06,266 - Pipeline - INFO - Formatting data
2023-09-21 21:51:06,273 - Pipeline - INFO - Performing SFS
2023-09-21 21:55:16,912 - Pipeline - INFO - Building XGBoost model with all the features
2023-09-21 21:55:16,913 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-09-21 21:55:16,914 - Pipeline - INFO - Building the model
2023-09-21 21:55:16,914 - Pipeline - INFO - Hyperparameter optimisation for: {'max_depth': range(1, 10), 'scale_pos_weight': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]}
2023-09-21 21:58:49,291 - Pipeline - INFO - Hyperparameter optimisation for: {'n_estimators': range(50, 250, 10)}
2023-09-21 21:59:33,607 - Pipeline - INFO - Hyperparameter optimisation for: {'min_child_weight': ra

R[write to console]: Error in ROCR::prediction(predictor, response) : 
  Number of classes is not equal to 2.
ROCR currently supports only evaluation of binary classification tasks.

R[write to console]: In addition: 
R[write to console]: 

R[write to console]: Error in ROCR::prediction(predictor, response) : 
  Number of classes is not equal to 2.
ROCR currently supports only evaluation of binary classification tasks.

R[write to console]: In addition: 

R[write to console]: 1: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages

R[write to console]: 2: 
R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages



2023-09-21 22:12:21,282 - Pipeline - INFO - Building Ensemble LR model with all the features
2023-09-21 22:12:21,283 - Pipeline - INFO - Performing Hyperparameter optimisation
2023-09-21 22:12:22,063 - Pipeline - INFO - Building the model
2023-09-21 22:12:22,064 - Pipeline - INFO - Performing cross-validation
2023-09-21 22:12:23,148 - Pipeline - INFO - Saving the CV results for all the models
2023-09-21 22:12:23,151 - Pipeline - INFO - Completed !!!


### Remove the data matrix file from working directory

In [11]:
import os
import subprocess

# Remove the staged `data_matrix.csv` so a later run cannot pick up this
# experiment's data by accident.
workingMatrixPath = os.path.join(dataDirName, 'data_matrix.csv')

# Argument list instead of a concatenated shell string; check=True raises
# CalledProcessError if the removal fails rather than silently leaving a
# stale file behind. The trailing expression displays the exit status.
subprocess.run(['rm', workingMatrixPath], check=True).returncode

0