In [1]:
import os
import pickle
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn import metrics

import sqlalchemy
import snowflake.connector
from sqlalchemy import create_engine
from snowflake.sqlalchemy import *

import Preprocessing
from Preprocessing import preprocess_data

import model_selection 
from model_selection import train_and_save_best_model

import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor


from datetime import datetime, timedelta
import time  
import pytz    
tz_NY = pytz.timezone('Asia/Kolkata')

import warnings
warnings.filterwarnings('ignore')

import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso, HuberRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn import metrics
from sklearn.metrics import r2_score 

ModuleNotFoundError: No module named 'pandas'

In [4]:
data = pd.read_csv("Data/health_data.csv")

In [5]:
# Assuming you have a DataFrame called "data" with the appropriate columns
# Convert "ADMISSION_DATE" and "DISCHARGE_DATE" to datetime format
data['ADMISSION_DATE'] = pd.to_datetime(data['ADMISSION_DATE'])
data['DISCHARGE_DATE'] = pd.to_datetime(data['DISCHARGE_DATE'])

# Create a new DataFrame "base" with the COALESCE transformations
base = data.copy()  # Create a copy of the original DataFrame

base['HOSPITAL_CODE'] = base['HOSPITAL_CODE'].fillna(0)
base['HOSPITAL_TYPE_CODE'] = base['HOSPITAL_TYPE_CODE'].fillna('None')
base['CITY_CODE_HOSPITAL'] = base['CITY_CODE_HOSPITAL'].fillna(0)
base['HOSPITAL_REGION_CODE'] = base['HOSPITAL_REGION_CODE'].fillna('None')
base['AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL'] = base['AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL'].fillna(0)
base['DEPARTMENT'] = base['DEPARTMENT'].fillna('None')
base['WARD_TYPE'] = base['WARD_TYPE'].fillna('None')
base['WARD_FACILITY_CODE'] = base['WARD_FACILITY_CODE'].fillna('None')
base['BED_GRADE'] = base['BED_GRADE'].fillna(0)
base['CITY_CODE_PATIENT'] = base['CITY_CODE_PATIENT'].fillna(0)
base['TYPE_OF_ADMISSION'] = base['TYPE_OF_ADMISSION'].fillna('None')
base['SEVERITY_OF_ILLNESS'] = base['SEVERITY_OF_ILLNESS'].fillna('Minor')
base['VISITORS_WITH_PATIENT'] = base['VISITORS_WITH_PATIENT'].fillna(0)
base['AGE'] = base['AGE'].fillna('None')
base['ADMISSION_DEPOSIT'] = base['ADMISSION_DEPOSIT'].fillna(0)

# Create a new DataFrame "base_with_features" with additional transformations
base_with_features = base.copy()

base_with_features['ADMISSION_YEAR'] = base_with_features['ADMISSION_DATE'].dt.year
base_with_features['ADMISSION_MONTH'] = base_with_features['ADMISSION_DATE'].dt.month
base_with_features['ADMISSION_DAY'] = base_with_features['ADMISSION_DATE'].dt.day
base_with_features['ADMISSION_MONTH_NAME'] = base_with_features['ADMISSION_DATE'].dt.month_name()
base_with_features['ADMISSION_DAY_NAME'] = base_with_features['ADMISSION_DATE'].dt.day_name()
base_with_features['ADMISSION_ILLNESS_COMB'] = base_with_features['TYPE_OF_ADMISSION'] + '-' + base_with_features['SEVERITY_OF_ILLNESS']
base_with_features['ILLNESS_BEDGRADE_COMB'] = base_with_features['SEVERITY_OF_ILLNESS'] + '-' + base_with_features['BED_GRADE'].astype(str)
base_with_features['DEPARTMENT_ILLNESS_COMB'] = base_with_features['DEPARTMENT'] + '-' + base_with_features['SEVERITY_OF_ILLNESS']


base_with_features['LENGTH_OF_STAY'] = (base_with_features['DISCHARGE_DATE'] - base_with_features['ADMISSION_DATE']).dt.days

# Additional categorization using CASE statement
def categorize_stay_duration(row):
    duration = row['LENGTH_OF_STAY']
    if duration <= 7:
        return 'Short Stay'
    elif duration <= 14:
        return 'Medium Stay'
    else:
        return 'Long Stay'

base_with_features['STAY_DURATION'] = base_with_features.apply(categorize_stay_duration, axis=1)

# Now, you have the equivalent Python code for the SQL query with datetime conversion and corrected column name
result = base_with_features.copy()

# You can print or work with the "result" DataFrame as needed
print(result)

        CASE_ID  HOSPITAL_CODE HOSPITAL_TYPE_CODE  CITY_CODE_HOSPITAL  \
0             1              8                  c                   3   
1             3             10                  e                   1   
2             4             26                  b                   2   
3             5             26                  b                   2   
4             6             23                  a                   6   
...         ...            ...                ...                 ...   
236699   255877             23                  a                   6   
236700   255879              3                  c                   3   
236701   255880              3                  c                   3   
236702   255882             19                  a                   7   
236703   255883             14                  a                   1   

       HOSPITAL_REGION_CODE  AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL    DEPARTMENT  \
0                         Z                

In [6]:
def check_n_create_model_features(df,feat_list):
    test = pd.DataFrame()
    for col in feat_list:
        if col in df.columns.tolist():
            test[col] = df[col]
        else:
            test[col] = 0
    
    return test

In [7]:
# Loading the scoring data
score_data = result.copy()
score_data.columns = [col.upper() for col in score_data.columns.tolist()]
    
# Applying the preprocessing steps
score_data_processed = Preprocessing.preprocess_data(score_data)
    
# Applying feature selection
final_feats = pd.read_pickle('MODEL_FEATS.pkl')
score_data_final = check_n_create_model_features(score_data_processed,final_feats)
    
    
# Writing the dataframe to snowflake as a table
#score_data_final = score_data_final.reset_index()
#score_data_table = pd.merge(score_data,score_data_final,on='CASE_ID',how='left')
#status = insert_predictions_to_snowflake_table(score_data_table)

In [15]:
# Train and Test split
X = score_data_final.drop('LENGTH_OF_STAY',axis=1)
y = score_data_final[['LENGTH_OF_STAY']]
print(X.shape,y.shape)

x_train, x_test, y_train, y_test = train_test_split(X,y,test_size=0.3,stratify=y,random_state=123)
print(x_train.shape,y_train.shape)
print(x_test.shape,y_test.shape)

(236704, 17) (236704, 1)
(165692, 17) (165692, 1)
(71012, 17) (71012, 1)


In [16]:
import model_selection 
from model_selection import train_and_save_best_model

In [24]:
train_and_save_best_model(train_and_save_best_model(x_train, y_train, x_test, y_test, "best_regression_model.pkl"))

Model: Linear Regression
Train R-squared: 0.29
RMSE: 14.19
MAE: 3.23

Model: Random Forest Regressor
Train R-squared: 0.89
RMSE: 13.96
MAE: 3.14

Model: XGBoost Regressor
Train R-squared: 0.46
RMSE: 12.73
MAE: 3.00

The best model is: XGBoost Regressor with RMSE: 12.73
The best model has been saved to best_regression_model.pkl


TypeError: train_and_save_best_model() missing 4 required positional arguments: 'y_train', 'x_test', 'y_test', and 'save_path'

In [23]:


best_model = joblib.load("best_regression_model.pkl")

# Now, you can use the best model for predictions
model = None  # Initialize the model variable

if isinstance(best_model, LinearRegression):
    model = LinearRegression()
elif isinstance(best_model, RandomForestRegressor):
    model = RandomForestRegressor()
elif isinstance(best_model, xgb.XGBRegressor):
    model = xgb.XGBRegressor()

In [30]:
if model == xgb.XGBRegressor():
    model.load_model('MODEL_XGB.model')
elif model == LinearRegression():
    model == pickle.load('MODEL_LR.pkl')
elif model == RandomForestRegressor():
    model == pickle.load('MODEL_RF.pkl')

score_data_final['PREDICTED_LOS'] = np.ceil(model.predict(score_data_final.drop('LENGTH_OF_STAY',axis=1)))


In [33]:
# Writing the dataframe to snowflake as a table
score_data_final = score_data_final.reset_index()
score_data_table = pd.merge(score_data,score_data_final,on='CASE_ID',how='left')

In [34]:
print(score_data.shape)
score_data.head()

(236704, 24)


Unnamed: 0_level_0,HOSPITAL_TYPE_CODE,CITY_CODE_HOSPITAL,HOSPITAL_REGION_CODE,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,DEPARTMENT,WARD_TYPE,WARD_FACILITY_CODE,BED_GRADE,CITY_CODE_PATIENT,TYPE_OF_ADMISSION,SEVERITY_OF_ILLNESS,VISITORS_WITH_PATIENT,AGE,ADMISSION_DEPOSIT,ADMISSION_YEAR,ADMISSION_MONTH,ADMISSION_DAY,ADMISSION_MONTH_NAME,ADMISSION_DAY_NAME,ADMISSION_ILLNESS_COMB,ILLNESS_BEDGRADE_COMB,DEPARTMENT_ILLNESS_COMB,LENGTH_OF_STAY,STAY_DURATION
CASE_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,c,3,Z,3,radiotherapy,R,F,2.0,7.0,Emergency,Extreme,2,51-60,4911,2022,8,10,August,Wednesday,Emergency-Extreme,Extreme-2.0,radiotherapy-Extreme,10,Medium Stay
3,e,1,X,2,anesthesia,S,E,2.0,7.0,Trauma,Extreme,2,51-60,4745,2022,9,9,September,Friday,Trauma-Extreme,Extreme-2.0,anesthesia-Extreme,40,Long Stay
4,b,2,Y,2,radiotherapy,R,D,2.0,7.0,Trauma,Extreme,2,51-60,7272,2022,9,22,September,Thursday,Trauma-Extreme,Extreme-2.0,radiotherapy-Extreme,50,Long Stay
5,b,2,Y,2,radiotherapy,S,D,2.0,7.0,Trauma,Extreme,2,51-60,5558,2022,11,2,November,Wednesday,Trauma-Extreme,Extreme-2.0,radiotherapy-Extreme,50,Long Stay
6,a,6,X,2,anesthesia,S,F,2.0,7.0,Trauma,Extreme,2,51-60,4449,2022,9,16,September,Friday,Trauma-Extreme,Extreme-2.0,anesthesia-Extreme,20,Long Stay


In [35]:
print(score_data_processed.shape)
score_data_processed.head()

(236704, 188)


Unnamed: 0_level_0,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,VISITORS_WITH_PATIENT,ADMISSION_DEPOSIT,LENGTH_OF_STAY,HOSPITAL_TYPE_CODE_a,HOSPITAL_TYPE_CODE_b,HOSPITAL_TYPE_CODE_c,HOSPITAL_TYPE_CODE_d,HOSPITAL_TYPE_CODE_e,HOSPITAL_TYPE_CODE_f,HOSPITAL_TYPE_CODE_g,CITY_CODE_HOSPITAL_1,CITY_CODE_HOSPITAL_2,CITY_CODE_HOSPITAL_3,CITY_CODE_HOSPITAL_4,CITY_CODE_HOSPITAL_5,CITY_CODE_HOSPITAL_6,CITY_CODE_HOSPITAL_7,CITY_CODE_HOSPITAL_9,CITY_CODE_HOSPITAL_10,CITY_CODE_HOSPITAL_11,CITY_CODE_HOSPITAL_13,HOSPITAL_REGION_CODE_X,HOSPITAL_REGION_CODE_Y,HOSPITAL_REGION_CODE_Z,DEPARTMENT_TB & Chest disease,DEPARTMENT_anesthesia,DEPARTMENT_gynecology,DEPARTMENT_radiotherapy,DEPARTMENT_surgery,WARD_TYPE_P,WARD_TYPE_Q,WARD_TYPE_R,WARD_TYPE_S,WARD_TYPE_T,WARD_TYPE_U,WARD_FACILITY_CODE_A,WARD_FACILITY_CODE_B,WARD_FACILITY_CODE_C,WARD_FACILITY_CODE_D,WARD_FACILITY_CODE_E,WARD_FACILITY_CODE_F,BED_GRADE_0.0,BED_GRADE_1.0,BED_GRADE_2.0,BED_GRADE_3.0,BED_GRADE_4.0,CITY_CODE_PATIENT_0.0,CITY_CODE_PATIENT_1.0,CITY_CODE_PATIENT_2.0,CITY_CODE_PATIENT_3.0,CITY_CODE_PATIENT_4.0,CITY_CODE_PATIENT_5.0,CITY_CODE_PATIENT_6.0,CITY_CODE_PATIENT_7.0,CITY_CODE_PATIENT_8.0,CITY_CODE_PATIENT_9.0,CITY_CODE_PATIENT_10.0,CITY_CODE_PATIENT_11.0,CITY_CODE_PATIENT_12.0,CITY_CODE_PATIENT_13.0,CITY_CODE_PATIENT_14.0,CITY_CODE_PATIENT_15.0,CITY_CODE_PATIENT_16.0,CITY_CODE_PATIENT_18.0,CITY_CODE_PATIENT_19.0,CITY_CODE_PATIENT_20.0,CITY_CODE_PATIENT_21.0,CITY_CODE_PATIENT_22.0,CITY_CODE_PATIENT_23.0,CITY_CODE_PATIENT_24.0,CITY_CODE_PATIENT_25.0,CITY_CODE_PATIENT_26.0,CITY_CODE_PATIENT_27.0,CITY_CODE_PATIENT_28.0,CITY_CODE_PATIENT_29.0,CITY_CODE_PATIENT_30.0,CITY_CODE_PATIENT_31.0,CITY_CODE_PATIENT_32.0,CITY_CODE_PATIENT_33.0,CITY_CODE_PATIENT_34.0,CITY_CODE_PATIENT_35.0,CITY_CODE_PATIENT_36.0,CITY_CODE_PATIENT_37.0,TYPE_OF_ADMISSION_Emergency,TYPE_OF_ADMISSION_Trauma,TYPE_OF_ADMISSION_Urgent,SEVERITY_OF_ILLNESS_Extreme,SEVERITY_OF_ILLNESS_Minor,SEVERITY_OF_ILLNESS_Moderate,AGE_0-10,AGE_20-Nov,AGE_21-30,AGE_31-40,AGE_41-50,AGE_51-60,AGE_61-70,AGE_71-80,AGE_81-90,AGE_91-100,ADMISSION_YEAR_2022,ADMISSION_MONTH_8,ADMISSION_MONTH_9,ADMISSION_MONTH_10,ADMISSION_MONTH_11,ADMISSION_DAY_1,ADMISSION_DAY_2,ADMISSION_DAY_3,ADMISSION_DAY_4,ADMISSION_DAY_5,ADMISSION_DAY_6,ADMISSION_DAY_7,ADMISSION_DAY_8,ADMISSION_DAY_9,ADMISSION_DAY_10,ADMISSION_DAY_11,ADMISSION_DAY_12,ADMISSION_DAY_13,ADMISSION_DAY_14,ADMISSION_DAY_15,ADMISSION_DAY_16,ADMISSION_DAY_17,ADMISSION_DAY_18,ADMISSION_DAY_19,ADMISSION_DAY_20,ADMISSION_DAY_21,ADMISSION_DAY_22,ADMISSION_DAY_23,ADMISSION_DAY_24,ADMISSION_DAY_25,ADMISSION_DAY_26,ADMISSION_DAY_27,ADMISSION_DAY_28,ADMISSION_DAY_29,ADMISSION_DAY_30,ADMISSION_DAY_31,ADMISSION_MONTH_NAME_August,ADMISSION_MONTH_NAME_November,ADMISSION_MONTH_NAME_October,ADMISSION_MONTH_NAME_September,ADMISSION_DAY_NAME_Friday,ADMISSION_DAY_NAME_Monday,ADMISSION_DAY_NAME_Saturday,ADMISSION_DAY_NAME_Sunday,ADMISSION_DAY_NAME_Thursday,ADMISSION_DAY_NAME_Tuesday,ADMISSION_DAY_NAME_Wednesday,ADMISSION_ILLNESS_COMB_Emergency-Extreme,ADMISSION_ILLNESS_COMB_Emergency-Minor,ADMISSION_ILLNESS_COMB_Emergency-Moderate,ADMISSION_ILLNESS_COMB_Trauma-Extreme,ADMISSION_ILLNESS_COMB_Trauma-Minor,ADMISSION_ILLNESS_COMB_Trauma-Moderate,ADMISSION_ILLNESS_COMB_Urgent-Extreme,ADMISSION_ILLNESS_COMB_Urgent-Minor,ADMISSION_ILLNESS_COMB_Urgent-Moderate,ILLNESS_BEDGRADE_COMB_Extreme-0.0,ILLNESS_BEDGRADE_COMB_Extreme-1.0,ILLNESS_BEDGRADE_COMB_Extreme-2.0,ILLNESS_BEDGRADE_COMB_Extreme-3.0,ILLNESS_BEDGRADE_COMB_Extreme-4.0,ILLNESS_BEDGRADE_COMB_Minor-0.0,ILLNESS_BEDGRADE_COMB_Minor-1.0,ILLNESS_BEDGRADE_COMB_Minor-2.0,ILLNESS_BEDGRADE_COMB_Minor-3.0,ILLNESS_BEDGRADE_COMB_Minor-4.0,ILLNESS_BEDGRADE_COMB_Moderate-0.0,ILLNESS_BEDGRADE_COMB_Moderate-1.0,ILLNESS_BEDGRADE_COMB_Moderate-2.0,ILLNESS_BEDGRADE_COMB_Moderate-3.0,ILLNESS_BEDGRADE_COMB_Moderate-4.0,DEPARTMENT_ILLNESS_COMB_TB & Chest disease-Extreme,DEPARTMENT_ILLNESS_COMB_TB & Chest disease-Minor,DEPARTMENT_ILLNESS_COMB_TB & Chest disease-Moderate,DEPARTMENT_ILLNESS_COMB_anesthesia-Extreme,DEPARTMENT_ILLNESS_COMB_anesthesia-Minor,DEPARTMENT_ILLNESS_COMB_anesthesia-Moderate,DEPARTMENT_ILLNESS_COMB_gynecology-Extreme,DEPARTMENT_ILLNESS_COMB_gynecology-Minor,DEPARTMENT_ILLNESS_COMB_gynecology-Moderate,DEPARTMENT_ILLNESS_COMB_radiotherapy-Extreme,DEPARTMENT_ILLNESS_COMB_radiotherapy-Minor,DEPARTMENT_ILLNESS_COMB_radiotherapy-Moderate,DEPARTMENT_ILLNESS_COMB_surgery-Extreme,DEPARTMENT_ILLNESS_COMB_surgery-Minor,DEPARTMENT_ILLNESS_COMB_surgery-Moderate,STAY_DURATION_Long Stay,STAY_DURATION_Medium Stay
CASE_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1
1,3,2,4911,10,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
3,2,2,4745,40,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0
4,2,2,7272,50,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0
5,2,2,5558,50,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0
6,2,2,4449,20,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0


In [36]:
print(score_data_final.shape)
score_data_final.head()

(236704, 21)


Unnamed: 0,index,CASE_ID,TYPE_OF_ADMISSION_Emergency,AGE_41-50,CITY_CODE_HOSPITAL_7,VISITORS_WITH_PATIENT,WARD_TYPE_S,ADMISSION_MONTH_NAME_November,TYPE_OF_ADMISSION_Trauma,ADMISSION_MONTH_11,CITY_CODE_HOSPITAL_2,WARD_TYPE_P,STAY_DURATION_Long Stay,ADMISSION_DEPOSIT,ADMISSION_MONTH_10,SEVERITY_OF_ILLNESS_Minor,WARD_TYPE_Q,STAY_DURATION_Medium Stay,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL,LENGTH_OF_STAY,PREDICTED_LOS
0,0,1,1,0,0,2,0,0,0,0,0,0,0,4911,0,0,0,1,3,10,10.0
1,1,3,0,0,0,2,1,0,1,0,0,0,1,4745,0,0,0,0,2,40,33.0
2,2,4,0,0,0,2,0,0,1,0,1,0,1,7272,0,0,0,0,2,50,35.0
3,3,5,0,0,0,2,1,1,1,1,1,0,1,5558,0,0,0,0,2,50,30.0
4,4,6,0,0,0,2,1,0,1,0,0,0,1,4449,0,0,0,0,2,20,35.0


In [37]:
print(score_data_table.shape)
score_data_table.head()

(236704, 45)


Unnamed: 0,CASE_ID,HOSPITAL_TYPE_CODE,CITY_CODE_HOSPITAL,HOSPITAL_REGION_CODE,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL_x,DEPARTMENT,WARD_TYPE,WARD_FACILITY_CODE,BED_GRADE,CITY_CODE_PATIENT,TYPE_OF_ADMISSION,SEVERITY_OF_ILLNESS,VISITORS_WITH_PATIENT_x,AGE,ADMISSION_DEPOSIT_x,ADMISSION_YEAR,ADMISSION_MONTH,ADMISSION_DAY,ADMISSION_MONTH_NAME,ADMISSION_DAY_NAME,ADMISSION_ILLNESS_COMB,ILLNESS_BEDGRADE_COMB,DEPARTMENT_ILLNESS_COMB,LENGTH_OF_STAY_x,STAY_DURATION,index,TYPE_OF_ADMISSION_Emergency,AGE_41-50,CITY_CODE_HOSPITAL_7,VISITORS_WITH_PATIENT_y,WARD_TYPE_S,ADMISSION_MONTH_NAME_November,TYPE_OF_ADMISSION_Trauma,ADMISSION_MONTH_11,CITY_CODE_HOSPITAL_2,WARD_TYPE_P,STAY_DURATION_Long Stay,ADMISSION_DEPOSIT_y,ADMISSION_MONTH_10,SEVERITY_OF_ILLNESS_Minor,WARD_TYPE_Q,STAY_DURATION_Medium Stay,AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL_y,LENGTH_OF_STAY_y,PREDICTED_LOS
0,1,c,3,Z,3,radiotherapy,R,F,2.0,7.0,Emergency,Extreme,2,51-60,4911,2022,8,10,August,Wednesday,Emergency-Extreme,Extreme-2.0,radiotherapy-Extreme,10,Medium Stay,0,1,0,0,2,0,0,0,0,0,0,0,4911,0,0,0,1,3,10,10.0
1,3,e,1,X,2,anesthesia,S,E,2.0,7.0,Trauma,Extreme,2,51-60,4745,2022,9,9,September,Friday,Trauma-Extreme,Extreme-2.0,anesthesia-Extreme,40,Long Stay,1,0,0,0,2,1,0,1,0,0,0,1,4745,0,0,0,0,2,40,33.0
2,4,b,2,Y,2,radiotherapy,R,D,2.0,7.0,Trauma,Extreme,2,51-60,7272,2022,9,22,September,Thursday,Trauma-Extreme,Extreme-2.0,radiotherapy-Extreme,50,Long Stay,2,0,0,0,2,0,0,1,0,1,0,1,7272,0,0,0,0,2,50,35.0
3,5,b,2,Y,2,radiotherapy,S,D,2.0,7.0,Trauma,Extreme,2,51-60,5558,2022,11,2,November,Wednesday,Trauma-Extreme,Extreme-2.0,radiotherapy-Extreme,50,Long Stay,3,0,0,0,2,1,1,1,1,1,0,1,5558,0,0,0,0,2,50,30.0
4,6,a,6,X,2,anesthesia,S,F,2.0,7.0,Trauma,Extreme,2,51-60,4449,2022,9,16,September,Friday,Trauma-Extreme,Extreme-2.0,anesthesia-Extreme,20,Long Stay,4,0,0,0,2,1,0,1,0,0,0,1,4449,0,0,0,0,2,20,35.0
