## Let's predict WIES Value

In [1]:
# Importing the packages
import pickle
import pandas as pd
from sklearn import preprocessing

In [2]:
# Feature selection function
from sklearn.base import BaseEstimator, TransformerMixin

# custom function for RFI feature selection inside a pipeline
# here we use n_estimators=100
class RFIFeatureSelector(BaseEstimator, TransformerMixin):
    
    # class constructor 
    # make sure class attributes end with a "_"
    # per scikit-learn convention to avoid errors
    def __init__(self, n_features_=10):
        self.n_features_ = n_features_
        self.fs_indices_ = None

    # override the fit function
    def fit(self, X, y):
        from sklearn.ensemble import RandomForestRegressor
        from numpy import argsort
        model_rfi = RandomForestRegressor(n_estimators=80, max_depth=15)
        model_rfi.fit(X, y)
        self.fs_indices_ = argsort(model_rfi.feature_importances_)[::-1][0:self.n_features_] 
        return self 
    
    # override the transform function
    def transform(self, X, y=None):
        return X[:, self.fs_indices_]

In [3]:
# Load the provided model
loaded_model = pickle.load(open('PredictWIES.sav', 'rb'))

In [4]:
# Read Main data
DataAll = pd.read_csv('TheRelevant.csv', header = 0, sep=',', names = ['PatientID','IPEpisodeID','Gender Code',
                                                                     'Age On Admission','Birth Weight','ATSI Flag',
                                                                     'Admission Source Code','Admission Type Code',
                                                                     'Admission Specialty','Discharge Specialty',
                                                                     'Admission Ward Code','Discharge Ward Code',
                                                                     'Separation Method Code','DRG Code',
                                                                     'ICU LOS Hours','Total LOS Days','HITH Days',
                                                                     'LOS Type','WIES Value'])

DataAll.head()

Unnamed: 0,PatientID,IPEpisodeID,Gender Code,Age On Admission,Birth Weight,ATSI Flag,Admission Source Code,Admission Type Code,Admission Specialty,Discharge Specialty,Admission Ward Code,Discharge Ward Code,Separation Method Code,DRG Code,ICU LOS Hours,Total LOS Days,HITH Days,LOS Type,WIES Value
0,973538,3320175,M,56,,No,T,P,HIHC,HIHC,F2B,FHITH,H,K01C,,34,67.94,Multiday,8.3579
1,1198129,3327705,F,81,,No,S,S,GBS,GBS,SHITH,SHITH,H,G70A,,10,55.36,Multiday,4.0218
2,319442,3333120,M,82,,No,H,C,45,26,F2C,FHITH,H,K01C,,18,102.62,Multiday,6.7687
3,1210448,3353681,M,88,,No,H,C,CRGS,UGIG,F2E,F3E,H,G02A,146.62,41,0.0,Multiday,9.5665
4,387487,3355364,M,72,,No,H,C,IMSD,IMSD,S2A,S3E,S,F63A,,59,0.0,Multiday,9.7236


In [5]:
# Read the data you want to test on. Rename Test.csv to your file name.
# Please make sure your test data has columns as specified here.
DataTest = pd.read_csv('Test.csv', header = 0, sep=',', names = ['PatientID','IPEpisodeID','Gender Code',
                                                                     'Age On Admission','Birth Weight','ATSI Flag',
                                                                     'Admission Source Code','Admission Type Code',
                                                                     'Admission Specialty','Discharge Specialty',
                                                                     'Admission Ward Code','Discharge Ward Code',
                                                                     'Separation Method Code','DRG Code',
                                                                     'ICU LOS Hours','Total LOS Days','HITH Days',
                                                                     'LOS Type'])
DataTest.head()

Unnamed: 0,PatientID,IPEpisodeID,Gender Code,Age On Admission,Birth Weight,ATSI Flag,Admission Source Code,Admission Type Code,Admission Specialty,Discharge Specialty,Admission Ward Code,Discharge Ward Code,Separation Method Code,DRG Code,ICU LOS Hours,Total LOS Days,HITH Days,LOS Type
0,973538,3320175,M,56,,No,T,P,HIHC,HIHC,F2B,FHITH,H,K01C,,34,67.94,Multiday
1,1198129,3327705,F,81,,No,S,S,GBS,GBS,SHITH,SHITH,H,G70A,,10,55.36,Multiday
2,319442,3333120,M,82,,No,H,C,45,26,F2C,FHITH,H,K01C,,18,102.62,Multiday
3,1210448,3353681,M,88,,No,H,C,CRGS,UGIG,F2E,F3E,H,G02A,146.62,41,0.0,Multiday
4,387487,3355364,M,72,,No,H,C,IMSD,IMSD,S2A,S3E,S,F63A,,59,0.0,Multiday


In [6]:
DataAll = DataAll.drop(columns=['IPEpisodeID','PatientID','DRG Code','Birth Weight','ICU LOS Hours','WIES Value'])
DataTest = DataTest.drop(columns=['IPEpisodeID','PatientID','DRG Code','Birth Weight','ICU LOS Hours'])


In [7]:
# Fixing dimension
DataAll['dummycolumn']=1
DataTest['dummycolumn']=0
data = pd.concat([DataAll,DataTest])

# One hot encoding
categorical_cols = data.columns[data.dtypes==object].tolist()

for col in categorical_cols:
    n = len(data[col].unique())
    if (n == 2):
        data[col] = pd.get_dummies(data[col], drop_first=True)
        
data = pd.get_dummies(data)

# Seperating the data post dimension fixing
Original = data[data ["dummycolumn"] == 1]
TestData = data[data ["dummycolumn"] == 0]
TestData.drop(["dummycolumn"], axis=1, inplace=True)
TestData.head()

# Min-max scaling
Data_scaler = preprocessing.MinMaxScaler()

Data_scaler.fit(TestData)
Test = Data_scaler.fit_transform(TestData)


Unnamed: 0,Gender Code,Age On Admission,ATSI Flag,Admission Source Code,Admission Type Code,Admission Specialty,Discharge Specialty,Admission Ward Code,Discharge Ward Code,Separation Method Code,Total LOS Days,HITH Days,LOS Type,dummycolumn
148320,M,78,No,H,P,DUA,DUA,SDHM,SDHM,H,1,0.0,Same day,1
42149,M,55,No,H,P,34,34,S1E,S1E,H,1,0.0,Same day,1
226709,F,28,No,H,M,OBST2,OBST2,SWPCC,S1B,H,2,0.0,Multiday,1
69287,F,20,No,H,C,44,44,FEOU,FEOU,H,1,0.0,Same day,1
154613,M,37,No,H,P,DUA,DUA,WDU,WDU,H,1,0.0,Same day,1
72506,M,36,No,H,C,44,44,FEOU,FEOU,H,1,0.0,Overnight,1
216022,F,0,No,H,C,12,NBS,SSCNI,SSCNI,H,2,0.0,Multiday,1
133767,M,73,No,H,P,UGIG,UGIG,FDPU,F3E,H,1,0.0,Overnight,1
19598,M,43,No,H,C,44,44,SEOU,SEOU,H,1,0.0,Same day,1
142193,F,48,No,H,P,9,9,S1EMA,S1EMA,H,1,0.0,Same day,1


In [12]:
# Prediction
prediction = loaded_model.predict(Test).sum()

print("Predicted WIES Value -", prediction)

Predicted WIES Value - 1866.8596346889233


In [13]:
#Actual Sum - 1734.32