### Importing all the useful libraries

In [1]:
import pandas as pd
import numpy as np
import gc
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from ml_metrics import rmsle
import scikitplot as skplt
from scikitplot.estimators import plot_learning_curve
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

%matplotlib inline

### Loading the data

In [2]:
train = pd.read_hdf('../input/diabetic_train.h5')
test = pd.read_hdf('../input/diabetic_test.h5')
df = pd.concat([train, test])

### Data preprocessing

In [3]:
# Replace all '?', representing unknown, with NaN
df = df.replace('?',np.nan)

In [4]:
# Take a look at the available features
df.columns

Index(['encounter_id', 'patient_nbr', 'race', 'gender', 'age', 'weight',
       'admission_type_id', 'discharge_disposition_id', 'admission_source_id',
       'time_in_hospital', 'payer_code', 'medical_specialty',
       'num_lab_procedures', 'num_procedures', 'num_medications',
       'number_outpatient', 'number_emergency', 'number_inpatient', 'diag_1',
       'diag_2', 'diag_3', 'number_diagnoses', 'max_glu_serum', 'A1Cresult',
       'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide',
       'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
       'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
       'tolazamide', 'examide', 'citoglipton', 'insulin',
       'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone', 'change', 'diabetesMed', 'readmitted', 'id'],
      dtype='object')

In [5]:
# Numeric features
cols_num = ['time_in_hospital', 'num_lab_procedures', 'num_procedures', 'num_medications', 'number_outpatient', 
'number_emergency', 'number_inpatient', 'number_diagnoses']

In [6]:
# Categorical features
cols_cat = ['race', 'gender', 'max_glu_serum', 'A1Cresult', 
            'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide',
            'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
            'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
            'tolazamide','insulin',
            'glyburide-metformin', 'glipizide-metformin',
            'glimepiride-pioglitazone', 'metformin-rosiglitazone',
            'metformin-pioglitazone', 'change', 'diabetesMed', 'payer_code']

In [7]:
# Fill in missing values
df['race'] = df['race'].fillna('UNK')
df['payer_code'] = df['payer_code'].fillna('UNK')
df['medical_specialty'] = df['medical_specialty'].fillna('UNK')

In [8]:
# For medical_specialty, pick 10 most common ones - as we have many categories and some are not very common
top_10 = ['UNK','InternalMedicine','Family/GeneralPractice','Emergency/Trauma','Cardiology',
          'Surgery-General','Orthopedics','Nephrology','Orthopedics-Reconstructive','Radiologist']

In [9]:
df['med_spec'] = df['medical_specialty'].copy()
df.head()

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,medical_specialty,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,diag_1,diag_2,diag_3,number_diagnoses,max_glu_serum,A1Cresult,metformin,repaglinide,nateglinide,chlorpropamide,glimepiride,acetohexamide,glipizide,glyburide,tolbutamide,pioglitazone,rosiglitazone,acarbose,miglitol,troglitazone,tolazamide,examide,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted,id,med_spec
0,2278392,8222157,Caucasian,Female,[0-10),,6,25,1,1,UNK,Pediatrics-Endocrinology,41,0,1,0,0,0,250.83,,,1,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,0,0,Pediatrics-Endocrinology
2,64410,86047875,AfricanAmerican,Female,[20-30),,1,1,7,2,UNK,UNK,11,5,13,2,0,1,648.0,250.0,V27,6,,,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,True,0,2,UNK
3,500364,82442376,Caucasian,Male,[30-40),,1,1,7,2,UNK,UNK,44,1,16,0,0,0,8.0,250.43,403,7,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,True,True,0,3,UNK
4,16680,42519267,Caucasian,Male,[40-50),,1,1,7,1,UNK,UNK,51,0,8,0,0,0,197.0,157.0,250,5,,,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,True,True,0,4,UNK
8,12522,48330783,Caucasian,Female,[80-90),,2,1,4,13,UNK,UNK,68,2,28,0,0,0,398.0,427.0,38,8,,,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,True,True,0,8,UNK


In [10]:
# Substitute less common medical_specialty with "Other" category
df.loc[~df.med_spec.isin(top_10), 'med_spec'] = "Other"

In [11]:
# Some discharge disposition ids unfortunately indicate that a patient has passed away. 
# That may make for a useful feature for readmittion prediction
df['death'] = df.discharge_disposition_id.isin([11,13,14,19,20,21]).astype(int)
np.unique(df['death'])

array([0, 1])

In [12]:
cols_cat_num = ['admission_type_id', 'discharge_disposition_id', 'admission_source_id']

In [13]:
df[cols_cat_num] = df[cols_cat_num].astype('str')

In [14]:
df_cat = pd.get_dummies(df[cols_cat + cols_cat_num + ['med_spec']], drop_first=True)

In [15]:
df = pd.concat([df,df_cat], axis=1)

In [16]:
cols_all_cat = list(df_cat.columns)

In [17]:
age_id = {
    '[0-10)':0,
    '[10-20)':1,
    '[20-30)':2,
    '[30-40)':3,
    '[40-50)':4,
    '[50-60)':5,
    '[60-70)':6,
    '[70-80)':7,
    '[80-90)':8,
    '[90-100)':9     
}

df['age_group'] = df.age.replace(age_id)

In [18]:
df['has_weight'] = df['weight'].notnull().astype(int)

In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 66221 entries, 0 to 101764
Columns: 166 entries, encounter_id to has_weight
dtypes: bool(4), int64(58), object(16), uint8(88)
memory usage: 43.7+ MB


In [20]:
# Check number of each patients records in db
df['encounter_nr'] = df[ ['patient_nbr'] ].apply(lambda x: df[ df['patient_nbr'] == x['patient_nbr']].shape[0], axis=1)

In [21]:
cols_extra = ['age_group', 'has_weight', 'death', 'encounter_nr']

In [22]:
col2use = cols_num + cols_all_cat + cols_extra

In [23]:
#Let's have a look at all the feature columns that can be used
df[col2use].head()

Unnamed: 0,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,metformin,metformin.1,repaglinide,repaglinide.1,nateglinide,nateglinide.1,chlorpropamide,chlorpropamide.1,glimepiride,glimepiride.1,acetohexamide,acetohexamide.1,glipizide,glipizide.1,glyburide,glyburide.1,tolbutamide,tolbutamide.1,pioglitazone,pioglitazone.1,rosiglitazone,rosiglitazone.1,acarbose,acarbose.1,miglitol,miglitol.1,troglitazone,troglitazone.1,tolazamide,tolazamide.1,insulin,insulin.1,glyburide-metformin,glyburide-metformin.1,glipizide-metformin,glipizide-metformin.1,glimepiride-pioglitazone,glimepiride-pioglitazone.1,metformin-rosiglitazone,metformin-rosiglitazone.1,metformin-pioglitazone,metformin-pioglitazone.1,change,change.1,diabetesMed,diabetesMed.1,race_Asian,race_Caucasian,race_Hispanic,race_Other,race_UNK,gender_Male,gender_Unknown/Invalid,max_glu_serum_>300,max_glu_serum_None,max_glu_serum_Norm,A1Cresult_>8,A1Cresult_None,A1Cresult_Norm,payer_code_CH,payer_code_CM,payer_code_CP,payer_code_DM,payer_code_FR,payer_code_HM,payer_code_MC,payer_code_MD,payer_code_MP,payer_code_OG,payer_code_OT,payer_code_PO,payer_code_SI,payer_code_SP,payer_code_UN,payer_code_UNK,payer_code_WC,admission_type_id_2,admission_type_id_3,admission_type_id_4,admission_type_id_5,admission_type_id_6,admission_type_id_7,admission_type_id_8,discharge_disposition_id_10,discharge_disposition_id_11,discharge_disposition_id_12,discharge_disposition_id_13,discharge_disposition_id_14,discharge_disposition_id_15,discharge_disposition_id_16,discharge_disposition_id_17,discharge_disposition_id_18,discharge_disposition_id_19,discharge_disposition_id_2,discharge_disposition_id_20,discharge_disposition_id_22,discharge_disposition_id_23,discharge_disposition_id_24,discharge_disposition_id_25,discharge_disposition_id_27,discharge_disposition_id_28,discharge_disposition_id_3,discharge_disposition_id_4,discharge_disposition_id_5,discharge_disposition_id_6,discharge_disposition_id_7,discharge_disposition_id_8,discharge_disposition_id_9,admission_source_id_10,admission_source_id_11,admission_source_id_13,admission_source_id_14,admission_source_id_17,admission_source_id_2,admission_source_id_20,admission_source_id_22,admission_source_id_25,admission_source_id_3,admission_source_id_4,admission_source_id_5,admission_source_id_6,admission_source_id_7,admission_source_id_8,admission_source_id_9,med_spec_Emergency/Trauma,med_spec_Family/GeneralPractice,med_spec_InternalMedicine,med_spec_Nephrology,med_spec_Orthopedics,med_spec_Orthopedics-Reconstructive,med_spec_Other,med_spec_Radiologist,med_spec_Surgery-General,med_spec_UNK,age_group,has_weight,death,encounter_nr
0,1,41,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,False,False,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
2,2,11,5,13,2,0,1,6,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,True,True,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,1
3,2,44,1,16,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,True,True,True,True,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,3,0,0,1
4,1,51,0,8,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,True,True,True,True,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,4,0,0,1
8,13,68,2,28,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,True,True,True,True,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,8,0,0,1


In [24]:
#Dividing the data into test and train again
train = df[df.readmitted != '']
test = df[df.readmitted == '']

In [25]:
print(train.shape)
print(test.shape)

(33051, 167)
(33170, 167)


In [26]:
df_data = train[col2use + ['readmitted']]
df_data.head()

Unnamed: 0,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,metformin,metformin.1,repaglinide,repaglinide.1,nateglinide,nateglinide.1,chlorpropamide,chlorpropamide.1,glimepiride,glimepiride.1,acetohexamide,acetohexamide.1,glipizide,glipizide.1,glyburide,glyburide.1,tolbutamide,tolbutamide.1,pioglitazone,pioglitazone.1,rosiglitazone,rosiglitazone.1,acarbose,acarbose.1,miglitol,miglitol.1,troglitazone,troglitazone.1,tolazamide,tolazamide.1,insulin,insulin.1,glyburide-metformin,glyburide-metformin.1,glipizide-metformin,glipizide-metformin.1,glimepiride-pioglitazone,glimepiride-pioglitazone.1,metformin-rosiglitazone,metformin-rosiglitazone.1,metformin-pioglitazone,metformin-pioglitazone.1,change,change.1,diabetesMed,diabetesMed.1,race_Asian,race_Caucasian,race_Hispanic,race_Other,race_UNK,gender_Male,gender_Unknown/Invalid,max_glu_serum_>300,max_glu_serum_None,max_glu_serum_Norm,A1Cresult_>8,A1Cresult_None,A1Cresult_Norm,payer_code_CH,payer_code_CM,payer_code_CP,payer_code_DM,payer_code_FR,payer_code_HM,payer_code_MC,payer_code_MD,payer_code_MP,payer_code_OG,payer_code_OT,payer_code_PO,payer_code_SI,payer_code_SP,payer_code_UN,payer_code_UNK,payer_code_WC,admission_type_id_2,admission_type_id_3,admission_type_id_4,admission_type_id_5,admission_type_id_6,admission_type_id_7,admission_type_id_8,discharge_disposition_id_10,discharge_disposition_id_11,discharge_disposition_id_12,discharge_disposition_id_13,discharge_disposition_id_14,discharge_disposition_id_15,discharge_disposition_id_16,discharge_disposition_id_17,discharge_disposition_id_18,discharge_disposition_id_19,discharge_disposition_id_2,discharge_disposition_id_20,discharge_disposition_id_22,discharge_disposition_id_23,discharge_disposition_id_24,discharge_disposition_id_25,discharge_disposition_id_27,discharge_disposition_id_28,discharge_disposition_id_3,discharge_disposition_id_4,discharge_disposition_id_5,discharge_disposition_id_6,discharge_disposition_id_7,discharge_disposition_id_8,discharge_disposition_id_9,admission_source_id_10,admission_source_id_11,admission_source_id_13,admission_source_id_14,admission_source_id_17,admission_source_id_2,admission_source_id_20,admission_source_id_22,admission_source_id_25,admission_source_id_3,admission_source_id_4,admission_source_id_5,admission_source_id_6,admission_source_id_7,admission_source_id_8,admission_source_id_9,med_spec_Emergency/Trauma,med_spec_Family/GeneralPractice,med_spec_InternalMedicine,med_spec_Nephrology,med_spec_Orthopedics,med_spec_Orthopedics-Reconstructive,med_spec_Other,med_spec_Radiologist,med_spec_Surgery-General,med_spec_UNK,age_group,has_weight,death,encounter_nr,readmitted
0,1,41,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,False,False,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0
2,2,11,5,13,2,0,1,6,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,True,True,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,1,0
3,2,44,1,16,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,True,True,True,True,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,3,0,0,1,0
4,1,51,0,8,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,True,True,True,True,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,4,0,0,1,0
8,13,68,2,28,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,True,True,True,True,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,8,0,0,1,0


In [27]:
#Mix data into random order
df_data = df_data.sample(n= len(df_data), random_state=42)
df_data = df_data.reset_index(drop=True)

In [28]:
df_valid_test = df_data.sample(frac=0.30,random_state=42)
len(df_valid_test)/len(df_data)

0.29999092311881637

In [29]:
#Divade data into train,test and validation sets
df_test = df_valid_test.sample(frac=0.50,random_state=42)
df_valid = df_valid_test.drop(df_test.index)

In [30]:
df_train_all = df_data.drop(df_valid_test.index)

In [31]:
#Determine the prevalence of readmitted cases in each dataset
def calc_prevalence(y_actual):
    return (sum(y_actual)/len(y_actual))

In [32]:
print(calc_prevalence(df_test['readmitted'].values))
print(calc_prevalence(df_valid['readmitted'].values))
print(calc_prevalence(df_train_all['readmitted'].values))

17.365873336022588
17.207988702844464
17.31932918395574


In [33]:
df_train = df_train_all

In [34]:
df_train['readmitted_cat'] = (df_train['readmitted'].astype('int') > 0).astype(int)
# df_train = df_train.loc[:,~df_train.columns.duplicated()]
df_train.corr().nlargest(20, 'readmitted_cat')

Unnamed: 0,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,metformin,metformin.1,repaglinide,repaglinide.1,nateglinide,nateglinide.1,chlorpropamide,chlorpropamide.1,glimepiride,glimepiride.1,acetohexamide,acetohexamide.1,glipizide,glipizide.1,glyburide,glyburide.1,tolbutamide,tolbutamide.1,pioglitazone,pioglitazone.1,rosiglitazone,rosiglitazone.1,acarbose,acarbose.1,miglitol,miglitol.1,troglitazone,troglitazone.1,tolazamide,tolazamide.1,insulin,insulin.1,glyburide-metformin,glyburide-metformin.1,glipizide-metformin,glipizide-metformin.1,glimepiride-pioglitazone,glimepiride-pioglitazone.1,metformin-rosiglitazone,metformin-rosiglitazone.1,metformin-pioglitazone,metformin-pioglitazone.1,change,change.1,diabetesMed,diabetesMed.1,race_Asian,race_Caucasian,race_Hispanic,race_Other,race_UNK,gender_Male,gender_Unknown/Invalid,max_glu_serum_>300,max_glu_serum_None,max_glu_serum_Norm,A1Cresult_>8,A1Cresult_None,A1Cresult_Norm,payer_code_CH,payer_code_CM,payer_code_CP,payer_code_DM,payer_code_FR,payer_code_HM,payer_code_MC,payer_code_MD,payer_code_MP,payer_code_OG,payer_code_OT,payer_code_PO,payer_code_SI,payer_code_SP,payer_code_UN,payer_code_UNK,payer_code_WC,admission_type_id_2,admission_type_id_3,admission_type_id_4,admission_type_id_5,admission_type_id_6,admission_type_id_7,admission_type_id_8,discharge_disposition_id_10,discharge_disposition_id_11,discharge_disposition_id_12,discharge_disposition_id_13,discharge_disposition_id_14,discharge_disposition_id_15,discharge_disposition_id_16,discharge_disposition_id_17,discharge_disposition_id_18,discharge_disposition_id_19,discharge_disposition_id_2,discharge_disposition_id_20,discharge_disposition_id_22,discharge_disposition_id_23,discharge_disposition_id_24,discharge_disposition_id_25,discharge_disposition_id_27,discharge_disposition_id_28,discharge_disposition_id_3,discharge_disposition_id_4,discharge_disposition_id_5,discharge_disposition_id_6,discharge_disposition_id_7,discharge_disposition_id_8,discharge_disposition_id_9,admission_source_id_10,admission_source_id_11,admission_source_id_13,admission_source_id_14,admission_source_id_17,admission_source_id_2,admission_source_id_20,admission_source_id_22,admission_source_id_25,admission_source_id_3,admission_source_id_4,admission_source_id_5,admission_source_id_6,admission_source_id_7,admission_source_id_8,admission_source_id_9,med_spec_Emergency/Trauma,med_spec_Family/GeneralPractice,med_spec_InternalMedicine,med_spec_Nephrology,med_spec_Orthopedics,med_spec_Orthopedics-Reconstructive,med_spec_Other,med_spec_Radiologist,med_spec_Surgery-General,med_spec_UNK,age_group,has_weight,death,encounter_nr,readmitted_cat
readmitted_cat,0.063571,0.036191,-0.0268,0.056595,0.059766,0.121128,0.262976,0.09363,-0.033022,-0.033022,0.014834,0.014834,-0.002903,-0.002903,-0.005933,-0.005933,0.00254,0.00254,,,0.014956,0.014956,-0.002806,-0.002806,0.001042,0.001042,0.001056,0.001056,0.004153,0.004153,0.005672,0.005672,-0.005212,-0.005212,-0.003009,-0.003009,-0.002369,-0.002369,0.065828,0.065828,0.004321,0.004321,0.00803,0.00803,,,,,,,0.034316,0.034316,0.049229,0.049229,-0.005777,0.023146,-0.006544,-0.010671,-0.028852,-0.006364,-0.005212,0.015137,-0.019014,0.011574,-0.014974,0.025848,-0.016931,-0.000681,-0.006724,-0.014496,0.001935,,-0.014809,0.026733,0.001857,-0.004742,0.013627,-0.007695,-0.012407,0.000995,-0.003024,-0.018719,0.008112,-0.009374,-0.008073,-0.024375,-0.003009,-0.00851,0.021821,-0.006729,-0.01219,,-0.071686,0.014365,-0.013595,-0.018348,0.040099,-0.003009,-0.003009,-0.010171,-0.004256,0.029489,-0.003009,0.087022,-0.012087,-0.00541,-0.005,-0.003009,0.028298,0.068689,-0.006413,0.03336,0.041266,0.007,-0.006617,0.013909,-0.004256,-0.003009,-0.003009,,0.000926,-0.007995,0.008635,-0.005212,,0.002948,-0.023478,-0.003455,-0.031522,0.044259,0.008812,-0.012601,0.005248,0.011661,-0.006116,0.033429,-0.005075,-0.027864,-0.025583,-0.02243,-0.009183,0.029637,0.037644,0.022196,-0.073105,0.431111,1.0
encounter_nr,0.041575,0.002282,-0.052553,0.023433,0.065409,0.200856,0.581176,0.050936,-0.046056,-0.046056,-0.000521,-0.000521,-0.003765,-0.003765,-0.003564,-0.003564,-0.011885,-0.011885,,,-0.011707,-0.011707,-0.01397,-0.01397,-0.004369,-0.004369,-0.010999,-0.010999,-0.012207,-0.012207,-0.002685,-0.002685,-0.003384,-0.003384,-0.001954,-0.001954,0.000823,0.000823,0.086843,0.086843,0.002613,0.002613,0.005702,0.005702,,,,,,,0.038741,0.038741,0.031839,0.031839,-0.010917,-0.003333,-0.006414,-0.011485,-0.025299,-0.021848,-0.003384,0.027046,-0.016315,0.00316,-0.021167,0.041117,-0.030368,-0.002746,-0.008161,-0.015118,0.002098,,0.009544,0.028518,0.016579,-0.002871,0.071205,-0.00632,-0.01174,0.008647,0.018492,-0.018953,-0.051161,-0.008736,-0.001665,-0.020059,-0.001954,0.005623,-0.035015,-0.004369,-0.011773,,-0.014886,0.004031,0.006132,0.00207,0.008647,-0.001954,-0.001954,0.01333,-0.002763,0.008867,-0.001954,0.024643,-0.002135,-0.003313,0.011937,-0.001954,0.013226,0.01313,0.009417,0.001417,0.026588,0.011315,-0.002512,0.026982,-0.002763,-0.001954,-0.001954,,-0.008458,-0.015707,-0.011567,0.003527,,-5.4e-05,-0.018455,-0.003855,-0.016011,0.060139,0.003662,-0.009923,0.059015,0.009187,-0.029215,0.057777,-0.01696,-0.020891,-0.00996,-0.016456,0.001223,0.006751,-0.053213,-0.038386,-0.009548,1.0,0.431111
number_inpatient,0.078818,0.033911,-0.063278,0.05473,0.112065,0.244412,1.0,0.114202,-0.06925,-0.06925,0.00579,0.00579,-0.002621,-0.002621,-0.003401,-0.003401,-0.010791,-0.010791,,,-0.021234,-0.021234,-0.023634,-0.023634,-0.006646,-0.006646,-0.022537,-0.022537,-0.018354,-0.018354,-0.00717,-0.00717,-0.005148,-0.005148,-0.002972,-0.002972,0.001782,0.001782,0.087201,0.087201,-0.009373,-0.009373,0.003947,0.003947,,,,,,,0.021565,0.021565,0.02945,0.02945,-0.012256,0.010466,-0.003046,-0.012042,-0.039314,-0.003827,-0.005148,0.029937,-0.01173,-0.004863,-0.045686,0.068158,-0.036088,-0.012077,-0.006217,-0.017891,0.017995,,-0.001692,0.050902,0.023058,-0.006383,0.024308,-0.006127,-0.02251,0.009726,-0.023718,-0.022542,-0.028411,-0.013039,-0.004267,-0.041462,-0.002972,-0.014365,-0.004275,-0.006646,-0.002846,,0.036506,0.002791,0.018509,0.026781,-0.003244,0.002791,0.002791,-0.005806,-0.004203,0.00348,-0.002972,0.002403,0.007661,-0.001977,-0.013861,0.002791,0.01772,0.043527,0.009841,-0.002246,0.055161,0.015048,-0.00342,0.039778,-0.004203,-0.002972,-0.002972,,-0.010063,-0.017954,-0.006872,0.001507,,-0.00422,-0.021315,0.015284,-0.014195,0.071986,-0.004068,-0.016195,0.01999,0.009787,0.011553,0.06718,-0.031771,-0.02534,-0.01779,-0.018749,0.00757,0.001878,-0.007029,1.7e-05,0.048187,0.581176,0.262976
number_emergency,-0.008332,-0.00644,-0.042035,0.0088,0.094299,1.0,0.244412,0.053703,-0.012539,-0.012539,0.010325,0.010325,0.004751,0.004751,-0.00023,-0.00023,0.002166,0.002166,,,-0.010547,-0.010547,-0.026378,-0.026378,-0.002747,-0.002747,0.006607,0.006607,-0.01047,-0.01047,-0.007165,-0.007165,-0.002128,-0.002128,-0.001229,-0.001229,-0.000565,-0.000565,0.057879,0.057879,-0.002415,-0.002415,-0.001738,-0.001738,,,,,,,0.033177,0.033177,0.022142,0.022142,-0.006035,-0.026524,0.00234,-0.001754,-0.023285,-0.017496,-0.002128,0.023296,-0.029418,0.011451,-0.009591,0.008479,0.001418,-0.004631,0.00257,-0.013107,0.039999,,0.025253,0.028452,0.068265,-0.004076,0.037284,0.007436,-0.008553,-0.000799,-0.007802,-0.005602,-0.077205,-0.007377,0.001752,-0.031378,-0.001229,0.019029,-0.011952,-0.002747,-0.002148,,0.00613,-0.001229,-0.005926,0.001614,-0.000799,0.015235,-0.001229,-0.037048,-0.001738,0.001521,-0.001229,-0.009443,0.003102,0.000497,0.00391,0.007003,0.000861,0.005005,-0.00449,-0.004676,0.011919,0.029382,-0.004598,0.010434,-0.001738,-0.001229,-0.001229,,0.029541,-0.014298,-0.003096,-0.002128,,-0.006474,-0.033056,0.00297,-0.027315,0.057583,-0.002747,-0.007479,0.046703,-0.003581,-0.003052,0.029807,-0.016016,-0.012109,-0.026919,-0.007909,0.036966,-0.003142,-0.052317,0.007588,0.003418,0.200856,0.121128
number_diagnoses,0.224223,0.16431,0.069774,0.251786,0.092492,0.053703,0.114202,1.0,-0.069024,-0.069024,0.028887,0.028887,0.024107,0.024107,-0.009173,-0.009173,0.006116,0.006116,,,0.00361,0.00361,-0.030255,-0.030255,0.000759,0.000759,0.004958,0.004958,-0.006311,-0.006311,0.002549,0.002549,-0.003627,-0.003627,0.005649,0.005649,-0.015703,-0.015703,0.115187,0.115187,-0.006432,-0.006432,-0.008437,-0.008437,,,,,,,0.058314,0.058314,0.023299,0.023299,-0.006659,0.113093,-0.030496,-0.018936,-0.064686,-0.001669,-0.007459,0.001689,0.026715,-0.030952,-0.064788,0.02025,0.026184,0.012333,0.027552,0.011318,0.004943,,-0.017505,0.166488,0.012886,0.010733,0.002474,-0.005613,-0.016282,-0.009762,-0.021448,-0.085485,-0.106932,-0.0087,-0.028576,-0.071287,0.005649,0.012531,-0.070916,0.00818,-0.042012,,0.076071,-0.000988,0.037241,0.035717,0.014304,-0.014262,0.005649,-0.068783,0.003296,0.03927,0.005649,0.034862,0.022853,0.012866,-0.027796,0.002331,0.015101,0.14317,0.013294,0.000903,0.099079,-0.009756,-0.001923,0.011129,-0.001397,-0.000988,0.005649,,0.007505,-0.006106,0.01885,0.000205,,-0.025159,-0.006488,0.045016,-0.172199,0.149019,-0.005178,-0.056244,0.05778,-0.032934,-0.061647,0.006595,-0.05497,-0.054285,-0.145942,0.012118,-0.041633,0.177709,0.248482,0.05254,0.09283,0.050936,0.09363
discharge_disposition_id_22,0.061303,-0.008905,0.010348,0.064327,-0.01122,-0.009443,0.002403,0.034862,-0.001893,-0.001893,0.02062,0.02062,0.001162,0.001162,0.004656,0.004656,0.005442,0.005442,,,-0.003007,-0.003007,0.012961,0.012961,-0.002269,-0.002269,0.018183,0.018183,0.00712,0.00712,0.003302,0.003302,-0.001757,-0.001757,-0.001014,-0.001014,-0.00287,-0.00287,0.025323,0.025323,0.040505,0.040505,-0.001435,-0.001435,,,,,,,0.03149,0.03149,0.013631,0.013631,-0.006139,0.022479,-0.015875,-0.007595,-0.017005,-0.018998,0.02343,-0.010795,0.029842,-0.021956,-0.017081,0.01484,-0.000992,-0.006091,0.00797,0.001545,-0.011141,,-0.017838,0.03704,-0.005556,-0.003365,-0.008382,-0.004059,0.021438,-0.004059,0.023937,0.001289,-0.033741,0.008461,-0.003503,0.035072,-0.001014,-0.030028,-0.029546,-0.002269,-0.008377,,-0.024167,-0.001014,-0.010713,-0.011418,-0.004059,-0.001014,-0.001014,-0.032156,-0.001435,-0.02358,-0.001014,1.0,-0.009642,-0.003515,-0.015359,-0.001014,-0.006091,-0.061267,-0.014078,-0.017685,-0.056108,-0.012039,-0.003797,-0.002485,-0.001435,-0.001014,-0.001014,,-0.035289,0.003838,-0.006006,-0.001757,,-0.006421,-0.01265,-0.006198,-0.015619,0.015806,-0.002269,0.001002,0.031242,-0.002359,-0.021501,-0.014217,0.093056,0.057601,-0.018878,0.013039,-0.000564,-0.011351,0.0631,-0.025198,-0.029087,0.024643,0.087022
discharge_disposition_id_3,0.186086,0.060644,-0.060976,0.090728,0.033294,0.005005,0.043527,0.14317,-0.030341,-0.030341,0.018512,0.018512,0.000903,0.000903,0.004558,0.004558,0.001931,0.001931,,,0.003647,0.003647,0.001425,0.001425,-0.005838,-0.005838,-0.002463,-0.002463,-0.00894,-0.00894,-0.002811,-0.002811,0.006545,0.006545,0.016557,0.016557,0.006171,0.006171,0.046988,0.046988,-0.01564,-0.01564,-0.003692,-0.003692,,,,,,,0.027274,0.027274,0.019569,0.019569,-0.006327,0.062443,-0.02245,-0.023938,0.006347,-0.076879,-0.004522,0.042571,-0.087628,0.05331,-0.058399,0.037416,0.000218,-0.012478,0.013125,-0.040811,-0.014637,,-0.037188,0.194895,-0.002451,-0.00288,-0.035351,-0.010446,-0.021254,-0.000859,-0.028153,-0.026361,-0.088987,-0.002887,-0.026026,-0.066889,-0.002611,0.101152,-0.017709,0.002735,-0.016903,,-0.062194,-0.002611,-0.02757,-0.029383,-0.010446,-0.002611,-0.002611,-0.082751,-0.003692,-0.060682,-0.002611,-0.061267,-0.024814,-0.009045,-0.039526,-0.002611,-0.015675,1.0,-0.036228,-0.045512,-0.14439,-0.030982,-0.009771,-0.006395,-0.003692,-0.002611,-0.002611,,0.063701,-0.031374,0.000756,-0.004522,,-0.007425,-0.042257,0.129064,0.014126,0.025887,-0.005838,-0.015892,0.032422,0.01057,0.012303,0.001196,0.042074,0.042493,-0.06949,-0.029715,-0.001856,0.025516,0.277509,0.02626,-0.074855,0.01313,0.068689
insulin,0.132894,0.12255,0.019308,0.247144,0.024247,0.057879,0.087201,0.115187,-0.030115,-0.030115,0.02565,0.02565,-0.007554,-0.007554,-0.006756,-0.006756,0.011947,0.011947,,,-0.021468,-0.021468,-0.070674,-0.070674,-0.006663,-0.006663,0.009963,0.009963,-0.002618,-0.002618,0.008829,0.008829,0.001658,0.001658,0.000957,0.000957,-0.011212,-0.011212,1.0,1.0,0.00513,0.00513,0.001354,0.001354,,,,,,,0.621145,0.621145,0.487167,0.487167,-0.009913,-0.035454,0.00598,0.030103,-0.001881,0.008982,-0.009706,0.06453,-0.011964,-0.04371,0.13365,-0.099978,-0.003475,-0.008479,0.027833,0.000187,0.020719,,-0.037726,0.084344,0.080473,-0.006718,0.068165,-0.006015,0.025673,0.008752,0.081435,0.004714,-0.166357,0.000275,0.0086,-0.00575,0.007518,0.038935,-0.054562,-0.009597,0.009497,,0.015748,-0.005604,0.001368,0.01253,0.010393,0.007518,0.000957,-0.115218,-0.007925,0.001451,0.007518,0.025323,0.028499,-0.002367,0.011428,-0.005604,-0.000819,0.046988,-0.011506,-0.011147,0.032253,0.008021,-0.013957,0.007702,0.001354,0.000957,-0.005604,,0.002116,0.007478,-0.005432,-0.00213,,-0.016783,-0.019094,-0.011605,-0.037051,0.054053,0.00214,-0.004968,0.102545,-0.007389,0.001273,0.018597,-0.025047,-0.022504,-0.016063,-0.002976,-0.002729,-0.010642,-0.094941,-0.084691,0.018676,0.086843,0.065828
insulin,0.132894,0.12255,0.019308,0.247144,0.024247,0.057879,0.087201,0.115187,-0.030115,-0.030115,0.02565,0.02565,-0.007554,-0.007554,-0.006756,-0.006756,0.011947,0.011947,,,-0.021468,-0.021468,-0.070674,-0.070674,-0.006663,-0.006663,0.009963,0.009963,-0.002618,-0.002618,0.008829,0.008829,0.001658,0.001658,0.000957,0.000957,-0.011212,-0.011212,1.0,1.0,0.00513,0.00513,0.001354,0.001354,,,,,,,0.621145,0.621145,0.487167,0.487167,-0.009913,-0.035454,0.00598,0.030103,-0.001881,0.008982,-0.009706,0.06453,-0.011964,-0.04371,0.13365,-0.099978,-0.003475,-0.008479,0.027833,0.000187,0.020719,,-0.037726,0.084344,0.080473,-0.006718,0.068165,-0.006015,0.025673,0.008752,0.081435,0.004714,-0.166357,0.000275,0.0086,-0.00575,0.007518,0.038935,-0.054562,-0.009597,0.009497,,0.015748,-0.005604,0.001368,0.01253,0.010393,0.007518,0.000957,-0.115218,-0.007925,0.001451,0.007518,0.025323,0.028499,-0.002367,0.011428,-0.005604,-0.000819,0.046988,-0.011506,-0.011147,0.032253,0.008021,-0.013957,0.007702,0.001354,0.000957,-0.005604,,0.002116,0.007478,-0.005432,-0.00213,,-0.016783,-0.019094,-0.011605,-0.037051,0.054053,0.00214,-0.004968,0.102545,-0.007389,0.001273,0.018597,-0.025047,-0.022504,-0.016063,-0.002976,-0.002729,-0.010642,-0.094941,-0.084691,0.018676,0.086843,0.065828
time_in_hospital,1.0,0.333255,0.177213,0.461383,-0.013638,-0.008332,0.078818,0.224223,0.008498,0.008498,0.034799,0.034799,0.00541,0.00541,0.013168,0.013168,0.03652,0.03652,,,0.038906,0.038906,0.053035,0.053035,0.013053,0.013053,0.005348,0.005348,0.02099,0.02099,0.005254,0.005254,-5.9e-05,-5.9e-05,-0.000768,-0.000768,0.004055,0.004055,0.132894,0.132894,0.004191,0.004191,0.00047,0.00047,,,,,,,0.118879,0.118879,0.075313,0.075313,-0.021436,0.008477,-0.01585,-0.008933,-0.008965,-0.028974,-0.003873,0.045616,-0.006181,-0.030742,0.037829,-0.066483,0.04046,-0.01159,0.000619,-0.030957,-0.009041,,-0.048546,0.030283,0.001741,0.020691,-0.015651,-0.011883,-0.015019,0.005185,-0.032733,-0.023884,0.062448,-0.010121,0.022442,-0.010537,0.001434,-0.02837,0.010905,0.008129,-0.022921,,0.016163,-0.000768,0.029802,0.035015,0.02776,-0.00297,0.021249,0.051528,-0.010427,-0.019115,0.008039,0.061303,0.057108,-0.002026,0.000852,0.014644,-0.006081,0.186086,0.031731,0.042136,0.13418,-0.052043,0.025966,0.003512,-0.001086,-0.00297,-0.00297,,-0.020898,0.011359,0.00588,0.021551,,0.004893,0.098189,0.049567,0.003933,-0.001687,-0.003687,-0.022426,-0.017469,0.011359,0.035193,0.029418,-0.021349,-0.015027,0.026929,-0.037197,0.006369,0.003087,0.129524,0.017045,0.038799,0.041575,0.063571


In [35]:
#Select features of interest based on correlation with readmitted_cat column
feats_special = ['number_inpatient','number_diagnoses', 'number_emergency', 'time_in_hospital', 
'discharge_disposition_id_3', 'discharge_disposition_id_22', 'insulin', 'num_medications', 'number_outpatient',
'admission_source_id_7', 'diabetesMed', 'age_group', 'med_spec_Nephrology', 'discharge_disposition_id_6',
'discharge_disposition_id_2', 'discharge_disposition_id_5', 'med_spec_UNK', 'discharge_disposition_id_2',
'discharge_disposition_id_28', 'payer_code_MC', 'A1Cresult_None', 'race_Caucasian', 'has_weight',
'admission_type_id_6', 'max_glu_serum_>300', 'glipizide', 'repaglinide', 'encounter_nr']

In [36]:
X_train = df_train[feats_special].values
X_train_all = df_train_all[feats_special].values
X_valid = df_valid[feats_special].values
X_test = df_test[feats_special].values

y_train = df_train['readmitted'].values.astype(np.int8)
y_valid = df_valid['readmitted'].values.astype(np.int8)
y_test = df_test['readmitted'].values.astype(np.int8)

In [37]:
print(X_train.shape)
print(X_valid.shape)
print(X_test.shape)
print(X_train_all.shape)

(23136, 32)
(4957, 32)
(4958, 32)
(23136, 32)


### Training the model

In [38]:
model = xgb.XGBClassifier()
cv = StratifiedKFold(n_splits=10)
scores = []
for train_idx, test_idx in cv.split(X_train, y_train):
    X_fold_train, X_fold_test = X_train[train_idx], X_train[test_idx]
    y_fold_train, y_fold_test = y_train[train_idx], y_train[test_idx]
    
    y_log = np.log2(y_fold_train + 3)
    model.fit(X_fold_train, y_log)
    y_pred_log = model.predict(X_fold_test) 
    y_pred = np.exp2(y_pred_log) - 3
    y_pred[ y_pred < 0 ] = 0 #reset negative result
    
    score = rmsle(y_fold_test, y_pred)
    scores.append( score )
print('score: ', np.mean(scores), 'std: ', np.std(scores))

score:  1.454 std:  0.03146


In [39]:
y_pred2 = model.predict(X_test) 
score2 = rmsle(y_pred2.astype(np.int8), y_test.astype(np.int8))
score2

1.523

### Making the final prediction for the test data

In [40]:
data = test[feats_special]
readmitted = model.predict(data.values) 
test = test.assign(readmitted = readmitted)
test.head()

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,medical_specialty,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,diag_1,diag_2,diag_3,number_diagnoses,max_glu_serum,A1Cresult,metformin,repaglinide,nateglinide,chlorpropamide,glimepiride,acetohexamide,glipizide,glyburide,tolbutamide,pioglitazone,rosiglitazone,acarbose,miglitol,troglitazone,tolazamide,examide,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted,id,med_spec,death,metformin.1,repaglinide.1,nateglinide.1,chlorpropamide.1,glimepiride.1,acetohexamide.1,glipizide.1,glyburide.1,tolbutamide.1,pioglitazone.1,rosiglitazone.1,acarbose.1,miglitol.1,troglitazone.1,tolazamide.1,insulin.1,glyburide-metformin.1,glipizide-metformin.1,glimepiride-pioglitazone.1,metformin-rosiglitazone.1,metformin-pioglitazone.1,change.1,diabetesMed.1,race_Asian,race_Caucasian,race_Hispanic,race_Other,race_UNK,gender_Male,gender_Unknown/Invalid,max_glu_serum_>300,max_glu_serum_None,max_glu_serum_Norm,A1Cresult_>8,A1Cresult_None,A1Cresult_Norm,payer_code_CH,payer_code_CM,payer_code_CP,payer_code_DM,payer_code_FR,payer_code_HM,payer_code_MC,payer_code_MD,payer_code_MP,payer_code_OG,payer_code_OT,payer_code_PO,payer_code_SI,payer_code_SP,payer_code_UN,payer_code_UNK,payer_code_WC,admission_type_id_2,admission_type_id_3,admission_type_id_4,admission_type_id_5,admission_type_id_6,admission_type_id_7,admission_type_id_8,discharge_disposition_id_10,discharge_disposition_id_11,discharge_disposition_id_12,discharge_disposition_id_13,discharge_disposition_id_14,discharge_disposition_id_15,discharge_disposition_id_16,discharge_disposition_id_17,discharge_disposition_id_18,discharge_disposition_id_19,discharge_disposition_id_2,discharge_disposition_id_20,discharge_disposition_id_22,discharge_disposition_id_23,discharge_disposition_id_24,discharge_disposition_id_25,discharge_disposition_id_27,discharge_disposition_id_28,discharge_disposition_id_3,discharge_disposition_id_4,discharge_disposition_id_5,discharge_disposition_id_6,discharge_disposition_id_7,discharge_disposition_id_8,discharge_disposition_id_9,admission_source_id_10,admission_source_id_11,admission_source_id_13,admission_source_id_14,admission_source_id_17,admission_source_id_2,admission_source_id_20,admission_source_id_22,admission_source_id_25,admission_source_id_3,admission_source_id_4,admission_source_id_5,admission_source_id_6,admission_source_id_7,admission_source_id_8,admission_source_id_9,med_spec_Emergency/Trauma,med_spec_Family/GeneralPractice,med_spec_InternalMedicine,med_spec_Nephrology,med_spec_Orthopedics,med_spec_Orthopedics-Reconstructive,med_spec_Other,med_spec_Radiologist,med_spec_Surgery-General,med_spec_UNK,age_group,has_weight,encounter_nr
6,55842,84259809,Caucasian,Male,[60-70),,3,1,2,4,UNK,UNK,70,1,21,0,0,0,414,411,V45,7,,,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,True,True,1.584961,6,UNK,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,True,True,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,6,0,1
9,15738,63555939,Caucasian,Female,[90-100),,3,3,4,12,UNK,InternalMedicine,33,3,18,0,0,0,434,198,486,8,,,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,True,True,1.584961,9,InternalMedicine,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,True,True,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,9,0,1
13,42570,77586282,Caucasian,Male,[80-90),,1,6,7,10,UNK,Family/GeneralPractice,55,1,31,0,0,0,428,411,427,8,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,False,True,1.584961,13,Family/GeneralPractice,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,False,True,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,8,0,1
15,73578,86328819,AfricanAmerican,Male,[60-70),,1,3,7,12,UNK,UNK,75,5,13,0,0,0,999,507,996,9,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,True,True,1.584961,15,UNK,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,True,True,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,6,0,1
16,77076,92519352,AfricanAmerican,Male,[50-60),,1,1,7,4,UNK,UNK,45,4,17,0,0,0,410,411,414,8,,,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,True,True,1.584961,16,UNK,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,True,True,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,5,0,1


In [41]:
test['readmitted'].head()

6     1.584961
9     1.584961
13    1.584961
15    1.584961
16    1.584961
Name: readmitted, dtype: float16

In [42]:
#Saving results to the .csv file
test[ ['id', 'readmitted'] ].to_csv('../output/lets_improve_it_final.csv', index=False)