In [29]:
import numpy as np
import pickle
import pandas as pd
from tensorflow.keras import models
from tensorflow.keras.layers import Dense, Input, Dropout, LSTM, Masking, Bidirectional, Concatenate, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau

In [2]:
X = np.asarray(pd.read_csv("data/diabetes/X_enc.csv", header=None))
X_val = np.asarray(pd.read_csv("data/diabetes/X_val_enc.csv", header=None))
X_test = np.asarray(pd.read_csv("data/diabetes/X_test_enc.csv", header=None))
Y = np.asarray(pd.read_csv("data/diabetes/Y.csv", header=None))
Y_val = np.asarray(pd.read_csv("data/diabetes/Y_val.csv", header=None))
Y_test = np.asarray(pd.read_csv("data/diabetes/Y_test.csv", header=None))

In [3]:
print(Y.shape, Y_val.shape)

(6000, 1) (2000, 1)


In [4]:
X_discharge_disposition = pickle.load(open("data/diabetes/X_discharge_disposition_enc.csv", "rb"))
X_val_discharge_disposition = pickle.load(open("data/diabetes/X_val_discharge_disposition_enc.csv", "rb"))
X_test_discharge_disposition = pickle.load(open("data/diabetes/X_test_discharge_disposition_enc.csv", "rb"))

In [5]:
print(X_discharge_disposition.shape)

(6000, 5, 44)


In [6]:
X_medical_speciality = pickle.load(open("data/diabetes/X_medical_speciality_enc.csv", "rb"))
X_val_medical_speciality = pickle.load(open("data/diabetes/X_val_medical_speciality_enc.csv", "rb"))
X_test_medical_speciality = pickle.load(open("data/diabetes/X_test_medical_speciality_enc.csv", "rb"))

In [7]:
print(X_medical_speciality.shape)

(6000, 3, 51)


In [8]:
X_diagnoses = pickle.load(open("data/diabetes/X_diagnoses_enc.csv", "rb")).reshape(6000, 10, -1)
X_val_diagnoses = pickle.load(open("data/diabetes/X_val_diagnoses_enc.csv", "rb")).reshape(2000, 10, -1)
X_test_diagnoses = pickle.load(open("data/diabetes/X_test_diagnoses_enc.csv", "rb")).reshape(2000, 10, -1)

In [9]:
print(X_diagnoses.shape)

(6000, 10, 3174)


In [32]:
def model():
    x = Input(shape=(65,))
    
    x_d = Input(shape=(10, 3174))
    x_dis = Input(shape=(5, 44))
    x_m = Input(shape=(3, 51))
    
    rnn_d = LSTM(30)(x_d)
    rnn_dis = LSTM(20)(x_dis)
    rnn_m = LSTM(20)(x_m)
    
    #conc = Concatenate()([x, rnn_d, rnn_dis, rnn_m])

    dense = Dense(400, activation='relu', name="dense")(x)
    dense = Dropout(.1)(dense)
    dense = BatchNormalization()(dense)
    dense = Dense(300, activation='relu', name="dense2")(dense)
    dense = Dropout(.1)(dense)
    dense = BatchNormalization()(dense)
    dense = Dense(200, activation='relu', name="dense3")(dense)
    dense = Dropout(.1)(dense)
    dense = BatchNormalization()(dense)
    dense = Dense(100, activation='relu', name="dense4")(dense)
    dense = Dropout(.1)(dense)
    dense = BatchNormalization()(dense)
    y = Dense(1, activation='sigmoid', name="y")(dense)

    model = models.Model([x], y, name="model")
    model.compile(optimizer='Adam', loss="binary_crossentropy", metrics=['acc'])

    return model

In [33]:
m = model()

In [34]:
m.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_25 (InputLayer)        [(None, 65)]              0         
_________________________________________________________________
dense (Dense)                (None, 400)               26400     
_________________________________________________________________
dropout_3 (Dropout)          (None, 400)               0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 400)               1600      
_________________________________________________________________
dense2 (Dense)               (None, 300)               120300    
_________________________________________________________________
dropout_4 (Dropout)          (None, 300)               0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 300)               1200  

In [35]:
Y.shape

(6000, 1)

In [36]:
cbs = []
cbs.append(EarlyStopping(monitor="val_acc", mode="max", patience=5, verbose=1))
cbs.append(ReduceLROnPlateau(monitor="val_acc", mode="max", patience=3, verbose=2))        
m.fit([X], Y, shuffle=True, epochs=100, \
      validation_data=([X_val], Y_val), \
      callbacks=cbs)

Train on 6000 samples, validate on 2000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 00016: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 17/100
Epoch 18/100
Epoch 00018: early stopping


<tensorflow.python.keras.callbacks.History at 0x7f002d33fe10>

In [41]:
Y.shape

(6000, 1)

In [43]:
import lightgbm as lgbm
params = {
    'objective' :'binary',
    'metric': 'binary_logloss'
}
d_train = lgbm.Dataset(X, Y)
d_valid = lgbm.Dataset(X_val, Y_val)
lmod = lgbm.LGBMClassifier()
lmod.fit(X, Y.reshape(-1))
#bst = lgbm.train(params, d_train, 5000, valid_sets=[d_valid], verbose_eval=50, early_stopping_rounds=100)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=-1,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
               random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [44]:
lmod.predict(X_val)

array([0., 0., 1., ..., 0., 0., 0.])

In [45]:
np.mean(np.abs(Y_test-lmod.predict(X_val).round().astype(int)))

0.45239