In [28]:
import numpy as np
import pandas as pd
from keras.layers import Dense, Input, Dropout, Activation
from keras.models import Model, load_model
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, LambdaCallback
from sklearn.metrics import roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display
import time
import tqdm
import random
from bayes_opt import BayesianOptimization

Load and clean.

In [29]:
data = pd.read_csv('final_data.csv')
# Convert labels
data.loc[data.CONT == "NO","CONT"] = 0
data.loc[data.CONT != 0,"CONT"] = 1
# Subset columns
cols = ["BUN","CA","CL","CO2","CRET","GLU","K","NA.","CONT"]
data = data[cols]
# Banish negativity :)
data.loc[data.CA < 0,"CA"] = 0
# Convert to int.  Could save space with binary, but meh...
data.CONT = data.CONT.astype("int64")
# Drop NAs
data = data.dropna()
# To numpy, Batman!
data = data.as_matrix()
# Normalize features
num_features = len(cols)-1
means = np.zeros(num_features)
stds = np.zeros(num_features)
for x in range(data.shape[1]-1):
    means[x] = data[:,x].mean()
    stds[x] = data[:,x].std()
    data[:,x] = (data[:,x]-means[x])/stds[x]
# Split
test_samples, val_samples = [int(.2*len(data))]*2
np.random.shuffle(data)
test = data[:test_samples,:]
val = data[test_samples:test_samples+val_samples,:]
train = data[test_samples+val_samples:,:]
X_train, X_val, X_test = train[:,:-1], val[:,:-1], test[:,:-1]
y_train, y_val, y_test = train[:,-1], val[:,-1], test[:,-1]

In [10]:
def buildNet(lr=1e-3, dropoutA=0.5, dropoutB=0.5, weightReg=.01):
    inputLayer = Input(shape = (8,))
    D = Dense(128, W_regularizer=l2(weightReg), b_regularizer=l2(weightReg))(inputLayer)
    D = Activation('relu')(D)
    D = Dropout(dropoutA)(D)
    D = Dense(64, W_regularizer=l2(weightReg), b_regularizer=l2(weightReg))(D)
    D = Activation('relu')(D)
    D = Dropout(dropoutB)(D)
    pred = Dense(1,activation = 'sigmoid')(D)
    model = Model(input = inputLayer, output=pred)
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=lr))
    return model

def trainAndPredict(model, nb_epoch=100, batch_size=32):
    #plotterT = LambdaCallback(on_epoch_end=lambda epoch, logs: plt.plot(epoch, logs['loss']))
    #plotterV = LambdaCallback(on_epoch_end=lambda epoch, logs: plt.plot(np.arange(epoch), logs['val_loss']))
    stopper = EarlyStopping(patience = 3, verbose = 1)
    model.fit(train[:,:8],train[:,-1],validation_data=(val[:,:8],val[:,-1]),
              nb_epoch=int(round(nb_epoch)), batch_size=int(round(batch_size)),callbacks=[stopper])
    return model.predict(val[:,:8])

def evalAUC(y_pred,labels=val[:,-1]):
    return roc_auc_score(labels,y_pred)

def measurable(lr=1e-3, dropoutA=.5, dropoutB=.5, weightReg=.01, nb_epoch=100, batch_size=32):
    model = buildNet(lr, dropoutA, dropoutB, weightReg)
    y_pred = trainAndPredict(model, nb_epoch, batch_size)
    return evalAUC(y_pred)

In [None]:
params = {'lr':(1e-5,.01), 'weightReg':(0,.5)}
BO = BayesianOptimization(measurable, params)
BO.maximize(init_points=5,n_iter=30)

[31mInitialization[0m
[94m-------------------------------------------------------[0m
 Step |   Time |      Value |        lr |   weightReg | 
Train on 8367 samples, validate on 2789 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 00014: early stopping
    1 | 00m12s | [35m   0.63865[0m | [32m   0.0063[0m | [32m     0.3694[0m | 
Train on 8367 samples, validate on 2789 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 00010: early stopping
    2 | 00m09s | [35m   0.92908[0m | [32m   0.0062[0m | [32m     0.2361[0m | 
Train on 8367 samples, validate on 2789 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 00006: early stopping
    3 | 00m07s | [35m   0.99219[0m | [32m   0.0077

In [25]:
BO.res['max']

{'max_params': {'dropoutA': 0.083730742972679156,
  'dropoutB': 0.37669203808894514,
  'dropoutC': 0.26408076037832368,
  'lr': 0.019685168087984706},
 'max_val': 0.99911993724769932}

In [153]:
model = buildNet(lr = 0.019685168087984706, dropoutA=0.083730742972679156, 
                 dropoutB=0.37669203808894514,dropoutC=0.26408076037832368)
y_pred = trainAndPredict(model)

Train on 8367 samples, validate on 2789 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

In [154]:
evalAUC(y_pred)

0.97354072203409281

In [88]:
def buildDeep(lr=.01, dropoutA=.5, dropoutB=.5, dropoutC=.5, weightReg=.01):
    inputLayer = Input(shape = (8,))
    D = Dense(128, W_regularizer=l2(weightReg), b_regularizer=l2(weightReg))(inputLayer)
    D = Activation('relu')(D)
    D = Dropout(dropoutA)(D)
    D = Dense(64, W_regularizer=l2(weightReg), b_regularizer=l2(weightReg))(D)
    D = Activation('relu')(D)
    D = Dropout(dropoutB)(D)
    D = Dense(32, W_regularizer=l2(weightReg), b_regularizer=l2(weightReg))(D)
    D = Activation('relu')(D)
    D = Dropout(dropoutC)(D)
    pred = Dense(1,activation = 'sigmoid')(D)
    model = Model(input = inputLayer, output=pred)
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=lr))
    return model

def measurableDeep(lr=.01, dropoutA=.5, dropoutB=.5, dropoutC=.5, weightReg=.01, nb_epoch=100, batch_size=32):
    model = buildDeep(lr,dropoutA,dropoutB,dropoutC,dropoutD,dropoutE, weightReg=weightReg)
    y_pred = trainAndPredict(model, nb_epoch,batch_size)
    return evalAUC(y_pred)

In [None]:
#params1 = {'lr':(1e-4,.1), 'weightReg':(0,.5)}
#BO1 = BayesianOptimization(measurableDeep,params1)
#BO1.maximize(init_points=5,n_iter=50)

In [1]:
#BO1.res['max']

In [11]:
best_model = load_model('best_small_model.hdf5')

In [30]:
train_pred = best_model.predict(X_train)
val_pred = best_model.predict(X_val)
test_pred = best_model.predict(X_test)

tn, fp, fn, tp = confusion_matrix(np.round(train_pred),y_train).ravel()
print("Train AUC: {0}, Train Sens: {1}, Train Spec: {2}".format(
    roc_auc_score(y_train,train_pred), tp/(tp+fn), tn/(tn+fp)))
print("Train Acc: {0}".format((tp+tn)/(tp+tn+fp+fn)))

tn, fp, fn, tp = confusion_matrix(np.round(val_pred),y_val).ravel()
print("Val AUC: {0}, Val Sens: {1}, Val Spec: {2}".format(
    roc_auc_score(y_val,val_pred), tp/(tp+fn), tn/(tn+fp)))
print("Val Acc: {0}".format((tp+tn)/(tp+tn+fp+fn)))

tn, fp, fn, tp = confusion_matrix(np.round(test_pred),y_test).ravel()
print("Test AUC: {0}, Test Sens: {1}, Test Spec: {2}".format(
    roc_auc_score(y_test,test_pred), tp/(tp+fn), tn/(tn+fp)))
print("Test Acc: {0}".format((tp+tn)/(tp+tn+fp+fn)))

Train AUC: 0.9445792129619152, Train Sens: nan, Train Spec: 0.9872116648739094
Train Acc: 0.9872116648739094
Val AUC: 0.9588676314412972, Val Sens: nan, Val Spec: 0.9835066332018645
Val Acc: 0.9835066332018645
Test AUC: 0.966272588490077, Test Sens: nan, Test Spec: 0.9852993904625313
Test Acc: 0.9852993904625313




### Output

Train AUC: 0.9454681759049717, Train Sens: 0.9324324324324325, Train Spec: 0.9937296515133245

Train Acc: 0.9931875224094657

Val AUC: 0.9754746342981637, Val Sens: 0.9615384615384616, Val Spec: 0.9963807455664133

Val Acc: 0.9960559340265328

Test AUC: 0.9449769461822495, Test Sens: 0.8387096774193549, Test Spec: 0.9956490210297317

Test Acc: 0.9939046253137325

In [35]:
nn_test = np.concatenate((test_pred,np.reshape(y_test,(y_test.shape[0],1))), axis=1)

In [36]:
np.save("nn_test.npy",nn_test)