In [1]:
# Import libraries
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from keras.layers import Layer, GRU, LSTM, Dense, Input, Dropout, Convolution1D, MaxPool1D, GlobalMaxPool1D, GlobalAveragePooling1D, \
    concatenate
from keras.layers import LeakyReLU
from keras import regularizers, backend, initializers
from keras.models import Sequential
from keras.utils import to_categorical
from keras.initializers import Ones, Zeros
import keras.backend as K
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn import metrics
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, confusion_matrix
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

import time
import gc

import numpy
import pandas as pd
import pylab as plt
import tensorflow as tf
import numpy as np
from numpy import loadtxt
from numpy import savetxt
from tensorflow.python.framework import ops
print(tf.__version__)

# Visualization Libraries
import seaborn as sns

# Size of matplotlib histogram bins
bin_size = 20

2.1.0


In [None]:
# Load array
train = loadtxt('train.csv', delimiter=',')
test = loadtxt('test.csv', delimiter=',')

# Split array
train_x = train[:,:11]
test_x = test[:,:11]
train_y = train[:,11]
test_y = test[:,11]

x_re = np.vstack((train_x, test_x))
y_re = np.vstack((train_y, test_y))

In [7]:
# Define Layer Normalization class
class LayerNormalization(Layer):
    def __init__(self, eps=1e-6, **kwargs):
        self.eps = eps
        super(LayerNormalization, self).__init__(**kwargs)
    def build(self, input_shape):
        self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
                                     initializer=Ones(), trainable=True)
        self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
                                    initializer=Zeros(), trainable=True)
        super(LayerNormalization, self).build(input_shape)
    def call(self, x):
        mean = K.mean(x, axis=-1, keepdims=True)
        std = K.std(x, axis=-1, keepdims=True)
        return self.gamma * (x - mean) / (std + self.eps) + self.beta
    def compute_output_shape(self, input_shape):
        return input_shape

layer_size1 = 12
layer_size2 = 10
layer_size3 = 7
layer_size4 = 5
layer_size5 = 4
layer_size6 = 3
timesteps = 1 # static data
data_dim = 11

X_train = np.reshape(train_x, (train_x.shape[0], 1, train_x.shape[1]))
X_test = np.reshape(test_x, (test_x.shape[0], 1, test_x.shape[1]))


def create_model(learning_rate=0.001):
    model = Sequential()
    model.add(GRU(layer_size1, return_sequences=True, input_shape=(timesteps, data_dim)))
    model.add(LayerNormalization())
    model.add(LeakyReLU(alpha=0.01))
    model.add(GRU(layer_size2, return_sequences=True))
    model.add(LayerNormalization())
    model.add(LeakyReLU(alpha=0.01))
    model.add(GRU(layer_size3, return_sequences=True))
    model.add(LayerNormalization())
    model.add(LeakyReLU(alpha=0.01))
    model.add(GRU(layer_size4, return_sequences=True))
    model.add(LayerNormalization())
    model.add(LeakyReLU(alpha=0.01))
    model.add(GRU(layer_size5, return_sequences=True))
    model.add(LayerNormalization())
    model.add(LeakyReLU(alpha=0.01))
    model.add(GRU(layer_size6, return_sequences=False))
    model.add(LayerNormalization())
    model.add(LeakyReLU(alpha=0.01))
    model.add(Dense(2, activation='softmax'))

    opt = optimizers.Adam(learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

In [8]:
# Train the model with the optimal parameters 
model = create_model(learning_rate = 0.001)

# Set early stopping based on accuracy. It stops after 10 consecutive epochs of no accuracy improvement.
early = EarlyStopping(monitor='val_accuracy', patience=10, verbose=0)

# Reduce learning rate based on accuracy. It reduces the rate after 7 consecutive epochs of no accuracy improvement.
redonplat = ReduceLROnPlateau(monitor='val_accuracy', mode="max", patience=7, verbose=0)

callbacks_list = [early, redonplat]

train_y = to_categorical(train_y)

# Train the model
history = model.fit(X_train, train_y, epochs=1000, verbose=0, callbacks=callbacks_list, validation_split=0.1)

In [9]:
start_time = time.time()

# Test the model
pred_test = model.predict(X_test)
pred_test = np.argmax(pred_test, axis=-1)

# Get f1 score 
f1 = f1_score(test_y, pred_test, average="macro")
print("Test f1 score : %s "% f1)

# Get ROC AUC score 
roc = roc_auc_score(test_y, pred_test)
print("Test ROC AUC score : %s "% roc)

# Get the accuracy
acc = accuracy_score(test_y, pred_test)
print("Test accuracy score : %s "% acc)

print("--- %s seconds ---" % (time.time() - start_time))

Test f1 score : 0.824696564178524 
Test ROC AUC score : 0.8248306853888278 
Test accuracy score : 0.844229278163354 
--- 4.864065408706665 seconds ---


In [10]:
# Get the specificity
tn, fp, fn, tp = confusion_matrix(test_y, pred_test).ravel()
specificity = tn / (tn+fp)
print("Specificity : %s "% specificity)

# Get the sensitivity
sensitivity= tp / (tp+fn)
print("Sensitivity: %s "% sensitivity)

Specificity : 0.8828498136311456 
Sensitivity: 0.7668115571465098 


In [11]:
# 10-fold cross validation on the test data 
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# Use the whole dataset
X_re = np.reshape(x_re, (x_re.shape[0], 1, x_re.shape[1]))

kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
cvscores = []

for train, test in kfold.split(X_re, y_re):
    model = create_model(learning_rate = 0.001)
    # Fit the model
    model.fit(X_re[train], to_categorical(y_re[train]), epochs=1000, verbose=0, callbacks=callbacks_list, validation_split=0.1)
    # evaluate the model
    scores = model.evaluate(X_re[test], to_categorical(y_re[test]), verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)
print("%.2f%% (+/- %.2f%%)" % (numpy.mean(cvscores), numpy.std(cvscores)))

accuracy: 83.83%
accuracy: 83.80%
accuracy: 84.29%
accuracy: 84.04%
accuracy: 83.96%
accuracy: 83.89%
accuracy: 83.22%
accuracy: 84.02%
accuracy: 83.90%
accuracy: 83.69%
83.86% (+/- 0.27%)
