# CNN Training

This notebook trains the CNN model from our paper, "Explainable Prediction of Acute Myocardial Infarction using Machine Learning and Shapley Values".

In [2]:
# Import libraries
import pandas as pd
import numpy as np
import pylab as plt
from keras import optimizers, losses, activations, models, regularizers
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from keras.layers import Dense, Input, Dropout, Convolution1D, MaxPool1D, Flatten, GlobalMaxPool1D, GlobalAveragePooling1D, \
    concatenate
from keras.utils import to_categorical
from keras.models import load_model, Sequential
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Loading Data

In [3]:
# Load data
# Import train and test data into dataframes from the csv files produced by the data processing code.
# The files are read with header=None, so the columns are labelled 0, 1, 2, ...
SHUFFLE_SEED = 42  # fixed so that the shuffle below is identical on every run

df_cnn_train = pd.read_csv("train_noagesex.csv", header=None)

# Shuffle the training rows. The training cell calls model.fit(..., validation_split=0.1),
# which holds out the tail of the arrays, so the row order decides which samples end up in
# the validation set. Seeding the shuffle keeps that split (and therefore the checkpointed
# model) reproducible across "Restart & Run All".
df_cnn_train = df_cnn_train.sample(frac=1, random_state=SHUFFLE_SEED)

df_cnn_test = pd.read_csv("test_noagesex.csv", header=None)

In [6]:
# Get data from dataframes
def _features_and_labels(frame):
    """Split a loaded dataframe into model inputs and integer labels.

    Columns 0-8 are used as the input values and column 9 as the label.

    Parameters
    ----------
    frame : pandas.DataFrame
        Dataframe whose columns are labelled with the integers 0..9.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The inputs with a trailing axis of length 1 appended, i.e. shape
        (rows, 9, 1), and the labels cast to int8 with shape (rows,).
    """
    inputs = np.expand_dims(np.array(frame[list(range(9))].values), axis=-1)
    labels = np.array(frame[9].values).astype(np.int8)
    return inputs, labels


# Training labels are one-hot encoded for the softmax output of the network
x_cnn, y_cnn = _features_and_labels(df_cnn_train)
y_cnn = to_categorical(y_cnn)

# Test labels are kept as integer class ids (no one-hot encoding)
x_cnn_test, y_cnn_test = _features_and_labels(df_cnn_test)

# Model Definition + Training

In [7]:
# Model definition
def get_model(learning_rate=0.001):
    """Build and compile the 1D CNN classifier.

    Architecture: four Conv1D layers (16, 32, 64, 256 filters) with ReLU
    activations and dropout of 0.1 between them, global max pooling, two
    L2-regularised dense layers (64 and 16 units) and a 2-way softmax output.

    Parameters
    ----------
    learning_rate : float, optional
        Learning rate passed to the Adam optimizer (default 0.001).

    Returns
    -------
    keras.models.Model
        Model taking inputs of shape (9, 1), compiled with Adam,
        binary cross-entropy loss and the 'acc' metric.
    """
    nclass = 2
    inp = Input(shape=(9, 1))

    # The first convolution uses "same" padding so the sequence keeps its 9 steps.
    # With "valid" padding on every layer the length would shrink 9 -> 5 -> 3 -> 1 and the
    # last kernel-3 convolution would have no valid output position, so the model could
    # not be built. With "same" here the lengths are 9 -> 9 -> 7 -> 5 -> 3.
    cnn = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="same")(inp)
    cnn = Dropout(rate=0.1)(cnn)
    cnn = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(cnn)
    cnn = Dropout(rate=0.1)(cnn)
    cnn = Convolution1D(64, kernel_size=3, activation=activations.relu, padding="valid")(cnn)
    cnn = Dropout(rate=0.1)(cnn)
    cnn = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(cnn)

    # Collapse the remaining time steps to one 256-value vector per sample
    cnn = GlobalMaxPool1D()(cnn)
    cnn = Dropout(rate=0.1)(cnn)

    # Classification head. The L2 factor is passed positionally so the call does not
    # depend on the keyword name used by the installed Keras version.
    dense_1 = Dense(64, activation=activations.relu, name="dense_1",
                    kernel_regularizer=regularizers.l2(0.1))(cnn)
    dense_2 = Dense(16, activation=activations.relu, name="dense_2",
                    kernel_regularizer=regularizers.l2(0.1))(dense_1)
    output = Dense(nclass, activation=activations.softmax, name="dense_3_ecg_view")(dense_2)

    model = models.Model(inputs=inp, outputs=output)
    opt = optimizers.Adam(learning_rate)

    model.compile(optimizer=opt, loss=losses.binary_crossentropy, metrics=['acc'])
    return model

In [8]:
# Model Training
# Build the model with the default learning rate and print its layer summary
model = get_model()
model.summary()

# Where the checkpointed model is written
file_path = "cnn_ecgview_noagesex.h5"

# All three callbacks watch validation accuracy (higher is better):
#   checkpoint - saves the model to file_path each time val_acc reaches a new best
#   early      - stops training after 10 consecutive epochs without a val_acc improvement
#   redonplat  - reduces the learning rate after 7 consecutive epochs without a val_acc improvement
checkpoint = ModelCheckpoint(
    file_path,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)
early = EarlyStopping(
    monitor="val_acc",
    mode="max",
    patience=10,
    verbose=1,
)
redonplat = ReduceLROnPlateau(
    monitor="val_acc",
    mode="max",
    patience=7,
    verbose=2,
)

callbacks_list = [checkpoint, early, redonplat]

# Train for at most 1000 epochs, with 10% of the training arrays held out for validation
model.fit(
    x_cnn,
    y_cnn,
    validation_split=0.1,
    epochs=1000,
    callbacks=callbacks_list,
    verbose=2,
)

# Restore the best checkpoint, so `model` holds the weights with the highest val_acc
# rather than those of the final epoch
model.load_weights(file_path)

# The saved file at file_path can be loaded later with Keras for evaluation

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 9, 1)              0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 9, 16)             96        
_________________________________________________________________
dropout_1 (Dropout)          (None, 9, 16)             0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 7, 32)             1568      
_________________________________________________________________
dropout_2 (Dropout)          (None, 7, 32)             0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 5, 64)             6208      
_________________________________________________________________
dropout_3 (Dropout)          (None, 5, 64)             0   


Epoch 00037: val_acc improved from 0.85403 to 0.85432, saving model to baseline_cnn_ecgview_noagesex.h5
Epoch 38/1000
 - 48s - loss: 0.3816 - acc: 0.8245 - val_loss: 0.3388 - val_acc: 0.8543

Epoch 00038: val_acc did not improve from 0.85432
Epoch 39/1000
 - 47s - loss: 0.3801 - acc: 0.8257 - val_loss: 0.3382 - val_acc: 0.8556

Epoch 00039: val_acc improved from 0.85432 to 0.85562, saving model to baseline_cnn_ecgview_noagesex.h5
Epoch 40/1000
 - 47s - loss: 0.3798 - acc: 0.8258 - val_loss: 0.3401 - val_acc: 0.8536

Epoch 00040: val_acc did not improve from 0.85562
Epoch 41/1000
 - 47s - loss: 0.3808 - acc: 0.8249 - val_loss: 0.3385 - val_acc: 0.8548

Epoch 00041: val_acc did not improve from 0.85562
Epoch 42/1000
 - 47s - loss: 0.3795 - acc: 0.8262 - val_loss: 0.3398 - val_acc: 0.8553

Epoch 00042: val_acc did not improve from 0.85562
Epoch 43/1000
 - 48s - loss: 0.3781 - acc: 0.8270 - val_loss: 0.3227 - val_acc: 0.8609

Epoch 00043: val_acc improved from 0.85562 to 0.86091, saving m