# ECG-ViEW 

In [1]:
import pandas as pd
import numpy as np
import pylab as plt
import seaborn as sns
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from keras.layers import Dense, Input, Dropout, Convolution1D, MaxPool1D, GlobalMaxPool1D, GlobalAveragePooling1D, \
    concatenate
from keras import regularizers
from keras.utils import to_categorical
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from sklearn import preprocessing
import imblearn
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from collections import Counter
import random
from sklearn.preprocessing import RobustScaler

Using TensorFlow backend.


## CNN with Robust Scaling, SMOTE 10%: Experiments

In [2]:
# getting data into correct format for tensorflow backend
df_cnn_train = pd.read_csv("train.csv", header=None)
df_cnn_train = df_cnn_train.sample(frac=1)
df_cnn_test = pd.read_csv("test.csv", header=None)

y_cnn = np.array(df_cnn_train[11].values).astype(np.int8)
y_cnn=to_categorical(y_cnn)
x_cnn = np.array(df_cnn_train[list(range(11))].values)[..., np.newaxis]
y_cnn_test = np.array(df_cnn_test[11].values).astype(np.int8)
x_cnn_test = np.array(df_cnn_test[list(range(11))].values)[..., np.newaxis]

In [3]:
def get_model():
    # number of categories in our problem
    nclass = 2
    
    # shape of input
    inp = Input(shape=(11, 1))
    
    # 1 -- 16 filters
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid")(inp)
    img_1 = Dropout(rate=0.1)(img_1) #to prevent overfitting

    # 2 -- 32 filters
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Dropout(rate=0.1)(img_1) #to prevent overfitting
    
    # 3 -- 64 filters
    img_1 = Convolution1D(64, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Dropout(rate=0.1)(img_1) #to prevent overfitting
    
    # 2 -- 256 filters # NEW 
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = GlobalMaxPool1D()(img_1)
    img_1 = Dropout(rate=0.1)(img_1) #to prevent overfitting

    
    # 4 -- Dense layers
    dense_1 = Dense(64, activation=activations.relu, name="dense_1", kernel_regularizer=regularizers.l1(l=0.1))(img_1)
    dense_1 = Dense(16, activation=activations.relu, name="dense_2", kernel_regularizer=regularizers.l1(l=0.1))(dense_1)
    dense_1 = Dense(nclass, activation=activations.softmax, name="dense_3_ecg_view")(dense_1)
    
    
    model = models.Model(inputs=inp, outputs=dense_1)
    opt = optimizers.Adam(0.001)

    model.compile(optimizer=opt, loss=losses.binary_crossentropy, metrics=['acc'])
    model.summary()
    return model

In [4]:
model = get_model()
file_path = "baseline_cnn_ecgview_0427.h5"

# checkpointing the model's weight based on the accuracy of the model
checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

# set early stopping based on accuracy improving or not. It stops after 5 epochs of no accuracy improvement
early = EarlyStopping(monitor="val_acc", mode="max", patience=5, verbose=1)

# reduces learning rate when a metric has stopped improving
redonplat = ReduceLROnPlateau(monitor="val_acc", mode="max", patience=3, verbose=2)

# defining the callbacks list to include the above parameters
callbacks_list = [checkpoint, early, redonplat]

# train the model
model.fit(x_cnn, y_cnn, epochs=1000, verbose=2, callbacks=callbacks_list, validation_split=0.1)
model.load_weights(file_path)


Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 11, 1)             0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 7, 16)             96        
_________________________________________________________________
dropout_1 (Dropout)          (None, 7, 16)             0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 5, 32)             1568      
_________________________________________________________________
dropout_2 (Dropout)          (None, 5, 32)             0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 3, 64)             6208      
_________________________________________________________________
dropout_3 (Dropout)          (None, 3, 64)             0   

KeyboardInterrupt: 

In [8]:
# test the model
pred_test = model.predict(x_cnn_test)
pred_test = np.argmax(pred_test, axis=-1)

# get f1 score of the model & print it. The f1 score considers the precision & recall.
f1 = f1_score(y_cnn_test, pred_test, average="macro")
print("Test f1 score : %s "% f1)

# get ROC AUC score of the model & print it
roc = roc_auc_score(y_cnn_test, pred_test)
print("Test ROC AUC score : %s "% roc)

# get the accuracy and print it
acc = accuracy_score(y_cnn_test, pred_test)
print("Test accuracy score : %s "% acc)

Test f1 score : 0.8508141863595394 
Test ROC AUC score : 0.8532264145919009 
Test accuracy score : 0.8662980769230769 


## Experiments:
Without robust scaling:
* Test f1 score : 0.8389719144071639 
* Test ROC AUC score : 0.8438790062228122 
* Test accuracy score : 0.8546634615384615 

With robust scaling (L2 regularization in the dense layers):
* Test f1 score : 0.9079003131812582 
* Test ROC AUC score : 0.9239154633754072 
* Test accuracy score : 0.9150961538461538 

With robust scaling (L1 regularization in the dense layers):
* Test f1 score : 0.8508141863595394 
* Test ROC AUC score : 0.8532264145919009 
* Test accuracy score : 0.8662980769230769

With robust scaling with SGD instead of Adam as the optimizer: 
* Test f1 score : 0.7498124657169878 
* Test ROC AUC score : 0.7431943352721175 
* Test accuracy score : 0.7838942307692308 