# Ensembles for the MIT-BIH Dataset
The goal of this notebook is to evaluate the potential benefits of an ensemble model for the MIT-BIH Arrhythmia Database: https://physionet.org/physiobank/database/mitdb/  
The data was downloaded from Kaggle: https://www.kaggle.com/shayanfazeli/heartbeat  


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.engine.saving import load_model
from sklearn.metrics import accuracy_score, f1_score, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

Using TensorFlow backend.


### Read the data and normalize

In [2]:
# Number of signal columns per row; the column right after them holds the class label.
timepoints_per_sample = 187
train_path = "../input/mitbih_train.csv"
test_path = "../input/mitbih_test.csv"

# Fixed seed so the shuffled training order is identical after Restart & Run All.
RANDOM_SEED = 42

df_train = pd.read_csv(train_path, header=None)
df_train = df_train.sample(frac=1, random_state=RANDOM_SEED)
df_test = pd.read_csv(test_path, header=None)


def split_signals_and_labels(frame):
    """Split a raw data frame into a signal array and a label vector.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame read with ``header=None``: the first ``timepoints_per_sample``
        columns are the signal, the next column is the class label.

    Returns
    -------
    signals : numpy.ndarray
        Copy of the signal columns with a trailing singleton axis,
        i.e. indexed as ``signals[row, time, 0]``.
    labels : numpy.ndarray
        Label column cast to ``int8``.
    """
    # np.array(...) copies, so later in-place edits of the signals never touch `frame`.
    signals = np.array(frame.iloc[:, :timepoints_per_sample].values)[..., np.newaxis]
    labels = np.array(frame.iloc[:, timepoints_per_sample].values).astype(np.int8)
    return signals, labels


X, Y = split_signals_and_labels(df_train)
X_test, Y_test = split_signals_and_labels(df_test)

def normalize_unpadded(signals):
    """Z-score every row of ``signals`` in place, ignoring trailing zero padding.

    ``signals`` is indexed as ``signals[row, time, 0]``. For each row, the
    segment from the first time point up to and including the last non-zero
    value is shifted to zero mean and scaled to unit standard deviation; the
    zeros after that segment are left untouched.

    Edge cases:
    * a row containing only zeros is skipped (there is nothing to normalize);
    * a segment whose standard deviation is zero is only mean-centred, so the
      array never receives NaN/inf from a division by zero.

    Returns ``None``; the array is modified in place.
    """
    for sample_idx in range(signals.shape[0]):
        beat = signals[sample_idx, :, 0]  # view: writes go through to `signals`
        nonzero_positions = np.flatnonzero(beat)
        if nonzero_positions.size == 0:
            # Only padding: a backwards scan for the last non-zero value
            # would run off the front of the array here.
            continue
        first_zero_sample = nonzero_positions[-1] + 1
        segment = beat[:first_zero_sample]
        segment -= segment.mean()
        spread = segment.std()
        if spread > 0:
            segment /= spread


# normalize without taking the zero-padding at the end into account:
normalize_unpadded(X)
normalize_unpadded(X_test)

## Model Averaging

The following is a simple approach to aggregate the results from several classification models.

In [3]:
# Saved models that make up the ensemble, in the order they are combined.
model_paths = (
    "../saved_trained_models/paper_resnet_mitbih.h5",
    "../saved_trained_models/own_cnn_mitbih.h5",
    "../saved_trained_models/own_lstm_mitbih.h5",
)

model_ensemble = [load_model(model_path) for model_path in model_paths]

# Keep the individual models reachable under their own names as well.
resnet_mitbih, own_cnn_mitbih, own_lstm_mitbih = model_ensemble

# Number of ensemble members, used as the divisor when averaging outputs.
MODEL_NUM = len(model_ensemble)

Average the outputs of each model's final (sigmoid) layer and assign every sample to the class with the highest averaged score.

In [4]:
# One prediction array per ensemble member, all computed on the same test set.
member_outputs = [member.predict(X_test) for member in model_ensemble]

# Element-wise mean of the members' outputs.
ensemble_output_1 = sum(member_outputs) / MODEL_NUM

# Predicted class = index of the largest averaged output along the last axis.
prediction_1 = np.argmax(ensemble_output_1, axis=-1)

In [5]:
# Fraction of test samples whose predicted class matches the label.
ensemble_accuracy = accuracy_score(Y_test, prediction_1)
print("Accuracy: {0:.3f}".format(ensemble_accuracy))

# Macro average: F1 is computed per class and then averaged with equal class weight.
ensemble_macro_f1 = f1_score(Y_test, prediction_1, average="macro")
print("F1 score: {0:.3f}".format(ensemble_macro_f1))

Accuracy: 0.988
F1 score: 0.927


The performance of the created ensemble is slightly better than the baseline.