# Reproducing the results of the paper: Deep Learning for ECG Classification

## Presenter: Enes Kuzucu
## Link to the original paper: https://iopscience.iop.org/article/10.1088/1742-6596/913/1/012004


#Imports

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
from keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import MinMaxScaler, RobustScaler
import pandas as pd
import scipy.io as sio
from os import listdir
from os.path import isfile, join
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras import regularizers
from keras.utils import np_utils

#Get Data

In [None]:

# Clone the DeepECG repository, then copy every zip archive found at its top
# level into the current working directory (the next step unpacks one of them).
!git clone https://github.com/karaposu/DeepECG.git
! cp  /content/DeepECG/*zip  .



import zipfile

# Unpack the training archive into ./data; the context manager guarantees the
# archive handle is closed once extraction finishes.
with zipfile.ZipFile("training2017.zip", mode="r") as archive:
    archive.extractall(path="data")


Cloning into 'DeepECG'...
remote: Enumerating objects: 164, done.[K
remote: Counting objects: 100% (1/1), done.[K
remote: Total 164 (delta 0), reused 0 (delta 0), pack-reused 163[K
Receiving objects: 100% (164/164), 98.51 MiB | 19.48 MiB/s, done.
Resolving deltas: 100% (86/86), done.


#Functions

In [None]:
def to_one_hot(y): # 0. >> [1. 0. 0. 0.]
    """Return the one-hot (categorical) encoding of the class indices in ``y``.

    Delegates to ``np_utils.to_categorical`` without passing an explicit
    class count, e.g. a label of ``0.`` becomes ``[1. 0. 0. 0.]`` when four
    classes are present.
    """
    encoded = np_utils.to_categorical(y)
    return encoded


def change(x):  #  [1. 0. 0. 0.]  >> 0
    """Convert one-hot (or per-class score) rows back to integer class indices.

    Args:
        x: 2-D array-like with one row per sample and one column per class.

    Returns:
        1-D integer NumPy array with one entry per row of ``x``: the column
        index of that row's largest value (the first such index on ties).
    """
    x = np.asarray(x)
    if x.shape[0] == 0:
        # Nothing to decode; argmax would raise if there are also no columns.
        return np.zeros(0, dtype=int)
    # The builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24. argmax returns the first maximal index, matching a manual
    # "find max, then list.index" scan.
    return np.argmax(x, axis=1).astype(int)

In [None]:
# Sanity check: print the working directory, list its contents, and list the
# files extracted into ./data/training2017/.
!pwd
!ls
!ls ./data/training2017/

/content
data  DeepECG  MIT-BH.zip  sample_data	training2017.zip
A00001.hea  A01707.hea	A03413.hea  A05119.hea	A06825.hea
A00001.mat  A01707.mat	A03413.mat  A05119.mat	A06825.mat
A00002.hea  A01708.hea	A03414.hea  A05120.hea	A06826.hea
A00002.mat  A01708.mat	A03414.mat  A05120.mat	A06826.mat
A00003.hea  A01709.hea	A03415.hea  A05121.hea	A06827.hea
A00003.mat  A01709.mat	A03415.mat  A05121.mat	A06827.mat
A00004.hea  A01710.hea	A03416.hea  A05122.hea	A06828.hea
A00004.mat  A01710.mat	A03416.mat  A05122.mat	A06828.mat
A00005.hea  A01711.hea	A03417.hea  A05123.hea	A06829.hea
A00005.mat  A01711.mat	A03417.mat  A05123.mat	A06829.mat
A00006.hea  A01712.hea	A03418.hea  A05124.hea	A06830.hea
A00006.mat  A01712.mat	A03418.mat  A05124.mat	A06830.mat
A00007.hea  A01713.hea	A03419.hea  A05125.hea	A06831.hea
A00007.mat  A01713.mat	A03419.mat  A05125.mat	A06831.mat
A00008.hea  A01714.hea	A03420.hea  A05126.hea	A06832.hea
A00008.mat  A01714.mat	A03420.mat  A05126.mat	A06832.mat
A00009.hea  A01715.hea	

#Data Preprocessing

In [None]:
number_of_classes = 4  # Total number of classes

mypath = 'data/training2017/'
# Regular files whose name starts with 'A' (this skips e.g. REFERENCE.csv).
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f)) and f.startswith('A')]

print(onlyfiles)

# Keep only the MATLAB signal files. Matching on the extension avoids the
# IndexError that a fixed character position raises on short file names.
bats = [f for f in onlyfiles if f.endswith('.mat')]

print(bats)

check = 100  # Minimum length (second dimension of 'val') a recording needs to be kept
big = 10100  # Fixed length every kept recording is cropped / padded to

# Load every recording exactly once: filter on length and fill its row of X in
# the same pass. X is allocated for the worst case (all files kept) and
# trimmed to the real row count afterwards.
X = np.zeros((len(bats), big))
mats = []
for f in bats:
    val = sio.loadmat(join(mypath, f))['val']
    if np.shape(val)[1] < check:
        continue
    # np.resize keeps the first `big` samples of a long recording and fills a
    # short one with repeated copies of itself until it is `big` samples long.
    X[len(mats), :] = np.resize(val[0, :], big)
    mats.append(f)

print(mats)
size = len(mats)
print('Total training size is ', size)

X = X[:size]  # Drop the rows reserved for recordings that were filtered out

# Placeholder for the integer class label of each kept recording.
target_train = np.zeros((size, 1))


# Reference table without a header row: column 0 is compared against the
# first six characters of each file name, column 1 holds the label.
Train_data = pd.read_csv(join(mypath, 'REFERENCE.csv'), sep=',', header=None, names=None)
print(Train_data)


['A03320.mat', 'A00602.hea', 'A03303.hea', 'A01464.mat', 'A01986.mat', 'A05922.hea', 'A03530.hea', 'A05451.hea', 'A04408.mat', 'A07184.hea', 'A02875.mat', 'A03832.mat', 'A01003.mat', 'A05543.mat', 'A03733.mat', 'A05086.mat', 'A07544.hea', 'A03547.hea', 'A00148.hea', 'A04052.hea', 'A07836.hea', 'A07238.mat', 'A05484.mat', 'A04920.mat', 'A05506.mat', 'A07223.mat', 'A04515.hea', 'A06284.mat', 'A00710.mat', 'A06542.mat', 'A00224.mat', 'A03280.mat', 'A05193.mat', 'A05128.hea', 'A06791.hea', 'A03105.mat', 'A00847.mat', 'A02922.hea', 'A02342.hea', 'A06703.hea', 'A06858.mat', 'A02924.mat', 'A08416.hea', 'A04370.mat', 'A01605.hea', 'A08193.hea', 'A05417.hea', 'A08255.hea', 'A04689.mat', 'A04302.hea', 'A01862.hea', 'A07739.hea', 'A08414.mat', 'A04320.hea', 'A00387.mat', 'A01628.mat', 'A00325.hea', 'A06161.mat', 'A06209.hea', 'A04107.mat', 'A01017.mat', 'A03473.mat', 'A00323.mat', 'A03284.mat', 'A04064.mat', 'A03201.mat', 'A00896.hea', 'A04845.mat', 'A04991.hea', 'A01292.hea', 'A07841.mat', 'A029

In [None]:


# Class index for each known reference label; any other label is class 3.
class_index = {'N': 0, 'A': 1, 'O': 2}

# Build the record-name -> label lookup once, instead of scanning the whole
# reference table several times for every recording.
labels_by_record = dict(zip(Train_data[0], Train_data[1]))

for i, file_name in enumerate(mats):
    # The first six characters of the file name are the record identifier.
    # A record missing from the reference table also ends up in class 3.
    label = labels_by_record.get(file_name[:6])
    target_train[i] = class_index.get(label, 3)

Label_set = to_one_hot(target_train)

# NOTE(review): mean and std are taken over the whole array, i.e. before the
# train/validation split below - confirm this is intended.
X = (X - X.mean()) / (X.std())  # Some normalization here
X = np.expand_dims(X, axis=2)  # For Keras's data input size

# Shuffle samples and labels with the same random permutation.
values = list(range(size))
permutations = np.random.permutation(values)
X = X[permutations, :]
Label_set = Label_set[permutations, :]

train = 0.9  # Size of training set in percentage
split_at = int(train * size)  # Index of the first validation sample
X_train = X[:split_at, :]
Y_train = Label_set[:split_at, :]
X_val = X[split_at:, :]
Y_val = Label_set[split_at:, :]

#Model

In [None]:



# 1-D CNN classifier: four convolution stages (the first three each followed
# by max pooling and dropout), global average pooling, then a three-layer
# dense head ending in a softmax over `number_of_classes` outputs.
conv_stages = [
    Conv1D(128, 55, activation='relu', input_shape=(big, 1)),
    MaxPooling1D(10),
    Dropout(0.5),
    Conv1D(128, 25, activation='relu'),
    MaxPooling1D(5),
    Dropout(0.5),
    Conv1D(128, 10, activation='relu'),
    MaxPooling1D(5),
    Dropout(0.5),
    Conv1D(128, 5, activation='relu'),
    # Collapse the time axis by averaging.
    GlobalAveragePooling1D(),
]
dense_head = [
    Dense(256, kernel_initializer='normal', activation='relu'),
    Dropout(0.5),
    Dense(128, kernel_initializer='normal', activation='relu'),
    Dropout(0.5),
    Dense(64, kernel_initializer='normal', activation='relu'),
    Dropout(0.5),
    Dense(number_of_classes, kernel_initializer='normal', activation='softmax'),
]

model = Sequential(conv_stages + dense_head)

model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 10046, 128)        7168      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 1004, 128)        0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 1004, 128)         0         
                                                                 
 conv1d_1 (Conv1D)           (None, 980, 128)          409728    
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 196, 128)         0         
 1D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 196, 128)          0

#Training

In [None]:
# Train with Adam on categorical cross-entropy, reporting accuracy on the
# held-out validation split after every epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
hist = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=50,
    batch_size=256,
    shuffle=True,
    verbose=2,
)


import warnings


# Silence all warnings from here on.
warnings.filterwarnings('ignore')


# Score the final model on the validation split: decode both the one-hot
# targets and the predicted class scores to class indices, then compare.
predictions = model.predict(X_val)
true_classes = change(Y_val)
predicted_classes = change(predictions)
score = accuracy_score(true_classes, predicted_classes)
print("Last epoch's validation score is ", score)


Epoch 1/50
30/30 - 39s - loss: 1.1598 - accuracy: 0.5454 - val_loss: 1.0507 - val_accuracy: 0.5955 - 39s/epoch - 1s/step
Epoch 2/50
30/30 - 22s - loss: 0.9829 - accuracy: 0.5896 - val_loss: 0.9335 - val_accuracy: 0.5955 - 22s/epoch - 725ms/step
Epoch 3/50
30/30 - 22s - loss: 0.9206 - accuracy: 0.5918 - val_loss: 0.8658 - val_accuracy: 0.5955 - 22s/epoch - 720ms/step
Epoch 4/50
30/30 - 22s - loss: 0.8699 - accuracy: 0.5956 - val_loss: 0.7930 - val_accuracy: 0.6284 - 22s/epoch - 719ms/step
Epoch 5/50
30/30 - 22s - loss: 0.8109 - accuracy: 0.6450 - val_loss: 0.7682 - val_accuracy: 0.6882 - 22s/epoch - 718ms/step
Epoch 6/50
30/30 - 22s - loss: 0.7585 - accuracy: 0.6865 - val_loss: 0.6890 - val_accuracy: 0.6987 - 22s/epoch - 717ms/step
Epoch 7/50
30/30 - 22s - loss: 0.7062 - accuracy: 0.7093 - val_loss: 0.6426 - val_accuracy: 0.7233 - 22s/epoch - 717ms/step
Epoch 8/50
30/30 - 22s - loss: 0.6806 - accuracy: 0.7154 - val_loss: 0.6563 - val_accuracy: 0.7327 - 22s/epoch - 717ms/step
Epoch 9/50
