# <span style="color:#DC143C">ECG Heartbeat Categorization</span>

In [None]:
#This dataset has been used in exploring heartbeat classification using deep neural network architectures,
#and observing some of the capabilities of transfer learning on it. The signals correspond to electrocardiogram (ECG)
#shapes of heartbeats for the normal case and the cases affected by different arrhythmias and myocardial infarction.
#These signals are preprocessed and segmented, with each segment corresponding to a heartbeat.

In [None]:
#This dataset consists of two CSV files. Each of these CSV files contains a matrix, with each row representing
#an example in that portion of the dataset. The final element of each row represents the class to which that example belongs.


In [None]:
#This dataset was derivated the MIT-BIH Arrhythmia Dataset. Note that MIT-BIH Arrhythmia Dataset contains only images
#and dataset we work with contains rows of numbers.
#Our data set was preprocessed according to the methodology proposed in this paper. (https://arxiv.org/pdf/1805.00794.pdf)

In [None]:
#Sneak peak of how it was done: https://github.com/koen-aerts/ECG_ML/blob/master/02_import_mitdb_data.ipynb

In [1]:
import numpy as np
import pandas as pd

In [3]:
import torch
from torch.utils.data import DataLoader
from torch import nn,optim
import sys
from tqdm import tqdm

In [14]:
import os
tmp = []
for dirname, _, filenames in os.walk(r'C:\Users\user\Desktop\ekg\input'):
    for filename in filenames:
        tmp.append(os.path.join(dirname, filename))
        print(os.path.join(dirname, filename))

C:\Users\user\Desktop\ekg\input\mitbih_test.csv
C:\Users\user\Desktop\ekg\input\mitbih_train.csv


In [16]:
tmp[1]

'C:\\Users\\user\\Desktop\\ekg\\input\\mitbih_train.csv'

In [4]:
def my_DataLoader(train_root,test_root,batch_size = 100, val_split_factor = 0.2):

    train_df = pd.read_csv(train_root, header=None)
    test_df = pd.read_csv(test_root, header=None)

    train_data = train_df.to_numpy()
    test_data = test_df.to_numpy()

    train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data[:, :-1]).float(),
                                                   torch.from_numpy(train_data[:, -1]).long(),)
    test_dataset = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :-1]).float(),
                                                  torch.from_numpy(test_data[:, -1]).long())

    train_len = train_data.shape[0]
    val_len = int(train_len * val_split_factor)
    train_len -= val_len

    train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_len, val_len])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    return train_loader, val_loader, test_loader

In [5]:
#define the NET model

class  conv_net(nn.Module):

    def __init__(self, num_of_class):
        super(conv_net, self).__init__()

        self.model = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.MaxPool1d(2),

            nn.Conv1d(16, 64, kernel_size=3, stride=1, padding=1),
            nn.MaxPool1d(2),

            nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.MaxPool1d(2),

        )

        self.linear = nn.Sequential(
            nn.Linear(2944,500),
            nn.LeakyReLU(inplace=True),
            nn.Linear(500, num_of_class),

        )


    def forward(self,x):
        x = x.unsqueeze(1)
        x = self.model(x)
        # print(x.shape)
        x = x.view(x.size(0), -1)
        #x [b, 2944]
        # print(x.shape)
        x = self.linear(x)

        return x

In [6]:
batch_size=1000
lr = 3e-3
epochs = 50
torch.manual_seed(1234)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

using cpu device.


In [7]:
def evalute(model, loader):
    model.eval()

    correct = 0
    total = len(loader.dataset)
    val_bar = tqdm(loader, file=sys.stdout)
    for x, y in val_bar:
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            logits = model(x)
            pred = logits.argmax(dim=1)
        correct += torch.eq(pred, y).sum().float().item()


    return correct / total


In [17]:
def main():

    train_loader, val_loader, test_loader = my_DataLoader(tmp[1],
                                                          tmp[0],
                                                          batch_size=batch_size,
                                                          val_split_factor=0.2)

    model = conv_net(5).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criteon = nn.CrossEntropyLoss()

    best_acc, best_epoch = 0, 0
    global_step = 0

    for epoch in range(epochs):

        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, (x, y) in enumerate(train_bar):
            # x: [b, 187], y: [b]
            x, y = x.to(device), y.to(device)

            model.train()
            logits = model(x)
            loss = criteon(logits, y)

            optimizer.zero_grad()
            loss.backward()

            # for param in model.parameters():
            #     print(param.grad)

            optimizer.step()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

            global_step += 1

        if epoch % 1 == 0:  # You can change the validation frequency as you wish

            val_acc = evalute(model, val_loader)
            print('val_acc = ',val_acc)
            if val_acc > best_acc:
                best_epoch = epoch
                best_acc = val_acc

                torch.save(model.state_dict(), 'best.mdl')

    print('best acc:', best_acc, 'best epoch:', best_epoch)

    model.load_state_dict(torch.load('best.mdl'))
    print('loaded from ckpt!')

    test_acc = evalute(model, test_loader)
    print('test acc:', test_acc)


if __name__ == '__main__':
    main()

train epoch[1/50] loss:0.144: 100%|████████████████████████████████████████████████████| 71/71 [00:35<00:00,  2.00it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 18/18 [00:03<00:00,  4.80it/s]
val_acc =  0.9391776127926899
train epoch[2/50] loss:0.086: 100%|████████████████████████████████████████████████████| 71/71 [00:35<00:00,  2.00it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 18/18 [00:03<00:00,  5.19it/s]
val_acc =  0.9540262707024557
train epoch[3/50] loss:0.138: 100%|████████████████████████████████████████████████████| 71/71 [00:35<00:00,  2.01it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 18/18 [00:03<00:00,  5.14it/s]
val_acc =  0.9696744717304397
train epoch[4/50] loss:0.032: 100%|████████████████████████████████████████████████████| 71/71 [00:35<00:00,  1.99it/s]
100%|█████████████████████████████████████████████████████████████████