### Train a 1D CNN model to classify the synthetic ECG data and test the model on real data 

In [1]:
from synDataLoader import syn_mitbih, mixed_mitbih
from DataLoader import mitbih_train, mitbih_test

In [2]:
# Training set combining real and synthetic beats: 200 real and 800 synthetic
# samples are requested (how they are drawn is decided inside mixed_mitbih).
mixed_ecg = mixed_mitbih(real_samples=200, syn_samples=800)

In [13]:
# Synthetic-only training set, requested with n_samples=800.
# NOTE(review): whether n_samples is a total or a per-class count, and what
# reshape=True does to each sample, is decided inside syn_mitbih -- confirm there.
syn_ecg = syn_mitbih(n_samples=800, reshape=True)

In [14]:
# Real-only training set, requested with n_samples=200.
# NOTE(review): presumably oneD=True yields samples shaped for Conv1d
# (channel, length); verify against DataLoader.mitbih_train.
real_ecg = mitbih_train(n_samples=200, oneD=True)

In [11]:
# Real test set used to evaluate every model below, requested with n_samples=500.
# NOTE(review): presumably oneD=True yields samples shaped for Conv1d
# (channel, length); verify against DataLoader.mitbih_test.
real_test_ecg = mitbih_test(n_samples=500, oneD=True)

In [12]:
from torch.utils import data
# One DataLoader per dataset, all with the same batching configuration
# (batches of 32, 4 worker processes, reshuffled every epoch). The test loader
# is shuffled as well; the evaluation code only aggregates over all batches.
syn_loader, real_loader, mixed_loader, test_real_loader = (
    data.DataLoader(dataset, batch_size=32, num_workers=4, shuffle=True)
    for dataset in (syn_ecg, real_ecg, mixed_ecg, real_test_ecg)
)

In [4]:
#Define a simple CNN classifier 
import torch
import torch.nn as nn
import torch.nn.functional as F


class ECG_Net(nn.Module):
    """Small 1D convolutional classifier that outputs 5 class logits.

    Layer order executed by ``forward``::

        conv1 -> ReLU -> conv2 -> ReLU -> dropout -> max-pool(3)
              -> conv2 (same weights again) -> ReLU -> max-pool(3)
              -> flatten -> fc1 -> ReLU -> fc2

    ``fc1`` expects 1152 flattened features, i.e. 64 channels x 18 time steps.
    With the unpadded, stride-1 convolutions above, that is what an input of
    shape ``(batch, 1, L)`` with ``L`` between 187 and 195 produces.

    NOTE(review): ``conv3`` is registered (so it is initialised, handed to the
    optimizer and stored in the state dict) but ``forward`` never calls it;
    the second convolution stage reuses ``conv2`` instead. Switching to
    ``conv3`` (kernel 3 instead of 6) would change the flattened size, so
    ``fc1`` would have to be resized as well. Confirm which was intended.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor (single input channel, 64 filters).
        self.conv1 = nn.Conv1d(1, 64, 6)
        self.conv2 = nn.Conv1d(64, 64, 6)
        self.conv3 = nn.Conv1d(64, 64, 3)  # currently unused, see class docstring

        # Parameter-free helpers shared by the forward pass.
        self.dropout = nn.Dropout(p=0.5)
        self.pool = nn.MaxPool1d(3)
        self.flatten = nn.Flatten()

        # Classification head: 1152 features -> 100 hidden units -> 5 logits.
        self.fc1 = nn.Linear(1152, 100)
        self.fc2 = nn.Linear(100, 5)

    def forward(self, x):
        """Return raw (un-normalised) class scores of shape ``(batch, 5)``."""
        # First stage: two convolutions, then dropout and down-sampling by 3.
        stage1 = F.relu(self.conv2(F.relu(self.conv1(x))))
        stage1 = self.pool(self.dropout(stage1))

        # Second stage: conv2 is applied a second time, then pooled again.
        stage2 = self.pool(F.relu(self.conv2(stage1)))

        hidden = F.relu(self.fc1(self.flatten(stage2)))
        return self.fc2(hidden)


In [5]:
def train(model, train_data_loader, test_data_loader, epochs, criterion, optimizer, filename="test_cm", eval_every=5):
    """Train ``model`` and evaluate it on ``test_data_loader``.

    Args:
        model: network to optimise. Every batch is cast to float64 before the
            forward pass, so the model's parameters must be double precision.
        train_data_loader: iterable of ``(inputs, labels)`` batches that also
            supports ``len()``.
        test_data_loader: batches handed to ``_eval`` and ``_final_eval``.
        epochs: number of passes over ``train_data_loader``.
        criterion: loss, called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        filename: forwarded unchanged to ``_final_eval``.
        eval_every: run ``_eval`` after every ``eval_every``-th epoch. The
            default of 5 is the interval that used to be hard-coded; ``0`` or
            ``None`` disables the periodic evaluation.

    After each epoch the mean of the per-batch losses and the training
    accuracy are printed. ``_final_eval`` is always run once after the last
    epoch. Returns None.
    """
    for epoch in range(epochs):  # loop over the dataset multiple times
        # _eval() leaves the model in eval mode, so switch back every epoch.
        model.train()
        total_loss = 0.0
        total = 0
        correct = 0

        for inputs, labels in train_data_loader:
            inputs = inputs.double()
            labels = labels.long()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # running statistics for this epoch
            total_loss += loss.item()
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_train_loss = total_loss / len(train_data_loader)
        epoch_train_acc = correct / total
        print(f'Epoch {epoch + 1}, train loss = {epoch_train_loss}, train acc = {epoch_train_acc}')

        if eval_every and (epoch + 1) % eval_every == 0:
            _eval(model, test_data_loader, criterion, epoch)

    _final_eval(model, test_data_loader, criterion, filename)

    print('Finished Training and testing')

In [6]:
def _eval(model, real_test_loader, criterion, epoch):
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(real_test_loader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.double()
            labels = labels.long()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # print statistics
            total_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_test_loss = total_loss / len(real_test_loader)
        epoch_test_acc = correct / total
    
    print('=====================================================')
    print(f'Epoch {epoch+1}, test loss = {epoch_test_loss}, test acc = {epoch_test_acc}')
    print('=====================================================')

In [7]:
# Human-readable class names, indexed by the integer label used in the data.
# NOTE(review): the order assumes label 3 is 'Unknown' and label 4 is
# 'Fusion Beats' -- verify against the label encoding of the data loaders.
# NOTE(review): 'Superventrical' is presumably a typo for 'Supraventricular';
# left untouched because these strings are used as dict keys and report labels.
classes = [
    'Non-Ectopic Beats',
    'Superventrical Ectopic',
    'Ventricular Beats',
    'Unknown',
    'Fusion Beats',
]
# Short tick labels for the confusion-matrix heat map (one per class).
classes_idx = ['1', '2', '3', '4', '5']
# Per-class tallies filled by _eval_single_class (two independent dicts).
correct_pred = dict.fromkeys(classes, 0)
total_pred = dict.fromkeys(classes, 0)

In [8]:
def _eval_single_class(model, real_test_loader, criterion, epoch):
    """Print the accuracy of ``model`` for each class in ``classes``.

    Args:
        model: network to evaluate; it is put into eval mode and called on
            float64 inputs.
        real_test_loader: iterable of ``(inputs, labels)`` batches; labels are
            integer indices into the module-level ``classes`` list.
        criterion: unused; kept so the signature matches ``_eval``.
        epoch: unused; kept so the signature matches ``_eval``.

    The module-level ``correct_pred`` / ``total_pred`` dicts hold the tallies
    of this call afterwards. They are reset first, so calling this function
    once per evaluation round no longer adds up counts from earlier rounds.
    Classes with no sample in the loader are reported as "n/a" instead of
    raising ``ZeroDivisionError``. Returns None.
    """
    # Reset in place so other references to the shared dicts stay valid.
    for classname in classes:
        correct_pred[classname] = 0
        total_pred[classname] = 0

    model.eval()
    with torch.no_grad():
        for inputs, labels in real_test_loader:
            inputs = inputs.double()
            labels = labels.long()

            predictions = model(inputs).argmax(dim=1)

            # collect the correct predictions for each class
            for label, prediction in zip(labels.view(-1).tolist(),
                                         predictions.view(-1).tolist()):
                classname = classes[label]
                if label == prediction:
                    correct_pred[classname] += 1
                total_pred[classname] += 1

    # print accuracy for each class
    for classname, correct_count in correct_pred.items():
        seen = total_pred[classname]
        if seen == 0:
            print("Accuracy for class {:5s} is: n/a (no samples)".format(classname))
            continue
        accuracy = 100 * float(correct_count) / seen
        print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
                                                       accuracy))

In [9]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

def _final_eval(model, real_test_loader, criterion, filename="test_cm"):
    """Run the final evaluation: confusion matrix, heat map and report.

    Args:
        model: trained network; it is put into eval mode (so dropout is
            disabled) and called on float64 inputs.
        real_test_loader: iterable of ``(inputs, labels)`` batches with
            integer class labels.
        criterion: unused; kept so the signature matches the other
            evaluation helpers.
        filename: the heat map is saved as ``f'{filename}.pdf'``.

    Prints the confusion matrix, the per-class accuracy in percent and
    scikit-learn's classification report, and shows/saves the heat map.
    Returns None.
    """
    # Fix the label set explicitly so the matrix always has one row/column per
    # entry of `classes`, even if a class never occurs in this test data.
    label_ids = list(range(len(classes)))

    # Without this the model could still be in train mode (dropout active)
    # whenever the last training epoch was not followed by _eval().
    model.eval()

    # Seed with empty tensors so torch.cat also works for an empty loader.
    pred_batches = [torch.zeros(0, dtype=torch.long, device='cpu')]
    label_batches = [torch.zeros(0, dtype=torch.long, device='cpu')]
    with torch.no_grad():
        for inputs, labels in real_test_loader:
            inputs = inputs.double()
            labels = labels.long()

            predictions = model(inputs).argmax(dim=1)

            # Collect batch results on the CPU for scikit-learn
            pred_batches.append(predictions.view(-1).cpu())
            label_batches.append(labels.view(-1).cpu())

    y_pred = torch.cat(pred_batches).numpy()
    y_true = torch.cat(label_batches).numpy()

    # Confusion matrix (rows: true label, columns: predicted label)
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    print(cm)
    cm_df = pd.DataFrame(cm,
                     index = classes_idx, 
                     columns = classes_idx)
    plt.figure(figsize=(6.5,5))
    sns.heatmap(cm_df, annot=True, fmt='d', cmap='cubehelix_r')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout() # keeps labels from being cutoff when saving as pdf
    plt.savefig(f'{filename}.pdf')
    plt.show()

    # Per-class accuracy: correct predictions divided by the number of true
    # samples of that class (a class without samples yields nan).
    class_accuracy = 100*cm.diagonal() / cm.sum(1)
    print(class_accuracy)

    # print classification report
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=classes))

In [42]:
# Experiment: train on the synthetic loader, evaluate on the real test loader.
import torch.optim as optim

# train() casts every batch to float64, so the weights must be double as well.
ECG_model = ECG_Net().double()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(ECG_model.parameters(), momentum=0.9, lr=0.0005)
train(
    ECG_model,
    syn_loader,
    test_real_loader,
    epochs=50,
    criterion=criterion,
    optimizer=optimizer,
    filename='synthetic_data',
)

In [43]:
# Experiment: train on the real training loader, evaluate on the real test loader.
import torch.optim as optim

# A fresh network per experiment; double precision to match train()'s float64 inputs.
ECG_model = ECG_Net().double()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(ECG_model.parameters(), momentum=0.9, lr=0.0005)
train(
    ECG_model,
    real_loader,
    test_real_loader,
    epochs=50,
    criterion=criterion,
    optimizer=optimizer,
    filename='real_data',
)

In [11]:
# Experiment: real training loader again, results saved under 'real_data_small'.
# NOTE(review): apart from the output filename this is identical to the
# 'real_data' run; presumably real_ecg / real_loader were rebuilt with a
# different n_samples before this cell was executed -- confirm.
import torch.optim as optim

ECG_model = ECG_Net().double()  # float64 weights, matching train()'s input cast
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(ECG_model.parameters(), momentum=0.9, lr=0.0005)
train(
    ECG_model,
    real_loader,
    test_real_loader,
    epochs=50,
    criterion=criterion,
    optimizer=optimizer,
    filename='real_data_small',
)

In [13]:
# Experiment: train on the mixed (real + synthetic) loader, evaluate on the
# real test loader.
import torch.optim as optim

ECG_model = ECG_Net().double()  # float64 weights, matching train()'s input cast
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(ECG_model.parameters(), momentum=0.9, lr=0.0005)
train(
    ECG_model,
    mixed_loader,
    test_real_loader,
    epochs=50,
    criterion=criterion,
    optimizer=optimizer,
    filename='mixed_data',
)