# IDEA

- Usar una Inception Network y concatenarla con una Dense Layer que procesa la informacion del paciente.

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import wfdb
import torch
import torch.nn as nn
import ast
from wfdb import processing
import os
from scipy import signal
import scipy
import matplotlib as mpl
from ssqueezepy import cwt

# Installing W&B package

In [2]:
!pip install wandb



In [3]:
!wandb login

wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\Erno/.netrc


In [4]:
import wandb

# PTB-XL Dataset

In [5]:
os.chdir('F:\Datasets\ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1')
os.listdir()

['example_physionet.py',
 'LICENSE.txt',
 'ptbxl_database.csv',
 'RECORDS',
 'records100',
 'records500',
 'scp_statements.csv',
 'SHA256SUMS.txt',
 'wandb']

# Reading Data

In [6]:
idx = 1110

In [7]:
SCP_DATA = pd.read_csv(os.path.join(os.getcwd(), 'scp_statements.csv'))
scp_codes = np.unique(SCP_DATA['Unnamed: 0'].to_numpy())
scp_codes

array(['1AVB', '2AVB', '3AVB', 'ABQRS', 'AFIB', 'AFLT', 'ALMI', 'AMI',
       'ANEUR', 'ASMI', 'BIGU', 'CLBBB', 'CRBBB', 'DIG', 'EL', 'HVOLT',
       'ILBBB', 'ILMI', 'IMI', 'INJAL', 'INJAS', 'INJIL', 'INJIN',
       'INJLA', 'INVT', 'IPLMI', 'IPMI', 'IRBBB', 'ISCAL', 'ISCAN',
       'ISCAS', 'ISCIL', 'ISCIN', 'ISCLA', 'ISC_', 'IVCD', 'LAFB',
       'LAO/LAE', 'LMI', 'LNGQT', 'LOWT', 'LPFB', 'LPR', 'LVH', 'LVOLT',
       'NDT', 'NORM', 'NST_', 'NT_', 'PAC', 'PACE', 'PMI', 'PRC(S)',
       'PSVT', 'PVC', 'QWAVE', 'RAO/RAE', 'RVH', 'SARRH', 'SBRAD',
       'SEHYP', 'SR', 'STACH', 'STD_', 'STE_', 'SVARR', 'SVTAC', 'TAB_',
       'TRIGU', 'VCLVH', 'WPW'], dtype=object)

In [8]:
METADATA = pd.read_csv(os.path.join(os.getcwd(), 'ptbxl_database.csv'))

# Working with data

In [9]:
diagnostic_class = np.unique([i for i in SCP_DATA['diagnostic_class'].to_numpy() if i is not np.nan])
diagnostic_class

array(['CD', 'HYP', 'MI', 'NORM', 'STTC'], dtype='<U4')

In [10]:
diagnostic_subclass = np.unique([i for i in SCP_DATA['diagnostic_subclass'].to_numpy() if i is not np.nan])
diagnostic_subclass

array(['AMI', 'CLBBB', 'CRBBB', 'ILBBB', 'IMI', 'IRBBB', 'ISCA', 'ISCI',
       'ISC_', 'IVCD', 'LAFB/LPFB', 'LAO/LAE', 'LMI', 'LVH', 'NORM',
       'NST_', 'PMI', 'RAO/RAE', 'RVH', 'SEHYP', 'STTC', 'WPW', '_AVB'],
      dtype='<U9')

# Assigning label to data

In [11]:
scp_code2id = {}
for i, code in enumerate(scp_codes):
    scp_code2id[code] = i

id2scp_code = {}
for i, code in enumerate(scp_codes):
    id2scp_code[i] = code
    
len(id2scp_code)

71

# Wandb preparation

In [12]:
config = dict(
    epochs=10,
    classes=len(id2scp_code),
#     kernels=[16, 32],
    batch_size=16,
    learning_rate=0.1,
    architecture="Own-ResNet")

In [13]:
wandb.config

<wandb.sdk.lib.preinit.PreInitObject at 0x21e351ba788>

## Dataset creation

In [14]:
def encode_one_hot(idx):
    x = np.zeros(len(scp_codes))
    x[idx] = 1
    return x

str2dict = lambda x: ast.literal_eval(x)
signal_x = 0

signals = []
labels = []

for i in range(int(METADATA.shape[0] * .4)): # METADATA.shape[0]
    raw_data = METADATA.iloc[i]
    
    file_path = os.path.join(os.getcwd(), *raw_data.filename_lr.split('/'))  
    diagnostic_scp_codes = str2dict(raw_data.scp_codes)
    
    signal, signal_metadata = wfdb.rdsamp(file_path)
    channels = signal_metadata['sig_name']
    
    # Patience data
    age = raw_data.age if raw_data.age is not np.nan else raw_data.age
    sex = raw_data.sex if raw_data.sex is not np.nan else raw_data.sex
    weight = raw_data.weight if raw_data.weight is not np.nan else raw_data.weight
    
    # Normalization
    channel_max = np.max(signal, axis=0)
    channel_min = np.min(signal, axis=0)

    num_channels = signal.shape[-1]
    for i in range(num_channels):
        signal[:,i] = (signal[:,i] - channel_min[i]) / (channel_max[i] - channel_min[i])
    signal_x = signal
    
    # Agarro el que mayor porcentaje de ser tiene
    pc = list(diagnostic_scp_codes.values())
    max_pc_idx = pc.index(max(pc))
    label = list(diagnostic_scp_codes.keys())[max_pc_idx]
    label = scp_code2id[label]
    
    signals.append(signal_x)
#     labels.append(encode_one_hot(label))
    labels.append(label)
#     break

In [15]:
X = torch.tensor(np.array(signals))
X = X.permute(0,2,1)

# y = torch.tensor(np.array(labels))
y = torch.tensor(np.array(labels))#.reshape(-1,1)

X.shape, y.shape

(torch.Size([8734, 12, 1000]), torch.Size([8734]))

In [16]:
def create_training_testing_data(X, y, bz=16):
    train_size = int(.8 * .4 * METADATA.shape[0])
    # train_size = int(.9 * 200)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=bz)

    test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=bz)
    
    return train_dataloader, test_dataloader
    
# create_training_testing_data(X, y, 32)

## AI model

In [17]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, ksize=3, stride=2):
        super(ResidualBlock, self).__init__()
        self.block = nn.Sequential(
            nn.Conv1d(in_channels, hidden_channels, ksize, stride=stride),
            nn.BatchNorm1d(hidden_channels),
            nn.ReLU(),
            nn.Dropout1d(.3),
            nn.Conv1d(hidden_channels, out_channels, ksize, stride=stride),
            nn.BatchNorm1d(out_channels)
        )
        
    def forward(self, x):
        block_out = self.block(x)
#         print("Block Out | Shape {}".format(block_out.shape))
        return nn.ReLU()(x + block_out)


class NeuralNetwork(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, ksize=3, stride=2):
#         NeuralNetwork.__init__(self, in_channels, hidden_channels, out_channels)
        super(NeuralNetwork, self).__init__()
#         NeuralNetwork.__init__(self, in_channels, hidden_channels, out_channels, ksize=3, stride=2)
        self.in_model = nn.Sequential(
            nn.Conv1d(in_channels, hidden_channels, 3, stride=2),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_channels),
            nn.MaxPool1d(ksize),
#             nn.AvgPool1d(ksize)
        )
        self.res1 = ResidualBlock(hidden_channels, 3, hidden_channels, ksize, stride)
        
        self.out_model = nn.Sequential(
            nn.MaxPool1d(ksize),
            nn.Flatten(),  
        )
       
    def forward(self, x):
        x = self.in_model(x)
#         print("Stage 1 | Shape: {}".format(x.shape))
        x = self.res1(x)
#         print("Stage 2 | Shape: {}".format(x.shape))
        x = self.out_model(x)
#         print("Stage 3 | Shape: {}".format(x.shape))
        
        out = nn.Sequential(
            nn.Linear(x.shape[-1], len(scp_codes)),
            nn.Softmax(dim=1)
#             nn.Sigmoid()
        )
        x = out(x)
#         print("Stage 4 | Shape: {}".format(x.shape))
        
        return x

In [18]:
# model = NeuralNetwork(12, 6, 12, 1, 1)
# loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=0.1, weight_decay=0.01)

In [19]:
def train_log(loss, acc, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss, "accuracy": acc})
#     print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

### Training

In [28]:
num_epochs = 10

def train_model(model, criterion, optimizer, train_dataloader, epochs=10):
    total_loss = []
    total_acc = []
    model.train()
    for epoch in range(num_epochs):

        wandb.watch(model, criterion, log="all", log_freq=1)

        loss_values = []
        acc_values = []

        for signals, labels in train_dataloader:
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            pred = model(signals.float())
            loss = criterion(pred, labels.long())
            loss_values.append(loss.item())
            loss.backward()
            optimizer.step()

            pred_indeces = torch.argmax(pred, 1)
            acc = torch.sum(pred_indeces == labels) / pred_indeces.shape[0]
            acc_values.append(acc)
            
            train_log(loss, acc, epoch)

        total_loss.append(sum(loss_values) / len(loss_values))
        total_acc.append(sum(acc_values) / len(acc_values))

        print("Epoch: {} | Loss: {} | Accuracy: {}".format(epoch, total_loss[-1], total_acc[-1]))

    print("Training Complete")
    return total_loss, total_acc

In [21]:
def plot_performance(loss_values, acc_values):
    fig, axs = plt.subplots(2, 1, figsize=(14, 11), sharex=True)
    axs[0].plot(loss_values)
    axs[0].grid(True)
    axs[0].set_ylabel("Loss", size=24)

    axs[1].plot(acc_values)
    axs[1].grid(True)
    axs[1].set_ylabel("Accuracy", size=24)
    axs[1].set_xlabel("Epoch", size=24)
    
    
# plot_performance(total_loss, total_acc)

### Evaluating

In [22]:
def eval_model():
    total_loss = []
    total_acc = []

    with torch.no_grad():
        model.eval()

        loss_values = []
        acc_values = []

        for signals, y in test_dataloader:
            outputs = model(signals.float())
            loss = loss_fn(outputs, y.long())
            loss_values.append(loss.item())

            pred_indeces = torch.argmax(outputs, 1)
            acc = torch.sum(pred_indeces == y) / pred_indeces.shape[0]
            acc_values.append(acc)

            total_loss.append(sum(loss_values) / len(loss_values))
            total_acc.append(sum(acc_values) / len(acc_values))

        print("Epoch: {} | Loss: {} | Accuracy: {}".format(epoch, total_loss[-1], total_acc[-1]))


    print("Testing finished!")
    return total_loss, total_acc

In [23]:
# plot_performance(total_loss, total_acc)

In [24]:
def make():
    model = NeuralNetwork(12, 6, 12, 1, 1)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, weight_decay=0.01)
    
    train_dataloader, test_dataloader = create_training_testing_data(X, y, 32)
    
    return model, loss_fn, optimizer, train_dataloader, test_dataloader

In [29]:
def make_pipeline():
    with wandb.init(project="ECG"):
        model, loss_fn, optimizer, train_dataloader, test_dataloader = make()
        epochs = 10
        
        train_model(model, loss_fn, optimizer, train_dataloader, epochs)
        
#     return model
make_pipeline()

Epoch: 0 | Loss: 4.262819784417 | Accuracy: 0.01397104561328888
Epoch: 1 | Loss: 4.262763258529036 | Accuracy: 0.014840182848274708
Epoch: 2 | Loss: 4.262728283938752 | Accuracy: 0.016124429181218147
Epoch: 3 | Loss: 4.262781356567661 | Accuracy: 0.010844748467206955
Epoch: 4 | Loss: 4.262644465111162 | Accuracy: 0.012699771672487259
Epoch: 5 | Loss: 4.262615410704591 | Accuracy: 0.013958074152469635
Epoch: 6 | Loss: 4.2627794426870125 | Accuracy: 0.01141552533954382
Epoch: 7 | Loss: 4.2627181897969 | Accuracy: 0.012985159642994404
Epoch: 8 | Loss: 4.262692745417764 | Accuracy: 0.012414383701980114
Epoch: 9 | Loss: 4.262692377447538 | Accuracy: 0.013542963191866875
Training Complete


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


### Confusion Matrix

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

import seaborn as sns


y_pred = list(itertools.chain(*y_pred))
y_test = list(itertools.chain(*y_test))


print(classification_report(y_test, y_pred))


cf_matrix = confusion_matrix(y_test, y_pred)

plt.subplots(figsize=(8, 5))

sns.heatmap(cf_matrix, annot=True, cbar=False, fmt="g")

plt.show()