# Imports

In [1]:
from getpass import getpass

token = getpass('Enter your GitHub personal access token: ')
name = getpass('Enter your GitHub name: ')
#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

!git clone https://{token}@github.com/{name}/comp_med.git
#%cd comp_med


Enter your GitHub personal access token: ··········
Enter your GitHub name: ··········
Cloning into 'comp_med'...
remote: Enumerating objects: 66, done.[K
remote: Counting objects: 100% (66/66), done.[K
remote: Compressing objects: 100% (54/54), done.[K
remote: Total 66 (delta 35), reused 30 (delta 11), pack-reused 0 (from 0)[K
Receiving objects: 100% (66/66), 22.04 KiB | 3.67 MiB/s, done.
Resolving deltas: 100% (35/35), done.


In [2]:
!git config --global user.email "vierling.lukas@gmailcom"
!git config --global user.name  "lukasVierling"
!git add .
!git commit -m "My latest changes from Colab"
!git push origin main


fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git


## Make Code Deterministic for Reproducibility

In [69]:
import os
import random
import numpy as np
import torch
# Single seed shared by every random number generator used in this notebook.
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)

# Seed Python, NumPy and PyTorch (CPU and all CUDA devices) with the same value.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
  seed_fn(SEED)

# Ask cuDNN for deterministic kernels and turn off its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


In [2]:
!pip install wfdb



In [3]:
from google.colab import drive

# Mount Google Drive; the data paths used later live below this directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
!touch /content/comp_med/__init__.py
!touch /content/comp_med/data/__init__.py
!touch /content/comp_med/models/__init__.py


In [30]:
import numpy as np
import torch
import torch.optim as optim
from tqdm import tqdm
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

#my imports
from comp_med.models.attentionCNN import CNN_1D
from comp_med.models.oldCNN import CNN_2D
from comp_med.data.preprocessing import get_dataloaders


## Training Helpers

In [67]:
def train(model, criterion, optimizer, train_loader, val_loader, epochs, device="cpu", return_loss=False):
  """Train `model` and print the validation accuracy after every epoch.

  Args:
    model: module to optimize; it is moved to `device`.
    criterion: loss called as `criterion(torch.sigmoid(model(x)), y)`, i.e. it
      receives probabilities, not raw model outputs.
    optimizer: optimizer that is zeroed and stepped once per batch.
    train_loader: iterable yielding `(x, y)` batches used for optimization.
    val_loader: iterable yielding `(x, y)` batches, handed to `eval` after each epoch.
    epochs: number of passes over `train_loader`.
    device: device the model and every batch are moved to.
    return_loss: when True, the per-batch loss history is returned.

  Returns:
    A list with one float per training batch (all epochs concatenated) when
    `return_loss` is True, otherwise None.
  """
  model.to(device)
  loss_tracker = []
  for epoch in range(epochs):
    # Enter training mode explicitly instead of relying on `eval` having
    # switched the model back at the end of the previous epoch.
    model.train()
    pbar = tqdm(train_loader, desc=f"Train the model in epoch {epoch}...")
    for x, y in pbar:
      optimizer.zero_grad()
      x, y = x.to(device), y.to(device)
      probs = torch.sigmoid(model(x))
      batch_loss = criterion(probs, y)
      batch_loss.backward()
      optimizer.step()
      # Keep only the Python float so no tensor outlives the batch.
      loss_value = batch_loss.item()
      pbar.set_description(f"Current loss in epoch {epoch} is {loss_value}")
      loss_tracker.append(loss_value)
    # validation after every epoch
    acc = eval(model, val_loader, device)
    print(f"Acc on val in epoch {epoch} is: {acc}")
  if return_loss:
    return loss_tracker

def eval(model, data_loader, device="cpu"):
  """Compute the accuracy of `model` on `data_loader`.

  A prediction is 1.0 where `sigmoid(model(x)) > 0.5` and 0.0 otherwise; it is
  compared element-wise with `y`.

  Args:
    model: module to evaluate; it is moved to `device`.
    data_loader: iterable yielding `(x, y)` batches.
      NOTE(review): assumes `y` has the same shape as the model output — confirm
      against the data pipeline.
    device: device the model and every batch are moved to.

  Returns:
    Number of matching predictions divided by the number of samples
    (the sum of `y.shape[0]` over all batches).

  Raises:
    ZeroDivisionError: if `data_loader` yields no samples.
  """
  model.to(device)
  # Remember the caller's mode so evaluation does not silently flip an
  # eval-mode model into training mode.
  was_training = model.training
  model.eval()
  correct = 0
  total = 0
  try:
    with torch.no_grad():
      for x, y in data_loader:
        x, y = x.to(device), y.to(device)
        out = model(x)
        preds = (torch.sigmoid(out) > 0.5).float()
        correct += (preds == y).sum().item()
        total += y.shape[0]
  finally:
    model.train(was_training)
  acc = correct / total
  return acc

# Verification Dummy Task

In [46]:
# create a dummy dataset to verify the architecture works
batch_size = 256
length = 5
samples_per_sec = 1000
num_leads = 12
num_samples = 500

t = np.arange(0, length, 1/samples_per_sec)
base_freq = 1

X = np.zeros((num_samples, num_leads, t.size))
labels = np.zeros((num_samples,))

for i in range(num_samples):
  label = i % 2
  for l in range(num_leads):
    amplitude = l+1
    X[i,l] = amplitude * np.sin( 2*np.pi * base_freq * t)
  if label == 1:
    #get a random lead
    lead_idx = np.random.randint(0, num_leads)
    #doulbe the frequency when label is 1
    X[i, lead_idx] = (lead_idx+1) * np.sin( 2*np.pi * base_freq*2 * t)

  #add some noise
  noise = np.random.normal(0, 0.1, size=X[i].shape)
  X[i] = X[i]+noise
  labels[i]= label

X_t = torch.from_numpy(X).float()
y_t = torch.from_numpy(labels).unsqueeze(1).float()

len = X_t.shape[0]
permutation = torch.randperm(len)
train_idx = permutation[:400]
test_idx =permutation[400:]

X_train, y_train = X_t[train_idx], y_t[train_idx]
X_test, y_test = X_t[test_idx] ,y_t[test_idx]
print(X_t.shape)
print(X_test.shape)
print(X_train.shape)
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size = batch_size)

torch.Size([500, 12, 5000])
torch.Size([100, 12, 5000])
torch.Size([400, 12, 5000])


In [57]:
# settings for the sanity-check run on the dummy data
device = "cuda" if torch.cuda.is_available() else "cpu"
lr = 1e-3
epochs = 25

criterion = nn.BCELoss()
model = CNN_1D()
optimizer = optim.Adam(params=model.parameters(), lr=lr)
# The training loader is also passed as the validation loader, so the
# "Acc on val" lines printed during this run are accuracies on the training data.
train(model, criterion, optimizer, train_loader, train_loader, epochs=epochs, device=device)
print("Fianl acc on test is: ", eval(model, test_loader, device))

Current loss in epoch 0 is 0.6096674799919128: 100%|██████████| 2/2 [00:00<00:00,  3.07it/s]


Acc on val in epoch 0 is: 0.4975


Current loss in epoch 1 is 0.18008433282375336: 100%|██████████| 2/2 [00:00<00:00,  3.76it/s]


Acc on val in epoch 1 is: 0.4975


Current loss in epoch 2 is 0.04525245353579521: 100%|██████████| 2/2 [00:00<00:00,  3.74it/s]


Acc on val in epoch 2 is: 0.96


Current loss in epoch 3 is 0.026750681921839714: 100%|██████████| 2/2 [00:00<00:00,  3.74it/s]


Acc on val in epoch 3 is: 0.96


Current loss in epoch 4 is 0.025116825476288795: 100%|██████████| 2/2 [00:00<00:00,  3.74it/s]


Acc on val in epoch 4 is: 0.87


Current loss in epoch 5 is 0.019351104274392128: 100%|██████████| 2/2 [00:00<00:00,  3.75it/s]


Acc on val in epoch 5 is: 0.9475


Current loss in epoch 6 is 0.01452405285090208: 100%|██████████| 2/2 [00:00<00:00,  3.72it/s]


Acc on val in epoch 6 is: 1.0


Current loss in epoch 7 is 0.013068484142422676: 100%|██████████| 2/2 [00:00<00:00,  3.72it/s]


Acc on val in epoch 7 is: 1.0


Current loss in epoch 8 is 0.01156632974743843: 100%|██████████| 2/2 [00:00<00:00,  3.70it/s]


Acc on val in epoch 8 is: 1.0


Current loss in epoch 9 is 0.010668138042092323: 100%|██████████| 2/2 [00:00<00:00,  3.71it/s]


Acc on val in epoch 9 is: 0.535


Current loss in epoch 10 is 0.009036223404109478: 100%|██████████| 2/2 [00:00<00:00,  3.71it/s]


Acc on val in epoch 10 is: 0.4975


Current loss in epoch 11 is 0.0074169025756418705: 100%|██████████| 2/2 [00:00<00:00,  3.70it/s]


Acc on val in epoch 11 is: 0.4975


Current loss in epoch 12 is 0.007038566749542952: 100%|██████████| 2/2 [00:00<00:00,  3.69it/s]


Acc on val in epoch 12 is: 0.4975


Current loss in epoch 13 is 0.005879055242985487: 100%|██████████| 2/2 [00:00<00:00,  3.71it/s]


Acc on val in epoch 13 is: 0.4975


Current loss in epoch 14 is 0.005916973575949669: 100%|██████████| 2/2 [00:00<00:00,  3.72it/s]


Acc on val in epoch 14 is: 0.4975


Current loss in epoch 15 is 0.004978308454155922: 100%|██████████| 2/2 [00:00<00:00,  3.72it/s]


Acc on val in epoch 15 is: 1.0


Current loss in epoch 16 is 0.004654604941606522: 100%|██████████| 2/2 [00:00<00:00,  3.73it/s]


Acc on val in epoch 16 is: 1.0


Current loss in epoch 17 is 0.004090910777449608: 100%|██████████| 2/2 [00:00<00:00,  3.66it/s]


Acc on val in epoch 17 is: 1.0


Current loss in epoch 18 is 0.004083108622580767: 100%|██████████| 2/2 [00:00<00:00,  3.67it/s]


Acc on val in epoch 18 is: 1.0


Current loss in epoch 19 is 0.0045180534943938255: 100%|██████████| 2/2 [00:00<00:00,  3.66it/s]


Acc on val in epoch 19 is: 1.0


Current loss in epoch 20 is 0.003292000386863947: 100%|██████████| 2/2 [00:00<00:00,  3.64it/s]


Acc on val in epoch 20 is: 1.0


Current loss in epoch 21 is 0.0031643244437873363: 100%|██████████| 2/2 [00:00<00:00,  3.63it/s]


Acc on val in epoch 21 is: 1.0


Current loss in epoch 22 is 0.002981016645208001: 100%|██████████| 2/2 [00:00<00:00,  3.67it/s]


Acc on val in epoch 22 is: 1.0


Current loss in epoch 23 is 0.0033904339652508497: 100%|██████████| 2/2 [00:00<00:00,  3.66it/s]


Acc on val in epoch 23 is: 1.0


Current loss in epoch 24 is 0.002561150351539254: 100%|██████████| 2/2 [00:00<00:00,  3.62it/s]


Acc on val in epoch 24 is: 1.0
Fianl acc on test is:  1.0


# Convergence Analysis of the Model

In [60]:
# Build the three loaders from the PTB data on Drive, passing the path of the
# preprocessed tensor file alongside the raw data directory.
train_loader, val_loader, test_loader = get_dataloaders(
    "/content/drive/MyDrive/ptbdb",
    preprocessed_data_path="/content/drive/MyDrive/ptbdb/preprocessed_data.pt",
    train_ratio=0.6,
    val_ratio=0.1,
)

After filtering, we got: 228 records. Healthy: 80, Disease: 148
Patients: train: 120 | val: 20 | test: 60
Load data from given path


In [65]:
# training configuration for the convergence analysis
epochs, lr = 20, 1e-3

# use the GPU when one is available, otherwise fall back to the CPU
device = "cpu"
if torch.cuda.is_available():
  device = "cuda"

In [68]:
# fresh model, optimizer and loss for the convergence run
criterion = nn.BCELoss()
model = CNN_1D()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

# keep the per-batch losses of the whole run in `losses`
losses = train(
    model, criterion, optimizer, train_loader, val_loader,
    epochs=epochs, device=device, return_loss=True,
)
print("Fianl acc on test is: ", eval(model, test_loader, device))

Current loss in epoch 0 is 0.12874650955200195: 100%|██████████| 23/23 [00:07<00:00,  3.06it/s]


Acc on val in epoch 0 is: 0.6530782029950083


Current loss in epoch 1 is 0.04215865209698677: 100%|██████████| 23/23 [00:07<00:00,  3.07it/s]


Acc on val in epoch 1 is: 0.7321131447587355


Current loss in epoch 2 is 0.03640229254961014: 100%|██████████| 23/23 [00:07<00:00,  3.03it/s]


Acc on val in epoch 2 is: 0.8394342762063228


Current loss in epoch 3 is 0.004116324242204428: 100%|██████████| 23/23 [00:07<00:00,  3.04it/s]


Acc on val in epoch 3 is: 0.7662229617304492


Current loss in epoch 4 is 0.0024900087155401707: 100%|██████████| 23/23 [00:07<00:00,  3.04it/s]


Acc on val in epoch 4 is: 0.7445923460898503


Current loss in epoch 5 is 0.0019461291376501322: 100%|██████████| 23/23 [00:07<00:00,  3.01it/s]


Acc on val in epoch 5 is: 0.7337770382695508


Current loss in epoch 6 is 0.02479163557291031: 100%|██████████| 23/23 [00:07<00:00,  3.03it/s]


Acc on val in epoch 6 is: 0.7712146422628952


Current loss in epoch 7 is 0.0054873814806342125: 100%|██████████| 23/23 [00:07<00:00,  2.99it/s]


Acc on val in epoch 7 is: 0.7104825291181365


Current loss in epoch 8 is 0.006484761368483305: 100%|██████████| 23/23 [00:07<00:00,  2.94it/s]


Acc on val in epoch 8 is: 0.7079866888519135


Current loss in epoch 9 is 0.001384336268529296: 100%|██████████| 23/23 [00:07<00:00,  2.96it/s]


Acc on val in epoch 9 is: 0.7737104825291181


Current loss in epoch 10 is 0.0025850138626992702: 100%|██████████| 23/23 [00:07<00:00,  2.92it/s]


Acc on val in epoch 10 is: 0.7820299500831946


Current loss in epoch 11 is 0.0005988054326735437: 100%|██████████| 23/23 [00:07<00:00,  2.89it/s]


Acc on val in epoch 11 is: 0.7612312811980033


Current loss in epoch 12 is 0.00048797886120155454: 100%|██████████| 23/23 [00:07<00:00,  2.91it/s]


Acc on val in epoch 12 is: 0.7021630615640599


Current loss in epoch 13 is 0.00027850610786117613: 100%|██████████| 23/23 [00:08<00:00,  2.87it/s]


Acc on val in epoch 13 is: 0.6930116472545758


Current loss in epoch 14 is 0.0002543050504755229: 100%|██████████| 23/23 [00:08<00:00,  2.81it/s]


Acc on val in epoch 14 is: 0.6946755407653911


Current loss in epoch 15 is 0.00017425759870093316: 100%|██████████| 23/23 [00:07<00:00,  2.92it/s]


Acc on val in epoch 15 is: 0.7004991680532446


Current loss in epoch 16 is 0.005647094454616308: 100%|██████████| 23/23 [00:07<00:00,  2.91it/s]


Acc on val in epoch 16 is: 0.7004991680532446


Current loss in epoch 17 is 0.0005618511349894106: 100%|██████████| 23/23 [00:07<00:00,  2.93it/s]


Acc on val in epoch 17 is: 0.7096505823627288


Current loss in epoch 18 is 0.00016227253945544362: 100%|██████████| 23/23 [00:07<00:00,  2.94it/s]


Acc on val in epoch 18 is: 0.7196339434276207


Current loss in epoch 19 is 0.0001406513329129666: 100%|██████████| 23/23 [00:07<00:00,  2.93it/s]


Acc on val in epoch 19 is: 0.71630615640599
Fianl acc on test is:  0.8981793661496965


# Sensitivity Analysis Towards Data Perturbation
We add random noise $\mathcal{N}(0,\sigma^2)$ to the input of the model and evaluate how robust the model is.

In [79]:
def eval_with_perturbation(model, data_loader, std_levels=(1e-3,5e-3,1e-2,5e-2,1e-1,5e-1,1), device="cpu"):
    """Measure accuracy while Gaussian noise is added to the model input.

    For every value in `std_levels` the whole `data_loader` is evaluated once
    with `x + torch.randn_like(x) * std` as input. A prediction is 1.0 where
    `sigmoid(model(x)) > 0.5` and 0.0 otherwise, compared element-wise with `y`.

    Args:
        model: module to evaluate; it is moved to `device`.
        data_loader: iterable yielding `(x, y)` batches.
        std_levels: standard deviations of the additive noise, one evaluation
            pass per entry.
        device: device the model and every batch are moved to.

    Returns:
        List of accuracies, one per entry of `std_levels` and in the same order.

    Raises:
        ZeroDivisionError: if `data_loader` yields no samples.
    """
    model.to(device)
    # Remember the caller's mode so it can be restored afterwards instead of
    # unconditionally leaving the model in training mode.
    was_training = model.training
    model.eval()
    accs = []
    try:
      with torch.no_grad():
        for std in std_levels:
          correct = 0
          total = 0
          for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            # add noise
            x = x + torch.randn_like(x) * std
            out = model(x)
            preds = (torch.sigmoid(out) > 0.5).float()
            correct += (preds == y).sum().item()
            total += y.shape[0]
          accs.append(correct / total)
    finally:
      model.train(was_training)
    return accs

In [80]:
# evaluate the trained model on the test set at the default noise levels
accs = eval_with_perturbation(model=model, data_loader=test_loader, device=device)

In [81]:
# one accuracy per noise level, in the order of the std_levels that were evaluated
print(accs)

[0.8985165205664194, 0.8961564396493594, 0.8958192852326365, 0.8924477410654079, 0.8739042481456507, 0.7073499662845584, 0.3509777478084963]


# Sensitivity Analysis Towards Hyperparameters
We evaluate the sensitivity towards hyperparameters. We execute every experiment 3 times and report the average.

In [None]:
# Rebuild the loaders for the hyperparameter experiments, using the same paths
# and split ratios as in the convergence analysis.
train_loader, val_loader, test_loader = get_dataloaders(
    "/content/drive/MyDrive/ptbdb",
    preprocessed_data_path="/content/drive/MyDrive/ptbdb/preprocessed_data.pt",
    train_ratio=0.6,
    val_ratio=0.1,
)

## Learning Rate
We evaluate different learning rates $lr \in \{ 10^{-2}, 10^{-3}, 10^{-4}\}$.

In [None]:
# shared training configuration for the hyperparameter experiments
epochs, lr = 20, 1e-3

# use the GPU when one is available, otherwise fall back to the CPU
device = "cpu"
if torch.cuda.is_available():
  device = "cuda"

In [None]:
lrs = [0.01,0.001,0.0001]
# Every setting is trained this many times and the mean test accuracy is
# reported, as stated in the introduction of this section.
num_runs = 3
accs = []
# `sweep_lr` (not `lr`) is the loop variable so the global default `lr` from the
# configuration cell is not left at the last swept value.
for sweep_lr in lrs:
  run_accs = []
  for run in range(num_runs):
    model = CNN_1D()
    optimizer = optim.Adam(model.parameters(), lr=sweep_lr)
    criterion = nn.BCELoss()
    losses = train(model, criterion, optimizer, train_loader, val_loader ,epochs, device, return_loss=True)
    acc=eval(model, test_loader, device)
    print("Fianl acc on test is: ",acc )
    run_accs.append(acc)
  # one averaged accuracy per learning rate, in the order of `lrs`
  accs.append(sum(run_accs) / num_runs)

## Dilation
We evaluate the performance when changing the dilation.

In [None]:
# NOTE(review): `dilations` is empty and never read. The loop below iterates over
# `lrs`, so this cell repeats the learning-rate sweep instead of varying the dilation.
# TODO: fill `dilations`, loop over it and pass each value to the model. The
# constructor arguments of CNN_1D are not visible here — confirm the parameter name.
dilations = []
accs = []
for lr in lrs:
  # a fresh model and optimizer per setting so the runs do not share weights
  model = CNN_1D()
  optimizer = optim.Adam(model.parameters(), lr=lr)
  criterion = nn.BCELoss()
  losses = train(model, criterion, optimizer, train_loader, val_loader ,epochs, device, return_loss=True)
  acc=eval(model, test_loader, device)
  print("Fianl acc on test is: ",acc )
  accs.append(acc)

## Attention Heads
We evaluate the performance when changing the number of attention heads

In [None]:
# NOTE(review): `attention_heads` is empty and never read. The loop below iterates
# over `lrs`, so this cell repeats the learning-rate sweep instead of varying the
# number of attention heads.
# TODO: fill `attention_heads`, loop over it and pass each value to the model. The
# constructor arguments of CNN_1D are not visible here — confirm the parameter name.
attention_heads = []
accs = []
for lr in lrs:
  # a fresh model and optimizer per setting so the runs do not share weights
  model = CNN_1D()
  optimizer = optim.Adam(model.parameters(), lr=lr)
  criterion = nn.BCELoss()
  losses = train(model, criterion, optimizer, train_loader, val_loader ,epochs, device, return_loss=True)
  acc=eval(model, test_loader, device)
  print("Fianl acc on test is: ",acc )
  accs.append(acc)

## Stride
We investigate different Kernel Size / Stride combinations

In [None]:
# NOTE(review): `kernel_size_stride_pairs` is empty and never read. The loop below
# iterates over `lrs`, so this cell repeats the learning-rate sweep instead of
# varying kernel size and stride.
# TODO: fill `kernel_size_stride_pairs`, loop over it and pass each pair to the model.
# The constructor arguments of CNN_1D are not visible here — confirm the parameter names.
kernel_size_stride_pairs = []
accs = []
for lr in lrs:
  # a fresh model and optimizer per setting so the runs do not share weights
  model = CNN_1D()
  optimizer = optim.Adam(model.parameters(), lr=lr)
  criterion = nn.BCELoss()
  losses = train(model, criterion, optimizer, train_loader, val_loader ,epochs, device, return_loss=True)
  acc=eval(model, test_loader, device)
  print("Fianl acc on test is: ",acc )
  accs.append(acc)