# Model Processing

## Imports & General Settings 

In [15]:
import unittest

from pycwt import Morlet
import matplotlib.pyplot as plt
import sklearn
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import trange, tqdm
import sklearn
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, average_precision_score, roc_curve, roc_auc_score
from sklearn import metrics

# Our imports
from data import WaveletTransform, AFECGDataset, SecondDataset, WrapperDataset
import dsp
from model.blocks import ConvNet, BRNN, SoftmaxAttention
from model.baseline import Baseline
from training import train, test
import utils


%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
testCase = unittest.TestCase()
plt.rcParams.update({'font.size': 12})
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cpu


## Dataset creation

In [17]:
dataset_name='afdb'
# dataset = SecondDataset(dataset_name, '../data/afdb/')
dataset = SecondDataset(dataset_name, '../data/afdb/', wt=WaveletTransform(Morlet(6), resample=20))
class_weights = dataset.load('./backup')

Loaded 2300 samples from backup
torch.Size([2300])
torch.Size([2300, 20, 300])


### Dataset balancing

The dataset is augmented to contain a balanced number of positive and negative examples

In [18]:
heldout = int(len(dataset) * 0.2)
train_size = len(dataset) - heldout
true_examples = dataset.samples[dataset.labels == 1]
true_labels = dataset.labels[dataset.labels == 1]
false_examples = dataset.samples[dataset.labels == 0]
false_labels = dataset.labels[dataset.labels == 0]

true_train_size =  int(len(true_examples) * 0.8)
false_train_size = int(len(false_examples) * 0.8)

true_examples_train, true_examples_test = true_examples[:true_train_size], true_examples[true_train_size:]
true_labels_train, true_labels_test = true_labels[:true_train_size], true_labels[true_train_size:]

false_examples_train, false_examples_test = false_examples[:false_train_size], false_examples[false_train_size:]
false_labels_train, false_labels_test = false_labels[:false_train_size], false_labels[false_train_size:]

augmented_examples_train = []
augmented_labels_train = []
for i in range(len(false_examples_train) // len(true_examples_train)):
    augmented_examples_train.append(true_examples_train.clone())
    augmented_labels_train.append(torch.ones(len(true_examples_train)).long())

dataset.samples = torch.cat([torch.cat(augmented_examples_train), false_examples_train])
dataset.labels = torch.cat([torch.cat(augmented_labels_train), false_labels_train])

test_data = torch.cat([true_examples_test, false_examples_test])
test_labels = torch.cat([true_labels_test, false_labels_test])

train_dataset = dataset
test_dataset = WrapperDataset(test_data, test_labels)

In [19]:
# print(train_dataset.labels.shape)
# print(train_dataset.samples.shape)
print(test_dataset.labels[test_dataset.labels == 1].shape)
print(test_dataset.samples[test_dataset.labels == 1].shape)

torch.Size([4])
torch.Size([4, 20, 300])


In [20]:
# Add channel (single) to dataset
# train_dataset.samples = train_dataset.samples.unsqueeze(1).float()

In [21]:
# test_dataset.samples = test_dataset.samples.unsqueeze(1).float()

## Training

In [26]:
# model = Baseline(add_brnn=True, hidden_size=100)

# model = nn.Sequential(
#     ConvNet(size=(375, 20), batch=False),
#     nn.ReLU()
#     nn.Linear(50, 2)
# )

class OneDNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.m1 = nn.Sequential(
            nn.Conv1d(1, 10, kernel_size=21),
            nn.ReLU()
        )
        self.fc = nn.Sequential(
        nn.Linear(74800, 100),
            nn.ReLU(),
            nn.Linear(100, 2)
        )

    def forward(self, X):
        X = X.unsqueeze(1)
        out = self.m1(X)
        out = out.flatten(start_dim=1)
        return self.fc(out)
        
model = OneDNetwork()

config = dict(
    num_workers=8,
    batch_size=90,
    learning_rate=0.01,
    weight_decay=0.01,
    class_weights=None,
    num_epochs=5,
    is_notebook=True
)

train(model, train_dataset, config)

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=41.0, style=ProgressStyle(description_wid…





RuntimeError: Expected 4-dimensional input for 4-dimensional weight 10 1 3 21, but got 3-dimensional input of size [90, 1, 300] instead

## Testing

In [None]:
y_true = test_dataset.labels
y_pred, test_acc = test(model, test_dataset, config)

In [None]:
results = pd.DataFrame(classification_report(y_true, y_pred, zero_division=0, output_dict=True)).transpose()
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
specificity = tn / (tn+fp)
fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred)
auc_score = metrics.auc(fpr, tpr)

In [None]:
display(results)
print("Specificity:", specificity)
print("AUC:", auc_score)

In [None]:
lr_auc = roc_auc_score(y_true, y_pred)
print('ROC AUC=%.3f' % (lr_auc))
lr_fpr, lr_tpr, _ = roc_curve(y_true, y_pred)
plt.plot(lr_fpr, lr_tpr, marker='.', label='Baseline model')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()

In [None]:
pr_auc = average_precision_score(y_true, y_pred)
print("PR AUC:", specificity)