In [1]:
# Mount gdrive into colab
# Exposes the user's Google Drive under /content/drive; later cells copy the
# dataset archive and the split CSVs from/to that mount. The google.colab
# module only exists inside a Colab runtime.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Copy the zipped dataset from the mounted Drive folder into /content.
!cp /content/drive/My\ Drive/Project\ files/CTdata1.zip /content

In [None]:
# Extract the archive quietly (-q) into the current working directory.
# Later cells read from the relative path 'CTdata1/'.
!unzip -q /content/CTdata1.zip

In [2]:
# Import statements
import os
import csv
import sys
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from scipy.io import loadmat
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import Normalizer

In [None]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

# Check GPU availability, use if present (first CUDA device, otherwise CPU)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
# Let cuDNN benchmark convolution algorithms and pick the fastest one
torch.backends.cudnn.benchmark = True

print(use_cuda)

True


In [None]:
# Shuffle data and split into training, validation and test sets using holdout.
#
# NOTE(review): the printed frame has columns named 'A00001_1' and '0', which
# suggests reference_data1.csv has no header row and its first record is being
# consumed as the header. Left unchanged here because later cells select the
# label column by the name '0' -- confirm and fix together with those cells.
# NOTE(review): the IDs look like several segments per recording (A00001_1,
# A00001_2, ...). A row-level shuffle can then put segments of one recording
# into different splits -- confirm whether a recording-level split is wanted.

SPLIT_SEED = 0      # fixed seed so the shuffle (and therefore the split) is reproducible
N_HELD_OUT = 5200   # rows at the tail of the shuffle that are kept out of training
N_VAL = 500         # validation rows
N_TEST = 3500       # test rows

reference = pd.read_csv('CTdata1/reference_data1.csv')
reference = reference.dropna()
reference = reference[reference['0'] != 2.0]   # discard rows labelled 2
shuffled = reference.sample(frac=1.0, random_state=SPLIT_SEED)

assert N_VAL + N_TEST <= N_HELD_OUT < len(shuffled), "split sizes do not fit the data"

# The three slices must be disjoint. Taking validation as shuffled[-N_VAL:]
# would make it a subset of the test slice shuffled[-N_TEST:], so validation
# is taken from the rows immediately before the test slice instead.
# The N_HELD_OUT - N_VAL - N_TEST rows between training and validation stay unused.
train_csv = shuffled[:-N_HELD_OUT]
val_csv = shuffled[-(N_VAL + N_TEST):-N_TEST]
test_csv = shuffled[-N_TEST:]

# The index is written as the first CSV column; the dataset class reads the
# file name from column 1 and the label from column 2.
train_csv.to_csv('training_data1.csv')
val_csv.to_csv('validation_data1.csv')
test_csv.to_csv('test_data1.csv')

print(reference)

print(val_csv)
print(train_csv)

       A00001_1    0
0      A00001_2  0.0
1      A00001_3  0.0
2      A00002_1  0.0
3      A00002_2  0.0
4      A00002_3  0.0
...         ...  ...
24595  A08527_2  0.0
24596  A08527_3  0.0
24597  A08528_1  0.0
24598  A08528_2  0.0
24599  A08528_3  0.0

[17257 rows x 2 columns]
       A00001_1    0
11142  A03863_2  1.0
7650   A02649_1  1.0
13795  A04786_1  0.0
17922  A06207_3  0.0
9184   A03184_2  0.0
...         ...  ...
8652   A03000_2  0.0
20743  A07186_1  0.0
10461  A03626_3  0.0
4186   A01448_3  0.0
1426   A00492_3  0.0

[500 rows x 2 columns]
       A00001_1    0
16878  A05853_1  0.0
4010   A01387_2  0.0
4939   A01710_1  0.0
21363  A07400_3  0.0
14593  A05071_3  0.0
...         ...  ...
1747   A00604_3  0.0
20493  A07095_2  0.0
23423  A08115_2  0.0
3622   A01254_2  0.0
9523   A03301_3  0.0

[12057 rows x 2 columns]


In [3]:
# Restore previously saved CSV files from the mounted Drive folder into /content
# and load the three splits from the current working directory.
# NOTE(review): if the working directory is /content (assumed), this copy
# overwrites any split CSVs generated earlier in the session -- confirm intent.
!cp /content/drive/My\ Drive/Project\ files/*.csv /content/
train_csv = pd.read_csv('training_data1.csv')
val_csv = pd.read_csv('validation_data1.csv')
test_csv = pd.read_csv('test_data1.csv')

In [6]:
# Total number of rows whose label column '0' equals 1, summed over the three splits.
# NOTE: despite its name, count_0 counts label == 1 rows, not label == 0 rows.
count_0 = sum(
    len(split[split['0'] == 1])
    for split in (train_csv, val_csv, test_csv)
)
print(count_0)

2180


In [None]:
# Back up every CSV in /content to the mounted Drive project folder.
!cp /content/*.csv /content/drive/My\ Drive/Project\ files/

In [None]:
class AFDataset(Dataset):
    """Atrial Fibrillation Chirplet Transformed Dataset.

    Each item is one ``.mat`` file whose ``'res'`` variable holds a 2-D array.
    The array is standardised to zero mean and unit variance over all of its
    elements and returned as a float32 tensor of shape ``(1, rows, cols)``
    together with its integer label.

    The CSV is read positionally: column 1 is the file name relative to
    ``root_dir`` and column 2 is the label (column 0 is the saved index).
    """

    def __init__(self, csv_file, root_dir):
        """
        Args:
            csv_file (string): Path to the csv file with labels.
            root_dir (string): Directory with all the signals
        """
        self.reference = pd.read_csv(csv_file)
        self.root_dir = root_dir

    def __len__(self):
        """Number of rows in the reference CSV."""
        return len(self.reference)

    def __getitem__(self, idx):
        """Load, standardise and return ``(signal, label)`` for row ``idx``.

        Args:
            idx (int or scalar tensor): Row position in the reference CSV.

        Returns:
            tuple: ``(torch.FloatTensor of shape (1, rows, cols),
            0-d numpy int64 array holding the label)``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        ECG_name = os.path.join(self.root_dir, self.reference.iloc[idx, 1])
        ECG = loadmat(ECG_name)['res']

        centered = ECG - ECG.mean()
        std = ECG.std()
        # A constant signal has zero variance; dividing by it would fill the
        # whole tensor with NaN and poison the batch. Keep it as all zeros.
        if std > 0:
            centered = centered / std

        # Single conversion to float32, then add the channel dimension.
        signal = torch.as_tensor(centered, dtype=torch.float32)
        signal = signal.reshape(1, ECG.shape[0], ECG.shape[1])

        # Explicit int64 so the collated labels are always a LongTensor,
        # independent of the platform's default integer width.
        label = np.array(int(self.reference.iloc[idx, 2]), dtype=np.int64)

        return signal, label

In [None]:
# Weights for sampler into network, fixes class imbalance


def _balanced_sample_weights(labels):
    """Compute inverse-frequency sampling weights for labels 0 (normal) and 1 (AF).

    Args:
        labels: 1-D array-like of labels.

    Returns:
        tuple: ``(weight_normal, weight_af, per_row)`` where each class weight
        is ``n_rows / n_rows_in_class`` and ``per_row`` is a float64 array
        holding the weight of each row's class. Rows whose label is neither
        0 nor 1 get weight 0 and are therefore never drawn.

    Raises:
        ValueError: if either class has no rows (its weight would be infinite).
    """
    labels = np.asarray(labels)
    is_normal = labels == 0
    is_af = labels == 1
    n_normal = np.count_nonzero(is_normal)
    n_af = np.count_nonzero(is_af)
    if n_normal == 0 or n_af == 0:
        raise ValueError(
            'both classes must be present to balance them '
            '(normal: %d, AF: %d)' % (n_normal, n_af))

    weight_normal = len(labels) / float(n_normal)
    weight_af = len(labels) / float(n_af)

    # Fill a fresh float array from masks computed on the ORIGINAL labels, so
    # the result depends neither on the label dtype (no integer truncation)
    # nor on a weight happening to equal the other class's label value.
    per_row = np.zeros(len(labels), dtype=np.float64)
    per_row[is_normal] = weight_normal
    per_row[is_af] = weight_af
    return weight_normal, weight_af, per_row


weight_normal_train, weight_af_train, weights_train = _balanced_sample_weights(train_csv['0'])
weight_normal_val, weight_af_val, weights_val = _balanced_sample_weights(val_csv['0'])

weights_train = torch.as_tensor(weights_train, dtype=torch.double)
weights_val = torch.as_tensor(weights_val, dtype=torch.double)

# Draw len(weights) rows per epoch, with probability proportional to the weights.
# NOTE(review): val_sampler resamples the validation set as well, so validation
# accuracy is measured on a random class-balanced resample -- confirm intent.
train_sampler = torch.utils.data.sampler.WeightedRandomSampler(weights_train, len(weights_train))
val_sampler = torch.utils.data.sampler.WeightedRandomSampler(weights_val, len(weights_val))

print(weight_normal_train, weight_af_train)

1.1428436018957346 8.000663570006635


In [None]:
# Keyword arguments shared by every DataLoader in this notebook
# NOTE(review): 30 worker processes may exceed the CPUs of the runtime -- confirm.
params = dict(batch_size=32, num_workers=30)

# Hyperparameters
MAX_EPOCHS = 60   # number of training epochs
ALPHA = 1e-5      # learning rate passed to the optimizer
REG = 0.01        # weight decay passed to the optimizer

In [None]:
# Build one dataset per split CSV (all signals live under 'CTdata1/') ...
train_set, val_set, test_set = (
    AFDataset(split_csv, 'CTdata1/')
    for split_csv in ('training_data1.csv', 'validation_data1.csv', 'test_data1.csv')
)

# ... and wrap each in a loader. Training and validation draw through their
# weighted samplers; the test loader iterates the set in order.
train_generator = DataLoader(train_set, sampler=train_sampler, **params)
val_generator = DataLoader(val_set, sampler=val_sampler, **params)
test_generator = DataLoader(test_set, **params)

In [None]:
# Visual sanity check: show channel 0 of the first validation item and its shape.
first_val_signal, _ = val_set[0]
plt.imshow(first_val_signal[0])
print(first_val_signal[0].shape)

In [None]:
class Net(nn.Module):
    """Convolutional front end followed by a bidirectional LSTM and an MLP head.

    Four ``Conv2d -> ReLU -> BatchNorm2d -> Dropout`` stages run in the order
    conv4, conv1, conv2, conv3 (1 -> 3 -> 5 -> 7 -> 9 channels). Their output
    is reshaped to 244 steps of 81 features, passed through the LSTM, flattened
    and mapped by three fully connected layers to 3 logits.

    Attribute names and creation order are part of the saved state dict and of
    the parameter ordering, so they must not be changed.
    """

    def __init__(self):
        super().__init__()
        # Batch norms, one per convolution stage (numbered like their conv).
        self.bn4 = nn.BatchNorm2d(num_features=3)
        self.bn1 = nn.BatchNorm2d(num_features=5)
        self.bn2 = nn.BatchNorm2d(num_features=7)
        self.bn3 = nn.BatchNorm2d(num_features=9)
        # Convolutions; conv4 is the FIRST stage despite its number.
        self.conv4 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=(30, 512))
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=(20, 256))
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=7, kernel_size=(12, 128))
        self.conv3 = nn.Conv2d(in_channels=7, out_channels=9, kernel_size=(8, 64))
        # Classifier head: 244 steps x 100 hidden units x 2 directions in.
        self.fc1 = nn.Linear(in_features=100 * 244 * 2, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=200)
        self.fc3 = nn.Linear(in_features=200, out_features=3)
        # One dropout module shared by every stage.
        self.do1 = nn.Dropout(p=0.2)
        self.lstm_layer = nn.LSTM(
            input_size=9 * 9,
            hidden_size=100,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, x):
        """Map a batch of single-channel inputs to ``(batch, 3)`` logits."""
        conv_stages = (
            (self.conv4, self.bn4),
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in conv_stages:
            x = self.do1(norm(F.relu(conv(x))))

        # NOTE(review): this reshape reinterprets the (N, C, H, W) conv output
        # without permuting axes; if 244 is meant to be the width axis it is
        # not actually moved in front of the features -- confirm. Changing it
        # would be incompatible with already-trained weights.
        sequence = x.reshape(-1, 244, 9 * 9)
        lstm_out, _ = self.lstm_layer(sequence)

        flat = lstm_out.reshape(-1, 244 * 100 * 2)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(self.do1(hidden)))
        return self.fc3(self.do1(hidden))


# Instantiate the model on the selected device (Module.to returns the module itself).
net = Net().to(device)

# Cross-entropy over the logits; Adam with learning rate ALPHA and weight decay REG.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=ALPHA, weight_decay=REG)

In [None]:
# Restore previously trained weights from Drive.
# map_location remaps the stored tensors onto the device selected for this
# session, so a checkpoint saved on a GPU also loads on a CPU-only runtime.
checkpoint_state = torch.load(
    '/content/drive/My Drive/Project files/ctdata1classifier.pt',
    map_location=device,
)
net.load_state_dict(checkpoint_state)

In [None]:
def adjust_learning_rate(optimizer, lrd, epoch, schedule):
    """Scale the learning rate of every parameter group at scheduled epochs.

    Args:
        optimizer: Optimizer whose ``param_groups`` each carry an ``'lr'`` entry.
        lrd: Multiplicative decay factor applied to the current learning rate.
        epoch: Current epoch number.
        schedule: Container of epoch numbers at which the decay is applied.

    Does nothing when ``epoch`` is not in ``schedule``; otherwise prints the
    old and new rate for each group and updates the group in place.
    """
    if epoch not in schedule:
        return

    for group in optimizer.param_groups:
        old_lr = group['lr']
        new_lr = old_lr * lrd
        print('lr decay from {} to {}'.format(old_lr, new_lr))
        group['lr'] = new_lr

In [None]:
print('Running...')

# Per-epoch accuracy history (fractions in [0, 1]), plotted after training.
train_acc = []
val_acc = []

# Loop over epochs
for epoch in range(MAX_EPOCHS):
    # Multiply the learning rate by 0.7 at epochs 10, 20, 30 and 40.
    adjust_learning_rate(optimizer, 0.7, epoch, [10, 20, 30, 40])

    # Training
    # Dropout and BatchNorm behave differently in train and eval mode, so the
    # mode has to be switched explicitly for each phase.
    net.train()
    # Reset per epoch so a partial window left over from the previous epoch
    # does not inflate the first loss report of this one.
    running_loss = 0.0
    correct = 0
    total = 0
    #torch.save(net.state_dict(), '/content/drive/My Drive/Project files/ctdata1classifier.pt')
    for i, (local_batch, local_labels) in enumerate(train_generator, 0):
        # Transfer to GPU
        local_batch, local_labels = local_batch.to(device), local_labels.to(device)

        # Model computations
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(local_batch)
        loss = criterion(outputs, local_labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

        # Predicted class = index of the largest logit (no gradient needed).
        _, predicted = torch.max(outputs.detach(), 1)
        total += local_labels.size(0)
        correct += (predicted == local_labels).sum().item()

    train_acc.append(correct / total)
    print("Train acc : %d %%" % (100 * correct / total))

    # Validation
    # Eval mode: dropout disabled, batch norm uses its running statistics.
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for local_batch, local_labels in val_generator:
            # Transfer to GPU
            local_batch, local_labels = local_batch.to(device), local_labels.to(device)

            # Model computations
            outputs = net(local_batch)
            _, predicted = torch.max(outputs, 1)
            total += local_labels.size(0)
            correct += (predicted == local_labels).sum().item()
    val_acc.append(correct / total)
    print("Val acc : %d %%" % (100 * correct / total))

In [None]:
# Confusion matrix generation

test_cm_gen = DataLoader(test_set, **params)

# Rows are true labels, columns are predicted labels (0 = normal, 1 = AF).
cm_test = np.zeros((2, 2), dtype=int)

# Evaluate with dropout disabled and batch norm using its running statistics,
# then put the network back into whatever mode it was in before this cell.
was_training = net.training
net.eval()
try:
    with torch.no_grad():
        for local_batch, local_labels in test_cm_gen:
            # Transfer to GPU
            local_batch = local_batch.to(device)

            # Model computations
            outputs = net(local_batch)
            # The network has three output units, but rows labelled 2 were
            # removed from the data and the matrix is 2x2. Decide between the
            # two classes that exist so a stray argmax of 2 cannot index
            # outside the matrix.
            predicted = outputs[:, :2].argmax(dim=1).cpu().numpy()
            true_labels = local_labels.cpu().numpy()
            # Unbuffered add: repeated (row, col) pairs are each counted.
            np.add.at(cm_test, (true_labels, predicted), 1)
finally:
    net.train(was_training)

print('\n \n', cm_test)

tn = cm_test[0][0]
fn = cm_test[1][0]
fp = cm_test[0][1]
tp = cm_test[1][1]

In [None]:
# Binary classification metrics from the confusion-matrix counts.
n = tn + tp + fn + fp
acc = (tn + tp) / n

# F1 = 2TP / (2TP + FP + FN); defined as 0 when there are no positives at all.
f1_denominator = 2 * tp + fp + fn
f1 = 2 * tp / f1_denominator if f1_denominator else 0.0

s = (tp + fn) / n   # fraction of samples that are actually positive
p = (tp + fp) / n   # fraction of samples that are predicted positive

# Matthews correlation coefficient. np.sqrt is used because numpy is already
# imported by the notebook whereas the math module is not. The coefficient is
# reported as 0 when a row or column of the confusion matrix is empty.
mcc_denominator = np.sqrt(p * s * (1 - p) * (1 - s))
mcc = (tp / n - s * p) / mcc_denominator if mcc_denominator else 0.0

In [None]:
# Report accuracy, F1 and MCC, one value per line.
print(acc, f1, mcc, sep='\n')

In [None]:
# Accuracy per epoch for the training and validation passes.
accuracy_curves = (
    (train_acc, 'Training Accuracy'),
    (val_acc, 'Validation Accuracy'),
)
for curve, curve_label in accuracy_curves:
    plt.plot(curve, label=curve_label)

plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()