In [1]:
# Import
import glob
import os

import numpy as np
import pandas as pd
import scipy.io as sio
import torch
import torch.nn as nn
import torchvision
from torch.utils.data.dataset import Dataset
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
# Device: run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Hyper Parameters
# -- model architecture --
input_size = 1         # features per time step handed to the LSTM
num_classes = 4        # width of the classifier output
hidden_size = 200      # LSTM hidden state size
num_layers = 1         # stacked LSTM layers
# -- optimisation --
learning_rate = 1e-2
batch_size = 50
num_epoch = 10


In [27]:
# Custom dataset
class ECGDataset(Dataset):
   """ECG recordings cut into fixed-length segments with per-sample labels.

   Every ``*.mat`` file in the folder must hold the raw signal under the
   ``"ecgSignal"`` key. The ``*.csv`` file with the same stem holds one
   annotated region per row in the columns ``ROILimits_1``, ``ROILimits_2``
   and ``Value`` (``'P'``, ``'T'`` or ``'QRS'``).

   Attributes:
      ecgs: float32 array of shape (num_segments, segment_length).
      labels: int64 array of shape (num_segments, segment_length, 4) whose
         one-hot rows are ordered as (unlabelled, P, T, QRS).
   """

   # One-hot target for each annotated wave type; samples outside every
   # region keep the "unlabelled" row [1, 0, 0, 0].
   _ENCODER = {
      'P': [0, 1, 0, 0],
      'T': [0, 0, 1, 0],
      'QRS': [0, 0, 0, 1],
   }

   def __init__(self, folder_path, segment_length=5000):
      """
      A dataset built off of ECG data in .mat form
      Args:
      folder_path (string): path to folder
      segment_length (int): number of samples per returned segment; the
         tail of a recording that does not fill a whole segment is dropped
      Raises:
      ValueError: if segment_length is not a positive integer
      KeyError: if a label file contains a ``Value`` other than P, T or QRS
      """
      if segment_length <= 0:
         raise ValueError("segment_length must be a positive integer")

      # Pair signal and label files by file stem rather than by list position,
      # so a missing partner cannot shift every later recording by one.
      signal_paths = self._index_by_stem(glob.glob(os.path.join(folder_path, '*.mat')))
      label_paths = self._index_by_stem(glob.glob(os.path.join(folder_path, '*.csv')))
      stems = sorted(set(signal_paths) & set(label_paths), key=self._record_order)

      ecg_chunks = []
      label_chunks = []
      for stem in stems:
         # Load mat then convert to a flat float32 numpy vector
         ecgData = sio.loadmat(signal_paths[stem])["ecgSignal"]
         ecgData = ecgData.flatten().astype(np.float32)
         ecgSize = ecgData.size
         num_segments = ecgSize // segment_length
         usable = num_segments * segment_length
         # Reshape ecg into whole segments, ignoring whatever is left over
         ecg_chunks.append(ecgData[:usable].reshape(num_segments, segment_length))

         # Start with every sample unlabelled, then paint the annotated regions
         labels = np.zeros((ecgSize, 4), dtype=np.int64)
         labels[:, 0] = 1
         labelData = pd.read_csv(label_paths[stem])
         regions = zip(labelData['ROILimits_1'], labelData['ROILimits_2'], labelData['Value'])
         for first, last, wave in regions:
            # NOTE(review): limits are applied as inclusive 0-based indices,
            # as before; if the CSVs were exported from a 1-based tool the
            # regions are shifted by one sample -- confirm. Slicing clips a
            # region that runs past the end of the signal.
            labels[int(first):int(last) + 1] = self._ENCODER[wave]
         label_chunks.append(labels[:usable].reshape(num_segments, segment_length, 4))

      if ecg_chunks:
         # Concatenate once at the end so that every recording is kept
         self.ecgs = np.concatenate(ecg_chunks, axis=0)
         self.labels = np.concatenate(label_chunks, axis=0)
      else:
         # No matching files: expose an empty dataset instead of leaving the
         # attributes undefined
         self.ecgs = np.empty((0, segment_length), dtype=np.float32)
         self.labels = np.empty((0, segment_length, 4), dtype=np.int64)

   @staticmethod
   def _index_by_stem(paths):
      """Map each file name without directory and extension to its path."""
      return {os.path.splitext(os.path.basename(path))[0]: path for path in paths}

   @staticmethod
   def _record_order(stem):
      """Sort key: by trailing integer ('ecg2' before 'ecg10'), unnumbered stems last."""
      digits = stem[len(stem.rstrip('0123456789')):]
      if digits:
         return (0, int(digits), stem)
      return (1, 0, stem)

   def __getitem__(self, index):
      """Return (signal, labels) tensors of shape (segment_length,) and (segment_length, 4)."""
      return torch.from_numpy(self.ecgs[index]), torch.from_numpy(self.labels[index])

   def __len__(self):
      """Number of segments across all loaded recordings."""
      return len(self.ecgs)

In [2]:
# Load Data
dataset = ECGDataset("./QTDataset/")
validation_split = .3
shuffle_dataset = True
random_seed= 42

# Creating data indices for training and validation splits:
# the first `split` (optionally shuffled) indices go to validation, the rest to training.
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# SubsetRandomSampler draws its permutations from torch's RNG, which the NumPy
# seed above does not touch; seed torch as well so batch order is reproducible.
torch.manual_seed(random_seed)

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                           sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                sampler=valid_sampler) 

NameError: name 'ECGDataset' is not defined

In [31]:
# Model
class Classifier(nn.Module):
   """Per-time-step sequence classifier: an LSTM followed by a linear layer.

   Args:
      input_size: number of features per time step.
      hidden_size: size of the LSTM hidden state.
      num_layers: number of stacked LSTM layers.
      num_classes: number of scores produced for every time step.
   """
   def __init__(self, input_size, hidden_size, num_layers, num_classes):
      super().__init__()
      self.hidden_size = hidden_size
      self.num_layers = num_layers
      self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)
      self.fc = nn.Linear(hidden_size, num_classes)
      # Kept for turning logits into probabilities at inference time. The
      # class axis is the last one; leaving `dim` unset is deprecated and does
      # not resolve to the class axis for a 3-D input.
      self.softmax = nn.Softmax(dim=-1)

   def forward(self, x):
      """Return raw class scores (logits) for every time step.

      Args:
         x: tensor of shape (batch, seq_len, input_size).
      Returns:
         Tensor of shape (batch, seq_len, num_classes). The scores are NOT
         normalised: nn.CrossEntropyLoss applies log-softmax itself, so
         feeding it softmax output would normalise twice. Use
         ``self.softmax(logits)`` to obtain probabilities.
      """
      # nn.LSTM starts from zero hidden and cell states when none are given,
      # allocated alongside its own weights, so no global device is needed.
      data, _ = self.lstm(x)
      return self.fc(data)

In [32]:
# Move the parameters to the selected device; the training loop sends every
# batch there, and parameters and inputs must live on the same device.
model = Classifier(input_size, hidden_size, num_layers, num_classes).to(device)

In [33]:
# Loss function and optimiser
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [1]:
# Train the model:
total_step = len(train_loader)
model.train()
for epoch in range(num_epoch):
   for i, (samples, labels) in enumerate(train_loader):
      # (batch, seq_len) -> (batch, seq_len, input_size) as expected by the LSTM
      samples = samples.reshape(samples.size(0), -1, input_size).to(device)
      labels = labels.to(device)

      # fwd pass
      outputs = model(samples)
      # No custom loss is needed: CrossEntropyLoss wants one row of class
      # scores per prediction and one integer class index per target. Fold the
      # time axis into the batch axis and turn the one-hot labels into indices.
      scores = outputs.reshape(-1, outputs.size(-1))
      targets = labels.argmax(dim=-1).reshape(-1)
      loss = criterion(scores, targets)
      # Backwards
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # Report every 100 steps and at the end of each epoch, so short epochs
      # still produce progress output.
      if (i+1) % 100 == 0 or (i+1) == total_step:
         print("Epoch [{}/{}], Step[{}/{}] Loss: {:.4f}"
               .format(epoch+1, num_epoch, i+1, total_step, loss.item()))
      

NameError: name 'train_loader' is not defined