In [1]:
import os
import time

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.nn import functional as F
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# CUDA for PyTorch: use the first GPU when one is visible, otherwise fall back to CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
# Allow cuDNN to pick its algorithms by benchmarking them
torch.backends.cudnn.benchmark = True
# force=True keeps this cell re-runnable: without it, a second call in the same
# kernel raises RuntimeError because the start method has already been set.
torch.multiprocessing.set_start_method('spawn', force=True)

In [3]:
# Directory that holds the per-recording CSV files. Override it by setting the
# ECG_DATA_DIR environment variable; the default is the original location.
# os.path.join(..., "") guarantees a trailing separator, because the loader
# builds file names with plain string concatenation (path + file_name).
path = os.path.join(os.environ.get("ECG_DATA_DIR", "/media/tuan/NGUI GA/ECG_split/"), "")

In [4]:
from sklearn.preprocessing import StandardScaler
def get_np_arrays(file_name, max_len=76800):
    """Load one recording and return it as a fixed-length, standardized tensor.

    Reads the '# ECG Channel 1' column of ``path + file_name``, copies it into a
    zero-filled ``(max_len, 1)`` array, standardizes that array with
    ``StandardScaler`` and moves the result to ``device``.

    Parameters
    ----------
    file_name:
        CSV file name, appended to the module-level ``path``.
    max_len:
        Number of rows in the returned tensor. Shorter recordings are
        zero-padded at the end; longer recordings are truncated (previously
        they raised a shape-mismatch error on assignment).

    Returns
    -------
    torch.Tensor
        float32 tensor of shape ``(max_len, 1)`` on ``device``.
    """
    signal = pd.read_csv(path + file_name)['# ECG Channel 1'].values.reshape(-1, 1)

    # Never copy more rows than the fixed-size buffer can hold.
    n_rows = min(signal.shape[0], max_len)

    padded = np.zeros([max_len, 1])
    padded[:n_rows] = signal[:n_rows]

    # The scaler is fitted on the padded array, so the padding zeros contribute
    # to the mean/variance exactly as they did before.
    padded = StandardScaler().fit_transform(padded)
    return torch.from_numpy(padded).float().to(device)

In [5]:
# Smoke test: load a single recording and display the resulting tensor
get_np_arrays('1.csv')

tensor([[-0.2610],
        [-0.2744],
        [-0.3148],
        ...,
        [ 0.2371],
        [ 0.0890],
        [-0.0725]], device='cuda:0')

In [6]:
# Index table for the recordings; later cells read its 'data_type', 'file' and 'class' columns
df = pd.read_csv('mapped_df.csv')

In [7]:
# One frame per split, selected by the value of the 'data_type' column
test_df = df.loc[df['data_type'].eq('test')]
train_df = df.loc[df['data_type'].eq('train')]
val_df = df.loc[df['data_type'].eq('val')]

In [8]:
# Plain Python lists of file names and class labels for the train / val splits
X_train_filenames = train_df['file'].to_numpy().tolist()
y_train = train_df['class'].to_numpy().tolist()

X_val_filenames = val_df['file'].to_numpy().tolist()
y_val = val_df['class'].to_numpy().tolist()

In [9]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that loads one series per index through ``get_np_arrays``."""

    def __init__(self, series_IDs, labels):
        """Keep the series identifiers and their labels, indexed in parallel."""
        self.series_IDs = series_IDs
        self.labels = labels

    def __len__(self):
        """Return the number of samples (one per series identifier)."""
        return len(self.series_IDs)

    def __getitem__(self, index):
        """Load the series stored at ``index`` and return it with its label."""
        series_id = self.series_IDs[index]
        # The series is loaded first, then the label is looked up.
        return get_np_arrays(series_id), self.labels[index]

In [10]:
# Keyword arguments forwarded to every DataLoader, plus the epoch budget
params = dict(batch_size=64, shuffle=True, num_workers=0)
max_epochs = 100

In [11]:
# Training data: dataset over the train split, wrapped in a DataLoader
training_set = Dataset(series_IDs=X_train_filenames, labels=y_train)
training_generator = DataLoader(training_set, **params)

In [12]:
# Validation data: same construction as the training loader, over the val split
validation_set = Dataset(series_IDs=X_val_filenames, labels=y_val)
validation_generator = DataLoader(validation_set, **params)

In [13]:
class LinearBaseline(nn.Module):
    """Fully connected baseline classifier (PyTorch).

    Reference: https://arxiv.org/abs/1909.04939

    Parameters
    ----------
    num_inputs:
        Number of features per sample once the input has been flattened
    num_pred_classes:
        Output width of the last block
    """

    def __init__(self, num_inputs: int, num_pred_classes: int = 1) -> None:
        super().__init__()

        # constructor arguments, kept on the instance for easier saving and loading
        self.input_args = dict(num_inputs=num_inputs,
                               num_pred_classes=num_pred_classes)

        hidden_units = 500
        # input dropout followed by three Linear -> ReLU -> Dropout blocks
        stack = [
            nn.Dropout(0.1),
            LinearBlock(num_inputs, hidden_units, 0.2),
            LinearBlock(hidden_units, hidden_units, 0.2),
            LinearBlock(hidden_units, num_pred_classes, 0.3),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
        # collapse everything after the batch dimension into one feature axis
        batch = x.shape[0]
        flat = x.view(batch, -1)
        return self.layers(flat)


class LinearBlock(nn.Module):
    """A linear layer followed by ReLU and dropout."""

    def __init__(self, input_size: int, output_size: int,
                 dropout: float) -> None:
        super().__init__()

        projection = nn.Linear(input_size, output_size)
        self.layers = nn.Sequential(projection, nn.ReLU(), nn.Dropout(p=dropout))

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
        out = self.layers(x)
        return out

In [14]:
class LSTMClassifier(nn.Module):
    """Very simple LSTM-based time-series classifier.

    The input is run through a batch-first LSTM, and the output of the last
    time step is passed to a linear layer.

    Parameters
    ----------
    input_dim:
        Number of features per time step.
    hidden_dim:
        Size of the LSTM hidden state.
    layer_dim:
        Number of stacked LSTM layers.
    output_dim:
        Number of values produced by the final linear layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        # Not read anywhere in this class; kept so existing attribute access still works.
        self.batch_size = 64
        self.hidden = None

    def forward(self, x):
        """Return the linear projection of the LSTM output at the last time step."""
        h0, c0 = self.init_hidden(x)
        out, _ = self.rnn(x, (h0, c0))
        # batch_first=True, so dim 1 is the time axis; keep only its last entry
        return self.fc(out[:, -1, :])

    def init_hidden(self, x):
        """Return zero-filled ``[h0, c0]`` states sized for the batch in ``x``.

        The states are created on the same device and with the same dtype as
        ``x``, so the model works on CPU as well as GPU (previously ``.cuda()``
        was hard-coded and failed on CPU-only hosts).
        """
        state_shape = (self.layer_dim, x.size(0), self.hidden_dim)
        return [x.new_zeros(state_shape) for _ in range(2)]

In [25]:
# NOTE(review): hidden_dim (256) and layer_dim (1) below are NOT what the model
# is built with -- the constructor call passes 16 hidden units and 2 layers as
# literals. Confirm which configuration is intended before reusing these names.
classes = (0, 1)
input_dim, hidden_dim, layer_dim, output_dim = 1, 256, 1, 2
seq_dim = 128

# Build the classifier, place it on the selected device, and switch to training mode
model = LSTMClassifier(1, 16, 2, 2).to(device)
model.train()

LSTMClassifier(
  (rnn): LSTM(1, 16, num_layers=2, batch_first=True)
  (fc): Linear(in_features=16, out_features=2, bias=True)
)

In [26]:
import torch.optim as optim

# Cross-entropy loss on the model outputs, optimized with SGD + momentum
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=1e-3, momentum=0.9)

In [27]:
# Overrides the earlier max_epochs = 100: the training loop below runs a single epoch
max_epochs = 1

In [28]:
def validation(model, data_loader=None):
    """Compute the classification accuracy of ``model`` over a data loader.

    Parameters
    ----------
    model:
        Module whose output has one score per class along dim 1.
    data_loader:
        Iterable of ``(batch, labels)`` pairs. Defaults to the module-level
        ``validation_generator`` when omitted.

    Returns
    -------
    float
        ``correct / total`` over all batches, or ``0.0`` when the loader
        yields no samples (previously this raised ``ZeroDivisionError``).

    Notes
    -----
    The model is evaluated in eval mode with gradients disabled, and its
    previous train/eval mode is restored before returning. Batches are moved to
    the device of the model's parameters instead of assuming CUDA.
    """
    loader = validation_generator if data_loader is None else data_loader

    # Follow the model's own device so the function also works on CPU-only hosts.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()

    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for local_batch, local_labels in loader:
                if target_device is not None:
                    local_batch = local_batch.to(target_device)
                    local_labels = local_labels.to(target_device)

                outputs = model(local_batch)
                # index of the highest score along the class dimension
                _, predicted = torch.max(outputs, 1)
                total += local_labels.size(0)
                correct += (predicted == local_labels).sum().item()
    finally:
        # leave the model in whatever mode the caller had it in
        model.train(was_training)

    if total == 0:
        print('Validation set is empty; accuracy is undefined')
        return 0.0

    print('Accuracy of the network on the validation set: %d %%' % (
                    100 * correct / total))
    return correct / total

In [None]:
# Loop over epochs
LOG_EVERY = 5  # validate and report the mean training loss every LOG_EVERY mini-batches
start_time = time.time()  # wall-clock start of the whole run (no longer reset per batch)
best_accuracy = -1


for epoch in range(max_epochs):
    # Training
    print(f"Current epoch: {epoch}")
    running_loss = 0.0
    for i, data in enumerate(training_generator, 0):
        # Re-enter training mode every step: the periodic validation below
        # switches the model to eval mode.
        model.train()
        print(f"Current i: {i}")
        # Transfer to the selected device (GPU when available, otherwise CPU)
        local_batch, local_labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(local_batch)
        loss = criterion(outputs, local_labels)
        loss.backward()
        optimizer.step()

        # accumulate the loss over the current logging window
        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:    # every LOG_EVERY mini-batches
            elapsed_time = time.time() - start_time
            model.eval()
            with torch.no_grad():
                val_acc = validation(model)
            # remember the best validation accuracy seen so far
            best_accuracy = max(best_accuracy, val_acc)
            print(f"Validation accuracy: {val_acc}")
            print(f"Elapsed: {elapsed_time:.1f}s, best validation accuracy: {best_accuracy}")
            # mean loss over the window (was divided by 2000, which under-reported it)
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / LOG_EVERY))
            running_loss = 0.0