In [126]:
import os
import numpy as np
import torch.nn as nn
from torch.utils.data import DataLoader
import torch
import matplotlib.pyplot as plt
import feather
import csv
import pandas
import copy

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load Data

In [178]:
X_train, X_valid, X_test = [], [], []

os.chdir('./Data/training')
directory = './'

for filename in os.listdir(directory):
    if 'csv' in filename:
        continue
    DF = feather.read_dataframe(filename)
    X_train.append(DF)
X_train = np.stack(X_train, axis=0)
y_train = pandas.read_csv('y_train.csv', sep=",", header=0,index_col=0).to_numpy().squeeze()
    
os.chdir('../valid')
for filename in os.listdir(directory):
    if 'csv' in filename:
        continue
    DF = feather.read_dataframe(filename)
    X_valid.append(DF)
X_valid = np.stack(X_valid, axis=0)
y_valid = pandas.read_csv('y_valid.csv', sep=",", header=0,index_col=0).to_numpy().squeeze()

os.chdir('../test')
for filename in os.listdir(directory):
    if 'csv' in filename:
        continue
    DF = feather.read_dataframe(filename)
    X_test.append(DF)
X_test = np.stack(X_test, axis=0)
y_test = pandas.read_csv('y_test.csv', sep=",", header=0,index_col=0).to_numpy().squeeze()
    
os.chdir('../../')

# Check data shape and setup dataloaders

In [179]:
print(f"Train data shape: {X_train.shape}")
print(f"Valid data shape:{X_valid.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Train target shape: {y_train.shape}")
print(f"Valid target shape: {y_valid.shape}")
print(f"Test target shape: {y_test.shape}")

class Dataset(torch.utils.data.Dataset):
    def __init__(self, X, y,  transform=None):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.X = torch.tensor(X, dtype=float, device=self.device)
        self.y = torch.tensor(y, dtype=torch.long, device=self.device)
        self.transform = transform

    # def trial_len(self):
    #     return self.X.shape[-1]

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]

        if self.transform:
            x = self.transform(x)
        return x.float(), y

batch_size = 1
    
train_dataset = Dataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size)

val_dataset = Dataset(X_valid, y_valid)
val_loader = DataLoader(val_dataset, len(X_valid))

test_dataset = Dataset(X_test, y_test)
test_loader = DataLoader(test_dataset, len(X_test))

dataloaders = [train_loader, val_loader, test_loader]

Train data shape: (94, 144, 10)
Valid data shape:(36, 144, 10)
Test data shape: (26, 144, 10)
Train target shape: (94,)
Valid target shape: (36,)
Test target shape: (26,)


# Setup Training

In [147]:
def train_one_epoch(model, opt, dataloader, loss_fn, curr_epoch, tb_writer=None):
    model.train(True)
    running_loss = 0.
    last_loss = 0.

    correct = 0
    for i, data in enumerate(dataloader):
        opt.zero_grad() # reset opt

        X, y = data
        outputs = model(X)

        loss = loss_fn(outputs, y)
        loss.backward() # backprop

        # grad step
        opt.step()

        #training acc
        pred = outputs.data.max(1, keepdim=True)[1]
        correct += torch.sum(pred.squeeze(1) == y)

        running_loss += loss.item()
        if i % 100 == 0:
            last_loss = loss.item() # loss per batch
            tb_x = curr_epoch * len(dataloader) + i + 1

            if tb_writer is not None:
                tb_writer.add_scalar('Loss/train', last_loss, tb_x)

            running_loss = 0

    final_acc = correct / len(dataloader.dataset)
    return last_loss, final_acc

def train_and_eval(model, opt, dataloaders, loss_fn, n_epochs, tb_writer=None, verbose=True):
    best_vloss = 1_000_000.

    train_loader, val_loader, test_loader = dataloaders

    best_test_acc = 0
    best_val_acc = 0
    for epoch in range(n_epochs):
        avg_loss, train_acc = train_one_epoch(model, opt, train_loader, loss_fn, epoch, tb_writer)

        avg_vloss, val_acc = eval(model, val_loader, loss_fn)

        avg_tloss, test_acc = eval(model, test_loader, loss_fn)

        if val_acc > best_val_acc:
            best_test_acc = test_acc
            best_val_acc = val_acc

        if verbose:
            print(f'Epoch: {epoch + 1} LOSS -- train: {avg_loss} valid: {avg_vloss}')
            print(f'Epoch: {epoch + 1} Acc -- train: {train_acc} valid: {val_acc} test: {test_acc}')

        if tb_writer is not None:
            # Log the running loss averaged per batch
            # for both training and validation
            tb_writer.add_scalars('Training vs. Validation vs. Testing Loss',
                            { 'Training' : avg_loss, 'Validation' : avg_vloss, "Testing": avg_tloss },
                            epoch + 1)

            tb_writer.add_scalars('Training vs. Validation vs. Testing Acc',
                            { 'Training' : train_acc, 'Validation' : val_acc, "Testing": test_acc },
                            epoch + 1)
            tb_writer.flush()

        # Track best performance, and save the model's state
        if avg_vloss < best_vloss:
            best_vloss = avg_vloss
            best_model_state = copy.deepcopy(model.state_dict())

    print(f"Training Finished. Best Val Acc: {best_val_acc} Best Test Acc: {best_test_acc}")
    return best_model_state, best_val_acc.item(), best_test_acc.item()

def eval(model, dataloader, loss_fn):
    running_loss = 0.0
    model.eval() # go to evaluation mode

    correct = 0
    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for i, data in enumerate(dataloader):
            inputs, y = data
            outputs = model(inputs)
            loss = loss_fn(outputs, y)
            running_loss += loss

            pred = outputs.data.max(1, keepdim=True)[1]
            correct += torch.sum(pred.squeeze(1) == y)

    final_acc = correct / len(dataloader.dataset)

    avg_loss = running_loss / (i + 1)

    return avg_loss, final_acc

# Models 

In [180]:
class Model(nn.Module):

    def __init__(self, dropout=.4):
        super(Model, self).__init__()
        
        self.CNN = nn.Sequential(
            nn.Conv1d(in_channels=10, out_channels=10, kernel_size=2, padding='same'),
            nn.AvgPool1d(kernel_size=2,padding=0),
            nn.InstanceNorm1d(num_features = 10, eps=1e-05, momentum=0.2, affine=True),
            nn.ELU(inplace = True),
            nn.Dropout(p=dropout),
        )
        
        self.lstm = nn.LSTM(10, 10, 1, batch_first=True, dropout=dropout)

        self.fc = nn.Sequential(
            nn.Linear(10, 10),
            nn.InstanceNorm1d(num_features=10, eps=1e-05, momentum=0.2, affine=False),
            nn.ReLU(inplace = True),
            nn.Dropout(p=dropout),
            nn.Flatten(),
            nn.Linear(720, 2),
            nn.Softmax()
        )

    def forward(self, x, h=None):
        N, L, H = x.size() # (1, 144, 10)
        x = x.view(N, H, L) # reshape to (N, H, L) to fit CNN

        out= self.CNN(x) # output is (N, H, L/2)
        out = out.view(out.shape[0], out.size(2), out.size(1)) # reshape to (N, L, H)
        out = self.lstm(out)
        out = self.fc(out[0])

        return out

# Evaluate Models

In [181]:
LR      = 0.0001
BETAS   = (0.9, 0.999)
EPS     = 1e-08
DECAY   = 0.005
DROPOUT = 0

EPOCHS  = 50
CRITERION = nn.CrossEntropyLoss()

model = Model(dropout=DROPOUT).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=LR, betas=BETAS, eps=EPS, weight_decay=DECAY)
best_model = train_and_eval(model, optimizer, dataloaders, CRITERION, EPOCHS)

  return self._call_impl(*args, **kwargs)


Epoch: 1 LOSS -- train: 0.6033260822296143 valid: 0.7203848361968994
Epoch: 1 Acc -- train: 0.5106382966041565 valid: 0.472222238779068 test: 0.5
Epoch: 2 LOSS -- train: 0.5355368852615356 valid: 0.7025401592254639
Epoch: 2 Acc -- train: 0.5106382966041565 valid: 0.3611111044883728 test: 0.5
Epoch: 3 LOSS -- train: 0.6471095681190491 valid: 0.7006485462188721
Epoch: 3 Acc -- train: 0.4999999701976776 valid: 0.5 test: 0.6153846383094788
Epoch: 4 LOSS -- train: 0.7018797397613525 valid: 0.701759934425354
Epoch: 4 Acc -- train: 0.44680848717689514 valid: 0.5277777910232544 test: 0.5384615659713745
Epoch: 5 LOSS -- train: 0.7277728319168091 valid: 0.7033064365386963
Epoch: 5 Acc -- train: 0.45744678378105164 valid: 0.5833333134651184 test: 0.5384615659713745
Epoch: 6 LOSS -- train: 0.7427035570144653 valid: 0.7050879001617432
Epoch: 6 Acc -- train: 0.4787233769893646 valid: 0.5833333134651184 test: 0.5
Epoch: 7 LOSS -- train: 0.7523221969604492 valid: 0.7068971991539001
Epoch: 7 Acc -- tra