In [1]:
import sys
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import wandb
from ignite.engine import (Engine, Events, create_supervised_evaluator,
                           create_supervised_trainer)
from ignite.handlers import ModelCheckpoint
from ignite.metrics import Accuracy, Loss
from scipy.io.arff import loadarff
from sklearn.model_selection import train_test_split
from torch import nn
from torch.functional import F
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler

sys.path.append('../')
from src.datasets import FordDataset
from src.models import LSTMClassification
from src.utils import build_optimizer, split_batch

In [9]:
n_splits = 1

config = {
    "model":{
        "encoder": {
            "d_model": n_splits,
            "nhead": n_splits // 2,
            "layer_norm_eps": 1e-3,
            "dropout": 0.5
        },
        "input_dim": 1,
        "hidden_dim": 100,
        "num_layers": 1,
        "fc":{
            "dim": 50,
            "dropout": 0.5
        }
    },
    "train":{
        "optimizer": torch.optim.Adam,
        "lr": 3e-4,
        "n_epoch": 10
    },
    "random_state": np.random.randint(0, 1000)
}

In [10]:
train_path = "../data/FordA/FordA_TRAIN.arff"
test_path = "../data/FordA/FordA_TEST.arff"

train_dataset = FordDataset(train_path)
test_dataset = FordDataset(test_path)

idx = np.arange(len(train_dataset))
idx_train, idx_val = train_test_split(idx, train_size=0.8, stratify=train_dataset.labels, random_state=config['random_state'])

train_sampler = SubsetRandomSampler(idx_train)
val_sampler = SubsetRandomSampler(idx_val)

train_dataloader = DataLoader(train_dataset, batch_size=128, sampler=train_sampler, collate_fn=partial(split_batch, n_splits=n_splits))
val_dataloader = DataLoader(train_dataset, batch_size=128, sampler=val_sampler, collate_fn=partial(split_batch, n_splits=n_splits))
test_dataloader = DataLoader(test_dataset, batch_size=64, collate_fn=partial(split_batch, n_splits=n_splits))

In [11]:
# Initialize your model
wandb.init(entity='ts-robustness', project='ml-course', config=config)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('device:',device)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

device: cuda


In [12]:
model = LSTMClassification(config['model']).to(device)

In [13]:
# Initialize your optimizer and criterion
optimizer = build_optimizer(config, model)
criterion = nn.BCELoss()

def train_step(engine, batch):
    model.train()
    optimizer.zero_grad()
    x, y = batch[0].to(device), batch[1].to(device)
    y_pred = model(x)

    #y_pred = y_pred.float()
    #y = y.float()
    
    loss = criterion(y_pred, y.unsqueeze(1))
    loss.backward()
    optimizer.step()
    return loss.item()

trainer = Engine(train_step)

def validation_step(engine, batch):
    model.eval()
    with torch.no_grad():
        x, y = batch[0].to(device), batch[1].to(device)
        y_pred = model(x)
        return y_pred, y

train_evaluator = Engine(validation_step)
val_evaluator = Engine(validation_step)

# Attach metrics to the evaluators
metrics = {
    'accuracy': Accuracy(output_transform=lambda x: (x[0] > 0.5, x[1])),
    'loss': Loss(criterion, output_transform=lambda x: (x[0], x[1].unsqueeze(1)))
}

for name, metric in metrics.items():
    metric.attach(train_evaluator, name)

for name, metric in metrics.items():
    metric.attach(val_evaluator, name)


# checkpoint_handler = ModelCheckpoint(dirname='saved_models', filename_prefix='best',
#                                      n_saved=1, require_empty=False,
#                                      score_function=lambda engine: engine.state.metrics['accuracy'],
#                                      score_name="accuracy", global_step_transform=lambda *_: trainer.state.epoch)
# val_evaluator.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {"model": model})


### Logging
@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(trainer):
    batch_loss = trainer.state.output
    print("Training Results - Avg loss: {:.4f}".format(batch_loss))
    wandb.log({"train_loss": batch_loss})
    
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    train_evaluator.run(train_dataloader)
    metrics = train_evaluator.state.metrics
    print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.4f}"
          .format(trainer.state.epoch, metrics['accuracy'], metrics['loss']))
    wandb.log({"train_accuracy": metrics['accuracy'],
               "train_loss": metrics['loss']})

@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(trainer):
    val_evaluator.run(val_dataloader)
    metrics = val_evaluator.state.metrics
    print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.4f}"
          .format(trainer.state.epoch, metrics['accuracy'], metrics['loss']))
    wandb.log({"val_accuracy": metrics['accuracy'],
               "val_loss": metrics['loss']})


# Run the training loop
trainer.run(train_dataloader, max_epochs=config['train']['n_epoch'])
wandb.finish()

Training Results - Avg loss: 0.6922
Training Results - Avg loss: 0.7112
Training Results - Avg loss: 0.6928
Training Results - Avg loss: 0.6935
Training Results - Avg loss: 0.7114
Training Results - Avg loss: 0.7230
Training Results - Avg loss: 0.7016
Training Results - Avg loss: 0.7105
Training Results - Avg loss: 0.7239
Training Results - Avg loss: 0.7129
Training Results - Avg loss: 0.6979
Training Results - Avg loss: 0.6831
Training Results - Avg loss: 0.7052
Training Results - Avg loss: 0.6776
Training Results - Avg loss: 0.6892
Training Results - Avg loss: 0.7019
Training Results - Avg loss: 0.7138
Training Results - Avg loss: 0.6986
Training Results - Avg loss: 0.6866
Training Results - Avg loss: 0.6918
Training Results - Avg loss: 0.7001
Training Results - Avg loss: 0.6920
Training Results - Avg loss: 0.7139
Training Results - Epoch: 1  Avg accuracy: 0.55 Avg loss: 0.6897
Validation Results - Epoch: 1  Avg accuracy: 0.53 Avg loss: 0.6918
Training Results - Avg loss: 0.6966
Trai

VBox(children=(Label(value='0.001 MB of 0.038 MB uploaded\r'), FloatProgress(value=0.03613724545431491, max=1.…

0,1
train_accuracy,▂▁▄▅▆▇▇▇██
train_loss,██▇▇▇███▇▇▇▇▇▆▇▆▆▅▅▄▅▇▅▅▄▄▄▅▃▄▃▄▂▃▅▄▂▂▃▁
val_accuracy,▁▁▃▅▇▇▇▇▇█
val_loss,██▇▅▃▂▁▁▁▁

0,1
train_accuracy,0.79375
train_loss,0.46444
val_accuracy,0.73509
val_loss,0.56754
