In [5]:
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import wandb
from ignite.engine import (Engine, Events, create_supervised_evaluator,
                           create_supervised_trainer)
from ignite.metrics import Accuracy, Loss
from scipy.io.arff import loadarff
from sklearn.model_selection import train_test_split
from torch import nn
from torch.functional import F
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler

sys.path.append('../')
from src.datasets import FordDataset
from src.models import LSTMClassification, TransformerClassification
from src.utils import build_optimizer, str2torch
from torch.optim import Adam, AdamW, SGD
from src.train import train

In [18]:
train_path = "./FordA_TRAIN.arff"
test_path = "./FordA_TEST.arff"

train_dataset = FordDataset(train_path, config['data'])
test_dataset = FordDataset(test_path, config['data'])

idx = np.arange(len(train_dataset))
idx_train, idx_val = train_test_split(idx, train_size=0.8, stratify=train_dataset.labels, random_state=config['random_state'])

train_sampler = SubsetRandomSampler(idx)
val_sampler = SubsetRandomSampler(idx_val)

train_dataloader = DataLoader(train_dataset, batch_size=config['data']['batch_size'], sampler=train_sampler)
val_dataloader = DataLoader(train_dataset, batch_size=config['data']['batch_size'], sampler=val_sampler)
test_dataloader = DataLoader(test_dataset, batch_size=64)

In [9]:
class CNNClassification(nn.Module):

    def __init__(self, input_dim, hidden_dim, target_size=2):
        super(CNNClassification, self).__init__()

        self.fc = nn.Sequential(
            nn.Conv1d(input_dim, hidden_dim, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),

            nn.Conv1d(hidden_dim, hidden_dim, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),

            nn.Flatten(),
            nn.Linear(3700, 256),
            nn.ReLU(),
            nn.Linear(256, target_size),
        )

    def forward(self, input_):
        if len(input_.shape) == 2:
            input_ = input_.unsqueeze(2)
        input_ = input_.permute(0, 2, 1)
        logits = self.fc(input_)
        return logits

In [10]:
a = next(iter(train_dataloader))[0]

In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = CNNClassification(1, 100, 2).to(device)
model(a.to(device)).shape

torch.Size([256, 2])

In [13]:
config = {
  "data": {
    "step": 50,
    "batch_size": 256,
    "seq_length": 150
  },
  "model": {
    "fc": {
      "input_dim": 500,
      "hidden_dim": 100,
      "target_size": 2
    }
  },
  "train": {
    "lr": 0.0006345611256400737,
    "n_epoch": 30,
    "optimizer": "adam",
    "scheduler_config": {
      "type": None
    }
  },
  "random_state": 42
}

In [15]:
# Initialize your model
wandb.init(entity='ts-robustness', project='ml-course', config=config, tags=['hypersearch'])

device = 'cuda' if torch.cuda.is_available() else 'cpu'
config['train']['optimizer'] = str2torch(config['train']['optimizer'])

model = CNNClassification(1, 100, 2).to(device)

# Initialize your optimizer and criterion
optimizer = build_optimizer(config, model)
criterion = nn.CrossEntropyLoss()

def train_step(engine, batch):
    model.train()
    optimizer.zero_grad()
    x, y = batch[0].to(device), batch[1].to(device)
    y_pred = model(x)
    loss = criterion(y_pred, y.long())
    loss.backward()
    optimizer.step()
    return loss.item()

trainer = Engine(train_step)

def validation_step(engine, batch):
    model.eval()
    with torch.no_grad():
        x, y = batch[0].to(device), batch[1].to(device)
        y_pred = model(x)
        return y_pred, y

train_evaluator = Engine(validation_step)
val_evaluator = Engine(validation_step)
test_evaluator = Engine(validation_step)

# Attach metrics to the evaluators
metrics = {
    'accuracy': Accuracy(output_transform=lambda x: (torch.argmax(x[0], dim=1), x[1])),
    'loss': Loss(criterion, output_transform=lambda x: (x[0], x[1].long()))
}

for name, metric in metrics.items():
    metric.attach(train_evaluator, name)
    metric.attach(val_evaluator, name)
    metric.attach(test_evaluator, name)


@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    train_evaluator.run(train_dataloader)
    metrics = train_evaluator.state.metrics
    print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.4f}"
          .format(trainer.state.epoch, metrics['accuracy'], metrics['loss']))
    wandb.log({"train_accuracy": metrics['accuracy'],
               "train_loss": metrics['loss']})

@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(trainer):
    val_evaluator.run(val_dataloader)
    metrics = val_evaluator.state.metrics
    print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.4f}"
          .format(trainer.state.epoch, metrics['accuracy'], metrics['loss']))
    wandb.log({"val_accuracy": metrics['accuracy'],
               "val_loss": metrics['loss']})

@trainer.on(Events.COMPLETED)
def log_test_results(trainer):
    test_evaluator.run(test_dataloader)
    metrics = test_evaluator.state.metrics
    print("Test Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.4f}"
          .format(trainer.state.epoch, metrics['accuracy'], metrics['loss']))
    wandb.log({"test_accuracy": metrics['accuracy'],
               "test_loss": metrics['loss']})


# Run the training loop
trainer.run(train_dataloader, max_epochs=config['train']['n_epoch'])
wandb.finish()

Training Results - Epoch: 1  Avg accuracy: 0.72 Avg loss: 0.5248
Validation Results - Epoch: 1  Avg accuracy: 0.73 Avg loss: 0.5212
Training Results - Epoch: 2  Avg accuracy: 0.85 Avg loss: 0.3470
Validation Results - Epoch: 2  Avg accuracy: 0.85 Avg loss: 0.3430
Training Results - Epoch: 3  Avg accuracy: 0.79 Avg loss: 0.4585
Validation Results - Epoch: 3  Avg accuracy: 0.78 Avg loss: 0.4617
Training Results - Epoch: 4  Avg accuracy: 0.85 Avg loss: 0.3347
Validation Results - Epoch: 4  Avg accuracy: 0.85 Avg loss: 0.3301
Training Results - Epoch: 5  Avg accuracy: 0.88 Avg loss: 0.2818
Validation Results - Epoch: 5  Avg accuracy: 0.88 Avg loss: 0.2849
Training Results - Epoch: 6  Avg accuracy: 0.85 Avg loss: 0.3291
Validation Results - Epoch: 6  Avg accuracy: 0.85 Avg loss: 0.3386
Training Results - Epoch: 7  Avg accuracy: 0.86 Avg loss: 0.3070
Validation Results - Epoch: 7  Avg accuracy: 0.85 Avg loss: 0.3241
Training Results - Epoch: 8  Avg accuracy: 0.89 Avg loss: 0.2581
Validation 

VBox(children=(Label(value='0.109 MB of 0.109 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
test_accuracy,▁
test_loss,▁
train_accuracy,▁▅▃▅▆▅▅▆▆▆▆▆▆▆▆▆▇▆▆▇▇▇▇███████
train_loss,█▅▇▅▄▅▄▄▃▃▃▄▃▃▃▃▂▃▃▃▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▆▃▆█▆▇████▇███▇█▇▇▇██▇█▇▇▇▇▇▇
val_loss,█▃▆▃▁▃▃▁▁▁▁▂▁▁▂▃▁▃▃▃▂▂▃▂▃▃▄▃▄▄

0,1
test_accuracy,0.86733
test_loss,0.4178
train_accuracy,0.96659
train_loss,0.09325
val_accuracy,0.87018
val_loss,0.38872


In [23]:
torch.save(model.state_dict(), 'cnn.pth')