# Assignment 1

In [1]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets

In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

device

device(type='cuda')

Define constants and training parameters:

In [None]:
# Root directory passed to torchvision for downloading / reading the dataset.
DATASET_PATH = './data'

# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 1024
# Whether the training loader shuffles its samples.
SHUFFLE_TRAIN = True

# Number of training epochs passed to the training helper.
NUM_EPOCHS = 10

# Seed PyTorch's default random generators so that weight initialisation and
# batch shuffling are repeatable under Restart Kernel -> Run All.
# (Bit-exact determinism on GPU may additionally depend on backend settings.)
SEED = 42
torch.manual_seed(SEED);  # trailing ';' suppresses the Generator repr in the cell output

## Task 1

Train and compare the following models (an MLP and a simple CNN) on the CIFAR-10 dataset (available through `torchvision.datasets`). The hyperparameters of each model should be lightly tuned to try to maximize its performance.

### Load dataset

In [5]:
# Build the train split first, then the test split. Both are read from (or
# downloaded to) DATASET_PATH and use the ToTensor transform.
train_dataset, test_dataset = (
    datasets.CIFAR10(
        root=DATASET_PATH,
        train=is_train_split,
        transform=transforms.ToTensor(),
        download=True,
    )
    for is_train_split in (True, False)
)

100%|██████████| 170M/170M [02:13<00:00, 1.27MB/s] 


In [6]:
# Training batches; shuffling is controlled by the SHUFFLE_TRAIN constant.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=SHUFFLE_TRAIN)

# Evaluation batches are served in dataset order. `drop_last` is deliberately
# not passed here, so the loader's default applies.
eval_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### MLP

In [None]:
# Learning rate handed to the Adam optimizer for the MLP.
LR = 1e-4

# Flattened size of one image: the product of the last three dataset axes.
# np.prod returns a NumPy scalar, so cast to a plain Python int before the
# value is used as a layer size.
INPUT_DIM = int(np.prod(train_dataset.data.shape[-3:]))  # 32*32*3
# Number of distinct class labels present in the training targets.
OUTPUT_DIM = len(set(train_dataset.targets))  # 10

# Hidden width from the "2/3 rule" heuristic: two thirds of (input + output).
hidden_dim = int((INPUT_DIM + OUTPUT_DIM) * 2/3)
print(f"input dim={INPUT_DIM}, output dim={OUTPUT_DIM}, hidden={hidden_dim}")

input dim=3072, output dim=10, hidden=2054


In [None]:
from mlp import MLP

# Construct the MLP from the input, hidden and output sizes, and move it to
# the selected device in a single expression.
mlp = MLP(INPUT_DIM, hidden_dim, OUTPUT_DIM).to(device)

In [None]:
from train import train

# Adam over all MLP parameters, with cross-entropy as the training loss.
optimizer = torch.optim.Adam(params=mlp.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

# `train` is a project helper; its return value is kept so the learning curve
# can be plotted afterwards.
losses = train(
    mlp,
    NUM_EPOCHS,
    train_loader,
    device,
    criterion,
    optimizer,
)

Epoch 1 Iteration 49: loss 1.87118. : 100%|██████████| 49/49 [00:09<00:00,  5.40it/s]
Epoch 2 Iteration 49: loss 1.77854. : 100%|██████████| 49/49 [00:08<00:00,  5.54it/s]
Epoch 3 Iteration 49: loss 1.76290. : 100%|██████████| 49/49 [00:08<00:00,  5.70it/s]
Epoch 4 Iteration 49: loss 1.74665. : 100%|██████████| 49/49 [00:08<00:00,  5.61it/s]
Epoch 5 Iteration 49: loss 1.70097. : 100%|██████████| 49/49 [00:08<00:00,  5.67it/s]
Epoch 6 Iteration 49: loss 1.62019. : 100%|██████████| 49/49 [00:08<00:00,  5.65it/s]
Epoch 7 Iteration 49: loss 1.56460. : 100%|██████████| 49/49 [00:08<00:00,  5.49it/s]
Epoch 8 Iteration 49: loss 1.61896. : 100%|██████████| 49/49 [00:08<00:00,  5.54it/s]
Epoch 9 Iteration 49: loss 1.63349. : 100%|██████████| 49/49 [00:09<00:00,  5.44it/s]
Epoch 10 Iteration 49: loss 1.55644. : 100%|██████████| 49/49 [00:09<00:00,  5.41it/s]


In [None]:
from evaluate import test

# Run the project's `test` helper on the trained MLP over the evaluation loader.
# NOTE(review): `acc` is presumably the test-set accuracy — confirm against evaluate.py.
acc = test(mlp, eval_loader, device)

In [54]:
from visualize import plot_learning_curve

# Plot the values returned by `train` for the MLP.
# NOTE(review): presumably a sequence of training losses — confirm against train.py.
plot_learning_curve(losses)

: 

### Simple CNN

In [None]:
# TODO: define, train and evaluate the simple CNN for Task 1, mirroring the MLP section above.

## Task 2 Analysis

  - Compare the performance of both models: investigate their learning curves (train/eval loss), compare their test accuracies, and compute their confusion matrices.
  - Visualize some images, along with the predictions from your models. On which images do your models fail most often? Why do you think that is?

## Task 3 Mitigate Overfitting

  - Train both models with and without DropOut. Compare the results: training and validation loss, accuracy, ...
  - Implement and use **your own** custom learning rate warmup and learning rate scheduler criterion to avoid overfitting.<br>
    You may not use PyTorch's built-in schedulers; you must implement your own.<br>
    Train **one model** using it and show whether you can mitigate overfitting.