## Set up dependencies, etc.

In [None]:
!pip install d2l==1.0.3
!pip install torchinfo

In [None]:
import os
import random

import numpy as np
import torch
import torchvision
from d2l import torch as d2l
from torch import nn
from torchinfo import summary
from torchvision import transforms

## Set random seed to ensure reproducibility

**TODO**: The seeding below doesn't seem to be working properly yet.

In [None]:
def set_seed(seed):
    """Seed every random number generator used here and request determinism.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), switches cuDNN to deterministic mode with auto-tuning disabled,
    and enables PyTorch's deterministic-algorithms mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Environment settings. NOTE(review): PYTHONHASHSEED is normally read at
    # interpreter start-up, so setting it here presumably only affects
    # processes launched afterwards -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # cuBLAS workspace configuration used together with deterministic mode.
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    # Seed each library's generator with the same value.
    for seed_fn in (random.seed,
                    np.random.seed,
                    torch.manual_seed,
                    torch.cuda.manual_seed_all):
        seed_fn(seed)

    # Deterministic cuDNN kernels, no benchmark-based kernel selection.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)

In [None]:
# Seed handed to set_seed() at the top of each stochastic cell.
SEED = 765 # "Namco Pro is the strongest"

## Model

We use the following CNN, which is structured similarly to architectures such as VGG (albeit much smaller, of course):

In [None]:
class CNN_FashionMNIST(d2l.Classifier):
    """Small VGG-style convolutional classifier.

    The network stacks three convolutional stages (3x3 convolution with
    padding 1 -> batch norm -> ReLU -> 2x2 max-pool with stride 2) using 64,
    96 and 128 output channels. The result is flattened and passed through a
    110-unit hidden layer with ReLU and dropout (p=0.6), followed by a linear
    layer with one output per class.

    Args:
        lr: Learning rate given to Adam in ``configure_optimizers``.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, lr=1e-4, num_classes=10):
        super().__init__()
        # d2l helper; `self.lr` is read back in `configure_optimizers`.
        self.save_hyperparameters()
        self.net = nn.Sequential(
            # Stage 1: 64 channels
            nn.LazyConv2d(64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 2: 96 channels
            nn.LazyConv2d(96, kernel_size=3, padding=1),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 3: 128 channels
            nn.LazyConv2d(128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Flatten(),

            # Classifier head
            nn.LazyLinear(110),
            nn.ReLU(),
            nn.Dropout(0.6),
            # Sized from `num_classes` (previously hard-coded to 10, which
            # silently ignored the constructor argument).
            nn.LazyLinear(num_classes)
        )

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters.

        Uses the stored learning rate ``self.lr`` and a weight decay of 0.001.
        """
        return torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=0.001)

Note that we use Kaiming initialization over Xavier (Glorot) as this works better with ReLU:

In [None]:
def init_cnn(module):
    """Apply Kaiming-normal initialisation to a linear or 2-D conv layer.

    Intended to be used as a per-module callback: modules that are neither
    ``nn.Linear`` nor ``nn.Conv2d`` are left untouched. Only ``weight`` is
    re-initialised; biases are not modified.

    Args:
        module: The module to (possibly) initialise in place.
    """
    # Skip everything that is not a linear / convolutional layer.
    if not isinstance(module, (nn.Linear, nn.Conv2d)):
        return
    nn.init.kaiming_normal_(module.weight, nonlinearity='relu')

We instantiate the model and inspect its properties, e.g. parameter count:

In [None]:
set_seed(SEED) # Set for reproducibility

# Dummy batch: a single one-channel 28x28 input drawn from the seeded RNG.
# It is drawn before the model is built, so the statement order here affects
# the random state seen by the model's constructor.
X = torch.randn(1, 1, 28, 28)
model = CNN_FashionMNIST(lr=5e-4)
# One forward pass so the lazy layers (LazyConv2d / LazyLinear) can infer
# their input sizes before the summary is produced.
model(X)
summary(model)

## Data augmentation

To help the model generalize better, we augment the training data by applying affine transformations such as a random horizontal flip.

In [None]:
set_seed(SEED) # Set for reproducibility

# Fashion-MNIST data module: batches of 128 images, resized to 28x28.
data = d2l.FashionMNIST(batch_size=128, resize=(28, 28))
# Trainer configured for 20 epochs; num_gpus=1 requests a single GPU.
trainer = d2l.Trainer(max_epochs=20, num_gpus=1)

def augment_transform(train):
    """Build the image preprocessing pipeline for one data split.

    Args:
        train: If truthy, a random horizontal flip (p=0.5) is inserted
            between resizing and tensor conversion; otherwise the pipeline
            only resizes to 28x28 and converts to a tensor.

    Returns:
        A ``transforms.Compose`` wrapping the selected steps.
    """
    steps = [transforms.Resize((28, 28))]

    # Augmentation is a training-time-only step; evaluation data is not flipped.
    if train:
        steps.append(transforms.RandomHorizontalFlip(p=0.5))

    steps.append(transforms.ToTensor())
    return transforms.Compose(steps)

## Training

In [None]:
set_seed(SEED) # Set for reproducibility

# d2l.FashionMNIST (1.0.3) creates its torchvision datasets, including their
# transforms, in __init__ and does not look up a `get_transform` attribute,
# so the previous `data.get_transform = augment_transform` had no effect.
# Install the pipelines directly on the underlying datasets instead:
# random flips for the training split only.
# NOTE: with augmentation actually applied, the accuracy quoted at the end of
# the notebook may no longer be reproduced exactly.
data.train.transform = augment_transform(train=True)
data.val.transform = augment_transform(train=False)

# Pass one training batch through the model, then apply init_cnn (Kaiming
# initialisation) to its layers before fitting.
model.apply_init([next(iter(data.get_dataloader(True)))[0]], init_cnn)
trainer.fit(model, data)

## Measure accuracy

In [None]:
set_seed(SEED) # Set for reproducibility

test_iter = data.get_dataloader(train=False)
model.eval()  # evaluation mode for dropout / batch norm

# Evaluate on whichever device the model's parameters currently live on, so
# inputs and weights always match (also when the model was never moved to GPU).
device = next(model.parameters()).device

metric = d2l.Accumulator(2)  # [correct predictions, examples seen]

# Test on all batches (TODO: Ask Ms. if there's specific code we need to execute
# to measure accuracy consistently for all groups in our class)
# No gradients are needed for evaluation; skip building the autograd graph.
with torch.no_grad():
    for X, y in test_iter:
        X, y = X.to(device), y.to(device)
        metric.add(d2l.accuracy(model(X), y), y.numel())

final_test_acc = metric[0] / metric[1]
print(f'Final Test Accuracy: {final_test_acc:.4f}')

Expected accuracy: 92.42%