<a href="https://colab.research.google.com/github/kaiyiyu/MachineLearning/blob/main/FashionMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# [SOURCE CODE] FashionMNIST.ipynb
Link to Colab: https://colab.research.google.com/drive/1-nsccEpKXelyqC0Wtc2QEgy7pnwMwjlv?usp=sharing

This code was adapted from the following template:
https://d2l.ai/chapter_convolutional-modern/resnet.html

In [None]:
# Install the pinned d2l release; the cells below import `d2l.torch` from it.
!pip install d2l==v1.0.0-alpha1.post0

In [None]:
import time
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

In [None]:
class Residual(nn.Module):
    """The Residual block of ResNet models.

    The main path is conv3x3 -> batch norm -> ReLU -> conv3x3 -> batch norm.
    Its output is added to the (optionally projected) input and passed
    through a final ReLU.

    Args:
        num_channels: Number of output channels of every convolution in
            the block.
        use_1x1conv: If True, the skip path goes through a 1x1 convolution
            with stride ``strides`` so that its output shape matches the
            main path when channels or resolution change.
        strides: Stride of the first 3x3 convolution (and of the 1x1
            convolution when it is present).
    """
    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        # Lazy layers infer their input channel count on the first forward.
        self.conv1 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1,
                                   stride=strides)
        self.conv2 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.LazyConv2d(num_channels, kernel_size=1,
                                       stride=strides)
        else:
            # No projection: the input is added to the main path unchanged.
            self.conv3 = None
        self.bn1 = nn.LazyBatchNorm2d()
        self.bn2 = nn.LazyBatchNorm2d()

    def forward(self, X):
        """Return ``relu(main_path(X) + skip(X))``."""
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Compare against None explicitly instead of relying on the
        # truthiness of an nn.Module instance.
        if self.conv3 is not None:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)

In [None]:
# Sanity check: with stride 1 and no 1x1 convolution, the 3x3 convolutions
# (padding 1) keep the spatial size, so the output shape equals the input's.
blk = Residual(3)
X = torch.randn(4, 3, 6, 6)
blk(X).shape  # expected: torch.Size([4, 3, 6, 6])

In [None]:
# Sanity check: stride 2 with a 1x1 projection halves height and width and
# changes the channel count to 6. Reuses the tensor X from the previous cell.
blk = Residual(6, use_1x1conv=True, strides=2)
blk(X).shape  # expected: torch.Size([4, 6, 3, 3])

In [None]:
class ResNet(d2l.Classifier):
    """ResNet-style classifier built on ``d2l.Classifier``."""

    def b1(self):
        """Build the stem: 7x7 stride-2 conv, ReLU, 3x3 stride-2 max-pool, batch norm.

        Compared with the template this was adapted from, batch
        normalization is placed after the max-pooling layer and an eager
        ``nn.BatchNorm2d(64)`` replaces the lazy variant.
        """
        stem_layers = [
            nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
        ]
        return nn.Sequential(*stem_layers)

In [None]:
@d2l.add_to_class(ResNet)
def block(self, num_residuals, num_channels, first_block=False):
    """Stack ``num_residuals`` residual blocks with ``num_channels`` channels.

    The leading residual block downsamples (stride 2, with a 1x1 projection
    on the skip path) unless ``first_block`` is True; every other block
    keeps stride 1 and has no projection.
    """
    def make_residual(index):
        # Only the leading block of a non-first stage changes the shape.
        downsample = index == 0 and not first_block
        if downsample:
            return Residual(num_channels, use_1x1conv=True, strides=2)
        return Residual(num_channels)

    return nn.Sequential(*(make_residual(i) for i in range(num_residuals)))

In [None]:
@d2l.add_to_class(ResNet)
def __init__(self, arch, lr=0.1, weight_decay=0.0001, num_classes=10):
    """Assemble the network from the stem, the residual stages and the head.

    Args:
        arch: Iterable of ``(num_residuals, num_channels)`` pairs, one per
            stage; each pair is forwarded to ``self.block``.
        lr: Learning rate for SGD.
        weight_decay: L2 weight-decay coefficient for SGD.
        num_classes: Number of outputs of the final linear layer.
    """
    super(ResNet, self).__init__()
    # Stores the constructor arguments (lr, weight_decay, ...) on self.
    self.save_hyperparameters()
    self.net = nn.Sequential(self.b1())
    for i, b in enumerate(arch):
        # Stages are named b2, b3, ...; only the first one skips downsampling.
        self.net.add_module(f'b{i+2}', self.block(*b, first_block=(i==0)))
    self.net.add_module('last', nn.Sequential(
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.LazyLinear(num_classes)))
    self.net.apply(d2l.init_cnn)
    # Kept for backward compatibility with code that reads model.optimizer.
    # The optimizer actually used for training comes from
    # configure_optimizers() below.
    self.optimizer = torch.optim.SGD(self.parameters(), lr=lr, weight_decay=weight_decay) # add weight decay and SGD optimizer


@d2l.add_to_class(ResNet)
def configure_optimizers(self):
    """Return the SGD optimizer, including weight decay, used for training.

    Per the d2l API, ``d2l.Trainer.fit`` obtains its optimizer by calling
    ``model.configure_optimizers()``, and the inherited default builds SGD
    from ``lr`` only. Without this override the ``weight_decay`` argument
    would never reach the training loop. The optimizer is rebuilt here, at
    call time, so it refers to the parameters as they exist when training
    starts, and ``self.optimizer`` is kept in sync with it.
    """
    self.optimizer = torch.optim.SGD(
        self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
    return self.optimizer

In [None]:
class ResNet14(ResNet):
    """ResNet variant with three stages of two residual blocks each."""

    def __init__(self, lr=0.1, weight_decay=0.0001, num_classes=10):
        """Build the network with 64-, 128- and 256-channel stages."""
        # (num_residuals, num_channels) for each stage.
        stages = ((2, 64), (2, 128), (2, 256))
        super().__init__(stages, lr=lr, weight_decay=weight_decay,
                         num_classes=num_classes)

In [None]:
# Build the model, trainer and dataset (Fashion-MNIST at 28x28, batch size 256).
model = ResNet14(lr=0.01, weight_decay=0.1)
trainer = d2l.Trainer(max_epochs=1, num_gpus=1)
data = d2l.FashionMNIST(batch_size=256, resize=(28, 28))
# Pass one batch of training inputs to apply_init together with init_cnn;
# the model contains lazy layers whose shapes are only known after a forward.
model.apply_init([next(iter(data.get_dataloader(True)))[0]], d2l.init_cnn)

# Start the clock for the training-time measurement. perf_counter is a
# monotonic, high-resolution clock intended for measuring intervals.
start_time = time.perf_counter()

# Train model
trainer.fit(model, data)

# Stop the clock BEFORE evaluating on the test set so the reported training
# time does not include evaluation. CUDA kernels run asynchronously, so wait
# for outstanding GPU work to finish before reading the clock.
if torch.cuda.is_available():
    torch.cuda.synchronize()
total_time = time.perf_counter() - start_time

# Calculate average training time of one epoch
avg_time_epoch = total_time / trainer.max_epochs

# Evaluate accuracy on test dataset (not part of the timed region)
test_accuracy = d2l.evaluate_accuracy_gpu(model, data.get_dataloader(False), None)

# Print results
print(f"Accuracy on test dataset: {test_accuracy:.2%}")
print(f"Total training time: {total_time:.2f} seconds")
print(f"Average training time per epoch: {avg_time_epoch:.2f} seconds")