<a href="https://colab.research.google.com/github/kaiyiyu/MachineLearning/blob/main/CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The following additional libraries are needed to run this
notebook. Note that running on Colab is experimental; please report a GitHub
issue if you run into any problems.

In [None]:
!pip install d2l==v1.0.0-alpha1.post0

In [None]:
import time
import torch
import torchvision
from torch import nn
from torch.nn import functional as F
from torchvision import datasets, transforms
from d2l import torch as d2l

In [None]:
class Residual(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    Args:
        num_channels: Number of output channels of every convolution in the block.
        use_1x1conv: When True, the skip path goes through a 1x1 convolution
            (with the same stride as the first 3x3 convolution) so its shape
            can match the main path before the addition.
        strides: Stride of the first 3x3 convolution and of the optional 1x1
            convolution on the skip path.
    """
    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        conv3x3 = dict(kernel_size=3, padding=1)
        self.conv1 = nn.LazyConv2d(num_channels, stride=strides, **conv3x3)
        self.conv2 = nn.LazyConv2d(num_channels, **conv3x3)
        # Optional projection for the skip path; None means identity shortcut.
        self.conv3 = (
            nn.LazyConv2d(num_channels, kernel_size=1, stride=strides)
            if use_1x1conv else None
        )
        self.bn1 = nn.LazyBatchNorm2d()
        self.bn2 = nn.LazyBatchNorm2d()

    def forward(self, X):
        """Return relu(main_path(X) + shortcut(X))."""
        main = self.conv1(X)
        main = F.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        # The shortcut is evaluated after the main path, as in the original
        # layer call order.
        shortcut = X if self.conv3 is None else self.conv3(X)
        main += shortcut
        return F.relu(main)

In [None]:
# Sanity check with the default block (identity shortcut, stride 1): the
# 3x3 convolutions use padding 1 and the channel count equals the input's,
# so the output shape is expected to equal the input shape (4, 3, 6, 6).
blk = Residual(3)
# X is reused by the next cell.
X = torch.randn(4, 3, 6, 6)
blk(X).shape

In [None]:
# Down-sampling variant: stride 2 halves height and width while the 1x1
# shortcut convolution maps the input to 6 channels, so the expected shape is
# (4, 6, 3, 3). Relies on X defined in the previous cell.
blk = Residual(6, use_1x1conv=True, strides=2)
blk(X).shape

In [None]:
class ResNet(d2l.Classifier):
    """ResNet-style classifier; further methods are attached in later cells."""
    def b1(self):
        """Build the stem of the network.

        Returns:
            An ``nn.Sequential`` of two (3x3 conv with 16 channels, batch norm,
            ReLU) units followed by a 3x3 max pooling layer with stride 2.
        """
        stem = []
        for _ in range(2):
            stem.extend([
                nn.LazyConv2d(16, kernel_size=3, stride=1, padding=1),
                nn.LazyBatchNorm2d(),
                nn.ReLU(),
            ])
        stem.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        return nn.Sequential(*stem)

In [None]:
@d2l.add_to_class(ResNet)
def block(self, num_residuals, num_channels, first_block=False):
    """Stack ``num_residuals`` residual blocks into one stage.

    Args:
        num_residuals: Number of ``Residual`` blocks in the stage.
        num_channels: Output channels of every block in the stage.
        first_block: When False, the first block of the stage uses stride 2
            with a 1x1 shortcut convolution; when True, every block is a
            plain stride-1 block.

    Returns:
        An ``nn.Sequential`` holding the blocks in order.
    """
    def make_residual(position):
        # Only the leading block of a non-first stage down-samples.
        downsample = position == 0 and not first_block
        if downsample:
            return Residual(num_channels, use_1x1conv=True, strides=2)
        return Residual(num_channels)

    return nn.Sequential(*(make_residual(i) for i in range(num_residuals)))

In [None]:
@d2l.add_to_class(ResNet)
def __init__(self, arch, lr=0.1, weight_decay=0.0001, num_classes=10):
    """Assemble the network: stem, one stage per ``arch`` entry, then the head.

    Args:
        arch: Iterable of ``(num_residuals, num_channels)`` pairs, one per
            stage; each pair is unpacked into ``self.block``.
        lr: Learning-rate hyperparameter handed to ``save_hyperparameters``.
        weight_decay: Weight-decay hyperparameter handed to
            ``save_hyperparameters``.
        num_classes: Size of the final linear layer's output.
    """
    super(ResNet, self).__init__()
    # Kept as the first statement after super().__init__(), before any extra
    # locals are created. NOTE(review): presumably this records the
    # constructor arguments on the instance; confirm against d2l.
    self.save_hyperparameters()
    self.net = nn.Sequential(self.b1())
    for stage_idx, stage_cfg in enumerate(arch):
        # Stages are registered as b2, b3, ... after the stem.
        stage = self.block(*stage_cfg, first_block=(stage_idx == 0))
        self.net.add_module(f'b{stage_idx + 2}', stage)
    head = nn.Sequential(
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.LazyLinear(num_classes),
    )
    self.net.add_module('last', head)
    self.net.apply(d2l.init_cnn)

In [None]:
class ResNet20(ResNet):
    """ResNet with three stages of three residual blocks (16, 32, 64 channels)."""
    def __init__(self, lr=0.1, weight_decay=0.0001, num_classes=10):
        # Each entry is (num_residuals, num_channels) for one stage.
        stage_spec = tuple((3, width) for width in (16, 32, 64))
        super().__init__(stage_spec, lr, weight_decay, num_classes)

In [None]:
# Trace the per-layer output shapes for one CIFAR-10-sized input
# (3 channels, 32 x 32 pixels) so the summary matches the data trained on below.
ResNet20().layer_summary((1, 3, 32, 32))

In [None]:
def train_cnn(model, train_loader, test_loader, num_epochs, lr=0.1, wd=0.0):
    """Train ``model`` with Adam and print metrics after every epoch.

    The model is moved to CUDA when available and to the CPU otherwise.

    Args:
        model: Module mapping a batch of inputs to class logits.
        train_loader: Iterable of ``(X, y)`` training batches.
        test_loader: Iterable of batches passed to
            ``d2l.evaluate_accuracy_gpu`` after each epoch.
        num_epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        wd: Weight decay (L2 penalty) handed to Adam.
    """
    # 'cpu' is the valid fallback device type; 'gpu' is rejected by torch.device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    # Forward wd so the caller's weight-decay setting actually takes effect.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    for epoch in range(num_epochs):
        train_loss, train_acc, n = 0.0, 0.0, 0
        model.train()
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = model(X)
            loss = F.cross_entropy(y_hat, y)
            loss.backward()
            optimizer.step()
            # cross_entropy returns the batch mean; weight it by the batch
            # size so the epoch average is per sample.
            train_loss += loss.item() * y.size(0)
            train_acc += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.size(0)
        train_loss /= n
        train_acc /= n
        test_acc = d2l.evaluate_accuracy_gpu(model, test_loader)
        print(f"Epoch {epoch + 1}, train loss: {train_loss:.4f}, train acc: {train_acc:.4f}, test acc: {test_acc:.4f}")


# Hyperparameters: one source of truth for the model and the training call
SEED = 0
NUM_EPOCHS = 15
LR = 0.01
WEIGHT_DECAY = 0.0001
BATCH_SIZE = 512

# Seed PyTorch's global RNG (weight init, shuffling) to reduce run-to-run variation
torch.manual_seed(SEED)

# Define the model
model = ResNet20(lr=LR, weight_decay=WEIGHT_DECAY)

# Define the data transformation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Load the data
train_data = datasets.CIFAR10(root='data', train=True, transform=transform, download=True)
test_data = datasets.CIFAR10(root='data', train=False, transform=transform, download=True)

# Create the data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# Initialize the model parameters.
# The network is built from lazy layers, which only get real weights on their
# first forward pass. Run one sample through the model first (eval mode and
# no_grad, so batch-norm statistics and gradients are untouched); otherwise
# the initializer would have no materialized weights to act on.
sample_image, _ = train_data[0]
model.eval()
with torch.no_grad():
    model(sample_image.unsqueeze(0))
model.apply(d2l.init_cnn)

# Train the model
start_time = time.time()
train_cnn(model, train_loader, test_loader, num_epochs=NUM_EPOCHS, lr=LR, wd=WEIGHT_DECAY)
end_time = time.time()

# Calculate the total training time (includes the per-epoch test evaluation)
total_time = end_time - start_time

# Evaluate the model on the test dataset
test_accuracy = d2l.evaluate_accuracy_gpu(model, test_loader)

# Calculate average training time of one epoch, using the epoch count actually run
avg_time_epoch = total_time / NUM_EPOCHS

# Print results
print(f"Accuracy on test dataset: {test_accuracy:.2%}")
print(f"Total training time: {total_time:.2f} seconds")
print(f"Average training time per epoch: {avg_time_epoch:.2f} seconds")