
# Task 2 — ResNet18 × CIFAR-10 (Follow HackMD Skeleton)

依照 **HackMD: Task-2-Code** 的格式實作：
- 檢查 CUDA
- 計算 mean/std
- 設定 **Data Augmentation** 與 **Normalization**
- 載入 **CIFAR-10**，切出 **Validation**
- **BasicBlock** 與 **ResNet18**
- Model Summary
- 訓練（含最佳模型存檔）、驗證、測試
- 繪出 **Training/Validation Loss & Accuracy**


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install torchsummary
!pip install thop

Collecting thop
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop
Successfully installed thop-0.1.1.post2209072238


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import argparse
import os
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from thop import profile
from torchsummary import summary
from torch.utils.data import random_split, DataLoader

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
import time
import matplotlib.pyplot as plt

In [4]:
print(torch.cuda.is_available())

True


In [5]:
def get_mean_std(dataset, ratio=1):
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=int(len(dataset)*ratio), shuffle=True, num_workers=2)
    data = next(iter(dataloader))[0]
    mean = np.mean(data.numpy(), axis=(0,2,3))
    std = np.std(data.numpy(), axis=(0,2,3))
    return mean, std

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset  = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())

train_mean, train_std = get_mean_std(train_dataset)
test_mean, test_std   = train_mean, train_std

print("train_mean:", train_mean, "train_std:", train_std)
print("test_mean :", test_mean,  "test_std :", test_std)


100%|██████████| 170M/170M [00:13<00:00, 12.5MB/s]


train_mean: [0.49139905 0.4821573  0.4465301 ] train_std: [0.24703303 0.24348477 0.26158857]
test_mean : [0.49139905 0.4821573  0.4465301 ] test_std : [0.24703303 0.24348477 0.26158857]


In [6]:

##### data augmentation & normalization #####
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=train_mean, std=train_std),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=test_mean, std=test_std),
])


In [7]:
# Dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_ds  = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

image, label = trainset[0]
print("image shape: ", image.shape)

torch.manual_seed(43)
val_size = 5000
train_size = len(trainset) - val_size
train_ds, val_ds = random_split(trainset, [train_size, val_size])

print("train length: ", len(train_ds))
print("val length: ", len(val_ds))
print("test length: ", len(test_ds))

# Declare batch size
# 學生實作部分：Set batch size
BATCH_SIZE = 128

trainloader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=2, pin_memory=True)
valloader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
testloader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)


image shape:  torch.Size([3, 32, 32])
train length:  45000
val length:  5000
test length:  10000


In [8]:
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1   = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2   = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = self.bn2(self.conv2(out))
        out = out + self.shortcut(x)
        out = F.relu(out, inplace=True)
        return out


In [9]:
class ResNet18(nn.Module):
    def __init__(self, num_classes=1000):
        super(ResNet18, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1   = nn.BatchNorm2d(64)

        self.layer1 = self._make_layer(BasicBlock, out_channels=64,  num_blocks=2, stride=1)
        self.layer2 = self._make_layer(BasicBlock, out_channels=128, num_blocks=2, stride=2)
        self.layer3 = self._make_layer(BasicBlock, out_channels=256, num_blocks=2, stride=2)
        self.layer4 = self._make_layer(BasicBlock, out_channels=512, num_blocks=2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for s in strides:
            layers.append(block(self.in_channels, out_channels, s))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out


In [10]:
# Create SummaryWriter
writer = SummaryWriter("./tensorboard")
model = ResNet18(num_classes=10)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Print model summary
summary(model, (3, 224, 224))

# Calculate FLOPs and Params
dummy_input = torch.randn(1, 3, 32, 32).to(device)
flops, params = profile(model, inputs=(dummy_input, ))
print(f"FLOPs: {flops/1e6:.2f} MFLOPs")
print(f"Params: {params/1e6:.2f} M")


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,728
       BatchNorm2d-2         [-1, 64, 224, 224]             128
            Conv2d-3         [-1, 64, 224, 224]          36,864
       BatchNorm2d-4         [-1, 64, 224, 224]             128
            Conv2d-5         [-1, 64, 224, 224]          36,864
       BatchNorm2d-6         [-1, 64, 224, 224]             128
        BasicBlock-7         [-1, 64, 224, 224]               0
            Conv2d-8         [-1, 64, 224, 224]          36,864
       BatchNorm2d-9         [-1, 64, 224, 224]             128
           Conv2d-10         [-1, 64, 224, 224]          36,864
      BatchNorm2d-11         [-1, 64, 224, 224]             128
       BasicBlock-12         [-1, 64, 224, 224]               0
           Conv2d-13        [-1, 128, 112, 112]          73,728
      BatchNorm2d-14        [-1, 128, 1

In [11]:
# Setting parameter
EPOCH     = 60
pre_epoch = 0
lr        = 0.1

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4, nesterov=True)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCH)

scaler = torch.amp.GradScaler('cuda', enabled=torch.cuda.is_available())

In [None]:
best_model_path = 'best_resnet18.pth'

train_losses, train_accuracies = [], []
val_losses,   val_accuracies   = [], []
best_val_acc = 0.0

def accuracy(output, target):
    pred = output.argmax(dim=1)
    return (pred == target).float().mean().item()

for epoch in range(pre_epoch, EPOCH):
    t0 = time.time()
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * labels.size(0)
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / total
    train_acc  = correct / total

    model.eval()
    val_running_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for v_inputs, v_labels in valloader:
            v_inputs, v_labels = v_inputs.to(device), v_labels.to(device)
            v_outputs = model(v_inputs)
            v_loss = criterion(v_outputs, v_labels)
            val_running_loss += v_loss.item() * v_labels.size(0)
            val_correct += (v_outputs.argmax(dim=1) == v_labels).sum().item()
            val_total += v_labels.size(0)

    val_loss = val_running_loss / val_total
    val_acc  = val_correct / val_total

    train_losses.append(train_loss); train_accuracies.append(train_acc)
    val_losses.append(val_loss);     val_accuracies.append(val_acc)

    # 儲存最佳模型
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)

    scheduler.step()
    t1 = time.time()

    print(f"Epoch [{epoch+1}/{EPOCH}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc*100:.2f}% | "
          f"Time: {t1 - t0:.1f}s")

plt.figure(figsize=(7,5))
plt.plot(range(1, EPOCH+1), train_losses, label="Train Loss")
plt.plot(range(1, EPOCH+1), val_losses,   label="Val Loss")
plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.legend(); plt.title("Loss")
plt.tight_layout(); plt.show()

plt.figure(figsize=(7,5))
plt.plot(range(1, EPOCH+1), [x*100 for x in train_accuracies], label="Train Acc")
plt.plot(range(1, EPOCH+1), [x*100 for x in val_accuracies],   label="Val Acc")
plt.xlabel("Epoch"); plt.ylabel("Accuracy (%)"); plt.legend(); plt.title("Accuracy")
plt.tight_layout(); plt.show()


Epoch [1/60] Train Loss: 2.0161 | Train Acc: 27.38% | Val Loss: 1.7418 | Val Acc: 36.62% | Time: 41.5s
Epoch [2/60] Train Loss: 1.4637 | Train Acc: 46.24% | Val Loss: 1.2613 | Val Acc: 53.84% | Time: 41.8s
Epoch [3/60] Train Loss: 1.0880 | Train Acc: 61.17% | Val Loss: 1.1046 | Val Acc: 61.44% | Time: 41.5s
Epoch [4/60] Train Loss: 0.8464 | Train Acc: 70.08% | Val Loss: 1.0102 | Val Acc: 64.34% | Time: 42.7s
Epoch [5/60] Train Loss: 0.7105 | Train Acc: 75.07% | Val Loss: 0.8432 | Val Acc: 70.68% | Time: 41.2s


In [None]:
# Load the best model and evaluate on the test set
model.load_state_dict(torch.load(best_model_path, map_location=device))
model.eval()

test_correct, test_total = 0, 0
test_running_loss = 0.0
with torch.no_grad():
    for test_inputs, test_labels in testloader:
        test_inputs, test_labels = test_inputs.to(device), test_labels.to(device)
        test_outputs = model(test_inputs)
        _, test_predicted = test_outputs.max(1)
        test_total += test_labels.size(0)
        test_correct += test_predicted.eq(test_labels).sum().item()

        loss = criterion(test_outputs, test_labels)
        test_running_loss += loss.item() * test_labels.size(0)

test_accuracy = 100.0 * test_correct / test_total
test_loss = test_running_loss / test_total
print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.2f}%")
