In [1]:
### ResNet

### 残差块

作用：加速信息的流动，捕捉信息的细微波动。

设理想映射为 $f(x)$，则残差映射为 $f(x)-x$，恒等映射为 $f(x) = x$。实际上，当理想映射接近恒等映射（即 $f(x) \approx x$）时，下图中右边虚线框内的卷积块只需拟合残差映射 $f(x)-x$，因此更容易捕捉到相对于恒等映射的微小波动。

![](./5.11_residual-block.svg)

标准残差块的构成：

```
input --+-- conv -- bn -- relu -- conv -- bn --(+)-- relu
        |                                       |
        +--------------- shortcut --------------+
```

In [9]:
### 构建Residual block

import time
import torch
from torch import nn, optim
import torch.nn.functional as F

import sys
sys.path.append('../d2lzh/')
import d2lzh_pytorch as d2l
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class Residual(nn.Module):
    """Residual block with two 3x3 convolutions and a shortcut connection.

    Computes ``relu(bn2(conv2(relu(bn1(conv1(X))))) + shortcut(X))``.

    Args:
        in_c: Number of input channels.
        out_c: Number of output channels.
        use_1x1conv: If True, the shortcut applies a 1x1 convolution (with the
            same ``stride``) so its output matches the main branch. If False,
            the input is added unchanged, so the caller must make sure the
            shapes are compatible for addition (e.g. ``in_c == out_c`` and
            ``stride == 1``).
        stride: Stride of the first 3x3 convolution and of the 1x1 shortcut
            convolution when it is present.
    """

    def __init__(self, in_c, out_c, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        # Main branch: the first convolution may downsample through `stride`;
        # the second always keeps the spatial size (3x3 kernel, padding 1).
        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
        # Optional projection on the shortcut branch; None means identity.
        if use_1x1conv:
            self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_c)
        self.bn2 = nn.BatchNorm2d(out_c)

    def forward(self, X):
        """Apply the block to ``X`` and return the activated sum of both branches."""
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Test for None explicitly instead of relying on Module truthiness.
        shortcut = X if self.conv3 is None else self.conv3(X)
        return F.relu(Y + shortcut)

In [4]:
# Check that the output shape matches the input shape when the block keeps
# the channel count and uses the default stride of 1.
blk = Residual(3, 3)            # in_c, out_c
X = torch.rand((4, 3, 6, 6))
blk(X).shape

torch.Size([4, 3, 6, 6])

In [5]:
# Change the number of channels and the output size: with use_1x1conv=True and
# stride=2 the block maps 3 channels to 6 (X is the tensor from the previous cell).
blk = Residual(3, 6, use_1x1conv=True, stride=2)
blk(X).shape

torch.Size([4, 6, 3, 3])

In [6]:
### ResNet 模型

In [10]:
# Stem of the network: a 7x7 stride-2 convolution with batch norm and ReLU,
# followed by a 3x3 stride-2 max pooling layer.
net = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(num_features=64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# 
def resnet_block(in_c, out_c, num_residuals, first_block=False):
    """Build one stage made of ``num_residuals`` chained Residual blocks.

    Unless ``first_block`` is True, the first Residual of the stage uses a
    1x1 shortcut convolution with stride 2 and maps ``in_c`` to ``out_c``
    channels. Every other Residual maps ``out_c`` to ``out_c`` with the
    default stride. For the first stage ``in_c`` must equal ``out_c``.

    Returns:
        An ``nn.Sequential`` holding the Residual blocks in order.
    """
    if first_block:
        assert in_c == out_c
    layers = [
        Residual(in_c, out_c, use_1x1conv=True, stride=2)
        if idx == 0 and not first_block
        else Residual(out_c, out_c)
        for idx in range(num_residuals)
    ]
    return nn.Sequential(*layers)

# Four stages of two residual blocks each; every block has two 3x3 convolutions,
# so the stages contain 4 x (2 x 2) = 16 convolutional layers.
net.add_module('resnet_block1', resnet_block(64, 64, 2, first_block=True))
net.add_module('resnet_block2', resnet_block(64, 128, 2))
net.add_module('resnet_block3', resnet_block(128, 256, 2))
net.add_module('resnet_block4', resnet_block(256, 512, 2))

# Head: global average pooling followed by one fully connected layer (512 -> 10);
# this is a linear layer, not a convolution.
net.add_module('global_avg_pool', d2l.GlobalAvgPool2d())
net.add_module('fc', nn.Sequential(d2l.FlattenLayer(), nn.Linear(512, 10)))

# 1 stem convolution + 16 residual-block convolutions + 1 fully connected layer
# = 18 weighted layers (the 1x1 shortcut convolutions are not counted), hence ResNet-18.

In [11]:
# Trace how the tensor shape changes as a random 1x1x224x224 input passes
# through each top-level child of the network.
X = torch.rand((1, 1, 224, 224))
for name, layer in net.named_children():
    X = layer(X)
    print(name, ' output shape: \t', X.shape)

0  output shape: 	 torch.Size([1, 64, 112, 112])
1  output shape: 	 torch.Size([1, 64, 112, 112])
2  output shape: 	 torch.Size([1, 64, 112, 112])
3  output shape: 	 torch.Size([1, 64, 56, 56])
resnet_block1  output shape: 	 torch.Size([1, 64, 56, 56])
resnet_block2  output shape: 	 torch.Size([1, 128, 28, 28])
resnet_block3  output shape: 	 torch.Size([1, 256, 14, 14])
resnet_block4  output shape: 	 torch.Size([1, 512, 7, 7])
global_avg_pool  output shape: 	 torch.Size([1, 512, 1, 1])
fc  output shape: 	 torch.Size([1, 10])


In [12]:
### 准备数据

In [13]:
# Build the Fashion-MNIST train/test iterators through the d2l helper.
# NOTE(review): resize=96 presumably rescales each image to 96x96; confirm against d2l.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

In [14]:
# Optimize all parameters of `net` with Adam (learning rate 0.001) and hand the
# model, data iterators, optimizer and device to the d2l training helper for 10 epochs.
lr, num_epochs = 0.001, 10
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size,
             optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 0.4073, train_acc 0.850, test acc 0.846, time 56.3 sec
epoch 2, loss 0.2474, train_acc 0.909, test acc 0.914, time 56.2 sec
epoch 3, loss 0.2073, train_acc 0.923, test acc 0.856, time 56.3 sec
epoch 4, loss 0.1803, train_acc 0.933, test acc 0.939, time 56.4 sec
epoch 5, loss 0.1556, train_acc 0.942, test acc 0.952, time 56.8 sec
epoch 6, loss 0.1349, train_acc 0.950, test acc 0.938, time 56.6 sec
epoch 7, loss 0.1163, train_acc 0.958, test acc 0.968, time 56.6 sec
epoch 8, loss 0.0964, train_acc 0.965, test acc 0.969, time 56.7 sec
epoch 9, loss 0.0836, train_acc 0.969, test acc 0.976, time 56.7 sec
epoch 10, loss 0.0681, train_acc 0.974, test acc 0.982, time 57.2 sec
