In [1]:
## NiN网络的提出初衷是：
# 1. LeNet、AlexNet、VGG都是conv模块（抽取空间特征）+ FC模块（分类）。
# 2. AlexNet、VGG都是在LeNet的基础上，在宽度（channels）和深度（layers）上下功夫。
# 3. NiN则尝试使用多个conv+'fc'层来构建深层网络。

## 具体做法：
# 1. conv模块的输入输出都是4维，fc模块的输入是4维，输出是2维。
# 2. 如果要在FC模块后面再加上卷积层，那么要先将2维上升到4维。
# 3. chap5.3介绍过1x1卷积可以视为fc层，使用它就避免了降维和多余的升维。
# 4. 因此，使用1x1 conv替代fc层。

In [2]:
### NiN block

In [1]:
import time
import torch
from torch import nn, optim

import sys
sys.path.append('../d2lzh/')
import d2lzh_pytorch as d2l
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    """Build one NiN block: a spatial convolution followed by two 1x1 convolutions.

    Every convolution is followed by a ReLU, giving six layers in total.

    Args:
        in_channels: channel count of the block's input.
        out_channels: channel count produced by the first convolution and
            kept by both 1x1 convolutions.
        kernel_size: kernel size of the first convolution.
        stride: stride of the first convolution.
        padding: padding of the first convolution.

    Returns:
        An ``nn.Sequential`` holding Conv2d, ReLU, Conv2d(1x1), ReLU,
        Conv2d(1x1), ReLU in that order.
    """
    # Only the first convolution looks at a spatial neighbourhood and changes
    # the channel count.
    spatial_conv = nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
    )
    layers = [spatial_conv, nn.ReLU()]

    # Two 1x1 convolutions mix channels independently at every pixel.
    for _ in range(2):
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=1))
        layers.append(nn.ReLU())

    return nn.Sequential(*layers)

In [5]:
### NiN model（NiN是在AlexNet之后不久提出的，故它们的内部结构大致相当）
# 不同之处：
# 1. AlexNet中的FC模块被替换为一个out_channels=10（类别数）的nin_block。
# 2. 随后的10个feature maps被全局平均池化层变成了1个包含10个元素的行向量。

In [2]:
import torch.nn.functional as F
class GlobalAvgPool2d(nn.Module):
    """Global average pooling layer.

    Averages each feature map over a window equal to the input's own trailing
    dimensions, so the window size is derived from the input rather than fixed
    in advance. The layer has no parameters.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Everything after the first two dimensions is used as the pooling window.
        spatial_window = x.shape[2:]
        return F.avg_pool2d(x, kernel_size=spatial_window)
    
# NiN model for single-channel input and 10 classes: three NiN blocks, each
# followed by 3x3 / stride-2 max pooling, then dropout and a classification head.
net = nn.Sequential(
    nin_block(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=0),
    nn.MaxPool2d(3, stride=2),
    nin_block(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(3, stride=2),
    nin_block(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(3, stride=2),
    nn.Dropout(p=0.5),

    # This part stands in for the fully connected classifier:
    # the last NiN block emits one feature map per class (10 channels).
    nin_block(in_channels=384, out_channels=10, kernel_size=3, stride=1, padding=1),
    GlobalAvgPool2d(),   # 4-D output: (batch_size, channel=10, h=1, w=1)
    d2l.FlattenLayer(),  # collapses the dimensions to (batch_size, channel=10)
)

In [3]:
# Debug the network: push one dummy 1x224x224 input through `net` child by
# child and print the output shape after every stage.
# The tensor is named with a plain ASCII `X`; the previous fullwidth `Ｘ` only
# matched the later uses because Python NFKC-normalises identifiers.
X = torch.rand(1, 1, 224, 224)
with torch.no_grad():  # only shapes are inspected, so skip autograd bookkeeping
    for name, blk in net.named_children():
        X = blk(X)
        print(name, 'output shape:', X.shape)

0 output shape: torch.Size([1, 96, 54, 54])
1 output shape: torch.Size([1, 96, 26, 26])
2 output shape: torch.Size([1, 256, 26, 26])
3 output shape: torch.Size([1, 256, 12, 12])
4 output shape: torch.Size([1, 384, 12, 12])
5 output shape: torch.Size([1, 384, 5, 5])
6 output shape: torch.Size([1, 384, 5, 5])
7 output shape: torch.Size([1, 10, 5, 5])
8 output shape: torch.Size([1, 10, 1, 1])
9 output shape: torch.Size([1, 10])


In [17]:
### 获取训练数据

In [4]:
# Mini-batch size passed to the d2l Fashion-MNIST loader (and later to the training helper).
batch_size = 128
# resize=224 matches the 224x224 dummy input used for the shape check in this notebook.
# NOTE(review): presumably returns (train, test) batch iterators — confirm against d2lzh_pytorch.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

In [5]:
### 训练模型

In [7]:
# Hyper-parameters for this run (the learning rate has been increased).
lr = 0.002
num_epochs = 10

# Adam over all parameters of the NiN model.
optimizer = optim.Adam(net.parameters(), lr=lr)

# Hand the training / evaluation loop over to the d2l helper.
d2l.train_ch5(
    net,
    train_iter=train_iter,
    test_iter=test_iter,
    batch_size=batch_size,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

training on  cuda
epoch 1, loss 1.2928, train_acc 0.538, test acc 0.796, time 80.1 sec
epoch 2, loss 0.5473, train_acc 0.801, test acc 0.821, time 80.6 sec
epoch 3, loss 0.4770, train_acc 0.825, test acc 0.838, time 80.8 sec
epoch 4, loss 0.4406, train_acc 0.837, test acc 0.855, time 81.7 sec
epoch 5, loss 0.4092, train_acc 0.849, test acc 0.872, time 80.8 sec
epoch 6, loss 0.3816, train_acc 0.859, test acc 0.870, time 80.8 sec
epoch 7, loss 0.3568, train_acc 0.869, test acc 0.885, time 80.8 sec
epoch 8, loss 0.3345, train_acc 0.876, test acc 0.887, time 81.5 sec
epoch 9, loss 0.3239, train_acc 0.882, test acc 0.897, time 80.8 sec
epoch 10, loss 0.3050, train_acc 0.889, test acc 0.905, time 80.8 sec


In [None]:
# training on  cuda
# epoch 1, loss 1.2928, train_acc 0.538, test acc 0.796, time 80.1 sec
# epoch 2, loss 0.5473, train_acc 0.801, test acc 0.821, time 80.6 sec
# epoch 3, loss 0.4770, train_acc 0.825, test acc 0.838, time 80.8 sec
# epoch 4, loss 0.4406, train_acc 0.837, test acc 0.855, time 81.7 sec
# epoch 5, loss 0.4092, train_acc 0.849, test acc 0.872, time 80.8 sec
# epoch 6, loss 0.3816, train_acc 0.859, test acc 0.870, time 80.8 sec
# epoch 7, loss 0.3568, train_acc 0.869, test acc 0.885, time 80.8 sec
# epoch 8, loss 0.3345, train_acc 0.876, test acc 0.887, time 81.5 sec
# epoch 9, loss 0.3239, train_acc 0.882, test acc 0.897, time 80.8 sec
# epoch 10, loss 0.3050, train_acc 0.889, test acc 0.905, time 80.8 sec