In [None]:
# VGG（Visual Geometry Group） net的提出：
# 虽然 AlexNet 表明使用深层卷积神经网络可以提升算法的效果，但它没有提供简洁的、可以指导后来研究者设计新网络的规则。

# VGG 使用重复元素（VGG block）来构建网络，使网络构建更加容易。具体做法如下：
# 1. 仅使用 3x3 kernel 代替其他尺寸的 kernel。例如用两个堆叠的 3x3 kernel 替代一个 5x5 kernel：两者感受野一致，但参数更少，且层数增加。
# 2. pooling 的尺寸也进行了模式化，只使用 stride=2、kernel=2 的 pooling。
# 3. 只有 pooling 会改变 feature map 的尺寸，其他操作都不会改变输入信息的尺寸。

In [1]:

### VGG BLOCK

import time
import torch
from torch import nn, optim

import sys
sys.path.append('./d2lzh/')
import d2lzh_pytorch as d2l
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: `num_convs` x (3x3 conv + ReLU), then a 2x2 max-pool.

    Args:
        num_convs: number of Conv2d + ReLU pairs to stack before the pooling layer.
        in_channels: channel count fed to the first convolution.
        out_channels: channel count produced by every convolution in the block.

    Returns:
        An nn.Sequential holding the conv/ReLU pairs followed by
        nn.MaxPool2d(kernel_size=2, stride=2).
    """
    layers = []
    channels = in_channels
    for _ in range(num_convs):
        # kernel_size=3 with padding=1 keeps height and width unchanged.
        layers += [
            nn.Conv2d(channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        # Every convolution after the first consumes the block's output width.
        channels = out_channels

    # The pooling layer is the only layer in the block with stride 2.
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

In [2]:

### VGG NET
# Convolutional blocks followed by a fully connected (FC) head.

# One (num_convs, in_channels, out_channels) triple per VGG block.
conv_arch = (
    (1, 1, 64),
    (1, 64, 128),
    (2, 128, 256),
    (2, 256, 512),
    (2, 512, 512),
)
fc_hidden_units = 4096
# Each block halves the spatial size, so a 224x224 input leaves the last block
# at 224 / 2**5 = 7 per side; FC input = channels * width * height = 512 * 7 * 7.
fc_features = conv_arch[-1][2] * (224 // 2 ** len(conv_arch)) ** 2

In [3]:
def vgg(conv_arch, fc_features, fc_hidden_units=4096, num_classes=10):
    """Assemble a VGG-style network: stacked VGG blocks followed by an FC head.

    Args:
        conv_arch: iterable of (num_convs, in_channels, out_channels) triples,
            one per VGG block, passed straight to `vgg_block`.
        fc_features: input size of the first linear layer; it must equal the
            flattened size (channels * height * width) of the last block's output.
        fc_hidden_units: width of the two hidden fully connected layers.
        num_classes: number of outputs of the final linear layer. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        An nn.Sequential whose children are named 'vgg_block_1', 'vgg_block_2',
        ... followed by 'fc'.
    """
    net = nn.Sequential()

    # Convolutional part: one named child per entry of conv_arch (1-based names).
    for idx, (num_convs, in_channels, out_channels) in enumerate(conv_arch, start=1):
        net.add_module('vgg_block_' + str(idx),
                       vgg_block(num_convs, in_channels, out_channels))

    # Fully connected part: flatten, two hidden layers with dropout, output layer.
    fc_module = nn.Sequential(
        # NOTE(review): d2l.FlattenLayer is presumably a (batch, -1) reshape — confirm in d2lzh_pytorch.
        d2l.FlattenLayer(),
        nn.Linear(fc_features, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, num_classes),
    )
    net.add_module('fc', fc_module)

    return net

In [4]:
# Push one dummy input through the network and print the output shape of each stage.

net = vgg(conv_arch, fc_features, fc_hidden_units=fc_hidden_units)
X = torch.rand((1, 1, 224, 224))

# named_children() yields only the first-level submodules as (name, module) pairs.
for name, blk in net.named_children():
    X = blk(X)
    print(name, 'output shape:', X.shape)

# The printed shapes show each vgg_block halving the height and width while the
# channel count doubles (until it reaches 512).
# Original author's note: this design gives most convolutional layers the same
# parameter size and computational complexity.

vgg_block_1 output shape: torch.Size([1, 64, 112, 112])
vgg_block_2 output shape: torch.Size([1, 128, 56, 56])
vgg_block_3 output shape: torch.Size([1, 256, 28, 28])
vgg_block_4 output shape: torch.Size([1, 512, 14, 14])
vgg_block_5 output shape: torch.Size([1, 512, 7, 7])
fc output shape: torch.Size([1, 10])


In [5]:

### VGG NET mini (the full-size model is larger than this dataset/task warrants)
# Convolutional blocks followed by a fully connected (FC) head.

# Every channel width, and both FC sizes, are divided by this factor;
# the single input channel is kept as is.
ratio = 8
small_conv_arch = [
    (1, 1, 64 // ratio),
    (1, 64 // ratio, 128 // ratio),
    (2, 128 // ratio, 256 // ratio),
    (2, 256 // ratio, 512 // ratio),
    (2, 512 // ratio, 512 // ratio),
]

net = vgg(
    small_conv_arch,
    fc_features=fc_features // ratio,
    fc_hidden_units=fc_hidden_units // ratio,
)
print(net)

Sequential(
  (vgg_block_1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_3): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_4): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
### Start training
batch_size = 32
# resize=224 matches the 224x224 input size that fc_features was computed for.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

lr = 0.001
num_epochs = 10
optimizer = optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 0.5078, train_acc 0.813, test acc 0.892, time 71.5 sec
epoch 2, loss 0.3189, train_acc 0.886, test acc 0.912, time 73.6 sec
epoch 3, loss 0.2687, train_acc 0.902, test acc 0.924, time 72.1 sec
epoch 4, loss 0.2420, train_acc 0.911, test acc 0.930, time 72.3 sec
epoch 5, loss 0.2193, train_acc 0.921, test acc 0.936, time 72.5 sec
epoch 6, loss 0.2068, train_acc 0.926, test acc 0.939, time 72.1 sec
epoch 7, loss 0.1942, train_acc 0.929, test acc 0.949, time 71.9 sec
epoch 8, loss 0.1800, train_acc 0.935, test acc 0.946, time 72.4 sec
epoch 9, loss 0.1680, train_acc 0.939, test acc 0.949, time 72.0 sec
epoch 10, loss 0.1629, train_acc 0.941, test acc 0.956, time 72.3 sec
