# **VGGNet**

VGGNet使用多个重复模块进行组合来构建深度模型

## **VGGNet模块**

In [1]:
import torch
from torch import nn, optim
import sys
sys.path.append(r'C:\D\ProgramFile\jupyter\torch_learn\dive_to_dp\utils') 
import d2lzh as d2l

In [2]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: `num_convs` 3x3 conv + ReLU pairs, then a 2x2 max-pool.

    Args:
        num_convs: number of convolution layers in the block.
        in_channels: channel count fed to the first convolution.
        out_channels: channel count produced by every convolution in the block.

    Returns:
        nn.Sequential holding the conv/ReLU pairs followed by the pooling layer.
    """
    layers = []
    channels = in_channels
    for _ in range(num_convs):
        # padding=1 with a 3x3 kernel keeps height and width unchanged.
        layers.append(nn.Conv2d(channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        # Every convolution after the first consumes the block's output width.
        channels = out_channels
    # Stride-2 pooling halves the spatial resolution.
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

## **VGG结构搭建**

VGGNet由多个vgg_block串联组成，我们使用变量`conv_arch`来定义各个vgg_block的参数，其中每个元素依次为（卷积层个数, 输入通道数, 输出通道数）  
因为我们使用的是Fashion-MNIST数据集，所以输入通道数为1  
我们接下来搭建的网络使用了8个卷积层和3个全连接层，因此被称为VGG-11  

In [4]:
# One (num_convs, in_channels, out_channels) triple per VGG block, in order.
conv_arch = (
    (1, 1, 64),
    (1, 64, 128),
    (2, 128, 256),
    (2, 256, 512),
    (2, 512, 512),
)
# Flattened size of the last block's output: 512 channels on a 7x7 map
# (a 224x224 input halved by each of the five pooling layers).
fc_features = 512 * 7 * 7
# Width of the two hidden fully connected layers.
fc_hidden_units = 4096

In [5]:
def vgg(conv_arch, fc_features, fc_hidden_units=4096, num_classes=10):
    """Assemble a VGG-style network: stacked VGG blocks followed by a classifier.

    Args:
        conv_arch: iterable of (num_convs, in_channels, out_channels) triples,
            one per VGG block, in the order the blocks are applied.
        fc_features: number of features after flattening the last block's
            output (channels * height * width).
        fc_hidden_units: width of the two hidden fully connected layers.
        num_classes: size of the output layer. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        nn.Sequential whose children are named 'vgg_block0', 'vgg_block1', ...
        followed by 'fc', the classifier head.
    """
    net = nn.Sequential()
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        net.add_module('vgg_block' + str(i), vgg_block(num_convs, in_channels, out_channels))
    # Classifier head: flatten, two hidden layers with ReLU and dropout 0.5,
    # then the output layer producing one score per class.
    classifier = nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(fc_features, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, num_classes),
    )
    net.add_module('fc', classifier)
    return net
        

In [6]:
# Build the full-size network and a random single-channel 224x224 input
# (batch of 1) to push through it for a shape check.
net = vgg(conv_arch, fc_features, fc_hidden_units)
X = torch.rand(1, 1, 224, 224)

In [7]:
# Check the output of every sub-module: feed X through each top-level child in
# turn and print the resulting shape.
for name, blk in net.named_children(): 
    X = blk(X)  # rebinds X, so the next child receives this child's output
    print(name, 'output shape: ', X.shape)

vgg_block0 output shape:  torch.Size([1, 64, 112, 112])
vgg_block1 output shape:  torch.Size([1, 128, 56, 56])
vgg_block2 output shape:  torch.Size([1, 256, 28, 28])
vgg_block3 output shape:  torch.Size([1, 512, 14, 14])
vgg_block4 output shape:  torch.Size([1, 512, 7, 7])
fc output shape:  torch.Size([1, 10])


## **加载数据**

因为VGGNet的参数太多，我们把各层的通道数和全连接层的宽度都缩小为原来的1/8，构建一个小一点的VGGNet来进行训练

In [8]:
# Shrink factor applied to every channel count and to the classifier widths.
ratio = 8
# Same block layout as the full network with each width divided by `ratio`;
# the input of the first block stays single-channel.
small_conv_arch = [
    (1, 1, 64 // ratio),
    (1, 64 // ratio, 128 // ratio),
    (2, 128 // ratio, 256 // ratio),
    (2, 256 // ratio, 512 // ratio),
    (2, 512 // ratio, 512 // ratio),
]
net = vgg(
    small_conv_arch,
    fc_features // ratio,
    fc_hidden_units // ratio,
)
print(net)

Sequential(
  (vgg_block0): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block1): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ce

In [9]:
# Mini-batch size passed to the data-loading helper.
batch_size = 64
# NOTE(review): resize=224 presumably rescales each image to 224x224, matching
# the input size used for the shape check and summary in this notebook — confirm
# against the d2l helper.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size, resize=224)

In [10]:
# Move the model to the selected device before summarising and training it.
net = net.to(device)

In [11]:
import torchsummary

# Pass the device explicitly instead of relying on torchsummary's default of
# "cuda", so the dummy input it builds is on the same device as `net`.
torchsummary.summary(net, (1, 224, 224), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 8, 224, 224]              80
              ReLU-2          [-1, 8, 224, 224]               0
         MaxPool2d-3          [-1, 8, 112, 112]               0
            Conv2d-4         [-1, 16, 112, 112]           1,168
              ReLU-5         [-1, 16, 112, 112]               0
         MaxPool2d-6           [-1, 16, 56, 56]               0
            Conv2d-7           [-1, 32, 56, 56]           4,640
              ReLU-8           [-1, 32, 56, 56]               0
            Conv2d-9           [-1, 32, 56, 56]           9,248
             ReLU-10           [-1, 32, 56, 56]               0
        MaxPool2d-11           [-1, 32, 28, 28]               0
           Conv2d-12           [-1, 64, 28, 28]          18,496
             ReLU-13           [-1, 64, 28, 28]               0
           Conv2d-14           [-1, 64,

In [12]:
# Learning rate for Adam and the number of passes over the training set.
lr, num_epochs = 0.001, 5
optimizer = optim.Adam(params=net.parameters(), lr=lr)
# Cross-entropy is applied to the raw outputs of the network's last Linear layer.
loss = nn.CrossEntropyLoss()

In [13]:
for epoch in range(num_epochs):
    # Re-enter training mode every epoch so Dropout is active even if the
    # evaluation helper left the network in eval mode.
    net.train()
    # Running totals for this epoch: summed batch losses, number of correct
    # predictions, number of samples seen, number of batches seen.
    train_l_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        # .item() yields plain Python numbers. Adding the loss tensor itself
        # would keep it attached to the autograd graph and chain every batch's
        # history onto the running sum.
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
        batch_count += 1
    test_acc = d2l.evaluate_accuracy(test_iter, net)
    # Loss is averaged per batch; training accuracy is averaged per sample.
    print(f'epoch{epoch+1}: loss {train_l_sum/batch_count:.4f} train_acc {train_acc_sum / n:.4f} test_acc {test_acc:.4f}')

epoch1: loss 0.6528 train_acc 0.7541 test_acc 0.8709
epoch2: loss 0.3376 train_acc 0.8776 test_acc 0.8891
epoch3: loss 0.2891 train_acc 0.8955 test_acc 0.9042
epoch4: loss 0.2532 train_acc 0.9082 test_acc 0.9120
epoch5: loss 0.2318 train_acc 0.9156 test_acc 0.9151
