![](../image/vgg11.svg)

vgg卷积块:

- 所有卷积层都使用3x3的卷积核(padding=1, 因此卷积本身不改变特征图的高和宽)
- (卷积层+ReLU)*n个 + 1个2x2、步幅为2的最大汇聚层(使高和宽减半)


vgg11整体架构:
- 8个卷积层和3个全连接层，因此它通常被称为VGG-11。

- 5个卷积块，其中前两个块各有1个卷积层，后三个块各包含2个卷积层。 第一个模块有64个输出通道，每个后续模块将输出通道数量翻倍，直到该数字达到512。

vgg19整体架构: 
- 16个卷积层和3个全连接层

- 5个卷积块，其中前两个块各有2个卷积层，后三个块各包含4个卷积层。 第一个模块有64个输出通道，每个后续模块将输出通道数量翻倍，直到该数字达到512。


和AlexNet的不同:
- AlexNet使用不同大小的卷积核, vgg使用相同大小的卷积核



个人感觉: VGG的主要工作, 就是用"重复块"的形式把已有的卷积网络设计(如AlexNet)重新组织了一遍。

In [1]:
import torch
# torchvision.datasets.FashionMNIST
import torchvision
# 修改数据集格式
from torchvision import transforms
# data.DataLoader
from torch.utils import data
# nn块
from torch import nn

In [2]:
# ----------- Hyperparameters -----------
# Prefer the GPU when CUDA is available; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)
# Mini-batch size, SGD learning rate, and number of passes over the data.
batch_size, lr, num_epochs = 128, 3e-2, 10

cuda


In [3]:
# Preprocessing pipeline applied to every image: upscale to 224x224 (the
# input size the network below is built for), then convert to a tensor.
trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
# Build the training split first, then the test split; both are downloaded
# into ../data if missing and share the same preprocessing.
mnist_train_totensor, mnist_test_totensor = (
    torchvision.datasets.FashionMNIST(
        root="../data",
        train=is_train,
        download=True,
        transform=trans,
    )
    for is_train in (True, False)
)
# Shape of one preprocessed sample (the image part of the first item).
mnist_train_totensor[0][0].shape

torch.Size([1, 224, 224])

In [4]:
# Shuffle only the training set so each epoch visits the samples in a new
# order. The evaluation set keeps a fixed order: shuffling it brings no
# benefit and makes the batch composition differ from run to run.
# num_workers=4: load batches with four worker processes.
train_iter = data.DataLoader(
    mnist_train_totensor, batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(
    mnist_test_totensor, batch_size, shuffle=False, num_workers=4)

In [5]:
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: ``num_convs`` x (3x3 conv + ReLU), then a max-pool.

    Args:
        num_convs: number of convolutional layers in the block.
        in_channels: number of channels of the block's input.
        out_channels: number of channels produced by every convolution.

    Returns:
        An ``nn.Sequential`` holding the layers in order, ending with a
        2x2 max-pooling layer of stride 2.
    """
    stack = []
    for idx in range(num_convs):
        # Only the first convolution sees the block's input channels; every
        # later one consumes the previous convolution's output.
        src_channels = in_channels if idx == 0 else out_channels
        stack += [
            nn.Conv2d(src_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
    stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stack)


# VGG-11 layout: one (number of conv layers, output channels) pair per block.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)


def vgg(conv_arch, in_channels=1, num_classes=10):
    """Assemble a VGG network: convolutional blocks followed by a 3-layer MLP.

    Args:
        conv_arch: iterable of ``(num_convs, out_channels)`` pairs, one per
            convolutional block, in order.
        in_channels: number of channels of the input image. Defaults to 1,
            matching the single-channel images used in this notebook.
        num_classes: size of the final output layer. Defaults to 10.

    Returns:
        An ``nn.Sequential`` containing the blocks, a flatten layer and the
        fully connected classifier.

    Note:
        The first fully connected layer assumes the last block outputs a
        7x7 feature map, e.g. a 224x224 input passed through five blocks
        that each halve the height and width.
    """
    conv_blks = []
    # Convolutional part: `in_channels` tracks the channel count flowing
    # into the next block so consecutive blocks line up.
    for num_convs, out_channels in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    return nn.Sequential(
        *conv_blks,

        nn.Flatten(),

        # Fully connected part. Use the tracked channel count rather than
        # the loop variable leaking out of the `for` statement.
        nn.Linear(in_channels * 7 * 7, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),

        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),

        nn.Linear(4096, num_classes)
    )


# Build the network described by conv_arch, then move it to the chosen device.
net = vgg(conv_arch)
net = net.to(device)
net

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (2): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (3): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (4):

In [6]:
# Push one random single-channel 224x224 image through the network, one
# top-level layer at a time, printing the output shape after each.
X = torch.rand(size=(1, 1, 224, 224), dtype=torch.float32)
X = X.to(device)
for layer in net.children():
    X = layer(X)
    print(f'output shape: {layer.__class__.__name__: <15}{X.shape}')

output shape: Sequential     torch.Size([1, 64, 112, 112])
output shape: Sequential     torch.Size([1, 128, 56, 56])
output shape: Sequential     torch.Size([1, 256, 28, 28])
output shape: Sequential     torch.Size([1, 512, 14, 14])
output shape: Sequential     torch.Size([1, 512, 7, 7])
output shape: Flatten        torch.Size([1, 25088])
output shape: Linear         torch.Size([1, 4096])
output shape: ReLU           torch.Size([1, 4096])
output shape: Dropout        torch.Size([1, 4096])
output shape: Linear         torch.Size([1, 4096])
output shape: ReLU           torch.Size([1, 4096])
output shape: Dropout        torch.Size([1, 4096])
output shape: Linear         torch.Size([1, 10])


In [7]:
def init_weights(m):
    """Apply Xavier-uniform initialisation to the weights of a layer.

    Intended to be passed to ``Module.apply``, which calls it once per
    submodule.

    Args:
        m: a module; only ``nn.Linear`` and ``nn.Conv2d`` instances
            (including subclasses) are modified, anything else is ignored.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of Linear / Conv2d.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)


# Run init_weights on every submodule of the network, then create the
# classification loss and a plain SGD optimizer over all parameters.
net.apply(init_weights)
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr)

In [8]:
def train_loop(train_iter, net, loss, optimizer):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        train_iter: iterable of ``(X, y)`` batches that supports ``len()``.
        net: the model being optimised.
        loss: callable mapping ``(predictions, targets)`` to a scalar tensor.
        optimizer: optimizer that updates the parameters of ``net``.

    Returns:
        The mean of the per-batch training losses for this pass.
    """
    # Put the model in training mode explicitly: an earlier evaluation pass
    # may have switched it to eval mode, which would silently disable Dropout.
    net.train()
    # Number of batches in one pass.
    num_batchs = len(train_iter)
    # Running sum of the per-batch losses.
    total_train_loss = 0
    for batch, (X, y) in enumerate(train_iter):
        # Move the batch to the module-level `device`.
        X, y = X.to(device), y.to(device)
        # Forward pass for this batch.
        y_hat = net(X)

        train_loss = loss(y_hat, y)
        total_train_loss += train_loss.item()

        # Backpropagation: clear old gradients, compute new ones, update.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Progress indicator, rewritten in place on a single line.
        print(f"\r[{batch+1:>8d}/{num_batchs:>8d}]  ", end='')

    return total_train_loss / num_batchs

In [10]:
# --------- Training
# Run num_epochs passes over the training data, reporting the value
# returned by train_loop after each pass.
for epoch in range(num_epochs):
    total_train_loss = train_loop(
        train_iter=train_iter, net=net, loss=loss, optimizer=optimizer)
    print(f'epoch {epoch + 1}, total_train_loss {total_train_loss:f}')

[     469/     469]  epoch 1, total_train_loss 1.057548
[     469/     469]  epoch 2, total_train_loss 0.450408
[     469/     469]  epoch 3, total_train_loss 0.364171
[     469/     469]  epoch 4, total_train_loss 0.321859
[     469/     469]  epoch 5, total_train_loss 0.289413
[     469/     469]  epoch 6, total_train_loss 0.264886
[     469/     469]  epoch 7, total_train_loss 0.246249
[     469/     469]  epoch 8, total_train_loss 0.227602
[     469/     469]  epoch 9, total_train_loss 0.212800
[     469/     469]  epoch 10, total_train_loss 0.198910


In [11]:
# ---------- Evaluation
def test_net(test_iter, net, loss):
    """Evaluate ``net`` on ``test_iter`` and print accuracy and average loss.

    Accuracy is the number of correct predictions divided by the number of
    samples seen, so a smaller final batch is not over-weighted. The
    reported loss is the mean of the per-batch losses.

    Args:
        test_iter: iterable of ``(X, y)`` batches that supports ``len()``.
        net: the model to evaluate; it is left in eval mode afterwards.
        loss: callable mapping ``(predictions, targets)`` to a scalar tensor.
    """
    # Number of batches in the evaluation set.
    num_batchs = len(test_iter)
    # Running sums: per-batch losses, correct predictions, samples seen.
    total_test_loss, total_correct, num_samples = 0, 0, 0
    # Evaluation mode (e.g. disables Dropout).
    net.eval()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch, (X, y) in enumerate(test_iter):
            # Move the batch to the module-level `device`.
            X, y = X.to(device), y.to(device)
            y_hat = net(X)

            total_test_loss += loss(y_hat, y).item()
            # A prediction is correct when the arg-max class equals the label.
            total_correct += (y_hat.argmax(1) == y).sum().item()
            num_samples += len(X)

            # Progress indicator, rewritten in place on a single line.
            print(f"\r[{batch+1:>8d}/{num_batchs:>8d}]  ", end='')

    total_test_loss /= num_batchs
    # Per-sample accuracy rather than a mean of per-batch accuracies.
    total_correct /= num_samples
    print(
        f"\nTest: Accuracy: {total_correct:.1%}, Avg loss: {total_test_loss:f}")
    
# Evaluate the trained network on the held-out test batches.
test_net(test_iter=test_iter, net=net, loss=loss)

[      79/      79]  
Test: Accuracy: 91.4%, Avg loss: 0.232293
