In [1]:
import d2lzh as d2l
from torch.nn import init
from torch import nn
import torch
from torch import optim

def vgg_block(layer_num, num_convs, channels):
    """Build one VGG stage: ``num_convs`` (3x3 conv + ReLU) pairs, then a max-pool.

    Args:
        layer_num: Index of this stage; only used to name the sub-modules.
        num_convs: Number of convolution layers in the stage.
        channels: Sequence of ``num_convs + 1`` channel counts; convolution
            ``k`` (0-based) maps ``channels[k]`` to ``channels[k + 1]``.

    Returns:
        An ``nn.Sequential`` whose children are named ``conv{L}-{k}`` and
        ``relu{L}-{k}`` for ``k = 1..num_convs``, followed by ``maxpool{L}``,
        where ``L`` is ``layer_num``.

    Raises:
        AssertionError: If ``len(channels) != num_convs + 1``.
    """
    assert num_convs + 1 == len(channels), (
        'expected %d channel counts for %d convs, got %d'
        % (num_convs + 1, num_convs, len(channels)))
    block = nn.Sequential()
    for i in range(num_convs):
        idx = i + 1  # 1-based position shared by the conv and its activation
        block.add_module(
            'conv%d-%d' % (layer_num, idx),
            # kernel_size=3 with padding=1 keeps the spatial size unchanged.
            nn.Conv2d(channels[i], channels[i + 1], kernel_size=3, padding=1))
        block.add_module('relu%d-%d' % (layer_num, idx), nn.ReLU())
    block.add_module('maxpool%d' % layer_num, nn.MaxPool2d(2))
    return block

In [2]:
# Full-width VGG spec: one (num_convs, channels) entry per stage, where
# `channels` lists the stage's input channel count followed by the output
# channel count of each convolution in that stage.
conv_arch = (
    (1, (1, 64)),
    (1, (64, 128)),
    (2, (128, 256, 256)),
    (2, (256, 512, 512)),
    (2, (512, 512, 512)),
)

In [3]:
def vgg(conv_arch, fc_features=None, fc_hidden_units=4096, num_classes=10,
        input_size=224):
    """Assemble a VGG-style network: stacked conv stages plus a 3-layer fc head.

    Args:
        conv_arch: Iterable of ``(num_convs, channels)`` pairs, one per stage,
            passed straight to ``vgg_block``.
        fc_features: Number of inputs to the first fully connected layer.
            When ``None`` it is derived from the last stage's output channels
            and ``input_size``.
        fc_hidden_units: Width of the two hidden fully connected layers.
        num_classes: Number of outputs of the final linear layer.
        input_size: Height/width of the (square) input image; only used to
            derive ``fc_features`` when that is not given.

    Returns:
        An ``nn.Sequential`` with children ``conv1`` .. ``convN`` and ``fc``.

    Raises:
        ValueError: If ``conv_arch`` is empty and ``fc_features`` is not given,
            since the head's input width cannot then be inferred.
    """
    conv_arch = list(conv_arch)  # allow any iterable; we need len() and [-1]
    net = nn.Sequential()
    for i, (num_convs, channels) in enumerate(conv_arch):
        net.add_module('conv%d' % (i + 1), vgg_block(i + 1, num_convs, channels))

    if fc_features is None:
        if not conv_arch:
            raise ValueError('fc_features must be given when conv_arch is empty')
        # The head width used to be hard-coded to 128 * 7 * 7, which only fits
        # an architecture ending in 128 channels. Each stage ends in a 2x2
        # max-pool (floor-halving height and width), so derive it instead.
        out_channels = conv_arch[-1][1][-1]
        out_size = input_size // (2 ** len(conv_arch))
        fc_features = out_channels * out_size * out_size

    net.add_module('fc', nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(fc_features, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(fc_hidden_units, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(fc_hidden_units, num_classes)
    ))
    return net

In [4]:
# Instantiate the network from the full-width spec: 8 conv layers spread over
# 5 stages, followed by the 3-layer fully connected head.
net = vgg(conv_arch)

In [5]:
# Shape probe: feed one random single-channel 224x224 image through the
# network one top-level stage at a time and report each stage's output shape.
X = torch.rand(1, 1, 224, 224)
for stage in net.children():
    X = stage(X)
    print('output shape:\t', X.shape)

output shape:	 torch.Size([1, 64, 112, 112])
output shape:	 torch.Size([1, 128, 56, 56])
output shape:	 torch.Size([1, 256, 28, 28])
output shape:	 torch.Size([1, 512, 14, 14])
output shape:	 torch.Size([1, 512, 7, 7])
output shape:	 torch.Size([1, 10])


In [4]:
# NOTE(review): `ratio` is not referenced anywhere in the visible cells; the
# widths below are hard-coded. They equal the full-width spec divided by 4 in
# every stage except the first, which uses 8 channels rather than 64 / 4 = 16.
# Confirm which was intended before deriving the spec from `ratio`.
ratio = 4
# Narrower spec with the same stage layout as the full-width one, so the model
# is cheaper to train.
small_conv_arch = (
    (1, (1, 8)),
    (1, (8, 32)),
    (2, (32, 64, 64)),
    (2, (64, 128, 128)),
    (2, (128, 128, 128)),
)
net = vgg(small_conv_arch)

In [None]:
# Training hyper-parameters and target device.
lr, num_epochs, batch_size = 0.05, 10, 128
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Xavier-initialise the weight of every convolution and linear layer.
# Layers are selected with an isinstance allow-list instead of excluding a
# fixed list of weight-less types, so adding another parameter-free layer to
# the network cannot trigger an AttributeError on `.weight`.
for layer in net.modules():
    if isinstance(layer, (nn.Conv2d, nn.Linear)):
        init.xavier_normal_(layer.weight)

net = net.to(device)
# resize=224 presumably scales the Fashion-MNIST images up to the 224x224
# input used by the shape probe above — confirm against the d2l helper.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
optimizer = optim.SGD(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on cuda
epoch 1, loss 0.0088, train acc 0.578, test acc 0.801, time 48.7 sec
epoch 2, loss 0.0032, train acc 0.850, test acc 0.852, time 48.9 sec
epoch 3, loss 0.0026, train acc 0.877, test acc 0.870, time 49.1 sec
epoch 4, loss 0.0023, train acc 0.893, test acc 0.890, time 49.1 sec
