In [1]:
import d2lzh as d2l
from mxnet import nd, init, gluon
from mxnet.gluon import nn

class Residual(nn.Block):
    """Residual block: two 3x3 conv + batch-norm layers with a skip connection.

    The main path is conv1 -> bn1 -> ReLU -> conv2 -> bn2. Its result is added
    to the shortcut (the input itself, or the input passed through an optional
    1x1 convolution) and a final ReLU is applied.

    Parameters
    ----------
    num_channels : int
        Number of output channels of every convolution in the block.
    use_1x1conv : bool, default False
        If True, the shortcut passes through a 1x1 convolution that uses the
        same ``num_channels`` and ``strides`` as ``conv1``. Without it the
        input is added unchanged, so the input must already be
        shape-compatible with the main-path output for ``Y + X`` to work.
    strides : int, default 1
        Stride of the first 3x3 convolution (and of the 1x1 shortcut
        convolution when it is enabled).
    **kwargs
        Forwarded to ``nn.Block.__init__``.
    """

    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        # Main path: only the first convolution applies `strides`.
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1, strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        # Shortcut path: optional 1x1 convolution, otherwise None (identity).
        if use_1x1conv:
            self.conv3 = nn.Conv2D(num_channels, kernel_size=1, strides=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def forward(self, X):
        """Return ``relu(main_path(X) + shortcut(X))``."""
        Y = nd.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Test against None explicitly: the attribute is either a layer or
        # None, and relying on an object's truthiness would silently skip the
        # shortcut for any block that happens to evaluate as falsy.
        if self.conv3 is not None:
            X = self.conv3(X)
        return nd.relu(Y + X)

In [2]:
# Identity shortcut: 3 channels in, 3 channels out, default stride.
blk = Residual(num_channels=3)
blk.initialize()
X = nd.random.uniform(shape=(4, 3, 6, 6))
blk(X).shape

(4, 3, 6, 6)

In [3]:
# 1x1-conv shortcut with stride 2, applied to the same X as the previous cell.
blk = Residual(num_channels=6, use_1x1conv=True, strides=2)
blk.initialize()
blk(X).shape

(4, 6, 3, 3)

In [4]:
# Stem: 7x7 stride-2 convolution -> batch norm -> ReLU -> 3x3 stride-2 max pool.
net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3))
net.add(nn.BatchNorm())
net.add(nn.Activation('relu'))
net.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))

In [5]:
def resnet_block(num_channels, num_residuals, first_block=False):
    """Build a sequential stage of ``num_residuals`` Residual units.

    The first unit of the stage uses a 1x1-conv shortcut with stride 2,
    unless ``first_block`` is True, in which case every unit is a plain
    ``Residual(num_channels)``.

    Returns the populated ``nn.Sequential``.
    """
    stage = nn.Sequential()
    for idx in range(num_residuals):
        if first_block or idx != 0:
            # Plain unit: default stride, no 1x1 convolution on the shortcut.
            unit = Residual(num_channels)
        else:
            # Leading unit of a non-first stage: stride 2 with 1x1 shortcut.
            unit = Residual(num_channels, use_1x1conv=True, strides=2)
        stage.add(unit)
    return stage

In [6]:
# Four stages of two residual units each. The first stage keeps 64 channels
# and is built with first_block=True; the later stages use 128/256/512.
net.add(resnet_block(64, 2, first_block=True))
net.add(*(resnet_block(width, 2) for width in (128, 256, 512)))

In [7]:
# Head: global average pooling followed by a 10-unit dense layer.
net.add(nn.GlobalAvgPool2D())
net.add(nn.Dense(10))

In [8]:
# Push one random 1x1x28x28 input through the network layer by layer and
# report the output shape after each top-level layer.
X = nd.random.uniform(shape=(1, 1, 28, 28))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

conv5 output shape:	 (1, 64, 14, 14)
batchnorm4 output shape:	 (1, 64, 14, 14)
relu0 output shape:	 (1, 64, 14, 14)
pool0 output shape:	 (1, 64, 7, 7)
sequential1 output shape:	 (1, 64, 7, 7)
sequential2 output shape:	 (1, 128, 4, 4)
sequential3 output shape:	 (1, 256, 2, 2)
sequential4 output shape:	 (1, 512, 1, 1)
pool1 output shape:	 (1, 512, 1, 1)
dense0 output shape:	 (1, 10)


In [9]:
# Hyperparameters and device (the recorded run reports "training on gpu(0)").
lr = 0.05
num_epochs = 5
batch_size = 256
ctx = d2l.try_gpu()

# Re-initialize the parameters on the chosen device with Xavier init,
# replacing the default initialization done for the shape trace.
net.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})

# Fashion-MNIST iterators; resize=96 is passed through to the d2l loader
# (presumably upsizing the images to 96x96 -- confirm against d2lzh).
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on gpu(0)
epoch 1, loss 0.5018, train acc 0.828, test acc 0.896, time 96.0 sec
epoch 2, loss 0.2593, train acc 0.904, test acc 0.901, time 74.2 sec
epoch 3, loss 0.1947, train acc 0.928, test acc 0.873, time 75.5 sec
epoch 4, loss 0.1470, train acc 0.946, test acc 0.921, time 76.4 sec
epoch 5, loss 0.1129, train acc 0.959, test acc 0.917, time 81.0 sec
