<a href="https://colab.research.google.com/github/skywalker0803r/mxnet_course/blob/master/mxnet_Resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#!pip install mxnet-cu100 d2lzh
import d2lzh as d2l
from mxnet import gluon, init, nd
from mxnet.gluon import nn

# 殘差塊

![殘差塊結構示意圖](https://zh.gluon.ai/_images/residual-block.svg)

In [0]:
def resnet_block(num_channels, num_residuals, first_block=False):
    """Build one ResNet stage as a stack of ``d2l.Residual`` units.

    Args:
        num_channels: Channel count forwarded to every ``d2l.Residual``.
        num_residuals: How many residual units the stage contains.
        first_block: Set to True for the stage that directly follows the
            network stem. When False, the first unit of the stage is created
            with ``use_1x1conv=True, strides=2``; every other unit uses the
            ``d2l.Residual`` defaults.

    Returns:
        An ``nn.Sequential`` holding the ``num_residuals`` units in order.
    """
    stage = nn.Sequential()
    for idx in range(num_residuals):
        # Only the leading unit of a non-first stage gets the strided variant.
        downsample = idx == 0 and not first_block
        if downsample:
            unit = d2l.Residual(num_channels, use_1x1conv=True, strides=2)
        else:
            unit = d2l.Residual(num_channels)
        stage.add(unit)
    return stage

# ResNet模型

In [0]:
# Assemble the full network as a flat nn.Sequential container.
net = nn.Sequential()

# Stem: 7x7 stride-2 convolution with 64 channels, batch normalisation, ReLU,
# then 3x3 stride-2 max pooling.
net.add(
    nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
    nn.BatchNorm(),
    nn.Activation('relu'),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1),
)

# Four residual stages of two units each. Only the first stage is flagged with
# first_block=True; the later stages start with the strided 1x1-conv unit.
net.add(
    resnet_block(64, 2, first_block=True),
    resnet_block(128, 2),
    resnet_block(256, 2),
    resnet_block(512, 2),
)

# Head: global average pooling followed by a 10-unit dense output layer.
net.add(
    nn.GlobalAvgPool2D(),
    nn.Dense(10),
)

In [8]:
# Sanity-check the architecture: feed one random single-channel 224x224 input
# through the network, one top-level child at a time, and print the output
# shape after each child.
X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for stage in net:
    # X is deliberately overwritten so each child sees the previous child's output.
    X = stage(X)
    print(stage.name, 'output shape:\t', X.shape)

conv2 output shape:	 (1, 64, 112, 112)
batchnorm2 output shape:	 (1, 64, 112, 112)
relu2 output shape:	 (1, 64, 112, 112)
pool2 output shape:	 (1, 64, 56, 56)
sequential3 output shape:	 (1, 64, 56, 56)
sequential4 output shape:	 (1, 128, 28, 28)
sequential5 output shape:	 (1, 256, 14, 14)
sequential6 output shape:	 (1, 512, 7, 7)
pool3 output shape:	 (1, 512, 1, 1)
dense0 output shape:	 (1, 10)


# 獲取數據和訓練模型

In [9]:
# Training hyperparameters and the device returned by d2l.try_gpu().
lr = 0.05
num_epochs = 5
batch_size = 256
ctx = d2l.try_gpu()

# The network was already initialised for the shape check, so force a
# re-initialisation here, this time with Xavier init on the chosen device.
net.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)

# Plain SGD over all of the network's parameters.
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    optimizer_params={'learning_rate': lr},
)

# Fashion-MNIST iterators, loaded with resize=96 (the shape check above used
# a 224x224 input instead).
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

d2l.train_ch5(
    net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs
)

Downloading /root/.mxnet/datasets/fashion-mnist/train-images-idx3-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/train-images-idx3-ubyte.gz...
Downloading /root/.mxnet/datasets/fashion-mnist/train-labels-idx1-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/train-labels-idx1-ubyte.gz...
Downloading /root/.mxnet/datasets/fashion-mnist/t10k-images-idx3-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/t10k-images-idx3-ubyte.gz...
Downloading /root/.mxnet/datasets/fashion-mnist/t10k-labels-idx1-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/t10k-labels-idx1-ubyte.gz...
training on gpu(0)
epoch 1, loss 0.4868, train acc 0.829, test acc 0.894, time 94.0 sec
epoch 2, loss 0.2543, train acc 0.906, test acc 0.908, time 85.8 sec
epoch 3, loss 0.1859, train acc 0.933, test acc 0.913, time