In [2]:
# ResNet，即深度残差网络，有效的解决了深度卷积神经网络训练难的问题
# 由于在误差反向传播的过程中，梯度的变化越来越小，因此，权重的更新
# 变慢，会导致远离损失函数的计算层训练缓慢。
# Resnet的思路是通过增加跨层的连接来解决梯度反向传播过程当中逐渐变小的问题

# Resnet沿用3×3的卷积操作，但是在卷积和池化之间加入batch_norm进行加速训练
# 每次跨层连接，跨越连接2层卷积。如果输入与输出的通道不一样，则使用额外的一个
# 1×1的卷积做通道变换，并使用strides=2来将长宽减半


from mxnet.gluon import nn
from mxnet import nd

class Residual(nn.Block):
    """Residual block: two 3x3 convolutions plus a skip connection.

    Each convolution is followed by batch normalisation. The block input is
    added to the output of the second batch-norm layer before the final ReLU.

    Parameters
    ----------
    channels : int
        Channel count passed to every convolution in the block.
    same_shape : bool, default True
        When True the first convolution uses stride 1 and the input is added
        to the main path unchanged, so the input must already be
        shape-compatible with the convolution output. When False the first
        convolution uses stride 2 and the skip path goes through an extra
        1x1, stride-2 convolution so both paths can be summed.
    *args, **kwargs
        Forwarded untouched to ``nn.Block.__init__``.
    """

    def __init__(self, channels, same_shape=True, *args, **kwargs):
        super(Residual, self).__init__(*args, **kwargs)
        self.same_shape = same_shape

        # Stride of the first convolution (and of the 1x1 projection, if any).
        if same_shape:
            strides = 1
        else:
            strides = 2

        # NOTE: layers are created in a fixed order inside the name scope;
        # keep this order so generated parameter names stay stable.
        with self.name_scope():
            self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1,
                                   strides=strides)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                # 1x1 projection applied to the skip path only.
                self.conv3 = nn.Conv2D(channels, kernel_size=1,
                                       strides=strides)

    def forward(self, x):
        """Return ``relu(main_path(x) + skip(x))``."""
        # Main path: conv -> bn -> relu -> conv -> bn.
        main = self.conv1(x)
        main = self.bn1(main)
        main = nd.relu(main)
        main = self.bn2(self.conv2(main))

        # Skip path: identity, or the 1x1 projection when shapes differ.
        shortcut = x if self.same_shape else self.conv3(x)
        return nd.relu(main + shortcut)

In [3]:
# Smoke test: apply a default Residual block (same_shape=True) to a random
# batch. The recorded output shows the shape is unchanged: (4, 3, 6, 6).
blk = Residual(3)
blk.initialize()
x = nd.random_uniform(shape=(4, 3, 6, 6))
blk(x).shape

(4, 3, 6, 6)

In [4]:
# With same_shape=False the recorded output is (4, 8, 3, 3): 8 channels and
# height/width halved relative to the (4, 3, 6, 6) input `x` defined in the
# previous cell.
blk2 = Residual(8, same_shape=False)
blk2.initialize()
blk2(x).shape

(4, 8, 3, 3)

In [5]:
class ResNet(nn.Block):
    """Small ResNet assembled from six sequential top-level blocks.

    Layout, in order:
      1. a 7x7, stride-2 convolution with 64 channels;
      2. a 3x3, stride-2 max pooling followed by two ``Residual(64)`` blocks;
      3-5. for 128, 256 and 512 channels: a ``Residual`` block built with
           ``same_shape=False`` followed by a default ``Residual`` block;
      6. a 3x3 average pooling followed by a ``Dense(num_classes)`` layer.

    Parameters
    ----------
    num_classes : int
        Number of units of the final dense layer.
    verbose : bool, default False
        When True, ``forward`` prints the output shape of every top-level
        block.
    *args, **kwargs
        Forwarded untouched to ``nn.Block.__init__``.
    """

    def __init__(self, num_classes, verbose=False, *args, **kwargs):
        super(ResNet, self).__init__(*args, **kwargs)
        self.verbose = verbose

        # NOTE: blocks are created in a fixed order inside the name scope;
        # keep this order so generated parameter names stay stable.
        with self.name_scope():
            # Block 1: stem convolution.
            stages = [nn.Conv2D(64, kernel_size=7, strides=2)]

            # Block 2: max pooling and two residual blocks at 64 channels.
            pooled = nn.Sequential()
            pooled.add(nn.MaxPool2D(pool_size=3, strides=2),
                       Residual(64), Residual(64))
            stages.append(pooled)

            # Blocks 3-5: one same_shape=False residual block, then a
            # default one, per channel width.
            for width in (128, 256, 512):
                stage = nn.Sequential()
                stage.add(Residual(width, same_shape=False), Residual(width))
                stages.append(stage)

            # Block 6: average pooling and the dense output layer.
            head = nn.Sequential()
            head.add(nn.AvgPool2D(pool_size=3), nn.Dense(num_classes))
            stages.append(head)

            self.net = nn.Sequential()
            self.net.add(*stages)

    def forward(self, x):
        """Run ``x`` through every top-level block and return the result."""
        out = x
        for index, block in enumerate(self.net, 1):
            out = block(out)
            if self.verbose:
                print('Block %d output: %s' % (index, out.shape))
        return out

In [6]:
# Run a single forward pass with verbose=True so that every top-level block
# prints its output shape for a random (4, 3, 96, 96) batch.
net = ResNet(10, verbose=True)
net.initialize()
x = nd.random_uniform(shape=(4, 3, 96, 96))
y = net(x)

Block 1 output: (4, 64, 45, 45)
Block 2 output: (4, 64, 22, 22)
Block 3 output: (4, 128, 11, 11)
Block 4 output: (4, 256, 6, 6)
Block 5 output: (4, 512, 3, 3)
Block 6 output: (4, 10)


In [None]:
from mxnet import gluon
from mxnet import init

import utils

# Data iterators come from the local `utils` module.
# NOTE(review): presumably Fashion-MNIST resized to 96x96 in batches of 64 -
# confirm against utils.load_data_fashion_mnist_new.
train_data, test_data = utils.load_data_fashion_mnist_new(batch_size=64, resize=96)
# NOTE(review): presumably returns a GPU context when one is available -
# confirm against utils.try_gpu.
ctx = utils.try_gpu()

# Fresh 10-output network (verbose left off), initialised with Xavier on `ctx`.
net_ = ResNet(10)
net_.initialize(ctx=ctx, init=init.Xavier())

# Softmax cross-entropy loss, optimised with SGD at learning rate 0.05.
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net_.collect_params(), 'sgd', {'learning_rate': 0.05})

# Train for a single epoch; the loop itself lives in utils.train.
utils.train(train_data, test_data, net_, loss, trainer, ctx, num_epochs=1)