In [1]:
# importing all necessary packages
import gluonbook as gb
import mxnet as mx
from mxnet import gluon, init, nd, autograd
from mxnet.gluon import nn


In [2]:
# Implementation of modified ResNet - batch nomalization, activation and convolution
def convBlock(nChannels):
    denseNet = nn.Sequential()
    #print("denseNet in convBlock: ", denseNet)
    denseNet.add(nn.BatchNorm(),nn.Activation('relu'),nn.Conv2D(nChannels, kernel_size=3, padding=1))
    return denseNet


In [3]:
class DenseBlock(nn.Block):
    def __init__(self, nConvs, nChannels, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.net = nn.Sequential()
        
        for _ in range(nConvs):
            self.net.add(convBlock(nChannels))
            #print("Self net:", self.net)
    def forward(self, X):
        for block in self.net:
            Y = block(X)
            X = nd.concat(X, Y, dim=1) # Concatenate the input and output of each block on the channel dimension.
        return X
    

In [4]:
denseblk = DenseBlock(2, 10)
denseblk.initialize()
#print("Dense Block:", denseblk)
X = nd.random.uniform(shape=(4, 5, 8, 8))
Y = denseblk(X)
Y.shape


(4, 25, 8, 8)

In [5]:
# transition layer - used to control the complexity of the model. 
#                  - Reduces the number of channels by using the 1x1 convolutional layer and halves the height 
#                    and width of the average pooling layer with a stride of 2
def transBlock(nChannels):
    transblk = nn.Sequential()
    transblk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(nChannels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return transblk


In [6]:
denseblk = transBlock(10)
denseblk.initialize()
#print("Dense Block", denseblk)
denseblk(Y).shape


(4, 10, 4, 4)

In [7]:
denseblk = nn.Sequential()
denseblk.add(nn.Conv2D(64, kernel_size=5, strides=2, padding=3),
        nn.BatchNorm(), nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))
#print(denseblk)


In [8]:
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]

for i, num_convs in enumerate(num_convs_in_dense_blocks):
    denseblk.add(DenseBlock(num_convs, growth_rate))
    # This is the number of output channels in the previous dense block
    num_channels += num_convs * growth_rate
    # A transition layer that halves the number of channels is added between
    # the dense blocks
    if i != len(num_convs_in_dense_blocks) - 1:
        denseblk.add(transBlock(num_channels // 2))
        
        

In [9]:
denseblk.add(nn.BatchNorm(),nn.Activation('relu'),nn.GlobalAvgPool2D(), nn.Dense(10))




In [10]:
def data_xform(data):
    """Move channel axis to the beginning, cast to float32, and normalize to [0, 1]."""
    return nd.moveaxis(data, 2, 0).astype('float32') / 255

train_data = mx.gluon.data.vision.MNIST(train=True).transform_first(data_xform)
val_data = mx.gluon.data.vision.MNIST(train=False).transform_first(data_xform)

batch_size = 256
train_iter = mx.gluon.data.DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_iter = mx.gluon.data.DataLoader(val_data, shuffle=False, batch_size=batch_size)

lr, num_epochs, ctx = 0.1, 5, gb.try_gpu()
denseblk.initialize(ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(denseblk.collect_params(), 'sgd', {'learning_rate': lr})
gb.train_ch5(denseblk, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)


training on cpu(0)
epoch 1, loss 0.1237, train acc 0.963, test acc 0.971, time 320.0 sec
epoch 2, loss 0.0352, train acc 0.989, test acc 0.988, time 321.3 sec
epoch 3, loss 0.0226, train acc 0.993, test acc 0.989, time 319.3 sec
epoch 4, loss 0.0161, train acc 0.995, test acc 0.986, time 320.3 sec
epoch 5, loss 0.0093, train acc 0.998, test acc 0.990, time 319.0 sec


<mxnet.gluon.data.dataloader.DataLoader at 0x119ccbb38>