From: https://gluon.mxnet.io/chapter04_convolutional-neural-networks/deep-cnns-alexnet.html

In [27]:
from __future__ import print_function
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
import numpy as np
from tqdm import tqdm
from mxnet.gluon import HybridBlock, nn
mx.random.seed(1)

In [2]:
# ctx = mx.gpu()
ctx = mx.cpu()

In [35]:
def transformer(data, label):
    data = mx.image.imresize(data, 224, 224)
    #data = mx.nd.transpose(data, (2,0,1))
    data = data.astype(np.float32)
    return data, label

batch_size = 64
train_data = gluon.data.DataLoader(
    gluon.data.vision.CIFAR10('./data', train=True, transform=transformer),
    batch_size=batch_size, shuffle=True, last_batch='discard')

test_data = gluon.data.DataLoader(
    gluon.data.vision.CIFAR10('./data', train=False, transform=transformer),
    batch_size=batch_size, shuffle=False, last_batch='discard')

In [36]:
alex_net = gluon.nn.Sequential()
with alex_net.name_scope():
    #  First convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=96, kernel_size=11, strides=(4,4), activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3, strides=2))
    #  Second convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=192, kernel_size=5, activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3, strides=(2,2)))
    # Third convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=384, kernel_size=3, activation='relu'))
    # Fourth convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=384, kernel_size=3, activation='relu'))
    # Fifth convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=256, kernel_size=3, activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3, strides=2))
    # Flatten and apply fullly connected layers
    alex_net.add(gluon.nn.Flatten())
    alex_net.add(gluon.nn.Dense(4096, activation="relu"))
    alex_net.add(gluon.nn.Dense(4096, activation="relu"))
    alex_net.add(gluon.nn.Dense(10))

In [37]:
class AlexNet(HybridBlock):
    r"""AlexNet model from the `"One weird trick..." `_ paper.

    Parameters
    ----------
    classes : int, default 1000
        Number of classes for the output layer.
    """
    def __init__(self, classes=1000, **kwargs):
        super(AlexNet, self).__init__(**kwargs)
        with self.name_scope():
            with self.features.name_scope():
                self.features.add(nn.Conv2D(64, kernel_size=11, strides=4,
                                            padding=5, activation='relu', in_channels=3,layout="NHWC"))
                self.features.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
                self.features.add(nn.Conv2D(256, kernel_size=5, padding=2,
                                            activation='relu',layout="NHWC"))
                self.features.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
                self.features.add(nn.Conv2D(384, kernel_size=3, padding=1,
                                            activation='relu', layout="NHWC"))
                self.features.add(nn.Conv2D(256, kernel_size=3, padding=1,
                                            activation='relu', layout="NHWC"))
                self.features.add(nn.Conv2D(256, kernel_size=3, padding=1,
                                            activation='relu', layout="NHWC"))
                self.features.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
                self.features.add(nn.Flatten())
                self.features.add(nn.Dense(4096, activation='relu'))
                self.features.add(nn.Dropout(0.5))
                self.features.add(nn.Dense(4096, activation='relu'))
                self.features.add(nn.Dropout(0.5))

            self.output = nn.Dense(classes)

    def hybrid_forward(self, F, x):
        x = self.features(x)
        x = self.output(x)
        return x

In [38]:
alex_net = AlexNet()

In [39]:
alex_net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)


In [40]:
trainer = gluon.Trainer(alex_net.collect_params(), mx.optimizer.Adam(learning_rate=.001))


In [41]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()


In [42]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for d, l in data_iterator:
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

In [43]:

###########################
#  Only one epoch so tests can run quickly, increase this variable to actually run
###########################
epochs = 1
smoothing_constant = .01


for e in range(epochs):
    for i, (d, l) in tqdm(enumerate(train_data)):
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        with autograd.record():
            output = alex_net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, alex_net)
    train_accuracy = evaluate_accuracy(train_data, alex_net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))

0it [00:02, ?it/s]


MXNetError: MXNetError: could not create a descriptor for a dilated convolution forward propagation primitive