In [1]:
import mxnet as mx
from mxnet import ndarray as nd

In [2]:
# Single-channel example: a 2x2 kernel applied to a 3x3 input, plus a bias.
# The weight is laid out as (num_filter, in_channels, height, width).
w = nd.arange(4, ctx=mx.gpu(0)).reshape((1,1,2,2))
b = nd.array([1], ctx=mx.gpu(0))
data = nd.arange(9, ctx=mx.gpu(0)).reshape((1,1,3,3))
# num_filter is the number of output channels, i.e. the FIRST weight axis
# (axis 1 is the input-channel count and only matches here because both are 1).
out = nd.Convolution(data, w, b, kernel=w.shape[2:],
                     num_filter=w.shape[0])
print('input', data, '\nweight:', w, '\nbias', b,
     '\noutput', out)

input 
[[[[0. 1. 2.]
   [3. 4. 5.]
   [6. 7. 8.]]]]
<NDArray 1x1x3x3 @gpu(0)> 
weight: 
[[[[0. 1.]
   [2. 3.]]]]
<NDArray 1x1x2x2 @gpu(0)> 
bias 
[1.]
<NDArray 1 @gpu(0)> 
output 
[[[[20. 26.]
   [38. 44.]]]]
<NDArray 1x1x2x2 @gpu(0)>


In [3]:
# Same input/weight/bias as the previous cell, now moving the window two
# cells at a time (stride) over an input padded by one cell on each side.
# num_filter is the number of output channels, i.e. the first weight axis.
out = nd.Convolution(data, w, b, kernel=w.shape[2:],
                     num_filter=w.shape[0], stride=(2,2), pad=(1,1))
print('input', data, '\nweight:', w, '\nbias', b,
     '\noutput', out)

input 
[[[[0. 1. 2.]
   [3. 4. 5.]
   [6. 7. 8.]]]]
<NDArray 1x1x3x3 @gpu(0)> 
weight: 
[[[[0. 1.]
   [2. 3.]]]]
<NDArray 1x1x2x2 @gpu(0)> 
bias 
[1.]
<NDArray 1 @gpu(0)> 
output 
[[[[ 1.  9.]
   [22. 44.]]]]
<NDArray 1x1x2x2 @gpu(0)>


In [5]:
# Two input channels: the weight's second axis grows to match the data's
# channel axis, while a single filter still yields one output channel.
# The bias `b` is reused from the earlier single-filter cell.
data = nd.arange(18, ctx=mx.gpu(0)).reshape((1,2,3,3))
w = nd.arange(8, ctx=mx.gpu(0)).reshape((1,2,2,2))
out = nd.Convolution(
    data=data, weight=w, bias=b,
    kernel=w.shape[2:], num_filter=w.shape[0])
print('input', data,
      '\nweight:', w,
      '\nbias', b,
      '\noutput', out)

input 
[[[[ 0.  1.  2.]
   [ 3.  4.  5.]
   [ 6.  7.  8.]]

  [[ 9. 10. 11.]
   [12. 13. 14.]
   [15. 16. 17.]]]]
<NDArray 1x2x3x3 @gpu(0)> 
weight: 
[[[[0. 1.]
   [2. 3.]]

  [[4. 5.]
   [6. 7.]]]]
<NDArray 1x2x2x2 @gpu(0)> 
bias 
[1.]
<NDArray 1 @gpu(0)> 
output 
[[[[269. 297.]
   [353. 381.]]]]
<NDArray 1x1x2x2 @gpu(0)>


In [6]:
# Two filters over two input channels: one bias entry per filter, and the
# output gains a second channel.
data = nd.arange(18, ctx=mx.gpu(0)).reshape((1,2,3,3))
w = nd.arange(16, ctx=mx.gpu(0)).reshape((2,2,2,2))
b = nd.array([1,2],ctx=mx.gpu(0))
out = nd.Convolution(
    data=data, weight=w, bias=b,
    kernel=w.shape[2:], num_filter=w.shape[0])
print('input', data,
      '\nweight:', w,
      '\nbias', b,
      '\noutput', out)

input 
[[[[ 0.  1.  2.]
   [ 3.  4.  5.]
   [ 6.  7.  8.]]

  [[ 9. 10. 11.]
   [12. 13. 14.]
   [15. 16. 17.]]]]
<NDArray 1x2x3x3 @gpu(0)> 
weight: 
[[[[ 0.  1.]
   [ 2.  3.]]

  [[ 4.  5.]
   [ 6.  7.]]]


 [[[ 8.  9.]
   [10. 11.]]

  [[12. 13.]
   [14. 15.]]]]
<NDArray 2x2x2x2 @gpu(0)> 
bias 
[1. 2.]
<NDArray 2 @gpu(0)> 
output 
[[[[ 269.  297.]
   [ 353.  381.]]

  [[ 686.  778.]
   [ 962. 1054.]]]]
<NDArray 1x2x2x2 @gpu(0)>


In [7]:
# Max and average pooling with a 2x2 window over the same two-channel input;
# pooling is applied to each channel separately, so the channel count is kept.
data = nd.arange(18, ctx=mx.gpu(0)).reshape((1,2,3,3))
max_pool, avg_pool = (
    nd.Pooling(data=data, pool_type=kind, kernel=(2,2))
    for kind in ('max', 'avg'))
print('data', data,
      '\nmax pooling:', max_pool,
      '\navg poolling', avg_pool)

data 
[[[[ 0.  1.  2.]
   [ 3.  4.  5.]
   [ 6.  7.  8.]]

  [[ 9. 10. 11.]
   [12. 13. 14.]
   [15. 16. 17.]]]]
<NDArray 1x2x3x3 @gpu(0)> 
max pooling: 
[[[[ 4.  5.]
   [ 7.  8.]]

  [[13. 14.]
   [16. 17.]]]]
<NDArray 1x2x2x2 @gpu(0)> 
avg poolling 
[[[[ 2.  3.]
   [ 5.  6.]]

  [[11. 12.]
   [14. 15.]]]]
<NDArray 1x2x2x2 @gpu(0)>


In [24]:
# Pick the device used for the rest of the notebook: the GPU when one is
# usable, otherwise the CPU.
try:
    ctx = mx.gpu()
    # Probe allocation: creating a tiny array makes MXNet actually touch the
    # device, so a missing/unusable GPU shows up here as an error.
    _ = nd.zeros((1,), ctx=ctx)
except mx.base.MXNetError:
    # Only MXNet failures trigger the fallback; anything else (for example a
    # KeyboardInterrupt) is allowed to propagate instead of being swallowed.
    ctx = mx.cpu()

In [25]:
# Standard deviation of the normal distribution used to initialise weights.
weight_scale = .01
# Number of classes, i.e. the width of the final dense layer.
num_outputs = 10

# 1st conv layer: 20 output channels, 1 input channel, 5 x 5 kernel
w1 = nd.random_normal(shape=(20, 1, 5, 5), scale=weight_scale, ctx=ctx)
b1 = nd.zeros(w1.shape[0], ctx=ctx)

# 2nd conv layer: 50 output channels, 20 input channels, 3 x 3 kernel
w2 = nd.random_normal(shape=(50, 20, 3, 3), scale=weight_scale, ctx=ctx)
b2 = nd.zeros(w2.shape[0], ctx=ctx)

# 1st dense layer: 1250 inputs -> 128 hidden units.
# 1250 is the flattened size of the second conv block's output
# (50 channels x 5 x 5 for the 28 x 28 inputs used in this notebook).
w3 = nd.random_normal(shape=(1250, 128), scale=weight_scale, ctx=ctx)
b3 = nd.zeros(w3.shape[1], ctx=ctx)

# 2nd dense layer: 128 hidden units -> one score per class
w4 = nd.random_normal(shape=(w3.shape[1], num_outputs), scale=weight_scale, ctx=ctx)
b4 = nd.zeros(w4.shape[1], ctx=ctx)

params = [w1, b1, w2, b2, w3, b3, w4, b4]

# Allocate gradient buffers so autograd can write into every parameter.
for param in params:
    param.attach_grad()

In [32]:
def net(x, verbose=False):
    """Forward pass: two conv/ReLU/max-pool blocks followed by two dense layers.

    Uses the module-level parameters w1..w4 and b1..b4.

    Args:
        x: input batch; it is copied to the device holding ``w1`` if needed.
        verbose: when True, print the shape of each stage's output.

    Returns:
        The output of the second dense layer (no softmax is applied here).
    """
    x = x.as_in_context(w1.context)

    # first convolution block: conv -> ReLU -> 2x2 max pooling with stride 2
    conv1 = nd.Convolution(
        data=x, weight=w1, bias=b1,
        kernel=w1.shape[2:], num_filter=w1.shape[0])
    h1 = nd.Pooling(
        data=nd.relu(conv1), pool_type='max', kernel=(2,2), stride=(2,2))

    # second convolution block, same structure
    conv2 = nd.Convolution(
        data=h1, weight=w2, bias=b2,
        kernel=w2.shape[2:], num_filter=w2.shape[0])
    pooled2 = nd.Pooling(
        data=nd.relu(conv2), pool_type='max', kernel=(2,2), stride=(2,2))

    # collapse everything but the batch axis so the dense layers can use nd.dot
    h2 = nd.flatten(pooled2)

    # first dense layer with ReLU
    h3 = nd.relu(nd.dot(h2, w3) + b3)

    # second dense layer
    h4 = nd.dot(h3, w4) + b4

    if verbose:
        # note: the shape reported for the 2nd conv block is the flattened one
        stages = (
            ('1st conv block:', h1),
            ('2nd conv block:', h2),
            ('1st dense:', h3),
            ('2nd dense:', h4),
        )
        for tag, stage in stages:
            print(tag, stage.shape)
    return h4

In [40]:
# Mini-batch size: passed to the data loaders and used to scale the SGD step.
batch_size = 256

from mxnet import gluon

In [41]:
def load_data_fashion_mnist(batch_size):
    """Download the Fashion-MNIST dataset and wrap both splits in data loaders.

    Args:
        batch_size: number of examples per mini-batch for both loaders.

    Returns:
        A ``(train_data, test_data)`` tuple of ``gluon.data.DataLoader``
        objects; only the training loader shuffles.
    """
    def to_channel_first(data, label):
        # reorder height x width x channel -> channel x height x width,
        # then divide the float pixels by 255
        chw = nd.transpose(data.astype('float32'), (2,0,1))
        return chw/255, label.astype('float32')

    def make_loader(train, shuffle):
        # one dataset split wrapped in a loader of the requested batch size
        split = gluon.data.vision.FashionMNIST(
            train=train, transform=to_channel_first)
        return gluon.data.DataLoader(split, batch_size, shuffle=shuffle)

    train_loader = make_loader(train=True, shuffle=True)
    test_loader = make_loader(train=False, shuffle=False)
    return (train_loader, test_loader)


# Build the training and test data loaders with the configured batch size.
train_data_, test_data_ = load_data_fashion_mnist(batch_size)

# Push only the first training batch through the network so that each
# stage's output shape is printed once.
for data, _ in train_data_:
    net(data, verbose=True)
    break

1st conv block: (256, 20, 12, 12)
2nd conv block: (256, 1250)
1st dense: (256, 128)
2nd dense: (256, 10)


In [46]:
import utils
from mxnet import autograd

def evaluate_accuracy(data_iterator, net, ctx=mx.cpu()):
    """Return the mean of the per-batch accuracies of ``net``.

    Args:
        data_iterator: sized iterable yielding ``(data, label)`` batches.
        net: callable mapping a data batch to its output scores.
        ctx: device that each batch and its labels are copied to first.

    Returns:
        Sum of ``utils.accuracy`` over all batches divided by the number of
        batches (``len(data_iterator)``).
    """
    batch_scores = (
        utils.accuracy(net(data.as_in_context(ctx)), label.as_in_context(ctx))
        for data, label in data_iterator
    )
    return sum(batch_scores, 0.) / len(data_iterator)

In [47]:
# Softmax + cross-entropy combined in one loss.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

learning_rate = .2

# Train for five passes over the training set, reporting running metrics.
for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data_:
        # net() moves the data itself; the labels must be moved here
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # The step is divided by the batch size -- presumably because
        # backward() on the un-averaged loss accumulates the gradients of
        # every example in the batch.
        utils.SGD(params, learning_rate/batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    test_acc = evaluate_accuracy(test_data_, net, ctx)
    # Average the accumulated metrics over the number of training batches.
    # Uses train_data_, the loader actually defined in this notebook.
    num_batches = len(train_data_)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
            epoch, train_loss/num_batches, train_acc/num_batches, test_acc))

Epoch 0. Loss: 0.513204, Train acc 0.805624, Test acc 0.841797
Epoch 1. Loss: 0.447548, Train acc 0.833167, Test acc 0.856250
Epoch 2. Loss: 0.405360, Train acc 0.851513, Test acc 0.864746
Epoch 3. Loss: 0.376168, Train acc 0.861381, Test acc 0.871387
Epoch 4. Loss: 0.356211, Train acc 0.869492, Test acc 0.880859
