In [12]:
from mxnet import nd, gpu, autograd
from mxnet.gluon import nn, data, loss, trainer, utils
from mxnet.gluon.data.vision import datasets, transforms
import time

In [2]:
# Allocate a 3x4 array filled with ones directly in the memory of GPU 0,
# then display it as the cell's result.
x = nd.ones(shape=(3, 4), ctx=gpu(0))
x


[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
<NDArray 3x4 @gpu(0)>

In [3]:
# Copy `x` onto the second GPU; the result is a new array, `x` itself is unchanged.
x.copyto(gpu(device_id=1))


[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
<NDArray 3x4 @gpu(1)>

In [4]:
# Draw a 3x4 array of uniform samples from [0, 1) on GPU 0 and add it to `x`.
# Both operands are created with the same context, so the sum is computed there.
y = nd.random.uniform(low=0, high=1, shape=(3, 4), ctx=gpu(0))
x + y


[[1.6686509 1.1740919 1.3850025 1.2467831]
 [1.3513434 1.8404298 1.6369917 1.12847  ]
 [1.1724988 1.9368206 1.5918345 1.9497097]]
<NDArray 3x4 @gpu(0)>

In [5]:
# Small convolutional classifier: two conv/max-pool stages followed by
# three fully connected layers, the last of which emits 10 scores.
net = nn.Sequential()

# Stage 1: 6 feature maps from 5x5 kernels, then 2x2 down-sampling.
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2))

# Stage 2: 16 feature maps from 3x3 kernels, then 2x2 down-sampling.
net.add(nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2))

# Classifier head: flatten the feature maps and apply the dense layers.
net.add(nn.Flatten())
net.add(nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(10))

In [6]:
# Restore previously saved weights straight onto GPU 0.
# 'net.params' is a relative path, so the file must be in the working directory.
net.load_parameters(filename='net.params', ctx=gpu(0))

In [7]:
# Run one forward pass on GPU 0 with a random input of shape (1, 1, 28, 28).
# The input is created on the same device the parameters were loaded to.
# NOTE: this rebinds `x`, replacing the 3x4 array defined in an earlier cell.
x = nd.random.uniform(low=0, high=1, shape=(1, 1, 28, 28), ctx=gpu(0))
net(x)


[[ 0.6284458   0.21993409  1.1261961   0.06318956 -0.4687658  -1.3573954
   1.1736336  -1.2489953   1.4780961  -1.2589195 ]]
<NDArray 1x10 @gpu(0)>

In [9]:
# Input pipeline for Fashion-MNIST: one loader for the training split and one
# for the validation split, both applying the same preprocessing to the images.
batch_size = 256

# ToTensor is followed by Normalize(mean, std) with mean 0.13 and std 0.31
# (presumably the dataset's pixel statistics -- confirm before reusing them).
transformer = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(0.13, 0.31)
])

# transform_first applies `transformer` to the image only, leaving the label as is.
# Training batches are reshuffled on every pass over the data.
train_data = data.DataLoader(
    datasets.FashionMNIST(train=True).transform_first(transformer),
    batch_size, shuffle=True, num_workers=4)

# Validation does not benefit from shuffling; a fixed order keeps evaluation
# runs comparable with each other.
valid_data = data.DataLoader(
    datasets.FashionMNIST(train=False).transform_first(transformer),
    batch_size, shuffle=False, num_workers=4)

In [13]:
# Data-parallel training on two GPUs: every mini-batch is split across the
# devices, each device computes the loss and gradients for its slice, and the
# trainer then applies a single parameter update.
devices = [gpu(0), gpu(1)]

# force_reinit=True re-initializes parameters that already hold values and
# places them on every device in `devices`.
net.collect_params().initialize(force_reinit=True, ctx=devices)

softmax_cross_entropy = loss.SoftmaxCrossEntropyLoss()
optimizer = trainer.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

for epoch in range(1, 11):
    train_loss = 0.
    num_samples = 0  # samples actually seen this epoch
    tic = time.time()
    for Xs, ys in train_data:
        # Number of samples in this batch; the last batch of an epoch may hold
        # fewer than `batch_size` samples (DataLoader keeps a partial final
        # batch by default).
        current_batch_size = Xs.shape[0]

        # One slice of data and labels per device, in matching order.
        data_list = utils.split_and_load(Xs, devices)
        label_list = utils.split_and_load(ys, devices)

        with autograd.record():
            losses = [softmax_cross_entropy(net(X), y) for X, y in zip(data_list, label_list)]
        for device_loss in losses:
            device_loss.backward()

        # Scale the gradients by the real batch size so a short final batch is
        # not under-weighted.
        optimizer.step(current_batch_size)

        # asscalar() copies each per-device sum back to the host.
        train_loss += sum(device_loss.sum().asscalar() for device_loss in losses)
        num_samples += current_batch_size

    # Mean per-sample loss over the samples actually processed this epoch.
    print("Epoch %d: loss %.3f, in %.1f sec" % (epoch, train_loss / num_samples, time.time() - tic))

Epoch 1: loss 1.863, in 3.7 sec
Epoch 2: loss 0.724, in 3.1 sec
Epoch 3: loss 0.560, in 3.1 sec
Epoch 4: loss 0.484, in 3.1 sec
Epoch 5: loss 0.444, in 3.1 sec
Epoch 6: loss 0.408, in 3.0 sec
Epoch 7: loss 0.383, in 3.1 sec
Epoch 8: loss 0.358, in 3.5 sec
Epoch 9: loss 0.343, in 3.1 sec
Epoch 10: loss 0.329, in 2.9 sec
