In [1]:
%matplotlib inline
import d2lzh as d2l
from mxnet import autograd, nd

In [2]:
#get fashion-mnist data set
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

In [3]:
#init model parameters
num_inputs = 784
num_outputs = 10

W = nd.random_normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)

In [4]:
W.attach_grad()
b.attach_grad()

In [5]:
X = nd.array([[1, 2, 3], [4, 5, 6]])
X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True)

(
 [[5. 7. 9.]]
 <NDArray 1x3 @cpu(0)>, 
 [[ 6.]
  [15.]]
 <NDArray 2x1 @cpu(0)>)

In [6]:
#define softmax
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition

In [7]:
X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(axis=1)

(
 [[0.21324193 0.33961776 0.1239742  0.27106097 0.05210521]
  [0.11462264 0.3461234  0.19401033 0.29583326 0.04941036]]
 <NDArray 2x5 @cpu(0)>, 
 [1.0000001 1.       ]
 <NDArray 2 @cpu(0)>)

In [8]:
#define model
def net(X):
    return softmax(nd.dot(X.reshape(-1, num_inputs), W) +b)

In [9]:
#define loss

def cross_entropy(y_hat, y):
    return -nd.pick(y_hat, y).log()

In [10]:
#define accuracy function
def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()


In [11]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y = y.astype('float32')
        acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
        return acc_sum / n

In [12]:
#define trainer
num_epochs = 5
lr = 0.1
loss = cross_entropy

def train(net, train_iter, test_iter, loss, num_epochs, batch_size,
         params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            #if trainer is None:
            d2l.sgd(params, lr, batch_size)
            #else:
            #    trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f,test acc %.3f' 
                  %(epoch +1, train_l_sum/n, train_acc_sum / n, test_acc))

In [13]:
train(net, train_iter, test_iter, loss, num_epochs, batch_size, [W, b], lr)

epoch 1, loss 0.7892, train acc 0.747,test acc 0.801
epoch 2, loss 0.5747, train acc 0.811,test acc 0.797
epoch 3, loss 0.5291, train acc 0.823,test acc 0.852
epoch 4, loss 0.5044, train acc 0.831,test acc 0.836
epoch 5, loss 0.4905, train acc 0.833,test acc 0.859
