这是softmax regression的非gluon实现

In [6]:
import gluonbook as gb
from mxnet import autograd, nd

# 直接使用glunbook提供的fashion_mnist数据集
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
print(len(train_iter))
print(len(test_iter))

235
40


In [7]:
# 初始化模型的参数值
num_inputs = 784
num_outputs = 10

W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)

# 申请自动求梯度所需要的内存空间
W.attach_grad()
b.attach_grad()

In [8]:
X = nd.array([[1,2,3], [4,5,6]])
# 看一下sum函数怎么用
X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True),X.exp()

(
 [[5. 7. 9.]]
 <NDArray 1x3 @cpu(0)>, 
 [[ 6.]
  [15.]]
 <NDArray 2x1 @cpu(0)>, 
 [[  2.7182817   7.389056   20.085537 ]
  [ 54.59815   148.41316   403.4288   ]]
 <NDArray 2x3 @cpu(0)>)

In [9]:
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition

In [10]:
X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(axis=1)

(
 [[0.6264712  0.126293   0.01826552 0.10885343 0.12011679]
  [0.25569436 0.2917251  0.0754655  0.3024068  0.07470828]]
 <NDArray 2x5 @cpu(0)>, 
 [0.99999994 1.        ]
 <NDArray 2 @cpu(0)>)

In [11]:
# 定义模型
def net(X):
    return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b)

In [12]:
y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2])
# 看下pick函数是怎么用的？
nd.pick(y_hat, y)


[0.1 0.5]
<NDArray 2 @cpu(0)>

In [13]:
# 定义损失函数
def cross_entropy(y_hat, y):
    return - nd.pick(y_hat, y).log()

In [14]:
def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()
accuracy(y_hat, y)

0.5

In [15]:
# 计算准确率
def evaluate_accuracy(data_iter, net):
    acc = 0
    for X, y in data_iter:
        acc += accuracy(net(X), y)
    return acc / len(data_iter)
evaluate_accuracy(test_iter, net)

0.08798828125

In [17]:
# 开始训练
num_epochs = 1
lr = 0.5
def train_ch3(net, train_iter, test_iter,
              loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum = 0
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X) # 模型也是传参
                l = loss(y_hat, y) # loss函数也是传参
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            train_l_sum += l.mean().asscalar()
            train_acc_sum += accuracy(y_hat, y)
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
             % (epoch+1, train_l_sum/len(train_iter),
               train_acc_sum/len(train_iter), test_acc))

In [None]:
train_ch3(net, train_iter, test_iter,
          cross_entropy, num_epochs, batch_size,
         [W, b], lr) # 这里没有传入trainer

In [3]:
#  对比一个测试样本的真实label和预测label
for X, y in test_iter:
    break
true_labels = gb.get_fashion_mnist_labels(y.asnumpy())
pred_labels = gb.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
titles = [true+'\n'+pred for true, pred in zip(true_labels, pred_labels)]

gb.show_fashion_mnist(X[0:9], titles[0:9])

NameError: name 'test_iter' is not defined