## 创建数据集
使用一个人工生成的数据集，标签由下面的线性关系加上噪声得到：

`y[i] = 2 * x[i][0] - 3.4 * x[i][1] + 4.2 + noise`

其中 noise 是乘以 0.01 的正态随机噪声。

In [3]:
from mxnet import ndarray as nd
from mxnet import autograd

# Problem size: number of features per example and number of examples.
num_inputs = 2
num_examples = 1000

# Ground-truth weights and bias of the linear model used to generate labels.
true_w = [2, -3.4]
true_b = 4.2

# Features drawn by nd.random_normal with shape (num_examples, num_inputs).
# NOTE(review): no random seed is set in the visible cells, so x and y
# differ from run to run — confirm whether reproducibility is required.
x = nd.random_normal(shape = (num_examples, num_inputs))
# Noise-free labels: y[i] = true_w[0] * x[i][0] + true_w[1] * x[i][1] + true_b.
y = true_w[0] * x[:, 0] + true_w[1] * x[:, 1] + true_b
# Perturb the labels in place with random noise scaled by 0.01.
y += 0.01 * nd.random_normal(shape = y.shape)

## 数据读取
训练神经网络的时候，需要不断读取数据块。这里通过 Python 的 yield 来构造一个迭代器，每次返回 batch_size 个随机选取的样本及其标签。

In [4]:
import random
# Number of examples per mini-batch produced by data_iter().
batch_size = 10
def data_iter():
    """Yield shuffled mini-batches of (features, labels), covering the data once.

    Reads the module-level ``x``, ``y``, ``num_examples`` and ``batch_size``.
    The example order is reshuffled on every call; the last batch is shorter
    when ``num_examples`` is not a multiple of ``batch_size``.
    """
    order = list(range(num_examples))
    random.shuffle(order)
    for start in range(0, num_examples, batch_size):
        stop = min(start + batch_size, num_examples)
        batch_idx = nd.array(order[start:stop])
        # Because this is a generator, execution pauses at the yield and
        # resumes from this point when the caller asks for the next batch.
        yield nd.take(x, batch_idx), nd.take(y, batch_idx)

## 初始化模型参数
下面随机初始化模型参数

然后为所有参数创建梯度（调用 attach_grad），以便之后通过自动求导计算梯度。

In [37]:
# Weights start from random values drawn by nd.random_normal, shape (num_inputs, 1).
w = nd.random_normal(shape = (num_inputs, 1))
# Bias starts at zero.
b = nd.zeros((1, ))
# Collected in a list so gradient attachment and SGD can loop over all parameters.
params = [w, b]

In [38]:
# Attach a gradient buffer to every parameter; SGD later reads it as `param.grad`.
for param in params:
    param.attach_grad()

## 定义模型
线性模型的定义

In [28]:
def net(x):
    """Linear model: return ``nd.dot(x, w) + b`` using the module-level ``w`` and ``b``."""
    projected = nd.dot(x, w)
    return projected + b

In [29]:
def square_loss(yhat, y):
    """Return the element-wise squared error between predictions and labels.

    ``y`` is reshaped to ``yhat.shape`` before the subtraction, so the result
    has the shape of ``yhat``. No reduction (sum/mean) is applied.
    """
    target = y.reshape(yhat.shape)
    residual = yhat - target
    return residual ** 2

## 进行优化
通过随机梯度下降（SGD）求解：每一步将参数沿梯度的反方向移动，步长由学习率决定。

In [30]:
def SGD(params, lr):
    """Apply one gradient-descent step to every parameter, in place.

    Each element of ``params`` must expose its gradient as ``.grad``. Its
    contents are overwritten with ``param - lr * param.grad``. Returns None.
    """
    for p in params:
        step = lr * p.grad
        # Slice assignment writes into the existing array rather than
        # rebinding the name, so the caller's objects are updated.
        p[:] = p - step

## 训练
在每一次迭代里，随机读取固定数量（batch_size）的数据，计算梯度并更新模型参数。

In [39]:
# Number of full passes over the dataset.
epochs = 5
# Step size passed to SGD.
learning_rate = 0.05
for e in range(epochs):
    # Running sum of the per-example losses seen during this epoch.
    total_loss = 0
    for data,label in data_iter():
        # Record the forward pass so autograd can differentiate the loss.
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        # Back-propagate from the per-example losses of this batch.
        loss.backward()
        # Update the parameters using the gradients just computed.
        SGD(params, learning_rate)
        # Accumulate the batch's summed loss (computed before the update) as a scalar.
        total_loss += nd.sum(loss).asscalar()
    # Report the mean per-example loss for the epoch.
    print("Epoch: %d, average loss: %f"% (e, total_loss/num_examples))

Epoch: 0, average loss: 0.202821
Epoch: 1, average loss: 0.000148
Epoch: 2, average loss: 0.000160
Epoch: 3, average loss: 0.000172
Epoch: 4, average loss: 0.000133
