In [109]:
from IPython import display
from matplotlib import pyplot as plt
from mxnet import autograd, nd
import random

In [110]:
# Build a synthetic linear-regression dataset:
#   labels = true_w[0] * x0 + true_w[1] * x1 + true_b + noise
# NOTE(review): no RNG seed is set in the visible cells, so the generated
# values (and every output below) presumably change from run to run.
num_inputs = 2  # number of feature columns per example
num_examples = 1000  # number of generated examples (rows)
true_w = [2, -3.4]  # ground-truth weights the model should recover
true_b = 4.2  # ground-truth bias the model should recover
# One row per example, one column per input, sampled with scale=1.
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# Perturb the labels in place with normal noise of scale 0.01.
labels += nd.random.normal(scale=0.01, shape=labels.shape)

In [111]:
# Display the first example: its feature row and its label.
features[0], labels[0]

(
 [-0.4548878   0.06443289]
 <NDArray 2 @cpu(0)>, 
 [3.0621269]
 <NDArray 1 @cpu(0)>)

In [112]:
def data_iter(batch_size, features, labels):
    """Yield random mini-batches of ``(features, labels)``.

    Training has to walk over the dataset repeatedly, reading one mini-batch
    at a time. Each item produced holds the features and labels of up to
    ``batch_size`` examples, picked in a freshly shuffled order.

    Args:
        batch_size: Number of examples per mini-batch.
        features: Indexable collection supporting ``len()`` and ``take()``.
        labels: Collection supporting ``take()`` with the same indices.

    Yields:
        ``(features.take(idx), labels.take(idx))`` for each consecutive slice
        of the shuffled index list; the final slice may be shorter.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)  # examples are read in random order
    for start in range(0, total, batch_size):
        # Slicing clamps at the end of the list, so the last batch simply
        # contains whatever indices are left.
        picked = nd.array(order[start:start + batch_size])
        # take() returns the elements found at the given indices.
        yield features.take(picked), labels.take(picked)


In [113]:
batch_size = 10  # mini-batch size, reused by the training cells below
# Print only the first mini-batch produced by data_iter, then stop.
for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break



[[-1.6151997   1.5316861 ]
 [ 0.76087284 -1.2117889 ]
 [ 1.0388833   0.8026588 ]
 [-0.19741638  2.1630235 ]
 [ 0.50740314  0.42834154]
 [ 0.4975765  -0.456513  ]
 [-1.3294283  -0.4583839 ]
 [-1.0225859  -3.063631  ]
 [-0.14015885  0.36172876]
 [ 2.0954275   0.9163307 ]]
<NDArray 10x2 @cpu(0)> 
[-4.2292576  9.850231   3.5535052 -3.5422158  3.7483315  6.756504
  3.095443  12.577177   2.6899717  5.2745547]
<NDArray 10 @cpu(0)>


In [114]:
# 初始化模型参数

In [115]:
# Weights: a (num_inputs, 1) column sampled from a normal with scale 0.01.
w = nd.random.normal(scale=0.01, shape=(num_inputs, 1))
# Bias: a single value initialised to zero.
b = nd.zeros(shape=(1,))

In [116]:
# Training iterates the parameter values using their gradients, so gradients
# have to be created for w and b before the training loop runs.
w.attach_grad()
b.attach_grad()

In [117]:
def linreg(X, w, b):  # also saved in the d2lzh package for later reuse
    """Linear regression model: ``nd.dot(X, w)`` plus the bias ``b``."""
    product = nd.dot(X, w)
    return product + b

In [118]:
def squared_loss(y_hat, y):  # also saved in the d2lzh package for later reuse
    """Return the element-wise halved squared error ``(y_hat - y) ** 2 / 2``.

    ``y`` is first reshaped to ``y_hat.shape`` so the subtraction lines the
    two operands up element by element. No reduction is applied; the result
    has the shape of ``y_hat``.
    """
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual ** 2 / 2


In [119]:
def sgd(params, lr, batch_size):  # also saved in the d2lzh package for later reuse
    """Apply one mini-batch SGD step to every parameter, in place.

    Args:
        params: Iterable of parameters; each must expose a ``grad`` attribute
            and support slice assignment.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient.
    """
    for p in params:
        step = lr * p.grad / batch_size
        # Slice assignment overwrites the existing parameter's contents
        # instead of rebinding the loop variable to a new object.
        p[:] = p - step

In [120]:
lr = 0.03  # learning rate handed to sgd
num_epochs = 3  # number of passes over the training data
net = linreg  # model function called by the training loop
loss = squared_loss  # loss function called by the training loop

In [121]:
for epoch in range(num_epochs):  # training runs for num_epochs epochs in total
    # Within one epoch every training example is used exactly once. X and y
    # are the features and labels of the current mini-batch.
    for X, y in data_iter(batch_size, features, labels):
        with autograd.record():
            l = loss(net(X, w, b), y)  # l is the loss on the mini-batch X, y
        l.backward()  # gradient of the mini-batch loss w.r.t. the parameters
        # Normalise by the number of examples actually in this batch, so a
        # shorter final batch (num_examples not divisible by batch_size) is
        # still averaged correctly. Identical to batch_size for full batches.
        sgd([w, b], lr, X.shape[0])  # mini-batch SGD update of the parameters
    train_l = loss(net(features, w, b), labels)
    # asscalar() yields a plain Python scalar; formatting the 1-element array
    # returned by asnumpy() with %f relies on implicit array-to-scalar
    # conversion.
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().asscalar()))

epoch 1, loss 0.044552
epoch 2, loss 0.000178
epoch 3, loss 0.000053


In [122]:
# Show the ground-truth weights next to the learned weights.
true_w, w

([2, -3.4], 
 [[ 1.9999061]
  [-3.3994224]]
 <NDArray 2x1 @cpu(0)>)

In [123]:
# Show the ground-truth bias next to the learned bias.
true_b, b

(4.2, 
 [4.1994123]
 <NDArray 1 @cpu(0)>)

In [None]:
# 线性回归的简洁实现

In [206]:
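# Gluon provides the `data` package for reading data. Because `data` is often used as a variable name, the module is imported under the alias `gdata` (prefixed with the first letter of Gluon). In each iteration we will randomly read a mini-batch of 10 data examples.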
from mxnet.gluon import data as gdata

In [207]:
# Regenerate the synthetic dataset with the same recipe as the from-scratch
# section: labels = true_w[0] * x0 + true_w[1] * x1 + true_b + noise.
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

batch_size = 10
# Combine the features and labels of the training data.
dataset = gdata.ArrayDataset(features, labels)
# Read mini-batches in random order.
# NOTE(review): this rebinds data_iter, which an earlier cell defines as a
# generator function. Re-running the earlier cells that call
# data_iter(batch_size, features, labels) after this one would try to call
# the DataLoader instead.
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

In [209]:
# Print only the first mini-batch produced by the DataLoader, then stop.
for X, y in data_iter:
    print(X, y)
    break


[[-1.3475399  -0.8394777 ]
 [-1.1405525  -0.8819148 ]
 [ 0.69339937  0.14764738]
 [-1.237683    0.11952726]
 [-0.7804534   1.2758106 ]
 [-0.6843921   0.88088065]
 [ 0.20693605  0.20194586]
 [-0.47642735 -0.75797933]
 [ 1.7871675   0.779684  ]
 [-1.1624961  -0.16245984]]
<NDArray 10x2 @cpu(0)> 
[ 4.3697734   4.919496    5.0961995   1.3250526  -1.7046833  -0.16487028
  3.9217532   5.8421726   5.1121807   2.4439287 ]
<NDArray 10 @cpu(0)>


In [210]:
from mxnet.gluon import nn
# Container model; this rebinds net, which earlier pointed at linreg.
net = nn.Sequential()
# In Gluon a fully connected layer is a Dense instance. This layer is defined
# with a single output.
net.add(nn.Dense(1))

In [211]:
from mxnet import init
# Initialise the network's parameters with init.Normal(sigma=0.01).
net.initialize(init.Normal(sigma=0.01))

In [212]:
from mxnet.gluon import loss as gloss
# Rebinds loss, which earlier pointed at squared_loss.
loss = gloss.L2Loss()  # squared loss, also called the L2-norm loss

In [213]:
from mxnet import gluon
# Trainer over all parameters collected from net, using the 'sgd' optimizer
# with a learning rate of 0.03.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

In [214]:
num_epochs = 3  # number of passes over the training data
for epoch in range(num_epochs):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)  # l is the loss on the mini-batch X, y
        l.backward()  # gradient of the mini-batch loss w.r.t. the parameters
        # Pass the number of examples actually in this batch so a shorter
        # final batch is normalised correctly. Identical to batch_size
        # whenever the batch is full.
        trainer.step(X.shape[0])
    # Loss over the full dataset after this epoch.
    l = loss(net(features), labels)
    # asscalar() yields a plain Python scalar; formatting the 1-element array
    # returned by asnumpy() with %f relies on implicit array-to-scalar
    # conversion.
    print('epoch %d, loss %f' % (epoch + 1, l.mean().asscalar()))

epoch 1, loss 0.034532
epoch 2, loss 0.000120
epoch 3, loss 0.000048


In [215]:
dense = net[0]  # first (and only) layer added to net: the Dense layer
# Show the ground-truth weights next to the layer's learned weights.
true_w, dense.weight.data()

([2, -3.4], 
 [[ 1.9999975 -3.399219 ]]
 <NDArray 1x2 @cpu(0)>)

In [216]:
# Show the ground-truth bias next to the layer's learned bias.
true_b, dense.bias.data()

(4.2, 
 [4.1995907]
 <NDArray 1 @cpu(0)>)