In [109]:
from IPython import display
from matplotlib import pyplot as plt
from mxnet import autograd, nd
import random

In [110]:
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

In [111]:
features[0], labels[0]

(
 [-0.4548878   0.06443289]
 <NDArray 2 @cpu(0)>, 
 [3.0621269]
 <NDArray 1 @cpu(0)>)

In [112]:
# 在训练模型的时候，我们需要遍历数据集并不断读取小批量数据样本。这⾥我们定义⼀个函数：它每次返回batch_size（批量⼤小）个随机样本的特征和标签。
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices) # 样本的读取顺序是随机的
    for i in range(0, num_examples, batch_size):
        j = nd.array(indices[i: min(i + batch_size, num_examples)]) # batch_size个索引
        yield features.take(j), labels.take(j) # take函数根据索引返回对应元素


In [113]:
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break



[[-1.6151997   1.5316861 ]
 [ 0.76087284 -1.2117889 ]
 [ 1.0388833   0.8026588 ]
 [-0.19741638  2.1630235 ]
 [ 0.50740314  0.42834154]
 [ 0.4975765  -0.456513  ]
 [-1.3294283  -0.4583839 ]
 [-1.0225859  -3.063631  ]
 [-0.14015885  0.36172876]
 [ 2.0954275   0.9163307 ]]
<NDArray 10x2 @cpu(0)> 
[-4.2292576  9.850231   3.5535052 -3.5422158  3.7483315  6.756504
  3.095443  12.577177   2.6899717  5.2745547]
<NDArray 10 @cpu(0)>


In [114]:
# 初始化模型参数

In [115]:
w = nd.random.normal(scale=0.01, shape=(num_inputs, 1))
b = nd.zeros(shape=(1,))

In [116]:
#之后的模型训练中，需要对这些参数求梯度来迭代参数的值，因此我们需要创建它们的梯度
w.attach_grad()
b.attach_grad()

In [117]:
def linreg(X, w, b): # 本函数已保存在d2lzh包中⽅便以后使⽤
    return nd.dot(X, w) + b

In [118]:
def squared_loss(y_hat, y): # 本函数已保存在d2lzh包中⽅便以后使⽤
    #print("dddddd",(y_hat - y.reshape(y_hat.shape))**2/2)
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2


In [119]:
def sgd(params, lr, batch_size): # 本函数已保存在d2lzh包中⽅便以后使⽤
    for param in params:
        param[:] = param - lr * param.grad / batch_size

In [120]:
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

In [121]:
for epoch in range(num_epochs): # 训练模型⼀共需要num_epochs个迭代周期
    # 在每⼀个迭代周期中，会使⽤训练数据集中所有样本⼀次（假设样本数能够被批量⼤⼩整除）。X
    # 和y分别是⼩批量样本的特征和标签
    for X, y in data_iter(batch_size, features, labels):
        with autograd.record():
            l = loss(net(X, w, b), y) # l是有关⼩批量X和y的损失
        l.backward() # ⼩批量的损失对模型参数求梯度
        sgd([w, b], lr, batch_size) # 使⽤⼩批量随机梯度下降迭代模型参数
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().asnumpy()))

epoch 1, loss 0.044552
epoch 2, loss 0.000178
epoch 3, loss 0.000053


In [122]:
true_w, w

([2, -3.4], 
 [[ 1.9999061]
  [-3.3994224]]
 <NDArray 2x1 @cpu(0)>)

In [123]:
true_b, b

(4.2, 
 [4.1994123]
 <NDArray 1 @cpu(0)>)

In [None]:
# 线性回归的简洁实现

In [191]:
# Gluon提供了data包来读取数据。由于data常⽤作变量名，我们将导⼊的data模块⽤添加了Gluon⾸字⺟的假名gdata代替。在每⼀次迭代中，我们将随机读取包含10个数据样本的小批量。
from mxnet.gluon import data as gdata
import mxnet as mx

In [194]:
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs), ctx=mx.gpu())
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape, ctx=mx.gpu())

batch_size = 10
# 将训练数据的特征和标签组合
dataset = gdata.ArrayDataset(features, labels)
# 随机读取⼩批量
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

In [197]:
features, labels

(
 [[-0.17739409  0.8909654 ]
  [ 0.720208   -0.04110664]
  [-0.14618981 -1.1971692 ]
  ...
  [-1.7919273   1.236199  ]
  [ 0.47072384  0.90302587]
  [ 1.156984    1.534884  ]]
 <NDArray 1000x2 @gpu(0)>, 
 [ 8.02321732e-01  5.78764582e+00  7.98177671e+00  6.43129301e+00
   9.05338097e+00 -5.04550362e+00 -1.10004985e+00  1.21830177e+01
   1.46606278e+00  4.57768965e+00  4.59037828e+00  9.94587231e+00
   3.81356144e+00  8.50929260e+00  1.31196797e+00  1.74511647e+00
   8.17923832e+00  1.86180925e+00  2.41627049e+00  1.18500957e+01
   2.06905580e+00 -1.61908126e+00  2.43128133e+00  4.21254921e+00
   3.73978662e+00  9.26617813e+00  5.64176655e+00  3.41676259e+00
  -1.50507486e+00  1.04945440e+01  2.77161598e+00 -1.11613661e-01
   4.91331959e+00 -2.61863971e+00  1.25943308e+01  1.66245639e+00
   3.44821453e+00  8.56688404e+00  2.92915916e+00  5.71758890e+00
   7.42617273e+00  1.24267254e+01  5.44200993e+00  4.98985052e+00
   9.66446304e+00  5.19729805e+00 -3.43590879e+00  7.99924970e-01
   

In [199]:
for X, y in data_iter:
    print(X, y)
    break


[[-0.99123675 -0.80509216]
 [ 0.6881445  -2.1662738 ]
 [ 1.5378752  -1.5480202 ]
 [ 0.06240999 -1.0963577 ]
 [-0.98775446 -1.2209109 ]
 [-1.0407561  -0.31223154]
 [-0.8260921   0.49631175]
 [ 1.1478789   0.29746735]
 [ 0.84213954  1.5267886 ]
 [-1.189887   -0.8651939 ]]
<NDArray 10x2 @gpu(0)> 
[ 4.964828  12.940525  12.536772   8.0601015  6.3619027  3.186148
  0.8498413  5.4862347  0.6877142  4.7764025]
<NDArray 10 @cpu(0)>


In [200]:
from mxnet.gluon import nn
net = nn.Sequential()
# 在Gluon中，全连接层是⼀个Dense实例。我们定义该层输出个数为1。
net.add(nn.Dense(1))

In [201]:
from mxnet import init
net.initialize(init.Normal(sigma=0.01))

In [202]:
from mxnet.gluon import loss as gloss
loss = gloss.L2Loss() # 平⽅损失⼜称L2范数损失

In [203]:
from mxnet import gluon
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

In [204]:
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y) # l是有关⼩批量X和y的损失
        l.backward() # ⼩批量的损失对模型参数求梯度
        trainer.step(batch_size)
    l = loss(net(features), labels)
    print('epoch %d, loss %f' % (epoch + 1, l.mean().asnumpy()))

RuntimeError: Parameter 'dense6_weight' was not initialized on context gpu(0). It was only initialized on [cpu(0)].

In [188]:
dense = net[0]
true_w, dense.weight.data()

([2, -3.4], 
 [[ 2.0001216 -3.399694 ]]
 <NDArray 1x2 @cpu(0)>)

In [189]:
true_b, dense.bias.data()

(4.2, 
 [4.1990976]
 <NDArray 1 @cpu(0)>)