# 线性回归的简洁实现

通过使用深度学习框架来简洁地实现
线性回归模型
生成数据集

In [1]:
import sys
sys.path.append('..')

In [2]:
import numpy as np
import mindspore
import mindspore.dataset as ds
from d2l import mindspore as d2l

true_w = np.array([2, -3.4])
true_b = 4.2

调用框架中现有的API来读取数据，这里使用mindspore的GeneratorDataset

In [3]:
class SyntheticData():
    def __init__(self):
        self.features, self.labels = d2l.synthetic_data(true_w, true_b, 1000)

    def __getitem__(self, index):
        return self.features[index], self.labels[index]
    
    def __len__(self):
        return len(self.labels)

def load_array(data_arrays, column_names, batch_size, is_train=True):  
    """构造一个MindSpore数据迭代器。"""
    dataset = ds.GeneratorDataset(data_arrays, column_names, shuffle=is_train)
    dataset = dataset.batch(batch_size)
    return dataset

batch_size = 10
data_iter = SyntheticData()
dataset = load_array(data_iter, ['features', 'labels'], batch_size)

next(iter(dataset))

[Tensor(shape=[10, 2], dtype=Float32, value=
 [[ 1.37558311e-01,  7.56683886e-01],
  [ 1.38452029e+00,  6.27575338e-01],
  [-1.62963021e+00,  1.22711408e+00],
  ...
  [-1.14223678e-02, -1.85596108e+00],
  [ 1.21596611e+00, -1.52309394e+00],
  [-1.58561802e+00, -1.71928239e+00]]),
 Tensor(shape=[10, 1], dtype=Float32, value=
 [[ 1.90698516e+00],
  [ 4.83025599e+00],
  [-3.21380663e+00],
  ...
  [ 1.04689331e+01],
  [ 1.17963915e+01],
  [ 6.87181902e+00]])]

使用框架的预定义好的层, 这里初始化模型参数可以直接使用传参的方式

In [4]:
import mindspore.nn as nn
from mindspore.common.initializer import Normal

net = nn.SequentialCell([nn.Dense(2, 1, weight_init=Normal(0.01, 0), bias_init='zero')])

计算均方误差使用的是`MSELoss`类，也称为平方$L_2$范数

In [5]:
loss = nn.MSELoss()

将net与loss连接,这里使用MindSpore自带的Wrapper, `nn.WithLossCell`

In [6]:
net_with_loss = nn.WithLossCell(net, loss)

实例化一个`SGD`实例

In [7]:
optim = nn.SGD(net.trainable_params(), learning_rate=0.03)

将net,loss,optim连接，这里使用MindSpore自带的Wrapper, `nn.TrainOneStepCell`

In [8]:
trainer = nn.TrainOneStepCell(net_with_loss, optim)

训练过程代码与我们从零开始实现时所做的非常相似

In [9]:
num_epochs = 3
dataset_iter = dataset.create_tuple_iterator(num_epochs=num_epochs)
for epoch in range(num_epochs):
    for data in dataset_iter:
        l = trainer(*data)
    l = net_with_loss(mindspore.Tensor(data_iter.features), mindspore.Tensor(data_iter.labels))
    print(f'epoch {epoch + 1}, loss {l.asnumpy():f}')

epoch 1, loss 0.000261
epoch 2, loss 0.000100
epoch 3, loss 0.000099


比较生成数据集的真实参数和通过有限数据训练获得的模型参数

In [10]:
w = net[0].weight.data
print('w的估计误差：', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('b的估计误差：', true_b - b)

w的估计误差： [Tensor(shape=[], dtype=Float32, value= -8.10623e-05)
 Tensor(shape=[], dtype=Float32, value= -0.000388145)]
b的估计误差： [0.00039816]
