# 2.2 线性回归的简单实现

## 2.2.1 生成数据集

In [25]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

# Seed PyTorch's global RNG so the notebook is reproducible under
# Restart Kernel -> Run All. Later cells that draw from the same default
# generator (DataLoader shuffling, normal_ weight init) benefit as well.
# NOTE(review): assumes d2l.synthetic_data samples through torch's default
# RNG — confirm against the installed d2l version.
torch.manual_seed(0)

# Ground-truth parameters of the linear model used to synthesize the data.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# Request 1000 synthetic examples built from true_w and true_b.
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

## 2.2.2 读取数据集

将 `features` 和 `labels` 作为参数传给 PyTorch 的数据 API（`TensorDataset` 与 `DataLoader`），由它们构造按批读取的数据迭代器。

In [26]:
def load_array(data_arrays, batch_size, is_train=True):
    """Build a PyTorch data iterator over the given tensors.

    Args:
        data_arrays: Sequence of tensors; it is unpacked into
            ``data.TensorDataset`` so each tensor becomes one field of a sample.
        batch_size: Number of samples per mini-batch.
        is_train: Forwarded as ``shuffle``; when True the samples are
            reshuffled on every pass over the loader.

    Returns:
        A ``data.DataLoader`` yielding mini-batches of ``batch_size`` samples.
    """
    # Step 1: wrap the tensors so they can be indexed sample by sample.
    tensor_dataset = data.TensorDataset(*data_arrays)
    # Step 2: let DataLoader take care of batching (and optional shuffling).
    loader = data.DataLoader(
        dataset=tensor_dataset,
        batch_size=batch_size,
        shuffle=is_train,
    )
    return loader

batch_size = 10
# features and labels are packed into one tuple and passed as data_arrays.
data_iter = load_array((features, labels), batch_size)

# Peek at the first mini-batch; the cell displays it as its output.
next(iter(data_iter))

[tensor([[-0.2542,  0.9732],
         [ 1.5512,  0.4119],
         [ 0.4716,  1.3830],
         [ 0.6640,  1.8432],
         [ 0.1621,  0.0192],
         [ 0.2439,  0.3238],
         [ 0.0871, -0.0907],
         [-0.1809, -1.3449],
         [ 1.0176,  0.1230],
         [-0.3806, -1.4888]]),
 tensor([[ 0.3737],
         [ 5.9128],
         [ 0.4385],
         [-0.7406],
         [ 4.4632],
         [ 3.5822],
         [ 4.6833],
         [ 8.4108],
         [ 5.8094],
         [ 8.5132]])]

## 2.2.3 定义模型

In [27]:
# `nn` is short for "neural networks".
from torch import nn

# One fully connected layer inside a Sequential container:
# 2 input features -> 1 output.
net = nn.Sequential(nn.Linear(in_features=2, out_features=1))

## 2.2.4 初始化模型

In [28]:
# Weights of the linear layer: sampled in place from N(mean=0, std=0.01).
net[0].weight.data.normal_(mean=0, std=0.01)
# Bias: filled in place with zeros (the returned tensor is the cell output).
net[0].bias.data.fill_(0)

tensor([0.])

以上两步（定义模型、初始化参数）等价于手动定义参数 `w`、`b` 以及网络的前向计算函数。

## 2.2.5 计算均方误差

In [29]:
# Mean squared error criterion with nn.MSELoss's default settings.
loss = nn.MSELoss()

## 2.2.6 实例化SGD优化器

In [30]:
# SGD over all parameters of net (the layer's weight and bias), lr = 0.03.
trainer = torch.optim.SGD(params=net.parameters(), lr=0.03)

## 2.2.7 训练

In [31]:
# Train for a fixed number of passes over the dataset.
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)  # forward pass: loss on the current mini-batch
        trainer.zero_grad()  # clear gradients left over from the previous step
        l.backward()  # backpropagate to populate each parameter's .grad
        trainer.step()  # update the parameters using those gradients
    # Epoch-end evaluation on the full dataset. No backward pass follows, so
    # disable gradient tracking to avoid building an unused autograd graph.
    with torch.no_grad():
        l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')

epoch 1, loss 0.000287
epoch 2, loss 0.000107
epoch 3, loss 0.000107
