# 3.3 线性回归的简洁实现

# 3.3.1 生成数据集

In [1]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

# Ground-truth parameters of the linear model that the training below
# is expected to recover.
true_w = torch.tensor([2, -3.4])
true_b = 4.2

# Number of synthetic examples to draw.
num_examples = 1000
# d2l.synthetic_data is an external helper; presumably it returns
# (features, labels) generated from true_w / true_b plus noise -- TODO confirm.
features, labels = d2l.synthetic_data(true_w, true_b, num_examples)

# 3.3.2 读取数据集

In [2]:
"""
构造一个PyTorch数据迭代器DataLoader
torch.utils.data.DataLoader(dataset类对象, batch_size=1, shuffle=False, sampler=None,
    batch_sampler=None, num_workers=0, collate_fn=None,
    pin_memory=False, drop_last=False, timeout=0,
    worker_init_fn=None)
dataset类---映射式(map-style)/可迭代式(iterable-style)
    1.可以用data.TensorDataset直接打包
    2.自定义Dataset子类装入两个列表, 实现__getitem__()和__len__()
"""
def load_array(data_arrays, batch_size, is_train=True):  #@save
    """Build a PyTorch ``DataLoader`` over a group of aligned tensors.

    Args:
        data_arrays: Iterable of tensors that is unpacked into
            ``data.TensorDataset`` (e.g. ``(features, labels)``).
        batch_size: Number of examples in each mini-batch.
        is_train: Forwarded as the loader's ``shuffle`` flag; pass ``False``
            to iterate in the original order.

    Returns:
        A ``data.DataLoader`` yielding one tuple of tensors per mini-batch.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(
        tensor_dataset,
        batch_size=batch_size,
        shuffle=is_train,
    )
    return loader

# Number of examples per mini-batch.
batch_size = 10
# Mini-batch iterator over the (features, labels) pairs; is_train is left at
# its default of True, so the loader is created with shuffle=True.
data_iter = load_array(data_arrays=(features, labels), batch_size=batch_size)

# 3.3.3 定义模型

In [3]:
# nn是神经网络的缩写
from torch import nn

# Single fully connected layer mapping 2 input features to 1 output,
# wrapped in a Sequential container so it is reachable as net[0].
net = nn.Sequential(
    nn.Linear(in_features=2, out_features=1),
)

# 3.3.4 初始化模型参数

In [4]:
# net[i] returns the (i+1)-th layer of the Sequential container; its weight
# and bias tensors can then be overwritten in place.
linear_layer = net[0]
# Weights: samples from a normal distribution with mean 0 and std 0.01.
linear_layer.weight.data.normal_(mean=0, std=0.01)
# Bias: all zeros. Kept as the cell's last expression so the filled tensor
# is what the notebook displays.
linear_layer.bias.data.fill_(0)

tensor([0.])

# 3.3.5 定义损失函数

In [5]:
# Mean squared error; 'mean' is the default reduction, written out here so the
# averaging over all elements is explicit.
loss = nn.MSELoss(reduction='mean')

# 3.3.6 定义优化算法

In [11]:
# Mini-batch stochastic gradient descent over every parameter of `net`.
# The step size is kept in a named variable so it is easy to tune.
learning_rate = 0.03
trainer = torch.optim.SGD(params=net.parameters(), lr=learning_rate)

# 3.3.7 训练

In [9]:
# Number of full passes over the training data.
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)     # forward pass: loss on the current mini-batch
        trainer.zero_grad()     # clear gradients accumulated by the previous step
        l.backward()            # backpropagation
        trainer.step()          # update the parameters from their gradients
    # The epoch-level loss is only reported, never differentiated, so it is
    # computed without building an autograd graph over the full dataset.
    with torch.no_grad():
        l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')

epoch 1, loss 0.000100
epoch 2, loss 0.000100
epoch 3, loss 0.000101


In [10]:
# Pull the learned parameters out of the single linear layer.
w = net[0].weight.data
b = net[0].bias.data

# Difference between the ground-truth and learned parameters. The weight is
# reshaped to true_w's shape so the subtraction is element-wise.
print('w的估计误差：', true_w - w.reshape(true_w.shape))
print('b的估计误差：', true_b - b)

w的估计误差： tensor([0.0002, 0.0008])
b的估计误差： tensor([-0.0003])


# 3.3.8 小结

# 3.3.9 练习