In [1]:
import numpy as np
import torch
from torch import nn
from torch.utils import data
from d2l import torch as d2l

In [None]:
# Generate the synthetic dataset.
# true_w / true_b are the reference parameters that the fitted model is
# compared against at the end of the notebook.
# NOTE(review): 1000 is presumably the number of examples to sample — confirm
# against d2l.synthetic_data.
true_w, true_b = torch.tensor([2.0, -3.4]), 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

读取数据集

In [None]:
def load_array(data_arrays, batch_size, is_train=True):
    """Build a PyTorch mini-batch iterator over a group of tensors.

    Args:
        data_arrays: Iterable of tensors (typically a tuple or list). It is
            unpacked with ``*`` so each tensor becomes a separate argument
            to ``TensorDataset``.
        batch_size: Number of samples per mini-batch.
        is_train: When True the samples are reshuffled for every pass,
            which is what a training set needs.

    Returns:
        A ``DataLoader`` over the wrapped ``TensorDataset``.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(
        tensor_dataset,
        batch_size=batch_size,
        shuffle=is_train,
    )
    return loader

In [4]:
# Mini-batch size shared by every pass over the dataset.
batch_size = 10
# is_train is left at its default, so the loader shuffles the samples.
data_iter = load_array(data_arrays=(features, labels), batch_size=batch_size)

In [None]:
# Peek at the first mini-batch.
# iter() creates an iterator: an object that remembers its position in the
# traversal, starts at the first element and keeps going until every element
# has been visited. An iterator can only move forward, never backward.
next(iter(data_iter))

[tensor([[-0.4285, -1.1982],
         [-0.2016, -0.2909],
         [-0.2131,  0.1792],
         [ 1.2691,  0.7763],
         [-0.7904,  0.9988],
         [ 0.7530, -0.2462],
         [ 0.0113, -0.3252],
         [-1.1479,  0.1706],
         [ 1.6251, -0.6387],
         [-1.3447,  0.1676]]),
 tensor([[ 7.4228],
         [ 4.7746],
         [ 3.1689],
         [ 4.1033],
         [-0.7694],
         [ 6.5334],
         [ 5.3275],
         [ 1.3350],
         [ 9.6345],
         [ 0.9475]])]

In [7]:
# Define the model.
# `nn` is imported in the notebook's top import cell so that all dependencies
# are declared in one place and a fresh "Restart & Run All" does not depend on
# this cell for the import.
# A single fully connected layer: two input features, one output value.
net = nn.Sequential(nn.Linear(in_features=2, out_features=1))

In [24]:
# Initialize the model parameters.
# net[0] selects the first layer of the network.
# Weights are drawn from a normal distribution with mean 0 and std 0.01.
net[0].weight.data.normal_(mean=0, std=0.01)
# The bias starts at zero; as the last expression, its value is displayed.
net[0].bias.data.fill_(0)

tensor([0.])

In [None]:
# Define the loss function.
# Mean squared error; reduction='mean' (the default, spelled out here)
# averages the squared error over the samples it is given.
loss = nn.MSELoss(reduction='mean')
# Alternative for experimentation (Huber loss): loss = nn.HuberLoss()

In [10]:
# Define the optimization algorithm.
# Mini-batch stochastic gradient descent over all parameters of `net`,
# with a learning rate of 0.03.
trainer = torch.optim.SGD(params=net.parameters(), lr=0.03)

In [26]:
# Training.
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)   # scalar loss for this mini-batch
        trainer.zero_grad()   # clear gradients left over from the previous step
        l.backward()
        trainer.step()        # one optimization step: update parameters from the gradients

    # Loss on the full dataset after this epoch. This is reporting only, so it
    # runs under no_grad() to avoid building an autograd graph that would
    # never be back-propagated through.
    with torch.no_grad():
        l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')

epoch 1, loss 2.220452
epoch 2, loss 0.502706
epoch 3, loss 0.002743


In [12]:
# Read the learned parameters back from the first layer.
w = net[0].weight.data
b = net[0].bias.data

# Compare them with the reference parameters; w is reshaped to true_w's shape
# so the subtraction is element-wise.
print('w的估计误差: ', true_w - w.reshape(true_w.shape))
print('b的估计误差: ', true_b - b)

w的估计误差:  tensor([-0.0002,  0.0005])
b的估计误差:  tensor([-0.0002])
