In [15]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

In [16]:
# Ground-truth parameters of the linear model; the regression further down is
# expected to recover these values.
true_w = torch.tensor([2, -3.4], dtype=torch.float32)
true_b = 4.2

# Number of synthetic examples requested from d2l.synthetic_data.
num_examples = 1000
features, labels = d2l.synthetic_data(true_w, true_b, num_examples)

In [33]:
from typing import Tuple
def load_array(data_arrays: Tuple[torch.Tensor, ...], batch_size: int, is_train: bool = True) -> data.DataLoader:
  """Wrap in-memory tensors in a PyTorch ``DataLoader``.

  Args:
      data_arrays (Tuple[torch.Tensor, ...]): Tensors to iterate over together,
          e.g. ``(features, labels)``. They are unpacked into a
          ``TensorDataset``, so they must all have the same size along their
          first dimension.
      batch_size (int): Number of samples per mini-batch.
      is_train (bool, optional): Whether to shuffle the samples. Defaults to True.

  Returns:
      data.DataLoader: Iterable that yields, per step, one batched tensor for
      each entry of ``data_arrays`` (in the same order).
  """
  dataset = data.TensorDataset(*data_arrays)
  # Keyword arguments make it explicit which DataLoader options are being set.
  return data.DataLoader(dataset, batch_size=batch_size, shuffle=is_train)

# Mini-batch size used for every training step.
batch_size = 10
# Iterator over (features, labels) mini-batches; is_train keeps its default
# (True), so the samples are shuffled.
data_iter = load_array(data_arrays=(features, labels), batch_size=batch_size)

In [21]:
from torch import nn
# Linear regression expressed as a one-layer network:
# 2 input features -> 1 output value.
net = nn.Sequential(
    nn.Linear(in_features=2, out_features=1),
)

Linear(in_features=2, out_features=1, bias=True)

In [28]:
# Initialise the model parameters before gradient descent: weights are drawn
# from a normal distribution (mean 0, std 0.01) and the bias is set to zero.
# The nn.init helpers modify the parameters in place without recording the
# operation in autograd, replacing direct mutation through the legacy `.data`
# attribute.
nn.init.normal_(net[0].weight, mean=0.0, std=0.01)
nn.init.zeros_(net[0].bias)
# Last expression of the cell: show the bias values to confirm they are zero.
net[0].bias.detach()

tensor([0.])

In [29]:
# Loss function: mean squared error, averaged over all elements
# ("mean" is also the default reduction).
loss = nn.MSELoss(reduction="mean")

我们要训练的参数从`net.parameters()`中获得。

In [31]:
# Stochastic gradient descent over every parameter exposed by the network.
learning_rate = 0.03
trainer = torch.optim.SGD(params=net.parameters(), lr=learning_rate)

In [41]:
# Number of full passes over the training data.
num_epoches = 3
for epoch in range(num_epoches):
  for X, y in data_iter:
    batch_loss: torch.Tensor = loss(net(X), y)
    # Gradients accumulate across backward() calls, so clear the ones left
    # over from the previous step first.
    trainer.zero_grad()
    # backward() stores the gradient of the loss in the .grad attribute of
    # each parameter (the weight and bias of the linear layer).
    batch_loss.backward()
    # step() reads those .grad values and updates the parameters in place.
    # The optimizer was built from net.parameters(), so it holds the very same
    # tensors as the network: the update is immediately visible through net.
    trainer.step()
  # Per-epoch report on the full dataset. This is evaluation only, so autograd
  # recording is switched off to avoid building an unused graph.
  with torch.no_grad():
    l = loss(net(features), labels)
  print(f"epoch {epoch + 1},loss {l:f}")

epoch 1,loss 0.000097
epoch 2,loss 0.000096
epoch 3,loss 0.000096


In [42]:
# Compare the learned parameters with the ground truth used to generate the data.
w = net[0].weight.data
b = net[0].bias.data
# The learned weight is reshaped to true_w's shape so the subtraction is
# element-wise.
print('w的估计误差：', true_w - w.reshape(true_w.shape))
print('b的估计误差：', true_b - b)

w的估计误差： tensor([0.0003, 0.0005])
b的估计误差： tensor([9.4414e-05])
