In [12]:
import random
import torch
from d2l import torch as d2l

In [13]:
# Generate a synthetic dataset
def synthetic_data(w, b, num_examples, noise_std=0.01):
    """Generate a noisy linear dataset ``y = Xw + b + noise``.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of rows to sample.
        noise_std: Standard deviation of the zero-mean Gaussian noise added
            to the labels. Defaults to 0.01; pass 0 for noise-free labels.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(num_examples, len(w))`` and is
        drawn from a standard normal distribution; ``y`` holds the labels
        reshaped to ``(-1, 1)``.
    """
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    # Features are sampled before the noise; keep this order so that seeded
    # runs stay reproducible.
    y += torch.normal(0, noise_std, y.shape)
    return X, y.reshape((-1, 1))

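PyTorch 的 torch.matmul 会根据两个参数的维数选择运算方式：当第一个参数是 2 维张量、第二个参数是 1 维张量（向量）时，执行的是矩阵-向量乘法，向量会被自动“适配”为与矩阵兼容的形状。
前提是维度长度匹配：这里 X 的形状为 (num_examples, len(w))，w 的形状为 (len(w),)，X 的最后一维必须与 w 的长度相等；结果是形状为 (num_examples,) 的 1 维张量，所以函数最后用 reshape((-1, 1)) 把标签变成列向量。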

In [14]:
# Ground-truth parameters of the linear model used to generate the data.
true_w = torch.tensor([20.0, -5.0])
true_b = 10.0
# Sample 1000 (feature, label) pairs from the noisy linear model.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)
# Peek at the first five rows of each tensor.
print(features[:5], '\n', labels[:5])

tensor([[ 0.0858,  0.3810],
        [-0.3824, -0.4119],
        [-0.4348, -0.2424],
        [-0.9042,  0.0497],
        [ 2.7399, -1.0111]]) 
 tensor([[ 9.8119],
        [ 4.4183],
        [ 2.5063],
        [-8.3395],
        [69.8503]])


In [15]:
# Read the dataset in minibatches
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    The example order is shuffled once with ``random.shuffle`` and then
    walked in steps of ``batch_size``; the final batch is shorter when
    ``len(features)`` is not a multiple of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing past the end of the list is clamped automatically.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

range(num) 会生成一个包含 0, 1, 2, ..., num-1 的可迭代对象
list(range(num)) 将这个可迭代对象转换为列表形式

与 PyTorch 张量的索引机制兼容：
当你使用features[batch_indices]这样的索引操作时，PyTorch 的张量既可以接收整数 tensor，也可以接收普通的 Python 整数列表作为索引（例如 features[[0, 2, 5]] 同样有效），两者都会按给定下标取出对应的行。因此这里转换为 tensor 并不是必须的，只是让索引的类型保持明确、统一。
支持 GPU 加速：
如果后续需要将数据迁移到 GPU 上进行计算，tensor 类型的索引可以通过 .to(device) 放到与数据相同的设备上，与 GPU 上的张量无缝配合；Python 列表本身不能放到 GPU 上，作为索引使用时需要先被转换为 tensor。

In [16]:
batch_size = 10
# Pull only the first shuffled minibatch and display it.
X, y = next(iter(data_iter(batch_size, features, labels)))
print(X, '\n', y)

tensor([[-0.0693, -0.9389],
        [ 0.1107,  1.4232],
        [ 0.5632,  0.0621],
        [-0.2045,  1.2542],
        [ 1.0693, -1.6508],
        [-0.4675, -0.3359],
        [ 2.2012,  1.7287],
        [ 0.5624,  0.8335],
        [ 1.1232,  1.4098],
        [-0.4698, -0.8444]]) 
 tensor([[13.3274],
        [ 5.0813],
        [20.9632],
        [-0.3705],
        [39.6374],
        [ 2.3237],
        [45.3884],
        [17.0877],
        [25.4099],
        [ 4.8086]])


In [17]:
# Initialize model parameters: small random weights and a zero bias,
# both tracked by autograd.
w = torch.normal(0, 0.01, size=(2, 1)).requires_grad_(True)
b = torch.zeros(1).requires_grad_(True)

In [18]:
# Model
def linreg(X, w, b):
    """Linear regression model: return ``X @ w + b``."""
    projection = X @ w
    return projection + b

In [19]:
# Loss function: squared loss
def squared_loss(y_hat, y):
    """Return the element-wise squared error ``(y_hat - y)^2 / 2``.

    ``y`` is reshaped to ``y_hat``'s shape first, so the two tensors only
    need the same number of elements. No reduction is applied.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual.pow(2) / 2

In [20]:
# Optimization algorithm
def sgd(params, lr, batch_size):
    """Apply one in-place gradient-descent step, then clear the gradients.

    Args:
        params: Iterable of tensors to update; each is modified in place.
        lr: Learning rate (step size).
        batch_size: Accepted for signature compatibility but not used. The
            gradient is applied as-is, so the caller is expected to have
            averaged the loss over the batch before calling ``backward()``.

    Parameters whose ``grad`` is ``None`` (they took no part in the last
    backward pass) are skipped rather than raising ``TypeError``.
    """
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad
            # Reset so the next backward pass does not accumulate into it.
            param.grad.zero_()

In [21]:
# Training
lr, num_epochs = 0.03, 3
net, loss = linreg, squared_loss
for epoch in range(num_epochs):
    # One pass over the shuffled data: forward, backward, parameter step.
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)
        # Back-propagate the batch-averaged loss.
        torch.mean(l).backward()
        sgd([w, b], lr, batch_size)
    # Evaluate on the full dataset without recording a graph.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
    print(f'epoch {epoch + 1}, loss {float(train_l.mean()):.8f}')
    print(f'w: {w},b: {b}')


epoch 1, loss 0.55139697
w: tensor([[19.1314],
        [-4.5967]], requires_grad=True),b: tensor([9.5636], requires_grad=True)
epoch 2, loss 0.00135584
w: tensor([[19.9604],
        [-4.9726]], requires_grad=True),b: tensor([9.9803], requires_grad=True)
epoch 3, loss 0.00005603
w: tensor([[19.9980],
        [-4.9979]], requires_grad=True),b: tensor([9.9989], requires_grad=True)


In [22]:
# Compare the learned parameters with the ground truth used to generate the data.
print(true_w - w.reshape(true_w.shape), w) # reshape the 2-D column vector w into true_w's 1-D shape
# The printed differences carry a grad_fn because w and b still require gradients.
print(true_b - b, b)

tensor([ 0.0020, -0.0021], grad_fn=<SubBackward0>) tensor([[19.9980],
        [-4.9979]], requires_grad=True)
tensor([0.0011], grad_fn=<RsubBackward1>) tensor([9.9989], requires_grad=True)
