In [131]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

In [132]:
# Ground-truth parameters of the linear model used to synthesize the dataset.
true_w = torch.tensor([2., -3.4])
true_b = torch.tensor(4.2)
num_examples = 1000

# NOTE(review): d2l.synthetic_data presumably returns a feature matrix and noisy
# labels following y = X @ true_w + true_b — confirm against the d2l source.
features, labels = d2l.synthetic_data(true_w, true_b, num_examples)

In [133]:
def load_array(data_arrays, batch_size, is_train=True):
    """Build a minibatch iterator over a group of aligned tensors.

    Args:
        data_arrays: Iterable of tensors; it is unpacked into a
            ``TensorDataset``, so every tensor must share the same first
            dimension.
        batch_size: Number of examples per minibatch.
        is_train: When true, examples are reshuffled on every pass.

    Returns:
        A ``DataLoader`` yielding one list of tensor slices per minibatch.
    """
    return data.DataLoader(
        dataset=data.TensorDataset(*data_arrays),
        batch_size=batch_size,
        shuffle=is_train,
    )


batch_size = 10
# is_train is left at its default (True), so this loader shuffles the examples.
data_iter = load_array((features, labels), batch_size)

# Peek at one minibatch: a fresh iterator is created only for this display,
# and the resulting [features, labels] pair is echoed as the cell output.
next(iter(data_iter))

[tensor([[ 1.9510, -0.7591],
         [-1.7607, -0.5033],
         [-1.3429, -0.3691],
         [ 0.0261, -0.8050],
         [ 0.5833,  1.3862],
         [ 0.1054, -1.8401],
         [-0.5974,  0.2927],
         [-0.3062, -0.7229],
         [ 0.8274,  0.3321],
         [-0.9414, -0.6336]]),
 tensor([[10.6537],
         [ 2.3915],
         [ 2.7816],
         [ 6.9881],
         [ 0.6572],
         [10.6729],
         [ 2.0062],
         [ 6.0278],
         [ 4.7404],
         [ 4.4652]])]

In [134]:
from torch import nn

# Linear regression model: one fully connected layer, 2 input features -> 1 output.
net = nn.Sequential(nn.Linear(2, 1))

In [135]:
# Initialize the linear layer in place: weights drawn from a normal
# distribution with mean 0 and std 0.01, bias set to zero.
net[0].weight.data.normal_(0, 0.01)
# fill_ returns the modified tensor, which the notebook echoes as cell output.
net[0].bias.data.fill_(0)

tensor([0.])

In [136]:
# Huber loss with PyTorch's default settings (no arguments overridden here).
loss = nn.HuberLoss()
# Plain minibatch SGD over every parameter of net, learning rate 0.06.
trainer = torch.optim.SGD(net.parameters(), lr=0.06)

In [137]:
num_epochs = 3
for epoch in range(num_epochs):
    # One pass over the shuffled minibatches produced by data_iter.
    for X, y in data_iter:
        l = loss(net(X), y)  # minibatch loss
        trainer.zero_grad()  # clear gradients accumulated by the previous step
        l.backward()
        # Show the freshly computed gradients of the linear layer before the
        # optimizer consumes them.
        print(f'w grad {net[0].weight.grad}, b grad {net[0].bias.grad}')
        trainer.step()
    # Epoch summary on the full dataset. This forward pass is never
    # backpropagated, so run it under no_grad to avoid recording an autograd
    # graph over all examples; the reported value is unchanged.
    with torch.no_grad():
        l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')

w grad tensor([[-0.1874,  0.6536]]), b grad tensor([-0.8000])
w grad tensor([[-0.5618,  0.3832]]), b grad tensor([-0.7664])
w grad tensor([[0.1323, 0.6826]]), b grad tensor([-0.7816])
w grad tensor([[-0.1349, -0.1279]]), b grad tensor([-0.8000])
w grad tensor([[-0.8784,  0.0983]]), b grad tensor([-0.5586])
w grad tensor([[0.1221, 0.4159]]), b grad tensor([-0.4829])
w grad tensor([[-0.3878,  0.8023]]), b grad tensor([-0.4000])
w grad tensor([[-0.4211,  0.6969]]), b grad tensor([-0.9840])
w grad tensor([[-0.5804,  0.5607]]), b grad tensor([-0.6000])
w grad tensor([[-0.4410,  0.1210]]), b grad tensor([-0.7023])
w grad tensor([[0.3463, 0.2812]]), b grad tensor([-0.7125])
w grad tensor([[-0.2648,  0.3172]]), b grad tensor([-0.2621])
w grad tensor([[-0.1386,  0.1788]]), b grad tensor([-0.9525])
w grad tensor([[-0.5545,  0.7228]]), b grad tensor([-0.6000])
w grad tensor([[-0.5746,  0.6766]]), b grad tensor([-0.8610])
w grad tensor([[-0.3995,  0.1589]]), b grad tensor([-0.6850])
w grad tensor(