In [1]:
%matplotlib inline
from IPython import display
from matplotlib import pyplot as plt
import torch
import random

In [2]:
# Synthetic linear-regression data: labels = features @ true_w + true_b + noise.
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2

# Seed both random number generators used in this notebook (torch for the
# data and the initial weights, `random` for minibatch shuffling) so that
# running the cells in order from a fresh kernel regenerates the same data.
seed = 0
torch.manual_seed(seed)
random.seed(seed)

features = torch.randn(num_examples, num_inputs)
# Build the noiseless labels with a matrix-vector product so this cell keeps
# working when num_inputs / true_w change, instead of hard-coding two columns.
assert len(true_w) == num_inputs, 'true_w needs one weight per input feature'
labels = features @ torch.tensor(true_w, dtype=features.dtype) + true_b
# Add small Gaussian noise (scaled by 0.01) on top of the exact linear relation.
labels += 0.01 * torch.randn(labels.shape)

In [4]:
def use_svg_display():
    """Ask IPython to render inline Matplotlib figures as SVG."""
    # NOTE(review): newer IPython releases appear to flag
    # display.set_matplotlib_formats as deprecated in favour of
    # matplotlib_inline.backend_inline - confirm against the installed version.
    image_format = 'svg'
    display.set_matplotlib_formats(image_format)
    
def set_figsize(figsize=(3.5, 2.5)):
    """Enable SVG rendering and set Matplotlib's default figure size.

    Args:
        figsize: (width, height) pair stored in Matplotlib's
            'figure.figsize' setting; defaults to (3.5, 2.5).
    """
    # Select the output format first, then update the global rcParams entry
    # so that figures created afterwards pick up the new size.
    use_svg_display()
    rc_key = 'figure.figsize'
    plt.rcParams[rc_key] = figsize
    
# Apply the default figure settings, then plot the second feature column
# against the labels using small (s=1) markers.
set_figsize()
plt.scatter(features[:, 1].numpy(), labels.numpy(), s=1)
# Label both axes so the figure can be read without the surrounding code.
plt.xlabel('features[:, 1]')
plt.ylabel('labels');

<Figure size 252x180 with 1 Axes>

In [6]:
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of (features, labels).

    The example indices are shuffled once with ``random.shuffle`` and then
    consumed in consecutive slices of ``batch_size``; the final batch is
    shorter when ``len(features)`` is not a multiple of ``batch_size``.

    Args:
        batch_size: number of examples per minibatch.
        features: tensor indexed along its first dimension by example.
        labels: tensor indexed along its first dimension by example.

    Yields:
        ``(features[idx], labels[idx])`` for each slice of shuffled indices.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Clamp the slice end so the last batch never runs past the data.
        stop = min(start + batch_size, total)
        batch_idx = torch.tensor(order[start:stop])
        yield features[batch_idx], labels[batch_idx]

In [7]:
# Pull a single minibatch from the iterator and print it as a sanity check.
batch_size = 10
X, y = next(data_iter(batch_size, features, labels))
print(X, y)

tensor([[ 0.3597,  0.4132],
        [-1.1064,  0.5253],
        [ 2.0192,  0.4825],
        [ 0.2590, -1.3935],
        [ 0.3399, -0.1918],
        [ 0.3968,  0.9032],
        [-0.1567, -1.6845],
        [ 0.1436, -1.4471],
        [-0.6682, -0.5580],
        [-1.4976,  0.3910]]) tensor([ 3.5197,  0.2082,  6.6115,  9.4515,  5.5397,  1.9389,  9.6152,  9.3998,
         4.7456, -0.1244])


In [8]:
# Parameters to learn: small random weights (scaled by 0.01) and a zero bias,
# both flagged so autograd records gradients for them.
w = (torch.randn(num_inputs) * 0.01).requires_grad_(True)
b = torch.zeros(1).requires_grad_(True)

In [9]:
def linreg(X, w, b):
    """Linear regression model: return ``X @ w + b``.

    Args:
        X: input features.
        w: weights multiplied with ``X``.
        b: bias added to the product.
    """
    weighted_sum = X @ w
    return weighted_sum + b

In [10]:
def squared_loss(y_hat, y):
    """Element-wise halved squared error between predictions and targets.

    ``y`` is reshaped to ``y_hat``'s shape before subtracting, so the result
    has the same shape as ``y_hat``. No reduction is applied.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

In [11]:
def sgd(params, lr, batch_size):
    """Apply one minibatch SGD step to every tensor in ``params``, in place.

    Each parameter is updated as
    ``param <- param - lr * param.grad / batch_size``. The division by
    ``batch_size`` turns a gradient accumulated from a summed minibatch loss
    into an average. Gradients are not reset here; the caller is expected to
    zero them before the next backward pass.

    Args:
        params: iterable of tensors whose ``.grad`` has been populated.
        lr: learning rate (step size).
        batch_size: number of examples the gradient was accumulated over.
    """
    # Run the update with autograd disabled so the function is self-contained:
    # an in-place write to a leaf tensor that requires grad is rejected while
    # gradient tracking is on. Nesting inside a caller's no_grad block is fine.
    with torch.no_grad():
        for param in params:
            # A parameter that took no part in the backward pass has no
            # gradient yet; leave it untouched instead of failing.
            if param.grad is None:
                continue
            # sub_ updates the tensor in place and, unlike ``param[:] = ...``,
            # also works for 0-dim (scalar) parameters.
            param.sub_(lr * param.grad / batch_size)

In [12]:
# Hyperparameters plus the model and loss functions used for training.
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    # One pass over the shuffled data, taking one SGD step per minibatch.
    for X, y in data_iter(batch_size, features, labels):
        y_hat = net(X, w, b)
        l = loss(y_hat, y)
        # Backpropagate the summed per-example losses of this minibatch.
        l.sum().backward()
        with torch.no_grad():
            sgd([w, b], lr, batch_size)
            # Clear the accumulated gradients before the next minibatch.
            for param in (w, b):
                param.grad.zero_()

    # After each epoch, report the mean loss over the whole dataset.
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))

epoch 1, loss 0.047507
epoch 2, loss 0.000191
epoch 3, loss 0.000047


In [13]:
# Show the weights used to generate the data next to the learned weights.
true_w, w

([2, -3.4], tensor([ 1.9994, -3.3989], requires_grad=True))

In [14]:
# Show the bias used to generate the data next to the learned bias.
true_b, b

(4.2, tensor([4.1992], requires_grad=True))