In [1]:
import torch

In [25]:
# Input matrix: one row per sample (5 rows), one column per feature (3 columns).
data = torch.tensor(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [96, 69, 70],
    ],
    dtype=torch.float32,
)

In [17]:
# Target matrix: one row per sample (5 rows), one column per predicted quantity (2 columns).
targets = torch.tensor(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=torch.float32,
)

In [20]:
# Randomly initialised parameters of a linear map from 3 inputs to 2 outputs.
# requires_grad=True makes autograd track every operation that uses them.
w = torch.randn(2, 3, dtype=torch.float32, requires_grad=True)
b = torch.randn(2, dtype=torch.float32, requires_grad=True)


def model(x):
    """Apply the linear map ``x @ w.T + b`` using the module-level ``w`` and ``b``.

    ``x`` has 3 columns (one per input feature); the result has 2 columns,
    one per row of ``w``. ``b`` is broadcast across the rows of the product.
    """
    projected = torch.matmul(x, w.t())
    return projected + b

In [23]:
# Inspect the bias vector.
b

tensor([1.8404, 0.9305], requires_grad=True)

In [26]:
# Forward pass over the whole input matrix: one row of predictions per input row.
model(data)

tensor([[143.1101, 125.3820],
        [189.5668, 155.5463],
        [331.1159, 119.7950],
        [ 51.6976, 199.0910],
        [128.1172, 178.0680]], grad_fn=<AddBackward0>)

In [29]:
def mse(preds, targets):
    """Return the mean squared error between ``preds`` and ``targets``.

    The element-wise differences are squared, summed, and divided by the
    number of elements in the difference tensor. The result is a 0-dim tensor.
    """
    residual = preds - targets
    squared = residual * residual
    return squared.sum() / residual.numel()

In [43]:
# Loss of the current parameters on the full dataset; kept in `loss` so that
# backward() can be called on it afterwards.
loss = mse(model(data), targets)
loss

tensor(7340.7329, grad_fn=<DivBackward0>)

In [44]:
# Backpropagate: accumulates d(loss)/d(param) into the .grad attribute of each
# requires_grad tensor that contributed to `loss`.
loss.backward()

In [34]:
# Inspect the weight matrix.
w

tensor([[-0.7107,  2.9930, -0.1716],
        [ 2.1117, -0.5682,  0.1946]], requires_grad=True)

In [45]:
# Step size for the manual gradient-descent update.
lr = 1e-4

# The update itself must not be tracked by autograd, hence no_grad().
# sub_ modifies the parameters in place so w and b keep their identity.
with torch.no_grad():
    w.sub_(lr * w.grad)
    b.sub_(lr * b.grad)

In [46]:
# Recompute the loss with the current parameters.
mse(model(data), targets)

tensor(5460.6235, grad_fn=<DivBackward0>)

In [38]:
# Inspect the gradient stored on w (populated by loss.backward()).
w.grad

tensor([[8026.6211, 9364.7734, 5170.7500],
        [6012.3164, 3557.3611, 3047.7263]])

In [42]:
# Reset b's gradient in place; backward() adds to existing gradients rather
# than overwriting them, so stale values must be cleared between steps.
b.grad.zero_()

tensor([0., 0.])

In [41]:
# Reset w's gradient in place (counterpart of b.grad.zero_()); backward() adds
# to existing gradients, so w.grad must be cleared before the next backward pass.
w.grad.zero_()

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [47]:
import torch.nn as nn

In [48]:
# Rebind `model` to PyTorch's built-in linear layer (3 input features -> 2 outputs).
# From here on `model` is this nn.Linear instance, not the earlier hand-written function;
# the layer creates and initialises its own weight and bias.
model = nn.Linear(in_features=3, out_features=2)

In [54]:
# List the layer's learnable parameters (weight matrix first, then bias).
list(model.parameters())

[Parameter containing:
 tensor([[-0.5261, -0.2469, -0.3408],
         [-0.3165, -0.1057,  0.0958]], requires_grad=True),
 Parameter containing:
 tensor([0.4363, 0.4240], requires_grad=True)]

In [55]:
from torch.nn import functional as F

In [57]:
# Use PyTorch's built-in mean-squared-error loss function.
loss_fn = F.mse_loss

In [60]:
# Loss of the current `model` on the full dataset.
loss_fn(model(data), targets)

tensor(22216.6992, grad_fn=<MseLossBackward0>)

In [66]:
# SGD with momentum over the layer's parameters.
# NOTE(review): lr and momentum are hard-coded here; consider naming them as constants.
optim = torch.optim.SGD(model.parameters(), lr=1e-6, momentum=0.9)

In [81]:
# One full-batch training step. Re-run this cell to take further steps.
pred = model(data)              # forward pass
loss = loss_fn(pred, targets)   # loss at the current parameters
loss.backward()                 # accumulate gradients into each parameter's .grad
print(loss)                     # value printed is the loss *before* this step's update
optim.step()                    # update the parameters from their gradients
optim.zero_grad()               # clear gradients so the next backward() starts fresh

tensor(302.9650, grad_fn=<MseLossBackward0>)
