In [28]:
import torch
import numpy as np
from torch.nn import functional as F

In [29]:
# autograd.grad
#
# Example 1: linear model y = w @ x + b.
# The following cells take an MSE loss on y and use torch.autograd.grad
# to obtain d(loss)/dw and d(loss)/db.

# Inputs: two rows, three columns -> shape (2, 3).
x = torch.tensor(
    [[1, 2, 3],
     [4, 5, 6]],
    dtype=torch.float32,
)

# Parameters are created as leaf tensors that track gradients from the start.
w = torch.ones(2, dtype=torch.float32, requires_grad=True)
b = torch.ones(1, dtype=torch.float32, requires_grad=True)

# (2,) @ (2, 3) -> (3,); b of shape (1,) broadcasts over the three outputs.
y = torch.matmul(w, x) + b

print(y)

tensor([ 6.,  8., 10.], grad_fn=<AddBackward0>)


In [30]:
# Shapes of the input, weight, bias and output, in that order.
tuple(t.shape for t in (x, w, b, y))

(torch.Size([2, 3]), torch.Size([2]), torch.Size([1]), torch.Size([3]))

In [31]:
# MSE between the prediction y and an all-ones target of the same shape.
# F.mse_loss is documented as mse_loss(input, target), so the prediction is
# passed first; the squared error is symmetric, so the loss value and the
# gradients with respect to w and b are unchanged.
loss = F.mse_loss(y, torch.ones_like(y))
print(loss)

tensor(51.6667, grad_fn=<MseLossBackward0>)


In [32]:
# torch.autograd.grad returns a tuple with one gradient per requested input,
# so each result here is a 1-tuple. retain_graph=True keeps the graph alive
# so that loss can be differentiated more than once.
dloss_dw, dloss_db = (
    torch.autograd.grad(outputs=loss, inputs=[param], retain_graph=True)
    for param in (w, b)
)

In [33]:
# Each entry is the 1-tuple returned by torch.autograd.grad.
(dloss_dw, dloss_db)

((tensor([30.6667, 72.6667]),), (tensor([14.]),))

In [37]:
# loss.backward()
#
# Example 2: y_hat = softmax(w @ x + b).
# The following cells build an MSE loss on y_hat and call loss.backward(),
# after which the gradients are read from w.grad and b.grad.

x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]], dtype=torch.float32)

# Fresh leaf parameters, so this example does not share state with example 1.
w = torch.ones(2, dtype=torch.float32, requires_grad=True)
b = torch.ones(1, dtype=torch.float32, requires_grad=True)

# w @ x + b is 1-D (shape (3,)), so normalise over its only axis. Passing
# dim explicitly avoids the deprecated implicit-dimension choice, which
# emits a UserWarning.
y_hat = F.softmax(w @ x + b, dim=0)

print(y_hat)

tensor([0.0159, 0.1173, 0.8668], grad_fn=<SoftmaxBackward0>)


  y_hat = F.softmax(w@x + b)


In [41]:
# Target values for example 2, shape (2, 3).
y = torch.tensor(
    [[3.0, 6.0, 9.0],
     [12.0, 15.0, 18.0]],
    dtype=torch.float32,
)

print(y)

tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]])


In [43]:
# y_hat has shape (3,) while y has shape (2, 3). Passing them to F.mse_loss
# as-is relies on implicit broadcasting and triggers a size-mismatch
# UserWarning. Expanding y_hat to y's shape performs the same broadcast
# explicitly: the loss value and the gradients are unchanged, and the
# warning goes away.
# NOTE(review): confirm that a (2, 3) target is really intended for a
# three-element prediction.
loss = F.mse_loss(y_hat.expand_as(y), y)

print(loss)

tensor(128.0533, grad_fn=<MseLossBackward0>)


  loss = F.mse_loss(y_hat, y)


In [44]:
# Backpropagate from the scalar loss; the resulting gradients are read from
# w.grad and b.grad in the next cell.
loss.backward()

In [45]:
# Gradients stored on the parameters by loss.backward().
tuple(param.grad for param in (w, b))

(tensor([-0.2506, -0.2506]), tensor([-1.4901e-08]))