In [1]:
import torch

x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
x, y

(tensor([1., 1., 1., 1., 1.]), tensor([0., 0., 0.]))

In [2]:
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

In [None]:
z = torch.matmul(x, w) + b # x @ w-> (5,) @ (5, 3)
z.shape

torch.Size([3])

In [7]:
z

tensor([-2.3351,  3.4849,  0.8461], grad_fn=<AddBackward0>)

In [14]:
z.grad_fn

<AddBackward0 at 0x740ebd696d40>

In [9]:

loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)
loss

tensor(1.6035, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [16]:
loss.grad_fn


<BinaryCrossEntropyWithLogitsBackward0 at 0x740fb567d960>

In [17]:
loss.backward()

In [18]:
w.grad

tensor([[0.0294, 0.3234, 0.2332],
        [0.0294, 0.3234, 0.2332],
        [0.0294, 0.3234, 0.2332],
        [0.0294, 0.3234, 0.2332],
        [0.0294, 0.3234, 0.2332]])

In [19]:
b.grad

tensor([0.0294, 0.3234, 0.2332])

In [20]:
z = torch.matmul(x, w)+b
print(z.requires_grad)

with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)

True
False


In [22]:
z = torch.matmul(x, w)+b
z_det = z.detach()
print(z_det.requires_grad)

False


In [23]:
inp = torch.eye(4, 5, requires_grad=True)
inp

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.]], requires_grad=True)

In [27]:
out = (inp+1).pow(2).t()
out

tensor([[4., 1., 1., 1.],
        [1., 4., 1., 1.],
        [1., 1., 4., 1.],
        [1., 1., 1., 4.],
        [1., 1., 1., 1.]], grad_fn=<TBackward0>)

In [29]:
out.backward(torch.ones_like(out), retain_graph=True)

In [31]:
print(f"First call\n{inp.grad}")

First call
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])


In [32]:
inp = torch.eye(4, 5, requires_grad=True)
out = (inp+1).pow(2).t()
out.backward(torch.ones_like(out), retain_graph=True)
print(f"First call\n{inp.grad}")
out.backward(torch.ones_like(out), retain_graph=True)
print(f"\nSecond call\n{inp.grad}")
inp.grad.zero_()
out.backward(torch.ones_like(out), retain_graph=True)
print(f"\nCall after zeroing gradients\n{inp.grad}")

First call
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])

Second call
tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.]])

Call after zeroing gradients
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])


In [33]:
import torch

x = torch.tensor(2.0, requires_grad=True)
y = x*3
z = y**2
z.backward()
print(f"x: {x}, x.grad: {x.grad}")

x: 2.0, x.grad: 36.0


```
dz/dy = 2*y
dz/dx = dz/dy * dy/dx (chain rule)
dy/dx = 3


x.grad --> dz/dx = 2* y * 3 = 2 * (6) * 3 = 36
y.grad --> dz/dy = 2 * 6 = 12
```


In [35]:
y.grad

  y.grad


In [34]:
print(f"y: {y}, y.grad: {y.grad}")


y: 6.0, y.grad: None


  print(f"y: {y}, y.grad: {y.grad}")
