In [46]:
import torch
# Toy single-layer setup: an all-ones input of size 5 and an all-zeros target of size 3.
x = torch.ones(5)
y = torch.zeros(3)

# Trainable parameters; requires_grad=True asks autograd to record operations on them.
w = torch.rand((5, 3), requires_grad=True)
b = torch.rand((3,), requires_grad=True)

# Logits of the linear layer: x @ w + b.
z = x @ w + b

In [47]:
# Binary cross-entropy computed directly on the logits z against the target y.
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)

In [48]:
# Show the loss tensor; its repr includes the grad_fn that produced it.
loss

tensor(3.0605, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [49]:
# Each tensor produced by a tracked operation keeps a reference to its backward node.
for message, node in (
    ('Gradient function for z =', z.grad_fn),
    ('Gradient function for loss =', loss.grad_fn),
):
    print(message, node)

Gradient function for z = <AddBackward0 object at 0x1621f7f70>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x1621f7700>


In [50]:
# Backpropagate from the loss, then show the gradient stored on each parameter.
loss.backward()
for param in (w, b):
    print(param.grad)

tensor([[0.3154, 0.3013, 0.3267],
        [0.3154, 0.3013, 0.3267],
        [0.3154, 0.3013, 0.3267],
        [0.3154, 0.3013, 0.3267],
        [0.3154, 0.3013, 0.3267]])
tensor([0.3154, 0.3013, 0.3267])


In [55]:
# Gradient tracking can be switched off with torch.no_grad().
# An alternative is to call z.detach() on an existing tensor.
# This is used when freezing part of the network;
# it also speeds up computations at test time.

with torch.no_grad():
    z = x @ w + b

z.requires_grad

False

In [56]:
# autograd can also compute Jacobian products for non-scalar outputs by passing a gradient argument to backward()

In [58]:
# Identity matrix as the differentiable input; out is a non-scalar function of it.
inputs = torch.eye(5, requires_grad=True)
out = (inputs + 1).pow(2)

In [60]:
# out is not a scalar, so backward() needs an explicit gradient tensor of matching shape;
# retain_graph=True keeps the graph so backward() can be called again.
out.backward(gradient=torch.ones_like(inputs), retain_graph=True)

print("First call\n", inputs.grad)

First call
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])


In [61]:
# Running backward again without clearing .grad adds onto the stored gradient.
out.backward(gradient=torch.ones_like(inputs), retain_graph=True)
print("Second call\n", inputs.grad)

Second call
 tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])


In [63]:
# Reset the stored gradient in place first; otherwise backward() keeps accumulating into it.
inputs.grad.zero_()
out.backward(gradient=torch.ones_like(inputs), retain_graph=True)
print("\nCall after zeroing gradients\n", inputs.grad)


Call after zeroing gradients
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
