In [12]:
import torch
import numpy as np

# A float64 tensor created with gradient tracking explicitly switched off,
# and the element-wise exponential computed from it.
x = torch.tensor([3.], dtype=torch.float64, requires_grad=False)
y = x.exp()

# Show the tracking flag of the input and of the result, one per line.
# Both are False: y is computed only from a tensor that does not require grad.
print(x.requires_grad, y.requires_grad, sep='\n')

False
False


Later, we can change `x` to `requires_grad=True`.

In [13]:
# Switch gradient tracking on for x in place, then rebuild y so that it is
# computed from the tracked tensor (the y from the previous cell is replaced).
x.requires_grad_(True)
y = x.exp()

# Both flags are now True: x because it was just enabled, y because it is
# computed from a tensor that requires grad.
print(x.requires_grad, y.requires_grad, sep='\n')

True
True


Now we compute the gradient of `y` with respect to `x`.

In [14]:
# Backpropagate from y. y holds a single element, so backward() is called
# without an explicit `gradient` argument.
y.backward()
# y is not a leaf tensor of the computation graph (it is the result of an
# operation on x), so its .grad is not populated and this prints None.
print(y.grad)

None


  print(y.grad)


In [16]:
# .grad is populated only on leaf tensors, so the gradient produced by
# y.backward() is read from x rather than from y.
print(x.grad)
# Sanity check by hand: d/dx exp(x) = exp(x), so the gradient at x = 3
# should equal exp(3). The bare expression is displayed as the cell output.
np.exp(3.) 

tensor([20.0855], dtype=torch.float64)


20.085536923187668

The arguments of the `backward()` function

From the documentation, `backward()` has the following arguments:

backward(gradient=None, retain_graph=None, create_graph=False, inputs=None):
    gradient (Tensor or None): Gradient w.r.t. the
        tensor. If it is a tensor, it will be automatically converted
        to a Tensor that does not require grad unless ``create_graph`` is True.
        None values can be specified for scalar Tensors or ones that
        don't require grad. If a None value would be acceptable then
        this argument is optional.

In [18]:
# By default the computation graph is not retained after backward(), so a
# second backward() through the same graph raises a RuntimeError (the first
# call would need retain_graph=True to allow it).
# The error is the point of this cell, so it is caught and displayed rather
# than left uncaught: an uncaught exception would stop "Restart & Run All"
# before the cells below are executed.
try:
    y.backward()
except RuntimeError as err:
    print(f'RuntimeError: {err}')

RuntimeError: Trying to backward through the graph a second time (or directly access saved variables after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved variables after calling backward.

In [19]:
# Vector-valued example: xx = [0, 1, 2, 3] is a float32 leaf tensor tracked by
# autograd, and yy is its element-wise square.
xx = torch.arange(4, dtype=torch.float32, requires_grad=True)
yy = xx.pow(2)

In [21]:
print(yy)

# yy has four elements, so it is not a scalar: backward() without an explicit
# `gradient` argument raises a RuntimeError here.
# The error is the demonstration, so it is caught and displayed instead of
# being left uncaught, which would stop "Restart & Run All" at this cell.
try:
    yy.backward()
except RuntimeError as err:
    print(f'RuntimeError: {err}')

tensor([0., 1., 4., 9.], grad_fn=<PowBackward0>)


RuntimeError: grad can be implicitly created only for scalar outputs

In [22]:
# For a non-scalar yy, backward() has to be given an explicit gradient tensor
# with the same shape as yy; a tensor of ones weights every component equally.
yy.backward(gradient=torch.ones_like(yy))
# d(xx^2)/d(xx) = 2 * xx, so this should print the same values as 2 * xx.
print(xx.grad)

tensor([0., 2., 4., 6.])


### More generally, the `gradient` argument of `yy.backward()` can be used to compute the derivative of multiple loss functions with different **weights**: each entry of the argument weights the corresponding component of `yy`. Calling `yy.backward(torch.ones_like(yy))` as above gives every loss the same weight. Alternatively, one could pass unequal weights, as in the weighted example further below:

In [25]:
# Report which tensors are leaves of the computation graph: yy is produced by
# an operation on xx, while xx was created directly.
print('yy is leaf node?', yy.is_leaf)
print('xx is leaf node?', xx.is_leaf)

yy is leaf node? False
xx is leaf node? True


In [35]:
# Start from a freshly created xx so that xx.grad holds only this cell's result.
xx = torch.arange(4, dtype=torch.float32, requires_grad=True)
yy = xx.pow(2)  # element-wise square
print(f'yy = {yy}')

# Weight the four components of yy by 0.01, 0.1, 1 and 10 respectively;
# the resulting gradient is weight * 2 * xx for each component.
yy.backward(gradient=torch.tensor([0.01, 0.1, 1., 10.]))
print(xx.grad)

yy = tensor([0., 1., 4., 9.], grad_fn=<PowBackward0>)
tensor([ 0.0000,  0.2000,  4.0000, 60.0000])


### Another thing I observed: when `xx` is not re-initialized and only `yy = torch.pow(xx, 2)` is recomputed before each `backward()` call, the reported derivative changes.

In [36]:
# xx is created once, outside the loop, so the same leaf tensor (and its .grad)
# is reused by every iteration.
xx = torch.arange(4, dtype=torch.float32, requires_grad=True)
for i in range(5):
    # Rebuild yy on every pass: each backward() call needs a fresh graph.
    yy = xx.pow(2)
    print(f'xx={xx} and yy={yy}')
    # Same per-component weights on every pass; only xx.grad changes between passes.
    yy.backward(gradient=torch.tensor([0.01, 0.1, 1., 10.]))
    print(f'{i}-th gradient is {xx.grad}')

xx=tensor([0., 1., 2., 3.], requires_grad=True) and yy=tensor([0., 1., 4., 9.], grad_fn=<PowBackward0>)
0-th gradient is tensor([ 0.0000,  0.2000,  4.0000, 60.0000])
xx=tensor([0., 1., 2., 3.], requires_grad=True) and yy=tensor([0., 1., 4., 9.], grad_fn=<PowBackward0>)
1-th gradient is tensor([  0.0000,   0.4000,   8.0000, 120.0000])
xx=tensor([0., 1., 2., 3.], requires_grad=True) and yy=tensor([0., 1., 4., 9.], grad_fn=<PowBackward0>)
2-th gradient is tensor([  0.0000,   0.6000,  12.0000, 180.0000])
xx=tensor([0., 1., 2., 3.], requires_grad=True) and yy=tensor([0., 1., 4., 9.], grad_fn=<PowBackward0>)
3-th gradient is tensor([  0.0000,   0.8000,  16.0000, 240.0000])
xx=tensor([0., 1., 2., 3.], requires_grad=True) and yy=tensor([0., 1., 4., 9.], grad_fn=<PowBackward0>)
4-th gradient is tensor([  0.,   1.,  20., 300.])


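It seems that each call to `backward()` adds its result to `xx.grad` (the gradients accumulate) instead of overwriting it. To start from a clean gradient, reset it before the next call, e.g. with `xx.grad.zero_()` or `xx.grad = None`.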