In [26]:

"""
   In the forward phase, the autograd tape will remember all the operations it executed, 
   and in the backward phase, it will replay the operations.
   
   - requires_grad=True -> all computation will be tracked
                        -> after computing backward, a gradient w.r.t this tensor is accumulated into .grad
   - Function : interconnected and build up an acycle graph, that encodes a complete history of computation
                   -> return grad_fn
                   
   - Derivatives : call .backward() on tensor 
                   - for scalar no args, others -> put args of grad_args(ex, output values)
"""

'\n   In the forward phase, the autograd tape will remember all the operations it executed, \n   and in the backward phase, it will replay the operations.\n   \n   - requires_grad=True -> all computation will be tracked\n                        -> after computing backward, a gradient w.r.t this tensor is accumulated into .grad\n   - Function : interconnected and build up an acycle graph, that encodes a complete history of computation\n                   -> return grad_fn\n                   \n   - Derivatives : call .backward() on tensor \n                   - for scalar no args, others -> put args of grad_args(ex, output values)\n'

In [27]:
import torch

In [28]:
x = torch.ones(2,2, requires_grad=True)
print(x)
print(x.data) # with only values of tensor
print(x.grad)
print(x.grad_fn)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[1., 1.],
        [1., 1.]])
None
None


In [29]:
# Do operation to x

y = x*2
print(y)
print(y.grad)
print(y.grad_fn)


print()

z = y*y*3
out = z.mean()
print(z, out, out.grad_fn, out.grad)

tensor([[2., 2.],
        [2., 2.]], grad_fn=<MulBackward>)
None
<MulBackward object at 0x7fa83b0ea908>

tensor([[12., 12.],
        [12., 12.]], grad_fn=<MulBackward>) tensor(12., grad_fn=<MeanBackward1>) <MeanBackward1 object at 0x7fa83b0ea978> None


In [30]:
# defaut requires_grad to tensor is FALSE

a = torch.randn(2,2)
a = ((a*3)/a-1)
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
b = a*a
print(b.requires_grad)
b.requires_grad_(False)
print(b.requires_grad)

False
True
True


RuntimeError: you can only change requires_grad flags of leaf variables. If you want to use a computed variable in a subgraph that doesn't require differentiation use var_no_grad = var.detach().

In [31]:
"""
    Gradients
"""
out.backward()
print(x.grad)

tensor([[6., 6.],
        [6., 6.]])


In [32]:
out.backward()
print(x.grad)

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [None]:
"""
    By default, gradient computation flushes all the internal buffers contained in the graph,
    so if you even want to do the backward on some part of the graph twice,
    you need to pass in retain_variables = True during the first pass.
    
    To reduce memory usage, during the .backward() call, all the intermediary results are deleted
    when they are not needed anymore. Hence if you try to call .backward() again, 
    the intermediary results don’t exist and the backward pass cannot be performed 
    (and you get the error you see).
    You can call .backward(retain_graph=True) to make a backward pass that will
    not delete intermediary results, and so you will be able to call .backward() again.
    All but the last call to backward should have the retain_graph=True option.\
"""

In [34]:
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
y.backward(torch.ones(2, 2))
# the retain_variables flag will prevent the internal buffers from being freed
print(x.grad)

tensor([[1., 1.],
        [1., 1.]])


In [23]:
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
y.backward(torch.ones(2, 2), retain_graph=True)
# the retain_variables flag will prevent the internal buffers from being freed
print(x.grad)

tensor([[1., 1.],
        [1., 1.]])


In [35]:
gradient = torch.randn(2,2)

y.backward()

RuntimeError: grad can be implicitly created only for scalar outputs

In [40]:
print(y.size())
y.backward(gradient)
print(x.grad)

torch.Size([2, 2])
tensor([[-1.0540, -0.5899],
        [ 1.1447, -1.4761]])
