In [9]:
""""
Autograd 
 -> automatic differentiation for all operations on Tensors
 -> engine for computing the vector-Jacobian product.
 -> compute partial derivates while applying the chain rule
 -> set requires_grad = True
"""

import torch

# require_grad = True -> tracks all operations on the tensor.
x = torch.randn(3, requires_grad=True)
y = x + 2

# y was created as a result of an operation, so it has a grad_fn attribute.
# grad_fn: references a Function that has created the Tensor.
print(x) # created by the user -> grad_fn is None
print(y)
print(y.grad_fn)


tensor([-1.4845, -0.2887,  2.1277], requires_grad=True)
tensor([0.5155, 1.7113, 4.1277], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x000002BF2D571D50>


In [10]:
# Extend the graph that starts at y: square element-wise and scale by 3 ...
z = y.mul(y).mul(3)
print(z)

# ... then reduce to a single scalar so that backward() can be called
# on it without passing an explicit gradient argument.
z = torch.mean(z)
print(z)

tensor([ 0.7972,  8.7855, 51.1146], grad_fn=<MulBackward0>)
tensor(20.2324, grad_fn=<MeanBackward0>)


In [11]:
"""
Let's compute the gradients with backpropagation
When we finish our computation we can call .backward() and have all the gradients computed automatically.
The gradient for this tensor will be accumulated into .grad attribute.
It is partial derivate of the function w.r.t. the tensor
"""

print(x.grad)
z.backward()
print(x.grad)

"""
!!!Careful!!! backward() accumulates the gradient for this tensor into .grad attribute.
we need to set the gradients to zero -> when we for loop for each epoch
!!!We need to be careful during optimization !!! optimizer.zero_grad() -> set gradient to zero
"""
pass

None
tensor([1.0310, 3.4226, 8.2555])


In [None]:
# Stop a tensor from tracking history
"""
Stop a tensor from tracking history
 -> during the training loop when we want to update our weights
 -> after training during evaluation
 
 these operations should not be part of the gradient computation. To prevent this we can use:
  * x.requires_grad_(False)
  * x.detach()
  * wrap in -> with torch.no_grad():
"""

In [19]:
# .requires_grad_(...) flips the requires_grad flag of an existing tensor in-place.
a = torch.randn(2, 2)

# None  -> leave the freshly created tensor untouched,
# True / False -> set the flag in-place before b is rebuilt from a.
for tracking in (None, True, False):
    if tracking is not None:
        print()  # blank separator line between the experiments
        a.requires_grad_(tracking)
    b = (a * a).sum()
    print(a.requires_grad)
    print(b.grad_fn)  # populated only when b was built from a tracked tensor

False
None

True
<SumBackward0 object at 0x000002BF2D918F10>

False
None


In [17]:
# .detach() hands back a tensor with the same content that takes no part in
# gradient computation (per the PyTorch docs it shares storage with the original).
a = torch.randn(2, 2, requires_grad=True)
b = a.detach()

# Original first, detached tensor second: value, then its requires_grad flag.
for tensor in (a, b):
    print(tensor)
    print(tensor.requires_grad)

tensor([[-1.5631, -0.9173],
        [-0.1732, -0.9280]], requires_grad=True)
True
tensor([[-1.5631, -0.9173],
        [-0.1732, -0.9280]])
False


In [8]:
import torch

# Operations executed inside a "with torch.no_grad():" block are not tracked.
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)

with torch.no_grad():
    # a keeps its flag; only the result computed in here is untracked.
    b = a.pow(2)
    print(b.requires_grad)

tensor([[-0.7098, -0.2711],
        [-0.6618, -1.2170]], requires_grad=True)
tensor([[0.5039, 0.0735],
        [0.4380, 1.4810]])
tensor([[0.5039, 0.0735],
        [0.4380, 1.4810]])


In [11]:
# Rebinding a name under no_grad(): the squared, untracked result replaces
# the tracked tensor that c originally referred to.
c = torch.randn(3, 3, requires_grad=True)
print(c)

with torch.no_grad():
    c = c.pow(2)
    print(c)

# Outside the context c is still that same untracked result.
print(c)



tensor([[-0.1650, -1.1990,  0.4117],
        [-1.5180,  0.1664,  0.3169],
        [ 1.5767,  0.0829,  1.3169]], requires_grad=True)
tensor([[0.0272, 1.4375, 0.1695],
        [2.3043, 0.0277, 0.1004],
        [2.4860, 0.0069, 1.7342]])
tensor([[0.0272, 1.4375, 0.1695],
        [2.3043, 0.0277, 0.1004],
        [2.4860, 0.0069, 1.7342]])


In [23]:
# NOTE(review): this cell rebinds the notebook-level name x that the first
# cell created; re-running earlier cells afterwards will see this x instead.
c = torch.randn(1, 1, requires_grad=True)
print("c:", c)
x = c.mul(2)  # built from the tracked c, outside of no_grad
print("x:", x)

with torch.no_grad():
    # The square is computed without tracking and replaces c ...
    c = c.pow(2)
    # ... after which the flag is switched back on, in-place, on the new tensor.
    c.requires_grad_(True)
    print(c)

# c now names the squared tensor; x is the tensor built before the rebinding.
print("c:", c)
print("x:", x)

c: tensor([[-0.4490]], requires_grad=True)
x: tensor([[-0.8980]], grad_fn=<MulBackward0>)
c grad: tensor([[2.]])
x: tensor([[-0.8980]], grad_fn=<MulBackward0>)
