In [1]:
import torch

In [2]:
tensor1 = torch.Tensor([[1,2,3],[4,5,6]])
tensor1

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [3]:
tensor2 = torch.Tensor([[7,8,9],[10,11,12]])
tensor2

tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])

In [4]:
# true -> PyTorch tracks computations for a tensor in the forward phase and will calculate gradients for this tensor in the backward phase
# we need to enabled it so the history is tracked for the tensor and gradients are calculated with respect to the tensor
tensor1.requires_grad

False

In [5]:
tensor1.requires_grad_()

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)

In [6]:
tensor1.requires_grad

True

In [7]:
# no gradients available yet -> though it's part of a computation graph, no forward or backward passes been made yet
print(tensor1.grad)

None


In [8]:
# tensors and functions in a computation graph (tensors = nodes; functions = edges)
print(tensor1.grad_fn)

None


In [9]:
output_tensor = tensor1 * tensor2
output_tensor.requires_grad

True

In [10]:
# grad fn is "multiplication" backward (because tensor1 * tensor2)
print(output_tensor.grad_fn)

<MulBackward0 object at 0x0000022DEE05AD90>


In [11]:
# though two operations were used, tensor will have the grad fn set to the last one -> mean
output_tensor = (tensor1 * tensor2).mean()
print(output_tensor.grad_fn)

<MeanBackward0 object at 0x0000022DEE53AA00>


In [12]:
# backward computation for the computation graph
output_tensor.backward()

In [14]:
# gradient of tensor1 -> partial derivatives for parameters in tensor1 calculated with reference to the output tensor
print(tensor1.grad)
# shape of the gradient will match the shape of the tensor
tensor1.grad.shape, tensor1.shape

tensor([[1.1667, 1.3333, 1.5000],
        [1.6667, 1.8333, 2.0000]])


(torch.Size([2, 3]), torch.Size([2, 3]))

In [15]:
# though the tensor2 was involved in the calc. as well, because of requires_grad set to false -> no gradient
print(tensor2.grad)

None


In [16]:
# torch.no_grad() to stop autograd from tracking the history on those tensors -> every computation within the with block with no history
with torch.no_grad():
    new_tensor = tensor1 * 3
    print('new_tensor = ', new_tensor)
    print('requires_grad for tensor = ', tensor1.requires_grad)
    print('requires_grad for tensor = ', tensor2.requires_grad)
    print('requires_grad for tensor = ', new_tensor.requires_grad)

new_tensor =  tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]])
requires_grad for tensor =  True
requires_grad for tensor =  False
requires_grad for tensor =  False


In [17]:
def calculate(t):
    return t * 2

# decorater @torch.no_grad() -> gradients won't be generated; no tracking history, even though the tensors require it
@torch.no_grad()
def calculate_with_no_grad(t):
    return t * 2

In [18]:
result_tensor = calculate(tensor1)
result_tensor

tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.]], grad_fn=<MulBackward0>)

In [19]:
result_tensor_no_grad = calculate_with_no_grad(tensor1)
result_tensor_no_grad

tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.]])

In [20]:
result_tensor_no_grad.requires_grad

False

In [21]:
with torch.no_grad():
    new_tensor_no_grad = tensor1 * 3
    print('new_tensor_no_grad = ', new_tensor_no_grad)

    with torch.enable_grad():
        new_tensor_grad = tensor1 * 3
        print('new_tensor_grad = ', new_tensor_grad)

new_tensor_no_grad =  tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]])
new_tensor_grad =  tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]], grad_fn=<MulBackward0>)


In [22]:
# the option to specify the requires_grad on tensor declaration
tensor_one = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
tensor_one

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [23]:
tensor_two = torch.tensor([[5.0, 6.0], [7.0, 8.0]])
tensor_two.requires_grad_()

tensor([[5., 6.],
        [7., 8.]], requires_grad=True)

In [24]:
final_tensor = (tensor_one + tensor_two).mean()
final_tensor

tensor(9., grad_fn=<MeanBackward0>)

In [26]:
print(final_tensor.requires_grad)
print(tensor_one.grad)
print(tensor_two.grad)

True
None
None


In [27]:
final_tensor.backward()

print(final_tensor.requires_grad)
print(tensor_one.grad)
print(tensor_two.grad)

True
tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])
tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])


In [28]:
# detached tensor always requires_grad set to false -> isn't part of the computation graph
detached_tensor = tensor_one.detach()
detached_tensor

tensor([[1., 2.],
        [3., 4.]])

In [30]:
mean_tensor = (tensor_one + detached_tensor).mean()
mean_tensor.backward()

In [31]:
print(tensor_one.grad)
print(detached_tensor.grad)

tensor([[0.7500, 0.7500],
        [0.7500, 0.7500]])
None
