# Introduction


**What?** Introduction to automatic differentiation in PyTorch (part 2)



# Import python modules

In [22]:
import torch, torchvision
from torch import nn, optim

In [3]:
# Print the versions of torch and torchvision used by this notebook.
for lib_label, lib in (("torch version: ", torch), ("torchvision version:", torchvision)):
    print(lib_label, lib.__version__)

torch version:  1.7.1
torchvision version: 0.8.2


# Step-by-step autograd explanation

In [None]:
"""
requires_grad=True signals to autograd that every operation on a tensor should be tracked.
Let's create 2 tensors, essentially two vectors
"""

In [4]:
# Build the two 2-element leaf tensors `a` and `b`; both are created with
# requires_grad=True so operations on them are recorded by autograd.
a, b = (
    torch.tensor(values, requires_grad=True)
    for values in ([2.0, 3.0], [6.0, 4.0])
)

In [6]:
# Show the shape of each vector, one per line.
print(a.shape, b.shape, sep="\n")

torch.Size([2])
torch.Size([2])


In [20]:
# Element-wise function of the two vectors: Q = 3a^3 - b^2.
# Because `a` and `b` require gradients, Q carries a grad_fn.
Q = 3 * a.pow(3) - b.pow(2)
print(Q)

tensor([-12.,  65.], grad_fn=<SubBackward0>)


In [17]:
# Seed the backward pass with a vector of ones, one entry per element of Q,
# then propagate it back to populate a.grad and b.grad.
external_grad = torch.ones_like(Q)
Q.backward(external_grad)

In [14]:
# Compare the gradients collected by autograd with the analytic ones:
# for Q = 3a^3 - b^2, dQ/da = 9a^2 and dQ/db = -2b.
print(a.grad == 9 * a**2)
print(b.grad == -2 * b)

tensor([True, True])
tensor([True, True])


# requires_grad flag setting

In [21]:
# Three random 5x5 inputs; only `z` is created with gradient tracking enabled.
x, y = torch.rand(5, 5), torch.rand(5, 5)
z = torch.rand(5, 5, requires_grad=True)

# NOTE: `a` and `b` are rebound here; earlier in the notebook they named the
# leaf vectors of the autograd example, so re-running those cells after this
# one will see these 5x5 sums instead.
a = x + y  # neither operand tracks gradients
b = x + z  # one operand (`z`) tracks gradients

print("Does `a` require gradients? :", a.requires_grad)
print("Does `b` require gradients? :", b.requires_grad)

Does `a` require gradients? : False
Does `b` require gradients? : True


# Application to a larger ANN

In [None]:
"""
In ANNs, parameters that don’t compute gradients are usually called frozen parameters. It is useful to “freeze” 
part of your model if you know in advance that you won’t need the gradients of those parameters (this offers 
SOME performance BENEFITS by reducing autograd computations).
"""

In [23]:
# Load ResNet-18 with its pretrained weights.
model = torchvision.models.resnet18(pretrained=True)

# Freeze the whole network by switching off gradient tracking on every parameter.
for param in model.parameters():
    param.requires_grad_(False)

In [None]:
"""
Let’s say we want to finetune the model on a new dataset with 10 labels. In resnet, the classifier is the last
linear layer model.fc. We can simply replace it with a new linear layer (-->>UNFROZEN<<-- by default) that acts
as our classifier.
"""

In [24]:
# Replace the classifier head with a fresh linear layer for the new task.
# The input width is read from the layer being replaced instead of being
# hard-coded (it was 512 here), so the cell keeps working if the backbone is
# swapped for one whose `fc` has a different width. The new layer's parameters
# are freshly created, so they are not affected by the freezing loop that ran
# over the old parameters.
NUM_CLASSES = 10  # number of labels in the new dataset
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

In [None]:
"""
Now all parameters in the model, except the parameters of model.fc, are frozen. The only parameters that compute 
gradients are the weights and bias of model.fc.
"""

In [25]:
# Hand only the classifier head's parameters (model.fc) to the optimizer;
# nothing else in the model is passed to SGD.
optimizer = optim.SGD(
    params=model.fc.parameters(),
    lr=0.01,
    momentum=0.9,
)

# References


- https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html#sphx-glr-beginner-blitz-autograd-tutorial-py

