In [4]:
import torch
from torchvision.models import resnet18, ResNet18_Weights

# `autograd` Usage in PyTorch

In [None]:
# Pretrained-weights ResNet-18 plus one random input/target pair to push through it.
model = resnet18(weights=ResNet18_Weights.DEFAULT)
data = torch.rand(1, 3, 64, 64)  # batch of one 3-channel 64x64 "image"
labels = torch.rand(1, 1000)  # random target with 1000 entries
# (call print(model) here to inspect the architecture)

# For each tensor, show its shape followed by its very first element.
for sample in (data, labels):
    print(sample.shape)
    print(sample.flatten()[0])

torch.Size([1, 3, 64, 64])
tensor(0.5421)
torch.Size([1, 1000])
tensor(0.2896)


In [12]:
# Feed the random "image" (a 3-channel 64x64 tensor) through the network.
prediction = model(data) # forward pass: calling the module runs its forward computation
print(prediction.shape) # one score per class -> (1, 1000), same shape as `labels`

torch.Size([1, 1000])


In [13]:
# Toy scalar "loss": the summed element-wise difference between output and target.
residual = prediction - labels
loss = residual.sum()
# Backward pass: autograd fills in .grad for every parameter that requires grad.
loss.backward()

In [8]:
# SGD over all model parameters: learning rate 0.01, momentum 0.9.
optim = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-2,
    momentum=0.9,
)


In [9]:
optim.step() # gradient descent: update each registered parameter using the gradient stored in its .grad


# Differentiation in `autograd`

In [27]:
# Setting requires_grad=True asks autograd to record every operation applied
# to these tensors so gradients can later be computed for them.
a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)

# Show both tensors, then their gradients (still None: no backward pass yet).
print(a, b, sep="\n")
print(f'a.grad: {a.grad}', f'b.grad: {b.grad}', sep="\n")

tensor([2., 3.], requires_grad=True)
tensor([6., 4.], requires_grad=True)
a.grad: None
b.grad: None


In [28]:
# Q = 3a^3 - b^2, built from the tracked tensors so it carries a grad_fn.
cubic_term = 3 * a.pow(3)
square_term = b.pow(2)
Q = cubic_term - square_term
print(Q)

tensor([-12.,  65.], grad_fn=<SubBackward0>)


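Let’s assume `a` and `b` are parameters of a neural network, and `Q` is the error.  
In NN training, we want the gradients of the error w.r.t. the parameters, i.e.  

$$\frac{\partial Q}{\partial a} = 9a^2, \qquad \frac{\partial Q}{\partial b} = -2b$$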


In [29]:
# Q is a vector, so backward() needs an explicit upstream gradient of the same
# shape. A tensor of ones weights every component of Q equally.
external_grad = torch.ones_like(Q)
Q.backward(gradient=external_grad)


In [32]:
# Gradients accumulated on the leaf tensors by the backward pass.
print(f'a.grad: {a.grad}', f'b.grad: {b.grad}', sep="\n")

# Compare against the analytic derivatives of Q = 3a^3 - b^2.
expected_a_grad = 9 * a**2
expected_b_grad = -2 * b
print(expected_a_grad == a.grad)
print(expected_b_grad == b.grad)


a.grad: tensor([36., 81.])
b.grad: tensor([-12.,  -8.])
tensor([True, True])
tensor([True, True])


In [33]:
# Freezing parameters: start again from a freshly loaded ResNet-18.
from torch import nn, optim

model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Turn off gradient tracking for every parameter of the network in one call.
model.requires_grad_(False)

In [34]:
# "fc" stands for "fully connected": model.fc is the final linear layer of the
# network and acts as its classifier.

# Suppose we want to fine-tune the model on a new dataset with 10 labels.
# We replace model.fc with a fresh linear layer; a newly constructed layer's
# parameters require gradients by default, so it is unfrozen.
# The input width is read from the existing head rather than hard-coded (it is
# 512 for ResNet-18), so the cell keeps working if the backbone is swapped.
model.fc = nn.Linear(model.fc.in_features, 10)

# Now all parameters in the model, except the parameters of model.fc, are frozen.
# The only parameters that compute gradients are the weights and bias of model.fc.

# Optimize only the classifier: register just the head's parameters with the
# optimizer so the code matches the stated intent and the frozen backbone is
# never handed to SGD at all.
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)