In this notebook, we learn how to use pretrained models for image classification.

Let's first import our data.

In [1]:
# Import libraries
import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torch import optim
from torch import nn


In [2]:
# train, val, and test data paths
train_path = 'Fish-vs-Cats/train'
val_path = 'Fish-vs-Cats/val'
test_path = 'Fish-vs-Cats/test'


In [3]:
# Define transform
transform= transforms.Compose([transforms.Resize((64,64)),
                           transforms.ToTensor(),
                           transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                             std= [0.229, 0.224, 0.225])])

In [4]:
# Import train, val, and test datasets
train_dataset = datasets.ImageFolder(root=train_path, transform=transform)
val_dataset = datasets.ImageFolder(root=val_path, transform=transform)
test_dataset = datasets.ImageFolder(root=test_path, transform=transform)

In [5]:
train_dataset.class_to_idx

{'cat': 0, 'fish': 1}

In [6]:
train_dataset.classes

['cat', 'fish']

In [7]:
train_dataset.targets[:5]

[0, 0, 0, 0, 0]

In [8]:
# Create dataloader 
batch_size =64
train_dl = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_dl = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Now let's load the pretrained model.
There are two common ways to load or transfer a pre-trained model in PyTorch:

1. Using GitHub Repository (Torch Hub):
- PyTorch Hub allows you to load models directly from public GitHub repositories. This method uses the torch.hub.load() function.
- This method is typically used when the model is hosted on GitHub and can be accessed by specifying the repository name and version.


2. Directly from torchvision.models:
- PyTorch’s torchvision.models module has built-in support for various pre-trained models, and you can load them using the respective function.
- This method is more straightforward and doesn't require any external repository. It directly loads pre-trained models available in the torchvision library.



In [9]:
import torch

# 1. Load ResNet-50 from git repo

resnet50 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)


Using cache found in /Users/tanukhanuja/.cache/torch/hub/pytorch_vision_v0.10.0


In [10]:
# To get list of models available in a repo (here pytorch/vision).
torch.hub.list('pytorch/vision:v0.10.0')  

Using cache found in /Users/tanukhanuja/.cache/torch/hub/pytorch_vision_v0.10.0


['alexnet',
 'deeplabv3_mobilenet_v3_large',
 'deeplabv3_resnet101',
 'deeplabv3_resnet50',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'fcn_resnet101',
 'fcn_resnet50',
 'googlenet',
 'inception_v3',
 'lraspp_mobilenet_v3_large',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet_v2',
 'mobilenet_v3_large',
 'mobilenet_v3_small',
 'resnet101',
 'resnet152',
 'resnet18',
 'resnet34',
 'resnet50',
 'resnext101_32x8d',
 'resnext50_32x4d',
 'shufflenet_v2_x0_5',
 'shufflenet_v2_x1_0',
 'squeezenet1_0',
 'squeezenet1_1',
 'vgg11',
 'vgg11_bn',
 'vgg13',
 'vgg13_bn',
 'vgg16',
 'vgg16_bn',
 'vgg19',
 'vgg19_bn',
 'wide_resnet101_2',
 'wide_resnet50_2']

In [11]:
# 2. Directly import using torchvision
import torchvision.models as models
transfer_model = models.resnet50(pretrained= True)

In [12]:
print(transfer_model)  # Layers of ResNet50

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [13]:
# Parameter names that model stores
for name, param in transfer_model.named_parameters():
    print(name)

conv1.weight
bn1.weight
bn1.bias
layer1.0.conv1.weight
layer1.0.bn1.weight
layer1.0.bn1.bias
layer1.0.conv2.weight
layer1.0.bn2.weight
layer1.0.bn2.bias
layer1.0.conv3.weight
layer1.0.bn3.weight
layer1.0.bn3.bias
layer1.0.downsample.0.weight
layer1.0.downsample.1.weight
layer1.0.downsample.1.bias
layer1.1.conv1.weight
layer1.1.bn1.weight
layer1.1.bn1.bias
layer1.1.conv2.weight
layer1.1.bn2.weight
layer1.1.bn2.bias
layer1.1.conv3.weight
layer1.1.bn3.weight
layer1.1.bn3.bias
layer1.2.conv1.weight
layer1.2.bn1.weight
layer1.2.bn1.bias
layer1.2.conv2.weight
layer1.2.bn2.weight
layer1.2.bn2.bias
layer1.2.conv3.weight
layer1.2.bn3.weight
layer1.2.bn3.bias
layer2.0.conv1.weight
layer2.0.bn1.weight
layer2.0.bn1.bias
layer2.0.conv2.weight
layer2.0.bn2.weight
layer2.0.bn2.bias
layer2.0.conv3.weight
layer2.0.bn3.weight
layer2.0.bn3.bias
layer2.0.downsample.0.weight
layer2.0.downsample.1.weight
layer2.0.downsample.1.bias
layer2.1.conv1.weight
layer2.1.bn1.weight
layer2.1.bn1.bias
layer2.1.conv2.we

In [14]:
# To get a understanding of parameters and their sizes
for name, param in transfer_model.named_parameters():
    print(name, param.shape)


conv1.weight torch.Size([64, 3, 7, 7])
bn1.weight torch.Size([64])
bn1.bias torch.Size([64])
layer1.0.conv1.weight torch.Size([64, 64, 1, 1])
layer1.0.bn1.weight torch.Size([64])
layer1.0.bn1.bias torch.Size([64])
layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight torch.Size([64])
layer1.0.bn2.bias torch.Size([64])
layer1.0.conv3.weight torch.Size([256, 64, 1, 1])
layer1.0.bn3.weight torch.Size([256])
layer1.0.bn3.bias torch.Size([256])
layer1.0.downsample.0.weight torch.Size([256, 64, 1, 1])
layer1.0.downsample.1.weight torch.Size([256])
layer1.0.downsample.1.bias torch.Size([256])
layer1.1.conv1.weight torch.Size([64, 256, 1, 1])
layer1.1.bn1.weight torch.Size([64])
layer1.1.bn1.bias torch.Size([64])
layer1.1.conv2.weight torch.Size([64, 64, 3, 3])
layer1.1.bn2.weight torch.Size([64])
layer1.1.bn2.bias torch.Size([64])
layer1.1.conv3.weight torch.Size([256, 64, 1, 1])
layer1.1.bn3.weight torch.Size([256])
layer1.1.bn3.bias torch.Size([256])
layer1.2.conv1.weight tor

In [15]:
# Freeze parameter updation while training
for name, param in transfer_model.named_parameters():
    param.requires_grad= False

This will set the gradient upgradation of the pretrained model layers to OFF.

In [16]:
# Remove batch normalization from freeze parameter list
# for name, param in transfer_model.named_parameters():
#     if ("bn" not in name):
#         param.requires_grad= False

Batch normalization parameters are tuned based on the original dataset on which ResNet is trained. If not updated during training based on our dataset, then it can cause us losing some of signal as batchnorm corrects your input.

In [17]:
transfer_model.fc.in_features

2048

fc is instance variable of the final classifier layer in ResNet

In [18]:
# Replace final classification block
import torch.nn as nn 

transfer_model.fc = nn.Sequential(
    nn.Linear(2048, 500),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(500,2)
)

In [19]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(transfer_model.parameters(), lr=0.001)

- transfer_model.paramaters() will include all the new layers parameters and the layers which are not freezed (i.e. bn layers).

In [20]:
l = 0
for name, param in transfer_model.named_parameters():
    if (('bn' in name) or ('fc' in name)):
        print(name)


bn1.weight
bn1.bias
layer1.0.bn1.weight
layer1.0.bn1.bias
layer1.0.bn2.weight
layer1.0.bn2.bias
layer1.0.bn3.weight
layer1.0.bn3.bias
layer1.1.bn1.weight
layer1.1.bn1.bias
layer1.1.bn2.weight
layer1.1.bn2.bias
layer1.1.bn3.weight
layer1.1.bn3.bias
layer1.2.bn1.weight
layer1.2.bn1.bias
layer1.2.bn2.weight
layer1.2.bn2.bias
layer1.2.bn3.weight
layer1.2.bn3.bias
layer2.0.bn1.weight
layer2.0.bn1.bias
layer2.0.bn2.weight
layer2.0.bn2.bias
layer2.0.bn3.weight
layer2.0.bn3.bias
layer2.1.bn1.weight
layer2.1.bn1.bias
layer2.1.bn2.weight
layer2.1.bn2.bias
layer2.1.bn3.weight
layer2.1.bn3.bias
layer2.2.bn1.weight
layer2.2.bn1.bias
layer2.2.bn2.weight
layer2.2.bn2.bias
layer2.2.bn3.weight
layer2.2.bn3.bias
layer2.3.bn1.weight
layer2.3.bn1.bias
layer2.3.bn2.weight
layer2.3.bn2.bias
layer2.3.bn3.weight
layer2.3.bn3.bias
layer3.0.bn1.weight
layer3.0.bn1.bias
layer3.0.bn2.weight
layer3.0.bn2.bias
layer3.0.bn3.weight
layer3.0.bn3.bias
layer3.1.bn1.weight
layer3.1.bn1.bias
layer3.1.bn2.weight
layer3.1.b

These are the weights that will be updated while training.

In [21]:
# Train the mnodel
num_epochs = 20

for epoch in range(num_epochs):
    transfer_model.train() # Set model to training mode

    running_loss = 0

    for input, label in train_dl:
        optimizer.zero_grad()
        output = transfer_model(input)
        loss = criterion(output, label)
        # print(f"{loss=}")
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    average_loss = running_loss/len(train_dl)
    print(f"Epoch {epoch+1}/{num_epochs}, loss {average_loss:.4f}")

    


loss=tensor(0.7044, grad_fn=<NllLossBackward0>)
loss=tensor(1.0705, grad_fn=<NllLossBackward0>)
loss=tensor(0.8017, grad_fn=<NllLossBackward0>)
loss=tensor(0.6097, grad_fn=<NllLossBackward0>)
loss=tensor(0.5485, grad_fn=<NllLossBackward0>)
loss=tensor(0.5649, grad_fn=<NllLossBackward0>)
loss=tensor(0.4393, grad_fn=<NllLossBackward0>)
loss=tensor(0.3933, grad_fn=<NllLossBackward0>)
loss=tensor(0.4905, grad_fn=<NllLossBackward0>)
loss=tensor(0.3264, grad_fn=<NllLossBackward0>)
loss=tensor(0.4638, grad_fn=<NllLossBackward0>)
loss=tensor(0.4083, grad_fn=<NllLossBackward0>)
loss=tensor(0.4082, grad_fn=<NllLossBackward0>)
Epoch 1/20, loss 0.5561
loss=tensor(0.3900, grad_fn=<NllLossBackward0>)
loss=tensor(0.3370, grad_fn=<NllLossBackward0>)
loss=tensor(0.2837, grad_fn=<NllLossBackward0>)
loss=tensor(0.2733, grad_fn=<NllLossBackward0>)
loss=tensor(0.2796, grad_fn=<NllLossBackward0>)
loss=tensor(0.2139, grad_fn=<NllLossBackward0>)
loss=tensor(0.2706, grad_fn=<NllLossBackward0>)
loss=tensor(0.23

In [22]:
# Applying model on validation data
transfer_model.eval()
val_loss = 0
correct_pred = 0
total_pred = 0
for batch in val_dl:
    input, label = batch
    output = transfer_model(input)
    # print(output)
    loss = criterion(output, label)
    val_loss += loss.item()
    values, indices = torch.max(output, 1)  # maximum values in each row (dim = 1)
    total_pred += label.size(0)
    correct_pred += (indices==label).sum().item()

accuracy = (correct_pred/total_pred) * 100
print(f"Validation loss: {val_loss}, Accuracy: {accuracy:.4f}%")


    

Validation loss: 0.6642313748598099, Accuracy: 87.9630%


In [23]:
# Test data
transfer_model.eval()
running_loss = 0
total_pred = 0
correct_pred = 0

for batch in test_dl:
    input, label = batch
    output = transfer_model(input)
    loss = criterion(output, label)
    running_loss += loss.item()

    values, indices = torch.max(output, 1)
    total_pred += label.size(0)
    correct_pred += (indices==label).sum().item()

accuracy = (correct_pred/total_pred)*100
print(f"Test Accuracy: {accuracy}%")

Test Accuracy: 88.125%


# 