<a href="https://colab.research.google.com/github/torrhen/pytorch/blob/main/06_pytorch_transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+
try:
    import torch
    import torchvision
    assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
    assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")
except:
    print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
    !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
    import torch
    import torchvision
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")

torch version: 1.13.0+cu116
torchvision version: 0.14.0+cu116


In [2]:
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# import torchinfo or install if required
try:
  from torchinfo import summary
except:
  # install torchinfo
  !pip install torchinfo
  from torchinfo import summary

# download going_modular repository and import required scripts
try:
  from going_modular import data_setup, engine
except:
  !git clone https://github.com/mrdbourke/pytorch-deep-learning
  !mv pytorch-deep-learning/going_modular .
  !rm -rf pytorch-deep-learning
  from going_modular.going_modular import data_setup, engine


Cloning into 'pytorch-deep-learning'...
remote: Enumerating objects: 3350, done.[K
remote: Total 3350 (delta 0), reused 0 (delta 0), pack-reused 3350[K
Receiving objects: 100% (3350/3350), 640.93 MiB | 46.34 MiB/s, done.
Resolving deltas: 100% (1921/1921), done.
Checking out files: 100% (221/221), done.
mv: cannot move 'pytorch-deep-learning/going_modular' to './going_modular': Directory not empty


In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
import os
import zipfile

from pathlib import Path

import requests

data_path = Path('data/')
image_path = data_path / 'pizza_steak_sushi'

if image_path.is_dir():
  print(f"{image_path} already exists.")
else:
  image_path.mkdir(parents=True, exist_ok=True)
  with open(data_path / "pizza_steak_sushi.zip", 'wb') as f:
    response = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
    f.write(response.content)

  with zipfile.ZipFile(data_path / "pizza_steak_sushi.zip", 'r') as z:
    z.extractall(image_path)

  os.remove(data_path / 'pizza_steak_sushi.zip')
  

In [5]:
train_folder = image_path / 'train'
test_folder = image_path / 'test'

### 1. Manual transforms for transfer learning

In [7]:
'''
your custom data going into the model is prepared in the same way as the original training data that went into the model.
'''
manual_transform =  transforms.Compose(
    [
        transforms.Resize((244, 244)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]
)

In [8]:
# create train and test data loaders and batches using manual transforms
BATCH_SIZE = 32
train_loader, test_loader, class_names = data_setup.create_dataloaders(train_folder,
                                                                       test_folder,
                                                                       manual_transform,
                                                                       BATCH_SIZE)

### 2. Automatic transforms for transfer learning (available with torchvision v0.13+)

In [9]:
# set of best available weights from pretrained ImageNet model
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
# get the transforms used to train the EfficientNet_B0_Weights on ImageNet
auto_transforms = weights.transforms()
print(auto_transforms)

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)


In [10]:
# create train and test data loaders and batches using the automatic transforms used on ImageNet
BATCH_SIZE = 32
train_loader, test_loader, class_names = data_setup.create_dataloaders(train_folder,
                                                                       test_folder,
                                                                       auto_transforms,
                                                                       BATCH_SIZE)

In [12]:
# setup efficient net pretrained model with best available learned weights and allocate to device
model = torchvision.models.efficientnet_b0(weights=weights).to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth


  0%|          | 0.00/20.5M [00:00<?, ?B/s]

In [15]:
# display a summary of the model using torchinfo
summary(model=model,
        col_names=['input_size', 'output_size', 'trainable'],
        input_size=[BATCH_SIZE, 3, 224, 244],
        col_width=20,
        row_settings=['var_names'])

Layer (type (var_name))                                      Input Shape          Output Shape         Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 244]    [32, 1000]           True
├─Sequential (features)                                      [32, 3, 224, 244]    [32, 1280, 7, 8]     True
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 244]    [32, 32, 112, 122]   True
│    │    └─Conv2d (0)                                       [32, 3, 224, 244]    [32, 32, 112, 122]   True
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 122]   [32, 32, 112, 122]   True
│    │    └─SiLU (2)                                         [32, 32, 112, 122]   [32, 32, 112, 122]   --
│    └─Sequential (1)                                        [32, 32, 112, 122]   [32, 16, 112, 122]   True
│    │    └─MBConv (0)                                       [32, 32, 112, 122]   [32, 16, 112, 122]   True
│    └─Sequential (2)    

The pretrained model has an output size of 1000 features since it was trained using ImageNet which has 1000 classes. The size of this output must be changed for the smaller dataset containing 3 classes.

The features layers (convolutions and other activations) can be kept the same (frozen) as they learn the representation of each image.

However the classifier component should be modified to be suitable to the problem.

In [16]:
# freeze all layers/parameters in the features component of the pretrained model
for param in  model.features.parameters():
  param.requires_grad = False # prevents pytorch tracking the gradients of parameters within this part of the models architecture


In [17]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# modify the classifier component of the pretrained model to suit our output size
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True), # identical to previous architecture
    nn.Linear(in_features=1280, # identical to previous architecture
              out_features=len(class_names), # modified
              bias=True)).to(device) # allocate to device

In [18]:
# display changes to architecture - output vector dimension should have changed
# parameters in the features layers should be frozen (no longer trainable)
summary(model=model,
        col_names=['input_size', 'output_size', 'trainable'],
        input_size=[BATCH_SIZE, 3, 224, 244],
        col_width=20,
        row_settings=['var_names'])

Layer (type (var_name))                                      Input Shape          Output Shape         Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 244]    [32, 3]              Partial
├─Sequential (features)                                      [32, 3, 224, 244]    [32, 1280, 7, 8]     False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 244]    [32, 32, 112, 122]   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 244]    [32, 32, 112, 122]   False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 122]   [32, 32, 112, 122]   False
│    │    └─SiLU (2)                                         [32, 32, 112, 122]   [32, 32, 112, 122]   --
│    └─Sequential (1)                                        [32, 32, 112, 122]   [32, 16, 112, 122]   False
│    │    └─MBConv (0)                                       [32, 32, 112, 122]   [32, 16, 112, 122]   False
│    └─Sequentia

The number of trainable parameters has decreased from 5.2 million to 4,000. Less computational power is needed to train this model than TinyVGG and the weights of larger model can still be exploited.

Transfer learning allows us to used reliable, large models with only small tweaks to the output layers to suit our needs and use less resources than would be required to train the model from scratch.

### Train model

In [19]:
# set up loss function
loss_fn = nn.CrossEntropyLoss()
# set up optimizer
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [20]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# train the model and store the results for every epoch
results = engine.train(model,
                       train_loader,
                       test_loader,
                       optimizer,
                       loss_fn,
                       epochs=5,
                       device=device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0901 | train_acc: 0.4102 | test_loss: 0.8925 | test_acc: 0.6714
Epoch: 2 | train_loss: 0.9162 | train_acc: 0.6367 | test_loss: 0.8032 | test_acc: 0.7746
Epoch: 3 | train_loss: 0.8170 | train_acc: 0.7070 | test_loss: 0.6780 | test_acc: 0.9072
Epoch: 4 | train_loss: 0.7456 | train_acc: 0.7266 | test_loss: 0.6718 | test_acc: 0.8248
Epoch: 5 | train_loss: 0.6219 | train_acc: 0.7656 | test_loss: 0.6246 | test_acc: 0.8561
