In [1]:
from torchvision import models
import torch
from torch import nn

import fastai
import fastai.data.transforms as fastai_transforms
import fastai.vision.data as vis_data
import fastai.vision.augment as vis_augment
import fastai.vision.learner as vis_learner
import fastai.data.external as external_data

In [2]:
# Load a pretrained ResNet-18 (default torchvision weights) and
# switch it to evaluation mode in one step; eval() returns the model.
resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT).eval()

# Print the name of each top-level child module, one per line
for child_name, _child in resnet.named_children():
    print(child_name)

conv1
bn1
relu
maxpool
layer1
layer2
layer3
layer4
avgpool
fc


In [3]:
# Sanity check: resnet does the same thing as
# a sequential model built from its children
#
# We have to manually insert a flatten layer just before
# the last child, as this operation is hard-coded in the
# class torchvision.models.resnet.ResNet
#
# The flatten layer is spliced in while unpacking, so the list
# `resnet_children` itself is left unmodified and keeps meaning
# "the children of resnet, as-is".
resnet_children = list(resnet.children())
sequential_from_resnet_children = nn.Sequential(
    *resnet_children[:-1], nn.Flatten(1), resnet_children[-1]
)

# Random input data, drawn from a dedicated seeded generator so the
# check is reproducible without touching the global RNG state.
X = torch.randn(100, 3, 224, 224, generator=torch.Generator().manual_seed(0))

# Verify the models have the same output. No gradients are needed
# for the comparison, so the forward passes run under no_grad.
with torch.no_grad():
    out_1 = resnet(X)
    out_2 = sequential_from_resnet_children(X)
assert torch.equal(out_1, out_2)

In [4]:
# Create a fastai vision learner. This code is taken from
# chapter 5 of the fastai book.
path = external_data.untar_data(external_data.URLs.PETS)

pets = vis_data.DataBlock(
    blocks=(vis_data.ImageBlock, vis_data.CategoryBlock),
    get_items=fastai_transforms.get_image_files,
    splitter=fastai_transforms.RandomSplitter(seed=42),
    # The label is the part of the file name before the trailing
    # "_<digits>.jpg". The dot is escaped so that it matches only a
    # literal "." instead of any character.
    get_y=vis_data.using_attr(
        fastai_transforms.RegexLabeller(r"(.+)_\d+\.jpg$"), "name"
    ),
    item_tfms=vis_augment.Resize(460),
    batch_tfms=vis_augment.aug_transforms(size=224, min_scale=0.75),
)
dls = pets.dataloaders(path / "images")

learn = vis_learner.vision_learner(
    dls, models.resnet18, metrics=fastai.metrics.error_rate
)
fastai_model = learn.model

# The vision model created by fastai is a sequential model
# with two steps.
assert isinstance(fastai_model, nn.Sequential)
assert len(fastai_model) == 2

In [5]:
# Layer 0 of the fastai model is resnet with
# the last two layers removed. We can verify this
# by plugging in our random input into layer 0 of
# the fastai model, and into all but the last two
# layers of resnet, and seeing that the results are
# equal.
resnet_children = list(resnet.children())
resnet_w_last_2_layers_removed = nn.Sequential(*resnet_children[:-2]).eval()

# Both networks must run in evaluation mode for the comparison to be
# meaningful: in training mode BatchNorm normalises with per-batch
# statistics and updates its running statistics as a side effect.
# The fastai body is switched to eval explicitly and its previous
# mode is restored afterwards, so the learner is left as it was.
# NOTE(review): assumes the fastai model is on the same device as X — confirm.
fastai_body = fastai_model[0]
body_was_training = fastai_body.training
fastai_body.eval()
try:
    # No gradients are needed for a pure output comparison
    with torch.no_grad():
        out_1 = fastai_body(X)
        out_2 = resnet_w_last_2_layers_removed(X)
finally:
    fastai_body.train(body_was_training)
assert torch.equal(out_1, out_2)

In [6]:
# The layers removed from the end of resnet are a
# pooling layer and a linear layer.
#
# They are read straight from resnet rather than from a list built
# in another cell, so the output does not depend on which cells ran
# before this one or on whether such a list was modified.
print(list(resnet.children())[-2:])

[AdaptiveAvgPool2d(output_size=(1, 1)), Linear(in_features=512, out_features=1000, bias=True)]


In [7]:
# Layer 1 of the fastai model does not come from resnet: it is a
# new block added in place of the pooling and linear layers that
# were removed, and it is somewhat more elaborate than those two.
fastai_head = fastai_model[1]
print(fastai_head)

Sequential(
  (0): AdaptiveConcatPool2d(
    (ap): AdaptiveAvgPool2d(output_size=1)
    (mp): AdaptiveMaxPool2d(output_size=1)
  )
  (1): fastai.layers.Flatten(full=False)
  (2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.25, inplace=False)
  (4): Linear(in_features=1024, out_features=512, bias=False)
  (5): ReLU(inplace=True)
  (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.5, inplace=False)
  (8): Linear(in_features=512, out_features=37, bias=False)
)


In [8]:
# The fastai model starts out frozen; this shows up as
# frozen_idx = 2 on the optimizer.
assert learn.opt.frozen_idx == 2

# Every parameter of layer 1 (the newly added layer) is trainable.
head_params = fastai_model[1].parameters()
assert all(p.requires_grad for p in head_params)

# In layer 0 (the part taken from resnet) requires_grad is True for
# some parameters and False for others. Only parameters with
# requires_grad=True are updated during training.
for param_name, parameter in fastai_model[0].named_parameters():
    print(param_name, parameter.requires_grad)

0.weight False
1.weight True
1.bias True
4.0.conv1.weight False
4.0.bn1.weight True
4.0.bn1.bias True
4.0.conv2.weight False
4.0.bn2.weight True
4.0.bn2.bias True
4.1.conv1.weight False
4.1.bn1.weight True
4.1.bn1.bias True
4.1.conv2.weight False
4.1.bn2.weight True
4.1.bn2.bias True
5.0.conv1.weight False
5.0.bn1.weight True
5.0.bn1.bias True
5.0.conv2.weight False
5.0.bn2.weight True
5.0.bn2.bias True
5.0.downsample.0.weight False
5.0.downsample.1.weight True
5.0.downsample.1.bias True
5.1.conv1.weight False
5.1.bn1.weight True
5.1.bn1.bias True
5.1.conv2.weight False
5.1.bn2.weight True
5.1.bn2.bias True
6.0.conv1.weight False
6.0.bn1.weight True
6.0.bn1.bias True
6.0.conv2.weight False
6.0.bn2.weight True
6.0.bn2.bias True
6.0.downsample.0.weight False
6.0.downsample.1.weight True
6.0.downsample.1.bias True
6.1.conv1.weight False
6.1.bn1.weight True
6.1.bn1.bias True
6.1.conv2.weight False
6.1.bn2.weight True
6.1.bn2.bias True
7.0.conv1.weight False
7.0.bn1.weight True
7.0.bn1.bias True

In [9]:
# We can examine more closely to see that
# in layer 0 (coming from resnet), requires_grad
# is only True for parameters in Norm layers, and
# for bias parameters.
def show_parameters(model: nn.Module, level: int = 0) -> None:
    """
    Print the module tree below `model`, with requires_grad for each parameter.

    For every child module (recursively) one header line
    "Child <name>: <ClassName>" is printed, followed by one
    "<param name>: <requires_grad>" line for each parameter registered
    directly on that child. Nested children are indented two spaces per
    level. Parameters registered directly on `model` itself are not
    printed, since only its children get a header line.

    Args:
        model: module whose children are walked.
        level: current nesting depth; controls the indentation and is
            incremented on each recursive call.
    """
    align = level * "  "
    for child_name, child in model.named_children():
        print(f"{align}Child {child_name}: {type(child).__name__}")
        # recurse=False lists only the parameters owned by this child
        # itself, so a module that has both parameters and sub-modules
        # is reported in full and nothing is printed twice.
        for param_name, param in child.named_parameters(recurse=False):
            print(f"{align}  {param_name}: {param.requires_grad}")
        # A module without children makes this call a no-op.
        show_parameters(child, level + 1)


# Walk layer 0 of the fastai model and print its module tree
show_parameters(fastai_model[0])

Child 0: Conv2d
  weight: False
Child 1: BatchNorm2d
  weight: True
  bias: True
Child 2: ReLU
Child 3: MaxPool2d
Child 4: Sequential
  Child 0: BasicBlock
    Child conv1: Conv2d
      weight: False
    Child bn1: BatchNorm2d
      weight: True
      bias: True
    Child relu: ReLU
    Child conv2: Conv2d
      weight: False
    Child bn2: BatchNorm2d
      weight: True
      bias: True
  Child 1: BasicBlock
    Child conv1: Conv2d
      weight: False
    Child bn1: BatchNorm2d
      weight: True
      bias: True
    Child relu: ReLU
    Child conv2: Conv2d
      weight: False
    Child bn2: BatchNorm2d
      weight: True
      bias: True
Child 5: Sequential
  Child 0: BasicBlock
    Child conv1: Conv2d
      weight: False
    Child bn1: BatchNorm2d
      weight: True
      bias: True
    Child relu: ReLU
    Child conv2: Conv2d
      weight: False
    Child bn2: BatchNorm2d
      weight: True
      bias: True
    Child downsample: Sequential
      Child 0: Conv2d
        weight: False

In [10]:
# The model was frozen from the start, so an explicit
# learn.freeze() should leave frozen_idx at the same value.
learn.freeze()
frozen_idx_after_freeze = learn.opt.frozen_idx
assert frozen_idx_after_freeze == 2

In [11]:
# Unfreezing the model is reflected by frozen_idx = 0, and
# afterwards no parameter may be left with requires_grad=False.
learn.unfreeze()
frozen_idx_after_unfreeze = learn.opt.frozen_idx
assert frozen_idx_after_unfreeze == 0
assert not any(not p.requires_grad for p in fastai_model.parameters())