## Imports

In [30]:
from PIL import Image

import torch
import torch.nn as nn
from torchvision import models
from torchvision import transforms

## Model Instantiation

In [2]:
# List every public name exported by torchvision.models. This includes the
# architecture classes and their *_Weights enums, not just ready-to-use models.
dir(models)

['AlexNet',
 'AlexNet_Weights',
 'ConvNeXt',
 'ConvNeXt_Base_Weights',
 'ConvNeXt_Large_Weights',
 'ConvNeXt_Small_Weights',
 'ConvNeXt_Tiny_Weights',
 'DenseNet',
 'DenseNet121_Weights',
 'DenseNet161_Weights',
 'DenseNet169_Weights',
 'DenseNet201_Weights',
 'EfficientNet',
 'EfficientNet_B0_Weights',
 'EfficientNet_B1_Weights',
 'EfficientNet_B2_Weights',
 'EfficientNet_B3_Weights',
 'EfficientNet_B4_Weights',
 'EfficientNet_B5_Weights',
 'EfficientNet_B6_Weights',
 'EfficientNet_B7_Weights',
 'EfficientNet_V2_L_Weights',
 'EfficientNet_V2_M_Weights',
 'EfficientNet_V2_S_Weights',
 'GoogLeNet',
 'GoogLeNetOutputs',
 'GoogLeNet_Weights',
 'Inception3',
 'InceptionOutputs',
 'Inception_V3_Weights',
 'MNASNet',
 'MNASNet0_5_Weights',
 'MNASNet0_75_Weights',
 'MNASNet1_0_Weights',
 'MNASNet1_3_Weights',
 'MaxVit',
 'MaxVit_T_Weights',
 'MobileNetV2',
 'MobileNetV3',
 'MobileNet_V2_Weights',
 'MobileNet_V3_Large_Weights',
 'MobileNet_V3_Small_Weights',
 'RegNet',
 'RegNet_X_16GF_Weights'

In [3]:
# Instantiate the AlexNet architecture directly from its class; no pre-trained
# weights are requested here.
alexnet = models.AlexNet()

## Image Labeling

In [6]:
# Build ResNet-101 with its default pre-trained weights (the checkpoint is
# downloaded into the local torch hub cache when it is not already there).
resnet = models.resnet101(weights=models.ResNet101_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/resnet101-cd907fc2.pth" to /home/nico/.cache/torch/hub/checkpoints/resnet101-cd907fc2.pth


100%|██████████| 171M/171M [00:19<00:00, 9.18MB/s] 


In [7]:
# Display the module tree of the network (layer names, types and hyper-parameters)
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
# Preprocessing for an image labeling task
preprocess = transforms.Compose([
    transforms.Resize(256),         # Scale so the shorter side is 256 px (aspect ratio is kept)
    transforms.CenterCrop(224),     # Crop the central 224x224 patch
    transforms.ToTensor(),          # PIL image -> float tensor (C, H, W) with values in [0, 1]
    transforms.Normalize(           # Standardise each RGB channel with the ImageNet mean/std
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

In [13]:
# Load sample image.
# The picture is forced to 3-channel RGB so that the 3-channel Normalize step of
# `preprocess` also works when the file is grayscale, RGBA or CMYK.
img = Image.open("./res/bobby.jpg").convert("RGB")
img.show()  # opens the picture in the system's external image viewer

In [16]:
# Run the preprocessing pipeline, then prepend a batch axis so the network
# receives a batch that contains this single image.
img_t = preprocess(img)
batch_t = img_t.unsqueeze(0)

In [22]:
# Run model
# eval() makes the BatchNorm layers use their stored running statistics;
# no_grad() skips building an autograd graph, which inference does not need.
resnet.eval()
with torch.no_grad():
    out = resnet(batch_t)
out

tensor([[-1.1233e-01, -5.1926e-02,  4.7648e-02,  1.5143e-02, -1.9250e-02,
         -1.1728e-01, -3.3896e-01, -7.1969e-02, -1.1153e-01,  2.3872e-01,
         -4.1327e-02, -2.6325e-01, -7.3170e-02,  1.6063e-01,  1.2323e-02,
         -5.0542e-02, -9.7163e-02, -4.7461e-02,  2.8756e-02,  1.4415e-01,
          4.5259e-02,  9.5436e-02, -2.0497e-01,  1.0033e-01,  8.7251e-03,
          2.1565e-01, -1.3909e-01,  1.0818e-01, -7.9363e-02, -1.6043e-01,
         -3.1740e-02, -2.1645e-01, -3.9362e-02, -2.5555e-01,  1.2436e-01,
          2.6881e-01, -1.5909e-01, -9.4458e-02,  4.5123e-02, -4.2908e-02,
         -1.7030e-01,  1.4350e-01, -2.4532e-02, -1.5386e-01,  1.2039e-01,
          1.9509e-01, -9.1743e-02,  1.5325e-01,  3.7678e-01,  1.0692e-02,
         -1.2965e-01, -3.1077e-02, -4.6631e-02, -1.9775e-01, -1.8386e-01,
         -4.5000e-02,  2.5424e-01,  7.9787e-02, -2.1713e-01,  8.9276e-02,
         -2.1015e-01,  2.3264e-02,  2.8800e-02, -1.5181e-01, -1.4051e-01,
          4.2469e-02,  6.4939e-03, -1.

In [23]:
# Read the class names, one per line, dropping surrounding whitespace
with open('res/imagenet_classes.txt') as labels_file:
    labels = [raw_line.strip() for raw_line in labels_file]

In [24]:
# Position of the highest score along the class axis, one entry per batch item
_, index = out.max(dim=1)

In [26]:
# Turn the raw scores of the first batch item into percentages, then report
# the best class together with its confidence.
percentage = 100 * torch.softmax(out, dim=1)[0]
(labels[index[0]], percentage[index[0]].item())

('golden retriever', 74.87625122070312)

In [28]:
# Five best-scoring classes with their confidence in percent
_, indices = out.sort(descending=True)
[(labels[class_idx], percentage[class_idx].item()) for class_idx in indices[0, :5]]

[('golden retriever', 74.87625122070312),
 ('Labrador retriever', 1.4267538785934448),
 ('tennis ball', 1.1334866285324097),
 ('toilet tissue, toilet paper, bathroom tissue', 0.21008385717868805),
 ('Bernese mountain dog', 0.20377537608146667)]

## CycleGAN network

In [31]:
class ResNetBlock(nn.Module):
    """Residual block: two reflection-padded 3x3 convolutions plus a skip connection.

    Each convolution is followed by instance normalisation; only the first one
    is followed by a ReLU. A reflection padding of 1 in front of every
    unpadded 3x3 convolution keeps the spatial size unchanged, so the branch
    output can be added to the block input.

    Args:
        dim: Number of input (and output) channels.
    """

    def __init__(self, dim):
        super().__init__()
        self.conv_block = self.build_conv_block(dim)

    @staticmethod
    def build_conv_block(dim):
        """Return the residual branch as an ``nn.Sequential``.

        The layer order fixes the numeric sub-module indices that appear in
        ``state_dict`` keys, so it must not change if saved checkpoints are to
        keep loading.
        """
        return nn.Sequential(
            # first conv stage
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim, dim, kernel_size=3, padding=0, bias=True),
            nn.InstanceNorm2d(dim),
            nn.ReLU(True),
            # second conv stage (no activation)
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim, dim, kernel_size=3, padding=0, bias=True),
            nn.InstanceNorm2d(dim),
        )

    def forward(self, x):
        """Return the input plus the output of the residual branch."""
        residual = self.conv_block(x)
        return x + residual


class ResNetGenerator(nn.Module):
    """ResNet-style image-to-image generator.

    Layout, in order:
      1. stem: reflection pad + 7x7 convolution to ``ngf`` channels,
      2. two stride-2 3x3 convolutions, each doubling the channel count,
      3. ``n_blocks`` ``ResNetBlock`` modules at the widest channel count,
      4. two stride-2 transposed convolutions, each halving the channel count,
      5. head: reflection pad + 7x7 convolution to ``output_nc`` channels + tanh.

    Every convolution except the last is followed by instance normalisation
    and a ReLU. The layer order determines the numeric indices used in
    ``state_dict`` keys and must stay fixed for saved checkpoints to load.

    Args:
        input_nc: Number of channels of the input image.
        output_nc: Number of channels of the generated image.
        ngf: Number of filters produced by the stem convolution.
        n_blocks: Number of residual blocks; must be non-negative.
    """

    def __init__(self, input_nc=3, output_nc=3, ngf=64, n_blocks=9):
        assert n_blocks >= 0
        super().__init__()

        self.input_nc = input_nc
        self.output_nc = output_nc
        self.ngf = ngf

        n_downsampling = 2

        # Stem: full-resolution 7x7 convolution
        layers = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=True),
            nn.InstanceNorm2d(ngf),
            nn.ReLU(True),
        ]

        # Encoder: halve the resolution and double the channels at every step
        channels = ngf
        for _ in range(n_downsampling):
            layers.extend([
                nn.Conv2d(
                    channels, channels * 2, kernel_size=3,
                    stride=2, padding=1, bias=True
                ),
                nn.InstanceNorm2d(channels * 2),
                nn.ReLU(True),
            ])
            channels *= 2

        # Bottleneck: residual blocks at the widest channel count
        layers.extend(ResNetBlock(channels) for _ in range(n_blocks))

        # Decoder: mirror of the encoder using transposed convolutions
        for _ in range(n_downsampling):
            layers.extend([
                nn.ConvTranspose2d(
                    channels, channels // 2, kernel_size=3,
                    stride=2, padding=1, output_padding=1, bias=True
                ),
                nn.InstanceNorm2d(channels // 2),
                nn.ReLU(True),
            ])
            channels //= 2

        # Head: project back to image channels; tanh bounds the output to [-1, 1]
        layers.extend([
            nn.ReflectionPad2d(3),
            nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
            nn.Tanh(),
        ])

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the whole sequential stack."""
        return self.model(x)


In [33]:
# Create the generator with freshly initialised (untrained) weights
netG = ResNetGenerator()

# Load the pre-trained parameters from the checkpoint file.
#   map_location='cpu' - the checkpoint also loads on machines without the
#                        device it was saved from (e.g. no GPU available).
#   weights_only=True  - restricts unpickling to tensors and plain containers,
#                        so a tampered checkpoint cannot execute arbitrary code.
model_path = 'res/horse2zebra_0.4.0.pth'
model_data = torch.load(model_path, map_location='cpu', weights_only=True)
netG.load_state_dict(model_data)

# Switch to inference mode; as the last expression this also displays the model
netG.eval()

<All keys matched successfully>

In [34]:
# Preprocessing for the generator: only a resize (shorter side to 256 px) and
# the conversion to a tensor. Unlike the labeling pipeline, there is no centre
# crop and no normalisation.
# NOTE(review): this rebinds `preprocess`, replacing the labeling pipeline
# defined earlier; re-running the labeling cells afterwards would use this one.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.ToTensor()
])

ResNetGenerator(
  (model): Sequential(
    (0): ReflectionPad2d((3, 3, 3, 3))
    (1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1))
    (2): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (3): ReLU(inplace=True)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (5): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (9): ReLU(inplace=True)
    (10): ResNetBlock(
      (conv_block): Sequential(
        (0): ReflectionPad2d((1, 1, 1, 1))
        (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
        (2): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
        (3): ReLU(inplace=True)
        (4): ReflectionPad2d((1, 1, 1, 1))
     

In [35]:
# Load image of horse.
# Converted to RGB because the generator's first convolution expects exactly
# 3 input channels; grayscale or RGBA files would otherwise fail in the model.
horse_img = Image.open('res/horse.jpg').convert('RGB')
horse_img.show()  # opens the picture in the system's external image viewer

In [36]:
# Apply the preprocessing pipeline and prepend a batch axis of size one
horse_img_t = preprocess(horse_img)
horse_batch_t = horse_img_t.unsqueeze(0)

In [39]:
# Convert horse to zebra
# Inference only: no_grad() skips autograd bookkeeping, which also removes the
# need for the legacy `.data` attribute to detach the result.
with torch.no_grad():
    batch_out = netG(horse_batch_t)

# Drop only the batch axis (squeeze(0)) so a size-1 spatial axis can never be
# removed by accident, then map the tanh output from [-1, 1] to [0, 1].
out_t = (batch_out.squeeze(0) + 1.0) / 2.0
out_img = transforms.ToPILImage()(out_t)
out_img.save('res/zebra.jpg')
out_img.show()

## TorchHub

In [43]:
from torch import hub

# Load ResNet-18 through torch.hub. 'pytorch/vision:main' names the GitHub
# repository and the ref (here the `main` branch), 'resnet18' is the entry
# point to call, and `weights` is forwarded to that entry point.
# NOTE(review): `main` is a moving target; consider pinning a release tag so
# the notebook stays reproducible.
resnet18_model = hub.load(
    'pytorch/vision:main',
    'resnet18',
    weights=models.ResNet18_Weights.DEFAULT
)

Using cache found in /home/nico/.cache/torch/hub/pytorch_vision_main


## Exercises

### Exercise 1

In [44]:
# Load dog image, converted to RGB so the generator always receives 3 channels.
# NOTE(review): the labeling section reads this picture from ./res/bobby.jpg;
# confirm that the data/ directory is the intended location here.
dog_img = Image.open('data/bobby.jpg').convert('RGB')

In [45]:
# Preprocess and add the batch axis.
# unsqueeze (not squeeze) is required: the preprocessed image is (C, H, W) and
# squeezing axis 0 of size 3 is a no-op, which would leave the tensor unbatched.
dog_img_t = preprocess(dog_img)
dog_batch_t = torch.unsqueeze(dog_img_t, 0)

In [46]:
# Feed to model
# Inference only: no_grad() skips autograd bookkeeping, which also removes the
# need for the legacy `.data` attribute to detach the result.
with torch.no_grad():
    dog_out = netG(dog_batch_t)

# Drop a leading batch axis if there is one (squeeze(0) is a no-op when axis 0
# is not of size 1), then map the tanh output from [-1, 1] to [0, 1].
dog_out_t = (dog_out.squeeze(0) + 1.0) / 2.0

In [48]:
# Recreate image: turn the [0, 1] tensor back into a PIL image, write it to
# disk and open it in the external viewer.
# NOTE(review): this reuses the name `out_img` from the horse-to-zebra cell.
out_img = transforms.ToPILImage()(dog_out_t)
out_img.save('data/dog.jpg')
out_img.show()

### Exercise 2

For later: https://github.com/NVIDIA/DeepLearningExamples/tree/master