In [3]:
import torch
from torch import nn

import torchvision
from torchvision import transforms

from torchinfo import summary

import numpy as np
import matplotlib.pyplot as plt 

In [2]:
# Display the installed torchvision version string.
torchvision.__version__

'0.22.1'

In [1]:
# Import the helper modules written earlier (the `utils` package with
# `data_setup` and `engine`).
import sys
try:
    from utils import data_setup, engine
except ImportError as err:
    # Catch only import failures (not KeyboardInterrupt or unrelated bugs) and
    # report the reason, so a missing or broken `utils` package can be diagnosed.
    print(f"cannot import modular: {err}")


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Pick the compute device: Apple-silicon GPU ("mps") when the MPS backend is
# usable, otherwise fall back to the CPU.
# `torch.backends.mps.is_available()` is the documented availability check and
# also exists on older torch releases that lack `torch.mps.is_available`.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
device

'mps'

### get data

In [13]:
# get data: make sure the image folder exists locally, downloading and
# unpacking the archive when it does not.
import os
import zipfile
from pathlib import Path

import requests

# NOTE(review): URL of the archive as published with the pytorch-deep-learning
# course material - confirm it is the intended source.
data_url = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip"

data_path = Path("git/pytorch-deep-learning/data/")

image_path = data_path #/ "pizza_steak_sushi"

# Single source of truth for the archive location, so the write, extract and
# cleanup steps all refer to the same file.
zip_path = data_path / "pizza_steak_sushi.zip"

if image_path.is_dir():
    print(f"image path exist")
else:
    print("did not find file. downloading...")
    image_path.mkdir(parents=True, exist_ok=True)

    # Download first and fail loudly on an HTTP error, so an error page is
    # never written to disk and then handed to zipfile.
    response = requests.get(data_url, timeout=60)
    response.raise_for_status()
    print("downloading...")
    zip_path.write_bytes(response.content)

    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(image_path)

    # The archive is no longer needed once its contents are extracted.
    os.remove(zip_path)

image path exist


In [21]:
# Locations of the training and testing image folders.
train_dir, test_dir = map(
    Path,
    ('data/pizza_steak_sushi/train', 'data/pizza_steak_sushi/test'),
)
train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

Create datasets and dataloaders using the `create_dataloaders()` function from `data_setup.py`.

In [22]:
# one of the two ways:
# 1. manually create transforms - you define the transforms you want your data to go through
# 2. automatically create transforms - the transforms for your data are defined by the model you'd like to use (requires torchvision 0.13+)

# important: when using a pretrained model, the data you pass through it (including your custom data) must be **transformed** in the same way as the data the model was trained on


In [None]:
# Method 1: write the transform pipeline by hand.
# Reference pages:
#   https://docs.pytorch.org/vision/stable/models.html#initializing-pre-trained-models
#   https://docs.pytorch.org/vision/stable/transforms.html#start-here

from torchvision import transforms

# Per-channel mean / std used to normalise the image tensors.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Applied in order: resize to 224x224 -> convert to tensor -> normalise.
manual_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]
)



In [28]:
# Build the train/test dataloaders (and the class-name list) using the
# hand-written transform pipeline.
(train_dataloader,
 test_dataloader,
 class_names) = data_setup.create_dataloaders(train_dir=train_dir,
                                              test_dir=test_dir,
                                              batch_size=32,
                                              transform=manual_transforms)

In [29]:
# Method 2: let the pretrained weights supply the matching data transform.
# https://docs.pytorch.org/vision/stable/models.html#initializing-pre-trained-models

# Select a set of pretrained EfficientNet-B0 weights.

# https://docs.pytorch.org/vision/stable/models/generated/torchvision.models.efficientnet_b0.html#torchvision.models.efficientnet_b0
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT # DEFAULT = best available weights for this architecture
weights


EfficientNet_B0_Weights.IMAGENET1K_V1

In [30]:
# Ask the weights object for the preprocessing transform that goes with it,
# so our images are prepared the same way as the pretrained weights expect.
auto_transforms = weights.transforms()
auto_transforms

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [32]:
# Rebuild the dataloaders, this time with the transform supplied by the
# pretrained weights (replaces the loaders created with the manual transform).
(train_dataloader,
 test_dataloader,
 class_names) = data_setup.create_dataloaders(train_dir=train_dir,
                                              test_dir=test_dir,
                                              batch_size=32,
                                              transform=auto_transforms)
train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x1731800e0>,
 <torch.utils.data.dataloader.DataLoader at 0x172a78890>,
 ['pizza', 'steak', 'sushi'])

### getting a pretrained model
1. PyTorch domain libraries
2. Libraries like `timm` (torch image models)
3. HuggingFace Hub
4. Paperswithcode(for models across different problem spaces/domains)

Which pretrained model should you use? The million-dollar question: *Experiment, Experiment, Experiment!*

3 things to consider: 
1. speed (how fast does it run?)
2. size (how big is the model? # of parameters and size on disk)
3. performance (how well does it go on your problem?)

Generally smaller size runs faster, although not always true. https://docs.pytorch.org/vision/stable/models.html#table-of-all-available-classification-weights

where does the model live?  Is it on device (like a self-driving car) or on a server?

However, in light of The Bitter Lesson, if we had infinite compute, we'd likely pick the biggest model + most parameters + most general method we could.

In [None]:
# will use this pretrained model: https://docs.pytorch.org/vision/stable/models/generated/torchvision.models.efficientnet_b0.html#torchvision.models.EfficientNet_B0_Weights


In [33]:
# Old way of loading pretrained weights, shown for comparison only.
# NOTE(review): `pretrained=` is the legacy argument that the torchvision docs
# mark as deprecated in favour of `weights=`; the next cell reassigns `model`
# using the `weights=` API.
model = torchvision.models.efficientnet_b0(pretrained=True)



Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /Users/mikeqin/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


100%|██████████| 20.5M/20.5M [00:00<00:00, 30.8MB/s]


In [54]:
# New method: pass the selected weights object explicitly and move the model
# to the chosen device.
model = torchvision.models.efficientnet_b0(weights=weights).to(device)
model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [35]:
# Show only the `features` sub-module of the model.
model.features

Sequential(
  (0): Conv2dNormActivation(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): SiLU(inplace=True)
  )
  (1): Sequential(
    (0): MBConv(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): SiLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (activation): SiLU(inplace=True)
          (scale_activation): Sigmoid()
        )
        (2): Conv2dNormActivation(
          (0): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), 

### EfficientNet feature extractor
See `06_pytorch_transfer_learning.ipynb`.


### kinds of transfer learning
1. Feature Extraction
2. Fine-Tuning


In [46]:
# Layer-by-layer summary of the pretrained model via torchinfo.
summary(
    model=model,
    input_size=(1, 3, 224, 224),  # (batch_size, color_channels, height, width)
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 1000]            --                   True
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1280, 7, 7]      --                   True
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   True
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    864                  True
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    64                   True
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 16, 112,

In [58]:
# Freeze the base model so that only the output layers get trained - this is
# what a feature-extractor setup normally does.
# Turning off requires_grad on `model.features` disables gradient tracking for
# every parameter inside it.
model.features.requires_grad_(False)


In [49]:
# Same summary again - compare the "Trainable" column now that the feature
# layers have been frozen.
summary(
    model=model,
    input_size=(1, 3, 224, 224),  # (batch_size, color_channels, height, width)
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 1000]            --                   Partial
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1280, 7, 7]      --                   False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    (864)                False
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    (64)                 False
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 1

In [59]:
# Replace the classifier head so the output size suits our problem.
# "Head" means the layers closest to the model's outputs.
torch.manual_seed(42)

# 1280 input features into the head; one output unit per class name.
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names)),
).to(device)

model.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=3, bias=True)
)

In [60]:
# Summary after swapping the classifier head.
summary(
    model=model,
    input_size=(1, 3, 224, 224),  # (batch_size, color_channels, height, width)
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 3]               --                   Partial
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1280, 7, 7]      --                   False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    (864)                False
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    (64)                 False
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 1

In [61]:
# Training setup: cross-entropy loss and an Adam optimizer over the model's
# parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
)

In [None]:
from timeit import default_timer as timer

from utils import engine

# Fix the seed before training.
torch.manual_seed(42)

# Time the whole training run (5 epochs) with a wall-clock timer.
start = timer()
result = engine.train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=5,
    device=device,
)
end = timer()

print(f"total training time: {end-start:.3f} seconds")


 20%|██        | 1/5 [01:14<04:57, 74.46s/it]

Epoch: 1 | train_loss: 1.0883 | train_acc: 0.4180 | test_loss: 0.8914 | test_acc: 0.6818


 40%|████      | 2/5 [02:26<03:38, 72.93s/it]

Epoch: 2 | train_loss: 0.9162 | train_acc: 0.6289 | test_loss: 0.8027 | test_acc: 0.7443


 60%|██████    | 3/5 [03:38<02:24, 72.47s/it]

Epoch: 3 | train_loss: 0.8162 | train_acc: 0.7031 | test_loss: 0.6787 | test_acc: 0.9072


 80%|████████  | 4/5 [04:50<01:12, 72.30s/it]

Epoch: 4 | train_loss: 0.7460 | train_acc: 0.7305 | test_loss: 0.6744 | test_acc: 0.8040


100%|██████████| 5/5 [06:02<00:00, 72.45s/it]

Epoch: 5 | train_loss: 0.6209 | train_acc: 0.7695 | test_loss: 0.6263 | test_acc: 0.8561
total training time: 362.282 seconds



