<a href="https://colab.research.google.com/github/linhoangce/pytorch_crash_course/blob/main/modular_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Get Data


In [53]:
import os
import requests
import zipfile
from pathlib import Path

# Setup path to data folder
data_path = Path('data')
image_path = data_path / 'pizza_steak_sushi'

# check if folder exists before downloading
if image_path.is_dir():
  print(f'{image_path} exists.')
else:
  print(f"{image_path} doesn't exist. Creating folder...")
  image_path.mkdir(parents=True, exist_ok=True)

# download data
with open(data_path / 'pizza_steak_sushi.zip', 'wb') as f:
  request = requests.get('https://github.com/mrdbourke/pytorch-deep-learning/raw/refs/heads/main/data/pizza_steak_sushi.zip')
  print(f'Downloading data from {request.url}...')
  f.write(request.content)

# Unzip data
with zipfile.ZipFile(data_path / 'pizza_steak_sushi.zip', 'r') as zip_ref:
  print("Unzipping data ...")
  zip_ref.extractall(image_path)

# Remove zip file
os.remove(data_path / "pizza_steak_sushi.zip")

data/pizza_steak_sushi exists.
Downloading data from https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/refs/heads/main/data/pizza_steak_sushi.zip...
Unzipping data ...


## 2. Create Datasets and DataLoaders (`data_setup.py`)

In [54]:
# Create module folder
# module_path = Path('going_module')

# if not module_path.is_dir():
#   module_path.mkdir(parents=True, exist_ok=True)
import os

if not os.path.exists('going_modular'):
  os.makedirs('going_modular')

In [55]:
%%writefile going_modular/data_setup.py
"""
Contains functionality for creating PyTorch DataLoaders for image classification data.
"""
import os

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int=NUM_WORKERS
):
  """Creates training and testing DataLoaders.

  Takes in a training and testing directory path and turns them into PyTorch Datasets and then into PyTorch DataLoaders.

  Args:
    train_dir: Path to training directory.
    test_dir: Path to testing directory.
    transform: torchvision transforms to perform on training and testing data.
    batch_size: Number of samples per batch in each of the DataLoaders.
    num_workers: An integer for number of workers per DataLoader.

  Returns:
    A tuple of (train_dataloader, test_dataloader, class_names).
    Where class_names is a list of the target classes.

  Example usage:
    train_dataloader, test_dataloader, class_names = create_dataloaders(
      train_dir=path/to/train_dir,
      test_dir=path/to/test_dir,
      trainsform=some_transform,
      batch_size=32,
      num_workers=3
    )
  """
  # Use ImageFolder to create dataset(s)
  train_data = datasets.ImageFolder(root=train_dir, transform=transform)
  test_data = datasets.ImageFolder(root=test_dir, transform=transform)

  # Get the class names
  class_names = train_data.classes

  # Turn images into DataLoaders
  train_dataloader = DataLoader(train_data,
                                batch_size=batch_size,
                                shuffle=True,
                                num_workers=num_workers,
                                pin_memory=True)

  test_dataloader = DataLoader(test_data,
                               batch_size=batch_size,
                               shuffle=False,
                               num_workers=num_workers,
                               pin_memory=True)

  return train_dataloader, test_dataloader, class_names


Overwriting going_modular/data_setup.py


In [56]:
from going_modular import data_setup

In [57]:
train_dir = image_path / 'train'
test_dir = image_path / 'test'

In [58]:
from torchvision import transforms

data_transform = transforms.Compose([
    transforms.Resize(size=(64, 64)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor()
])

In [59]:
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=data_transform,
    batch_size=32
)

train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x7bdbe5f4eb10>,
 <torch.utils.data.dataloader.DataLoader at 0x7bdcbe7d3410>,
 ['pizza', 'steak', 'sushi'])

## 3. Create a model builder script

In [60]:
!rm -rf going_modular/model_builder.py

In [61]:
%%writefile going_modular/model_builder.py
"""
Contains PyTorch model code to instantiate a TinyVGG model.
"""

import torch
from torch import nn

class TinyVGG(nn.Module):
  """Creates the TinyVGG architecture.

  Replicates the TinyVGG architecture from the CNN Explainer website in PyTorch.
  See the original architecture here: https://poloclub.github.io/cnn-explainer/

  Args:
    input_shape: An integer indicating number of input channels.
    hidden_units: An integer indicating number of hidden units between layers.
    output_shape: An integer indicating number of output units.
  """
  def __init__(self,
               input_shape,
               hidden_units,
               output_shape):
    super().__init__()
    self.conv_block1 = nn.Sequential(
        nn.Conv2d(in_channels=input_shape,
                  out_channels=hidden_units,
                  kernel_size=3,
                  stride=1,
                  padding=0),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units,
                  out_channels=hidden_units,
                  kernel_size=3,
                  stride=1,
                  padding=0),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2,
                     stride=2)
    )
    self.conv_block2 = nn.Sequential(
        nn.Conv2d(in_channels=hidden_units,
                  out_channels=hidden_units,
                  kernel_size=3,
                  stride=1,
                  padding=0),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units,
                  out_channels=hidden_units,
                  kernel_size=3,
                  stride=1,
                  padding=0),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2,
                     stride=2)
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_units*13*13,
                  out_features=output_shape)
    )

  def forward(self, x):
    # x = self.conv_block1(x)
    # print(x.shape)
    # x = self.conv_block2(x)
    # print(x.shape)
    # x = self.classifier(x)
    # return x
    return self.classifier(self.conv_block2(self.conv_block1(x)))

Writing going_modular/model_builder.py


In [62]:
from going_modular import model_builder

model_0 = model_builder.TinyVGG(
    input_shape=3,
    hidden_units=10,
    output_shape=len(class_names)
)

In [63]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 1. Get a batch of images and labels from the DataLoader
img_batch, label_batch = next(iter(train_dataloader))

# 2, Get a single image from the batch and unsqueeze the image so its shape fits the model
img_single, label_single = img_batch[0].unsqueeze(dim=0), label_batch[0]
print(f"Single image shape: {img_single.shape}")

# 3. Perform a forward pass os a single image
model_0.eval()
with torch.inference_mode():
  logits = model_0(img_single.to(device))

print(f"Output logits: \n{logits}\n")
print(f"Pred probs: \n{torch.softmax(logits, dim=1)}\n")
print(f"Pred label: \n{torch.argmax(logits, dim=1)}\n")
print(f"True label: \n{label_single}")

Single image shape: torch.Size([1, 3, 64, 64])
Output logits: 
tensor([[-0.0465, -0.0021,  0.0047]])

Pred probs: 
tensor([[0.3228, 0.3375, 0.3397]])

Pred label: 
tensor([2])

True label: 
1


## 4. Turn training function into a script

In [64]:
%%writefile going_modular/engine.py
"""
Contains functions for training and testing a PyTorch model.
"""
import torch

from typing import Dict, List, Tuple


def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device):
  """Trains a PyTorch model for a single epoch.

  Turns a target PyTorch model to training mode and then
  runs through all of the required training steps (forwrd pass,
  loss calculation, optimizer step).

  Args:
    model: A PyTorch model to be trained.
    data_loader: A DataLoader instance for the model to be trained on.
    loss_fn: A PyTorch loss function to minimize.
    optimizer: A  PyTorch optimizer to help minimize the loss.
    device: A target device to compute on (e.g 'cuda' o 'cpu')

  Returns:
    A tuple of training loss and training accuracy metrics.
    In the form (train_loss, train_accuracy). For example:

    (0.1566, 0.8762)
  """
  # Move model to target device
  model.to(device)

  # Put model in train mode
  model.train()

  # Setup train loss and accuracy values
  train_loss, train_acc = 0, 0

  # Loop through data loader data batches
  for batch, (X, y) in enumerate(data_loader):
    # Send data to target device
    X, y = X.to(device), y.to(device)

    # forward pass
    logits = model(X)
    y_pred = torch.argmax(logits, dim=1)

    # calculate loss
    loss = loss_fn(logits, y)
    train_loss += loss.item()

    # Optimizer zero grad
    optimizer.zero_grad()

    # backward pass
    loss.backward()

    optimizer.step()

    # Calculate and accumulate accuracy metric across all batches
    train_acc += (y_pred==y).sum().item() / len(y_pred)

  # Calculate average loss and acc per batch
  train_loss /= len(data_loader)
  train_acc /= len(data_loader)

  return train_loss, train_acc

def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device):
  """Tests a PyTorch model for a single epoch.

  Turns a target PyTorch model to "eval" mode and then performs
  a forward pass on a testing dataset.

  Args:
    model: A PyTorch model to be tested.
    data_loader: A DataLoader instance for the model to be tested on.
    loss_fn: A PyTorch loss function to calculate loss on the test data.
    device: A target device to compute on

  Returns:
  A tuple of testing loss and testing accuracy metrics.
  In the form (test_loss, test_accuracy). For example:

  (0.2211, 0.8877)
  """
  # Move model to target device
  model.to(device)

  # Set up loss and acc
  test_loss, test_acc = 0, 0

  # Turn on eval mode
  model.eval()
  with torch.inference_mode():
    for X, y in data_loader:
      # move data to target device
      X, y = X.to(device), y.to(device)

      # forward pass
      logits = model(X)
      y_pred = torch.argmax(logits, dim=1)

      loss = loss_fn(logits, y)
      test_loss += loss.item()
      test_acc += (y_pred==y).sum().item() / len(y_pred)

  # calculate average loss and acc per batch
  test_loss /= len(data_loader)
  test_acc /= len(data_loader)

  return test_loss, test_acc


def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          loss_fn: torch.nn.Module,
          optimizer: torch.optim.Optimizer,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
  """Trains and tests a PyTorch model.

  Passes a target PyTorch model through train_step() and test_step()
  functions for a number of epochs, training and testing the model
  in the same epoch loop.

  Calculates, prints and stores evulation metrics throughout.

  Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on.

  Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for each epoch.
    In the form: {train_loss: [...],
                  train_acc: [...],
                  test_loss: [...],
                  test_acc: [...]}
    For example, if training for epochs=2:
                  {train_loss: [2.0616, 1.0537],
                  train_acc: [0.3945, 0.3945],
                  test_loss: [1.2641, 1.5706],
                  test_acc: [0.3400, 0.2973]}
  """
  # create empty result dict
  results = {"train_loss": [],
             "train_acc": [],
             "test_loss": [],
             "test_acc": []}

  # Loop thorugh training and testing steps for a number of epochs
  for epoch in range(epochs):
    train_loss, train_acc = train_step(model=model,
                                       data_loader=train_dataloader,
                                       loss_fn=loss_fn,
                                       optimizer=optimizer,
                                       device=device)
    test_loss, test_acc = test_step(model=model,
                                    data_loader=test_dataloader,
                                    loss_fn=loss_fn,
                                    device=device)

    print(f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
          )

    # Update result dict
    results["train_loss"].append(train_loss)
    results["train_acc"].append(train_acc)
    results['test_loss'].append(test_loss)
    results['test_acc'].append(test_acc)

  return results

Overwriting going_modular/engine.py


In [65]:
from going_modular import engine

In [66]:
# instantiate loss and optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_0.parameters(),
                             lr=0.001)

In [67]:
EPOCHS = 20

engine.train(model=model_0,
             train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             epochs=EPOCHS,
             device=device)

Epoch: 1 | train_loss: 1.1073 | train_acc: 0.2812 | test_loss: 1.0872 | test_acc: 0.5417
Epoch: 2 | train_loss: 1.1030 | train_acc: 0.2969 | test_loss: 1.1063 | test_acc: 0.2604
Epoch: 3 | train_loss: 1.0925 | train_acc: 0.4258 | test_loss: 1.0999 | test_acc: 0.2604
Epoch: 4 | train_loss: 1.0997 | train_acc: 0.3047 | test_loss: 1.0983 | test_acc: 0.2604
Epoch: 5 | train_loss: 1.0966 | train_acc: 0.3047 | test_loss: 1.0838 | test_acc: 0.2604
Epoch: 6 | train_loss: 1.0859 | train_acc: 0.4258 | test_loss: 1.0722 | test_acc: 0.2604
Epoch: 7 | train_loss: 1.0868 | train_acc: 0.3047 | test_loss: 1.0692 | test_acc: 0.2604
Epoch: 8 | train_loss: 1.0669 | train_acc: 0.3203 | test_loss: 1.0442 | test_acc: 0.3419
Epoch: 9 | train_loss: 1.0456 | train_acc: 0.3164 | test_loss: 1.0525 | test_acc: 0.4924
Epoch: 10 | train_loss: 1.0507 | train_acc: 0.4023 | test_loss: 0.9974 | test_acc: 0.3920
Epoch: 11 | train_loss: 0.9558 | train_acc: 0.6523 | test_loss: 1.0248 | test_acc: 0.3011
Epoch: 12 | train_l

{'train_loss': [1.107267677783966,
  1.1030047684907913,
  1.092466562986374,
  1.099650353193283,
  1.0965719819068909,
  1.085919350385666,
  1.0867832601070404,
  1.0668695718050003,
  1.045585185289383,
  1.0507371053099632,
  0.9558431878685951,
  0.910836398601532,
  0.9417855143547058,
  0.9289010614156723,
  0.9017343446612358,
  0.8258182927966118,
  0.8360626175999641,
  0.8383419886231422,
  0.8302755132317543,
  0.8226142972707748],
 'train_acc': [0.28125,
  0.296875,
  0.42578125,
  0.3046875,
  0.3046875,
  0.42578125,
  0.3046875,
  0.3203125,
  0.31640625,
  0.40234375,
  0.65234375,
  0.62890625,
  0.48828125,
  0.546875,
  0.61328125,
  0.65234375,
  0.54296875,
  0.56640625,
  0.6640625,
  0.6484375],
 'test_loss': [1.087166468302409,
  1.1062618494033813,
  1.0998746156692505,
  1.0982983509699504,
  1.0837994813919067,
  1.0722003777821858,
  1.0692476828893025,
  1.04424915711085,
  1.0525358319282532,
  0.9974040985107422,
  1.0247993469238281,
  0.97715210914611

## 5. Create a function to save a model

In [68]:
from pathlib import Path

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
  """Saves a PyTorch model to target_dir.

  Args:
    model: A target PyTorch model to save.
    target_dir: A directory for saving the model to.
    model_name: A filename for the saved model. Should include
      either ".pth" or ".pt" as the file extention.

  Example usage:
    save_model(model=model_0,
                target_dir="models",
                model_name="example_model_name")
  """
  # create target directory
  target_dir_path = Path(target_dir)
  target_dir_path.mkdir(parents=True,
                        exist_ok=True)

  # create model save path
  assert model_name.endswith(".pth") or model_name.endswith('.pt'), \
  "model_name should end with '.pth' or '.pt'"
  model_save_path = target_dir_path / model_name

  # save the model state_dict
  print(f"[INFO] Saving model to: {model_save_path}")
  torch.save(obj=model.state_dict(),
             f=model_save_path)

In [69]:
%%writefile going_modular/utils.py
"""
File containing various utility functions for PyTorch model training.
"""
import torch
from pathlib import Path

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
  """
  Saves a PyTorch model to target_dir.

  Args:
    model: A target PyTorch model to save.
    target_dir: A directory for saving the model to.
    model_name: A filename for the saved model. Should include
      either ".pth" or ".pt" as the file extention.

  Example usage:
    save_model(model=model_0,
                target_dir="models",
                model_name="example_model_name")
  """
  # Create target directory
  target_dir_path = Path(target_dir)
  target_dir_path.mkdir(parents=True,
                        exist_ok=True)

  # Create model save path
  assert model_name.endswith(".pth") or model_name.endswith(".pt"), \
  "model_name must end with '.pth' or '.pt'"
  model_save_path = target_dir_path / model_name

  # save model state_dict
  print(f'[INFO] Saving model to {model_save_path}')
  torch.save(obj=model.state_dict(),
             f=model_save_path)

Overwriting going_modular/utils.py


In [70]:
from going_modular import utils

In [71]:
utils.save_model(model=model_0,
                 target_dir='models',
                 model_name='model_0_tinyvgg.pth')

[INFO] Saving model to models/model_0_tinyvgg.pth


### Train, evaluate and save model in `train.py`

In [72]:
%%writefile going_modular/train.py
"""
Trains a PyTorch image classification model using device-agnostic code.
"""
import torch
import os
from torchvision import transforms
import data_setup, model_builder, engine, utils
from timeit import default_timer as timer

# Setup hyperparameters
EPOCHS = 5
BATCH_SIZE = 32
HIDDEN_UNITS = 10
LEARNING_RATE = 0.001

# Setup directories
train_dir = "data/pizza_steak_sushi/train"
test_dir = "data/pizza_steak_sushi/test"

# Setup device agnostic
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create transform
data_transform = transforms.Compose([
    transforms.Resize(size=(64, 64)),
    transforms.ToTensor()
])


# Create DataLoaders and get class_names
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=data_transform,
    batch_size=BATCH_SIZE
)

# Create model
model = model_builder.TinyVGG(input_shape=3,
                              hidden_units=HIDDEN_UNITS,
                              output_shape=len(class_names))

# Setup loss and optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(),
                             lr=LEARNING_RATE)

start_time = timer()
# Start training with help from engine.py
engine.train(model=model,
             train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             epochs=EPOCHS,
             device=device)
end_time = timer()
print(f"[INFO] Total training time: {end_time - start_time} seconds")

# Save model
utils.save_model(model=model,
                 target_dir='models',
                 model_name='model_0_tinyvgg1.pth')

Overwriting going_modular/train.py


In [74]:
!python going_modular/train.py

Epoch: 1 | train_loss: 1.1028 | train_acc: 0.3633 | test_loss: 1.0854 | test_acc: 0.5417
Epoch: 2 | train_loss: 1.0871 | train_acc: 0.4023 | test_loss: 1.0597 | test_acc: 0.5417
Epoch: 3 | train_loss: 1.0819 | train_acc: 0.4023 | test_loss: 1.0440 | test_acc: 0.5417
Epoch: 4 | train_loss: 1.0700 | train_acc: 0.4023 | test_loss: 1.0348 | test_acc: 0.5417
Epoch: 5 | train_loss: 1.0795 | train_acc: 0.2812 | test_loss: 1.0257 | test_acc: 0.5322
[INFO] Total training time: 10.64916669199988 seconds
[INFO] Saving model to models/model_0_tinyvgg1.pth
