# 05. PyTorch Going Modular
1. This section answers the question, "How do I turn my notebook code into Python scripts?"
- data_setup.py - a file to prepare and download data if needed.
- engine.py - a file containing various training functions.
- model_builder.py or model.py - a file to create a PyTorch model.
- train.py - a file to leverage all other files and train a target PyTorch model.
- utils.py - a file dedicated to helpful utility functions.

In [3]:
# # Import modules required for train.py
# import os
# import torch
# import data_setup, engine, model_builder, utils

# from torchvision import transforms

# 1. Get data

In [4]:
import os 
import requests
import zipfile 
from pathlib import Path 

# Setup paths to the data folder, the extracted images and the temporary archive
data_path = Path("data/")
image_path = data_path / "pizza_steak_sushi"
zip_path = data_path / "pizza_steak_sushi.zip"

if image_path.is_dir():
    print(f"{image_path} directory exists.")
else:
    print(f"Did not find {image_path} directory, creating one...")
    image_path.mkdir(parents=True, exist_ok=True)

# Download pizza, steak, sushi data
# NOTE(review): the archive is fetched on every run, even when the image
# directory already exists - confirm whether this should be skipped.
print("Downloading pizza, steak, sushi data...")
response = requests.get(
    "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    timeout=30,
)
# Fail here on an HTTP error instead of writing an error page to disk and
# hitting a confusing BadZipFile during extraction.
response.raise_for_status()
zip_path.write_bytes(response.content)

# Unzip pizza, steak, sushi data
try:
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        print("Unzipping pizza, steak, sushi data...")
        zip_ref.extractall(image_path)
finally:
    # Remove the zip file even when extraction fails
    os.remove(zip_path)

data\pizza_steak_sushi directory exists.
Downloading pizza, steak, sushi data...
Unzipping pizza, steak, sushi data...


## 2. Create Datasets and DataLoaders (data_setup.py)
- We convert the useful Dataset and DataLoader creation code into a function called create_dataloaders().
- And we write it to file using the line %%writefile going_modular/data_setup.py.

In [5]:
from pathlib import Path 
# Directory that will hold the generated going_modular scripts
modular_path = Path("going_modular")

# Create the directory only when it is missing; otherwise just report it
if not modular_path.is_dir():
    print(f"Did not find {modular_path} directory, creating one...")
    modular_path.mkdir(parents=True, exist_ok=True)
else:
    print(f"{modular_path} directory exists.")

going_modular directory exists.


In [6]:
%%writefile going_modular/data_setup.py 
"""
Contains functionality for creating PyTorch DataLoaders for 
image classification data.
"""
import os 

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# os.cpu_count() may return None when the count cannot be determined;
# fall back to 0 so the default stays a valid integer for DataLoader.
NUM_WORKERS = os.cpu_count() or 0


def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int = NUM_WORKERS):
    """Creates training and testing DataLoaders.

    Takes in a training directory and testing directory path and turns
    them into PyTorch Datasets (via ``datasets.ImageFolder``) and then into
    PyTorch DataLoaders.

    Args:
        train_dir: Path to training directory.
        test_dir: Path to testing directory.
        transform: torchvision transforms to perform on training and testing data.
        batch_size: Number of samples per batch in each of the DataLoaders.
        num_workers: An integer for number of workers per DataLoader.
            Defaults to ``NUM_WORKERS``.

    Returns:
        A tuple of (train_dataloader, test_dataloader, class_names).
        Where class_names is a list of the target classes, taken from the
        training dataset.

    Example usage:
        train_dataloader, test_dataloader, class_names = create_dataloaders(
            train_dir="path/to/train_dir",
            test_dir="path/to/test_dir",
            transform=some_transform,
            batch_size=32,
            num_workers=4)
    """
    # Use ImageFolder to create dataset(s)
    train_data = datasets.ImageFolder(train_dir, transform=transform)
    test_data = datasets.ImageFolder(test_dir, transform=transform)

    # Get class names
    class_names = train_data.classes

    # Turn images into data loaders; honour the caller's num_workers
    # (only the training loader is shuffled).
    train_dataloader = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True
    )

    test_dataloader = DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )

    return train_dataloader, test_dataloader, class_names


Overwriting going_modular/data_setup.py


In [7]:
# Import data_setup.py
from going_modular import data_setup
from torchvision import transforms
# Batch size shared by both DataLoaders
BATCH_SIZE = 32

# Train/test image folders inside the dataset directory
test_dir = image_path / "test"
train_dir = image_path / "train"

# Resize, augment, then convert to a tensor
transform = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.TrivialAugmentWide(num_magnitude_bins=31),
        transforms.ToTensor(),
    ]
)

# Create train/test dataloader and get class names as a list
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=transform,
    batch_size=BATCH_SIZE,
)


## 3. Making a model (model_builder.py)
- Let's put our TinyVGG() model class into a script with the line %%writefile going_modular/TinyVGGmodel_builder.py (this is the file the heading refers to as model_builder.py).

In [8]:
%%writefile going_modular/TinyVGGmodel_builder.py

"""
Contains PyTorch model code to instantiate a TinyVGG model.
"""
import torch
from torch import nn


class TinyVGG(nn.Module):
    """Creates the TinyVGG architecture.

    Replicates the TinyVGG architecture from the CNN explainer website in PyTorch.
    See the original architecture here: https://poloclub.github.io/cnn-explainer/

    The network is two convolutional blocks (Conv -> ReLU -> Conv -> ReLU ->
    MaxPool) followed by a Flatten + Linear classifier.

    Note:
        The classifier's ``in_features`` is hard-coded to
        ``hidden_units * 13 * 13``, which matches 64x64 inputs
        (64 -> 62 -> 60 -> pool 30 -> 28 -> 26 -> pool 13). Other input
        sizes produce a different flattened size and will not match the
        linear layer.

    Args:
        input_shape: An integer indicating number of input channels.
        hidden_units: An integer indicating number of hidden units between layers.
        output_shape: An integer indicating number of output units.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__()
        self.conv_block1 = nn.Sequential(
            # output_size = (input_size - kernel_size + 2*padding) / stride + 1
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=0),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # 13 * 13 is the spatial size left after both blocks for 64x64 inputs
            nn.Linear(in_features=hidden_units * 13 * 13,
                      out_features=output_shape)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through both conv blocks and the classifier; returns the classifier output."""
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.classifier(x)

        return x

Overwriting going_modular/TinyVGGmodel_builder.py


In [9]:
import torch 
from going_modular import TinyVGGmodel_builder
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Seed the RNG, then build the TinyVGG model from the generated script
torch.manual_seed(42)
model = TinyVGGmodel_builder.TinyVGG(
    input_shape=3,
    hidden_units=10,
    output_shape=len(class_names),
)
# Move the model to the selected device
model = model.to(device)

In [10]:
model

TinyVGG(
  (conv_block1): Sequential(
    (0): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=1690, out_features=3, bias=True)
  )
)

# 4. Creating train_step() and test_step() functions and train() to combine them

In [11]:
%%writefile going_modular/engine.py
"""
Contains functions for training and testing a PyTorch model.
"""
import torch

from tqdm.auto import tqdm 
from typing import List, Tuple, Dict 

def train_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              optimizer: torch.optim.Optimizer,
              device: torch.device)-> Tuple[float, float]:
    """Trains a PyTorch model for a single pass over ``dataloader``.

    Puts the model in train mode and, for every batch, runs the forward
    pass, computes the loss, back-propagates and steps the optimizer.

    Args:
        model: The model to train.
        dataloader: Iterable of ``(X, y)`` batches; must support ``len()``.
        loss_fn: Loss function called as ``loss_fn(y_pred, y)``.
        optimizer: Optimizer used to update the model's parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(train_loss, train_acc)``: the loss and the accuracy, each
        averaged over the number of batches.
    """
    # Put model in train mode
    model.train()

    # Setup train loss and train accuracy values
    train_loss, train_acc = 0, 0

    # Loop through data loader data batches
    for X, y in dataloader:
        # Send data to target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate and accumulate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Backward prop
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Predicted class = index of the largest logit. softmax is monotonic,
        # so applying it before argmax is unnecessary (test_step does the same).
        y_pred_class = y_pred.argmax(dim=1)
        train_acc += (y_pred_class == y).sum().item() / len(y_pred)

    # Adjust metrics to get average loss and accuracy per batch
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)

    return train_loss, train_acc

def test_step(model: torch.nn.Module,
             dataloader: torch.utils.data.DataLoader,
             loss_fn: torch.nn.Module,
             device: torch.device)-> Tuple[float, float]:
    model.eval()

    # Setup test loss and test accuracy values
    test_loss, test_acc = 0, 0

    with torch.inference_mode():
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)

            test_pred_logits = model(X)

            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()

            # Calculate and accumulate accuracy
            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += (test_pred_labels == y ).sum().item() / len(test_pred_labels)
    # Adjust metrics to get average loss and accuracy per batch 
    test_loss /= len(dataloader)
    test_acc /= len(dataloader)

    return test_loss, test_acc

def train(model: torch.nn.Module,
         train_dataloader: torch.utils.data.DataLoader,
         test_dataloader: torch.utils.data.DataLoader,
         optimizer: torch.optim.Optimizer,
         loss_fn: torch.nn.Module,
         epochs: int,
         device: torch.device)-> Dict[str, List]:
    """Trains and evaluates a PyTorch model for a number of epochs.

    Each epoch calls ``train_step()`` on ``train_dataloader`` and then
    ``test_step()`` on ``test_dataloader``, prints the four metrics and
    records them.

    Args:
        model: The model to train and evaluate.
        train_dataloader: DataLoader passed to ``train_step()``.
        test_dataloader: DataLoader passed to ``test_step()``.
        optimizer: Optimizer passed to ``train_step()``.
        loss_fn: Loss function passed to both steps.
        epochs: Number of epochs to run.
        device: Device passed to both steps.

    Returns:
        A dictionary with the keys "train_loss", "train_acc", "test_loss"
        and "test_acc", each mapping to a list with one value per epoch.
    """
    metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
    results = {name: [] for name in metric_names}

    for epoch in tqdm(range(epochs)):
        # One training pass followed by one evaluation pass
        train_loss, train_acc = train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
        )
        test_loss, test_acc = test_step(
            model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device,
        )

        # Report this epoch's metrics
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # Record this epoch's metrics in the same order as metric_names
        epoch_values = (train_loss, train_acc, test_loss, test_acc)
        for name, value in zip(metric_names, epoch_values):
            results[name].append(value)

    return results
    

Overwriting going_modular/engine.py


In [12]:
# Import engine.py
from going_modular import engine
from torch import nn
# Number of passes over the training data
EPOCHS = 5

# Loss function and optimizer used for training
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# Train with engine.py and keep the per-epoch metrics
results = engine.train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=EPOCHS,
    device=device,
)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.1068 | train_acc: 0.3008 | test_loss: 1.0999 | test_acc: 0.2396
Epoch: 2 | train_loss: 1.0985 | train_acc: 0.3086 | test_loss: 1.0724 | test_acc: 0.5417
Epoch: 3 | train_loss: 1.0891 | train_acc: 0.4336 | test_loss: 1.0830 | test_acc: 0.5417
Epoch: 4 | train_loss: 1.0982 | train_acc: 0.3242 | test_loss: 1.0732 | test_acc: 0.5019
Epoch: 5 | train_loss: 1.0902 | train_acc: 0.3516 | test_loss: 1.0916 | test_acc: 0.3409


## 5. Creating a function to save the model (utils.py)

In [13]:
model.state_dict()

OrderedDict([('conv_block1.0.weight',
              tensor([[[[ 1.5923e-01,  1.7172e-01, -3.1945e-02],
                        [ 1.8796e-01, -3.1567e-02,  5.0048e-02],
                        [-8.2667e-02,  1.2323e-01,  1.7938e-01]],
              
                       [[-1.3611e-01,  1.7294e-01,  4.3165e-02],
                        [ 1.4625e-01,  3.0368e-02,  9.7939e-02],
                        [-2.2872e-02,  1.5282e-01,  3.2349e-02]],
              
                       [[-8.5285e-02,  5.4196e-02, -8.2246e-02],
                        [-1.8922e-02, -7.4236e-02,  1.3193e-01],
                        [-1.4837e-01, -8.4645e-02, -5.1021e-02]]],
              
              
                      [[[-1.2252e-01,  1.1969e-02, -1.9479e-01],
                        [ 1.6577e-01, -1.6914e-01,  1.4081e-01],
                        [ 2.4304e-02, -7.0007e-02,  1.1119e-01]],
              
                       [[ 2.3347e-02,  1.4992e-01,  1.7215e-02],
                        [-6.7097e-02,

In [14]:
%%writefile going_modular/utils.py

"""
Contains various utility functions for PyTorch model training and saving.
"""

import torch 
from pathlib import Path

def save_model(model: torch.nn.Module,
              target_dir: str,
              model_name: str):
    """Saves a PyTorch model's ``state_dict()`` to a target directory.

    Args:
        model: The model whose ``state_dict()`` is saved.
        target_dir: Directory to save into; created (with parents) if missing.
        model_name: File name for the saved model. Must end with ".pt" or ".pth".

    Raises:
        AssertionError: If ``model_name`` does not end with ".pt" or ".pth".

    Example usage:
        save_model(model=model_0,
                   target_dir="models",
                   model_name="tinyvgg_model.pth")
    """
    # Validate the file name before touching the filesystem, so an invalid
    # name does not leave a freshly created directory behind. Raised
    # explicitly (not via ``assert``) so the check also runs under ``python -O``;
    # the exception type is the same one ``assert`` would raise.
    if not model_name.endswith((".pth", ".pt")):
        raise AssertionError("model_name should end with '.pt' or '.pth'")

    # Create target directory
    target_dir_path = Path(target_dir)
    target_dir_path.mkdir(parents=True,
                          exist_ok=True)

    # Create model save path
    model_save_path = target_dir_path / model_name

    # Save the model state_dict()
    print(f"[INFO] Saving model to: {model_save_path}")
    torch.save(obj=model.state_dict(),
               f=model_save_path)

Writing going_modular/utils.py


In [17]:
# Import utils.py
from going_modular import utils

# Save a model to file
# Write the trained model's weights to model/TinyVGG.pth via utils.py
utils.save_model(
    model=model,
    target_dir="model",
    model_name="TinyVGG.pth",
)

[INFO] Saving model to: model\TinyVGG.pth


# 6. Train, evaluate and save the model (train.py)

In [18]:
%%writefile going_modular/train.py

"""
Trains a PyTorch image classification model using device-agnostic code.
"""

import os
import torch
from going_modular import data_setup, engine, utils
# The TinyVGG script is written to going_modular/TinyVGGmodel_builder.py,
# so import that module and keep the short name used below.
from going_modular import TinyVGGmodel_builder as model_builder

from torchvision import transforms

# Setup hyperparameters
NUM_EPOCHS = 5
BATCH_SIZE = 32
HIDDEN_UNITS = 10
LEARNING_RATE = 0.001


# Setup directories
train_dir = "data/pizza_steak_sushi/train"
test_dir = "data/pizza_steak_sushi/test"


def main() -> None:
    """Build the DataLoaders and model, train for NUM_EPOCHS, then save the model."""
    # Setup target device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Create transforms
    data_transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor()
    ])

    # Create DataLoaders with help from data_setup.py
    train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
        train_dir=train_dir,
        test_dir=test_dir,
        transform=data_transform,
        batch_size=BATCH_SIZE
    )

    # Create model with help from the model builder script
    model = model_builder.TinyVGG(
        input_shape=3,
        hidden_units=HIDDEN_UNITS,
        output_shape=len(class_names)
    ).to(device)

    # Set loss and optimizer
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE)

    # Start training with help from engine.py
    engine.train(model=model,
                 train_dataloader=train_dataloader,
                 test_dataloader=test_dataloader,
                 loss_fn=loss_fn,
                 optimizer=optimizer,
                 epochs=NUM_EPOCHS,
                 device=device)

    # Save the model with help from utils.py
    utils.save_model(model=model,
                     target_dir="models",
                     model_name="05_going_modular_script_mode_tinyvgg_model.pth")


# Guard the entry point: DataLoader worker processes started with the "spawn"
# method (the default on Windows) re-import this module, and must not re-run
# the training code when they do.
if __name__ == "__main__":
    main()

Writing going_modular/train.py
