# Computer Vision Classification CNN TinyVGG model on Food101 extract dataset - modular approach

This notebook generates Python scripts that are then used to download the data, train a model, and make predictions.

## 1. Get data

In [1]:
# Make sure the folder that will receive the generated scripts is present.
# Re-running this cell is safe: an already existing folder is left untouched.

import os
os.makedirs(name="modular_approach", exist_ok=True)

In [2]:
%%writefile modular_approach/data_download.py
"""
Contains functionality to download and unzip prepared data
for training and testing from target URL.
"""

import requests
import zipfile
from pathlib import Path

# Setup paths: the archive is saved in data/ and extracted into data/food101_extract
data_path = Path("data/")
image_path = data_path / "food101_extract"
archive_path = data_path / "food101_extract.zip"
DATA_URL = "https://github.com/slawomirwojtas/ML-Projects/raw/main/food101_extract.zip"

# Create data folder if doesn't exist
if image_path.is_dir():
  print(f"{image_path} directory already exists")
else:
  image_path.mkdir(parents=True, exist_ok=True)
  print(f"{image_path} directory created")

# Download food101_extract data.
# The request is made (and its HTTP status checked) BEFORE the archive file is
# opened for writing, so a failed download raises here instead of leaving an
# empty or invalid zip file behind that would only fail later during unzipping.
print("Downloading food101_extract data...")
response = requests.get(DATA_URL, timeout=60)
response.raise_for_status()
with open(archive_path, "wb") as f:
  f.write(response.content)

# Unzip food101_extract data
with zipfile.ZipFile(archive_path, "r") as zip_ref:
  print("Unzipping food101_extract data...")
  zip_ref.extractall(image_path)
  print("Done")

Writing modular_approach/data_download.py


## 2. Create Datasets and DataLoaders

In [3]:
%%writefile modular_approach/data_setup.py
"""
Contains functionality for creating PyTorch DataLoaders for image classification data.
"""

import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

NUM_WORKERS = os.cpu_count()



def create_dataloaders(
  train_dir: str,
  test_dir: str,
  transform: transforms.Compose,
  batch_size: int,
  num_workers: int=NUM_WORKERS
  ):
  """
  Creates training and testing DataLoaders.

  Takes in a training directory and testing directory path and turns them into
  PyTorch Datasets (via torchvision's ImageFolder) and then into PyTorch DataLoaders.

  Args:
    train_dir: Path to training directory.
    test_dir: Path to testing directory.
    transform: torchvision transforms to perform on training and testing data.
      The same transform is applied to both datasets.
    batch_size: Number of samples per batch in each of the DataLoaders.
    num_workers: An integer for number of workers per DataLoader. Defaults to
      os.cpu_count(); if that value is None (CPU count could not be determined),
      data is loaded in the main process (0 workers).

  Returns:
    A tuple of (train_dataloader, test_dataloader, class_names).
    Where class_names is a list of the target classes, taken from the
    training dataset.
    Example usage:
      train_dataloader, test_dataloader, class_names = create_dataloaders(train_dir=path/to/train_dir,
        test_dir=path/to/test_dir,
        transform=some_transform,
        batch_size=32,
        num_workers=4)
  """

  # os.cpu_count() may return None; DataLoader requires an integer, so fall
  # back to loading in the main process in that case.
  if num_workers is None:
    num_workers = 0

  # Use ImageFolder to create datasets
  train_data = datasets.ImageFolder(train_dir, transform=transform)
  test_data = datasets.ImageFolder(test_dir, transform=transform)

  # Get class names
  class_names = train_data.classes

  # Settings shared by both DataLoaders
  loader_kwargs = dict(batch_size=batch_size,
                       num_workers=num_workers,
                       pin_memory=True)

  # Turn images into DataLoaders: only the training data is shuffled
  train_dataloader = DataLoader(train_data, shuffle=True, **loader_kwargs)
  test_dataloader = DataLoader(test_data, shuffle=False, **loader_kwargs)

  return train_dataloader, test_dataloader, class_names




Writing modular_approach/data_setup.py


## 3. Making a model (TinyVGG for 224x224 px input)

In [4]:
%%writefile modular_approach/model_builder.py
"""
Contains PyTorch model code to instantiate a TinyVGG model from the CNN Explainer website.
"""

import torch
from torch import nn

class TinyVGG(nn.Module):
  """Creates the TinyVGG architecture.

  Replicates the TinyVGG architecture from the CNN explainer website in PyTorch.
  See the original architecture here: https://poloclub.github.io/cnn-explainer/

  The network is two convolutional blocks (conv -> ReLU -> conv -> ReLU ->
  max-pool) followed by a flatten + linear classifier.

  Args:
    input_shape: An integer indicating number of input channels.
    hidden_units: An integer indicating number of hidden units between layers.
    output_shape: An integer indicating number of output units.
    image_size: Height and width (in pixels) of the square input images.
      Used to size the classifier's input layer. Defaults to 224, which
      yields the 53x53 feature maps this model was originally written for.

  Raises:
    ValueError: If image_size is too small to survive both convolutional blocks.
  """
  def __init__(self, input_shape: int, hidden_units: int, output_shape: int,
               image_size: int = 224) -> None:
      super().__init__()

      feature_size = self._feature_map_size(image_size)
      if feature_size < 1:
          raise ValueError(
              f"image_size={image_size} is too small for TinyVGG: "
              "the feature map would be empty after the two convolutional blocks"
          )

      self.conv_block_1 = nn.Sequential(
          nn.Conv2d(in_channels=input_shape,
                    out_channels=hidden_units,
                    kernel_size=3, # how big is the square that's going over the image?
                    stride=1, # default
                    padding=0), # options = "valid" (no padding) or "same" (output has same shape as input) or int for specific number
          nn.ReLU(),
          nn.Conv2d(in_channels=hidden_units,
                    out_channels=hidden_units,
                    kernel_size=3,
                    stride=1,
                    padding=0),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2,
                        stride=2) # default stride value is same as kernel_size
      )
      self.conv_block_2 = nn.Sequential(
          nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=0),
          nn.ReLU(),
          nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=0),
          nn.ReLU(),
          nn.MaxPool2d(2)
      )
      self.classifier = nn.Sequential(
          nn.Flatten(),
          # in_features = channels * height * width of the feature maps that
          # leave conv_block_2 (hidden_units * 53 * 53 for 224x224 inputs).
          nn.Linear(in_features=hidden_units*feature_size*feature_size,
                    out_features=output_shape)
      )

  @staticmethod
  def _feature_map_size(image_size: int) -> int:
      """Returns the side length of the feature maps after both conv blocks.

      Each block applies two unpadded 3x3 convolutions (each trims 2 pixels)
      followed by a 2x2 max-pool with stride 2 (floor-halves the size).
      """
      after_block_1 = (image_size - 2 - 2) // 2
      after_block_2 = (after_block_1 - 2 - 2) // 2
      return after_block_2

  def forward(self, x: torch.Tensor) -> torch.Tensor:
      """Runs the input batch through both conv blocks and the classifier, returning logits."""
      x = self.conv_block_1(x)
      x = self.conv_block_2(x)
      x = self.classifier(x)
      return x


Writing modular_approach/model_builder.py


## 4. Creating engine for training and testing steps combined with single train function

In [6]:
%%writefile modular_approach/engine.py
"""
Contains functions for training and testing a PyTorch model.
"""
from typing import Dict, List, Tuple
import torch
from tqdm.auto import tqdm

def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
  """Trains a PyTorch model for a single epoch.

  Turns a target PyTorch model to training mode and then
  runs through all of the required training steps (forward
  pass, loss calculation, optimizer step) for every batch.

  Args:
    model: A PyTorch model to be trained.
    dataloader: A DataLoader instance for the model to be trained on.
      Must yield at least one batch, otherwise the per-batch averaging
      divides by zero.
    loss_fn: A PyTorch loss function to minimize.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    device: A target device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A tuple of training loss and training accuracy metrics, each averaged
    over the batches (not over the individual samples).
    In the form (train_loss, train_accuracy). For example:

    (0.1112, 0.8743)
  """
  # Put model in train mode
  model.train()

  # Setup train loss and train accuracy values
  train_loss, train_acc = 0.0, 0.0

  # Loop through data loader data batches
  for X, y in dataloader:
      # Send data to target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      y_pred = model(X)

      # 2. Calculate and accumulate loss
      loss = loss_fn(y_pred, y)
      train_loss += loss.item()

      # 3. Optimizer zero grad
      optimizer.zero_grad()

      # 4. Loss backward
      loss.backward()

      # 5. Optimizer step
      optimizer.step()

      # Calculate and accumulate accuracy metric across all batches.
      # argmax is taken directly on the logits (same as test_step): softmax
      # is monotonic, so it does not change which class scores highest.
      y_pred_class = y_pred.argmax(dim=1)
      train_acc += (y_pred_class == y).sum().item()/len(y_pred)

  # Adjust metrics to get average loss and accuracy per batch
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple[float, float]:
  """Tests a PyTorch model for a single epoch.

  Turns a target PyTorch model to "eval" mode and then performs
  a forward pass on a testing dataset.

  Args:
    model: A PyTorch model to be tested.
    dataloader: A DataLoader instance for the model to be tested on.
    loss_fn: A PyTorch loss function to calculate loss on the test data.
    device: A target device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A tuple of testing loss and testing accuracy metrics.
    In the form (test_loss, test_accuracy). For example:

    (0.0223, 0.8985)
  """
  # Put model in eval mode
  model.eval()

  # Setup test loss and test accuracy values
  test_loss, test_acc = 0, 0

  # Turn on inference context manager
  with torch.inference_mode():
      # Loop through DataLoader batches
      for batch, (X, y) in enumerate(dataloader):
          # Send data to target device
          X, y = X.to(device), y.to(device)

          # 1. Forward pass
          test_pred_logits = model(X)

          # 2. Calculate and accumulate loss
          loss = loss_fn(test_pred_logits, y)
          test_loss += loss.item()

          # Calculate and accumulate accuracy
          test_pred_labels = test_pred_logits.argmax(dim=1)
          test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

  # Adjust metrics to get average loss and accuracy per batch
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List[float]]:
  """Runs the full train/evaluate loop for a PyTorch model.

  For every epoch the model goes through train_step() on the training
  data and then test_step() on the testing data. The four resulting
  metrics are printed and recorded.

  Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A dictionary with one list per metric; each list holds one value
    per epoch.
    In the form: {train_loss: [...],
                  train_acc: [...],
                  test_loss: [...],
                  test_acc: [...]}
    For example if training for epochs=2:
                 {train_loss: [2.0616, 1.0537],
                  train_acc: [0.3945, 0.3945],
                  test_loss: [1.2641, 1.5706],
                  test_acc: [0.3400, 0.2973]}
  """
  # One empty history list per tracked metric
  metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
  results = {name: [] for name in metric_names}

  for epoch in tqdm(range(epochs)):
      # One pass over the training data (updates the weights) ...
      train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
      # ... followed by one evaluation pass over the testing data
      test_loss, test_acc = test_step(model=model,
                                      dataloader=test_dataloader,
                                      loss_fn=loss_fn,
                                      device=device)

      # Report this epoch's metrics
      print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
      )

      # Record this epoch's metrics in the history
      epoch_values = (train_loss, train_acc, test_loss, test_acc)
      for name, value in zip(metric_names, epoch_values):
          results[name].append(value)

  return results


Overwriting modular_approach/engine.py


## 5. Utility functions

In [7]:
%%writefile modular_approach/utils.py
"""
File containing various utility functions for PyTorch model training.
"""

import torch
from pathlib import Path


def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
  """Writes a model's state_dict to a file inside a target directory.

  The target directory (including missing parents) is created first,
  then the file name is validated, then the state_dict is saved.

  Args:
    model: A target PyTorch model to save.
    target_dir: A directory for saving the model to.
    model_name: A filename for the saved model. Should include
      either ".pth" or ".pt" as the file extension.

  Raises:
    AssertionError: If model_name does not end with ".pth" or ".pt".

  Example usage:
    save_model(model=model_0,
               target_dir="models",
               model_name="05_going_modular_tinygvgg_model.pth")
  """
  # Ensure the destination folder is there
  save_dir = Path(target_dir)
  save_dir.mkdir(parents=True, exist_ok=True)

  # Only the usual PyTorch checkpoint extensions are accepted
  has_valid_extension = model_name.endswith((".pth", ".pt"))
  assert has_valid_extension, "model_name should end with '.pt' or '.pth'"

  # Full path of the checkpoint file
  model_save_path = save_dir / model_name

  # Persist only the learned parameters (state_dict), not the whole module
  print(f"[INFO] Saving model to: {model_save_path}")
  torch.save(obj=model.state_dict(), f=model_save_path)

Writing modular_approach/utils.py


## 6. Train, evaluate and save the model -> `main.py`

In [8]:
%%writefile modular_approach/main.py
"""
Trains a PyTorch image classification model using device-agnostic code.
"""

import os
import argparse
import torch
from torchvision import transforms
import data_setup, engine, model_builder, utils
from timeit import default_timer as timer

def parse_args() -> argparse.Namespace:
  """Parses the command-line hyperparameters and data directories.

  Returns:
    The parsed arguments with the attributes num_epochs, batch_size,
    hidden_units, learning_rate, train_dir and test_dir.
  """
  # Create a parser
  parser = argparse.ArgumentParser(description="Get hyperparameters")

  # Get an arg for NUM_EPOCHS
  parser.add_argument("--num_epochs",
                      default=5,
                      type=int,
                      help="number of epochs to train for")

  # Get an arg for BATCH_SIZE
  parser.add_argument("--batch_size",
                      default=32,
                      type=int,
                      help="number of samples per batch")

  # Get an arg for HIDDEN_UNITS
  parser.add_argument("--hidden_units",
                      default=10,
                      type=int,
                      help="number of neurons in hidden layers")

  # Get an arg for LEARNING_RATE
  parser.add_argument("--learning_rate",
                      default=0.001,
                      type=float,
                      help="learning rate for the optimizer")

  # Get an arg for train_dir
  parser.add_argument("--train_dir",
                      default="data/food101_extract/train",
                      type=str,
                      help="directory file path to training data in standard image classification format")

  # Get an arg for test_dir
  parser.add_argument("--test_dir",
                      default="data/food101_extract/test",
                      type=str,
                      help="directory file path to testing data in standard image classification format")

  # Get the arguments from the parser
  return parser.parse_args()


def main() -> None:
  """Trains a TinyVGG model with the requested hyperparameters and saves its state_dict.

  Steps: parse the arguments, build the DataLoaders, create the model, loss
  and optimizer, run engine.train(), report the training time and save the
  model to models/05_modular_approach_tinyvgg_model.pth.
  """
  args = parse_args()

  # Setup hyperparameters
  NUM_EPOCHS = args.num_epochs
  BATCH_SIZE = args.batch_size
  HIDDEN_UNITS = args.hidden_units
  LEARNING_RATE = args.learning_rate
  print(f"[INFO] Training a model for {NUM_EPOCHS} epochs with batch size {BATCH_SIZE} using {HIDDEN_UNITS} hidden units and a learning rate of {LEARNING_RATE}")

  # Setup directories
  train_dir = args.train_dir
  test_dir = args.test_dir
  print(f"[INFO] Training data file: {train_dir}")
  print(f"[INFO] Testing data file: {test_dir}")

  # Setup device agnostic code
  device = "cuda" if torch.cuda.is_available() else "cpu"

  # Create transforms
  # NOTE(review): this single transform (including the random flips) is handed
  # to create_dataloaders, which applies it to BOTH the training and the testing
  # dataset, so evaluation images are randomly flipped as well. Consider a
  # flip-free transform for the test data.
  data_transform = transforms.Compose([
      transforms.Resize((224, 224)),
      transforms.RandomHorizontalFlip(p=0.5),
      transforms.RandomVerticalFlip(p=0.5),
      transforms.ToTensor()
  ])

  # Create DataLoaders and get class_names
  train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                                 test_dir=test_dir,
                                                                                 transform=data_transform,
                                                                                 batch_size=BATCH_SIZE)

  # Create a model
  model = model_builder.TinyVGG(input_shape=3,
                                hidden_units=HIDDEN_UNITS,
                                output_shape=len(class_names)).to(device)

  # Setup loss and optimizer
  loss_fn = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(),
                               lr=LEARNING_RATE)

  # Start the timer
  start_time = timer()

  # Start training with help from engine.py
  # (the returned per-epoch metrics are kept for inspection but not used further here)
  model_results = engine.train(model=model,
                               train_dataloader=train_dataloader,
                               test_dataloader=test_dataloader,
                               optimizer=optimizer,
                               loss_fn=loss_fn,
                               epochs=NUM_EPOCHS,
                               device=device)

  # End the timer and print out how long it took
  end_time = timer()
  print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

  # Save the model to file
  utils.save_model(model=model,
                   target_dir="models",
                   model_name="05_modular_approach_tinyvgg_model.pth")
  print("model saved")


# The DataLoaders use worker processes. On platforms that start workers by
# spawning a fresh interpreter (e.g. Windows), each worker re-imports this
# file, so the training code must only run when the file is executed as a script.
if __name__ == "__main__":
  main()



Writing modular_approach/main.py


## 7. Predict result from target URL

In [9]:
%%writefile modular_approach/predict.py

"""
Predicts class for a target image URL from a trained model
"""

import torch
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
from pathlib import Path
import requests
import argparse
import model_builder

# Setup device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Creating a parser
parser = argparse.ArgumentParser()

# Get an IMAGE_URL
parser.add_argument("--image_url",
                    default="https://na-talerzu.pl/wp-content/uploads/2022/08/Paella-z-krewetkami-i-chorizo-0849-2.jpg",
                    type=str,
                    help="target image url to predict on")

# Get a model path
parser.add_argument("--model_path",
                    default="models/05_modular_approach_tinyvgg_model.pth",
                    type=str,
                    help="target model to use for prediction filepath")

# Get HIDDEN_UNITS (must equal the value the saved model was trained with)
parser.add_argument("--hidden_units",
                    default=10,
                    type=int,
                    help="number of neurons in hidden layers")

# Get the arguments from the parser
args = parser.parse_args()
IMAGE_URL = args.image_url
MODEL_PATH = args.model_path
HIDDEN_UNITS = args.hidden_units


# Setup custom image path
data_path = Path("data/")
custom_image_path = data_path / "predict_image.jpeg"

# The data folder may not exist yet if this script is run on its own
data_path.mkdir(parents=True, exist_ok=True)

# Download the image; a previously downloaded image is always replaced.
# The request is made and its HTTP status checked BEFORE the file is opened,
# so a failed download cannot overwrite the file with an error page or
# truncate it to zero bytes.
if custom_image_path.is_file():
  print(f"{custom_image_path} already exists, overwriting content...")
print(f"Downloading {custom_image_path}...")
response = requests.get(IMAGE_URL, timeout=60)
response.raise_for_status()
with open(custom_image_path, "wb") as f:
  f.write(response.content)
print("Done")


# Class names the model was trained on; the order must match the output units
class_names = ['cheesecake', 'gnocchi', 'guacamole', 'hamburger', 'paella']

# Reload the model
model = model_builder.TinyVGG(input_shape=3,
                              hidden_units=HIDDEN_UNITS,
                              output_shape=len(class_names)).to(device)

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
try:
  state_dict = torch.load(MODEL_PATH, map_location=device)
except FileNotFoundError as err:
  raise SystemExit(f"Model file not found: {MODEL_PATH}") from err

# Stop instead of continuing with randomly initialised weights when the
# checkpoint does not fit the architecture (typically a --hidden_units mismatch)
try:
  model.load_state_dict(state_dict)
except RuntimeError as err:
  raise SystemExit(f"Hidden units parameter not matching the loaded model: {err}") from err


# Create transform pipeline to resize image
custom_image_transform = transforms.Compose([transforms.Resize(size=(224, 224), antialias=True)])


def pred_and_plot_image(model: torch.nn.Module,
                        image_path: str,
                        class_names: list[str] = None,
                        transform=None,
                        device=device):
  """Makes a prediction on a target image with a trained model and prints the result.

  Despite its name, this function does not plot anything: it prints the
  predicted class and its probability.

  Args:
    model: A trained PyTorch model; it is moved to `device` and put in eval mode.
    image_path: Path to the image file to predict on.
    class_names: Optional list mapping class indices to names. When omitted,
      the predicted class index is printed instead of a name.
    transform: Optional transform applied to the image tensor (after scaling
      to [0, 1]) before it is passed to the model.
    device: Target device to compute on (e.g. "cuda" or "cpu").
  """
  # Load in the image, always as 3-channel RGB so that grayscale images or
  # images with an alpha channel still match the model's expected input channels
  target_image = torchvision.io.read_image(
      str(image_path),
      mode=torchvision.io.ImageReadMode.RGB).type(torch.float32)

  # Divide the image pixel values by 255 to get them between [0, 1]
  target_image = target_image / 255.

  # Transform if necessary
  if transform:
    target_image = transform(target_image)

  # Make sure the model is on the target device
  model.to(device)

  # Turn on eval/inference mode and make a prediction
  model.eval()
  with torch.inference_mode():
    # Add an extra dimension to the image (this is the batch dimension)
    target_image = target_image.unsqueeze(0)

    # Make a prediction on the image with an extra dimension
    target_image_pred = model(target_image.to(device))

  # Convert logits -> prediction probabilities
  target_image_pred_probs = torch.softmax(target_image_pred, dim=1)

  # Convert prediction probabilities -> predicted class index (batch of one image)
  target_image_pred_index = int(torch.argmax(target_image_pred_probs, dim=1).item())

  # Use the class name when available, otherwise fall back to the raw index
  if class_names:
    target_image_pred_label = class_names[target_image_pred_index]
  else:
    target_image_pred_label = target_image_pred_index

  # Print the prediction and its probability
  print(f"Pred: {target_image_pred_label} | Prob: {target_image_pred_probs.max().item():.3f}")

# Run the prediction on the downloaded image and print the result
pred_and_plot_image(
    model=model,
    image_path=custom_image_path,
    class_names=class_names,
    transform=custom_image_transform,
    device=device,
)


Writing modular_approach/predict.py


# Use console and created scripts to download the data

In [10]:
!python modular_approach/data_download.py

data/food101_extract directory created
Downloading food101_extract data...
Unzipping food101_extract data...
Done


# Use console and created scripts to train and evaluate the model

Optional - set up custom hyperparameters. Example:

`!python modular_approach/main.py --num_epochs 5 --batch_size 32 --hidden_units 20 --learning_rate 0.005`

In [13]:
!python modular_approach/main.py --num_epochs 10 --batch_size 16 --hidden_units 20 --learning_rate 0.001

[INFO] Training a model for 10 epochs with batch size 16 using 20 hidden units and a learning rate of 0.001
[INFO] Training data file: data/food101_extract/train
[INFO] Testing data file: data/food101_extract/test
  0% 0/10 [00:00<?, ?it/s]Epoch: 1 | train_loss: 1.6199 | train_acc: 0.2037 | test_loss: 1.5917 | test_acc: 0.1923
 10% 1/10 [00:05<00:51,  5.73s/it]Epoch: 2 | train_loss: 1.5901 | train_acc: 0.2300 | test_loss: 1.5584 | test_acc: 0.2596
 20% 2/10 [00:11<00:47,  5.97s/it]Epoch: 3 | train_loss: 1.5765 | train_acc: 0.2637 | test_loss: 1.5571 | test_acc: 0.2692
 30% 3/10 [00:16<00:36,  5.22s/it]Epoch: 4 | train_loss: 1.5563 | train_acc: 0.2913 | test_loss: 1.5445 | test_acc: 0.2788
 40% 4/10 [00:20<00:28,  4.82s/it]Epoch: 5 | train_loss: 1.5531 | train_acc: 0.2963 | test_loss: 1.5409 | test_acc: 0.3413
 50% 5/10 [00:26<00:26,  5.36s/it]Epoch: 6 | train_loss: 1.5159 | train_acc: 0.3588 | test_loss: 1.5578 | test_acc: 0.2933
 60% 6/10 [00:31<00:20,  5.01s/it]Epoch: 7 | train_loss:

# Use console to predict on image from a target URL

**Warning**: the `--hidden_units` value (default 10) must match the value the saved model was trained with.

Example:

`!python modular_approach/predict.py --hidden_units 20 --image_url "https://images/image.jpg"`

In [17]:
!python modular_approach/predict.py --hidden_units 20 --image_url "https://cdn.aniagotuje.com/pictures/articles/2020/05/3989474-v-1500x1500.jpg"

data/predict_image.jpeg already exists, overwriting content...
Downloading data/predict_image.jpeg...
Done
Pred: hamburger | Prob: 0.271
