# Download data

In [29]:
!pip install -q kaggle

from google.colab import files
files.upload()

{}

In [30]:
# Setting up Kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

# Download dataset
! kaggle competitions download -c dogs-vs-cats

# Unzip dataset
! unzip dogs-vs-cats.zip
! unzip train.zip
! unzip test1.zip

mkdir: cannot create directory ‘/root/.kaggle’: File exists
Downloading dogs-vs-cats.zip to /content
100% 811M/812M [00:21<00:00, 38.3MB/s]
100% 812M/812M [00:21<00:00, 39.4MB/s]
Archive:  dogs-vs-cats.zip
replace sampleSubmission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: sampleSubmission.csv    
  inflating: test1.zip               
  inflating: train.zip               
Archive:  train.zip
replace train/cat.0.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
Archive:  test1.zip
replace test1/1.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [31]:
! rm -rf dogs-vs-cats.zip train.zip test1.zip

# Scripts

In [4]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(42)
if device == "cuda":
  torch.cuda.manual_seed(42)

device

'cpu'

### Creates datasetes and DataLoader

In [1]:
import os
os.makedirs("modular")

In [2]:
%%writefile modular/data_setup.py
"""
Contains functionality for creating PyTorch DataLoader's for
image classification data.
"""
import os
from typing import List, Tuple
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

NUM_WORKERS = os.cpu_count()

class CustomDataset(Dataset):
    def __init__(self, file_list: List[str], transform=None):
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        if self.transform:
            img = self.transform(img)

        label = img_path.split('/')[-1].split('.')[0]
        if label == 'dog':
            label = 1
        elif label == 'cat':
            label = 0
        else:
            label = -1 # Undefined label

        return img, label

def create_dataloaders(
    train_list: List[str],
    valid_list: List[str],
    test_list: List[str],
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int = NUM_WORKERS,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """Creates training, validation, and testing DataLoaders.

    Args:
      train_list: List of paths to training images.
      valid_list: List of paths to validation images.
      test_list: List of paths to testing images.
      transform: torchvision transforms to perform on data.
      batch_size: Number of samples per batch in each of the DataLoaders.
      num_workers: Number of subprocesses to use for data loading.

    Returns:
      A tuple of (train_dataloader, valid_dataloader, test_dataloader).
    """
    # Create datasets
    train_data = CustomDataset(train_list, transform=transform)
    valid_data = CustomDataset(valid_list, transform=transform)
    test_data = CustomDataset(test_list, transform=transform)

    # Create DataLoaders
    train_dataloader = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True # for more on pin memory, see PyTorch docs: https://pytorch.org/docs/stable/data.html
    )

    valid_dataloader = DataLoader(
        valid_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True
    )

    test_dataloader = DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )

    return train_dataloader, valid_dataloader, test_dataloader

Writing modular/data_setup.py


In [3]:
import glob
from torchvision import transforms
from sklearn.model_selection import train_test_split
from modular.data_setup import create_dataloaders

# Load train and test data
train_dir = "train"
test_dir = "test1"

train_list = glob.glob(os.path.join(train_dir, "*.jpg"))
test_list = glob.glob(os.path.join(test_dir, "*.jpg"))

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

batch_size = 100
train_list, valid_list = train_test_split(train_list, test_size=0.3, random_state=42)

train_dataloader, valid_dataloader, test_dataloader = create_dataloaders(
      train_list, valid_list, test_list, transform, batch_size
)

ValueError: With n_samples=0, test_size=0.3 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
train_dataloader, valid_dataloader, test_dataloader

### Making model with a script

In [None]:
%%writefile modular/model_builder.py

import torch
import torch.nn as nn

class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()

    self.layer1 = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=3, padding=1, stride=2),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

    self.layer2 = nn.Sequential(
        nn.Conv2d(16, 32, kernel_size=3, padding=1, stride=2),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

    self.layer3 = nn.Sequential(
        nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=2),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

    self.fc1 = nn.Linear(3 * 3 * 64, 10)
    self.dropout = nn.Dropout(0.5)
    self.fc2 = nn.Linear(10, 2)
    self.relu = nn.ReLU()

  def forward(self, x):
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = x.view(x.size(0), -1)
    x = self.relu(self.fc1(x))
    x = self.fc2(x)
    return x

In [None]:
from modular.model_builder import CNN

model = CNN()
model = model.to(device)

In [None]:
model

### Turn training functions into a script (engine.py)

In [4]:
%%writefile modular/engine.py
"""
Contains functiosn for training and testing a PyTorch model.
"""
from typing import Dict, List, Tuple
import torch
from tqdm.auto import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(42)
if device == "cuda":
  torch.cuda.manual_seed(42)

def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device = device) -> Tuple[float, float]:
  """Trains a PyTorch model for a single epoch

  Turns a target PyTorch model to training mode and then
  runs through all of the required training steps (forward
  pass, loss calculation, optimizer step).

  Args:
    model: Target PyTorch model to be trained
    dataloader: A DataLoader instance for the model to be trained on.
    loss_fn: A PyTorch loss function.
    optimizer: A PyTorch optimizer to help minimize teh boss function.
    device: A target device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A tuple of training loss and training accuracy metrics.
    In the form (train_loss, train_accuracy). For example:
    (0.1112, 0.8743)
  """
  # Put the model in train mode
  model.train()

  # Setup train loss and train accuracy values
  train_loss, train_acc = 0, 0

  # Loop through data loader data batches
  for batch, (X, y) in enumerate(dataloader):
    # Send data to the target device
    X, y = X.to(device), y.to(device)

    # 1. Forward pass
    y_pred = model(X) # output model logits

    # 2. Calculate the loss
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    # Calculate accuracy metric
    y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
    train_acc += (y_pred_class == y).sum().item() / len(y_pred)

  # Adjust metrics to get average loss and accuracy per batch
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device = device) -> Tuple[float, float]:
  """Tests a PyTorch model for a single epoch.

  Turns a target PyTorch model to "eval" mode and then performs
  a forward pass on a testing dataset.

  Args:
    model: A Pytorch model to be tested.
    dataloader: A DataLoader instance for the model to be tested on.
    loss_fn: A PyTorch loss function to calculate loss on the test data.
    device: A device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A tuple of test loss and test accuracy metrics.
    In the form (test_loss, test_accuracy). For example:
    (0.0223, 0.8985)
  """
  # Put model in eval mode
  model.eval()

  # Setup test loss and test accuracy values
  test_loss, test_acc = 0, 0

  # Turn on inference mode
  with torch.inference_mode():
    # Loop through DataLoader batches
    for batch, (X, y) in enumerate(dataloader):
      # Send data to the target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      test_pred_logits = model(X)

      # 2. Calculate the loss
      loss = loss_fn(test_pred_logits, y)
      test_loss += loss.item()

      # Calculate the accuracy
      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

  # Adjut metrics to get average loss and accuracy per batch
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

def train(model: torch.nn.Module,
          train_dataloader,
          test_dataloader,
          optimizer,
          loss_fn: torch.nn.Module = torch.nn.CrossEntropyLoss(),
          epochs: int = 5,
          device = device,
          patience: int = 5,
          delta: float = 0.0) -> Dict[str, List[float]]:
  """Trains and tests a PyTorch model with early stopping.

  Passes a target PyTorch models through train_step() and test_step()
  functions for a number of epochs, training and testing the model in
  the same epoch loop.

  Calculates, prints and stores evaluation metrics throughout.

  Args.
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: An Optimizer instance for the model to be trained with.
    loss_fn: A PyTorch loss function to be used for training. Defaults to nn.CrossEntropyLoss().
    epochs: The number of epochs to train and test the model for. Defaults to 5.
    device: A target device to compute on. Defaults to "cuda" if available, else "cpu".

  Returns:
    A dictionary of training and testing metrics loss as well as training and
    testing accuracy metrics.
    In the form of a dictionary:
                  {train_loss: [],
                  train_acc: [],
                  test_loss: [],
                  test_acc: []}
    For example if training for epochs = 2:
                  {train_loss: [2.0616, 1.0537],
                  train_acc: [0.3945, 0.3945],
                  test_loss: [1.2641, 1.5706],
                  test_acc: [0.3400, 0.2973]}

  """
  # Initialize early stopping variables
  best_val_loss = float('inf')
  epochs_no_improve = 0
  early_stop = False

  # Create empty results dictionary
  results = {"train_loss": [],
             "train_acc": [],
             "test_loss": [],
             "test_acc": []}

  # Loop through training and testing steps for a number of epochs
  for epoch in tqdm(range(epochs)):
    if not early_stop:
      train_loss, train_acc = train_step(model=model,
                                        dataloader=train_dataloader,
                                        loss_fn=loss_fn,
                                        optimizer=optimizer,
                                        device=device)
      test_loss, test_acc = test_step(model=model,
                                      dataloader=train_dataloader,
                                      loss_fn=loss_fn,
                                      device=device)

      # Print out what's happening
      print(
          f"Epoch: {epoch} | "
          f"Train loss: {train_loss:.4f} | "
          f"Train acc: {train_acc:.4f} | "
          f"Test loss: {test_loss:.4f} | "
          f"Test acc: {test_acc:.4f}"
      )

      # Update results dictionary
      results["train_loss"].append(train_loss)
      results["train_acc"].append(train_acc)
      results["test_loss"].append(test_loss)
      results["test_acc"].append(test_acc)

      # Chech if validation loss has improved
      if test_loss < best_val_loss - delta:
        best_val_loss = test_loss
        epochs_no_improve = 0
      else:
        epochs_no_improve += 1

      # Chech early stopping condition
      if epochs_no_improve >= patience:
        print(f"Early stopping at epoch {epoch}.")
        early_stop = True

  # Return the filled results at the end of the epochs
  return results

Writing modular/engine.py


### Create a file called `utils.py` with utility functions

In [5]:
%%writefile modular/utils.py
"""
File contains various utility functions for PyTorch model training.
"""
import torch
from pathlib import Path

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
  """Save a PyTorch model to a target directory.

  Args:
    model: A target PyTorch model to save.
    target_dir: A directory for saving the model to.
    model_name: A filename for saving the model. Should include
      either ".pth" or ".pt" as the file extension.

  Example usage:
    save_model(model=model_0,
               target_dir="models",
               model_name="modular_tingvgg_model.pth")
  """
  # Create target directory
  target_dir_path = Path(target_dir)
  target_dir_path.mkdir(parents=True,
                        exist_ok=True)

  # Create model save path
  assert model_name.endswith(".pth") or model.name.endswith(".pt"), "model_name should end with '.pt' or '.pth'"
  model_save_path = target_dir_path / model_name

  # Save the model state_dict()
  print(f"[INFO] Saving model to: {model_save_path}")
  torch.save(obj=model.state_dict(),
             f=model_save_path)

Writing modular/utils.py


### Train, evaluate and save the model (script mode) -> `train.py`

In [6]:
%%writefile modular/train.py
"""
Trains a PyTorch image classification model.
"""
import os
import glob
import torch
import sys

from torchvision import transforms
from timeit import default_timer as timer
from sklearn.model_selection import train_test_split

# Add the parent directory of 'modular' to the system path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from data_setup import create_dataloaders

import engine, model_builder, utils

# Setup hyperparameters
NUM_EPOCHS = 1 # done
BATCH_SIZE = 100 # done
LEARNING_RATE = 0.001 # done

# Load train, valid and test data: Done
train_dir = "train"
test_dir = "test1"
train_list = glob.glob(os.path.join(train_dir, "*.jpg"))
test_list = glob.glob(os.path.join(test_dir, "*.jpg"))
train_list, valid_list = train_test_split(train_list, test_size=0.3, random_state=42)

# Setup device agnostic code: done
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(42)
if device == "cuda":
  torch.cuda.manual_seed(42)

# Create transforms: done
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

# Create DataLoader's: done
train_dataloader, valid_dataloader, test_dataloader = create_dataloaders(
      train_list, valid_list, test_list, transform, BATCH_SIZE
)

# Create model: done
model = model_builder.CNN().to(device)

# Setup loss and optimizer: done
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = torch.nn.CrossEntropyLoss()

# Start the timer: done
start_time = timer()

# Start training with help from engine.py
engine.train(model=model,
             train_dataloader=train_dataloader,
             test_dataloader=valid_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             epochs=NUM_EPOCHS,
             device=device)

# End the timer and print out how long it took
end_time = timer()
print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

# Save the model to file
utils.save_model(model=model,
                 target_dir="models",
                 model_name="CNN_model.pth")

Writing modular/train.py


In [39]:
!python modular/train.py

  0% 0/1 [00:00<?, ?it/s]Epoch: 0 | Train loss: 0.6433 | Train acc: 0.6276 | Test loss: 0.6131 | Test acc: 0.6698
100% 1/1 [08:27<00:00, 507.56s/it]
[INFO] Total training time: 507.561 seconds
[INFO] Saving model to: models/CNN_model.pth
