<a href="https://colab.research.google.com/github/shree-3143/Using-CNNs-for-Breast-Cancer-Histology-Detection/blob/main/FINAL_Breast_Cancer_Histology_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Read Me
To run the notebook, download the BreakHis histology dataset from Kaggle, compress it into a single zip file, and upload it to your Google Drive. Set "zip_path" (in the dataset-loading cell) to the archive's location on the mounted drive, e.g. "/content/drive/MyDrive/BreaKHis_v1.zip", then click "Run All". The last cell (at the very end) runs a prediction on a random image.

In [None]:
# Import all required libraries --> for model building, training, evaluation, and image handling

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from torchvision.models import resnet18
import os
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Set-up device-agnostic code

# Default to the CPU and switch to the GPU only when PyTorch can see one
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
device

'cpu'

In [None]:
# Loading and unzipping the BreakHis dataset from Google Drive
import requests
import zipfile
from pathlib import Path
from google.colab import files
from google.colab import drive

# Mounting Google Drive (Colab only) so the dataset archive becomes reachable
drive.mount("/content/drive")
zip_path = "/content/drive/MyDrive/BreaKHis_v1.zip"

# Fail early with an actionable message instead of a bare zipfile error
if not Path(zip_path).is_file():
  raise FileNotFoundError(
      f"Dataset archive not found at '{zip_path}'. "
      "Upload the BreakHis zip to Google Drive and update zip_path."
  )

# Create data folder
data_path = Path("/data")
data_path.mkdir(parents=True, exist_ok=True)

# Unzip the uploaded BreakHis.zip
with zipfile.ZipFile(zip_path, "r") as zip_ref:
  print("Unzipping BreakHis data")
  zip_ref.extractall(data_path)

# Check extracted contents: the folder itself always exists after mkdir(),
# so look for at least one extracted entry inside it instead
breakhis_path = data_path
if any(breakhis_path.iterdir()):
  print("extracted")
else:
  print("check folder structure")


ValueError: mount failed

In [None]:
import os # allows the code to interact with the operating system
# Walks through the target directory, returning its contents
# Returns a print of: no. of subdirectories, no. of images(files) in each subdirectory, name of each subdirectory
def walk_through_dir(dir_path):
    """Print a one-line summary for every directory found under dir_path.

    Each line reports how many sub-directories and how many files the
    directory directly contains, followed by the directory's path.

    Args:
        dir_path: Root directory to walk (anything os.walk accepts).
    """
    for current_dir, subdirs, files in os.walk(dir_path):
        n_dirs, n_files = len(subdirs), len(files)
        print(f"There are {n_dirs} directories and {n_files} images in '{current_dir}'.")


In [None]:
# Calling the walk_through_dir to display the structure of the BreakHis dataset
walk_through_dir(breakhis_path)

In [None]:
import os # used to interact with the operating system
import shutil # useful for copying/moving entire files or filders
import random # used to generate random numbers or make random choices
from pathlib import Path # object-oriented way to handle filesystem paths

# Root of the extracted dataset: contains one folder per label (benign / malignant)
original_path = Path("/data/BreaKHis_v1/BreaKHis_v1/histology_slides/breast")

# In this project, we are only dealing with two subtypes (one per label), and only images of magnification 40x
benign_subtype = "fibroadenoma"
malignant_subtype = "ductal_carcinoma"
magnification = "40X"

# SPLITTING THE DATASET INTO TRAINING AND TESTING DATASETS

# Paths for the new split dataset (train/ and test/ live side by side under one root)
base_split_path = Path("/data/BreaKHis_split")
train_path = base_split_path / "train"
test_path = base_split_path / "test"

# Delete any split left over from a previous run so that re-running this cell
# starts from empty folders instead of mixing files from two different shuffles
if base_split_path.exists():
  shutil.rmtree(base_split_path)

# Creating train/test directories, and their benign/malignant subfolders
# Looping over both paths so that we can create subfolders in each
for split in [train_path, test_path]: # list of two path objects - one for where the training data will go, and one for the test data
  (split / "benign").mkdir(parents=True, exist_ok=True) # create any missing parent folders, and don't throw an error if the folder already exists
  (split / "malignant").mkdir(parents=True, exist_ok=True)

# Function to copy images into train/test directories with an 80-20 split
def copy_images(label, subtype, train_ratio=0.8, seed=42):
  """Copy one subtype's images into the train/test split folders.

  Collects every .png/.jpg/.jpeg file found in the `magnification` folder of
  each patient folder under original_path / label / "SOB" / subtype, shuffles
  them, and copies the first `train_ratio` share to train_path / label and
  the remainder to test_path / label.

  Args:
    label: Class folder name, used both in the source tree and as the
      destination sub-folder (e.g. "benign" or "malignant").
    subtype: Tumour subtype folder name below "SOB".
    train_ratio: Fraction of images that goes to the training set.
    seed: Seed for the shuffle so that re-running gives the same split.
      Pass None for a different split on every run.

  Returns:
    None. Progress is reported with print(); if the subtype folder is
    missing, a warning is printed and nothing is copied.
  """
  source_subtype_path = original_path / label / "SOB" / subtype # Navigating to the correct subtype folder
  if not source_subtype_path.exists():
    print(f"Warning: {source_subtype_path} does not exist.") # Print a warning and exit the function if the source subtype folder doesn't exist
    return

  # Only include directories; sorted so the result does not depend on filesystem listing order
  patient_folders = sorted(p for p in source_subtype_path.iterdir() if p.is_dir())

  all_images = [] # image file paths gathered across all patients
  for patient_folder in patient_folders:
    img_folder = patient_folder / magnification # look for a folder named after the magnification level
    # print a warning and skip to the next patient if the folder doesn't exist
    if not img_folder.exists():
      print(f"Missing magnification folder: {img_folder}")
      continue
    # find all image files matching these extensions
    for pattern in ("*.png", "*.jpg", "*.jpeg"):
      all_images.extend(sorted(img_folder.glob(pattern)))

  print(f"Found {len(all_images)} images for {label} - {subtype} at {magnification}")

  # Shuffle with a private generator so the global `random` state is left untouched.
  # NOTE(review): the split is per image, so images of the same patient can end up in
  # both train and test; consider splitting on patient_folders to avoid leakage.
  random.Random(seed).shuffle(all_images)
  split_idx = int(len(all_images) * train_ratio)
  train_imgs = all_images[:split_idx] # all images up to the splitting index
  test_imgs = all_images[split_idx:] # all images from the splitting index onwards

  # Copy each subset into the class folder of its split
  for split_root, split_imgs in ((train_path, train_imgs), (test_path, test_imgs)):
    for img_path in split_imgs:
      shutil.copy(img_path, split_root / label / img_path.name)

  print(f"Copied {len(train_imgs)} images to train/{label}")
  print(f"Copied {len(test_imgs)} images to test/{label}") # was mislabelled as train/

# Build the split: one call per label, each with the subtype chosen above
copy_images(label="benign", subtype=benign_subtype)
copy_images(label="malignant", subtype=malignant_subtype)

# Sanity check: report how many entries ended up in every class folder of every split
for split in (train_path, test_path):
  print(f"\nContents of {split}:")
  for label in ("benign", "malignant"):
    folder = split / label
    entries = tuple(folder.glob("*"))
    print(f"- {label}: {len(entries)} images")



In [None]:
from pathlib import Path
breakhis_path = Path("/data/BreaKHis_split")

# Creating two new path objects --> each pointing to the training/testing folder inside the dataset folder
train_dir = breakhis_path / "train"
test_dir = breakhis_path / "test"

train_dir, test_dir

In [None]:
# Recursively list all files in the training directory
all_files = list(train_dir.rglob("*"))
print(f"Total files found in train_dir recursively: {len(all_files)}")

print("Some sample files:")
for f in all_files[:10]:
    print(f)

In [None]:
import random
from pathlib import Path
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# Path.suffix always includes the leading dot, so every entry must start with "."
image_extensions = [".jpg", ".jpeg", ".png"]

# Get all image files directly under benign and malignant folders
train_images = [] # image paths found in the training directory
for class_folder in train_dir.iterdir(): # iterates through each item in the train_dir folder
  if class_folder.is_dir(): # checks that it is a directory
    for img_file in class_folder.iterdir(): # iterates through each file in the class directory
      if img_file.suffix.lower() in image_extensions: # filters for image files based on the extensions
        train_images.append(img_file) # if it is an image, add the file to the list
print(f"Found {len(train_images)} images in train directory") # print the number of images in the directory

if train_images:
  random_img = random.choice(train_images) # select a random image
  print(f"Random image selected: {random_img}")
  print(f"Image class: {random_img.parent.name}") # print the class of the random image (Benign/Malignant)

  # The image is displayed through matplotlib below; the previous bare `img.show`
  # (without parentheses) never called anything and has been removed
  img = Image.open(random_img)

  # Turn the image into an array
  img_as_array = np.asarray(img)

  # Plot the image with matplotlib
  plt.figure(figsize=(10, 7))
  plt.imshow(img_as_array)
  plt.title(f"Image class: {random_img.parent.name} | Image shape: {img_as_array.shape}")
  plt.axis(False);
else:
  print("No images found.")


In [None]:
# Repeat the same steps for the testing dataset
import random
from pathlib import Path
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# Path.suffix always includes the leading dot, so every entry must start with "."
image_extensions = [".jpg", ".jpeg", ".png"]

# Get all image files directly under benign and malignant folders
test_images = [] # image paths found in the testing directory
for class_folder in test_dir.iterdir():
  if class_folder.is_dir():
    for img_file in class_folder.iterdir():
      if img_file.suffix.lower() in image_extensions:
        test_images.append(img_file)
print(f"Found {len(test_images)} images in test directory")

if test_images:
  random_img = random.choice(test_images)
  print(f"Random image selected: {random_img}")
  print(f"Image class: {random_img.parent.name}")

  # The image is displayed through matplotlib below; the previous bare `img.show`
  # (without parentheses) never called anything and has been removed
  img = Image.open(random_img)

  # Turn the image into an array
  img_as_array = np.asarray(img)

  # Plot the image with matplotlib
  plt.figure(figsize=(10, 7))
  plt.imshow(img_as_array)
  plt.title(f"Image class: {random_img.parent.name} | Image shape: {img_as_array.shape}")
  plt.axis(False);
else:
  print("No images found.")

In [None]:
# Importing libraries for image transformations
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
# Write transform for image
data_transform = transforms.Compose([
    # Resize the images to 224x224
    transforms.Resize(size=(224, 224)),
    # Flip the images randomly on the horizontal
    transforms.RandomHorizontalFlip(p=0.5), # p = probability of flip, 0.5 = 50% chance
    # Turn the image into a torch.Tensor
    transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0
])

In [None]:
def plot_transformed_images(image_paths, transform, n=3, seed=42):
    """Plots a series of random images from image_paths.

    Will open n image paths from image_paths, transform them
    with transform and plot each original next to its transformed version.

    Args:
        image_paths (list): List of target image paths.
        transform (PyTorch Transforms): Transforms to apply to images.
        n (int, optional): Number of images to plot. Defaults to 3.
            Capped at the number of available paths.
        seed (int, optional): Random seed for the random generator. Defaults to 42.
            Pass None to get a different selection on every call.
    """
    # Honour the caller's seed (it used to be overwritten with 42)
    random.seed(seed)

    # Sample from the paths that were passed in, not from a notebook global,
    # and never request more images than are available
    candidate_paths = list(image_paths)
    sample_size = min(n, len(candidate_paths))
    random_image_paths = random.sample(candidate_paths, k=sample_size)

    for image_path in random_image_paths:
        with Image.open(image_path) as f:
            fig, ax = plt.subplots(1, 2)
            ax[0].imshow(f)
            ax[0].set_title(f"Original \nSize: {f.size}")
            ax[0].axis("off")

            # Transform and plot image
            # Note: permute() will change shape of image to suit matplotlib
            # (PyTorch default is [C, H, W] but Matplotlib is [H, W, C])
            transformed_image = transform(f).permute(1, 2, 0)
            ax[1].imshow(transformed_image)
            ax[1].set_title(f"Transformed \nSize: {transformed_image.shape}")
            ax[1].axis("off")

            # The parent folder name is the class label
            fig.suptitle(f"Class: {image_path.parent.stem}", fontsize=16)

plot_transformed_images(train_images, transform=data_transform, n=3)

In [None]:
# transforms all images in the testing and training datasets
# Use ImageFolder to create dataset(s)
from torchvision import datasets
train_data = datasets.ImageFolder(root=train_dir, # target folder of images
                                  transform=data_transform, # transforms to perform on data (images)
                                  target_transform=None) # transforms to perform on labels (if necessary)

# Apply the same to the testing directory
test_data = datasets.ImageFolder(root=test_dir,
                                 transform=data_transform)

print(f"Train data:\n{train_data}\nTest data:\n{test_data}")

In [None]:
# classes in each training dataset
class_names = train_data.classes
print(class_names)

# mapping from class_name to index
class_dict = train_data.class_to_idx
print(class_dict)

In [None]:
# classes in each testing dataset
class_names = test_data.classes
print(class_names)

# mapping from class_name to index
class_dict = test_data.class_to_idx
print(class_dict)

In [None]:
# Check the lengths
len(train_images), len(test_images)

In [None]:
# Exploring the dataset
img, label = train_data[0][0], train_data[0][1] # Extract the first image and respective label from the training dataset
print(f"Image tensor:\n{img}")
print(f"Image shape: {img.shape}")
print(f"Image datatype: {img.dtype}")
print(f"Image label: {label}")
print(f"Label datatype: {type(label)}")

In [None]:
# Rearrange the order of dimensions
img_permute = img.permute(1, 2, 0)

# Print out different shapes (before and after permute)
print(f"Original shape: {img.shape} -> [color_channels, height, width]")
print(f"Image permute shape: {img_permute.shape} -> [height, width, color_channels]")

# Plot the image
plt.figure(figsize=(10, 7))
plt.imshow(img.permute(1, 2, 0))
plt.axis("off")
plt.title(class_names[label], fontsize=14);

In [None]:
# Turn train and test Datasets into DataLoaders
# A data-loader provides an efficient way to load data in batches
from torch.utils.data import DataLoader
train_dataloader = DataLoader(dataset=train_data, # pull samples from train_data
                              batch_size=1, # how many samples per batch?
                              num_workers=1, # how many subprocesses to use for data loading? (higher = more)
                              shuffle=True) # shuffle the data?

test_dataloader = DataLoader(dataset=test_data,
                             batch_size=1,
                             num_workers=1,
                             shuffle=False) # don't usually need to shuffle testing data

train_dataloader, test_dataloader

In [None]:
img, label = next(iter(train_dataloader)) # An iterator that allows us to manually pull the next batch of data

# Batch size will now be 1, try changing the batch_size parameter above and see what happens
print(f"Image shape: {img.shape} -> [batch_size, color_channels, height, width]")
print(f"Label shape: {label.shape}")

In [None]:
# Importing more libraries
import os
import pathlib
import torch

from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from typing import Tuple, Dict, List

In [None]:
# Instance of torchvision.datasets.ImageFolder()
train_data.classes, train_data.class_to_idx # map class names to integer labels

In [None]:
# Get the class names from the target directory
img, label = next(iter(train_dataloader)) # loads another batch of data
class_names_found = train_data.classes
print(img.shape)
print(label.shape)

In [None]:
# Make function to find classes in target directory
def find_classes(directory: str) -> Tuple[List[str], Dict[str, int]]:
    """Discover class names from the sub-folders of a directory.

    Every immediate sub-directory of `directory` is treated as one class
    (standard image classification layout); plain files are ignored.

    Args:
        directory (str): Folder whose sub-directory names are the class names.

    Returns:
        Tuple[List[str], Dict[str, int]]: The alphabetically sorted class names
        and a mapping from each class name to its position in that list.

    Raises:
        FileNotFoundError: If `directory` contains no sub-directories.

    Example:
        find_classes("food_images/train")
        >>> (["class_1", "class_2"], {"class_1": 0, ...})
    """
    # Collect the sub-directory names, then order them so indices are stable
    class_names = [entry.name for entry in os.scandir(directory) if entry.is_dir()]
    class_names.sort()

    # Nothing to classify without at least one class folder
    if len(class_names) == 0:
        raise FileNotFoundError(f"Couldn't find any classes in {directory}.")

    # Numeric label of a class = its position in the sorted list
    index_by_class = dict(zip(class_names, range(len(class_names))))
    return class_names, index_by_class

In [None]:
# Call the function
find_classes(train_dir)

In [None]:
# Write a custom dataset class (inherits from torch.utils.data.Dataset)
from torch.utils.data import Dataset

# 1. Subclass torch.utils.data.Dataset
class ImageFolderCustom(Dataset):
    """Image dataset read from a `targ_dir/class_name/image_file` folder layout.

    Each sample is `(image, class_idx)`, where the class index comes from
    `find_classes(targ_dir)` and the image is passed through `transform`
    when one is given.
    """

    # File suffixes treated as images (the same ones the split step copies)
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

    # Initialize with a targ_dir and transform (optional) parameter
    def __init__(self, targ_dir: str, transform=None) -> None:
        """Index the image files below targ_dir and discover the class names.

        Args:
            targ_dir: Directory containing one sub-folder per class.
            transform: Optional callable applied to each loaded image.
        """
        # Get all image paths; sorted so that a given index always refers to the same file
        self.paths = sorted(
            path
            for path in pathlib.Path(targ_dir).glob("*/*")
            if path.is_file() and path.suffix.lower() in self.IMAGE_EXTENSIONS
        )
        # Setup transforms
        self.transform = transform
        # Create classes and class_to_idx attributes
        self.classes, self.class_to_idx = find_classes(targ_dir)

    # Make function to load images
    def load_image(self, index: int) -> Image.Image:
        "Opens the image at position `index` and returns it as an RGB image."
        image_path = self.paths[index]
        # convert() loads the pixel data, so the file can be closed right away;
        # forcing RGB guarantees three channels regardless of how the file is stored
        with Image.open(image_path) as img:
            return img.convert("RGB")

    # Overwrite the __len__() method (optional but recommended for subclasses of torch.utils.data.Dataset)
    def __len__(self) -> int:
        "Returns the total number of samples."
        return len(self.paths)

    # Overwrite the __getitem__() method (required for subclasses of torch.utils.data.Dataset)
    def __getitem__(self, index: int) -> Tuple[torch.Tensor, int]:
        "Returns one sample of data, data and label (X, y)."
        img = self.load_image(index)
        class_name = self.paths[index].parent.name # expects path in data_folder/class_name/image.jpeg
        class_idx = self.class_to_idx[class_name]

        # Transform if necessary
        if self.transform:
            return self.transform(img), class_idx # return data, label (X, y)
        else:
            return img, class_idx # return data, label (X, y)

In [None]:
# Augment train data
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)), # resizes every image to specific dimensions
    transforms.RandomHorizontalFlip(p=0.5), # flips image horizontally with a 50% chance
    transforms.ToTensor() # converts PIL image into PyTorch tensor
])

# Don't augment test data, only reshape
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [None]:
# Apply train and test transforms to the images in their respective directories

train_dir = breakhis_path / "train"
test_dir = breakhis_path / "test"

train_data_custom = ImageFolderCustom(targ_dir=train_dir,
                                      transform=train_transforms)
test_data_custom = ImageFolderCustom(targ_dir=test_dir,
                                     transform=test_transforms)

In [None]:
# Size of the datasets
len(train_data_custom), len(test_data_custom)

In [None]:
# Check classes
train_data_custom.classes

In [None]:
# Convert to integer representation
train_data_custom.class_to_idx

In [None]:
# Check for equality amongst our custom Dataset and ImageFolder Dataset
# Checking if number of samples are equal after transforms
print((len(train_data_custom) == len(train_data)) & (len(test_data_custom) == len(test_data)))
print(train_data_custom.classes == train_data.classes)
print(train_data_custom.class_to_idx == train_data.class_to_idx)

In [None]:
# 1. Take in a Dataset as well as a list of class names
def display_random_images(dataset: torch.utils.data.dataset.Dataset,
                          classes: List[str] = None,
                          n: int = 10,
                          display_shape: bool = True,
                          seed: int = None):
    """Plot up to 10 randomly chosen samples of a dataset in one row.

    Args:
        dataset: Dataset whose items are (image_tensor, label) pairs, with the
            image laid out as [color_channels, height, width].
        classes: Optional list of class names indexed by label; when given,
            the class name is shown above each image.
        n: Number of samples to show. Values above 10 are reduced to 10 (and
            the shape display is switched off); values above the dataset
            size are reduced to the dataset size.
        display_shape: Whether to show each image's shape in its title.
        seed: Optional seed for the `random` module, for a reproducible
            selection. None leaves the random state untouched.
    """
    # Adjust display if n too high
    if n > 10:
        n = 10
        display_shape = False
        print(f"For display purposes, n shouldn't be larger than 10, setting to 10 and removing shape display.")

    # random.sample() cannot draw more items than the dataset holds
    n = min(n, len(dataset))

    # Set random seed (`is not None` so that seed=0 is honoured too)
    if seed is not None:
        random.seed(seed)

    # Get random sample indexes
    random_samples_idx = random.sample(range(len(dataset)), k=n)

    # Setup plot
    plt.figure(figsize=(16, 8))

    # Loop through samples and display random samples
    for i, targ_sample in enumerate(random_samples_idx):
        # Fetch the sample once so image and label come from the same (possibly random) transform call
        targ_image, targ_label = dataset[targ_sample]

        # Adjust image tensor shape for plotting: [color_channels, height, width] -> [height, width, color_channels]
        targ_image_adjust = targ_image.permute(1, 2, 0)

        # Plot adjusted samples
        plt.subplot(1, n, i+1)
        plt.imshow(targ_image_adjust)
        plt.axis("off")

        # Build the title from whatever information is available; previously a
        # missing `classes` left `title` undefined and raised an error here
        title_parts = []
        if classes:
            title_parts.append(f"class: {classes[targ_label]}")
        if display_shape:
            title_parts.append(f"shape: {targ_image_adjust.shape}")
        if title_parts:
            plt.title("\n".join(title_parts))

In [None]:
# Display random images from ImageFolder created Dataset
display_random_images(train_data,
                      n=5,
                      classes=class_names,
                      seed=None)

In [None]:
# Display random images from ImageFolderCustom Dataset
display_random_images(train_data_custom,
                      n=12,
                      classes=class_names,
                      seed=None) # Try setting the seed for reproducible images

In [None]:
# Turn train and test custom Dataset's into DataLoader's
from torch.utils.data import DataLoader
train_dataloader_custom = DataLoader(dataset=train_data_custom, # use custom created train Dataset
                                     batch_size=1, # how many samples per batch?
                                     num_workers=0, # how many subprocesses to use for data loading? (higher = more)
                                     shuffle=True) # shuffle the data?

test_dataloader_custom = DataLoader(dataset=test_data_custom, # use custom created test Dataset
                                    batch_size=1,
                                    num_workers=0,
                                    shuffle=False) # don't usually need to shuffle testing data

train_dataloader_custom, test_dataloader_custom

In [None]:
# Get image and label from custom DataLoader
img_custom, label_custom = next(iter(train_dataloader_custom))

# Batch size will now be 1, try changing the batch_size parameter above and see what happens
print(f"Image shape: {img_custom.shape} -> [batch_size, color_channels, height, width]")
print(f"Label shape: {label_custom.shape}")

In [None]:
from torchvision import transforms

# Create sequence of image transformations to apply to training images
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.TrivialAugmentWide(num_magnitude_bins=31), # how intense
    transforms.ToTensor() # use ToTensor() last to get everything between 0 & 1
])

# Don't need to perform augmentation on the test data
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [None]:
# Get all image paths
image_path_list = list(breakhis_path.glob("*/*/*.png"))

# Plot random images
plot_transformed_images(
    image_paths=image_path_list,
    transform=train_transforms,
    n=3,
    seed=None
)

In [None]:
# Create simple transform
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [None]:
# 1. Load and transform data - using only simple transforms
from torchvision import datasets
train_data_simple = datasets.ImageFolder(root=train_dir, transform=simple_transform)
test_data_simple = datasets.ImageFolder(root=test_dir, transform=simple_transform)

# 2. Turn data into DataLoaders
import os
from torch.utils.data import DataLoader

# Setup batch size and number of workers
BATCH_SIZE = 32
# os.cpu_count() returns None when the CPU count cannot be determined, which
# DataLoader does not accept; fall back to 0 (load data in the main process)
NUM_WORKERS = os.cpu_count() or 0
print(f"Creating DataLoader's with batch size {BATCH_SIZE} and {NUM_WORKERS} workers.")

# Create DataLoader's
train_dataloader_simple = DataLoader(train_data_simple,
                                     batch_size=BATCH_SIZE,
                                     shuffle=True, # reshuffle the training samples every epoch
                                     num_workers=NUM_WORKERS)

test_dataloader_simple = DataLoader(test_data_simple,
                                    batch_size=BATCH_SIZE,
                                    shuffle=False, # keep the evaluation order fixed
                                    num_workers=NUM_WORKERS)

train_dataloader_simple, test_dataloader_simple

In [None]:
# ACTUAL CNN - CLASSIFICATION MODEL

# This is a hybrid CNN
# The feature extrator is layers from the pretrained ResNet18 model CNN
# The final classification layer (classifier head) is a standard CNN from scratch
class HybridTinyVGGResNet(nn.Module):
    """
    Frozen, ImageNet-pretrained ResNet-18 feature extractor followed by a
    small trainable classifier head (Flatten -> Linear -> ReLU -> Linear).

    Args:
        input_shape: Number of colour channels of the input images. It is only
            used to size the dummy input that measures the feature width.
        hidden_units: Number of neurons in the hidden layer of the classifier.
        output_shape: Number of output classes (size of the logits vector).
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__() # calls the parent constructor (nn.Module) --> this is the base class for all neural network models in PyTorch
        self.device = device # assumes device was defined earlier

        # Load a ResNet-18 with ImageNet weights. `pretrained=True` is deprecated in
        # newer torchvision releases in favour of `weights=`; fall back to the old
        # keyword when the weights enum is not available.
        if hasattr(models, "ResNet18_Weights"):
            backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        else:
            backbone = models.resnet18(pretrained=True)
        for param in backbone.parameters():
            param.requires_grad = False # Freeze all ResNet parameters so that they're not updated during training
        # removes the final, fully connected classification layer from ResNet
        # convolutional layers --> feature extractor is left
        self.feature_extractor = nn.Sequential(*list(backbone.children())[:-1]) # take everything except the final FC (fully connected) layer

        # pass in a dummy tensor with the transformed dimensions of an image
        # this gets the output shape of the feature extractor
        # The extractor is switched to eval mode for this pass: in training mode the
        # BatchNorm layers would fold the all-zero dummy batch into their pretrained
        # running statistics. The previous mode is restored afterwards.
        was_training = self.feature_extractor.training
        self.feature_extractor.eval()
        with torch.no_grad(): # no gradients are needed for a shape probe
          dummy_input = torch.zeros(1, input_shape, 224, 224)
          dummy_output = self.feature_extractor(dummy_input)
          flattened_size = dummy_output.view(1, -1).shape[1]
        self.feature_extractor.train(was_training)

        # NOTE(review): model.train() also puts the frozen backbone's BatchNorm layers
        # in training mode, so their running statistics keep changing during training
        # even though requires_grad is False -- confirm this is intended.

        # stack layers in order
        self.classifier = nn.Sequential(
             nn.Flatten(), # flattens the extracted features into one vector per image
             nn.Linear(flattened_size, hidden_units),
             nn.ReLU(), # adds non-linearity
             nn.Linear(hidden_units, output_shape) # final layer that maps the hidden layer to the number of output classes
        )

    # Define how the data moves through the layers
    # i.e., convolutional layers first (features extracted), and then classification
    def forward(self, x: torch.Tensor):
        """Return the raw class logits for a batch of images."""
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x

torch.manual_seed(42) # random seed for reproducibility
model_0 = HybridTinyVGGResNet(input_shape=3, # number of color channels (3 for RGB)
                  hidden_units=10, # number of neurons in the hidden layer of the classifier
                  output_shape=len(train_data.classes)).to(device) # number of output classes (2 in this case)

model_0 # this displays the model architecture

In [None]:
# 1. Get a batch of images and labels from the DataLoader
img_batch, label_batch = next(iter(train_dataloader_simple))

# 2. Get a single image from the batch and unsqueeze the image so its shape fits the model
img_single, label_single = img_batch[0].unsqueeze(dim=0), label_batch[0]
print(f"Single image shape: {img_single.shape}\n")

# 3. Perform a forward pass on a single image
model_0.eval()
with torch.inference_mode():
    pred = model_0(img_single.to(device))

# 4. Print out what's happening and convert model logits -> pred probs -> pred label
print(f"Output logits:\n{pred}\n")
print(f"Output prediction probabilities:\n{torch.softmax(pred, dim=1)}\n")
print(f"Output prediction label:\n{torch.argmax(torch.softmax(pred, dim=1), dim=1)}\n")
print(f"Actual label:\n{label_single}")

In [None]:
# Install torchinfo if it's not available, import it if it is
try:
    import torchinfo
except:
    !pip install torchinfo
    import torchinfo

from torchinfo import summary
summary(model_0, input_size=[1, 3, 224, 224]) # do a test pass through of an example input size

In [None]:
# This function takes:
# a neural network (model), a dataloader providing batches of training data
# a loss function to measure error
# an optimiser to update model parameters
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device=None):
    """Train the model for one pass over the dataloader.

    Args:
        model: Network to train; it is switched to train mode.
        dataloader: Iterable of (X, y) batches that also supports len().
        loss_fn: Callable computing the loss from (logits, targets).
        optimizer: Optimizer that updates the model parameters.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU if the model has none), so the
            function no longer depends on a notebook-global `device`.

    Returns:
        Tuple (train_loss, train_acc): loss and accuracy, each averaged over
        the batches of the dataloader.
    """
    # Resolve the target device from the model when none is given
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Put model in train mode
    model.train()

    # Accumulators for the epoch
    train_loss, train_acc = 0, 0

    # Loop through data loader data batches
    for batch, (X, y) in enumerate(dataloader):
        # Send data to target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate and accumulate loss
        # Quantifies how far the predictions are from the actual values
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # 3. Optimizer zero grad
        # gradients accumulate by default in PyTorch, so we need to reset them before computing new gradients
        optimizer.zero_grad()

        # 4. Loss backward
        # Computes gradients of the loss via backpropagation
        loss.backward()

        # 5. Optimizer step
        # updates model parameters using calculated gradients
        optimizer.step()

        # Calculate and accumulate accuracy; softmax is monotonic, so taking the
        # argmax of the logits directly gives the same class (as test_step does)
        y_pred_class = y_pred.argmax(dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    # Adjust metrics to get average loss and accuracy per batch
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_loss, train_acc

In [None]:
def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module):
    # Put model in eval mode
    model.eval()

    # Setup test loss and test accuracy values
    test_loss, test_acc = 0, 0

    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for batch, (X, y) in enumerate(dataloader):
            # Send data to target device
            X, y = X.to(device), y.to(device)

            # 1. Forward pass
            test_pred_logits = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()

            # Calculate and accumulate accuracy
            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

    # Adjust metrics to get average loss and accuracy per batch
    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

In [None]:
from tqdm.auto import tqdm
# This defines a full training loop that runs through multiple epochs

# 1. Take in various parameters required for training and test steps
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module = nn.CrossEntropyLoss(),
          epochs: int = 5):
    """Train and evaluate `model` for `epochs` epochs and collect the metrics.

    Each epoch calls `train_step` on `train_dataloader` and then `test_step`
    on `test_dataloader`, prints a one-line summary, and records the four
    metrics.

    Args:
        model: Network to optimise.
        train_dataloader: Batches passed to `train_step`.
        test_dataloader: Batches passed to `test_step`.
        optimizer: Optimizer handed to `train_step`.
        loss_fn: Loss used for both steps.
        epochs: Number of train/test rounds to run.

    Returns:
        Dict with keys "train_loss", "train_acc", "test_loss" and "test_acc",
        each mapping to a list holding one value per epoch.
    """

    def _as_number(value):
        # Tensors are unwrapped to Python numbers; anything else is stored as-is
        return value.item() if isinstance(value, torch.Tensor) else value

    history = {name: [] for name in ("train_loss", "train_acc", "test_loss", "test_acc")}

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn)

        # Progress line for this epoch (epochs are reported 1-based)
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        epoch_metrics = {"train_loss": train_loss,
                         "train_acc": train_acc,
                         "test_loss": test_loss,
                         "test_acc": test_acc}
        for name, value in epoch_metrics.items():
            history[name].append(_as_number(value))

    return history

In [None]:
# Train the first model (model_0) with the train()/train_step()/test_step() helpers
# It is fed by the "simple" dataloaders (per the original note: simple transforms, mainly only resizing)

# Seed PyTorch's random number generators before the model is created
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Set number of epochs
NUM_EPOCHS = 5

# Create an instance of HybridTinyVGGResNet with one output per class in train_data
model_0 = HybridTinyVGGResNet(input_shape=3, # number of color channels (3 for RGB)
                  hidden_units=10,
                  output_shape=len(train_data.classes)).to(device)

# Setup loss function and optimizer (Adam over all of model_0's parameters)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_0.parameters(), lr=0.001)

# Start the timer
from timeit import default_timer as timer
start_time = timer()

# Train model_0; the returned dict holds one value per epoch for each metric
model_0_results = train(model=model_0,
                        train_dataloader=train_dataloader_simple,
                        test_dataloader=test_dataloader_simple,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        epochs=NUM_EPOCHS)

# End the timer and print out how long it took
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

In [None]:
# Check which metric names train() recorded for model_0
model_0_results.keys()

In [None]:
# Plots the per-epoch statistics collected by the training loop
def plot_loss_curves(results: Dict[str, List[float]]):
    """Draw loss and accuracy curves side by side for one training run.

    Args:
        results (dict): per-epoch metric lists, e.g.
            {"train_loss": [...],
             "train_acc": [...],
             "test_loss": [...],
             "test_acc": [...]}
    """
    # Look up all four series first so a missing key fails before any figure exists
    train_loss_values = results['train_loss']
    test_loss_values = results['test_loss']
    train_acc_values = results['train_acc']
    test_acc_values = results['test_acc']

    # One x position per recorded epoch
    epochs = range(len(results['train_loss']))

    # (panel title, train series, train label, test series, test label)
    panels = (
        ('Loss', train_loss_values, 'train_loss', test_loss_values, 'test_loss'),
        ('Accuracy', train_acc_values, 'train_accuracy', test_acc_values, 'test_accuracy'),
    )

    plt.figure(figsize=(15, 7))
    for position, (heading, train_series, train_label, test_series, test_label) in enumerate(panels, start=1):
        # Left panel is loss, right panel is accuracy
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_series, label=train_label)
        plt.plot(epochs, test_series, label=test_label)
        plt.title(heading)
        plt.xlabel('Epochs')
        plt.legend()

In [None]:
# Plot the loss and accuracy curves recorded while training model_0
plot_loss_curves(model_0_results)

In [None]:
# Create training transform with TrivialAugment
# Unlike a resize-only pipeline, this applies torchvision's TrivialAugmentWide
# data augmentation to each training image before converting it to a tensor
train_transform_trivial_augment = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.TrivialAugmentWide(num_magnitude_bins=31),
    transforms.ToTensor()
])

# Create testing transform (no data augmentation): resize and convert to tensor only
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [None]:
# Turn image folders into Datasets
# Training images get the augmenting transform; test images get the plain one
# (train_dir and test_dir come from an earlier cell)
train_data_augmented = datasets.ImageFolder(train_dir, transform=train_transform_trivial_augment)
test_data_simple = datasets.ImageFolder(test_dir, transform=test_transform)

# Display both datasets
train_data_augmented, test_data_simple

In [None]:
# Turn Datasets into DataLoaders - like done for the previous model
import os
BATCH_SIZE = 32
# os.cpu_count() returns None when the CPU count cannot be determined, and
# DataLoader needs an integer; fall back to 0 (load data in the main process)
NUM_WORKERS = os.cpu_count() or 0

torch.manual_seed(42)
# Training batches are reshuffled every epoch
train_dataloader_augmented = DataLoader(train_data_augmented,
                                        batch_size=BATCH_SIZE,
                                        shuffle=True,
                                        num_workers=NUM_WORKERS)

# Test batches keep a fixed order
test_dataloader_simple = DataLoader(test_data_simple,
                                    batch_size=BATCH_SIZE,
                                    shuffle=False,
                                    num_workers=NUM_WORKERS)

# Display the two loaders created in this cell
train_dataloader_augmented, test_dataloader_simple

In [None]:
# This model, unlike the one before, will see different variations of the same training images each epoch
# This may help it generalise better
# Create model_1 (same constructor arguments as model_0) and send it to the target device
torch.manual_seed(42)
model_1 = HybridTinyVGGResNet(
    input_shape=3,
    hidden_units=10,
    output_shape=len(train_data_augmented.classes)).to(device)
model_1

In [None]:
# Run a full training loop
# This time, the model uses AUGMENTED training data; the test data is not augmented

# Seed PyTorch's random number generators
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Set number of epochs
NUM_EPOCHS = 5

# Setup loss function and optimizer (a fresh Adam instance bound to model_1's parameters)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_1.parameters(), lr=0.001)

# Start the timer
from timeit import default_timer as timer
start_time = timer()

# Train model_1; the returned dict holds one value per epoch for each metric
model_1_results = train(model=model_1,
                        train_dataloader=train_dataloader_augmented,
                        test_dataloader=test_dataloader_simple,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        epochs=NUM_EPOCHS)

# End the timer and print out how long it took
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

In [None]:
# Plot the loss and accuracy curves recorded while training model_1
plot_loss_curves(model_1_results)

In [None]:
# Summary of results
# Each results dict becomes a DataFrame: one row per epoch, one column per metric
import pandas as pd
model_0_df = pd.DataFrame(model_0_results)
model_1_df = pd.DataFrame(model_1_results)
# Only model_0's table is displayed by this cell
model_0_df

In [None]:
# Plot comparison of Model 0 and Model 1 --> simple vs. augmented training data
# To see which had better performance

# Setup a plot
plt.figure(figsize=(15, 10))

# Get number of epochs (one row per epoch in the results DataFrame)
epochs = range(len(model_0_df))

# One panel per recorded metric: (DataFrame column, panel title)
comparison_panels = [
    ("train_loss", "Train Loss"),
    ("test_loss", "Test Loss"),
    ("train_acc", "Train Accuracy"),
    ("test_acc", "Test Accuracy"),
]

# Draw both models on each panel of a 2x2 grid, filled row by row
for panel_index, (metric_key, panel_title) in enumerate(comparison_panels, start=1):
    plt.subplot(2, 2, panel_index)
    plt.plot(epochs, model_0_df[metric_key], label="Model 0")
    plt.plot(epochs, model_1_df[metric_key], label="Model 1")
    plt.title(panel_title)
    plt.xlabel("Epochs")
    plt.legend()

In [None]:
# Pick a random image from the testing dataset
# (relies on the `random` module and the `test_images` collection from earlier cells)
random_img = random.choice(test_images)
print(random_img)

In [None]:
# Converts the random image into a tensor
import torchvision

# Read in custom image (kept in its raw integer form here; no scaling applied)
custom_image_uint8 = torchvision.io.read_image(str(random_img))

# Print out image data: full tensor contents, shape and dtype
print(f"Custom image tensor:\n{custom_image_uint8}\n")
print(f"Custom image shape: {custom_image_uint8.shape}\n")
print(f"Custom image dtype: {custom_image_uint8.dtype}")

In [None]:
# Load in custom image (read again from disk) and convert the tensor values to float32
custom_image = torchvision.io.read_image(str(random_img)).type(torch.float32)

# Divide the image pixel values by 255 to get them between [0, 1]
custom_image = custom_image / 255.

# Print out image data: full tensor contents, shape and dtype
print(f"Custom image tensor:\n{custom_image}\n")
print(f"Custom image shape: {custom_image.shape}\n")
print(f"Custom image dtype: {custom_image.dtype}")

In [None]:
# Plot custom image
print("From testing dataset:")
# Clamp pixel values into [0, 1] before display
custom_image = torch.clamp(custom_image, 0, 1)
plt.imshow(custom_image.permute(1, 2, 0)) # need to permute image dimensions from CHW -> HWC otherwise matplotlib will error
plt.title(f"Image shape: {custom_image.shape}")
# Hide the axes; the trailing semicolon suppresses the cell's text output
plt.axis(False);

In [None]:
# Create transform pipeline to resize image
# (same 224x224 target size as the Resize step in the train/test transforms)
custom_image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
])

# Transform target image (applied to the float tensor, not to a PIL image)
custom_image_transformed = custom_image_transform(custom_image)

# Print out original shape and new shape
print(f"Original shape: {custom_image.shape}")
print(f"New shape: {custom_image_transformed.shape}")

In [None]:
# Switch model_1 to evaluation mode and run it on the single resized image
model_1.eval()
with torch.inference_mode():
    # The model is called on a batch, so add a leading batch dimension of size 1
    custom_image_transformed_with_batch_size = custom_image_transformed.unsqueeze(dim=0)

    # Show the shapes before and after adding the batch dimension
    print(f"Custom image transformed shape: {custom_image_transformed.shape}")
    print(f"Unsqueezed custom image shape: {custom_image_transformed_with_batch_size.shape}")

    # Move the batched image to the target device and predict
    custom_image_pred = model_1(custom_image_transformed_with_batch_size.to(device))

In [None]:
# Shows the model output for the single, transformed image
# These are raw scores (logits), one value per class, before softmax is applied
custom_image_pred

In [None]:
# Print out prediction logits
print(f"Prediction logits: {custom_image_pred}")

# Convert logits -> prediction probabilities (using torch.softmax() for multi-class classification)
# dim=1 is the class dimension; dim 0 is the batch dimension added before the forward pass
custom_image_pred_probs = torch.softmax(custom_image_pred, dim=1)
print(f"Prediction probabilities: {custom_image_pred_probs}")

# Convert prediction probabilities -> prediction labels (index of the most probable class)
custom_image_pred_label = torch.argmax(custom_image_pred_probs, dim=1)
print(f"Prediction label: {custom_image_pred_label}")

In [None]:
# Find the predicted label: use the predicted index to look up its name in class_names
custom_image_pred_class = class_names[custom_image_pred_label.cpu()] # put pred label to CPU, otherwise will error
custom_image_pred_class

In [None]:
# Display the softmax probabilities computed for the image
# The values of the prediction probabilities are quite similar
custom_image_pred_probs

In [None]:
"""Makes a prediction on a target image and plots the image with its prediction."""
def pred_and_plot_image(model: torch.nn.Module,
                        image_path: str,
                        class_names: List[str] = None,
                        transform=None,
                        device: torch.device = device):
    """Predict the class of one image file and plot the image with the result.

    Args:
        model: Classifier called on a single-image batch; expected to return
            one row of per-class logits.
        image_path: Path of the image file to read (converted with str()).
        class_names: Optional list mapping class index -> class name. When it
            is None or empty, the plot title shows the numeric class index.
        transform: Optional callable applied to the float image tensor
            (values scaled to [0, 1]) before the batch dimension is added.
        device: Device the model and the image are moved to for inference.

    Returns:
        int: index of the predicted class.
    """
    # 1. Load the image as float32 and scale the pixel values into [0, 1]
    target_image = torchvision.io.read_image(str(image_path)).type(torch.float32) / 255.

    # 2. Transform if necessary
    if transform:
        target_image = transform(target_image)

    # 3. Make sure the model is on the target device
    model.to(device)

    # 4. Predict in evaluation + inference mode
    model.eval()
    with torch.inference_mode():
        # The model is called on a batch, so add a leading batch dimension
        target_image = target_image.unsqueeze(dim=0)
        target_image_pred = model(target_image.to(device))

    # 5. Logits -> probabilities -> predicted class index and its probability
    target_image_pred_probs = torch.softmax(target_image_pred, dim=1)
    pred_label = int(torch.argmax(target_image_pred_probs, dim=1).item())
    pred_prob = target_image_pred_probs.max().item()

    # 6. Plot the image with the prediction in the title
    # Drop only the batch dimension, then go CHW -> HWC as matplotlib expects
    image_to_plot = target_image.squeeze(dim=0).permute(1, 2, 0).cpu().numpy()
    image_to_plot = np.clip(image_to_plot, 0, 1)
    plt.imshow(image_to_plot)
    if class_names:
        title = f"Pred: {class_names[pred_label]} | Prob: {pred_prob:.3f}"
    else:
        # Without names there is nothing to index; show the class index itself
        title = f"Pred: {pred_label} | Prob: {pred_prob:.3f}"

    plt.title(title)
    plt.axis(False)

    # Returns the prediction of the image (the integer representation)
    return pred_label

In [None]:
# Prediction on our custom (random) image
# A new random test image is drawn each time this cell runs
random_img = random.choice(test_images)

# Predict with the model trained on augmented data, using the resize-only transform
pred_label = pred_and_plot_image(model=model_1,
                    image_path=random_img,
                    class_names=class_names,
                    transform=custom_image_transform,
                    device=device)

# The true label is taken from the name of the folder that contains the image
# NOTE(review): assumes that folder name matches an entry of class_names - confirm against the dataset layout
true_label = random_img.parent.name
print(f"Prediction: {class_names[pred_label].capitalize()}")
print(f"True label: {true_label.capitalize()}")

# Compare the predicted class name with the folder name
if str(class_names[pred_label]) == str(true_label):
  print("Prediction is correct!")
else:
  print("Incorrect prediction.")