# Model Evaluation
Go through the process of evaluating a model.

# Load Data

In [1]:
# Import all necessary modules
import pandas as pd
import torch
import os
from torch.utils.data import Dataset
from PIL import Image
from torchvision.transforms import v2
from torch.utils.data import DataLoader

In [None]:
# Evaluation-time preprocessing applied to every image before inference.
# The 128x128 resize matches the input size implied by the model's first
# linear layer (64 * 32 * 32 features after two 2x2 poolings).
# NOTE(review): the mean/std look like the usual ImageNet statistics;
# confirm they match what was used during training.
normalize = v2.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
test_transform = v2.Compose([
    v2.Resize((128, 128)),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    normalize,
])

In [26]:
# Define our Dataset Class and label encoding
label_encoding = {"malignant": 0, "benign": 1}


# Dataset Class
class CustomImageDataset(Dataset):
    """Image-classification dataset driven by a CSV annotations file.

    Column 0 of the CSV holds each image path (joined onto ``img_dir``) and
    column 1 holds the raw class label.

    Args:
        annotations_file: Path of the CSV file listing images and labels.
        img_dir: Directory the image paths in the CSV are relative to.
        transform: Callable applied to each loaded PIL image.
        target_transform: Mapping from raw label to encoded label (for
            example ``label_encoding``). A callable is accepted as well.
    """

    def __init__(self, annotations_file, img_dir, transform, target_transform):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        # Keep the encoder itself instead of wrapping it in a lambda: a
        # lambda attribute cannot be pickled, which breaks DataLoader worker
        # processes on platforms that start them with "spawn".
        self._target_encoder = target_transform

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.img_labels)

    def target_transform(self, label):
        """Encode a raw label using the mapping or callable given at init."""
        encoder = self._target_encoder
        return encoder(label) if callable(encoder) else encoder[label]

    def __getitem__(self, idx):
        """Return the ``(transformed image, encoded label)`` pair at ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        # The context manager releases the file handle right away, and
        # convert("RGB") guarantees three channels even for grayscale or
        # RGBA files (it also returns a fully loaded copy).
        with Image.open(img_path) as opened:
            image = opened.convert("RGB")
        label = self.img_labels.iloc[idx, 1]

        return self.transform(image), self.target_transform(label)

In [27]:
# Build the evaluation dataset: image paths and labels come from the CSV,
# and each path is joined onto the image root directory.
test_dataset = CustomImageDataset(
    "test_data.csv",
    "../../../",
    test_transform,
    label_encoding,
)

In [28]:
# Serve the test dataset in fixed-order batches of 32 (no shuffling)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Load Model

In [48]:
# Import the needed modules
import torch.nn as nn
import torch.nn.functional as F


# Name the class
class BreastCancerClassification(nn.Module):
    """Small CNN mapping a 3-channel 128x128 image to 2 raw class scores.

    Two conv + ReLU + 2x2 max-pool stages are followed by three fully
    connected layers. No softmax is applied to the output.
    """

    def __init__(self):
        super().__init__()
        # Create the layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # 64 channels * 32 * 32 positions: a 128x128 input is halved by each
        # of the two pooling steps, while the padded convs keep the size.
        self.fc1 = nn.Linear(64 * 32 * 32, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 2)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, 2)``.

        Args:
            x: Image tensor of shape ``(batch, 3, 128, 128)``. A single
                unbatched ``(3, 128, 128)`` image is also accepted and is
                treated as a batch of one.

        Raises:
            RuntimeError: If the spatial size does not produce the
                ``64 * 32 * 32`` features that ``fc1`` expects.
        """
        # Keep single-image calls working by adding the batch dimension
        if x.dim() == 3:
            x = x.unsqueeze(0)

        # Create the layer connections
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, 64 * 32 * 32), this cannot
        # silently fold a wrong-sized image into extra batch rows; fc1
        # raises on a mismatch instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [49]:
# Create instance of a model
# (its parameters are freshly initialized here; the trained values are
# loaded from the checkpoint in the following cells)
model = BreastCancerClassification()

In [50]:
import torch

# Load checkpoint
# map_location="cpu" lets a checkpoint saved from a GPU session load on a
# CPU-only machine; nothing in this notebook moves the model to a GPU.
# weights_only=True limits unpickling to tensors and plain containers.
checkpoint = torch.load('2_checkpoint.tar', map_location="cpu", weights_only=True)

In [None]:
# Copy the trained parameters stored under 'model_state_dict' into our model
trained_state = checkpoint['model_state_dict']
model.load_state_dict(trained_state)

# Model Inference

## Real-time
Run inference on a single input to get an immediate response.

In [52]:
# Open an image
image_path = 'sample-input.jpg'
# Convert to RGB so a grayscale or RGBA file still yields the 3 channels
# that the normalization and the first conv layer expect. The context
# manager closes the file once the converted copy has been made.
with Image.open(image_path) as opened_image:
    image = opened_image.convert("RGB")

In [None]:
# Set to eval mode
# (this model defines no dropout or batch-norm layers, so this is a good
# habit rather than a change in its outputs)
model.eval()

In [None]:
%%time
# Apply the transformation
transformed_image = test_transform(image)

# Perform inference
# Add an explicit batch dimension of size 1 instead of relying on the model
# to reshape an unbatched image on its own.
output = model(transformed_image.unsqueeze(0))
predicted = output.argmax(dim=1)  # Index of the highest raw score = predicted class

# Print the class
print(f"Class: {predicted.item()}")  # item() gets the value out of the tensor

In [None]:
# Invert label_encoding (class name -> index) so that a predicted index can
# be translated back into its class name
index_to_class_map = dict(zip(label_encoding.values(), label_encoding.keys()))
predicted_index = predicted.item()
print(f"Class: {index_to_class_map[predicted_index]}")

## Batch Inference
Run inference on multiple inputs at the same time.

This is what we will use in our training loop.

A PyTorch DataLoader already serves its data in batches!

In [None]:
# Report how the test data is batched and how many images it contains
loader_batch_size = test_loader.batch_size
image_count = len(test_dataset)
print(f"batch size: {loader_batch_size}")
print(f"Dataset images: {image_count}")

In [None]:
# Use the dataloader to perform batch inference
for inputs, labels in test_loader:  # Loop over the whole dataloader in batches of 32
    # Perform inference as we did before
    outputs = model(inputs)
    predicted = outputs.argmax(dim=1)  # One predicted class index per image in the batch
    print(f"Class: {predicted}")  # notice we didn't use .item() here

# torch.no_grad()
Run the inference without gradients being calculated.

This is less resource intensive. Is it also faster? Let's time it.

In [None]:
%%time
# Try again but time it
# Gradient tracking is deliberately left on here; this is the baseline for
# the no_grad() comparison.
for inputs, labels in test_loader:
    outputs = model(inputs)
    predicted = outputs.argmax(dim=1)
    print(f"Class: {predicted}")

In [None]:
%%time
# Same but with no_grad()
with torch.no_grad():  # No gradient bookkeeping inside this block
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)

        print(f"Class: {predicted}")

# Torchmetrics
Torchmetrics is a handy tool that integrates very nicely into PyTorch to provide metrics. 

In [None]:
# Import module
import torchmetrics

In [None]:
# Simulate torchmetrics
# Number of classes in the simulated example (each row of scores created
# below has 3 columns)
num_classes = 3

In [None]:
# Create some inputs
# These stand in for a model's raw scores: one row per sample and one column
# per class. The highest-scoring column of each row is later taken as the
# predicted class.
inputs = torch.tensor([
    [2.0, 1.0, 0.1],
    [0.5, 2.5, 0.3],
    [0.1, 0.4, 3.0],
    [1.0, 0.8, 0.9],
    [0.3, 1.5, 2.1],
    [0.9, 2.1, 1.8],
    [0.2, 3.0, 1.0],
    [2.1, 0.1, 0.5],
    [0.5, 2.5, 0.6],
    [1.0, 0.5, 2.0] 
])

In [None]:
# Create some true class labels. The first and the last are purposely incorrect:
# they disagree with the highest-scoring column of their row, so 8 of the 10
# simulated predictions match.
true_labels = torch.tensor([1, 1, 2, 0, 2, 1, 1, 0, 1, 0])

In [None]:
# Initialize the accuracy metric for multi-class classification
# The metric object accumulates results across update() calls until
# compute() is called.
accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes) # Specifying multiple class task

In [None]:
# Simulate predictions: the column index of the highest score in each row
predicted = torch.max(inputs.data, dim=1).indices

In [None]:
# Update the metric with predictions and true labels
# (both are 1-D tensors of class indices with one entry per sample)
accuracy.update(predicted, true_labels) 

In [None]:
# Turn everything passed to update() so far into a single accuracy value
final_accuracy = accuracy.compute()
accuracy_percent = final_accuracy * 100
print(f"Accuracy: {accuracy_percent}%")

# Test Loop
Run model evaluation as part of a test loop, using Torchmetrics to calculate the metric.

For this we are going to include no_grad() and ensure our model is in evaluation mode.

In [None]:
import torchmetrics

# Initialize the accuracy metric
# The class count comes from label_encoding (2 classes, matching the model's
# 2 output scores) instead of a hard-coded value that can drift out of sync.
accuracy_metric = torchmetrics.Accuracy(task="multiclass", num_classes=len(label_encoding))

# Test Loop with Torchmetrics
model.eval()  # Set model to evaluation mode
with torch.no_grad():  # Disable gradient computation for evaluation
    # Run test dataloader
    for inputs, labels in test_loader:
        outputs = model(inputs)

        # Get predicted class (index of the highest score per image)
        predicted = outputs.argmax(dim=1)

        # Update the accuracy metric with predictions and true labels
        accuracy_metric.update(predicted, labels)

# Compute the final accuracy over all batches seen above
final_accuracy = accuracy_metric.compute()
print(f"Accuracy: {final_accuracy * 100}%")


In [None]:
# NOTE: We must reset the metric for future use or it will continue to update.
# Without the reset, later update() calls would be added on top of the
# results already accumulated by the test loop.
accuracy_metric.reset()

# Custom Metric Example

In [None]:
# Start both running counters (correct predictions, samples seen) at zero
correct, total = 0, 0

In [None]:
# Run predictions
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)

        # Update total count with the number of labels in this batch
        total += labels.size(0)
        # Count the predictions that match the true labels
        correct += (predicted == labels).sum().item()

# Accuracy percentage = 100 * correct / total. True division keeps the
# fractional part that // would throw away, and the guard avoids a
# ZeroDivisionError when the loader yielded no samples.
if total:
    print(f'Accuracy: {100 * correct / total:.2f}%')
else:
    print('Accuracy: n/a (no samples evaluated)')