In [None]:
# Oxford-IIIT Pet Dataset Faster R-CNN Training and Evaluation Notebook
# ---------------------------------------------------------------------

# 1. Setup and Imports
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms as T
from sklearn.metrics import precision_score, recall_score, f1_score
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import OxfordIIITPet
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.ops import box_iou

# 2. Define Device
# Run on the GPU whenever CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# 3. Load the Dataset with Transformations
class CustomPetDataset(OxfordIIITPet):
    """Oxford-IIIT Pet dataset that yields Faster R-CNN style detection targets.

    Every item is ``(image, target)`` where ``target`` is a dict holding
    ``"boxes"`` (float32 tensor of shape ``(1, 4)`` in
    ``[x_min, y_min, x_max, y_max]`` order) and ``"labels"`` (int64 tensor of
    shape ``(1,)`` containing the breed index + 1, so 0 stays reserved for the
    background class).

    Args:
        root: Directory the dataset is stored in / downloaded to.
        target_types: Target name or list of names. It must cover
            ``"category"`` and one of ``"bbox"`` / ``"segmentation"``.
            torchvision has no ``"bbox"`` target for this dataset, so
            ``"bbox"`` is requested from the parent as ``"segmentation"`` and
            the box is derived from the trimap.
        transform: Optional callable applied to the image only.
        download: Whether to download the dataset when it is missing.
        downsample_ratio: Fraction of samples to keep; values below 1.0 keep a
            random subset of that size.

    Raises:
        ValueError: If ``target_types`` lacks the category or the box source.
    """

    # Trimap value marking background pixels (1 = pet, 2 = background, 3 = border).
    _TRIMAP_BACKGROUND = 2

    def __init__(self, root, target_types, transform=None, download=False, downsample_ratio=1.0):
        requested = [target_types] if isinstance(target_types, str) else list(target_types)

        # Translate the requested names into ones the parent class understands,
        # keeping their order because the parent returns targets in that order.
        parent_types = []
        for name in requested:
            mapped = "segmentation" if name == "bbox" else name
            if mapped not in parent_types:
                parent_types.append(mapped)

        missing = {"category", "segmentation"} - set(parent_types)
        if missing:
            raise ValueError(
                f"target_types must include 'category' and 'bbox'/'segmentation'; missing: {sorted(missing)}"
            )

        super().__init__(root=root, target_types=parent_types, transform=transform, download=download)
        self._parent_target_types = parent_types

        # Ask the parent for the size: this class's __len__ reads self.indices,
        # which does not exist yet at this point.
        total = super().__len__()
        if downsample_ratio < 1.0:
            self.indices = random.sample(range(total), int(total * downsample_ratio))
        else:
            self.indices = list(range(total))

    def __len__(self):
        return len(self.indices)

    @classmethod
    def _box_from_trimap(cls, trimap):
        """Return ``[x_min, y_min, x_max, y_max]`` enclosing all non-background pixels."""
        trimap_array = np.array(trimap)
        rows, cols = np.nonzero(trimap_array != cls._TRIMAP_BACKGROUND)
        if rows.size == 0:
            # No pet pixels at all: fall back to the full image so the box stays valid.
            height, width = trimap_array.shape[:2]
            return [0.0, 0.0, float(width), float(height)]
        # +1 makes the max edge exclusive so a one-pixel-wide object still has
        # positive width/height, which Faster R-CNN requires.
        return [float(cols.min()), float(rows.min()), float(cols.max() + 1), float(rows.max() + 1)]

    def __getitem__(self, idx):
        sample_idx = self.indices[idx]
        image, raw_target = super().__getitem__(sample_idx)

        # With several target types the parent returns a tuple ordered like the
        # requested types (not a dict), so pair each value with its name.
        target_by_type = dict(zip(self._parent_target_types, raw_target))

        box = self._box_from_trimap(target_by_type["segmentation"])

        # Adjust the target dictionary to match Faster R-CNN format
        target = {
            "boxes": torch.tensor([box], dtype=torch.float32),  # one box per image -> shape (1, 4)
            "labels": torch.tensor([target_by_type["category"] + 1], dtype=torch.int64),  # +1: 0 is background
        }

        return image, target

# Define transformations
transform = T.ToTensor()

# Load the dataset with all classes
downsample_ratio = 1.0  # Optionally adjust this to a smaller portion for quicker demo
dataset = CustomPetDataset(root='./data', target_types=['category', 'bbox'], transform=transform, download=True, downsample_ratio=downsample_ratio)

# Split dataset into train and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
# A seeded generator keeps train/validation membership identical across runs,
# so validation numbers from different runs are comparable.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=split_generator)


def detection_collate(batch):
    """Turn a list of ``(image, target)`` pairs into ``(images, targets)`` tuples.

    Detection images differ in size, so they are kept as tuples instead of
    being stacked into one tensor. A named function (unlike a lambda) can also
    be pickled if the loaders are later given worker processes.
    """
    return tuple(zip(*batch))


# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=detection_collate)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=detection_collate)

In [None]:
# Oxford-IIIT Pet Dataset Faster R-CNN Training and Evaluation Notebook
# ---------------------------------------------------------------------

# 1. Setup and Imports
import torch
import torchvision.transforms as T
from torchvision.datasets import OxfordIIITPet
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
from torchvision.ops import box_iou
import numpy as np
import random
from PIL import Image



In [None]:
# 3. Load the Dataset with Transformations and Bounding Box Calculation
class CustomPetDataset(OxfordIIITPet):
    """Oxford-IIIT Pet dataset with bounding boxes derived from the trimaps.

    The parent dataset is always loaded with the ``"category"`` and
    ``"segmentation"`` targets. Every item is ``(image, target)`` where
    ``target`` is a dict holding ``"boxes"`` (float32 tensor of shape
    ``(1, 4)`` in ``[x_min, y_min, x_max, y_max]`` order) and ``"labels"``
    (int64 tensor of shape ``(1,)`` containing the breed index + 1, so 0 stays
    reserved for the background class).

    Args:
        root: Directory the dataset is stored in / downloaded to.
        transform: Optional callable applied to the image only.
        download: Whether to download the dataset when it is missing.
        downsample_ratio: Fraction of samples to keep; values below 1.0 keep a
            random subset of that size.
    """

    # Trimap pixel values: 1 = pet, 2 = background, 3 = unclassified border.
    TRIMAP_BACKGROUND = 2

    def __init__(self, root, transform=None, download=False, downsample_ratio=1.0):
        super().__init__(root=root, target_types=["category", "segmentation"], transform=transform, download=download)

        # The parent's length must be used here: this class's __len__ depends
        # on self.indices, which is only created below.
        num_samples = super().__len__()

        # Downsample dataset if specified
        if downsample_ratio < 1.0:
            self.indices = random.sample(range(num_samples), int(num_samples * downsample_ratio))
        else:
            self.indices = list(range(num_samples))

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        sample_idx = self.indices[idx]
        # For two target types the parent returns a tuple in the requested
        # order (category, segmentation), not a dict.
        image, (category, mask) = super().__getitem__(sample_idx)

        # Calculate bounding box from segmentation mask. Every trimap pixel is
        # non-zero, so the background value has to be excluded explicitly;
        # otherwise the box would always span the whole image.
        mask_array = np.array(mask)
        pet_coords = np.argwhere(mask_array != self.TRIMAP_BACKGROUND)
        if len(pet_coords) == 0:
            # No pet pixels at all: fall back to the full image so the box stays valid.
            height, width = mask_array.shape[:2]
            bbox = [0.0, 0.0, float(width), float(height)]
        else:
            y_min, x_min = pet_coords.min(axis=0)
            y_max, x_max = pet_coords.max(axis=0)
            # +1 makes the max edge exclusive, guaranteeing positive width and
            # height (Faster R-CNN rejects degenerate boxes).
            bbox = [float(x_min), float(y_min), float(x_max + 1), float(y_max + 1)]

        # Prepare the target dictionary
        target = {
            "boxes": torch.tensor([bbox], dtype=torch.float32),  # Wrap bbox in a list -> shape (1, 4)
            "labels": torch.tensor([category + 1], dtype=torch.int64)  # +1 to account for background class
        }

        return image, target


In [None]:
# 4. Define and Configure the Faster R-CNN Model
# Imported here so this cell is self-contained; the replacement head below needs it.
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

num_classes = 38  # 37 pet breeds + 1 background class
# NOTE(review): newer torchvision releases prefer `weights=` over `pretrained=`;
# kept as-is because the installed version is not visible from here.
model = fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
# The RoI head unpacks the predictor output into (class_logits, box_regression).
# A bare nn.Linear returns a single tensor and breaks that contract, so use
# the two-headed FastRCNNPredictor sized for our class count.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# Define optimizer and OneCycleLR scheduler
# OneCycleLR is built for exactly steps_per_epoch * num_epochs steps, so this
# value must equal the number of epochs actually run by the training loop.
num_epochs = 10
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
steps_per_epoch = len(train_loader)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.005, steps_per_epoch=steps_per_epoch, epochs=num_epochs)

In [None]:
# 5. Training and Validation Loop
num_epochs = 10
model_save_path = "faster_rcnn_pets.pth"
for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    for images, targets in train_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        train_loss += losses.item()

        losses.backward()
        optimizer.step()
        # The scheduler is stepped once per batch (it was sized in batches).
        scheduler.step()

    train_loss /= len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_loss:.4f}")

    # Validation
    # torchvision detection models return the loss dict only in training mode;
    # in eval mode they return a list of predictions and ignore the targets.
    # So the model stays in training mode here, while no_grad() plus the
    # absence of an optimizer step keeps the weights untouched.
    model.train()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            val_loss += sum(loss for loss in loss_dict.values()).item()

    val_loss /= len(val_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {val_loss:.4f}")

# Save the model after training
torch.save(model.state_dict(), model_save_path)
print("Model saved successfully.")



In [None]:
# Reload the Model for Evaluation
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
state_dict = torch.load(model_save_path, map_location=device)
model.load_state_dict(state_dict)
model.to(device)
# Trailing semicolon keeps the notebook from printing the full model repr.
model.eval();



In [None]:
# 6. Compute Metrics on the Validation Set with IoU-based Matching
def calculate_metrics(model, data_loader, device, iou_threshold=0.5, confidence_threshold=0.5):
    """Print and return weighted precision, recall and F1 for detections.

    Predictions scoring at least ``confidence_threshold`` are matched, in the
    order the model returns them, to the ground-truth box with which they
    overlap most. A match needs IoU >= ``iou_threshold`` and each ground-truth
    box can be matched once. Label 0 stands for "background": an unmatched
    prediction is recorded against true label 0 (false positive) and an
    unmatched ground-truth box is recorded with predicted label 0 (false
    negative).

    Args:
        model: Detection model; called as ``model(images)`` in eval mode and
            expected to return dicts with ``boxes``, ``labels`` and ``scores``.
        data_loader: Iterable of ``(images, targets)`` batches.
        device: Device the images and targets are moved to.
        iou_threshold: Minimum IoU for a prediction to match a ground truth.
        confidence_threshold: Minimum score for a prediction to be considered.

    Returns:
        Tuple ``(precision, recall, f1)``; all 0.0 when nothing was evaluated.
    """
    all_true_labels = []
    all_pred_labels = []

    model.eval()
    with torch.no_grad():
        for images, targets in data_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Get predictions
            predictions = model(images)

            for pred, target in zip(predictions, targets):
                # Filter predictions by confidence threshold
                keep = pred['scores'] >= confidence_threshold
                pred_boxes = pred['boxes'][keep]
                pred_labels = pred['labels'][keep].tolist()

                # Get ground truth boxes and labels
                true_boxes = target['boxes']
                true_labels = target['labels'].tolist()

                matched_truths = set()
                if pred_labels and true_labels:
                    # Compute IoU between each predicted box and each true box
                    iou_matrix = box_iou(pred_boxes, true_boxes)

                    for i, pred_label in enumerate(pred_labels):
                        # Find the best-matching ground truth box for this prediction
                        max_iou, max_iou_idx = iou_matrix[i].max(0)
                        all_pred_labels.append(pred_label)

                        if max_iou >= iou_threshold:
                            truth_idx = int(max_iou_idx)
                            all_true_labels.append(true_labels[truth_idx])
                            matched_truths.add(truth_idx)
                            # Zero the column so this truth cannot be matched again
                            iou_matrix[:, truth_idx] = 0
                        else:
                            # No sufficiently overlapping truth: false positive
                            all_true_labels.append(0)
                else:
                    # Nothing to match against: every prediction is a false positive
                    all_pred_labels.extend(pred_labels)
                    all_true_labels.extend([0] * len(pred_labels))

                # Ground-truth boxes no prediction claimed are false negatives.
                # This must run even when other predictions existed, otherwise
                # missed objects silently vanish from the recall computation.
                for truth_idx, true_label in enumerate(true_labels):
                    if truth_idx not in matched_truths:
                        all_true_labels.append(true_label)
                        all_pred_labels.append(0)

    # Calculate precision, recall, and F1-score
    if not all_true_labels:
        # Empty loader or nothing to score: avoid calling sklearn on empty input
        precision = recall = f1 = 0.0
    else:
        # zero_division=0 yields the same values as the default but without warnings
        precision = precision_score(all_true_labels, all_pred_labels, average='weighted', zero_division=0)
        recall = recall_score(all_true_labels, all_pred_labels, average='weighted', zero_division=0)
        f1 = f1_score(all_true_labels, all_pred_labels, average='weighted', zero_division=0)

    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")
    return precision, recall, f1

# Score the model on the held-out validation loader
calculate_metrics(model=model, data_loader=val_loader, device=device)

# 7. Visualize Predictions on Validation Samples
def _draw_boxes(ax, boxes, color):
    """Outline every ``[x_min, y_min, x_max, y_max]`` box on ``ax`` in ``color``."""
    for x_min, y_min, x_max, y_max in boxes:
        ax.add_patch(plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                   fill=False, color=color, linewidth=2))


def plot_sample(image, target, prediction=None, score_threshold=None):
    """Show an image with ground-truth boxes (blue) and predicted boxes (red).

    Args:
        image: Image tensor; it is permuted with ``(1, 2, 0)`` before display,
            i.e. the channel axis is expected first.
        target: Dict whose ``"boxes"`` tensor holds the ground-truth boxes.
        prediction: Optional dict whose ``"boxes"`` tensor holds predicted
            boxes. Nothing is drawn for ``None`` or an empty dict.
        score_threshold: When given (and the prediction has ``"scores"``),
            only predictions scoring at least this value are drawn. The
            default ``None`` draws every predicted box.
    """
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))

    # Display image
    ax.imshow(image.permute(1, 2, 0).cpu().numpy())

    # Plot true boxes in blue
    _draw_boxes(ax, target["boxes"].cpu().numpy(), "blue")

    # Plot predicted boxes in red if provided
    if prediction:
        pred_boxes = prediction["boxes"]
        if score_threshold is not None and "scores" in prediction:
            pred_boxes = pred_boxes[prediction["scores"] >= score_threshold]
        _draw_boxes(ax, pred_boxes.cpu().numpy(), "red")

    ax.axis("off")
    plt.show()

# Test on random validation samples
with torch.no_grad():
    for images, targets in random.sample(list(val_loader), 3):
        images = [image.to(device) for image in images]
        predictions = model(images)
        
        for img, target, pred in zip(images, targets, predictions):
           
