## Part 1: Import Libraries

In [1]:
# Import Required Libraries
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torchvision.transforms import Compose, ToTensor
import os
import numpy as np
from PIL import Image

## Part 2: Dataset Preparation

In [2]:
# Dataset Preparation
# Root directory of the dataset. The default matches the original workspace;
# set the FISH_COCO_ROOT environment variable to point somewhere else without
# editing the notebook.
root_dir = os.environ.get("FISH_COCO_ROOT", "/workspaces/ai-projects/Dataset/Fish_COCO")

# This notebook expects a COCO-format export with one folder per split, where
# the images sit directly next to that split's annotation file:
# root_dir/
#   train/
#     _annotations.coco.json
#     <image files>
#   valid/
#     _annotations.coco.json
#     <image files>
#
# COCO annotations store each box as [x_min, y_min, width, height] in pixel
# coordinates and each label as a category id. Faster R-CNN instead expects
# [xmin, ymin, xmax, ymax], so the boxes must be converted before training.
#
# If your data is in subfolders like Bass/, Groupers/, etc., you may need to:
# 1. Collect all images of a split into that split's folder
# 2. Ensure the annotation file references the correct image filenames
# 3. Split into train/valid folders

# File extensions (lower-case) that are counted as images.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def count_images(dir_path):
    """Return the number of image files located directly inside ``dir_path``.

    The extension check is case-insensitive so files such as ``photo.JPG``
    are counted too. Sub-directories are not searched.
    """
    return sum(1 for name in os.listdir(dir_path) if name.lower().endswith(IMAGE_EXTENSIONS))


train_images_dir = os.path.join(root_dir, "train")
train_annotations_file = os.path.join(root_dir, "train", "_annotations.coco.json")
val_images_dir = os.path.join(root_dir, "valid")
val_annotations_file = os.path.join(root_dir, "valid", "_annotations.coco.json")

# Check if directories and files exist
print("Checking dataset directories and files...")
for dir_path in [train_images_dir, val_images_dir]:
    if os.path.exists(dir_path):
        print(f"✓ {dir_path} exists")
    else:
        print(f"✗ {dir_path} does not exist - please create and populate it")

for file_path in [train_annotations_file, val_annotations_file]:
    if os.path.exists(file_path):
        print(f"✓ {file_path} exists")
    else:
        print(f"✗ {file_path} does not exist - please export from Roboflow")

# List number of files (optional check); skipped for folders that are missing
if os.path.exists(train_images_dir):
    num_train_images = count_images(train_images_dir)
    print(f"Number of training images: {num_train_images}")
if os.path.exists(val_images_dir):
    num_val_images = count_images(val_images_dir)
    print(f"Number of validation images: {num_val_images}")

Checking dataset directories and files...
✓ /workspaces/ai-projects/Dataset/Fish_COCO/train exists
✓ /workspaces/ai-projects/Dataset/Fish_COCO/valid exists
✓ /workspaces/ai-projects/Dataset/Fish_COCO/train/_annotations.coco.json exists
✓ /workspaces/ai-projects/Dataset/Fish_COCO/valid/_annotations.coco.json exists
Number of training images: 2925
Number of validation images: 122


## Part 3: Defining Custom Dataset Class

In [None]:
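# Define the Dataset Class
# We use torchvision's CocoDetection, which loads COCO-format data (images + a COCO annotation JSON file).
# Each sample it returns is (PIL image, list of raw COCO annotation dicts); boxes are [x, y, width, height].
# Faster R-CNN needs an image tensor plus a dict with 'boxes' ([xmin, ymin, xmax, ymax]) and 'labels',
# so that conversion has to happen in the `transforms` callable passed to CocoDetection rather than in a custom dataset class.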

## Part 4: Define Data Transforms

In [None]:
# Define Data Transforms
class CocoDetectionTransform:
    """Turn a raw ``CocoDetection`` sample into Faster R-CNN inputs.

    ``CocoDetection`` yields ``(PIL image, list of COCO annotation dicts)``,
    while torchvision's detection models expect ``(float image tensor, dict)``
    where the dict holds ``boxes`` as ``[xmin, ymin, xmax, ymax]`` and
    ``labels`` as int64 class ids. This callable performs that conversion and
    then applies any paired augmentations.

    It is a module-level class (not a closure or lambda) so instances stay
    picklable by reference.
    """

    def __init__(self, augmentations=()):
        # Each augmentation must be a callable ``(image, target) -> (image, target)``
        # so that boxes stay in sync with the pixels.
        self.augmentations = list(augmentations)

    def __call__(self, image, annotations):
        """Convert one sample.

        Args:
            image: PIL image / ndarray (converted with ``F.to_tensor``) or an
                already-converted tensor (passed through unchanged).
            annotations: iterable of COCO annotation dicts with ``bbox``
                (``[x, y, width, height]``) and ``category_id`` keys;
                ``area`` and ``iscrowd`` are optional.

        Returns:
            ``(image_tensor, target)`` where ``target`` contains the tensors
            ``boxes`` (N, 4) float32, ``labels`` (N,) int64, ``area`` (N,)
            float32 and ``iscrowd`` (N,) int64. N may be 0.
        """
        if not isinstance(image, torch.Tensor):
            image = F.to_tensor(image)

        boxes, labels, areas, crowd_flags = [], [], [], []
        for annotation in annotations:
            x_min, y_min, width, height = annotation["bbox"]
            # Degenerate boxes are dropped: torchvision's detection models
            # reject boxes without positive width and height during training.
            if width <= 0 or height <= 0:
                continue
            boxes.append([x_min, y_min, x_min + width, y_min + height])
            # NOTE(review): category ids are used as class labels unchanged;
            # confirm they fall in [1, num_classes - 1] for your export.
            labels.append(annotation["category_id"])
            areas.append(annotation.get("area", width * height))
            crowd_flags.append(annotation.get("iscrowd", 0))

        target = {
            # reshape keeps the (0, 4) shape for images without any boxes
            "boxes": torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.as_tensor(labels, dtype=torch.int64),
            "area": torch.as_tensor(areas, dtype=torch.float32),
            "iscrowd": torch.as_tensor(crowd_flags, dtype=torch.int64),
        }

        for augmentation in self.augmentations:
            image, target = augmentation(image, target)
        return image, target


def get_transforms(train):
    """
    Build the paired (image, target) transform for a dataset split.

    Args:
        train: True for the training split, False for validation. No
            augmentations are applied for either split yet; the flag marks
            where training-only augmentations should be added.

    Returns:
        A callable suitable for ``CocoDetection(..., transforms=...)`` that
        converts the image to a tensor and the COCO annotations to the
        ``boxes``/``labels`` target dict expected by Faster R-CNN.
    """
    augmentations = []
    if train:
        # Add training augmentations here if needed. They must accept and
        # return (image, target) so the boxes are transformed together with
        # the image; single-argument torchvision transforms do not do that.
        pass
    return CocoDetectionTransform(augmentations)

## Part 5: Load and Modify the Pre-trained Model

In [None]:
# Load and Modify the Pre-trained Faster R-CNN Model
# Class count seen by the detection head: background + number of fish classes.
# If you have multiple species (e.g., Bass, Tilapia), set accordingly.
# For now, assuming 2 fish classes + background = 3
num_classes = 3  # Adjust based on your classes

# Pre-trained Faster R-CNN detector with a ResNet-50 FPN backbone
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box predictor head with a fresh one sized for our classes;
# it reuses the input width of the classifier layer it replaces.
roi_heads = model.roi_heads
in_features = roi_heads.box_predictor.cls_score.in_features
roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features,
    num_classes,
)

print(f"Model loaded with {num_classes} classes")

## Part 6: Setting up Data loaders

In [None]:
# Setting up Data Loaders
def collate_detection_batch(batch):
    """Regroup ``[(image, target), ...]`` into ``(images, targets)`` tuples.

    Samples are kept as separate items instead of being stacked, because
    each one can carry a different number of boxes.
    """
    return tuple(zip(*batch))


# Batch size: adjust based on your GPU memory (start small, e.g., 2)
batch_size = 2
# num_workers: set to 0 if you encounter issues, or match your CPU cores
num_workers = 4

# CocoDetection(root, annFile, transforms=None): `root` is the folder holding
# the images and `annFile` is the COCO annotation JSON for that split.
train_dataset = CocoDetection(
    root=train_images_dir,
    annFile=train_annotations_file,
    transforms=get_transforms(train=True),
)
val_dataset = CocoDetection(
    root=val_images_dir,
    annFile=val_annotations_file,
    transforms=get_transforms(train=False),
)

# Settings shared by both loaders; only shuffling differs between the splits.
loader_options = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    collate_fn=collate_detection_batch,
)
data_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_data_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

print(f"Training dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")
print(f"Batch size: {batch_size}")

## Part 7: Training the Model

In [None]:
# Training the Model
import torch.optim as optim

# Train on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"Using device: {device}")

# Number of passes over the training set
num_epochs = 10  # Adjust as needed

# SGD with momentum and weight decay over all model parameters
optimizer = optim.SGD(
    model.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Training loop
for epoch in range(num_epochs):
    model.train()  # in training mode the model returns a dict of losses
    running_loss = 0.0

    for batch_idx, batch in enumerate(data_loader):
        images, targets = batch

        # Move every image and every target entry onto the training device
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        # Forward pass: total loss is the sum of the individual loss terms
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        # Backward pass and parameter update
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_loss = losses.item()
        running_loss += batch_loss

        # Report every 10th batch
        if batch_idx % 10 == 0:
            print(f"Epoch {epoch+1}, Batch {batch_idx}, Loss: {batch_loss:.4f}")

    # Mean batch loss for the epoch
    avg_epoch_loss = running_loss / len(data_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_epoch_loss:.4f}")

print("Training completed!")

## Part 8: Save the Model

In [None]:
# Save the Trained Model
# Only the weights (state dict) are written, not the full model object,
# so loading requires building the same architecture first.
model_save_path = "fast_rcnn_model.pth"
trained_weights = model.state_dict()
torch.save(trained_weights, model_save_path)
print(f"Model saved to {model_save_path}")

## Part 9: Evaluation

In [None]:
# Evaluation
# Load the saved weights for inference. map_location remaps the stored
# tensors onto the current device, so a checkpoint written on a GPU can
# still be loaded on a CPU-only machine.
state_dict = torch.load(model_save_path, map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  # in eval mode the model returns predictions instead of losses

# Example: Run inference on a validation image
with torch.no_grad():
    # Get a sample from validation set
    img, target = val_dataset[0]
    # The dataset may hand back a PIL image; the model needs a float tensor.
    if not isinstance(img, torch.Tensor):
        img = F.to_tensor(img)

    # Run model: detection models take a list of 3-D image tensors, and
    # return one prediction dict per input image.
    predictions = model([img.to(device)])

    # Print predictions
    print("Predictions for sample image:")
    print(f"Boxes: {predictions[0]['boxes']}")
    print(f"Labels: {predictions[0]['labels']}")
    print(f"Scores: {predictions[0]['scores']}")

# For full evaluation, you could loop over val_data_loader and compute mAP, etc.
# But that requires additional libraries like pycocotools for COCO metrics