In [None]:
import os
import math
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
from torchvision.transforms import functional as F
from matplotlib.patches import Rectangle

# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device: {device}")

# On a CUDA machine, also report the properties of GPU 0.
if device == 'cuda':
    print(torch.cuda.get_device_properties(0))

In [None]:
# Input locations, relative to the notebook's working directory.
image_dir = '../data/VOC2012_images'
csv_path = '../data/test_Pascal_custom.csv'

# Sanity check: count every entry found in the image directory
# (os.listdir does not filter by file type).
image_filenames = os.listdir(image_dir)
print(f"Total images: {len(image_filenames)}")

# Load the annotation table; the bare last expression renders a preview.
targets = pd.read_csv(csv_path)
targets.head()  # Display the first rows (five by default) of the dataset


In [None]:

class PascalDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by a ``{phase}_Pascal_custom.csv`` table.

    The CSV must provide the columns ``filename``, ``class``, ``xmin``,
    ``ymin``, ``xmax`` and ``ymax``. All rows sharing a ``filename`` are
    gathered into a single sample, so every distinct image is served exactly
    once per pass and ``len(dataset)`` is the number of distinct images.

    Args:
        phase: Split name used to build the CSV file name (e.g. ``'train'``).
        data_dir: Directory containing the annotation CSV files.
        image_dir: Directory containing the image files. Defaults to
            ``<data_dir>/VOC2012_images``.
    """

    def __init__(self, phase, data_dir='../data', image_dir=None):
        self.phase = phase
        if image_dir is None:
            image_dir = os.path.join(data_dir, 'VOC2012_images')
        self.image_dir = image_dir
        self.targets = pd.read_csv(os.path.join(data_dir, f'{phase}_Pascal_custom.csv'))
        # A filename appears once per annotated box; keep each one only once
        # (in first-seen order) so an image with several boxes is not yielded
        # several times per epoch.
        self.imgs = self.targets['filename'].drop_duplicates().reset_index(drop=True)
        self.label_map = {'person': 1, 'dog': 2}  # Extend as needed

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the ``idx``-th distinct image.

        ``image`` is the RGB picture converted with ``to_tensor``; ``target``
        is a dict with ``'boxes'`` (float32, shape ``(num_boxes, 4)`` in
        ``xmin, ymin, xmax, ymax`` column order) and ``'labels'`` (int64,
        shape ``(num_boxes,)``).
        """
        filename = self.imgs[idx]
        # The context manager closes the file handle as soon as the pixels
        # have been converted to a tensor.
        with Image.open(os.path.join(self.image_dir, filename)) as pil_img:
            img = F.to_tensor(pil_img.convert('RGB'))

        rows = self.targets[self.targets['filename'] == filename]
        # reshape keeps the (N, 4) layout explicit, including for N == 0.
        boxes = torch.tensor(
            rows[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype=torch.float32
        ).reshape(-1, 4)
        # Class names missing from label_map fall back to 0.
        # NOTE(review): torchvision detection models treat label 0 as
        # background - confirm the CSV only contains mapped classes.
        labels = torch.tensor(
            [self.label_map.get(name, 0) for name in rows['class']],
            dtype=torch.int64,
        )

        return img, {'boxes': boxes, 'labels': labels}

    def __len__(self):
        """Number of distinct images in the split."""
        return len(self.imgs)

# Instantiate one dataset per split (train first, then test).
train_dataset = PascalDataset(phase='train')
test_dataset = PascalDataset(phase='test')


In [None]:

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    ``batch`` is a sequence of samples such as ``(image, target)`` pairs; the
    result groups the i-th element of every sample together, e.g.
    ``((image_0, image_1, ...), (target_0, target_1, ...))``. An empty batch
    yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Training batches are reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)
# Evaluation iterates in dataset order: evaluate() names its output files by
# position in the loader, so shuffling would make "<n>.png" refer to a
# different image on every run.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

# Test a batch
# NOTE: this rebinds the module-level name `targets` (previously the
# annotations DataFrame) to the tuple of target dicts of this batch.
images, targets = next(iter(train_loader))
print(f"Batch size: {len(images)}")


In [None]:

# Start from the pretrained Faster R-CNN (ResNet-50 FPN backbone) and replace
# its box predictor with a new head that outputs `num_classes` classes.
num_classes = 3  # Adjust based on dataset
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Input width of the existing classification layer; the new head reuses it.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features,
    num_classes,
)
model.to(device)


In [None]:

# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Step schedule: the learning rate is scaled by `gamma` every `step_size`
# calls to lr_scheduler.step().
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)


In [None]:

def train_one_epoch(model, optimizer, train_dataloader, device=None):
    """Run one optimisation pass over ``train_dataloader`` and return the mean loss.

    Args:
        model: Module that, in training mode, returns a dict of loss tensors
            when called as ``model(images, targets)``.
        optimizer: Optimizer that updates the model's parameters.
        train_dataloader: Iterable of ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts whose values are tensors.
        device: Device every batch is moved to. When ``None``, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        float: The summed loss terms averaged over the batches, or ``0.0``
        when the dataloader yields no batch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    total_loss = 0.0
    num_batches = 0
    for images, targets in train_dataloader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns its individual loss terms;
        # their sum is the quantity that is optimised.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        total_loss += losses.item()
        num_batches += 1

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    # Guard against an empty dataloader instead of dividing by zero.
    return total_loss / num_batches if num_batches else 0.0


In [None]:

# Train for a fixed number of epochs, logging the mean loss of each one.
num_epochs = 10
for epoch in range(num_epochs):
    loss = train_one_epoch(model, optimizer, train_loader)
    # Log before stepping the scheduler so the printed LR is the one that was
    # in effect during the epoch that just finished.
    print(f"Epoch [{epoch}]: LR {lr_scheduler.get_last_lr()} Loss {loss:.4f}")
    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()


In [None]:

def _box_patch(box, color):
    """Build an unfilled rectangle patch for an ``(xmin, ymin, xmax, ymax)`` box."""
    return patches.Rectangle(
        (box[0], box[1]), box[2] - box[0], box[3] - box[1],
        linewidth=2, edgecolor=color, facecolor='none',
    )


def evaluate(model, test_dataloader, output_dir="../data/output_images", score_threshold=0.7, device=None):
    """Run inference and save one annotated PNG per image.

    Ground-truth boxes are drawn in green. Predicted boxes whose score
    exceeds ``score_threshold`` are drawn in red, with the predicted label id
    and score written above them. Files are named ``0.png``, ``1.png``, ...
    in the order the images come out of the dataloader.

    Args:
        model: Detection model that, in eval mode, returns for a list of
            images one dict per image with ``'boxes'``, ``'scores'`` and
            ``'labels'`` tensors.
        test_dataloader: Iterable of ``(images, targets)`` batches; every
            target dict must contain ``'boxes'``. Every image of a batch is
            rendered, not only the first one.
        output_dir: Directory the figures are written to; created if missing.
        score_threshold: Predictions with a score at or below this value are
            not drawn.
        device: Device the images are moved to. When ``None``, the device of
            the model's first parameter is used (CPU if the model has no
            parameters).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    os.makedirs(output_dir, exist_ok=True)
    image_index = 0  # running counter used for the output file names
    with torch.no_grad():
        for images, targets in test_dataloader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for img_tensor, target, out in zip(images, targets, outputs):
                # Channel-first tensor -> channel-last array for imshow.
                img = img_tensor.permute(1, 2, 0).cpu().numpy()
                gt_boxes = target['boxes'].cpu().numpy()
                pred_boxes = out['boxes'].cpu().numpy()
                pred_scores = out['scores'].cpu().numpy()
                pred_labels = out['labels'].cpu().numpy()

                fig, ax = plt.subplots(1)
                try:
                    ax.imshow(img)

                    # Draw Ground Truth
                    for box in gt_boxes:
                        ax.add_patch(_box_patch(box, 'g'))

                    # Draw Predictions
                    for box, score, label in zip(pred_boxes, pred_scores, pred_labels):
                        if score > score_threshold:
                            ax.add_patch(_box_patch(box, 'r'))
                            ax.text(box[0], box[1] - 10, f"{label} ({score:.2f})", color='m')

                    fig.savefig(os.path.join(output_dir, f"{image_index}.png"), dpi=90, bbox_inches='tight')
                finally:
                    # Always release the figure, even if drawing or saving
                    # fails, so figures do not pile up in memory.
                    plt.close(fig)
                image_index += 1


In [None]:
# Render predictions against ground truth for every batch of the test loader.
evaluate(model=model, test_dataloader=test_loader)

In [None]:
# Persist only the weights (state_dict). Create the target directory first so
# the save cannot fail on a missing folder once training has finished.
model_path = '../models/detection_model.pth'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)