In [None]:
import os
import torch
import torchvision
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw

# Build the Faster R-CNN detector with its pre-trained weights and switch it
# to evaluation mode, since it is only used for inference below.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

# PASCAL VOC 2007 "trainval" split rooted at ./VOCdevkit; download=True lets
# torchvision fetch the archive when it is not already present.
dataset = torchvision.datasets.VOCDetection(
    root='./VOCdevkit',
    year='2007',
    image_set='trainval',
    download=True,
)

def detect_objects(image):
    """Run the module-level detector on one image.

    Args:
        image: The image to analyse, in a form accepted by
            ``torchvision.transforms.functional.to_tensor`` (the caller in
            this file passes an RGB PIL image).

    Returns:
        A ``(boxes, labels, scores)`` tuple of plain Python lists taken from
        the model's prediction for the image.
    """
    # The model is called with a batch, so add a leading batch dimension.
    batch = F.to_tensor(image).unsqueeze(0)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        detections = model(batch)[0]

    return tuple(detections[key].tolist() for key in ('boxes', 'labels', 'scores'))

def _label_box(draw, origin, text):
    """Return the (left, top, right, bottom) box covered by *text* at *origin*."""
    x, y = origin
    try:
        # ImageDraw.textsize() was deprecated in Pillow 9.2 and removed in
        # Pillow 10; textbbox() is the supported replacement.
        return draw.textbbox((x, y), text)
    except (AttributeError, ValueError):
        # Older Pillow releases either lack textbbox() or may reject the
        # default bitmap font, so fall back to the legacy measurement there.
        width, height = draw.textsize(text)
        return (x, y, x + width, y + height)


def main():
    """Annotate every dataset image with its detections and save the result.

    For each image in the module-level ``dataset`` this runs
    ``detect_objects``, draws a red rectangle plus a ``"<label>: <score>"``
    caption for every detection whose score is above 0.5, and writes the
    annotated image to ``RCNNimages/image_<index>.jpg``.
    """
    # Create a folder to save the images if it doesn't exist
    save_folder = 'RCNNimages'
    os.makedirs(save_folder, exist_ok=True)

    for i in range(len(dataset)):
        image, _target = dataset[i]  # the ground-truth annotation is not used
        image = image.convert('RGB')
        boxes, labels, scores = detect_objects(image)

        # Draw bounding boxes and labels on the image
        draw = ImageDraw.Draw(image)
        for box, label, score in zip(boxes, labels, scores):
            # Filter detections by confidence threshold
            if not score > 0.5:
                continue

            x1, y1, x2, y2 = box
            draw.rectangle([(x1, y1), (x2, y2)], outline='red', width=2)

            # Write the label on a filled background at the box's top-left
            # corner so the white text stays readable.
            label_text = f"{label}: {score:.2f}"
            draw.rectangle(_label_box(draw, (x1, y1), label_text), fill='red')
            draw.text((x1, y1), label_text, fill='white')

        # Save the image
        image.save(os.path.join(save_folder, f"image_{i}.jpg"))

# Run the full detection pass only when this file is executed as the main
# module, not when it is imported.
if __name__ == '__main__':
    main()




Using downloaded and verified file: ./VOCdevkit\VOCtrainval_06-Nov-2007.tar
Extracting ./VOCdevkit\VOCtrainval_06-Nov-2007.tar to ./VOCdevkit


  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = draw.textsize(label_text)
  text_width, text_height = 