In [4]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

device = "cpu"

# Load the model
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Change the classifier to detect only one class (background is class 0)
num_classes = 2  # 1 class (object) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

# Transfer the model to the GPU
model.to(device)

# Set the model in training mode
model.train()

# Create dummy data and labels
batch_size = 8
images = torch.randn(batch_size, 3, 512, 512).to(device)

# Ensure boxes have positive width and height by using the absolute value of random numbers
boxes = torch.abs(torch.randn(batch_size, 1, 4)).to(device) * 512  # Scale to image size
# Make sure boxes don't overflow image boundaries
boxes[..., 2:] = torch.where(boxes[..., 2:] < boxes[..., :2], boxes[..., :2] + 1, boxes[..., 2:])

labels = torch.ones(batch_size, 1).long().to(device)

# Format the labels for the Faster R-CNN
targets = []
for i in range(batch_size):
    target = {}
    target["boxes"] = boxes[i]
    target["labels"] = labels[i]
    targets.append(target)

# Forward pass


print(targets)
output = model(images, targets)

# The output is a dictionary with loss terms
print(output)



cpu
[{'boxes': tensor([[678.7259, 284.7810, 694.8099, 518.9874]]), 'labels': tensor([1])}, {'boxes': tensor([[249.5284,   3.6717, 250.5284, 292.7893]]), 'labels': tensor([1])}, {'boxes': tensor([[ 42.4624, 588.5604, 476.3267, 589.5604]]), 'labels': tensor([1])}, {'boxes': tensor([[852.9614, 712.1835, 853.9614, 828.2308]]), 'labels': tensor([1])}, {'boxes': tensor([[933.0437, 888.2699, 934.0437, 889.2699]]), 'labels': tensor([1])}, {'boxes': tensor([[551.4678,  51.6358, 662.4887, 216.5218]]), 'labels': tensor([1])}, {'boxes': tensor([[ 137.7237,  177.1430,  296.2087, 1256.0892]]), 'labels': tensor([1])}, {'boxes': tensor([[373.0323, 240.2934, 374.0323, 241.2934]]), 'labels': tensor([1])}]
{'loss_classifier': tensor(0.9961, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0.0006, grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.9815, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(9.4870, grad_fn=<DivBackward0>)}
