In [36]:
from dataset import *
import torch
import torch.nn as nn
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork
from torchvision.models.detection.backbone_utils import BackboneWithFPN
from torchvision.ops.feature_pyramid_network import ExtraFPNBlock
from torchvision.models import resnet50
from torchvision.models.detection import FasterRCNN
import torchvision

# load image
dataset = CXRDataset(
        root='/home/ec2-user/data/MIMIC-1105', 
        image_dir='downsized',
        ann_file='annotations_downsized.json',
        transforms=get_transform(train=False),
        )

image = dataset[0][0].unsqueeze(0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image = image.to(device)

Faster rcnn with default backbone

In [11]:
# Load the pre-trained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()
model.to(device)

# Perform object detection
with torch.no_grad():
    predictions = model(image)

# Print the predictions (bounding boxes and class scores)
print(predictions)




[{'boxes': tensor([[  58.1865,    0.0000,  998.8453, 1004.9954],
        [   0.0000,    0.0000,  483.1834,  996.5087],
        [ 536.1624,   10.0707, 1024.0000,  884.0034],
        [ 148.8860,  283.5781,  194.8409,  326.7523],
        [ 725.3314,   92.8112,  768.7886,  123.4762],
        [ 140.9995,  346.8251,  908.9592,  985.2390],
        [ 132.5804,  304.3146,  919.8915, 1015.0481],
        [  90.7145,    0.0000,  149.0744,   69.1741]], device='cuda:0'), 'labels': tensor([ 1,  1,  1, 44, 37, 75,  3, 10], device='cuda:0'), 'scores': tensor([0.4006, 0.2551, 0.1435, 0.1241, 0.0708, 0.0658, 0.0555, 0.0512],
       device='cuda:0')}]


Faster rcnn with different backbone

In [38]:
class LastLevelMaxPool(ExtraFPNBlock):
    """
    Applies a max pooling operation over the non-spatial dimensions of the input.
    """
    def forward(self, x, names):
        result = {}
        for name, feature in zip(names, x):
            result[name] = feature.max(dim=-1)[0].max(dim=-1)[0]
        return result

    def output_shape(self, in_channels_list):
        return {name: chan for name, chan in in_channels_list.items()}


# Load the pre-trained ResNet-50 model
backbone = resnet50(pretrained=True)

# Set the number of output channels (2048 for ResNet-50)
backbone.out_channels = 2048

# Define the return layers
return_layers = {'layer4': '0'}

# Create the FPN (Feature Pyramid Network) using the custom backbone
fpn = FeaturePyramidNetwork(
    in_channels_list=[backbone.out_channels],
    out_channels=backbone.out_channels,
    extra_blocks=LastLevelMaxPool(),
)

# Combine the custom backbone and FPN
backbone = BackboneWithFPN(
    backbone,
    return_layers=return_layers,
    in_channels_list=[backbone.out_channels],
    out_channels=backbone.out_channels,
)

# Create the anchor generator
anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)

# Create the Faster R-CNN model with the custom backbone
model = FasterRCNN(
    backbone,
    num_classes=91,  # Change this to the number of classes in your dataset
    rpn_anchor_generator=anchor_generator,
)

# Load the pre-trained Faster R-CNN model with the custom backbone
model = fasterrcnn_resnet50_fpn(
    backbone=backbone,
    pretrained=False,
    anchor_generator=anchor_generator,
    num_classes=91,  # Change this to the number of classes in your dataset
)

model.eval()
model.to(device)

# Perform object detection
with torch.no_grad():
    predictions = model(image)

# Print the predictions (bounding boxes and class scores)
print(predictions)



TypeError: __init__() got multiple values for argument 'backbone'