In [1]:
import os

import torch
from torchvision.io import ImageReadMode
from torchvision.io.image import read_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

In [2]:
# Step 1: build Faster R-CNN (ResNet-50 FPN v2) from its default pretrained weights
# and switch it straight to inference mode. box_score_thresh=0.9 is forwarded to the
# detector so that only high-confidence detections are reported.
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9).eval()

# Step 2: grab the preprocessing pipeline that ships with these weights
preprocess = weights.transforms()

In [3]:
class TestDataset(torch.utils.data.Dataset):
    """Unlabelled dataset over the image files found directly inside a folder.

    Args:
        root: Directory to scan (non-recursively) for images.
        transforms: Optional callable. It is only stored on the instance;
            ``__getitem__`` does NOT apply it and returns the image as read
            from disk, so callers that need preprocessing must apply it
            themselves.

    Attributes:
        imgs: Sorted list of image file names (relative to ``root``).
    """

    # File extensions treated as images; compared case-insensitively.
    IMG_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # Keep only regular files with an image extension. Lower-casing the
        # name means 'photo.PNG' / 'scan.JPG' are no longer silently skipped,
        # and the isfile check keeps out directories that happen to be named
        # like an image (they would crash read_image later).
        self.imgs = sorted(
            name for name in os.listdir(root)
            if name.lower().endswith(self.IMG_EXTENSIONS)
            and os.path.isfile(os.path.join(root, name))
        )

    def __getitem__(self, idx):
        """Return image ``idx`` read in RGB mode (any alpha channel is dropped)."""
        img_path = os.path.join(self.root, self.imgs[idx])
        # read the image and remove alpha channel
        img = read_image(img_path, ImageReadMode.RGB)
        return img

    def __len__(self):
        """Number of image files discovered under ``root``."""
        return len(self.imgs)

# Build the dataset over the "Instructions" folder.
# NOTE: `preprocess` is handed over as `transforms`, but TestDataset only stores it;
# __getitem__ returns the image as read from disk, without applying it.
test_data = TestDataset("Instructions", transforms=preprocess)

In [4]:
# Sanity check: list the image file names the dataset picked up (sorted order).
print(test_data.imgs)

['Duke Farms_2022_03_08_13_58_46.png', 'Duke Farms_2022_03_08_14_57_27.png', 'Duke Farms_2022_03_14_23_06_55.png', 'Duke Farms_2022_03_16_17_01_15.png', 'Duke Farms_2022_04_02_09_26_42.png', 'Duke Farms_2022_04_26_07_53_24.png', 'Duke Farms_2022_05_07_18_35_13.png', 'National Arboretum A_2022_03_21_19_05_20.png', 'National Arboretum A_2022_03_29_19_43_05.png', 'National Arboretum A_2022_04_04_06_28_34.png', 'National Arboretum A_2022_04_04_08_37_52.png', 'National Arboretum A_2022_04_28_11_13_18.png', 'National Arboretum A_2022_05_02_01_24_18.png', 'National Arboretum A_2022_05_07_03_23_45.png', 'National Arboretum A_2022_05_07_10_34_48.png', 'National Arboretum A_2022_05_09_03_41_02.png', 'National Arboretum A_2022_05_15_08_33_14.png']


In [4]:
# Iterate the dataset one image per batch, in dataset order (shuffle=False), without
# worker subprocesses (num_workers=0).
# NOTE(review): batch_size=1 presumably because the images differ in size and could
# not be stacked into a single tensor by the default collate — confirm.
test_data_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=0)

In [12]:
# Run detection on every image and save one annotated copy per input image.
#
# Changes from the earlier version of this cell:
#   * each result is written to its own file under `output_dir` instead of every
#     iteration overwriting the same "test.png";
#   * every image in a batch is paired with its own prediction (previously only the
#     first prediction of the batch was used);
#   * falls back to CPU when CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
output_dir = "predictions"
os.makedirs(output_dir, exist_ok=True)
categories = weights.meta["categories"]

model = model.to(device)
with torch.no_grad():
    for batch_idx, batch in enumerate(test_data_loader):
        predictions = model(preprocess(batch.to(device)))
        for offset, (image, prediction) in enumerate(zip(batch, predictions)):
            print(prediction)

            # Caption each box as "<category> <score rounded to 3 decimals>".
            label_ids = prediction["labels"].tolist()
            scores = prediction["scores"].tolist()
            labels = [
                categories[label_id] + " " + str(round(score, 3))
                for label_id, score in zip(label_ids, scores)
            ]

            if label_ids:
                # Draw on the original (un-preprocessed) image, which lives on the
                # CPU, so the boxes are moved there as well.
                box = draw_bounding_boxes(image, boxes=prediction["boxes"].cpu(),
                                          labels=labels,
                                          colors="red",
                                          width=4)
            else:
                # Nothing was detected above the score threshold: keep the image as is.
                box = image
            im = to_pil_image(box)

            # The loader is not shuffled, so the running position maps directly back
            # to the dataset's file list.
            img_idx = batch_idx * test_data_loader.batch_size + offset
            stem = os.path.splitext(test_data.imgs[img_idx])[0]
            im.save(os.path.join(output_dir, stem + "_pred.png"))

{'boxes': tensor([[461.5152, 267.2453, 871.1411, 710.0000]], device='cuda:0'), 'labels': tensor([16], device='cuda:0'), 'scores': tensor([0.9450], device='cuda:0')}
{'boxes': tensor([[318.1213, 101.3299, 718.3945, 530.3976]], device='cuda:0'), 'labels': tensor([16], device='cuda:0'), 'scores': tensor([0.9782], device='cuda:0')}
{'boxes': tensor([[334.8059, 192.9951, 604.6642, 499.3461]], device='cuda:0'), 'labels': tensor([16], device='cuda:0'), 'scores': tensor([0.9670], device='cuda:0')}
{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0')}




{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0')}
{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0')}
{'boxes': tensor([[447.4741, 249.1185, 564.0780, 364.1725]], device='cuda:0'), 'labels': tensor([16], device='cuda:0'), 'scores': tensor([0.9695], device='cuda:0')}
{'boxes': tensor([[615.9965, 240.3985, 861.4205, 531.9827]], device='cuda:0'), 'labels': tensor([16], device='cuda:0'), 'scores': tensor([0.9756], device='cuda:0')}
{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0')}
{'boxes': tensor([[516.9413, 122.7436, 898.1485, 545.5803]], device='cuda:0'), 'labels': tensor([16], device='cuda:0'), 'scores': tensor([0.9848], device='cuda:0')}
{'boxes': tensor([[173.4897,  83.8651, 714.5428, 751.8914]