In [None]:
import torch
from torchvision.models import detection
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import os
import urllib.request

# Optional prefix for every label file, read from the environment.
# It is prepended verbatim (no path separator is inserted), so a directory
# prefix must carry its own trailing separator.
DATASET_PREFIX = os.environ.get('DATASET_PREFIX', '')

# Label-file locations, one per dataset.
IMAGENET_LABELS_FILE = f"{DATASET_PREFIX}imagenet_classes.txt"
CIFAR100_LABELS_FILE = f"{DATASET_PREFIX}cifar100_labels.txt"
CIFAR10_LABELS_FILE = f"{DATASET_PREFIX}cifar10_labels.meta"
PASCAL_VOC_LABELS_FILE = f"{DATASET_PREFIX}pascal_voc_labels.txt"
PLACES365_LABELS_FILE = f"{DATASET_PREFIX}categories_places365.txt"
COCO_LABELS_FILE = f"{DATASET_PREFIX}coco_labels.txt"

_COCO_NAMES_URL = "https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names"


def get_coco_labels(labels_file=None, labels_url=_COCO_NAMES_URL):
    """
    Return the COCO class names, downloading the names file on first use.

    Args:
        labels_file: Path of the local names file. Defaults to the
            module-level ``COCO_LABELS_FILE``.
        labels_url: URL fetched when ``labels_file`` does not exist yet.

    Returns:
        list[str]: One stripped class name per non-blank line of the file,
        in file order.

    Raises:
        urllib.error.URLError / OSError: if the file is missing and cannot
        be downloaded or written.
    """
    if labels_file is None:
        labels_file = COCO_LABELS_FILE
    labels_file = os.fspath(labels_file)

    if not os.path.exists(labels_file):
        parent_dir = os.path.dirname(labels_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # Download to a temporary name and rename afterwards, so an
        # interrupted transfer never leaves a truncated file that later
        # calls would treat as a valid cache.
        partial_path = labels_file + ".part"
        try:
            urllib.request.urlretrieve(labels_url, partial_path)
            os.replace(partial_path, labels_file)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)

    # Blank lines are skipped: an empty entry would shift every following
    # class index by one.
    with open(labels_file, "r", encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]

# Load the COCO class names once, at module level. post_process looks class
# names up through this global, so this must run before any detection call.
coco_labels = get_coco_labels()
print(coco_labels)

def load_image(image_path, mode=None):
    """
    Load an image file into a NumPy array using the PIL library.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        mode: Optional PIL mode string such as ``"RGB"``. When given, the
            image is converted to that mode before being turned into an
            array, which fixes the channel layout regardless of how the
            file was stored. The default ``None`` keeps the file's own mode.

    Returns:
        numpy.ndarray: The decoded pixel data. Its shape and dtype follow
        the image mode (the file's, or ``mode`` when supplied).
    """
    # The context manager closes the underlying file deterministically;
    # np.array() copies the pixel data out before the block exits.
    with Image.open(image_path) as image:
        converted = image if mode is None else image.convert(mode)
        return np.array(converted)

# Define model factory
def model_factory(model_name):
    """
    Build a pretrained object-detection model and put it in eval mode.

    Supported ``model_name`` values:
        'RetinaNet'  - torchvision RetinaNet, ResNet-50 FPN backbone
        'FasterRCNN' - torchvision Faster R-CNN, ResNet-50 FPN backbone
        'SSDLite'    - torchvision SSDlite320, MobileNetV3-Large backbone
        'SSD300'     - torchvision SSD300, VGG16 backbone (this is the
                       network the 'SSDLite' key used to load by mistake)
        'Yolov5'     - ultralytics YOLOv5s fetched through torch.hub

    Args:
        model_name: One of the keys listed above.

    Returns:
        The constructed model after ``model.eval()`` has been called.

    Raises:
        ValueError: if ``model_name`` is not a supported key.
    """
    # Builders are wrapped in lambdas so that only the requested model is
    # constructed (and its weights downloaded).
    builders = {
        'RetinaNet': lambda: detection.retinanet_resnet50_fpn(pretrained=True, pretrained_backbone=True),
        'FasterRCNN': lambda: detection.fasterrcnn_resnet50_fpn(pretrained=True, pretrained_backbone=True),
        'SSDLite': lambda: detection.ssdlite320_mobilenet_v3_large(pretrained=True),
        'SSD300': lambda: detection.ssd300_vgg16(pretrained=True),
        'Yolov5': lambda: torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True),
    }

    # Non-string names (possibly unhashable) are rejected the same way as
    # unknown strings instead of surfacing a TypeError from the dict lookup.
    build = builders.get(model_name) if isinstance(model_name, str) else None
    if build is None:
        raise ValueError(
            f"Invalid model name {model_name!r}; expected one of: {', '.join(builders)}"
        )

    model = build()
    model.eval()
    return model



# Define function to detect with a given model
def detect_with_model(image_path, model_name):
    """
    Run one detector from ``model_factory`` on a single image file.

    Args:
        image_path: Path of the image to analyse.
        model_name: Model key understood by ``model_factory``.

    Returns:
        For ``"Yolov5"``: the object returned by the hub model, unchanged.
        For every other model: the ``(boxes, scores, labels)`` tuple
        produced by ``post_process``.

    Raises:
        ValueError: propagated from ``model_factory`` for an unknown name.
    """
    # Get model
    model = model_factory(model_name)

    use_gpu = torch.cuda.is_available()
    if use_gpu:
        model.eval().cuda()
        print("using gpu")

    if model_name == "Yolov5":
        # The hub model is given the PIL image directly. It must stay out of
        # the tensor path below: a PIL image has no .cuda(), so moving it to
        # the GPU used to raise AttributeError on any CUDA machine.
        image = Image.open(image_path)
        with torch.no_grad():
            return model(image)

    # Preprocess image
    pixels = load_image(image_path)
    if pixels.ndim == 2:
        # Single-channel image: replicate it into three channels.
        pixels = np.stack((pixels,) * 3, axis=-1)
    elif pixels.shape[2] > 3:
        # Drop the alpha channel (and anything after it).
        pixels = pixels[:, :, :3]

    # torchvision detection models take channel-first float tensors scaled to
    # [0, 1]; feeding raw 0-255 values produces meaningless detections.
    # NOTE(review): the /255 scaling assumes 8-bit pixel data - confirm if
    # 16-bit or float images are ever used.
    image = torch.from_numpy(np.ascontiguousarray(pixels)).permute(2, 0, 1).float().div(255.0)
    if use_gpu:
        image = image.cuda()

    # Perform detection (the models accept a list of C x H x W tensors)
    with torch.no_grad():
        detections = model([image])

    boxes, scores, labels = post_process(detections)
    print("boxes", boxes)
    print("scores", scores)
    print("labels", labels)
    return boxes, scores, labels

# COCO category ids that are not used by the 80 detection classes. The
# torchvision COCO detectors emit ids in the original 1..90 numbering, so
# these gaps must be skipped when indexing an 80-entry names list.
_COCO_UNUSED_CATEGORY_IDS = frozenset({12, 26, 29, 30, 45, 66, 68, 69, 71, 83})


def post_process(outputs, label_names=None):
    """
    Convert the first detection result of a batch into NumPy arrays.

    Args:
        outputs: Sequence of per-image dicts with ``'boxes'``, ``'scores'``
            and ``'labels'`` tensors; only ``outputs[0]`` is read.
        label_names: Class names used to translate label ids. Defaults to
            the module-level ``coco_labels``. An 80-entry list is treated as
            the COCO class list in dataset order and is addressed through
            the 1..90 COCO category ids; a list of any other length is
            indexed directly by label id.

    Returns:
        tuple: ``(boxes, scores, class_names)`` as NumPy arrays.
        ``class_names`` holds one string per detection; ids without a name
        (background, unused ids, out-of-range values) map to ``"N/A"``.
    """
    if label_names is None:
        label_names = coco_labels

    preds = outputs[0]
    boxes = preds['boxes'].detach().cpu().numpy()  # Bounding boxes
    scores = preds['scores'].detach().cpu().numpy()  # Confidence scores
    label_ids = preds['labels'].detach().cpu().numpy()  # Integer class ids

    if len(label_names) == 80:
        # Indexing the 80 names directly with a category id is off by one
        # from the start, drifts further at every gap, and overflows the
        # list for ids >= 80.
        used_ids = [i for i in range(1, 91) if i not in _COCO_UNUSED_CATEGORY_IDS]
        id_to_name = dict(zip(used_ids, label_names))
    else:
        id_to_name = dict(enumerate(label_names))

    # Built in one pass; np.append in a loop reallocates on every element.
    class_names = np.array([id_to_name.get(int(label_id), "N/A") for label_id in label_ids])
    return boxes, scores, class_names

In [None]:

# RetinaNet on the sample image. Rebinds the notebook-level names
# boxes / scores / labels.
boxes, scores, labels = detect_with_model(
    image_path="/workspace/tests/pexels-pixabay-45201.jpg",
    model_name="RetinaNet",
)


In [None]:

# Faster R-CNN on the sample image. Rebinds the notebook-level names
# boxes / scores / labels.
boxes, scores, labels = detect_with_model(
    image_path="/workspace/tests/pexels-pixabay-45201.jpg",
    model_name="FasterRCNN",
)


In [None]:

# "SSDLite" model key on the sample image. Rebinds the notebook-level names
# boxes / scores / labels.
boxes, scores, labels = detect_with_model(
    image_path="/workspace/tests/pexels-pixabay-45201.jpg",
    model_name="SSDLite",
)


In [None]:
import torchvision.transforms as T

# Show the sample image and overlay the detections currently bound to the
# notebook-level names boxes / scores / labels (i.e. whichever detection cell
# ran last).
image = Image.open("/workspace/tests/pexels-pixabay-45201.jpg")
# ToTensor gives a channel-first tensor; permute back to H x W x C for imshow.
image = T.ToTensor()(image).permute(1, 2, 0).numpy()
fig, ax = plt.subplots(1)
ax.imshow(image)

for box, score, label in zip(boxes, scores, labels):
    # Skip low-confidence detections
    if score < 0.5:
        continue

    # Boxes are read as (left, top, right, bottom) corner coordinates.
    left, top, right, bottom = box[:4]
    outline = plt.Rectangle(
        (left, top),
        right - left,
        bottom - top,
        fill=False,
        color='red',
        linewidth=1.5,
    )
    ax.add_patch(outline)

    # Caption sits just above the top-left corner of the box.
    caption = f'{label} {score:.2f}'
    ax.text(left, top - 5, caption, fontsize=8, color='green')

plt.show()

In [None]:

# Run YOLOv5 on the sample image. For this model key detect_with_model
# returns the hub model's result object unchanged.
result = detect_with_model("/workspace/tests/pexels-pixabay-45201.jpg", "Yolov5")
print(result)
result.show()
# A bare expression in the middle of a cell is evaluated and then discarded,
# so the raw detections are printed explicitly.
print(result.xyxy[0])
# Last expression of the cell: rendered by the notebook as the cell output.
result.pandas().xyxy[0]