In [22]:
from pathlib import Path
from typing import Union

import numpy as np
import torch
import mmcv
from mmengine.config import Config

In [23]:
from mmdet.apis.inference import init_detector, inference_detector

In [24]:
def infer_image(
    config_path: Union[str, Path],
    checkpoint_path: Union[str, Path, None],
    image_path: Union[str, np.ndarray],
    device: str = 'cuda:0',
    cfg_options: Union[dict, None] = None
) -> Union["DetDataSample", list]:
    """
    Perform inference on an image using a detector from MMDetection.

    The detector is built on every call, used once, and then released so
    that repeated calls from a notebook do not accumulate GPU memory.

    Args:
        config_path (str or Path): Path to the model config file.
        checkpoint_path (str, Path or None): Path to the model checkpoint
            file. Forwarded unchanged to ``init_detector``.
            NOTE(review): ``None`` presumably builds the model without
            loading weights - confirm against the installed MMDetection.
        image_path (str or np.ndarray): Path to the image file or the image
            as a numpy array. Forwarded unchanged to ``inference_detector``.
        device (str): Device to run inference on. Defaults to 'cuda:0'.
        cfg_options (dict, optional): Options to override some settings in
            the config. Defaults to None.

    Returns:
        DetDataSample or list: Inference results exactly as returned by
        ``inference_detector``. If the input is a list of images, the output
        will be a list of results.

    Raises:
        Exception: Anything raised by ``init_detector`` or
        ``inference_detector`` propagates to the caller; the model is still
        released when inference fails.
    """
    # Build the detector from the config and (optionally) the checkpoint.
    model = init_detector(
        config=config_path,
        checkpoint=checkpoint_path,
        device=device,
        cfg_options=cfg_options
    )

    try:
        result = inference_detector(model, image_path)
    finally:
        # Drop the only local reference to the model first so its tensors can
        # be freed, then ask PyTorch to release its cached GPU memory.
        del model
        torch.cuda.empty_cache()

    return result

In [25]:
# Model config file; passed to infer_image() as `config_path` below.
# NOTE(review): absolute, machine-specific path. "faster-renn" looks like a
# typo of "faster-rcnn" - confirm the file name on disk before renaming.
config_file = "/home/dmsai2/mmdetection/my_configs/faster-renn_r101_fpn_1x_coco.py"

In [26]:
# Checkpoint file; passed to infer_image() as `checkpoint_path` below.
# NOTE(review): absolute, machine-specific path - consider a configurable root.
checkpoint_file = "/home/dmsai2/mmdetection/work_dir/epoch_48.pth"

In [27]:
# Input image, given relative to the notebook's working directory.
image_file = "./front_100.png"

In [28]:
# Run the detector once on the sample image, using the default device.
result = infer_image(
    config_path=config_file,
    checkpoint_path=checkpoint_file,
    image_path=image_file,
)

Loads checkpoint by local backend from path: /home/dmsai2/mmdetection/work_dir/epoch_48.pth


In [29]:
# Show the inference result as this cell's output.
result

<DetDataSample(

    META INFORMATION
    batch_input_shape: (96, 224)
    img_path: './front_100.png'
    scale_factor: (0.1167274622199062, 0.11690140845070422)
    pad_shape: (96, 224)
    img_shape: (83, 224)
    ori_shape: (710, 1919)
    img_id: 0

    DATA FIELDS
    pred_instances: <InstanceData(
        
            META INFORMATION
        
            DATA FIELDS
            bboxes: tensor([[ 899.2087,  360.8566, 1179.9043,  708.6053],
                        [1259.7441,   90.6891, 1556.4888,  434.9768],
                        [1146.5424,  361.3486, 1415.7355,  702.3556],
                        [ 896.8765,   22.9528, 1296.3186,  431.6518],
                        [ 453.5948,  361.5567,  710.9429,  689.8738],
                        [ 249.5105,  365.6594,  483.3314,  662.6277],
                        [ 291.9146,   41.4429,  579.9786,  387.3828],
                        [ 563.7245,   16.6294,  923.7280,  397.3040],
                        [ 679.8377,  391.1128,  922.6257,  

In [30]:
import cv2
import numpy as np
from mmdet.structures import DetDataSample

In [31]:
def draw_boxes_on_image(
    image_path: str,
    result: DetDataSample,
    output_path: str = 'output.jpg',
    score_threshold: float = 0.3,
    box_color: tuple = (0, 255, 0),
    text_color: tuple = (0, 255, 0),
    font_scale: float = 0.5,
    thickness: int = 2
):
    """
    Draw bounding boxes on an image based on inference results.

    Every detection whose score is at least ``score_threshold`` gets a
    rectangle plus a ``"<label>: <score>"`` caption on a filled background.
    The caption sits just above the box, or just inside its top edge when
    there is no room above it in the image.

    Args:
        image_path (str): Path to the input image.
        result (DetDataSample): Inference result; ``pred_instances.bboxes``,
            ``.scores`` and ``.labels`` are read from it.
        output_path (str): Path to save the output image with drawn boxes. Defaults to 'output.jpg'.
        score_threshold (float): Minimum score threshold to draw the boxes. Defaults to 0.3.
        box_color (tuple): Color of the bounding boxes and of the caption
            background, in OpenCV channel order (BGR). Defaults to (0, 255, 0).
        text_color (tuple): Color of the text (BGR). Defaults to (0, 255, 0).
            When it equals ``box_color`` the caption would be invisible on its
            own background, so black or white is used instead.
        font_scale (float): Scale of the text font. Defaults to 0.5.
        thickness (int): Thickness of the bounding boxes and text. Defaults to 2.

    Raises:
        ValueError: If the input image cannot be read.
        IOError: If the output image cannot be written.
    """
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image not found at {image_path}")

    # Extract bounding boxes, scores and labels from the result
    instances = result.pred_instances
    bboxes = instances.bboxes.cpu().numpy()
    scores = instances.scores.cpu().numpy()
    labels = instances.labels.cpu().numpy()

    font = cv2.FONT_HERSHEY_SIMPLEX

    # The caption background is filled with box_color, so identical text and
    # box colors (the defaults) would hide the text. Pick black or white by
    # the perceived brightness of the background (channels are B, G, R).
    caption_color = tuple(text_color)
    if caption_color == tuple(box_color):
        blue, green, red = tuple(box_color)[:3]
        luminance = 0.114 * blue + 0.587 * green + 0.299 * red
        caption_color = (0, 0, 0) if luminance > 127 else (255, 255, 255)

    # Loop through each detection
    for bbox, score, label in zip(bboxes, scores, labels):
        if score < score_threshold:
            continue

        # Plain Python ints (truncated, like astype(int)) for the cv2 calls.
        x1, y1, x2, y2 = (int(v) for v in bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, thickness)

        text = f'{label}: {score:.2f}'
        (text_width, text_height), baseline = cv2.getTextSize(text, font, font_scale, thickness)
        caption_height = text_height + baseline

        # Prefer the strip directly above the box; if that would leave the
        # image, place the caption inside the box at its top edge instead.
        caption_top = y1 - caption_height
        if caption_top < 0:
            caption_top = max(y1, 0)
        caption_bottom = caption_top + caption_height

        cv2.rectangle(
            image,
            (x1, caption_top),
            (x1 + text_width, caption_bottom),
            box_color,
            -1
        )
        cv2.putText(image, text, (x1, caption_bottom - baseline), font, font_scale, caption_color, thickness)

    # Save the output image; imwrite signals failure through its return value.
    if not cv2.imwrite(output_path, image):
        raise IOError(f"Failed to write output image to {output_path}")
    print(f"Output image saved at {output_path}")

In [32]:
# Destination for the annotated image, relative to the notebook's working directory.
output_file = "./test_output.jpg"

In [33]:
# Render the detections onto the input image and write the annotated copy.
draw_boxes_on_image(
    image_path=image_file,
    result=result,
    output_path=output_file,
)

Output image saved at ./test_output.jpg
