In [2]:
# Ultralytics YOLO 🚀, AGPL-3.0 license

import argparse

import cv2.dnn
import numpy as np

from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_yaml

# Class names from the "names" entry of the coco8.yaml dataset config; looked up by class ID below.
CLASSES = yaml_load(check_yaml("coco8.yaml"))["names"]
# One random 3-channel color per class. A fixed seed keeps each class's color identical across
# kernel restarts, so re-running the notebook reproduces the same annotated image.
colors = np.random.default_rng(0).uniform(0, 255, size=(len(CLASSES), 3))


In [3]:

def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """
    Draws a bounding box and its "<class name> (<confidence>)" label on the input image, in place.

    The label is placed 10 px up and to the left of the box's top-left corner. For boxes that touch
    the top or left border that position would fall outside the image, so it is clamped to keep the
    text on the canvas.

    Args:
        img (numpy.ndarray): The input image to draw the bounding box on (modified in place).
        class_id (int): Class ID of the detected object; indexes CLASSES and colors.
        confidence (float): Confidence score of the detected object.
        x (int): X-coordinate of the top-left corner of the bounding box.
        y (int): Y-coordinate of the top-left corner of the bounding box.
        x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
        y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
    """
    label = f"{CLASSES[class_id]} ({confidence:.2f})"
    color = colors[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    thickness = 2
    # putText anchors the text at its bottom-left corner, so the anchor's y must be at least the
    # text height for the label to be fully visible, and its x must not be negative.
    (_text_width, text_height), _baseline = cv2.getTextSize(label, font, font_scale, thickness)
    label_x = max(x - 10, 0)
    label_y = max(y - 10, text_height)
    cv2.putText(img, label, (label_x, label_y), font, font_scale, color, thickness)



In [4]:


"""
Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image.

Args:
    onnx_model (str): Path to the ONNX model.
    input_image (str): Path to the input image.

Returns:
    list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
"""

onnx_model = '/home/xiang-tao/git/runs/detect/train/weights/best.onnx'
input_image = '/home/xiang-tao/git/bus.jpg'

# Load the ONNX model
model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)

# Read the input image
original_image: np.ndarray = cv2.imread(input_image)
[height, width, _] = original_image.shape

print(f'original_image.shape: {original_image.shape}')



original_image.shape: (1080, 810, 3)


In [7]:
# Side length (pixels) of the square network input. Used for both the blob size and the scale
# factor below, so the two cannot drift apart.
INPUT_SIZE = 640

# Prepare a square image for inference: a black canvas as large as the longer image side, with the
# original image copied into its top-left corner.
length = max(height, width)
image = np.zeros((length, length, 3), np.uint8)
image[:height, :width] = original_image

# Calculate scale factor (padded-image pixels per network-input pixel); used later to map box
# coordinates from the resized input back onto the original image.
scale = length / INPUT_SIZE

# Preprocess the image and prepare blob for model
blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(INPUT_SIZE, INPUT_SIZE), swapRB=True)
model.setInput(blob)

# Perform inference
outputs = model.forward()

print(f'outputs: {outputs}')
print(type(outputs))
print(outputs.shape)


# Prepare output array: swap the last two axes of the single batch item so that each row holds one
# candidate. In the recorded run the shape goes from (1, 84, 8400) to (1, 8400, 84).
outputs = np.array([cv2.transpose(outputs[0])])
print(f'outputs: {outputs}')
print(type(outputs))
print(outputs.shape)

outputs: [[[     3.4416      20.009      28.422 ...      562.86      564.78      588.44]
  [     7.1443      4.2471      3.5682 ...       595.4      592.19      591.94]
  [      7.122      40.066      56.654 ...      158.04       157.5      113.82]
  ...
  [ 2.2683e-07  1.0893e-07  8.6857e-08 ...  1.0468e-06  7.9207e-07  1.1399e-06]
  [ 9.6611e-08  6.7666e-08   4.763e-08 ...  1.2979e-06  1.1518e-06   1.232e-06]
  [ 1.2082e-07  4.5888e-08  3.7489e-08 ...  1.4236e-06  1.2964e-06  1.3837e-06]]]
<class 'numpy.ndarray'>
(1, 84, 8400)
outputs: [[[     3.4416      7.1443       7.122 ...  2.2683e-07  9.6611e-08  1.2082e-07]
  [     20.009      4.2471      40.066 ...  1.0893e-07  6.7666e-08  4.5888e-08]
  [     28.422      3.5682      56.654 ...  8.6857e-08   4.763e-08  3.7489e-08]
  ...
  [     562.86       595.4      158.04 ...  1.0468e-06  1.2979e-06  1.4236e-06]
  [     564.78      592.19       157.5 ...  7.9207e-07  1.1518e-06  1.2964e-06]
  [     588.44      591.94      113.82 ...  1.1399

In [8]:

# Thresholds used for candidate filtering and non-maximum suppression.
SCORE_THRESHOLD = 0.25  # minimum best-class score for a candidate to be kept
NMS_THRESHOLD = 0.45  # `nms_threshold` argument of cv2.dnn.NMSBoxes
NMS_ETA = 0.5  # `eta` argument of cv2.dnn.NMSBoxes

rows = outputs.shape[1]

boxes = []
scores = []
class_ids = []

# Iterate through output to collect bounding boxes, confidence scores, and class IDs
for i in range(rows):
    row = outputs[0][i]
    classes_scores = row[4:]
    # minMaxLoc returns (min value, max value, min location, max location). Only the maximum and
    # the second element of its location (used as the class index) are needed.
    min_max = cv2.minMaxLoc(classes_scores)
    maxScore = min_max[1]
    maxClassIndex = min_max[3][1]
    if maxScore >= SCORE_THRESHOLD:
        # The first two values are shifted by half of the 3rd/4th values (width/height), turning a
        # box center into its top-left corner: [left, top, width, height].
        box = [
            row[0] - (0.5 * row[2]),
            row[1] - (0.5 * row[3]),
            row[2],
            row[3],
        ]
        boxes.append(box)
        scores.append(maxScore)
        class_ids.append(maxClassIndex)

# Apply NMS (Non-maximum suppression).
# Depending on the OpenCV version the kept indices come back flat, as an N x 1 array, or as an
# empty tuple when nothing survives. Flatten so every case is handled the same way.
result_boxes = np.array(
    cv2.dnn.NMSBoxes(boxes, scores, SCORE_THRESHOLD, NMS_THRESHOLD, NMS_ETA)
).reshape(-1)

detections = []

# Iterate through NMS results to draw bounding boxes and labels
for index in result_boxes:
    index = int(index)  # plain int so it can index the Python lists above
    box = boxes[index]
    detection = {
        "class_id": class_ids[index],
        "class_name": CLASSES[class_ids[index]],
        "confidence": scores[index],
        "box": box,
        "scale": scale,
    }
    detections.append(detection)
    # Box coordinates are in network-input pixels; multiply by `scale` to get image pixels.
    draw_bounding_box(
        original_image,
        class_ids[index],
        scores[index],
        round(box[0] * scale),
        round(box[1] * scale),
        round((box[0] + box[2]) * scale),
        round((box[1] + box[3]) * scale),
    )

# Display the image with bounding boxes
# cv2.imshow("image", original_image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()


In [11]:
# Number of candidate boxes that passed the score filter, and the first one as [left, top, width, height]
len(boxes), boxes[0]

(46, [0.06876945495605469, 150.94861602783203, 19.131916, 41.44658])

In [13]:
# Number of collected candidate scores, and the best-class score of the first candidate
len(scores), scores[0]

(46, 0.25669893622398376)

In [14]:
# Number of collected candidate class IDs, and the class ID of the first candidate
len(class_ids), class_ids[0]

(46, 11)

In [17]:
# Number of detections kept after non-maximum suppression, and the first detection record
len(detections), detections[0]

(6,
 {'class_id': 5,
  'class_name': 'bus',
  'confidence': 0.8863571286201477,
  'box': [14.89910888671875, 135.68052673339844, 458.65472, 314.34488],
  'scale': 1.6875})