In [1]:
import torch

print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)

print("cuDNN version:", torch.backends.cudnn.version())

PyTorch version: 2.8.0+cu128
CUDA available: True
CUDA version: 12.8
cuDNN version: 91002


In [2]:
import onnxruntime

print(onnxruntime.__version__)
print(onnxruntime.get_device())

1.22.0
GPU


<br>

# Object Detection with YOLOv8 and ONNX Runtime GPU

This notebook demonstrates how to:
1. Export a pre-trained YOLOv8 model to ONNX format
2. Run the model on video using ONNX Runtime with GPU acceleration
3. Visualize object detection results in real-time

<br>

## 1. Export YOLOv8 model to ONNX format

In [3]:
from ultralytics import YOLO

# Define model input dimensions
model_width = 640
model_height = 640

# Load pretrained YOLOv8 model
model = YOLO('models/yolov8n.pt')

# Export the model to ONNX format
model.export(format='onnx')

print(f"YOLOv8 model exported to models/yolov8n.onnx with input size {model_width}x{model_height}")

Ultralytics 8.3.183 🚀 Python-3.10.18 torch-2.8.0+cu128 CPU (AMD Ryzen 7 4800H with Radeon Graphics)
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'models/yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.64...
[34m[1mONNX:[0m export success ✅ 1.1s, saved as 'models/yolov8n.onnx' (12.2 MB)

Export complete (1.3s)
Results saved to [1m/home/omer/dl-files/Running-Deep-Learning-Models-on-ONNX-Runtime/models[0m
Predict:         yolo predict task=detect model=models/yolov8n.onnx imgsz=640  
Validate:        yolo val task=detect model=models/yolov8n.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app
YOLOv8 model exported to models/yolov8n.onnx with input size 640x640


<br>

## 2. Run YOLOv8 model on video with ONNX Runtime GPU

In [4]:
import onnxruntime as ort
import cv2
import numpy as np
import time

# Check available providers first
print("Available ONNX Runtime providers:", ort.get_available_providers())
print("CUDA provider available:", 'CUDAExecutionProvider' in ort.get_available_providers())

# Load ONNX model
session = ort.InferenceSession("models/yolov8n.onnx", providers=['CUDAExecutionProvider'])
print("Session providers:", session.get_providers())

# Get input details
input_name = session.get_inputs()[0].name
print(f"Input name: {input_name}")

# Load COCO class names from file
with open('resources/coco_labels_yolo.txt', 'r') as f:
    class_names = [line.strip() for line in f.readlines()]
print(f"Loaded {len(class_names)} class names")
print(f"Using model input dimensions: {model_width}x{model_height}")

Available ONNX Runtime providers: ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
CUDA provider available: True
Session providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']
Input name: images
Loaded 91 class names
Using model input dimensions: 640x640


In [5]:
def postprocess_yolo_output(outputs, original_shape, conf_threshold=0.3, iou_threshold=0.45):
    """Post-process YOLOv8 ONNX output"""
    predictions = outputs[0]  # Shape: [1, 84, 8400]
    predictions = predictions[0]  # Remove batch dimension: [84, 8400]
    predictions = predictions.T  # Transpose to [8400, 84]
    
    # Extract boxes and scores
    boxes = predictions[:, :4]  # First 4 columns are bbox coordinates
    scores = predictions[:, 4:]  # Remaining columns are class scores
    
    # Get the class with highest score for each detection
    class_ids = np.argmax(scores, axis=1)
    confidences = np.max(scores, axis=1)
    
    # Filter by confidence threshold
    valid_detections = confidences > conf_threshold
    boxes = boxes[valid_detections]
    confidences = confidences[valid_detections]
    class_ids = class_ids[valid_detections]
    
    # Convert from center format to corner format
    x_center, y_center, width, height = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    x1 = x_center - width / 2
    y1 = y_center - height / 2
    x2 = x_center + width / 2
    y2 = y_center + height / 2
    
    boxes = np.column_stack((x1, y1, x2, y2))
    
    # Scale boxes to original image size
    orig_h, orig_w = original_shape[:2]
    boxes[:, [0, 2]] *= orig_w / model_width  # Scale x coordinates
    boxes[:, [1, 3]] *= orig_h / model_height  # Scale y coordinates

    # Apply Non-Maximum Suppression to eliminate duplicate detections
    indices = cv2.dnn.NMSBoxes(boxes.tolist(), confidences.tolist(), conf_threshold, iou_threshold)

    # Check if any boxes remain after NMS
    if len(indices) > 0:
        indices = indices.flatten()
        return boxes[indices], confidences[indices], class_ids[indices]
    else:
        return [], [], []

In [6]:
# Process video with YOLOv8
# You can change the video path and confidence threshold here
video_path = "resources/test_video_street.mp4"
confidence_threshold = 0.3
iou_threshold = 0.45

# Open video
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# FPS calculation variables
fps_counter = 0
fps_start_time = time.time()
fps_display = 0.0

# Determine device string for display
device_str = "GPU" if 'CUDAExecutionProvider' in session.get_providers() else "CPU"
print(f"\nRunning YOLOv8 with ONNX Runtime on {device_str}...")
print("Press 'q' to quit the video display")

# Loop through video frames
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # YOLOv8 preprocessing
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_resized = cv2.resize(img, (model_width, model_height))
    img_resized = img_resized.astype(np.float32) / 255.0
    img_resized = np.transpose(img_resized, (2, 0, 1))  # HWC -> CHW
    img_resized = np.expand_dims(img_resized, axis=0)

    # Run inference
    outputs = session.run(None, {input_name: img_resized})

    # Post-process outputs
    boxes, confidences, class_ids = postprocess_yolo_output(outputs, frame.shape, 
                                                          conf_threshold=confidence_threshold,
                                                          iou_threshold=iou_threshold)

    # Draw detections
    for box, conf, cls_id in zip(boxes, confidences, class_ids):
        x1, y1, x2, y2 = map(int, box)
        
        # Ensure coordinates are within frame bounds
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(width, x2), min(height, y2)
        
        # Draw bounding box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        
        # Draw label
        label = f"{class_names[cls_id]}: {conf:.2f}"
        label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
        cv2.rectangle(frame, (x1, y1 - label_size[1] - 10), 
                     (x1 + label_size[0], y1), (0, 255, 0), -1)
        cv2.putText(frame, label, (x1, y1 - 5),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

    # Add title at top middle of screen
    title_text = "YOLOv8 Object Detection"
    title_size = cv2.getTextSize(title_text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2)[0]
    title_x = (width - title_size[0]) // 2
    cv2.rectangle(frame, (title_x - 10, 10), (title_x + title_size[0] + 10, 50), (0, 0, 0), -1)
    cv2.putText(frame, title_text, (title_x, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2)

    # Calculate and display FPS
    fps_counter += 1
    if fps_counter % 10 == 0:
        fps_end_time = time.time()
        fps_display = 10 / (fps_end_time - fps_start_time)
        fps_start_time = fps_end_time
    
    # Draw FPS
    fps_text = f"FPS: {fps_display:.1f} (YOLOv8-{device_str})"
    cv2.rectangle(frame, (5, height - 40), (280, height - 10), (0, 0, 0), -1)
    cv2.putText(frame, fps_text, (10, height - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # Display 
    cv2.imshow("YOLOv8 ONNX Runtime", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release everything
cap.release()
cv2.destroyAllWindows()

print("Video processing completed!")


Running YOLOv8 with ONNX Runtime on GPU...
Press 'q' to quit the video display
Video processing completed!
