# Using the OpenVINO™ Execution Provider for YOLOv8 Detection

[Source (modified from Microsoft ONNX Runtime OpenVINO EP Examples)](https://github.com/microsoft/onnxruntime-inference-examples/tree/main/python/OpenVINO_EP/yolov8_object_detection)

In [1]:
# Sample image to run inference on
image_url = "https://ultralytics.com/images/bus.jpg"

In [2]:
# Make sure the directories containing the OpenVINO DLLs are on the system PATH.
# NOTE(review): the module name ("add_openvino_win_libs") suggests this step is
# only relevant on Windows - confirm before relying on it on other platforms.
import onnxruntime.tools.add_openvino_win_libs as utils
utils.add_openvino_libs_to_path()

## Importing Required Packages

In [3]:
import os
import shutil
import time
from datetime import datetime
from statistics import mean

import cv2
import numpy as np
import onnxruntime as rt
import requests
import torch
from ultralytics import YOLO
from ultralytics.data.augment import LetterBox

In [4]:
# Pre-processing parameters
imgsz = (640, 640)  # default input size for this use case
stride = 32  # default for this use case (differs based on the model selected)

print(image_url)
def preprocess(image_url):
    """Download an image and turn it into a model-ready input array.

    The image is saved in the current working directory under the last path
    component of ``image_url``, read back with OpenCV, letterboxed using the
    module-level ``imgsz`` and ``stride`` settings, and converted to a
    contiguous float32 array scaled to the 0.0 - 1.0 range with a leading
    batch axis.

    Args:
        image_url: URL of a jpg, jpeg or png image.

    Returns:
        A tuple ``(img0, img)`` where ``img0`` is the image as loaded by
        ``cv2.imread`` and ``img`` is the processed array, or ``None`` (after
        printing a message) when the download fails, the file extension is
        not supported, or the file cannot be decoded.
    """
    ## Set up the local path the image is saved to
    path = os.getcwd()
    image_path = os.path.join(path, image_url.split("/")[-1])
    # Stream the response body straight to disk. The context manager closes the
    # connection on every path, and the timeout stops a stalled server from
    # hanging the notebook forever.
    with requests.get(image_url, stream=True, timeout=30) as r:
        # Check if the image was retrieved successfully
        if r.status_code != 200:
            print('Image couldn\'t be retreived')
            return
        # Set decode_content value to True, otherwise the downloaded image file's size will be zero.
        r.raw.decode_content = True
        # Open a local file with wb ( write binary ) permission.
        with open(image_path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
    print('Image sucessfully downloaded: ',path)

    image_abs_path = os.path.abspath(image_path)
    has_supported_extension = image_abs_path.split('.')[-1].lower() in ['jpg', 'jpeg', 'png']
    if not (os.path.isfile(image_abs_path) and has_supported_extension):
        print("Invalid image format.")
        return

    # Load Image
    img0 = cv2.imread(image_abs_path)
    if img0 is None:
        # cv2.imread signals an undecodable file by returning None instead of raising.
        print("Invalid image format.")
        return
    # Padded resize
    # Letterbox: Resize image and padding for detection, instance segmentation, pose
    img = LetterBox(imgsz, stride=stride)(image=img0.copy())
    # HWC -> CHW, then reverse the channel axis (BGR to RGB)
    img = img.transpose((2, 0, 1))[::-1]
    img = np.ascontiguousarray(img)
    img = img.astype(np.float32)  # uint8 to fp32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndim == 3:
        # Add the batch axis
        img = np.expand_dims(img, axis=0)
    return img0, img

# preprocess() prints a message and returns None on failure; stop here with a
# clear error instead of an opaque tuple-unpacking TypeError.
preprocessed = preprocess(image_url)
if preprocessed is None:
    raise RuntimeError(f"Could not prepare a model input from {image_url}")
org_input, model_input = preprocessed

https://ultralytics.com/images/bus.jpg
Image sucessfully downloaded:  C:\Users\raymond\demo_test\openvino_build_deploy\demos\onnxruntime_yolov8_demo


## Downloading a YOLOv8 Model and Exporting it to ONNX

In [5]:
from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO("yolov8m.pt")

# Export the model to ONNX format
model.export(format="onnx")  # creates 'yolov8m.onnx'

# Load the exported ONNX model
onnx_model = YOLO("yolov8m.onnx")

# Run inference on the sample image defined at the top of the notebook
results = onnx_model(image_url)

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt to 'yolov8m.pt'...


100%|█████████████████████████████████████████████████████████████████████████████| 49.7M/49.7M [00:12<00:00, 4.27MB/s]


Ultralytics YOLOv8.2.81  Python-3.10.11 torch-2.5.0+cpu CPU (Intel Core(TM) Ultra 9 288V)
YOLOv8m summary (fused): 218 layers, 25,886,080 parameters, 0 gradients, 78.9 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8m.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (49.7 MB)

[34m[1mONNX:[0m starting export with onnx 1.16.1 opset 19...
[34m[1mONNX:[0m export success  1.5s, saved as 'yolov8m.onnx' (99.0 MB)

Export complete (3.8s)
Results saved to [1mC:\Users\raymond\demo_test\openvino_build_deploy\demos\onnxruntime_yolov8_demo[0m
Predict:         yolo predict task=detect model=yolov8m.onnx imgsz=640  
Validate:        yolo val task=detect model=yolov8m.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app
Loading yolov8m.onnx for ONNX Runtime inference...

Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 C:\Users\raymond\demo_test\openvino_build_deploy\demos\onnxruntime_yolov8_demo\bus.jpg: 640x640 4 persons

## Initialization

In [13]:
# Target device passed to the OpenVINO Execution Provider: "CPU", "GPU" or "NPU"
device_type = "NPU"

In [14]:
original_model_path = "yolov8m.onnx"

def initialize(quantize=False, device='OVEP'):
    """Create an ONNX Runtime session and look up the model's I/O names.

    Args:
        quantize: When true, load the model from the module-level
            ``quantized_model_path`` instead of ``original_model_path``.
            NOTE(review): ``quantized_model_path`` is not defined in the
            visible part of this notebook - define it before using this option.
        device: ``'OVEP'`` to run through the OpenVINO Execution Provider
            (on the module-level ``device_type``), or ``'CPUEP'`` for the
            default CPU Execution Provider.

    Returns:
        ``(input_names, output_names, mlas_model, ov_model)``: the name of the
        model's first input, the list of output names, and the CPU EP / OpenVINO
        EP sessions. Only the session for the requested ``device`` is created;
        the other one is ``None``.

    Raises:
        ValueError: If ``device`` is neither ``'OVEP'`` nor ``'CPUEP'``.
    """
    # Validate up front; otherwise an unknown device falls through to an
    # AttributeError on a None session further down.
    if device not in ('OVEP', 'CPUEP'):
        raise ValueError(
            f"Unsupported device {device!r}. Supported device options are 'OVEP', 'CPUEP'."
        )

    ov_model = None
    mlas_model = None
    so = rt.SessionOptions()

    if device == 'OVEP':
        if quantize:
            print("Inferencing through OVEP")
        model_path = quantized_model_path if quantize else original_model_path
        ov_model = rt.InferenceSession(model_path, so,
                                       providers=['OpenVINOExecutionProvider'],
                                       provider_options=[{'device_type': device_type}])
        session = ov_model
    else:
        model_path = quantized_model_path if quantize else original_model_path
        mlas_model = rt.InferenceSession(model_path, so, providers=['CPUExecutionProvider'])
        session = mlas_model

    input_names = session.get_inputs()[0].name
    output_names = [output.name for output in session.get_outputs()]
    return input_names, output_names, mlas_model, ov_model

## Inference

In [15]:
# Execution provider to benchmark: 'OVEP' or 'CPUEP'
device = 'OVEP'
(input_names,
 output_names,
 mlas_model,
 ov_model) = initialize(device=device)

In [18]:
# Total number of inference runs, and the warm-up threshold used when timing them
no_of_iterations, warmup_iterations = 100, 3

In [19]:
inf_lst = []
def inference(input_names, output_names, device, mlas_model, ovep_model, model_input,
              iterations=None, warmup=None):
    """Run the model repeatedly and record per-run latency in ``inf_lst``.

    Args:
        input_names: Name of the model input that ``model_input`` is fed to.
        output_names: Names of the outputs to fetch.
        device: ``'CPUEP'`` to run ``mlas_model`` or ``'OVEP'`` to run ``ovep_model``.
        mlas_model: Session used when ``device`` is ``'CPUEP'``.
        ovep_model: Session used when ``device`` is ``'OVEP'``.
        model_input: Value fed to the model on every run.
        iterations: Total number of runs. Defaults to the module-level
            ``no_of_iterations``.
        warmup: Number of leading runs whose timings are discarded. Defaults
            to the module-level ``warmup_iterations``.

    Returns:
        ``(prediction, seconds)`` for the last run, ``(None, 0.0)`` when
        ``iterations`` is 0, or ``None`` (after printing a message) when
        ``device`` is not recognised.
    """
    if device == 'CPUEP':
        print("Performing ONNX Runtime Inference with default CPU EP.")
        session = mlas_model
    elif device == 'OVEP':
        print("Performing ONNX Runtime Inference with OpenVINO EP.")
        session = ovep_model
    else:
        print("Invalid Device Option. Supported device options are 'CPUEP', 'OVEP'.")
        return None

    if iterations is None:
        iterations = no_of_iterations
    if warmup is None:
        warmup = warmup_iterations

    # Defined up front so that a zero-iteration call still returns cleanly.
    prediction = None
    elapsed = 0.0
    feed = {input_names: model_input}
    for i in range(iterations):
        # perf_counter is monotonic and high resolution, unlike wall-clock datetime.now().
        start_time = time.perf_counter()
        prediction = session.run(output_names, feed)
        elapsed = time.perf_counter() - start_time
        # Skip exactly `warmup` runs (indices 0 .. warmup - 1).
        if i >= warmup:
            inf_lst.append(elapsed)
    return prediction, elapsed

inference_output = inference(input_names, output_names, device, mlas_model, ov_model, model_input)
average_inference_time = np.average(inf_lst)
# Report the number of timings actually recorded rather than a count derived
# from the configuration constants, so the message always matches the data.
print(f'Average inference time is for {len(inf_lst)} iterations is {average_inference_time} sec')

Performing ONNX Runtime Inference with OpenVINO EP.
Average inference time is for 97 iterations is 0.0124385625 sec


## Final Inference on Image and Webcam Input using OpenVINO

In [21]:
#Inference on image
from ultralytics import YOLO

#Download and export to OV format. This will also trigger the OV plugins
model = YOLO("yolov8m.pt")
model.export(format="openvino")  # creates 'yolov8m_openvino_model/'
# NOTE: this rebinds `ov_model` (until now the ONNX Runtime session returned by
# initialize()) to an Ultralytics YOLO wrapper. Re-run the initialization cell
# before re-running the ONNX Runtime benchmark above.
ov_model = YOLO("yolov8m_openvino_model/")

#This will use AUTO Plugin by default, and thus will enable GPU
results = ov_model.predict("bus.jpg")
results[0].show()  # Show results to screen (in supported environments)
results[0].save(filename="bus_predictions.jpg")

Ultralytics YOLOv8.2.81  Python-3.10.11 torch-2.5.0+cpu CPU (Intel Core(TM) Ultra 9 288V)
YOLOv8m summary (fused): 218 layers, 25,886,080 parameters, 0 gradients, 78.9 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8m.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (49.7 MB)

[34m[1mOpenVINO:[0m starting export with openvino 2024.3.0-16041-1e3b88e4e3f-releases/2024/3...
[34m[1mOpenVINO:[0m export success  3.0s, saved as 'yolov8m_openvino_model\' (99.1 MB)

Export complete (5.1s)
Results saved to [1mC:\Users\raymond\demo_test\openvino_build_deploy\demos\onnxruntime_yolov8_demo[0m
Predict:         yolo predict task=detect model=yolov8m_openvino_model imgsz=640  
Validate:        yolo val task=detect model=yolov8m_openvino_model imgsz=640 data=coco.yaml  
Visualize:       https://netron.app
Loading yolov8m_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...

image 1/1 C:\Users\raymond\demo_test\openvino_buil

'bus_predictions.jpg'

In [22]:
#Inference on webcam or live streams
import cv2

stream = 0 #can set to video path like /path/input.mp4
cap = cv2.VideoCapture(stream)
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
frame_count = 0
try:
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break
        frame_count += 1
        results = ov_model.predict(im0, verbose=False)  # Prediction also supported
        res_plotted = results[0].plot()
        cv2.imshow("YOLOv8 OpenVINO Video Stream", res_plotted)
        # Mask to the low byte so the comparison is not thrown off by extra
        # high bits some platforms set in the waitKey return value.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always close the window and free the camera/video handle, even if
    # prediction raises or the cell is interrupted.
    cv2.destroyAllWindows()
    cap.release() #Release video sources