# Object detection with YOLO and OpenVINO

## Install dependencies

In [None]:
!pip install requests ultralytics openvino nncf moviepy --extra-index-url https://download.pytorch.org/whl/cpu

## Get video

In [1]:
from IPython.display import Video
from utils import download_video, avi_to_mp4

# Fetch the sample clip; video_name and video_file are reused by the cells below
video_name = "sample_video.mp4"
video_file = "data/" + video_name
sample_video_url = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4"
download_video(sample_video_url, video_file)

# Show the downloaded clip inline
Video(video_file)

Download complete: data/sample_video.mp4


## Get model

In [2]:
import ipywidgets as widgets

# Offer the available model variants in a dropdown; the first one (yolo11n) is preselected
model_variants = ["yolo11n", "yolo11s", "yolo11m", "yolo11l", "yolo11x"]
model_dropdown = widgets.Dropdown(description="Model:", options=model_variants, value=model_variants[0])
model_dropdown

Dropdown(description='Model:', options=('yolo11n', 'yolo11s', 'yolo11m', 'yolo11l', 'yolo11x'), value='yolo11n…

In [3]:
from ultralytics import YOLO

# Build the model path from the variant chosen in the dropdown, then load it
selected_variant = model_dropdown.value
model_name = f"models/{selected_variant}"
yolo_model = YOLO(model_name)

In [4]:
# Run prediction on the video with the PyTorch model (save=True stores the annotated output on disk)
results = yolo_model(video_file, save=True, verbose=False)

# The annotated file is expected in the run's save_dir under the same name with an .avi extension.
# The name is computed outside the f-string: reusing double quotes inside an f-string
# replacement field is a SyntaxError on Python versions older than 3.12.
pt_avi_name = video_name.replace(".mp4", ".avi")

# Convert the video and show
processed_video = avi_to_mp4(f"{results[0].save_dir}/{pt_avi_name}")
Video(processed_video)

inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

Results saved to [1m/home/adrian/repos/openvino_build_deploy/runs/detect/predict22[0m
MoviePy - Building video /home/adrian/repos/openvino_build_deploy/runs/detect/predict22/sample_video.mp4.
MoviePy - Writing video /home/adrian/repos/openvino_build_deploy/runs/detect/predict22/sample_video.mp4



                                                                                

MoviePy - Done !
MoviePy - video ready /home/adrian/repos/openvino_build_deploy/runs/detect/predict22/sample_video.mp4


In [5]:
import statistics as stat

# Average the inference time over all results except the first one (the first inference is usually longer)
pytorch_times = [result.speed["inference"] for result in results[1:]]
avg_inference_time = stat.mean(pytorch_times)
print(f"One image inference time in PyTorch: {avg_inference_time:.2f}ms")

One image inference time in PyTorch: 32.20ms


## Use OpenVINO

In [8]:
# Convert the model to OV format with fixed input shape (640x640) and FP16 precision
ov_model_path = yolo_model.export(format="openvino", dynamic=False, half=True)

# Reload the exported model
ov_yolo_model = YOLO(ov_model_path, task="detect")

# Run prediction once again on the video, this time with the OpenVINO model on the CPU
ov_results = ov_yolo_model(video_file, save=True, verbose=False, device="intel:cpu")

# The annotated file is expected in the run's save_dir under the same name with an .avi extension.
# The name is computed outside the f-string: reusing double quotes inside an f-string
# replacement field is a SyntaxError on Python versions older than 3.12.
ov_avi_name = video_name.replace(".mp4", ".avi")

# Convert the video and show
processed_video = avi_to_mp4(f"{ov_results[0].save_dir}/{ov_avi_name}")
Video(processed_video)

Ultralytics 8.3.116 🚀 Python-3.13.7 torch-2.9.1+cpu CPU (Intel Core(TM) Ultra 7 258V)

[34m[1mPyTorch:[0m starting from 'models/yolo11n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.4 MB)

[34m[1mOpenVINO:[0m starting export with openvino 2025.4.1-20426-82bbf0292c5-releases/2025/4...
[34m[1mOpenVINO:[0m export success ‚úÖ 2.0s, saved as 'models/yolo11n_openvino_model/' (5.4 MB)

Export complete (2.2s)
Results saved to [1m/home/adrian/repos/openvino_build_deploy/trainings/object_detection/models[0m
Predict:         yolo predict task=detect model=models/yolo11n_openvino_model imgsz=640 half 
Validate:        yolo val task=detect model=models/yolo11n_openvino_model imgsz=640 data=/usr/src/ultralytics/ultralytics/cfg/datasets/coco.yaml half 
Visualize:       https://netron.app
Loading models/yolo11n_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
inference results will accumulate in RAM unless `st

                                                                                

MoviePy - Done !
MoviePy - video ready /home/adrian/repos/openvino_build_deploy/runs/detect/predict18/sample_video.mp4


In [9]:
import statistics as stat

# Average the inference time over all results except the first one (the first inference is usually longer)
ov_cpu_times = [result.speed["inference"] for result in ov_results[1:]]
avg_ov_inference_time = stat.mean(ov_cpu_times)
print(f"One image inference time in OpenVINO on CPU: {avg_ov_inference_time:.2f}ms")

One image inference time in OpenVINO on CPU: 22.28ms


## Available devices

In [10]:
import openvino as ov

# Ask the OpenVINO runtime which devices it can use on this machine
core = ov.Core()
available_devices = core.available_devices

# Print the device IDs first, then their full names in the same order
print(available_devices)
device_full_names = [core.get_property(device_id, "FULL_DEVICE_NAME") for device_id in available_devices]
print(device_full_names)

['CPU', 'GPU', 'NPU']
['Intel(R) Core(TM) Ultra 7 258V', 'Intel(R) Arc(TM) Graphics (iGPU)', 'Intel(R) AI Boost']


## Try other devices

In [11]:
if "GPU" in available_devices:
    # Load the exported OpenVINO model again and run the video through it on the GPU
    ov_yolo_model = YOLO(ov_model_path, task="detect")
    ov_gpu_results = ov_yolo_model(video_file, save=True, verbose=False, device="intel:gpu")

    # Average over all results except the first one (the first inference is usually longer)
    gpu_times = [result.speed["inference"] for result in ov_gpu_results[1:]]
    avg_ov_gpu_inference_time = stat.mean(gpu_times)
    print(f"One image inference time in OpenVINO on GPU: {avg_ov_gpu_inference_time:.2f}ms")

Loading models/yolo11n_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

Results saved to [1m/home/adrian/repos/openvino_build_deploy/runs/detect/predict19[0m
One image inference time in OpenVINO on GPU: 12.42ms


In [12]:
if "NPU" in available_devices:
    # Load the exported OpenVINO model again and run the video through it on the NPU
    ov_yolo_model = YOLO(ov_model_path, task="detect")
    ov_npu_results = ov_yolo_model(video_file, save=True, verbose=False, device="intel:npu")

    # Average over all results except the first one (the first inference is usually longer)
    npu_times = [result.speed["inference"] for result in ov_npu_results[1:]]
    avg_ov_npu_inference_time = stat.mean(npu_times)
    print(f"One image inference time in OpenVINO on NPU: {avg_ov_npu_inference_time:.2f}ms")

Loading models/yolo11n_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

Results saved to [1m/home/adrian/repos/openvino_build_deploy/runs/detect/predict20[0m
One image inference time in OpenVINO on NPU: 6.21ms


## Quantize model

In [13]:
# Export the model once more, this time quantized: OV format, fixed input shape (640x640), INT8 precision.
# The data argument names the dataset config used to collect the INT8 calibration images.
ov_int8_model_path = yolo_model.export(
    format="openvino",
    dynamic=False,
    int8=True,
    data="coco128.yaml",
)

Ultralytics 8.3.116 🚀 Python-3.13.7 torch-2.9.1+cpu CPU (Intel Core(TM) Ultra 7 258V)

[34m[1mPyTorch:[0m starting from 'models/yolo11n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.4 MB)

[34m[1mOpenVINO:[0m starting export with openvino 2025.4.1-20426-82bbf0292c5-releases/2025/4...
[34m[1mOpenVINO:[0m collecting INT8 calibration images from 'data=coco128.yaml'
Fast image access ✅ (ping: 0.0±0.0 ms, read: 280.8±101.5 MB/s, size: 49.4 KB)


Scanning /home/adrian/repos/datasets/coco128/labels/train2017.cache... 126 image






INFO:nncf:15 ignored nodes were found by patterns in the NNCFGraph
INFO:nncf:1 ignored nodes were found by types in the NNCFGraph
INFO:nncf:Not adding activation input quantizer for operation: 168 __module.model.23.dfl/aten::view/Reshape
INFO:nncf:Not adding activation input quantizer for operation: 169 __module.model.23/aten::sigmoid/Sigmoid
INFO:nncf:Not adding activation input quantizer for operation: 180 __module.model.23.dfl/aten::transpose/Transpose
INFO:nncf:Not adding activation input quantizer for operation: 191 __module.model.23.dfl/aten::softmax/Softmax
INFO:nncf:Not adding activation input quantizer for operation: 200 __module.model.23.dfl.conv/aten::_convolution/Convolution
INFO:nncf:Not adding activation input quantizer for operation: 208 __module.model.23.dfl/aten::view/Reshape_1
INFO:nncf:Not adding activation input quantizer for operation: 226 __module.model.23/aten::sub/Subtract
INFO:nncf:Not adding activation input quantizer for operation: 227 __module.model.23/aten:

Output()



Output()

[34m[1mOpenVINO:[0m export success ‚úÖ 13.1s, saved as 'models/yolo11n_int8_openvino_model/' (3.3 MB)

Export complete (13.2s)
Results saved to [1m/home/adrian/repos/openvino_build_deploy/trainings/object_detection/models[0m
Predict:         yolo predict task=detect model=models/yolo11n_int8_openvino_model imgsz=640 int8 
Validate:        yolo val task=detect model=models/yolo11n_int8_openvino_model imgsz=640 data=/usr/src/ultralytics/ultralytics/cfg/datasets/coco.yaml int8 
Visualize:       https://netron.app


In [14]:
import ipywidgets as widgets

# Select the inference device from the detected OpenVINO devices (CPU is preselected)
device_dropdown = widgets.Dropdown(description="Device:", options=available_devices, value="CPU")
device_dropdown

Dropdown(description='Device:', options=('CPU', 'GPU', 'NPU'), value='CPU')

In [15]:
# Load the INT8 model and run the video through it on the device picked in the dropdown
selected_device = device_dropdown.value
ov_int8_yolo_model = YOLO(ov_int8_model_path, task="detect")
ov_int8_results = ov_int8_yolo_model(video_file, save=True, verbose=False, device=f"intel:{selected_device}")

# Average over all results except the first one (the first inference is usually longer)
int8_times = [result.speed["inference"] for result in ov_int8_results[1:]]
avg_ov_int8_inference_time = stat.mean(int8_times)
print(f"One image inference time in OpenVINO on {selected_device}: {avg_ov_int8_inference_time:.2f}ms")

Loading models/yolo11n_int8_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

Results saved to [1m/home/adrian/repos/openvino_build_deploy/runs/detect/predict21[0m
One image inference time in OpenVINO on CPU: 12.31ms


In [17]:
# The annotated file is expected in the run's save_dir under the same name with an .avi extension.
# The name is computed outside the f-string: reusing double quotes inside an f-string
# replacement field is a SyntaxError on Python versions older than 3.12.
int8_avi_name = video_name.replace(".mp4", ".avi")

# Convert the video and show
processed_video = avi_to_mp4(f"{ov_int8_results[0].save_dir}/{int8_avi_name}")
Video(processed_video)

MoviePy - Building video /home/adrian/repos/openvino_build_deploy/runs/detect/predict21/sample_video.mp4.
MoviePy - Writing video /home/adrian/repos/openvino_build_deploy/runs/detect/predict21/sample_video.mp4



                                                                                

MoviePy - Done !
MoviePy - video ready /home/adrian/repos/openvino_build_deploy/runs/detect/predict21/sample_video.mp4
