# Object detection with YOLO and OpenVINO

## Install dependencies

In [None]:
# Install the packages this notebook imports; the extra index URL lets pip use the PyTorch CPU wheel index
!pip install requests ultralytics openvino nncf moviepy --extra-index-url https://download.pytorch.org/whl/cpu

## Utils

In [None]:
from moviepy import VideoFileClip

# Browser support for mp4 is better than avi, hence conversion is often necessary
def avi_to_mp4(video_path):
    """Re-encode a video file as an H.264/AAC mp4 and return the new path.

    Args:
        video_path: Path of the source video (normally an ``.avi`` file).

    Returns:
        ``video_path`` with its final extension replaced by ``.mp4``. A path
        that already ends in ``.mp4`` is returned unchanged and nothing is
        re-encoded.
    """
    import os  # local import keeps this cell self-contained

    stem, extension = os.path.splitext(video_path)
    # Already mp4: converting would mean reading and overwriting the same file
    if extension.lower() == ".mp4":
        return video_path

    # Swap only the final extension; str.replace would also rewrite any
    # ".avi" found elsewhere in the path (e.g. inside a directory name)
    output_path = stem + ".mp4"
    with VideoFileClip(video_path) as clip:
        # These codecs provide good compression and wide compatibility
        clip.write_videofile(output_path, codec='libx264', audio_codec='aac')

    return output_path

## Get video

In [None]:
import requests

from IPython.display import Video


def download_video(url, filename, timeout=30):
    """Download ``url`` to ``filename``, streaming the body in chunks.

    Prints a confirmation on success. When the server does not answer with
    status 200, prints the status code instead and writes no file.

    Args:
        url: Address of the file to fetch.
        filename: Local path the response body is written to.
        timeout: Seconds ``requests`` waits for the server before giving up,
            so a stalled connection cannot hang the cell forever.
    """
    # The context manager releases the streamed connection even if writing fails
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check if the request was successful
        if response.status_code != 200:
            print("Failed to retrieve the file. HTTP Status Code:", response.status_code)
            return

        # Open the file in write-binary mode and save the content
        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # skip empty chunks
                    file.write(chunk)
    print(f"Download complete: {filename}")

# Local filename the sample clip is stored under; later cells read it back
video_file = "sample_video.mp4"

# Fetch the sample clip
download_video(
    url="https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4",
    filename=video_file,
)

# Last expression of the cell, so the notebook displays the video
Video(video_file)

## Get model

In [None]:
from ultralytics import YOLO

# Load the model
model_name = "yolo11n"
yolo_model = YOLO(model_name)

# Run prediction on the video; save=True asks for the annotated output to be
# written, and it is read back from results[0].save_dir below
results = yolo_model(video_file, save=True, verbose=False)

# Convert the video and show. The saved file is expected to carry the input
# name with an .avi extension.
# Single quotes inside the f-string: reusing the outer double quotes is a
# SyntaxError on Python versions before 3.12.
processed_video = avi_to_mp4(f"{results[0].save_dir}/{video_file.replace('.mp4', '.avi')}")
Video(processed_video)

In [None]:
import statistics as stat

# Average the per-result inference times, leaving out the first result
# because the first inference is usually longer
avg_inference_time = stat.mean(
    prediction.speed["inference"] for prediction in results[1:]
)
print(f"One image inference time in PyTorch: {avg_inference_time:.2f}ms")

## Use OpenVINO

In [None]:
# Convert the model to OV format with fixed input shape (640x640) and FP16 precision
ov_model_path = yolo_model.export(format="openvino", dynamic=False, half=True)

# Reload the model
ov_yolo_model = YOLO(ov_model_path, task="detect")

# Run prediction once again on the video
ov_results = ov_yolo_model(video_file, save=True, verbose=False, device="intel:cpu")

# Convert the video and show. The saved file is expected to carry the input
# name with an .avi extension.
# Single quotes inside the f-string: reusing the outer double quotes is a
# SyntaxError on Python versions before 3.12.
processed_video = avi_to_mp4(f"{ov_results[0].save_dir}/{video_file.replace('.mp4', '.avi')}")
Video(processed_video)

In [None]:
import statistics as stat

# Average the per-result inference times, leaving out the first result
# because the first inference is usually longer
avg_ov_inference_time = stat.mean(
    prediction.speed["inference"] for prediction in ov_results[1:]
)
print(f"One image inference time in OpenVINO on CPU: {avg_ov_inference_time:.2f}ms")

## Available devices

In [None]:
import openvino as ov

# Core object used to query the devices OpenVINO can see
core = ov.Core()

# List the reported devices, then the FULL_DEVICE_NAME property of each one
print(core.available_devices)
print([
    core.get_property(device_id, "FULL_DEVICE_NAME")
    for device_id in core.available_devices
])

## Try other devices

In [None]:
# Start from a freshly loaded copy of the exported model
ov_yolo_model = YOLO(ov_model_path, task="detect")

# Same video as before, this time with the GPU selected as the device
ov_gpu_results = ov_yolo_model(
    video_file,
    save=True,
    verbose=False,
    device="intel:gpu",
)

# Average the per-result inference times, leaving out the first result
# because the first inference is usually longer
avg_ov_gpu_inference_time = stat.mean(
    prediction.speed["inference"] for prediction in ov_gpu_results[1:]
)
print(f"One image inference time in OpenVINO on GPU: {avg_ov_gpu_inference_time:.2f}ms")

In [None]:
# Start from a freshly loaded copy of the exported model
ov_yolo_model = YOLO(ov_model_path, task="detect")

# Same video as before, this time with the NPU selected as the device
ov_npu_results = ov_yolo_model(
    video_file,
    save=True,
    verbose=False,
    device="intel:npu",
)

# Average the per-result inference times, leaving out the first result
# because the first inference is usually longer
avg_ov_npu_inference_time = stat.mean(
    prediction.speed["inference"] for prediction in ov_npu_results[1:]
)
print(f"One image inference time in OpenVINO on NPU: {avg_ov_npu_inference_time:.2f}ms")

## Quantize model

In [None]:
# Export the model to OV format again, now quantized to INT8 with a fixed
# (640x640) input shape; the dataset YAML is passed along for the export
ov_int8_model_path = yolo_model.export(
    format="openvino",
    dynamic=False,
    int8=True,
    data="coco128.yaml",
)

In [None]:
# Load int8 model
ov_int8_yolo_model = YOLO(ov_int8_model_path, task="detect")
# Run inference on GPU
ov_int8_gpu_results = ov_int8_yolo_model(video_file, save=True, verbose=False, device="intel:gpu")

# Calculate mean inference time (skip first inference which is usually longer)
avg_ov_int8_gpu_inference_time = stat.mean([r.speed["inference"] for r in ov_int8_gpu_results[1:]])
# Label the figure as INT8 so it can be told apart from the earlier
# non-quantized GPU measurement, which prints an otherwise identical line
print(f"One image inference time in OpenVINO INT8 on GPU: {avg_ov_int8_gpu_inference_time:.2f}ms")

In [None]:
# Convert the video and show. The saved file is expected to carry the input
# name with an .avi extension.
# Single quotes inside the f-string: reusing the outer double quotes is a
# SyntaxError on Python versions before 3.12.
processed_video = avi_to_mp4(f"{ov_int8_gpu_results[0].save_dir}/{video_file.replace('.mp4', '.avi')}")
Video(processed_video)