# 🧪 Benchmark ONNX vs OpenVINO (FP32 & INT8)
Modèle : exporté depuis Paddle, déjà au format ONNX

In [None]:
# 📦 Install required packages
# Package versions are pinned below; pip itself is simply upgraded first.

# Upgrade pip
%pip install --upgrade pip
# Install PyTorch for CPU (wheels are pulled from the CPU-only PyTorch index)
%pip install -q torch==2.7.0 torchvision==0.22.0 --index-url https://download.pytorch.org/whl/cpu
# Install OpenVINO together with NNCF (NNCF performs the INT8 quantization)
%pip install -q "openvino==2025.1.0" nncf==2.16.0
# Install ONNX and ONNX Runtime
%pip install -q onnx==1.18.0 onnxruntime==1.22.0
# Install other dependencies (progress bars and OpenCV image resizing)
%pip install -q tqdm==4.67.1 opencv-python==4.11.0.86

In [None]:
# Imports
import os
import time
import numpy as np
import onnx
import nncf
import math
import cv2
import torch
import onnxruntime as ort
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import openvino as ov
from pathlib import Path

In [None]:
def get_onnx_model_input_shapes(model_path):
    """Map every graph input of an ONNX model to its declared shape.

    Args:
        model_path: Path of the ONNX file, handed to ``onnx.load``.

    Returns:
        dict: Input name -> list with one entry per dimension. A dimension
        with a fixed size is reported as its integer ``dim_value``, a
        symbolic one as its ``dim_param`` string, and a dimension with
        neither as ``"?"``. An input without shape information maps to
        ``["?"]``.
    """

    def describe(dim):
        # One shape entry: concrete size, symbolic name, or "?" when unset.
        if dim.HasField("dim_value"):
            return dim.dim_value
        if dim.HasField("dim_param"):
            return dim.dim_param
        return "?"

    shapes_by_input = {}
    for graph_input in onnx.load(model_path).graph.input:
        tensor_type = graph_input.type.tensor_type
        if not tensor_type.HasField("shape"):
            shapes_by_input[graph_input.name] = ["?"]
            continue
        shapes_by_input[graph_input.name] = [
            describe(dim) for dim in tensor_type.shape.dim
        ]
    return shapes_by_input

# Paths to models
det_model_path = "../models/en_PP-OCRv3_det.onnx"
rec_model_path = "../models/model_en_rec_v4.onnx"

# Output directory
quantized_models_dir = "../models"
os.makedirs(quantized_models_dir, exist_ok=True)

# Print the declared inputs of each model. Every model is inspected through
# its own path, so the shapes listed under a heading belong to that model.
for onnx_path in (det_model_path, rec_model_path):
    inputs = get_onnx_model_input_shapes(onnx_path)
    print(f"Input shapes for model {onnx_path}:")
    for name, shape in inputs.items():
        print(f" - {name}: {shape}")

# Input shapes for each model, keyed by file name.
# Each tuple is the (height, width, channels) shape of the random image that
# the benchmark functions generate before applying model-specific resizing.
input_shapes = {
    "en_PP-OCRv3_det.onnx": (640, 640, 3),
    "model_en_rec_v4.onnx": (640, 640, 3)
}


In [None]:
# 📂 Download CIFAR-10 Dataset
# Every image is resized to 640x640 and converted to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((640, 640)),
        transforms.ToTensor(),
    ]
)

# Test split only (train=False), downloaded into ./data when missing.
test_dataset = CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)
# Batches of 32 images in a fixed (unshuffled) order.
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [None]:

def convert_onnx_to_openvino(onnx_model_path: str, output_dir: str = "../models"):
    """Convert an ONNX model to OpenVINO IR and save it with FP32 weights.

    The IR is written to ``<output_dir>/<onnx file stem>_openvino_fp32.xml``;
    *output_dir* is created when it does not exist.

    Args:
        onnx_model_path: Path of the ONNX model to convert.
        output_dir: Directory that receives the IR files.
    """
    print(f"Converting ONNX model to OpenVINO IR: {onnx_model_path}")

    # Conversion
    model = ov.convert_model(onnx_model_path)

    # Create output directory
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Save the model in OpenVINO IR format
    model_name = Path(onnx_model_path).stem + "_openvino_fp32"
    xml_path = str(out_dir / f"{model_name}.xml")
    # ov.save_model compresses weights to FP16 unless told otherwise; switch
    # that off so the file named "_fp32" really holds FP32 weights.
    ov.save_model(model, xml_path, compress_to_fp16=False)

    print(f"Model saved to: {xml_path}")

# Convert both ONNX models; the IR files are written to the function's
# default output directory ("../models").
convert_onnx_to_openvino(det_model_path)
convert_onnx_to_openvino(rec_model_path)

In [None]:
def resize_norm_img_rec(img):
    """Resize, normalize and right-pad an image for the recognition model.

    The image is resized to a height of 48 while keeping its aspect ratio,
    converted to float32 CHW, scaled by 1/255 and then mapped with
    ``(x - 0.5) / 0.5``. The result is copied into the left part of a
    zero-filled canvas whose width is ``int(48 * max(320 / 48, w / h))``.

    Args:
        img: Array indexed as [h, w, c]; the canvas has 3 channels, so the
            image must be compatible with that.

    Returns:
        np.ndarray: float32 array of shape (3, 48, canvas_width).
    """
    target_h = 48
    base_w = 320
    src_h, src_w = img.shape[:2]
    aspect = src_w / float(src_h)

    # The canvas is never narrower than the base width/height ratio allows.
    canvas_w = int(target_h * max(base_w / target_h, (src_w * 1.0) / src_h))
    # Aspect-preserving width, clipped to the canvas.
    resized_w = min(int(math.ceil(target_h * aspect)), canvas_w)

    scaled = cv2.resize(img, (resized_w, target_h)).astype('float32')
    chw = scaled.transpose((2, 0, 1)) / 255
    chw -= 0.5
    chw /= 0.5

    canvas = np.zeros((3, target_h, canvas_w), dtype=np.float32)
    canvas[:, :, :resized_w] = chw
    return canvas

def resize_image_det(img, limit_side_len=960, limit_type='max'):
    """Resize an image so both sides are multiples of 32 and return it as CHW.

    Args:
        img: Array with shape [h, w, c].
        limit_side_len: Side-length limit, interpreted according to
            *limit_type*.
        limit_type: One of
            'max' - shrink only when the longer side exceeds the limit;
            'min' - enlarge only when the shorter side is below the limit;
            'resize_long' - always scale the longer side to the limit.

    Returns:
        The resized image transposed to [c, h, w]. Each side is rounded to
        the nearest multiple of 32, with a minimum of 32.

    Raises:
        ValueError: If *limit_type* is not supported, or if OpenCV cannot
            resize the image.
    """
    h, w, _ = img.shape

    if limit_type == 'max':
        longer = max(h, w)
        ratio = float(limit_side_len) / longer if longer > limit_side_len else 1.
    elif limit_type == 'min':
        shorter = min(h, w)
        ratio = float(limit_side_len) / shorter if shorter < limit_side_len else 1.
    elif limit_type == 'resize_long':
        ratio = float(limit_side_len) / max(h, w)
    else:
        raise ValueError(f"unsupported limit_type: {limit_type!r}")

    # Snap each side to the nearest multiple of 32; the lower bound of 32
    # guarantees the target size is always positive.
    resize_h = max(int(round(int(h * ratio) / 32) * 32), 32)
    resize_w = max(int(round(int(w * ratio) / 32) * 32), 32)

    try:
        resized = cv2.resize(img, (resize_w, resize_h))
    except cv2.error as err:
        # Fail loudly instead of returning a tuple of None that callers
        # would feed to the model.
        raise ValueError(
            f"could not resize image of shape {img.shape} to "
            f"({resize_h}, {resize_w})"
        ) from err
    return resized.transpose((2, 0, 1))

# Benchmark ONNX Runtime FP32 optimized
def benchmark_onnxruntime(model_path, input_shape, n_runs=100):
    """Measure the average ONNX Runtime latency of a model on a random input.

    A random float32 image of shape *input_shape* is generated. When the
    model file name contains "rec" it is preprocessed with
    ``resize_norm_img_rec``; when it contains "det", with
    ``resize_image_det``. A batch dimension is then added, 10 warm-up runs
    are executed and *n_runs* runs are timed.

    Args:
        model_path: Path of the ONNX model.
        input_shape: Shape of the random image generated before preprocessing.
        n_runs: Number of timed inference calls; must be positive.

    Returns:
        float: Average latency per inference, in milliseconds.

    Raises:
        ValueError: If *n_runs* is not positive.
    """
    if n_runs <= 0:
        raise ValueError("n_runs must be a positive integer")

    print(f"\nBenchmarking ONNX Runtime model: {model_path}")
    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.enable_mem_pattern = True
    sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL

    session = ort.InferenceSession(model_path, sess_options=sess_options)
    input_name = session.get_inputs()[0].name
    input_data = np.random.rand(*input_shape).astype(np.float32)

    # Match on the file name only, so a directory that happens to contain
    # "rec" or "det" cannot trigger the wrong preprocessing.
    model_file = os.path.basename(model_path)
    if "rec" in model_file:
        input_data = resize_norm_img_rec(input_data)
    if "det" in model_file:
        input_data = resize_image_det(input_data)

    # Build the contiguous batch once, so any layout conversion of the
    # transposed array happens outside the timed loop.
    input_data = np.ascontiguousarray(
        np.expand_dims(np.asarray(input_data), axis=0)
    )

    # Warm-up
    for _ in range(10):
        session.run(None, {input_name: input_data})

    # Benchmark: perf_counter is the high-resolution clock meant for
    # measuring short durations and is unaffected by wall-clock updates.
    start_time = time.perf_counter()
    for _ in range(n_runs):
        session.run(None, {input_name: input_data})
    end_time = time.perf_counter()

    avg_latency_ms = (end_time - start_time) * 1000 / n_runs
    print(f"Avg inference time over {n_runs} runs: {avg_latency_ms:.2f} ms")
    return avg_latency_ms


In [None]:
def benchmark_openvino_model(model_path, input_shape, device="CPU", n_runs=100):
    """Measure the average OpenVINO latency of an IR model on a random input.

    A random float32 image of shape *input_shape* is generated. When the
    model file name contains "rec" it is preprocessed with
    ``resize_norm_img_rec``; when it contains "det", with
    ``resize_image_det``. A batch dimension is then added, 10 warm-up runs
    are executed and *n_runs* runs are timed.

    Args:
        model_path: Path of the OpenVINO ``.xml`` model (any precision).
        input_shape: Shape of the random image generated before preprocessing.
        device: OpenVINO device name the model is compiled for.
        n_runs: Number of timed inference calls; must be positive.

    Returns:
        float: Average latency per inference, in milliseconds.

    Raises:
        ValueError: If *n_runs* is not positive.
    """
    if n_runs <= 0:
        raise ValueError("n_runs must be a positive integer")

    # This function is also used for the INT8 model, so the banner does not
    # claim a precision.
    print(f"\nBenchmarking OpenVINO model: {model_path}")

    core = ov.Core()
    model = core.read_model(model=model_path)
    compiled_model = core.compile_model(model=model, device_name=device)

    # Dummy input
    input_data = np.random.rand(*input_shape).astype(np.float32)

    # Match on the file name only, so a directory that happens to contain
    # "rec" or "det" cannot trigger the wrong preprocessing.
    model_file = os.path.basename(model_path)
    if "rec" in model_file:
        input_data = resize_norm_img_rec(input_data)
    if "det" in model_file:
        input_data = resize_image_det(input_data)

    # Build the contiguous batch once, so any layout conversion of the
    # transposed array happens outside the timed loop.
    input_data = np.ascontiguousarray(
        np.expand_dims(np.asarray(input_data), axis=0)
    )

    # Warm-up
    for _ in range(10):
        compiled_model([input_data])

    # Benchmark: perf_counter is the high-resolution clock meant for
    # measuring short durations and is unaffected by wall-clock updates.
    start_time = time.perf_counter()
    for _ in range(n_runs):
        compiled_model([input_data])
    end_time = time.perf_counter()

    avg_latency_ms = (end_time - start_time) * 1000 / n_runs
    print(f"Avg inference time over {n_runs} runs: {avg_latency_ms:.2f} ms")
    return avg_latency_ms

In [None]:
# 🔁 Quantize to INT8 with NNCF

def quantize_openvino_model(model_path):
    """Quantize an OpenVINO IR model to INT8 with NNCF.

    The quantized model is saved in ``quantized_models_dir`` under the input
    file name with ``_openvino_fp32.xml`` replaced by ``_openvino_int8.xml``.
    Nothing is done when that file already exists. Calibration batches come
    from the module-level ``test_loader``.

    Args:
        model_path: Path of the ``*_openvino_fp32.xml`` model to quantize.
    """
    print(f"\nQuantizing model with NNCF: {model_path}")
    # Only the file name is used: model_path already carries its directory,
    # and joining the whole path onto quantized_models_dir would nest it.
    model_name = Path(model_path).name.replace("_openvino_fp32.xml", "_openvino_int8")
    xml_path = str(Path(quantized_models_dir) / f"{model_name}.xml")

    if Path(xml_path).exists():
        print(f"Quantized model already exists: {xml_path}... Skiping quantization.")
        return

    core = ov.Core()
    model = core.read_model(model=model_path)

    def transform_fn(data_item):
        # The loader yields (images, labels) batches; only the images are
        # model input.
        return data_item[0].numpy()

    # The transform must be given to the dataset, otherwise NNCF would feed
    # the raw (images, labels) batches to the model.
    # NOTE(review): calibration uses the 640x640 CIFAR-10 batches from
    # test_loader - confirm they match each model's expected input shape
    # and are representative enough for calibration.
    quantization_dataset = nncf.Dataset(test_loader, transform_fn)

    quantized_model = nncf.quantize(
        model,
        quantization_dataset,
        subset_size=len(test_loader),
        preset=nncf.QuantizationPreset.MIXED
    )

    # Save the model in OpenVINO IR format
    ov.save_model(quantized_model, xml_path)

    print(f"Quantized model saved to: {xml_path}")

# Quantize both models, addressing each one through the "_openvino_fp32.xml"
# file name that sits next to its ONNX file.
quantize_openvino_model(det_model_path.replace(".onnx", "_openvino_fp32.xml"))
quantize_openvino_model(rec_model_path.replace(".onnx", "_openvino_fp32.xml"))


In [None]:


# Main execution
# Benchmark every model with ONNX Runtime, the OpenVINO FP32 IR and the
# OpenVINO INT8 IR, then print the three average latencies side by side.
for model_path in [det_model_path, rec_model_path]:
    input_shape = input_shapes[os.path.basename(model_path)]
    fp32_onxx_time = benchmark_onnxruntime(model_path, input_shape)
    fp32_openvino_time = benchmark_openvino_model(model_path.replace(".onnx", "_openvino_fp32.xml"), input_shape)
    int8_time = benchmark_openvino_model(model_path.replace(".onnx", "_openvino_int8.xml"), input_shape)
    print(f"\n--- Results for {os.path.basename(model_path)} ---")
    # The benchmark functions already return milliseconds, so the values are
    # printed as-is (no further scaling).
    print(f"FP32 ONNX Runtime inference time : {fp32_onxx_time:.2f} ms")
    print(f"FP32 OpenVino Runtime inference time : {fp32_openvino_time:.2f} ms")
    print(f"INT8 NNCF quantized inference time : {int8_time:.2f} ms")
