# 02 · Inference Engines Benchmark

Load ONNX models from models_saved/onnx and benchmark on: PyTorch (CPU/CUDA), ONNX Runtime (CPU/CUDA), OpenVINO (CPU/GPU/NPU).
Measure latency, throughput, accuracy, and energy when possible (NVML for NVIDIA). Save to a single CSV and avoid duplicates via model hash + configuration.

In [None]:
# Runtime diagnostics: log the versions reported by runtime_versions() and probe OpenVINO.
import os

# setdefault only applies when LOG_LEVEL is not already present in the environment.
os.environ.setdefault("LOG_LEVEL", "INFO")
from utils.logging_utils import get_logger
from utils.io import runtime_versions

logger = get_logger("nb02")
versions = runtime_versions()
logger.info("Runtime versions: %s", versions)
# OpenVINO is imported inside try/except so that a failed import is reported as a
# warning instead of aborting the cell.
try:
    import openvino as ov

    logger.info("OpenVINO version: %s, convert_model=%s", getattr(ov, '__version__', '?'), hasattr(ov, 'convert_model'))
except Exception as ex:
    logger.warning("OpenVINO import failed: %s", ex)

In [None]:
import os
import time
# Imports and config
from pathlib import Path

import numpy as np
import torch
import yaml
from torchvision import datasets, transforms

from models.cnn import CNN
from models.efficientnet_lite0 import EfficientNetLite0
from models.mlp import MLP
from models.mobilenetv3 import MobileNetV3
from utils.consistency import compare_logits_torch_ort_ov
from utils.device_utils import get_cpu_name, get_gpu_name_and_driver
from utils.energy import GpuEnergyMeterNVML
from utils.infer_openvino import benchmark_numpy as ov_bench
from utils.infer_ort import benchmark_numpy as ort_bench
from utils.io import CSV_SCHEMA, csv_append_row, csv_has_row_with, sha256_file, utc_timestamp, runtime_versions
from utils.ov_convert import onnx_to_ir
from utils.ov_utils import get_available_devices as ov_available
from utils.preprocess import preprocess_np
from utils.logging_utils import get_logger

# Logging (setdefault keeps any LOG_LEVEL already present in the environment)
os.environ.setdefault("LOG_LEVEL", "INFO")
logger = get_logger("nb02")

# Project root: two levels above this file when __file__ is defined; otherwise
# (e.g. in a notebook kernel) the parent of the current working directory.
root = Path(__file__).resolve().parent.parent if '__file__' in globals() else Path(os.getcwd()).parent


def _default_cfg():
    """Return the built-in benchmark matrix used when no YAML config is loaded.

    The mapping has four sections: ``defaults`` (run parameters), ``engines``
    (engine name with its provider list), ``models`` (model name plus ONNX and
    PyTorch artifact file names) and ``outputs`` (relative CSV path). A fresh
    structure is built on every call.
    """
    run_defaults = dict(
        dataset='cifar10', precision='fp32', batch=64,
        warmup=10, runs=100, image_size=32, num_workers=2,
    )

    engine_matrix = [
        {'engine': engine_name, 'providers': list(provider_names)}
        for engine_name, provider_names in (
            ('pytorch', ('cpu', 'cuda')),
            ('onnxruntime', ('cpu', 'cuda')),
            ('openvino', ('CPU', 'GPU')),
        )
    ]

    model_entries = [
        {'name': model_name, 'file_onnx': onnx_file, 'file_pt': pt_file}
        for model_name, onnx_file, pt_file in (
            ('cnn', 'cnn_cifar10.onnx', 'cnn_cifar10.pt'),
            ('mlp', 'mlp_cifar10.onnx', 'mlp_cifar10.pt'),
            ('mobilenetv3', 'mobilenetv3_cifar10.onnx', 'mobilenetv3_cifar10.pt'),
            ('efficientnetlite0', 'efficientnetlite0_cifar10.onnx', 'efficientnetlite0_cifar10.pt'),
        )
    ]

    return {
        'defaults': run_defaults,
        'engines': engine_matrix,
        'models': model_entries,
        'outputs': {'infer_csv': 'metrics/infer_metrics.csv'},
    }


def load_cfg():
    """Load the benchmark matrix from ``config/bench_matrix.yaml`` under ``root``.

    Returns:
        The parsed YAML mapping when the file exists and parses to a mapping;
        otherwise the built-in defaults from ``_default_cfg()``.

    Read and parse failures are logged as warnings and never raised, so the
    notebook can always proceed with the default matrix.
    """
    cfg_path = root / 'config/bench_matrix.yaml'
    if cfg_path.exists():
        try:
            # Context manager guarantees the handle is closed even if parsing fails.
            with open(cfg_path, 'r', encoding='utf-8') as fh:
                loaded = yaml.safe_load(fh)
        except Exception as ex:
            logger.warning("Failed to read config/bench_matrix.yaml, using defaults: %s", ex)
        else:
            # safe_load yields None for an empty document (and may yield a list or
            # scalar); callers index the result with string keys, so only a
            # mapping is accepted.
            if isinstance(loaded, dict):
                return loaded
            logger.warning("Failed to read config/bench_matrix.yaml, using defaults: %s",
                           f"expected a mapping, got {type(loaded).__name__}")
    return _default_cfg()


# Resolve the configuration once; the names below are read as module-level globals
# by the helper functions defined later in this cell.
cfg = load_cfg()
defs = cfg['defaults']  # run parameters (dataset, precision, batch, warmup, runs, ...)
engines = cfg['engines']  # entries with an 'engine' name and a 'providers' list
models_cfg = {m['name']: m for m in cfg['models']}  # model name -> its config entry
out_csv = root / cfg['outputs']['infer_csv']  # CSV that receives one row per benchmark
onnx_dir = root / 'models_saved/onnx'
pt_dir = root / 'models_saved/pytorch'
versions = runtime_versions()
os_str = versions['os']

# Dataset (download used only to match labels; we will preprocess with our function)
# Only ToTensor is applied here; the benchmark helpers pass each batch through preprocess_np.
transform = transforms.Compose([transforms.ToTensor()])
try:
    testset = datasets.CIFAR10(root=f'{root}/data', train=False, download=True, transform=transform)
    # shuffle=False keeps the batch order fixed across runs.
    test_loader = torch.utils.data.DataLoader(testset, batch_size=defs['batch'], shuffle=False,
                                              num_workers=defs['num_workers'])
except Exception as ex:
    # Nothing can be benchmarked without the test loader: log with traceback, then re-raise.
    logger.exception("Failed to prepare CIFAR10 test loader: %s", ex)
    raise


def build_model(name: str):
    if name == 'cnn': return CNN()
    if name == 'mlp': return MLP(input_size=32 * 32 * 3)
    if name == 'mobilenetv3': return MobileNetV3()
    if name == 'efficientnetlite0': return EfficientNetLite0()
    raise ValueError(f'Unknown model {name}')


def bench_pytorch_single_batch(model, xb, yb, device, warmup: int, runs: int):
    """Benchmark repeated forward passes of *model* over one fixed batch.

    Args:
        model: torch module to benchmark; it is moved to *device* and put in eval mode.
        xb: input batch tensor; it is permuted with ``(0, 2, 3, 1)`` and passed
            through ``preprocess_np`` before inference.
        yb: label tensor for *xb*; compared with the argmax over dim 1 of the output.
        device: ``torch.device`` to run on (its ``.type`` selects CUDA synchronization).
        warmup: number of untimed forward passes executed first.
        runs: number of timed forward passes.

    Returns:
        dict with ``lat_ms_mean`` and ``lat_ms_p95`` (milliseconds per batch),
        ``thr_ips`` (samples per second over the timed passes) and ``acc``
        (fraction of correct top-1 predictions). All values are 0.0 when no
        timed pass was executed.
    """
    model = model.to(device).eval()
    # NOTE(review): preprocess_np is handed the batch permuted to (0, 2, 3, 1) and its
    # output is fed straight to the model, so it presumably returns a normalized
    # NCHW array - confirm against utils.preprocess.
    x_np = preprocess_np(xb.permute(0, 2, 3, 1).numpy())
    x_t = torch.from_numpy(x_np).to(device)
    on_cuda = device.type == 'cuda'
    # Loop invariants hoisted out of the timed section.
    y_np = yb.numpy()
    batch_size = int(yb.shape[0])

    lat = []
    correct = 0
    total = 0
    # Inference only: without no_grad every pass records an autograd graph, which
    # adds latency and memory that do not belong in an inference benchmark.
    with torch.no_grad():
        # Warmup
        for _ in range(warmup):
            model(x_t)
            if on_cuda:
                torch.cuda.synchronize()
        # Timed
        for _ in range(runs):
            t0 = time.perf_counter()
            out = model(x_t)
            if on_cuda:
                # CUDA kernels are asynchronous; wait so the timer covers the real work.
                torch.cuda.synchronize()
            lat.append(time.perf_counter() - t0)
            # Accuracy bookkeeping happens after the timer stops.
            pred = out.argmax(1).detach().cpu().numpy()
            correct += int((pred == y_np).sum())
            total += batch_size

    lat_ms = np.array(lat) * 1000.0
    lat_ms_mean = float(lat_ms.mean()) if lat_ms.size else 0.0
    lat_ms_p95 = float(np.percentile(lat_ms, 95)) if lat_ms.size else 0.0
    elapsed = sum(lat)
    # Guard against a zero denominator (no runs, or a timer that did not advance).
    thr = float(total / elapsed) if elapsed > 0 else 0.0
    acc = float(correct / total) if total else 0.0
    return {"lat_ms_mean": lat_ms_mean, "lat_ms_p95": lat_ms_p95, "thr_ips": thr, "acc": acc}


def make_ort_session(onnx_path: str, provider: str):
    """Create an ONNX Runtime session for *onnx_path* using only *provider*.

    Args:
        onnx_path: path of the ONNX model file.
        provider: execution provider name, passed as the sole entry of ``providers``.

    Returns:
        The ``onnxruntime.InferenceSession`` instance.
    """
    # onnxruntime is imported at call time rather than at module import.
    from onnxruntime import InferenceSession

    return InferenceSession(onnx_path, providers=[provider])


def make_ov_compiled(ir_path: str, device: str):
    """Read the OpenVINO model at *ir_path* and compile it for *device*.

    Returns:
        The compiled model object produced by ``Core.compile_model``.
    """
    # openvino is imported at call time rather than at module import.
    import openvino

    runtime = openvino.Core()
    return runtime.compile_model(runtime.read_model(ir_path), device)


def consistency_check(model_name: str, onnx_path: Path, ir_path: Path, xb, device):
    """Compare PyTorch, ONNX Runtime and OpenVINO outputs on one batch.

    Args:
        model_name: key into ``models_cfg`` used to build the model and find its checkpoint.
        onnx_path: ONNX model file used for the ONNX Runtime session.
        ir_path: OpenVINO model file used for the compiled model.
        xb: input batch tensor; permuted with ``(0, 2, 3, 1)`` and passed through
            ``preprocess_np`` to obtain the array shared by all three engines.
        device: ``torch.device`` for the PyTorch model; its type also selects the
            ONNX Runtime provider and the preferred OpenVINO device.

    Returns:
        Whatever ``compare_logits_torch_ort_ov`` returns for the three engines.
    """
    # Build single-batch normalized sample
    x_np = preprocess_np(xb.permute(0, 2, 3, 1).numpy())

    # Torch: fresh model with the saved weights, in eval mode on the target device.
    model = build_model(model_name)
    state = torch.load(pt_dir / models_cfg[model_name]['file_pt'], map_location=str(device))
    model.load_state_dict(state['model_state_dict'])
    model = model.to(device).eval()

    on_cuda = device.type == 'cuda'

    # ORT session on the provider matching the torch device.
    sess = make_ort_session(str(onnx_path),
                            'CUDAExecutionProvider' if on_cuda else 'CPUExecutionProvider')

    # OpenVINO's "GPU" device is a separate plugin and is not implied by CUDA being
    # available to torch. Use it only when OpenVINO actually reports it; otherwise
    # compile for CPU so the comparison still runs instead of failing at compile time.
    ov_device = 'GPU' if (on_cuda and _ov_provider_available('GPU')) else 'CPU'
    ov_comp = make_ov_compiled(str(ir_path), ov_device)

    return compare_logits_torch_ort_ov(model, sess, ov_comp, x_np)


# --------------- helpers to reduce bench_once complexity ---------------

def _normalize_provider(engine: str, provider: str) -> str:
    """Return the provider label as it is stored in the CSV.

    Providers of the ``pytorch`` and ``onnxruntime`` engines are upper-cased;
    providers of any other engine are returned unchanged.
    """
    if engine == 'pytorch' or engine == 'onnxruntime':
        return provider.upper()
    return provider


def _cache_match(model_hash: str, engine: str, provider_csv: str, defs: dict, versions: dict, driver_ver: str) -> dict:
    """Build the column/value mapping used to look up an existing benchmark row.

    Args:
        model_hash: hash of the model file.
        engine: engine name.
        provider_csv: provider label as stored in the CSV.
        defs: run parameters; ``dataset``, ``precision``, ``batch``, ``warmup`` and ``runs`` are read.
        versions: runtime versions; ``torch_ver``, ``ort_ver`` and ``ov_ver`` are read.
        driver_ver: GPU driver version.

    Returns:
        dict of the fields above plus ``cached`` set to ``True``.
    """
    key = {"model_hash": model_hash, "engine": engine, "provider": provider_csv}
    for field in ("dataset", "precision", "batch", "warmup", "runs"):
        key[field] = defs[field]
    for field in ("torch_ver", "ort_ver", "ov_ver"):
        key[field] = versions[field]
    key["driver_ver"] = driver_ver
    key["cached"] = True
    return key


def _fixed_batch(loader):
    """Take the first batch from *loader* and prepare its numpy counterparts.

    Returns:
        A 4-tuple ``(images, labels, x_np, y_np)``: the raw tensors from the
        loader, the images after ``permute(0, 2, 3, 1)`` and ``preprocess_np``,
        and the labels as a numpy array.
    """
    images, labels = next(iter(loader))
    permuted = images.permute(0, 2, 3, 1).numpy()
    return images, labels, preprocess_np(permuted), labels.numpy()


def _ensure_ir(model_name: str, onnx_path: Path) -> Path | None:
    """Return the OpenVINO IR for *model_name*, converting from ONNX when it is missing.

    The IR file name is the model's ONNX file name with ``.onnx`` replaced by
    ``.xml``, located under ``models_saved/openvino_ir`` in the project root.

    Returns:
        The existing IR path, or the result of ``onnx_to_ir`` after a
        conversion, or ``None`` when the conversion raised (the error is logged).
    """
    xml_name = models_cfg[model_name]['file_onnx'].replace('.onnx', '.xml')
    ir_path = root / 'models_saved/openvino_ir' / xml_name

    if not ir_path.exists():
        try:
            logger.info("Converting to OpenVINO IR: %s -> %s", onnx_path, ir_path)
            side = defs['image_size']
            # Conversion is requested with a single-sample, 3-channel square input shape.
            return onnx_to_ir(onnx_path, ir_path.parent, ir_path.name, input_shape=(1, 3, side, side))
        except Exception as ex:
            logger.exception("IR conversion failed: %s", ex)
            return None

    return ir_path


def _row_common(model_name, engine, provider_csv, cached, device_name, cpu_name, gpu_name, os_str, versions,
                driver_ver, model_hash):
    """Build the CSV fields shared by every benchmark row.

    Combines a fresh UTC timestamp and the fixed experiment id with the
    arguments, the run parameters from the module-level ``defs`` and the
    ``torch_ver``/``ort_ver``/``ov_ver`` entries of *versions*.

    Returns:
        A new dict holding the shared columns, without any metric fields.
    """
    row = {'ts': utc_timestamp(), 'exp_id': 'engines-bench', 'model': model_name}
    row['dataset'] = defs['dataset']
    row['precision'] = defs['precision']
    row['engine'] = engine
    row['provider'] = provider_csv
    for field in ('batch', 'warmup', 'runs'):
        row[field] = defs[field]
    row['cached'] = cached
    row['device_name'] = device_name
    row['cpu_name'] = cpu_name
    row['gpu_name'] = gpu_name
    row['os'] = os_str
    for field in ('torch_ver', 'ort_ver', 'ov_ver'):
        row[field] = versions[field]
    row['driver_ver'] = driver_ver
    row['model_hash'] = model_hash
    return row


def _write_cached_row(out_csv, base):
    """Append a row for a cache hit: the shared fields from *base* with blank metrics.

    All metric and consistency columns are written as empty strings and
    ``energy_j`` as ``'N/D'``. *base* itself is not modified.
    """
    row = dict(base)
    row.update(dict.fromkeys(('lat_ms_mean', 'lat_ms_p95', 'thr_ips', 'acc'), ''))
    row['energy_j'] = 'N/D'
    row.update(dict.fromkeys(
        ('consistency_ok', 'max_abs_diff_torch_ort', 'max_abs_diff_torch_ov',
         'top1_agree_torch_ort', 'top1_agree_torch_ov'),
        '',
    ))
    csv_append_row(str(out_csv), row, CSV_SCHEMA)


def _ov_provider_available(provider: str) -> bool:
    """Report whether *provider* is among the devices returned by ``ov_available()``.

    Any exception raised during the lookup is treated as "not available".
    """
    try:
        devices = set(ov_available())
        found = provider in devices
    except Exception:
        found = False
    return found


def _run_pytorch(model_name, onnx_path, xb, yb, x_np, cpu_name, device_pref):
    """Benchmark the PyTorch checkpoint of *model_name* and run the cross-engine consistency check.

    Args:
        model_name: key into ``models_cfg``; selects the architecture and checkpoint file.
        onnx_path: ONNX file of the same model, used for IR conversion and the consistency check.
        xb: input batch tensor (as yielded by the loader).
        yb: label tensor for *xb*.
        x_np: preprocessed numpy version of *xb*, used for the energy probe.
        cpu_name: CPU name reported as the device when running on CPU.
        device_pref: requested device; ``'cuda'`` (case-insensitive) selects CUDA
            when torch reports it available, anything else runs on CPU.

    Returns:
        dict with ``status`` ``"ok"``, ``metrics`` (from ``bench_pytorch_single_batch``),
        ``energy_j`` (joules from the meter, or ``'N/D'``), ``device_name`` and
        ``consistency`` (result of ``consistency_check``, or a blank/False placeholder
        when it could not be run).
    """
    wants_cuda = device_pref.lower() == 'cuda'
    dev = 'cuda' if wants_cuda and torch.cuda.is_available() else 'cpu'
    if wants_cuda and dev != 'cuda':
        # Make the silent fallback visible: the row is still written for this provider.
        logger.warning("CUDA requested for PyTorch but not available; running on CPU instead")
    device = torch.device(dev)

    model = build_model(model_name)
    state = torch.load(pt_dir / models_cfg[model_name]['file_pt'], map_location=dev)
    model.load_state_dict(state['model_state_dict'])
    # Switch to eval mode and move to the device BEFORE any forward pass. A freshly
    # built module is in training mode, and a training-mode forward pass can mutate
    # layer state (e.g. BatchNorm running statistics) that the benchmark then uses.
    model = model.to(device).eval()

    # Energy
    meter = GpuEnergyMeterNVML(0) if (device.type == 'cuda') else None
    if meter:
        def _one():
            # One inference including the host-to-device copy of the input.
            with torch.no_grad():
                model(torch.from_numpy(x_np).to(device))
            torch.cuda.synchronize()

        e_j, _ = meter.measure(_one)
    else:
        e_j = -1.0
    # Negative readings (including the no-meter sentinel) are reported as 'N/D'.
    energy_val = e_j if e_j >= 0 else 'N/D'

    metrics = bench_pytorch_single_batch(model, xb, yb, device=device, warmup=defs['warmup'], runs=defs['runs'])

    # Consistency
    ir_path = _ensure_ir(model_name, onnx_path)
    consistency = {'consistency_ok': False, 'max_abs_diff_torch_ort': '', 'max_abs_diff_torch_ov': '',
                   'top1_agree_torch_ort': '', 'top1_agree_torch_ov': ''}
    if ir_path is not None and ir_path.exists():
        try:
            consistency = consistency_check(model_name, onnx_path, ir_path, xb, device)
        except Exception as ex:
            # Best effort: a failed check keeps the placeholder and does not drop the metrics.
            logger.warning("Consistency check failed: %s", ex)

    device_name = (torch.cuda.get_device_name(0) if device.type == 'cuda' else cpu_name)
    return {"status": "ok", "metrics": metrics, "energy_j": energy_val, "device_name": device_name,
            "consistency": consistency}


def _run_ort(onnx_path, x_np, provider, y, cpu_name):
    """Benchmark *onnx_path* with ONNX Runtime on the requested provider.

    Args:
        onnx_path: ONNX model file.
        x_np: preprocessed input batch as a numpy array.
        provider: ``'cuda'`` (case-insensitive) selects ``CUDAExecutionProvider``;
            anything else selects ``CPUExecutionProvider``.
        y: ground-truth labels passed to the benchmark as ``y_true``.
        cpu_name: CPU name reported as the device for CPU runs.

    Returns:
        dict with ``status`` ``"ok"``, ``metrics`` (blank strings if the benchmark
        raised), ``energy_j`` (joules from the meter, or ``'N/D'``), ``device_name``
        and ``consistency`` set to ``None``.
    """
    wants_cuda = provider.lower() == 'cuda'
    prov = 'CUDAExecutionProvider' if wants_cuda else 'CPUExecutionProvider'
    cuda_visible = wants_cuda and torch.cuda.is_available()

    # Energy
    meter = GpuEnergyMeterNVML(0) if cuda_visible else None
    if meter:
        # Create the session outside the measured closure so that model loading and
        # provider initialization are not counted as inference energy.
        sess = make_ort_session(str(onnx_path), prov)
        input_name = sess.get_inputs()[0].name

        def _one_inf():
            sess.run(None, {input_name: x_np})

        e_j, _ = meter.measure(_one_inf)
    else:
        e_j = -1.0

    # Metrics
    try:
        metrics = ort_bench(str(onnx_path), x_np, provider=prov, warmup=defs['warmup'], runs=defs['runs'], y_true=y)
    except Exception as ex:
        logger.exception("ORT bench failed: %s", ex)
        metrics = {"lat_ms_mean": '', "lat_ms_p95": '', "thr_ips": '', "acc": ''}

    if cuda_visible:
        device_name = torch.cuda.get_device_name(0)
    elif wants_cuda:
        # torch cannot see a CUDA device, so it cannot be asked for the name; querying
        # it anyway would raise and discard the finished benchmark. Fall back to the
        # provider label, as bench_once does for rows without a device name.
        device_name = provider.upper()
    else:
        device_name = cpu_name
    return {"status": "ok", "metrics": metrics, "energy_j": (e_j if e_j >= 0 else 'N/D'),
            "device_name": device_name, "consistency": None}


def _run_ov(model_name, onnx_path, x_np, provider, y):
    """Benchmark the OpenVINO IR of *model_name* on the OpenVINO device *provider*.

    Returns:
        ``{"status": "unavailable"}`` when the device is not reported by OpenVINO,
        ``{"status": "missing_ir"}`` when no IR could be obtained, otherwise a
        dict with ``status`` ``"ok"``, the benchmark ``metrics``, ``energy_j``
        ``'N/D'``, ``device_name`` equal to *provider* and ``consistency`` ``None``.
    """
    if _ov_provider_available(provider):
        ir_path = _ensure_ir(model_name, onnx_path)
        if ir_path is not None:
            result = {"status": "ok"}
            result["metrics"] = ov_bench(str(ir_path), x_np, device=provider, warmup=defs['warmup'],
                                         runs=defs['runs'], y_true=y)
            result["energy_j"] = 'N/D'
            result["device_name"] = provider
            result["consistency"] = None
            return result
        return {"status": "missing_ir"}
    return {"status": "unavailable"}


def _exec_engine(model_name, engine, provider, onnx_path, xb, yb, x_np, y, cpu_name):
    if engine == 'pytorch':
        return _run_pytorch(model_name, onnx_path, xb, yb, x_np, cpu_name, provider)
    if engine == 'onnxruntime':
        return _run_ort(onnx_path, x_np, provider, y, cpu_name)
    if engine == 'openvino':
        return _run_ov(model_name, onnx_path, x_np, provider, y)
    return {"status": "error", "error": f"Unknown engine {engine}"}


# --------------------------- minimized bench_once ---------------------------

def bench_once(model_name: str, engine: str, provider: str):
    """Run one (model, engine, provider) benchmark and append exactly one row to the CSV.

    Args:
        model_name: key into ``models_cfg``.
        engine: engine name handed to ``_exec_engine``.
        provider: provider name as written in the configuration.

    Returns:
        ``'cached'`` when a matching row already existed (a blank-metrics row
        flagged as cached is appended), the runner's status string when it
        reported ``'unavailable'``, ``'missing_ir'`` or ``'error'`` (a
        blank-metrics row is appended), otherwise ``'done'`` after the measured
        row has been written.
    """
    onnx_path = onnx_dir / models_cfg[model_name]['file_onnx']
    model_hash = sha256_file(str(onnx_path))
    gpu_name, driver_ver = get_gpu_name_and_driver()
    cpu_name = get_cpu_name()
    provider_csv = _normalize_provider(engine, provider)

    metric_cols = ('lat_ms_mean', 'lat_ms_p95', 'thr_ips', 'acc')
    diff_cols = ('max_abs_diff_torch_ort', 'max_abs_diff_torch_ov',
                 'top1_agree_torch_ort', 'top1_agree_torch_ov')

    def _shared_fields(cached, device_name):
        # Copy of the columns common to every row written by this call.
        return dict(_row_common(model_name, engine, provider_csv, cached, device_name, cpu_name, gpu_name,
                                os_str, versions, driver_ver, model_hash))

    # Cache: an existing matching row means this configuration is not re-measured.
    lookup = _cache_match(model_hash, engine, provider_csv, defs, versions, driver_ver)
    if csv_has_row_with(str(out_csv), lookup):
        _write_cached_row(out_csv, _shared_fields(True, provider_csv))
        return 'cached'

    # Fixed batch
    xb, yb, x_np, y = _fixed_batch(test_loader)

    # Execute
    res = _exec_engine(model_name, engine, provider, onnx_path, xb, yb, x_np, y, cpu_name)
    status = res.get('status')

    # Non-ok statuses still produce an informative row with blank metrics.
    if status in ('unavailable', 'missing_ir', 'error'):
        row = _shared_fields(False, provider_csv)
        for col in metric_cols:
            row[col] = ''
        row['energy_j'] = 'N/D'
        row['consistency_ok'] = False
        for col in diff_cols:
            row[col] = ''
        csv_append_row(str(out_csv), row, CSV_SCHEMA)
        return status

    # OK path: write results
    metrics = res['metrics']
    energy_j = res['energy_j']
    device_name = res['device_name']
    row = _shared_fields(False, device_name)
    for col in metric_cols:
        row[col] = metrics[col]
    row['energy_j'] = energy_j
    if engine == 'pytorch':
        # Only the PyTorch runner reports consistency results.
        cons = res.get('consistency', {}) or {}
        row['consistency_ok'] = cons.get('consistency_ok', False)
        for col in diff_cols:
            row[col] = cons.get(col, '')
    else:
        row['consistency_ok'] = ''
        for col in diff_cols:
            row[col] = ''
    csv_append_row(str(out_csv), row, CSV_SCHEMA)
    return 'done'


# Main loop: run every configured model on every (engine, provider) pair.
# An exception in one combination is logged with its traceback and the loop
# continues with the next combination.
for m in models_cfg.keys():
    for e in engines:
        eng = e['engine']
        for prov in e['providers']:
            logger.info("Benchmark: model=%s engine=%s provider=%s", m, eng, prov)
            try:
                # The returned status string is not used here.
                _ = bench_once(m, eng, prov)
            except Exception as ex:
                logger.exception("Error in bench_once: %s", ex)
print('Benchmark completed.')

2025-08-13 11:52:16 | INFO | nb02 | Benchmark: model=cnn engine=pytorch provider=cpu
2025-08-13 11:52:23 | INFO | nb02 | Benchmark: model=cnn engine=pytorch provider=cuda
2025-08-13 11:52:34 | INFO | nb02 | Benchmark: model=cnn engine=onnxruntime provider=cpu
2025-08-13 11:52:41 | INFO | infer_ort | Creating ORT session: C:\Users\padul\OneDrive\Universidad\Doctorado\Desarrollo\federated-lab-multihw\models_saved\onnx\cnn_cifar10.onnx | provider=CPUExecutionProvider
2025-08-13 11:52:54 | INFO | infer_ort | ORT metrics: {'lat_ms_mean': 116.85089200094808, 'lat_ms_p95': 168.18604500294896, 'thr_ips': 547.7065592231913, 'acc': 0.828125}
2025-08-13 11:52:54 | INFO | nb02 | Benchmark: model=cnn engine=onnxruntime provider=cuda
2025-08-13 11:53:00 | INFO | infer_ort | Creating ORT session: C:\Users\padul\OneDrive\Universidad\Doctorado\Desarrollo\federated-lab-multihw\models_saved\onnx\cnn_cifar10.onnx | provider=CUDAExecutionProvider
2025-08-13 11:53:02 | INFO | infer_ort | ORT metrics: {'lat_

Benchmark completed.


In [3]:
# Summarize energy rows into a dedicated CSV for plots
# Best effort: any failure (missing pandas, missing CSV or columns, ...) is only
# logged as a warning and no summary file is produced.
try:
    import pandas as pd

    df = pd.read_csv(out_csv)
    energy_csv = root / 'metrics' / 'inference_energy_summary.csv'
    from utils.io import ensure_dir

    ensure_dir(str(energy_csv.parent))
    cols = ['model', 'engine', 'provider', 'batch', 'runs', 'energy_j', 'device_name', 'gpu_name', 'cpu_name', 'os']
    # Keep only the rows whose energy_j, compared as a string, is not the 'N/D' marker.
    df_energy = df[df['energy_j'].astype(str) != 'N/D'][cols]
    df_energy.to_csv(energy_csv, index=False)
    logger.info("Wrote energy summary: %s (rows=%d)", energy_csv, len(df_energy))
except Exception as ex:
    logger.warning("Energy summary not generated: %s", ex)


2025-08-13 11:57:21 | INFO | nb02 | Wrote energy summary: C:\Users\padul\OneDrive\Universidad\Doctorado\Desarrollo\federated-lab-multihw\metrics\inference_energy_summary.csv (rows=8)


## Notes (Windows assumptions)

- No CPU energy via RAPL/PCM on Windows; energy_j is measured only on NVIDIA GPUs via NVML. Others are marked as "N/D".
- NPU is optional; when an OpenVINO device is not available (or the IR conversion fails), a row with empty metric fields is still recorded for that provider.
- This notebook writes a single CSV and prints sanity tables only; all plots are produced in `04_results_and_plots.ipynb`.
