diff --git a/tests/e2e/test_perf_e2e.py b/tests/e2e/test_perf_e2e.py index 76ecf23df..f53621860 100644 --- a/tests/e2e/test_perf_e2e.py +++ b/tests/e2e/test_perf_e2e.py @@ -4,24 +4,39 @@ # -------------------------------------------------------------------------- """E2E tests for the perf CLI command. -Tests ONNX direct benchmark using a generated ONNX model fixture. -The perf command uses @click.pass_context and requires obj={}. - -Note: HuggingFace model benchmarks are not tested here because they -require the full build pipeline (WinMLAutoModel). We only test the -ONNX direct path which creates a WinMLSession directly. +A single ``_PerfBenchmarkSuite`` base class defines every test; each concrete +subclass overrides the ``model_arg`` fixture to point at a different model +source (a generated ONNX file or a HuggingFace model id). The perf command +uses @click.pass_context and requires obj={}. Markers: e2e: Full end-to-end test + +Running these tests: + E2E tests are auto-skipped unless ``-m e2e`` is explicitly passed + (see ``tests/e2e/conftest.py``). GPU / NPU / QNN tests additionally + skip when the required hardware or EP is not available on the host. + + # Run the full file + uv run pytest -m e2e tests/e2e/test_perf_e2e.py + + # Run a single test + uv run pytest -m e2e tests/e2e/test_perf_e2e.py::TestPerfONNXDirect::test_benchmark_cpu + + # Verbose output (per-test pass/skip lines) + uv run pytest -m e2e -v tests/e2e/test_perf_e2e.py """ + from __future__ import annotations import json +import sys from typing import TYPE_CHECKING import pytest from click.testing import CliRunner +from tests.e2e.require_ep import require_ep from winml.modelkit.commands.perf import perf @@ -33,14 +48,44 @@ # =========================================================================== -# ONNX direct benchmark +# Helpers # =========================================================================== -class TestPerfONNXDirect: - """Benchmark a pre-exported ONNX file directly via WinMLSession.""" - def test_onnx_benchmark_cpu(self, tmp_path: Path, onnx_model_path: Path): - """ONNX direct benchmark on CPU with minimal iterations. +def _require_gpu() -> None: + """Skip the current test unless a GPU is discoverable via PDH.""" + if sys.platform != "win32": + pytest.skip("GPU discovery via PDH is Windows-only") + from winml.modelkit.session.monitor._pdh import PdhPoller + + if not PdhPoller.is_gpu_available(): + pytest.skip("No GPU detected via PDH") + + +def _require_npu() -> None: + """Skip the current test unless an NPU is discoverable via PDH.""" + if sys.platform != "win32": + pytest.skip("NPU discovery via PDH is Windows-only") + from winml.modelkit.session.monitor._pdh import PdhPoller + + if not PdhPoller.is_npu_available(): + pytest.skip("No NPU detected via PDH") + + +# =========================================================================== +# Shared test suite +# =========================================================================== + + +class _PerfBenchmarkSuite: + """Shared perf-CLI tests. Subclasses override ``model_arg`` fixture.""" + + @pytest.fixture + def model_arg(self) -> str: + raise NotImplementedError("Subclasses must override model_arg fixture") + + def test_benchmark_cpu(self, tmp_path: Path, model_arg: str): + """Benchmark on CPU with minimal iterations. Uses --device cpu --iterations 3 --warmup 1 for speed. Verifies JSON output file is created with expected schema. @@ -51,18 +96,21 @@ def test_onnx_benchmark_cpu(self, tmp_path: Path, onnx_model_path: Path): result = runner.invoke( perf, [ - "-m", str(onnx_model_path), - "--device", "cpu", - "--iterations", "3", - "--warmup", "1", - "-o", str(output_file), + "-m", + model_arg, + "--device", + "cpu", + "--iterations", + "3", + "--warmup", + "1", + "-o", + str(output_file), ], obj={}, catch_exceptions=False, ) - assert result.exit_code == 0, ( - f"perf failed (exit {result.exit_code}):\n{result.output}" - ) + assert result.exit_code == 0, f"perf failed (exit {result.exit_code}):\n{result.output}" # Verify JSON output file exists and has expected structure assert output_file.exists(), f"Output file not created: {output_file}" @@ -97,7 +145,7 @@ def test_onnx_benchmark_cpu(self, tmp_path: Path, onnx_model_path: Path): # Verify raw samples count matches iterations assert len(data["raw_samples_ms"]) == 3 - def test_onnx_benchmark_verbose(self, tmp_path: Path, onnx_model_path: Path): + def test_benchmark_verbose(self, tmp_path: Path, model_arg: str): """Benchmark with --verbose should succeed and show debug output.""" output_file = tmp_path / "verbose_result.json" @@ -105,18 +153,304 @@ def test_onnx_benchmark_verbose(self, tmp_path: Path, onnx_model_path: Path): result = runner.invoke( perf, [ - "-m", str(onnx_model_path), - "--device", "cpu", - "--iterations", "2", - "--warmup", "1", - "-o", str(output_file), + "-m", + model_arg, + "--device", + "cpu", + "--iterations", + "2", + "--warmup", + "1", + "-o", + str(output_file), "--verbose", ], obj={}, catch_exceptions=False, ) - assert result.exit_code == 0, ( - f"perf failed (exit {result.exit_code}):\n{result.output}" - ) + assert result.exit_code == 0, f"perf failed (exit {result.exit_code}):\n{result.output}" assert output_file.exists() assert "Results saved to" in result.output + + def test_benchmark_gpu_monitor(self, tmp_path: Path, model_arg: str): + """Benchmark on GPU with --monitor. + + Requires a real GPU discoverable via PDH. Verifies the JSON output + contains the hw_monitor section produced by HWMonitor. + """ + _require_gpu() + + output_file = tmp_path / "perf_gpu_monitor.json" + + runner = CliRunner() + result = runner.invoke( + perf, + [ + "-m", + model_arg, + "--device", + "gpu", + "--iterations", + "100", + "--warmup", + "1", + "-o", + str(output_file), + "--monitor", + ], + obj={}, + catch_exceptions=False, + ) + assert result.exit_code == 0, f"perf failed (exit {result.exit_code}):\n{result.output}" + + assert output_file.exists(), f"Output file not created: {output_file}" + data = json.loads(output_file.read_text()) + + assert data["benchmark_info"]["device"] == "gpu" + assert data["latency_ms"]["mean"] > 0 + assert "hw_monitor" in data, "hw_monitor section missing with --monitor" + assert data["hw_monitor"]["device_kind"] == "gpu" + assert data["hw_monitor"]["adapter_luid"] is not None + assert data["hw_monitor"]["gpu"]["mean_pct"] > 0 + + def test_benchmark_npu_monitor(self, tmp_path: Path, model_arg: str): + """Benchmark on NPU with --monitor. + + Requires a real NPU discoverable via PDH. Verifies the JSON output + contains the hw_monitor section produced by HWMonitor. + """ + _require_npu() + + output_file = tmp_path / "perf_npu_monitor.json" + + runner = CliRunner() + result = runner.invoke( + perf, + [ + "-m", + model_arg, + "--device", + "npu", + "--iterations", + "100", + "--warmup", + "1", + "-o", + str(output_file), + "--monitor", + ], + obj={}, + catch_exceptions=False, + ) + assert result.exit_code == 0, f"perf failed (exit {result.exit_code}):\n{result.output}" + + assert output_file.exists(), f"Output file not created: {output_file}" + data = json.loads(output_file.read_text()) + + assert data["benchmark_info"]["device"] == "npu" + assert data["latency_ms"]["mean"] > 0 + assert "hw_monitor" in data, "hw_monitor section missing with --monitor" + assert data["hw_monitor"]["device_kind"] == "npu" + assert data["hw_monitor"]["adapter_luid"] is not None + assert data["hw_monitor"]["npu"]["mean_pct"] > 0 + + def test_benchmark_auto(self, tmp_path: Path, model_arg: str): + """Benchmark with --device auto. + + Auto resolves to whatever is available on the host and should always + succeed (CPU is the universal fallback). + """ + output_file = tmp_path / "perf_auto.json" + + runner = CliRunner() + result = runner.invoke( + perf, + [ + "-m", + model_arg, + "--device", + "auto", + "--iterations", + "3", + "--warmup", + "1", + "-o", + str(output_file), + ], + obj={}, + catch_exceptions=False, + ) + assert result.exit_code == 0, f"perf failed (exit {result.exit_code}):\n{result.output}" + + assert output_file.exists() + data = json.loads(output_file.read_text()) + assert data["benchmark_info"]["device"] == "auto" + # At least a non-cpu should exist and picked up + assert data["benchmark_info"]["ep"] != "CPUExecutionProvider" + assert data["latency_ms"]["mean"] > 0 + + def test_benchmark_ep_qnn(self, tmp_path: Path, model_arg: str): + """Benchmark with --ep qnn. + + Skipped if QNNExecutionProvider is not available on the host. + """ + require_ep("qnn") + + output_file = tmp_path / "perf_qnn.json" + + runner = CliRunner() + result = runner.invoke( + perf, + [ + "-m", + model_arg, + "--ep", + "qnn", + "--iterations", + "3", + "--warmup", + "1", + "-o", + str(output_file), + ], + obj={}, + catch_exceptions=False, + ) + assert result.exit_code == 0, f"perf failed (exit {result.exit_code}):\n{result.output}" + + assert output_file.exists() + data = json.loads(output_file.read_text()) + assert data["benchmark_info"]["ep"] == "QNNExecutionProvider" + assert data["latency_ms"]["mean"] > 0 + + def test_benchmark_ep_qnn_device_gpu(self, tmp_path: Path, model_arg: str): + """Benchmark with --ep qnn and --device gpu. + + --ep overrides the device-to-provider mapping, so the session should + bind to QNN even though the requested device is GPU. Skipped if QNN + or a GPU is unavailable on the host. + """ + require_ep("qnn") + _require_gpu() + + output_file = tmp_path / "perf_qnn_gpu.json" + + runner = CliRunner() + result = runner.invoke( + perf, + [ + "-m", + model_arg, + "--device", + "gpu", + "--ep", + "qnn", + "--iterations", + "3", + "--warmup", + "1", + "-o", + str(output_file), + ], + obj={}, + catch_exceptions=False, + ) + assert result.exit_code == 0, f"perf failed (exit {result.exit_code}):\n{result.output}" + + assert output_file.exists() + data = json.loads(output_file.read_text()) + assert data["benchmark_info"]["device"] == "gpu" + assert data["benchmark_info"]["ep"] == "QNNExecutionProvider" + assert data["latency_ms"]["mean"] > 0 + + +# =========================================================================== +# Concrete suites +# =========================================================================== + + +class TestPerfONNXDirect(_PerfBenchmarkSuite): + """Benchmark a pre-exported ONNX file directly via WinMLSession.""" + + @pytest.fixture + def model_arg(self, onnx_model_path: Path) -> str: + return str(onnx_model_path) + + +# =========================================================================== +# Per-module benchmark +# =========================================================================== + + +class TestPerfModule: + """Per-module benchmark via --module on a HuggingFace model.""" + + def test_module_benchmark_cpu(self, tmp_path: Path): + """Per-module benchmark on CPU for ResNetStage submodules of resnet-50.""" + output_file = tmp_path / "perf_module.json" + + runner = CliRunner() + result = runner.invoke( + perf, + [ + "-m", + "microsoft/resnet-50", + "--module", + "ResNetStage", + "--device", + "cpu", + "--iterations", + "3", + "--warmup", + "1", + "-o", + str(output_file), + ], + obj={}, + catch_exceptions=False, + ) + assert result.exit_code == 0, f"perf failed (exit {result.exit_code}):\n{result.output}" + + assert output_file.exists(), f"Output file not created: {output_file}" + data = json.loads(output_file.read_text()) + + assert data["model_id"] == "microsoft/resnet-50" + assert data["module_class"] == "ResNetStage" + assert data["iterations"] == 3 + assert data["warmup"] == 1 + assert data["instance_count"] == 4 + assert len(data["instances"]) == data["instance_count"] + for instance in data["instances"]: + assert instance["mean_ms"] > 0 + + def test_module_invalid_lists_available(self, tmp_path: Path): + """Invalid --module should fail and list available module classes.""" + output_file = tmp_path / "perf_module_invalid.json" + + runner = CliRunner() + result = runner.invoke( + perf, + [ + "-m", + "microsoft/resnet-50", + "--module", + "NotAValidModuleXyz", + "--device", + "cpu", + "--iterations", + "3", + "--warmup", + "1", + "-o", + str(output_file), + ], + obj={}, + catch_exceptions=False, + ) + + assert result.exit_code != 0, "perf should fail for an invalid --module" + assert "No modules matching 'NotAValidModuleXyz' found" in result.output + assert "Available module class names in this model:" in result.output + # The real ResNetStage class should appear in the available list. + assert "ResNetStage" in result.output + assert not output_file.exists(), "Output file should not be written on failure"