From 3749c9365bd0a2a7e8ee8b45be515a8c5b44af32 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Fri, 27 Mar 2026 03:50:18 +0000 Subject: [PATCH 1/5] Add multi context binary package support --- olive/cache.py | 56 ++- olive/cli/launcher.py | 2 + olive/cli/model_package.py | 119 +++++ olive/cli/optimize.py | 2 +- olive/engine/engine.py | 15 +- olive/model/handler/__init__.py | 2 + olive/model/handler/multi_target.py | 101 +++++ olive/olive_config.json | 9 + olive/passes/olive_pass.py | 18 + olive/passes/onnx/context_binary.py | 93 +++- olive/passes/onnx/ep_context_packager.py | 166 +++++++ olive/passes/onnx/model_builder.py | 4 +- olive/passes/openvino/encapsulation.py | 78 +++- olive/passes/openvino/optimum_intel.py | 19 +- olive/systems/system_config.py | 4 +- test/cli/test_model_package.py | 275 ++++++++++++ test/model/test_multi_target_model.py | 76 ++++ test/passes/onnx/test_context_binary.py | 100 +++++ .../onnx/test_multi_target_context_binary.py | 407 ++++++++++++++++++ .../openvino/test_openvino_encapsulation.py | 88 ++++ 20 files changed, 1604 insertions(+), 30 deletions(-) create mode 100644 olive/cli/model_package.py create mode 100644 olive/model/handler/multi_target.py create mode 100644 olive/passes/onnx/ep_context_packager.py create mode 100644 test/cli/test_model_package.py create mode 100644 test/model/test_multi_target_model.py create mode 100644 test/passes/onnx/test_multi_target_context_binary.py diff --git a/olive/cache.py b/olive/cache.py index fe351057b9..42e94cdae5 100644 --- a/olive/cache.py +++ b/olive/cache.py @@ -385,13 +385,61 @@ def save_model( """Save a model from the cache to a given path.""" output_dir = Path(output_dir) if output_dir else Path.cwd() - # If output_dir has a suffix (like .onnx), it's a file path - # Use parent directory for saving files - actual_output_dir = output_dir.parent if output_dir.suffix else output_dir + # Check if output_dir is an existing file; otherwise treat as directory + if output_dir.is_file(): + 
actual_output_dir = output_dir.parent + else: + actual_output_dir = output_dir actual_output_dir.mkdir(parents=True, exist_ok=True) model_json = self.load_model(model_id) - if model_json["type"].lower() == "compositemodel": + if model_json["type"].lower() == "multitargetmodel": + model_json_config = model_json["config"] + source_path = Path(model_json_config["model_path"]) + actual_output_dir.mkdir(parents=True, exist_ok=True) + + if source_path.exists(): + # Only copy target subdirectories (soc_60/, soc_73/, etc.) and manifest.json. + # Skip top-level additional_files (tokenizer, config) since each target subdir has its own copy. + for item in source_path.iterdir(): + dest = actual_output_dir / item.name + if item.is_dir(): + shutil.copytree(str(item), str(dest), dirs_exist_ok=overwrite) + elif item.name == "manifest.json": + shutil.copy2(str(item), str(dest)) + + # Update paths to point to new location + model_json_config["model_path"] = str(actual_output_dir) + + # Update target model paths + for target_model in model_json_config.get("target_models", []): + target_config = target_model.get("config", {}) + old_model_path = target_config.get("model_path", "") + if old_model_path and str(source_path) in old_model_path: + target_config["model_path"] = old_model_path.replace(str(source_path), str(actual_output_dir)) + + # Clear additional_files since each target subdir has its own copies + model_attributes = model_json_config.get("model_attributes") or {} + model_attributes.pop("additional_files", None) + + # Update manifest_path + if model_attributes.get("manifest_path"): + model_attributes["manifest_path"] = str( + actual_output_dir / Path(model_attributes["manifest_path"]).name + ) + + # Update manifest name: if pass config set model_name explicitly, keep it; + # otherwise update to the output directory name (e.g., "qwen_2.5_1.5b_Instruct") + manifest_file = actual_output_dir / "manifest.json" + if manifest_file.exists(): + manifest = 
json.loads(manifest_file.read_text()) + # The pass defaults model_name to the cache dir name (not meaningful). + # Replace it with the final output directory name unless it was explicitly configured. + source_dir_name = source_path.name if source_path else None + if not manifest.get("name") or manifest.get("name") == source_dir_name: + manifest["name"] = actual_output_dir.name + manifest_file.write_text(json.dumps(manifest, indent=2)) + elif model_json["type"].lower() == "compositemodel": model_json_config = model_json["config"] model_attributes = model_json_config.get("model_attributes") or {} diff --git a/olive/cli/launcher.py b/olive/cli/launcher.py index d9088bc89b..e37bb5248a 100644 --- a/olive/cli/launcher.py +++ b/olive/cli/launcher.py @@ -16,6 +16,7 @@ from olive.cli.finetune import FineTuneCommand from olive.cli.generate_adapter import GenerateAdapterCommand from olive.cli.generate_cost_model import GenerateCostModelCommand +from olive.cli.model_package import ModelPackageCommand from olive.cli.optimize import OptimizeCommand from olive.cli.quantize import QuantizeCommand from olive.cli.run import WorkflowRunCommand @@ -52,6 +53,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser: ConfigureQualcommSDKCommand.register_subcommand(commands_parser) SharedCacheCommand.register_subcommand(commands_parser) ExtractAdaptersCommand.register_subcommand(commands_parser) + ModelPackageCommand.register_subcommand(commands_parser) BenchmarkCommand.register_subcommand(commands_parser) return parser diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py new file mode 100644 index 0000000000..bad21ff468 --- /dev/null +++ b/olive/cli/model_package.py @@ -0,0 +1,119 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import json +import logging +from argparse import ArgumentParser +from pathlib import Path + +from olive.cli.base import BaseOliveCLICommand, add_logging_options, add_telemetry_options +from olive.common.utils import hardlink_copy_dir +from olive.telemetry import action + +logger = logging.getLogger(__name__) + + +@action +class ModelPackageCommand(BaseOliveCLICommand): + """Merge multiple single-target context binary outputs into a multi-target package with manifest.json.""" + + @staticmethod + def register_subcommand(parser: ArgumentParser): + sub_parser = parser.add_parser( + "model-package", + help="Merge multiple context binary outputs into a multi-target package with manifest.json", + ) + + sub_parser.add_argument( + "-s", + "--source", + type=str, + action="append", + required=True, + help=("Source context binary output directory. Can be specified multiple times. "), + ) + + sub_parser.add_argument( + "-o", + "--output_path", + type=str, + required=True, + help="Output directory for the merged multi-target package.", + ) + + sub_parser.add_argument( + "--model_name", + type=str, + default=None, + help="Model name for the manifest. 
If not set, derived from the output directory name.", + ) + + add_logging_options(sub_parser) + add_telemetry_options(sub_parser) + sub_parser.set_defaults(func=ModelPackageCommand) + + def run(self): + sources = self._parse_sources() + output_dir = Path(self.args.output_path) + output_dir.mkdir(parents=True, exist_ok=True) + + model_name = self.args.model_name or output_dir.name + manifest = {"name": model_name, "components": []} + + for target_name, source_path in sources: + # Read model_config.json from source + model_config = self._read_model_config(source_path) + model_attrs = model_config.get("config", {}).get("model_attributes") or {} + + # Copy source directory to output/{target_name}/ + target_dir = output_dir / target_name + hardlink_copy_dir(source_path, target_dir) + + constraints = {} + for key in ("ep", "device", "architecture", "precision", "sdk_version"): + if model_attrs.get(key) is not None: + constraints[key] = model_attrs[key] + + entry = { + "variant_name": target_name, + "file": "/".join((target_name, Path(model_config.get("config", {}).get("model_path") or "").name)), + "constraints": constraints, + } + + manifest["components"].append(entry) + + # Write manifest.json + manifest_path = output_dir / "manifest.json" + with open(manifest_path, "w") as f: + json.dump(manifest, f, indent=2) + + print(f"Merged {len(sources)} targets into {output_dir}") + + def _parse_sources(self) -> list[tuple[str, Path]]: + sources = [] + for source in self.args.source: + path = Path(source) + if not path.is_dir(): + raise ValueError(f"Source path does not exist or is not a directory: {path}") + + # Validate model_config.json exists + if not (path / "model_config.json").exists(): + raise ValueError( + f"No model_config.json found in {path}. " + "Source must be an Olive output directory with model_config.json." 
+ ) + + sources.append((path.name, path)) + + if len(sources) < 2: + raise ValueError("At least two --source directories are required to merge.") + + return sources + + @staticmethod + def _read_model_config(source_path: Path) -> dict: + """Read and return model_config.json from a source directory.""" + config_path = source_path / "model_config.json" + with open(config_path) as f: + return json.load(f) diff --git a/olive/cli/optimize.py b/olive/cli/optimize.py index d80392ecf9..db9e23f2e9 100644 --- a/olive/cli/optimize.py +++ b/olive/cli/optimize.py @@ -583,7 +583,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]: "add_zero_point": "true", "save_as_external_data": "true", } - config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"] + config["nodes_to_exclude"] = ["/lm_head/MatMulNBits"] if precision.value == Precision.INT4: config["use_int4"] = "true" return config diff --git a/olive/engine/engine.py b/olive/engine/engine.py index de6b7019a3..39805b3b14 100644 --- a/olive/engine/engine.py +++ b/olive/engine/engine.py @@ -195,15 +195,13 @@ def run( self.initialize(log_to_file, log_severity_level) output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve() - if output_dir.suffix: + # Check if output_dir is an existing file; otherwise treat as directory + if output_dir.is_file(): output_dir.parent.mkdir(parents=True, exist_ok=True) + artifacts_dir = output_dir.parent else: output_dir.mkdir(parents=True, exist_ok=True) - - # Determine the directory for artifacts (run_history, etc.) 
- # If output_dir is a file path (has suffix), use parent directory - # Otherwise use output_dir itself - artifacts_dir = output_dir.parent if output_dir.suffix else output_dir + artifacts_dir = output_dir logger.info("Running Olive on accelerator: %s", accelerator_spec) with self._create_system(): @@ -255,9 +253,8 @@ def run_accelerator( self.footprint.record(is_input_model=True, model_id=input_model_id) # Determine the directory for artifacts - # If output_dir is a file path (has suffix like .onnx), use parent directory - # Otherwise use output_dir itself - artifacts_dir = output_dir.parent if output_dir.suffix else output_dir + # If output_dir is an existing file, use its parent; otherwise use output_dir itself + artifacts_dir = output_dir.parent if output_dir.is_file() else output_dir try: if evaluate_input_model and not self.evaluator_config: diff --git a/olive/model/handler/__init__.py b/olive/model/handler/__init__.py index 35ae6c0d49..b8514209dc 100644 --- a/olive/model/handler/__init__.py +++ b/olive/model/handler/__init__.py @@ -6,6 +6,7 @@ from olive.model.handler.composite import CompositeModelHandler from olive.model.handler.diffusers import DiffusersModelHandler from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler +from olive.model.handler.multi_target import MultiTargetModelHandler from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler from olive.model.handler.openvino import OpenVINOModelHandler from olive.model.handler.pytorch import PyTorchModelHandler @@ -18,6 +19,7 @@ "DistributedHfModelHandler", "DistributedOnnxModelHandler", "HfModelHandler", + "MultiTargetModelHandler", "ONNXModelHandler", "OliveModelHandler", "OpenVINOModelHandler", diff --git a/olive/model/handler/multi_target.py b/olive/model/handler/multi_target.py new file mode 100644 index 0000000000..7a8be4c5c6 --- /dev/null +++ b/olive/model/handler/multi_target.py @@ -0,0 +1,101 @@ +# 
------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import logging +from typing import Any, Optional, Union + +from olive.common.config_utils import serialize_to_json, validate_config +from olive.common.utils import dict_diff +from olive.constants import Framework, ModelFileFormat +from olive.hardware.accelerator import Device +from olive.model.config.model_config import ModelConfig +from olive.model.config.registry import model_handler_registry +from olive.model.handler.base import OliveModelHandler +from olive.resource_path import OLIVE_RESOURCE_ANNOTATIONS + +logger = logging.getLogger(__name__) + + +@model_handler_registry("MultiTargetModel") +class MultiTargetModelHandler(OliveModelHandler): + """MultiTargetModel represents the same model compiled for multiple hardware targets. + + Unlike CompositeModelHandler which holds different component models (e.g., split parts of a pipeline), + MultiTargetModelHandler holds the same logical model compiled for different hardware targets + (e.g., different SoC models for QNN). + + When a pass encounters a MultiTargetModelHandler, it runs independently on each target model, + preserving the multi-target structure through the pipeline. + """ + + resource_keys: tuple[str, ...] = ("model_path",) + json_config_keys: tuple[str, ...] 
= ("target_names",) + + def __init__( + self, + target_models: list[Union[OliveModelHandler, dict[str, Any]]], + target_names: list[str], + model_path: OLIVE_RESOURCE_ANNOTATIONS = None, + model_attributes: Optional[dict[str, Any]] = None, + ): + super().__init__( + model_path=model_path, + framework=Framework.ONNX, + model_file_format=ModelFileFormat.COMPOSITE_MODEL, + model_attributes=model_attributes, + ) + self._target_models = [ + validate_config(m, ModelConfig).create_model() if isinstance(m, dict) else m for m in target_models + ] + assert all(isinstance(m, OliveModelHandler) for m in self._target_models), ( + "All target models must be OliveModelHandler or dict" + ) + assert len(self._target_models) == len(target_names), "Number of target models and names must match" + self.target_names = target_names + + @property + def target_models(self): + for m in self._target_models: + m.model_attributes = {**(self.model_attributes or {}), **(m.model_attributes or {})} + yield m + + def to_json(self, check_object: bool = False): + json_dict = super().to_json(check_object) + json_dict["config"]["target_models"] = [] + for m in self._target_models: + target_json = m.to_json(check_object) + target_json["config"]["model_attributes"] = dict_diff( + target_json["config"]["model_attributes"], self.model_attributes + ) + json_dict["config"]["target_models"].append(target_json) + return serialize_to_json(json_dict, check_object) + + def get_target_models(self) -> list[tuple[str, OliveModelHandler]]: + """Iterate over (target_name, target_model) pairs.""" + return zip(self.target_names, self.target_models) + + def load_model(self, rank: int = None, cache_model: bool = True): + raise NotImplementedError + + @property + def size_on_disk(self) -> int: + """Compute size of the model on disk.""" + raise NotImplementedError + + def prepare_session( + self, + inference_settings: Optional[dict[str, Any]] = None, + device: Device = Device.CPU, + execution_providers: Union[str, 
list[str]] = None, + rank: Optional[int] = None, + ): + raise RuntimeError("MultiTargetModelHandler doesn't have a session of its own") + + def run_session( + self, + session: Any = None, + inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None, + **kwargs: dict[str, Any], + ) -> Any: + raise RuntimeError("MultiTargetModelHandler doesn't have a session of its own") diff --git a/olive/olive_config.json b/olive/olive_config.json index 66613a779d..73b05b9bd8 100644 --- a/olive/olive_config.json +++ b/olive/olive_config.json @@ -78,6 +78,15 @@ "supported_quantization_encodings": [ ], "run_on_target": true }, + "EPContextBinaryPackager": { + "module_path": "olive.passes.onnx.ep_context_packager.EPContextBinaryPackager", + "supported_providers": [ "QNNExecutionProvider", "OpenVINOExecutionProvider" ], + "supported_accelerators": [ "npu", "gpu", "cpu" ], + "supported_precisions": [ "*" ], + "supported_algorithms": [ ], + "supported_quantization_encodings": [ ], + "run_on_target": true + }, "ExtractAdapters": { "module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters", "supported_providers": [ "*" ], diff --git a/olive/passes/olive_pass.py b/olive/passes/olive_pass.py index 627202a0c7..ebb9cd1c73 100644 --- a/olive/passes/olive_pass.py +++ b/olive/passes/olive_pass.py @@ -48,6 +48,9 @@ class Pass(ABC): # True if the pass processes a composite model at once. Otherwise, the components of the # composite model will be processed individually. _accepts_composite_model: bool = False + # True if the pass processes a multi-target model at once. Otherwise, each target + # will be processed independently. 
+ _accepts_multi_target_model: bool = False @classmethod def __init_subclass__(cls, **kwargs) -> None: @@ -206,6 +209,7 @@ def validate_config( def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHandler: """Run the pass on the model at a specific point in the search space.""" from olive.model import CompositeModelHandler, DistributedOnnxModelHandler + from olive.model.handler.multi_target import MultiTargetModelHandler if not self._initialized: self._initialize() @@ -227,6 +231,20 @@ def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHan inference_settings=model.inference_settings, model_attributes=model.model_attributes, ) + elif isinstance(model, MultiTargetModelHandler) and not self._accepts_multi_target_model: + # Run the pass independently for each hardware target + targets = [] + target_names = [] + model_dir = Path(output_model_path).with_suffix("") + model_dir.mkdir(parents=True, exist_ok=True) + for target_name, target_model in model.get_target_models(): + target_output_path = model_dir / target_name + output_target = self.run(target_model, str(target_output_path)) + targets.append(output_target) + target_names.append(target_name) + output_model = MultiTargetModelHandler( + targets, target_names, model_path=model_dir, model_attributes=model.model_attributes + ) elif isinstance(model, CompositeModelHandler) and not self._accepts_composite_model: components = [] component_names = [] diff --git a/olive/passes/onnx/context_binary.py b/olive/passes/onnx/context_binary.py index d802fcc575..bf0f16b443 100644 --- a/olive/passes/onnx/context_binary.py +++ b/olive/passes/onnx/context_binary.py @@ -13,6 +13,7 @@ from olive.hardware.accelerator import AcceleratorSpec, Device from olive.hardware.constants import ExecutionProvider from olive.model import CompositeModelHandler, ONNXModelHandler +from olive.model.handler.multi_target import MultiTargetModelHandler from olive.model.utils import resolve_onnx_path from 
olive.passes import Pass from olive.passes.onnx.common import ( @@ -26,7 +27,11 @@ class EPContextBinaryGenerator(Pass): - """Generate EP specific context binary for the model.""" + """Generate EP specific context binary for the model. + + When provider_options is a list of dicts, generates context binaries for each set of provider options + (e.g., multiple SoC models) and returns a MultiTargetModelHandler. + """ _accepts_composite_model = True @@ -47,9 +52,13 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon ), ), "provider_options": PassConfigParam( - type_=dict, + type_=Union[dict, list], default_value=None, - description="Provider options for the EP.", + description=( + "Provider options for the EP. Can be a single dict or a list of dicts for multi-target" + " generation (e.g., multiple SoC models). When a list is provided, context binaries are" + " generated for each set of options and returned as a MultiTargetModelHandler." + ), ), "session_options": PassConfigParam( type_=dict, @@ -73,9 +82,7 @@ def _run_for_config( model: Union[ONNXModelHandler, CompositeModelHandler], config: type[BasePassConfig], output_model_path: str, - ) -> Union[ONNXModelHandler, CompositeModelHandler]: - from onnxruntime import __version__ as OrtVersion - + ) -> Union[ONNXModelHandler, CompositeModelHandler, MultiTargetModelHandler]: # session created using providers argument so will use the ort.get_available_providers() # TODO(jambayk): consider switching to the new EP API for Windows from onnxruntime import get_available_providers @@ -89,6 +96,80 @@ def _run_for_config( f" {get_available_providers()}" ) + # Multi-target mode: provider_options is a list of dicts + if isinstance(config.provider_options, list): + return self._run_multi_target(model, config, output_model_path) + + # Single-target mode: existing behavior + result = self._run_single_target(model, config, output_model_path) + + # Populate model_attributes with context binary metadata so 
it persists in model_config.json + result.model_attributes = {**(model.model_attributes or {}), **(result.model_attributes or {})} + result.model_attributes["ep"] = self.accelerator_spec.execution_provider + result.model_attributes["device"] = str(self.accelerator_spec.accelerator_type).upper() + if config.provider_options: + result.model_attributes["provider_options"] = config.provider_options + result.model_attributes["architecture"] = config.provider_options.get("soc_model") + + return result + + def _run_multi_target( + self, + model: Union[ONNXModelHandler, CompositeModelHandler], + config: type[BasePassConfig], + output_model_path: str, + ) -> MultiTargetModelHandler: + """Generate context binaries for multiple hardware targets. + + Each entry in config.provider_options is a separate set of provider options + (e.g., different soc_model values). The result is a MultiTargetModelHandler + wrapping per-target outputs. + """ + provider_options_list = config.provider_options + assert all(isinstance(po, dict) for po in provider_options_list), ( + "Each entry in provider_options list must be a dict" + ) + + output_dir = Path(output_model_path).with_suffix("") + output_dir.mkdir(parents=True, exist_ok=True) + + targets = [] + target_names = [] + for idx, provider_options in enumerate(provider_options_list): + target_name = f"soc_{provider_options.get('soc_model', idx)}" + target_output_path = str(output_dir / target_name) + + # Create a shallow copy of config with this specific provider_options + single_config = deepcopy(config) + object.__setattr__(single_config, "provider_options", provider_options) + + result = self._run_single_target(model, single_config, target_output_path) + # Store target-specific metadata + result.model_attributes = {**(model.model_attributes or {}), **(result.model_attributes or {})} + result.model_attributes["ep"] = self.accelerator_spec.execution_provider + result.model_attributes["device"] = 
str(self.accelerator_spec.accelerator_type).upper() + result.model_attributes["provider_options"] = provider_options + result.model_attributes["architecture"] = provider_options.get("soc_model") + + targets.append(result) + target_names.append(target_name) + + return MultiTargetModelHandler( + targets, + target_names, + model_path=output_dir, + model_attributes=model.model_attributes, + ) + + def _run_single_target( + self, + model: Union[ONNXModelHandler, CompositeModelHandler], + config: type[BasePassConfig], + output_model_path: str, + ) -> Union[ONNXModelHandler, CompositeModelHandler]: + """Generate context binary for a single target. This is the original logic.""" + from onnxruntime import __version__ as OrtVersion + generate_kwargs = { "execution_provider": self.accelerator_spec.execution_provider, "provider_options": config.provider_options, diff --git a/olive/passes/onnx/ep_context_packager.py b/olive/passes/onnx/ep_context_packager.py new file mode 100644 index 0000000000..27114f84be --- /dev/null +++ b/olive/passes/onnx/ep_context_packager.py @@ -0,0 +1,166 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import json +import logging +import shutil +from pathlib import Path +from typing import Union + +from olive.hardware.accelerator import AcceleratorSpec +from olive.model import CompositeModelHandler, ONNXModelHandler +from olive.model.handler.multi_target import MultiTargetModelHandler +from olive.passes import Pass +from olive.passes.pass_config import BasePassConfig, PassConfigParam + +logger = logging.getLogger(__name__) + + +class EPContextBinaryPackager(Pass): + """Generate a manifest.json metadata file for multi-target EP context binaries. 
+ + This pass takes a MultiTargetModelHandler (produced by EPContextBinaryGenerator with + a list of provider_options) and generates a manifest.json file describing each target's + context binary with metadata required by ONNX Runtime. + + The manifest includes: + - ep: execution provider name + - device_type: CPU, NPU, or GPU + - architecture: hardware architecture (e.g., SoC model) + - precision: model precision (from model_attributes) + - sdk_version: optional SDK version + - compile_options: optional compilation options + """ + + _accepts_composite_model = True + _accepts_multi_target_model = True + + @classmethod + def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]: + return { + "model_name": PassConfigParam( + type_=str, + default_value=None, + description="Model name for the manifest. If not set, derived from the output directory name.", + ), + "sdk_version": PassConfigParam( + type_=str, + default_value=None, + description="SDK version string (e.g., 'qnn_sdk_2.28').", + ), + "compile_options": PassConfigParam( + type_=dict, + default_value=None, + description="Additional compile options to include in the manifest (e.g., dynamic shape, batch size).", + ), + } + + @staticmethod + def is_accelerator_agnostic(accelerator_spec: AcceleratorSpec) -> bool: + return False + + def _run_for_config( + self, + model: MultiTargetModelHandler, + config: type[BasePassConfig], + output_model_path: str, + ) -> MultiTargetModelHandler: + assert isinstance(model, MultiTargetModelHandler), ( + "EPContextBinaryPackager requires a MultiTargetModelHandler as input. " + "Use EPContextBinaryGenerator with a list of provider_options to produce one." 
+ ) + + output_dir = Path(output_model_path).with_suffix("") + output_dir.mkdir(parents=True, exist_ok=True) + + # Derive model name from config or output directory + model_name = config.model_name or output_dir.name + + manifest = {"name": model_name, "components": []} + + for target_name, target_model in model.get_target_models(): + target_attrs = target_model.model_attributes or {} + + # Copy target model files to output directory + self._copy_target_model(target_name, target_model, output_dir) + + # Determine the model path relative to output directory + model_path = self._get_relative_model_path(target_name, target_model) + + entry = { + "variant_name": target_name, + "file": model_path, + "constraints": { + "ep": self.accelerator_spec.execution_provider, + "device": target_attrs.get("target_device") or target_attrs.get("device") or str(self.accelerator_spec.accelerator_type).upper(), + "architecture": target_attrs.get("architecture") or target_name, + }, + } + + # Add precision from model_attributes if available + precision = target_attrs.get("precision") + if precision: + entry["constraints"]["precision"] = precision + + # Add sdk_version from model_attributes or config + sdk_version = target_attrs.get("sdk_version") or config.sdk_version + if sdk_version: + entry["constraints"]["sdk_version"] = sdk_version + if config.compile_options: + entry["constraints"]["compile_options"] = config.compile_options + + manifest["components"].append(entry) + + # Write manifest.json + manifest_path = output_dir / "manifest.json" + with open(manifest_path, "w") as f: + json.dump(manifest, f, indent=2) + logger.info("Generated manifest at %s", manifest_path) + + # Update model_attributes to include manifest path + # Remove additional_files since each target subfolder already contains its own tokenizer/config files + new_model_attributes = model.model_attributes or {} + new_model_attributes = {**new_model_attributes, "manifest_path": str(manifest_path)} + new_model_attributes.pop("additional_files", None) + + 
# Return the same MultiTargetModelHandler with updated attributes and path + return MultiTargetModelHandler( + [target_model for _, target_model in model.get_target_models()], + [target_name for target_name, _ in model.get_target_models()], + model_path=output_dir, + model_attributes=new_model_attributes, + ) + + @staticmethod + def _copy_target_model( + target_name: str, + target_model: Union[ONNXModelHandler, CompositeModelHandler], + output_dir: Path, + ) -> None: + """Copy target model files to the output directory under target_name/.""" + dest_dir = output_dir / target_name + if dest_dir.exists(): + return + + if isinstance(target_model, CompositeModelHandler): + src_dir = Path(target_model.model_path) + else: + src_dir = Path(target_model.model_path).parent + + if src_dir.is_dir(): + shutil.copytree(str(src_dir), str(dest_dir)) + else: + dest_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(str(target_model.model_path), str(dest_dir)) + + @staticmethod + def _get_relative_model_path( + target_name: str, + target_model: Union[ONNXModelHandler, CompositeModelHandler], + ) -> str: + """Get the model path relative to the target name for the manifest.""" + if isinstance(target_model, ONNXModelHandler): + return f"{target_name}/{Path(target_model.model_path).name}" + # For CompositeModelHandler or other types, use the directory + return f"{target_name}/" diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index 978744ec1c..e2539fecac 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -214,12 +214,12 @@ def _run_for_config( ) -> ONNXModelHandler: try: from onnxruntime_genai.models.builder import create_model - except ImportError: + except ImportError as e: raise ImportError( "onnxruntime-genai package is required to run ModelBuilder pass. Please install the package" " corresponding to your onnxruntime installation using pip. 
cpu: onnxruntime-genai, cuda:" " onnxruntime-genai-cuda, directml: onnxruntime-genai-directml" - ) from None + ) from e self.maybe_patch_quant() precision = config.precision diff --git a/olive/passes/openvino/encapsulation.py b/olive/passes/openvino/encapsulation.py index c8e24a2b37..59f000fa26 100644 --- a/olive/passes/openvino/encapsulation.py +++ b/olive/passes/openvino/encapsulation.py @@ -4,6 +4,7 @@ # -------------------------------------------------------------------------- import logging import os +from copy import deepcopy from pathlib import Path from typing import ClassVar, Union @@ -13,6 +14,7 @@ from olive.common.utils import hardlink_copy_dir, hardlink_copy_file from olive.hardware.accelerator import AcceleratorSpec, Device from olive.model import ONNXModelHandler, OpenVINOModelHandler +from olive.model.handler.multi_target import MultiTargetModelHandler from olive.passes import Pass from olive.passes.openvino.ov_utils import create_genai_config from olive.passes.pass_config import BasePassConfig, PassConfigParam @@ -21,7 +23,11 @@ class OpenVINOEncapsulation(Pass): - """Encapsulates OpenVINO models with onnx context nodes.""" + """Encapsulates OpenVINO models with onnx context nodes. + + When ov_version is a list of strings, generates encapsulated models for each version + and returns a MultiTargetModelHandler. + """ openvino_to_onnx_dtype: ClassVar[dict] = { "f32": TensorProto.FLOAT, @@ -62,12 +68,14 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon description=("Device the encapsulated model should run on. Available devices are cpu, gpu, npu."), ), "ov_version": PassConfigParam( - type_=str, + type_=Union[str, list], default_value=None, required=False, description=( - "Name of the OpenVINO version to override in model SDK version." - "Requires a minimum version of OpenVINO 2025.1" + "OpenVINO version to override in model SDK version. Can be a single string or a list" + " of strings for multi-target generation. 
When a list is provided, encapsulated models" + " are generated for each version and returned as a MultiTargetModelHandler." + " Requires a minimum version of OpenVINO 2025.1" ), ), "opset_imports": PassConfigParam( @@ -114,7 +122,59 @@ def _run_for_config( model: Union[OpenVINOModelHandler], config: type[BasePassConfig], output_model_path: str, + ) -> Union[ONNXModelHandler, MultiTargetModelHandler]: + # Multi-target mode: ov_version is a list of strings + if isinstance(config.ov_version, list): + return self._run_multi_target(model, config, output_model_path) + + # Single-target mode: existing behavior + return self._run_single_target(model, config, output_model_path) + + def _run_multi_target( + self, + model: Union[OpenVINOModelHandler], + config: type[BasePassConfig], + output_model_path: str, + ) -> MultiTargetModelHandler: + """Generate encapsulated models for multiple OpenVINO versions. + + Each entry in config.ov_version is a separate version string. + The result is a MultiTargetModelHandler wrapping per-version outputs. 
+ """ + ov_version_list = config.ov_version + assert all(isinstance(v, str) for v in ov_version_list), "Each entry in ov_version list must be a string" + + output_dir = Path(output_model_path).with_suffix("") + output_dir.mkdir(parents=True, exist_ok=True) + + targets = [] + target_names = [] + for ov_ver in ov_version_list: + target_name = f"ov_{ov_ver}" + target_output_path = str(output_dir / target_name) + + single_config = deepcopy(config) + object.__setattr__(single_config, "ov_version", ov_ver) + + result = self._run_single_target(model, single_config, target_output_path) + + targets.append(result) + target_names.append(target_name) + + return MultiTargetModelHandler( + targets, + target_names, + model_path=output_dir, + model_attributes=model.model_attributes, + ) + + def _run_single_target( + self, + model: Union[OpenVINOModelHandler], + config: type[BasePassConfig], + output_model_path: str, ) -> ONNXModelHandler: + """Encapsulate a single OpenVINO model. This is the original logic.""" try: import openvino as ov except ImportError: @@ -245,7 +305,15 @@ def _run_for_config( # generate the genai_config.json file for GenAI models create_genai_config(context_model_output, output_model_path, config) - return ONNXModelHandler(model_path=output_model_path) + # Populate model_attributes with context binary metadata so it persists in model_config.json + context_binary_attrs = { + **(model.model_attributes or {}), + "ep": "OpenVINOExecutionProvider", + "device": str(config.target_device).upper(), + "sdk_version": ov_version, + } + + return ONNXModelHandler(model_path=output_model_path, model_attributes=context_binary_attrs) def extract_shape_list(shape, config, prefix: str = "input_0_") -> list: diff --git a/olive/passes/openvino/optimum_intel.py b/olive/passes/openvino/optimum_intel.py index 2105f512f2..e051ec05ef 100644 --- a/olive/passes/openvino/optimum_intel.py +++ b/olive/passes/openvino/optimum_intel.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- import logging +import os from copy import deepcopy from pathlib import Path from typing import Any, Optional, Union @@ -497,6 +498,16 @@ def _run_for_config( extra_args.pop("disable_convert_tokenizer", False) extra_args["library_name"] = lib_name extra_args.pop("library", None) + + # Workaround for optimum-intel using Path.rename() which fails across filesystems. + # Set tempdir to output path so temp files are on the same filesystem as the cache. + import tempfile + + original_tmpdir = os.environ.get("TMPDIR") + original_tempdir = tempfile.tempdir + os.environ["TMPDIR"] = output_model_path + tempfile.tempdir = output_model_path + export_optimum_intel( model.model_name_or_path, output_model_path, @@ -516,7 +527,13 @@ def _run_for_config( model_kwargs=model.load_kwargs.__dict__ if model.load_kwargs else None, ) except Exception as e: - raise RuntimeError(f"OpenVINO optimum export failed: {e}") from None + raise RuntimeError(f"OpenVINO optimum export failed: {e}") from e + finally: + tempfile.tempdir = original_tempdir + if original_tmpdir is None: + os.environ.pop("TMPDIR", None) + else: + os.environ["TMPDIR"] = original_tmpdir # check the exported components exported_models = [name.stem for name in Path(output_model_path).iterdir() if name.suffix == ".xml"] diff --git a/olive/systems/system_config.py b/olive/systems/system_config.py index dab5da3503..5addeadc61 100644 --- a/olive/systems/system_config.py +++ b/olive/systems/system_config.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Optional, Union -from pydantic import ConfigDict, Field, field_validator +from pydantic import ConfigDict, Field, SerializeAsAny, field_validator from olive.common.config_utils import ConfigBase, NestedConfig, validate_config from olive.systems.common import AcceleratorConfig, SystemType @@ -88,7 +88,7 @@ def import_system_from_type(system_type: SystemType): class SystemConfig(NestedConfig): 
type: SystemType - config: Optional[TargetUserConfig] = Field(default=None, validate_default=True) + config: Optional[SerializeAsAny[TargetUserConfig]] = Field(default=None, validate_default=True) @field_validator("config", mode="before") @classmethod diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py new file mode 100644 index 0000000000..6e5d9c5b31 --- /dev/null +++ b/test/cli/test_model_package.py @@ -0,0 +1,275 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import json + +import pytest + +from olive.cli.model_package import ModelPackageCommand + + +def _create_source_dir(tmp_path, name, model_attributes, model_type="ONNXModel"): + source_dir = tmp_path / name + source_dir.mkdir(parents=True) + + model_config = { + "type": model_type, + "config": { + "model_path": str(source_dir), + "model_attributes": model_attributes, + }, + } + with open(source_dir / "model_config.json", "w") as f: + json.dump(model_config, f) + + # Create a dummy model file + (source_dir / "model_ctx.onnx").write_text("dummy") + (source_dir / "model_ctx_QnnHtp_ctx.bin").write_text("dummy") + + return source_dir + + +class TestModelPackageCommand: + def _run_command(self, args): + from argparse import ArgumentParser + + parser = ArgumentParser() + commands_parser = parser.add_subparsers() + ModelPackageCommand.register_subcommand(commands_parser) + parsed_args, unknown = parser.parse_known_args(args) + cmd = parsed_args.func(parser, parsed_args, unknown) + cmd.run() + + def test_merge_two_targets(self, tmp_path): + """Test merging two context binary outputs.""" + soc_60 = _create_source_dir( + tmp_path, + "soc_60", + { + "ep": "QNNExecutionProvider", + "device": "NPU", + "architecture": "60", + "precision": "int4", + }, + ) + soc_73 = _create_source_dir( + 
tmp_path, + "soc_73", + { + "ep": "QNNExecutionProvider", + "device": "NPU", + "architecture": "73", + "precision": "int4", + }, + ) + + output_dir = tmp_path / "output" + self._run_command( + [ + "model-package", + "--source", + f"soc_60={soc_60}", + "--source", + f"soc_73={soc_73}", + "-o", + str(output_dir), + ] + ) + + # Check manifest.json + manifest_path = output_dir / "manifest.json" + assert manifest_path.exists() + + with open(manifest_path) as f: + manifest = json.load(f) + + assert len(manifest["components"]) == 2 + assert manifest["name"] == "output" + assert manifest["components"][0]["variant_name"] == "soc_60" + assert manifest["components"][0]["file"] == "soc_60/" + assert manifest["components"][0]["constraints"]["ep"] == "QNNExecutionProvider" + assert manifest["components"][0]["constraints"]["device"] == "NPU" + assert manifest["components"][0]["constraints"]["architecture"] == "60" + assert manifest["components"][0]["constraints"]["precision"] == "int4" + assert manifest["components"][1]["variant_name"] == "soc_73" + assert manifest["components"][1]["file"] == "soc_73/" + assert manifest["components"][1]["constraints"]["architecture"] == "73" + + # Check files were copied + assert (output_dir / "soc_60" / "model_ctx.onnx").exists() + assert (output_dir / "soc_73" / "model_ctx.onnx").exists() + + def test_merge_infer_name_from_dir(self, tmp_path): + """Test that target name is inferred from directory name when not specified.""" + soc_60 = _create_source_dir( + tmp_path, + "soc_60", + {"ep": "QNNExecutionProvider", "device": "NPU"}, + ) + soc_73 = _create_source_dir( + tmp_path, + "soc_73", + {"ep": "QNNExecutionProvider", "device": "NPU"}, + ) + + output_dir = tmp_path / "output" + self._run_command( + [ + "model-package", + "--source", + str(soc_60), + "--source", + str(soc_73), + "-o", + str(output_dir), + ] + ) + + with open(output_dir / "manifest.json") as f: + manifest = json.load(f) + + assert manifest["components"][0]["file"] == "soc_60/" + 
assert manifest["components"][1]["file"] == "soc_73/" + + def test_merge_openvino_targets(self, tmp_path): + """Test merging OpenVINO context binary outputs.""" + ov_2025_1 = _create_source_dir( + tmp_path, + "ov_2025.1", + { + "ep": "OpenVINOExecutionProvider", + "device": "NPU", + "sdk_version": "2025.1", + "architecture": "NPU", + }, + ) + ov_2025_2 = _create_source_dir( + tmp_path, + "ov_2025.2", + { + "ep": "OpenVINOExecutionProvider", + "device": "NPU", + "sdk_version": "2025.2", + "architecture": "NPU", + }, + ) + + output_dir = tmp_path / "output" + self._run_command( + [ + "model-package", + "--source", + f"ov_2025.1={ov_2025_1}", + "--source", + f"ov_2025.2={ov_2025_2}", + "-o", + str(output_dir), + ] + ) + + with open(output_dir / "manifest.json") as f: + manifest = json.load(f) + + assert len(manifest["components"]) == 2 + assert manifest["components"][0]["constraints"]["ep"] == "OpenVINOExecutionProvider" + assert manifest["components"][0]["constraints"]["sdk_version"] == "2025.1" + assert manifest["components"][1]["constraints"]["sdk_version"] == "2025.2" + + def test_merge_rejects_single_source(self, tmp_path): + """Test that merging with a single source raises an error.""" + soc_60 = _create_source_dir( + tmp_path, + "soc_60", + {"ep": "QNNExecutionProvider"}, + ) + + with pytest.raises(ValueError, match="At least two"): + self._run_command( + [ + "model-package", + "--source", + str(soc_60), + "-o", + str(tmp_path / "output"), + ] + ) + + def test_merge_rejects_missing_model_config(self, tmp_path): + """Test that merging rejects a directory without model_config.json.""" + source_dir = tmp_path / "no_config" + source_dir.mkdir() + + another = _create_source_dir( + tmp_path, + "valid", + {"ep": "QNNExecutionProvider"}, + ) + + with pytest.raises(ValueError, match="model_config.json"): + self._run_command( + [ + "model-package", + "--source", + str(source_dir), + "--source", + str(another), + "-o", + str(tmp_path / "output"), + ] + ) + + def 
test_merge_rejects_nonexistent_path(self, tmp_path): + """Test that merging rejects a nonexistent path.""" + valid = _create_source_dir( + tmp_path, + "valid", + {"ep": "QNNExecutionProvider"}, + ) + + with pytest.raises(ValueError, match="does not exist"): + self._run_command( + [ + "model-package", + "--source", + "/nonexistent/path", + "--source", + str(valid), + "-o", + str(tmp_path / "output"), + ] + ) + + def test_merge_optional_fields_omitted(self, tmp_path): + """Test that optional fields are omitted from manifest when not in model_attributes.""" + soc_60 = _create_source_dir( + tmp_path, + "soc_60", + {"ep": "QNNExecutionProvider", "device": "NPU"}, + ) + soc_73 = _create_source_dir( + tmp_path, + "soc_73", + {"ep": "QNNExecutionProvider", "device": "NPU"}, + ) + + output_dir = tmp_path / "output" + self._run_command( + [ + "model-package", + "--source", + str(soc_60), + "--source", + str(soc_73), + "-o", + str(output_dir), + ] + ) + + with open(output_dir / "manifest.json") as f: + manifest = json.load(f) + + # precision, sdk_version, architecture should not be present + assert "precision" not in manifest["components"][0]["constraints"] + assert "sdk_version" not in manifest["components"][0]["constraints"] + assert "architecture" not in manifest["components"][0]["constraints"] diff --git a/test/model/test_multi_target_model.py b/test/model/test_multi_target_model.py new file mode 100644 index 0000000000..ebfdfe3cd5 --- /dev/null +++ b/test/model/test_multi_target_model.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import pytest + +from olive.model import ONNXModelHandler +from olive.model.handler.multi_target import MultiTargetModelHandler + + +def _make_onnx_handler(tmp_path, name="model", model_attributes=None): + model_dir = tmp_path / name + model_dir.mkdir(parents=True, exist_ok=True) + model_file = model_dir / f"{name}.onnx" + model_file.write_text("dummy") + return ONNXModelHandler(model_path=str(model_file), model_attributes=model_attributes) + + +class TestMultiTargetModelHandler: + def test_create_multi_target_handler(self, tmp_path): + h1 = _make_onnx_handler(tmp_path, "t1") + h2 = _make_onnx_handler(tmp_path, "t2") + + mt = MultiTargetModelHandler([h1, h2], ["t1", "t2"], model_path=tmp_path) + + assert mt.target_names == ["t1", "t2"] + pairs = list(mt.get_target_models()) + assert len(pairs) == 2 + assert pairs[0][0] == "t1" + assert pairs[1][0] == "t2" + + def test_multi_target_handler_inherits_attributes(self, tmp_path): + """Parent-level model_attributes are merged into each target model.""" + h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"}) + h2 = _make_onnx_handler(tmp_path, "t2", model_attributes={"architecture": "73"}) + + mt = MultiTargetModelHandler( + [h1, h2], + ["t1", "t2"], + model_path=tmp_path, + model_attributes={"ep": "QNNExecutionProvider", "device": "NPU"}, + ) + + for _, target in mt.get_target_models(): + # Parent attributes are merged in + assert target.model_attributes["ep"] == "QNNExecutionProvider" + assert target.model_attributes["device"] == "NPU" + + # Target-specific attributes are preserved + pairs = list(mt.get_target_models()) + assert pairs[0][1].model_attributes["architecture"] == "60" + assert pairs[1][1].model_attributes["architecture"] == "73" + + def test_multi_target_handler_to_json(self, tmp_path): + h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"}) + h2 = _make_onnx_handler(tmp_path, 
"t2", model_attributes={"architecture": "73"}) + + mt = MultiTargetModelHandler( + [h1, h2], + ["t1", "t2"], + model_path=tmp_path, + model_attributes={"ep": "QNNExecutionProvider"}, + ) + + json_dict = mt.to_json() + + assert json_dict["type"].lower() == "multitargetmodel" + assert json_dict["config"]["target_names"] == ["t1", "t2"] + assert len(json_dict["config"]["target_models"]) == 2 + # Parent-level "ep" is in the parent config, not duplicated in targets + assert json_dict["config"]["model_attributes"]["ep"] == "QNNExecutionProvider" + + def test_multi_target_handler_mismatched_names_raises(self, tmp_path): + h1 = _make_onnx_handler(tmp_path, "t1") + with pytest.raises(AssertionError, match="Number of target models and names must match"): + MultiTargetModelHandler([h1], ["t1", "t2"], model_path=tmp_path) diff --git a/test/passes/onnx/test_context_binary.py b/test/passes/onnx/test_context_binary.py index deee87c550..d760296459 100644 --- a/test/passes/onnx/test_context_binary.py +++ b/test/passes/onnx/test_context_binary.py @@ -9,6 +9,7 @@ from olive.hardware.accelerator import AcceleratorSpec from olive.model import CompositeModelHandler, ONNXModelHandler +from olive.model.handler.multi_target import MultiTargetModelHandler from olive.passes.olive_pass import create_pass_from_dict from olive.passes.onnx.common import resave_model from olive.passes.onnx.context_binary import EPContextBinaryGenerator @@ -132,3 +133,102 @@ def test_ep_context_binary_generator_composite(tmp_path, is_llm): assert expected_model_path.exists() if not is_skipped: assert len(list(output_model_path.glob(f"{name}_ctx*.bin"))) == 1 + + +# =========================================================================== +# Multi-target tests +# =========================================================================== + + +def _mock_get_available_providers(): + return ["QNNExecutionProvider", "CPUExecutionProvider"] + + +def test_multi_target_returns_multi_target_handler(tmp_path): + """When 
provider_options is a list, result should be MultiTargetModelHandler.""" + from pathlib import Path + from unittest.mock import patch + + accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider") + + p = create_pass_from_dict( + EPContextBinaryGenerator, + { + "provider_options": [ + {"soc_model": "60", "htp_performance_mode": "burst"}, + {"soc_model": "73", "htp_performance_mode": "burst"}, + ], + }, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with ( + patch.object(EPContextBinaryGenerator, "_run_single_target") as mock_single, + patch("onnxruntime.get_available_providers", _mock_get_available_providers), + ): + + def side_effect(model, config, output_model_path): + out_dir = Path(output_model_path) + out_dir.mkdir(parents=True, exist_ok=True) + model_file = out_dir / "model_ctx.onnx" + model_file.write_text("dummy") + return ONNXModelHandler(model_path=str(model_file)) + + mock_single.side_effect = side_effect + + input_model = get_onnx_model() + output_path = str(tmp_path / "output.onnx") + result = p.run(input_model, output_path) + + assert isinstance(result, MultiTargetModelHandler) + assert result.target_names == ["soc_60", "soc_73"] + assert mock_single.call_count == 2 + + for _, target in result.get_target_models(): + assert target.model_attributes["ep"] == "QNNExecutionProvider" + assert target.model_attributes["device"] == "NPU" + assert "provider_options" in target.model_attributes + + +def test_single_target_populates_model_attributes(tmp_path): + """Single-target mode should also populate model_attributes.""" + from pathlib import Path + from unittest.mock import patch + + accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider") + + p = create_pass_from_dict( + EPContextBinaryGenerator, + { + "provider_options": { + "soc_model": "60", + "htp_performance_mode": "burst", + }, + }, + disable_search=True, + accelerator_spec=accelerator_spec, + ) 
+ + with ( + patch.object(EPContextBinaryGenerator, "_run_single_target") as mock_single, + patch("onnxruntime.get_available_providers", _mock_get_available_providers), + ): + + def side_effect(model, config, output_model_path): + out_path = Path(output_model_path) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text("dummy") + return ONNXModelHandler(model_path=str(out_path)) + + mock_single.side_effect = side_effect + + input_model = get_onnx_model() + output_path = str(tmp_path / "output.onnx") + result = p.run(input_model, output_path) + + assert isinstance(result, ONNXModelHandler) + assert result.model_attributes["ep"] == "QNNExecutionProvider" + assert result.model_attributes["device"] == "NPU" + assert result.model_attributes["architecture"] == "60" + assert result.model_attributes["provider_options"]["soc_model"] == "60" diff --git a/test/passes/onnx/test_multi_target_context_binary.py b/test/passes/onnx/test_multi_target_context_binary.py new file mode 100644 index 0000000000..9fd3f6ef4e --- /dev/null +++ b/test/passes/onnx/test_multi_target_context_binary.py @@ -0,0 +1,407 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import json +from pathlib import Path +from unittest.mock import patch + +import pytest + +from olive.hardware.accelerator import AcceleratorSpec +from olive.model import ONNXModelHandler +from olive.model.handler.multi_target import MultiTargetModelHandler +from olive.passes.olive_pass import create_pass_from_dict +from olive.passes.onnx.ep_context_packager import EPContextBinaryPackager + + +def _make_onnx_handler(tmp_path, name="model", model_attributes=None): + model_dir = tmp_path / name + model_dir.mkdir(parents=True, exist_ok=True) + model_file = model_dir / f"{name}.onnx" + model_file.write_text("dummy") + return ONNXModelHandler(model_path=str(model_file), model_attributes=model_attributes) + + +def _make_multi_target(tmp_path, target_configs): + targets = [] + names = [] + for name, attrs in target_configs: + handler = _make_onnx_handler(tmp_path, name=name, model_attributes=attrs) + targets.append(handler) + names.append(name) + return MultiTargetModelHandler(targets, names, model_path=tmp_path, model_attributes={}) + +# =========================================================================== +# EPContextBinaryPackager tests +# =========================================================================== + + +class TestEPContextBinaryPackager: + def _create_packager(self, ep="QNNExecutionProvider", device="NPU", config=None): + accelerator_spec = AcceleratorSpec(accelerator_type=device, execution_provider=ep) + return create_pass_from_dict( + EPContextBinaryPackager, + config or {}, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + def test_packager_generates_manifest(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [ + ("soc_60", {"architecture": "60", "precision": "int4"}), + ("soc_73", {"architecture": "73", "precision": "int4"}), + ], + ) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + result = p.run(mt, 
output_path) + + # Result is still a MultiTargetModelHandler + assert isinstance(result, MultiTargetModelHandler) + + # manifest.json exists + manifest_path = tmp_path / "output" / "manifest.json" + assert manifest_path.exists() + + with open(manifest_path) as f: + manifest = json.load(f) + + assert len(manifest["components"]) == 2 + assert manifest["components"][0]["variant_name"] == "soc_60" + assert manifest["components"][0]["constraints"]["architecture"] == "60" + assert manifest["components"][0]["constraints"]["precision"] == "int4" + assert manifest["components"][1]["variant_name"] == "soc_73" + + def test_packager_with_sdk_version(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [ + ("soc_60", {"architecture": "60", "sdk_version": "qnn_2.28"}), + ("soc_73", {"architecture": "73", "sdk_version": "qnn_2.28"}), + ], + ) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + manifest_path = tmp_path / "output" / "manifest.json" + with open(manifest_path) as f: + manifest = json.load(f) + + assert manifest["components"][0]["constraints"]["sdk_version"] == "qnn_2.28" + + def test_packager_sdk_version_from_config(self, tmp_path): + """sdk_version from pass config is used when model_attributes doesn't have it.""" + mt = _make_multi_target( + tmp_path, + [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})], + ) + + p = self._create_packager(config={"sdk_version": "qnn_2.30"}) + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + manifest_path = tmp_path / "output" / "manifest.json" + with open(manifest_path) as f: + manifest = json.load(f) + + assert manifest["components"][0]["constraints"]["sdk_version"] == "qnn_2.30" + + def test_packager_compile_options(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})], + ) + + p = self._create_packager(config={"compile_options": {"dynamic_shape": 
True}}) + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + manifest_path = tmp_path / "output" / "manifest.json" + with open(manifest_path) as f: + manifest = json.load(f) + + assert manifest["components"][0]["constraints"]["compile_options"] == {"dynamic_shape": True} + + def test_packager_custom_model_name(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("soc_60", {}), ("soc_73", {})], + ) + + p = self._create_packager(config={"model_name": "my_model"}) + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + manifest_path = tmp_path / "output" / "manifest.json" + with open(manifest_path) as f: + manifest = json.load(f) + + assert manifest["name"] == "my_model" + + def test_packager_rejects_non_multi_target(self, tmp_path): + handler = _make_onnx_handler(tmp_path, "single") + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + with pytest.raises(AssertionError, match="requires a MultiTargetModelHandler"): + p.run(handler, output_path) + + def test_packager_copies_files(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})], + ) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + # Check files were copied + assert (tmp_path / "output" / "soc_60").is_dir() + assert (tmp_path / "output" / "soc_73").is_dir() + + def test_packager_default_model_name_from_dir(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})], + ) + + p = self._create_packager() + output_path = str(tmp_path / "my_package.onnx") + p.run(mt, output_path) + + with open(tmp_path / "my_package" / "manifest.json") as f: + manifest = json.load(f) + + assert manifest["name"] == "my_package" + + def test_packager_device_fallback_from_accelerator(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("t1", {"architecture": "a"}), 
("t2", {"architecture": "b"})], + ) + + p = self._create_packager(device="NPU") + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + with open(tmp_path / "output" / "manifest.json") as f: + manifest = json.load(f) + + assert manifest["components"][0]["constraints"]["device"] == "NPU" + + def test_packager_device_from_target_device_attr(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("t1", {"architecture": "a", "target_device": "GPU"}), ("t2", {"architecture": "b"})], + ) + + p = self._create_packager(device="NPU") + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + with open(tmp_path / "output" / "manifest.json") as f: + manifest = json.load(f) + + assert manifest["components"][0]["constraints"]["device"] == "GPU" + assert manifest["components"][1]["constraints"]["device"] == "NPU" + + def test_packager_architecture_fallback_to_target_name(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("soc_60", {}), ("soc_73", {})], + ) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + with open(tmp_path / "output" / "manifest.json") as f: + manifest = json.load(f) + + assert manifest["components"][0]["constraints"]["architecture"] == "soc_60" + assert manifest["components"][1]["constraints"]["architecture"] == "soc_73" + + def test_packager_precision_omitted_when_absent(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})], + ) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + with open(tmp_path / "output" / "manifest.json") as f: + manifest = json.load(f) + + assert "precision" not in manifest["components"][0]["constraints"] + assert "precision" not in manifest["components"][1]["constraints"] + + def test_packager_manifest_path_in_result_attributes(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("t1", {"architecture": 
"a"}), ("t2", {"architecture": "b"})], + ) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + result = p.run(mt, output_path) + + assert "manifest_path" in result.model_attributes + assert Path(result.model_attributes["manifest_path"]).name == "manifest.json" + + def test_packager_copy_skips_existing_dest(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})], + ) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + output_dir = tmp_path / "output" + output_dir.mkdir(parents=True) + + # Pre-create dest with a marker file + (output_dir / "t1").mkdir() + (output_dir / "t1" / "marker.txt").write_text("pre-existing") + + p.run(mt, output_path) + + # marker.txt should still be there (not overwritten by copytree) + assert (output_dir / "t1" / "marker.txt").read_text() == "pre-existing" + + def test_packager_with_composite_model_handler(self, tmp_path): + from olive.model import CompositeModelHandler + + # Create composite model targets + comp_dir_1 = tmp_path / "comp1" + comp_dir_1.mkdir() + (comp_dir_1 / "model.onnx").write_text("dummy") + + comp_dir_2 = tmp_path / "comp2" + comp_dir_2.mkdir() + (comp_dir_2 / "model.onnx").write_text("dummy") + + sub1 = ONNXModelHandler(model_path=str(comp_dir_1 / "model.onnx")) + sub2 = ONNXModelHandler(model_path=str(comp_dir_2 / "model.onnx")) + + comp1 = CompositeModelHandler( + model_components=[sub1], + model_component_names=["part1"], + model_path=str(comp_dir_1), + model_attributes={"architecture": "60"}, + ) + comp2 = CompositeModelHandler( + model_components=[sub2], + model_component_names=["part1"], + model_path=str(comp_dir_2), + model_attributes={"architecture": "73"}, + ) + + mt = MultiTargetModelHandler([comp1, comp2], ["soc_60", "soc_73"], model_path=tmp_path) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + result = p.run(mt, output_path) + + with open(tmp_path / 
"output" / "manifest.json") as f: + manifest = json.load(f) + + # CompositeModelHandler should use directory path (target_name/) + assert manifest["components"][0]["file"] == "soc_60/" + assert manifest["components"][1]["file"] == "soc_73/" + + # Files should be copied + assert (tmp_path / "output" / "soc_60" / "model.onnx").exists() + assert (tmp_path / "output" / "soc_73" / "model.onnx").exists() + + assert isinstance(result, MultiTargetModelHandler) + + def test_packager_onnx_model_uses_filename_in_file_field(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [("soc_60", {"architecture": "60"})], + ) + # Add a second target to satisfy multi-target requirement + h2 = _make_onnx_handler(tmp_path, name="soc_73", model_attributes={"architecture": "73"}) + mt = MultiTargetModelHandler( + [next(t for _, t in mt.get_target_models()), h2], + ["soc_60", "soc_73"], + model_path=tmp_path, + ) + + p = self._create_packager() + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + with open(tmp_path / "output" / "manifest.json") as f: + manifest = json.load(f) + + # ONNXModelHandler should include the filename + assert manifest["components"][0]["file"] == "soc_60/soc_60.onnx" + assert manifest["components"][1]["file"] == "soc_73/soc_73.onnx" + + def test_packager_sdk_version_attr_takes_precedence_over_config(self, tmp_path): + mt = _make_multi_target( + tmp_path, + [ + ("t1", {"architecture": "a", "sdk_version": "from_attrs"}), + ("t2", {"architecture": "b"}), + ], + ) + + p = self._create_packager(config={"sdk_version": "from_config"}) + output_path = str(tmp_path / "output.onnx") + p.run(mt, output_path) + + with open(tmp_path / "output" / "manifest.json") as f: + manifest = json.load(f) + + # t1 has sdk_version in attrs → use that + assert manifest["components"][0]["constraints"]["sdk_version"] == "from_attrs" + # t2 has no sdk_version in attrs → fall back to config + assert manifest["components"][1]["constraints"]["sdk_version"] == "from_config" 
+ + +# =========================================================================== +# Pass.run() multi-target auto-dispatch tests +# =========================================================================== + + +class TestPassRunMultiTarget: + def test_pass_run_iterates_targets(self, tmp_path): + """A pass that does NOT accept multi-target should iterate over each target independently.""" + from olive.passes.onnx.float16_conversion import OnnxFloatToFloat16 + + h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"}) + h2 = _make_onnx_handler(tmp_path, "t2", model_attributes={"architecture": "73"}) + mt = MultiTargetModelHandler([h1, h2], ["t1", "t2"], model_path=tmp_path) + + accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider") + + # Mock _run_for_config to just return a new handler (avoid real ONNX ops) + with patch.object(OnnxFloatToFloat16, "_run_for_config") as mock_run: + + def side_effect(model, config, output_model_path): + out_file = Path(output_model_path) + out_file.parent.mkdir(parents=True, exist_ok=True) + out_file.write_text("dummy") + return ONNXModelHandler(model_path=str(out_file), model_attributes=model.model_attributes) + + mock_run.side_effect = side_effect + + p = create_pass_from_dict(OnnxFloatToFloat16, {}, disable_search=True, accelerator_spec=accelerator_spec) + output_path = str(tmp_path / "output.onnx") + result = p.run(mt, output_path) + + # Result should still be MultiTargetModelHandler + assert isinstance(result, MultiTargetModelHandler) + assert result.target_names == ["t1", "t2"] + # _run_for_config was called twice, once per target + assert mock_run.call_count == 2 diff --git a/test/passes/openvino/test_openvino_encapsulation.py b/test/passes/openvino/test_openvino_encapsulation.py index bfbc15a260..a01da68472 100644 --- a/test/passes/openvino/test_openvino_encapsulation.py +++ b/test/passes/openvino/test_openvino_encapsulation.py @@ -3,9 +3,13 @@ # Licensed 
under the MIT License. # -------------------------------------------------------------------------- from pathlib import Path +from unittest.mock import MagicMock, patch import pytest +from olive.hardware.accelerator import AcceleratorSpec, Device +from olive.model import ONNXModelHandler +from olive.model.handler.multi_target import MultiTargetModelHandler from olive.passes.olive_pass import create_pass_from_dict from olive.passes.openvino.conversion import OpenVINOConversion from olive.passes.openvino.encapsulation import OpenVINOEncapsulation @@ -101,3 +105,87 @@ def test_openvino_encapsulate_pass_dynamic_keep_ov_dynamic_dims(tmp_path): # assert assert Path(onnx_model.model_path).exists() assert (Path(onnx_model.model_path)).is_file() + + +# =========================================================================== +# Multi-target tests +# =========================================================================== + + +def test_multi_target_returns_multi_target_handler(tmp_path): + accelerator_spec = AcceleratorSpec(accelerator_type=Device.NPU, execution_provider="OpenVINOExecutionProvider") + + p = create_pass_from_dict( + OpenVINOEncapsulation, + {"ov_version": ["2025.1", "2025.2"], "target_device": "npu"}, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with patch.object(OpenVINOEncapsulation, "_run_single_target") as mock_single: + + def side_effect(model, config, output_model_path): + out_dir = Path(output_model_path) + out_dir.mkdir(parents=True, exist_ok=True) + model_file = out_dir / "model.onnx" + model_file.write_text("dummy") + return ONNXModelHandler( + model_path=str(model_file), + model_attributes={ + "ep": "OpenVINOExecutionProvider", + "device": "NPU", + "sdk_version": config.ov_version, + "architecture": "NPU", + }, + ) + + mock_single.side_effect = side_effect + + input_model = MagicMock() + input_model.model_attributes = {} + output_path = str(tmp_path / "output.onnx") + result = p.run(input_model, output_path) + + assert 
isinstance(result, MultiTargetModelHandler) + assert result.target_names == ["ov_2025.1", "ov_2025.2"] + assert mock_single.call_count == 2 + + +def test_single_target_populates_model_attributes(tmp_path): + accelerator_spec = AcceleratorSpec(accelerator_type=Device.NPU, execution_provider="OpenVINOExecutionProvider") + + p = create_pass_from_dict( + OpenVINOEncapsulation, + {"ov_version": "2025.1", "target_device": "npu"}, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with patch.object(OpenVINOEncapsulation, "_run_single_target") as mock_single: + + def side_effect(model, config, output_model_path): + out_dir = Path(output_model_path) + out_dir.parent.mkdir(parents=True, exist_ok=True) + out_dir.mkdir(parents=True, exist_ok=True) + model_file = out_dir / "model.onnx" + model_file.write_text("dummy") + return ONNXModelHandler( + model_path=str(model_file), + model_attributes={ + "ep": "OpenVINOExecutionProvider", + "device": "NPU", + "sdk_version": "2025.1", + "architecture": "NPU", + }, + ) + + mock_single.side_effect = side_effect + + input_model = MagicMock() + input_model.model_attributes = {} + output_path = str(tmp_path / "output.onnx") + result = p.run(input_model, output_path) + + assert isinstance(result, ONNXModelHandler) + assert result.model_attributes["ep"] == "OpenVINOExecutionProvider" + assert result.model_attributes["sdk_version"] == "2025.1" From 6140481325901abf1837c0b5fd7f668953225735 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Fri, 27 Mar 2026 03:57:08 +0000 Subject: [PATCH 2/5] rename --- olive/olive_config.json | 4 ++-- .../onnx/{ep_context_packager.py => model_packager.py} | 9 ++------- test/passes/onnx/test_multi_target_context_binary.py | 9 +++++---- 3 files changed, 9 insertions(+), 13 deletions(-) rename olive/passes/onnx/{ep_context_packager.py => model_packager.py} (93%) diff --git a/olive/olive_config.json b/olive/olive_config.json index 73b05b9bd8..0fabe495e4 100644 --- a/olive/olive_config.json +++ 
b/olive/olive_config.json @@ -78,8 +78,8 @@ "supported_quantization_encodings": [ ], "run_on_target": true }, - "EPContextBinaryPackager": { - "module_path": "olive.passes.onnx.ep_context_packager.EPContextBinaryPackager", + "ModelPackager": { + "module_path": "olive.passes.onnx.model_packager.ModelPackager", "supported_providers": [ "QNNExecutionProvider", "OpenVINOExecutionProvider" ], "supported_accelerators": [ "npu", "gpu", "cpu" ], "supported_precisions": [ "*" ], diff --git a/olive/passes/onnx/ep_context_packager.py b/olive/passes/onnx/model_packager.py similarity index 93% rename from olive/passes/onnx/ep_context_packager.py rename to olive/passes/onnx/model_packager.py index 27114f84be..fa74258a0f 100644 --- a/olive/passes/onnx/ep_context_packager.py +++ b/olive/passes/onnx/model_packager.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) -class EPContextBinaryPackager(Pass): +class ModelPackager(Pass): """Generate a manifest.json metadata file for multi-target EP context binaries. This pass takes a MultiTargetModelHandler (produced by EPContextBinaryGenerator with @@ -66,10 +66,7 @@ def _run_for_config( config: type[BasePassConfig], output_model_path: str, ) -> MultiTargetModelHandler: - assert isinstance(model, MultiTargetModelHandler), ( - "EPContextBinaryPackager requires a MultiTargetModelHandler as input. " - "Use EPContextBinaryGenerator with a list of provider_options to produce one." - ) + assert isinstance(model, MultiTargetModelHandler), "ModelPackager requires a MultiTargetModelHandler as input." 
output_dir = Path(output_model_path).with_suffix("") output_dir.mkdir(parents=True, exist_ok=True) @@ -138,7 +135,6 @@ def _copy_target_model( target_model: Union[ONNXModelHandler, CompositeModelHandler], output_dir: Path, ) -> None: - """Copy target model files to the output directory under target_name/.""" dest_dir = output_dir / target_name if dest_dir.exists(): return @@ -159,7 +155,6 @@ def _get_relative_model_path( target_name: str, target_model: Union[ONNXModelHandler, CompositeModelHandler], ) -> str: - """Get the model path relative to the target name for the manifest.""" if isinstance(target_model, ONNXModelHandler): return f"{target_name}/{Path(target_model.model_path).name}" # For CompositeModelHandler or other types, use the directory diff --git a/test/passes/onnx/test_multi_target_context_binary.py b/test/passes/onnx/test_multi_target_context_binary.py index 9fd3f6ef4e..e8ba62f379 100644 --- a/test/passes/onnx/test_multi_target_context_binary.py +++ b/test/passes/onnx/test_multi_target_context_binary.py @@ -12,7 +12,7 @@ from olive.model import ONNXModelHandler from olive.model.handler.multi_target import MultiTargetModelHandler from olive.passes.olive_pass import create_pass_from_dict -from olive.passes.onnx.ep_context_packager import EPContextBinaryPackager +from olive.passes.onnx.model_packager import ModelPackager def _make_onnx_handler(tmp_path, name="model", model_attributes=None): @@ -32,16 +32,17 @@ def _make_multi_target(tmp_path, target_configs): names.append(name) return MultiTargetModelHandler(targets, names, model_path=tmp_path, model_attributes={}) + # =========================================================================== -# EPContextBinaryPackager tests +# ModelPackager tests # =========================================================================== -class TestEPContextBinaryPackager: +class TestModelPackager: def _create_packager(self, ep="QNNExecutionProvider", device="NPU", config=None): accelerator_spec = 
AcceleratorSpec(accelerator_type=device, execution_provider=ep) return create_pass_from_dict( - EPContextBinaryPackager, + ModelPackager, config or {}, disable_search=True, accelerator_spec=accelerator_spec, From 81dd0d23aabc376146b0040a9939e54591a56f34 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Fri, 27 Mar 2026 04:36:20 +0000 Subject: [PATCH 3/5] address comments --- olive/cache.py | 4 +--- olive/cli/model_package.py | 3 +-- olive/engine/engine.py | 10 +++++----- olive/model/handler/multi_target.py | 3 ++- olive/passes/onnx/model_packager.py | 2 +- olive/passes/openvino/encapsulation.py | 2 +- olive/passes/openvino/optimum_intel.py | 1 + test/cli/test_model_package.py | 16 ++++++++-------- .../onnx/test_multi_target_context_binary.py | 2 +- .../openvino/test_openvino_encapsulation.py | 2 +- 10 files changed, 22 insertions(+), 23 deletions(-) diff --git a/olive/cache.py b/olive/cache.py index 42e94cdae5..5c24aad38f 100644 --- a/olive/cache.py +++ b/olive/cache.py @@ -384,9 +384,7 @@ def save_model( ): """Save a model from the cache to a given path.""" output_dir = Path(output_dir) if output_dir else Path.cwd() - - # Check if output_dir is an existing file; otherwise treat as directory - if output_dir.is_file(): + if output_dir.suffix and not output_dir.is_dir(): actual_output_dir = output_dir.parent else: actual_output_dir = output_dir diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py index bad21ff468..93631b2e7a 100644 --- a/olive/cli/model_package.py +++ b/olive/cli/model_package.py @@ -77,7 +77,7 @@ def run(self): entry = { "variant_name": target_name, - "file": {model_config["model_path"]}, + "file": model_config.get("config", {}).get("model_path", f"{target_name}/"), "constraints": constraints, } @@ -97,7 +97,6 @@ def _parse_sources(self) -> list[tuple[str, Path]]: if not path.is_dir(): raise ValueError(f"Source path does not exist or is not a directory: {path}") - # Validate model_config.json exists if not (path / 
"model_config.json").exists(): raise ValueError( f"No model_config.json found in {path}. " diff --git a/olive/engine/engine.py b/olive/engine/engine.py index 39805b3b14..5d7cee3f26 100644 --- a/olive/engine/engine.py +++ b/olive/engine/engine.py @@ -195,8 +195,9 @@ def run( self.initialize(log_to_file, log_severity_level) output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve() - # Check if output_dir is an existing file; otherwise treat as directory - if output_dir.is_file(): + # Treat as file path only if it has a suffix and is not an existing directory + is_file_path = output_dir.suffix and not output_dir.is_dir() + if is_file_path: output_dir.parent.mkdir(parents=True, exist_ok=True) artifacts_dir = output_dir.parent else: @@ -252,9 +253,8 @@ def run_accelerator( self.footprint.record(is_input_model=True, model_id=input_model_id) - # Determine the directory for artifacts - # If output_dir is an existing file, use its parent; otherwise use output_dir itself - artifacts_dir = output_dir.parent if output_dir.is_file() else output_dir + # Artifacts directory: file path (has suffix, not existing dir) uses parent + artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir try: if evaluate_input_model and not self.evaluator_config: diff --git a/olive/model/handler/multi_target.py b/olive/model/handler/multi_target.py index 7a8be4c5c6..1327bc9724 100644 --- a/olive/model/handler/multi_target.py +++ b/olive/model/handler/multi_target.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- import logging +from collections.abc import Iterator from typing import Any, Optional, Union from olive.common.config_utils import serialize_to_json, validate_config @@ -71,7 +72,7 @@ def to_json(self, check_object: bool = False): json_dict["config"]["target_models"].append(target_json) return serialize_to_json(json_dict, check_object) - def get_target_models(self) -> list[tuple[str, OliveModelHandler]]: + def get_target_models(self) -> Iterator[tuple[str, OliveModelHandler]]: """Iterate over (target_name, target_model) pairs.""" return zip(self.target_names, self.target_models) diff --git a/olive/passes/onnx/model_packager.py b/olive/passes/onnx/model_packager.py index fa74258a0f..e33d300a45 100644 --- a/olive/passes/onnx/model_packager.py +++ b/olive/passes/onnx/model_packager.py @@ -90,7 +90,7 @@ def _run_for_config( "file": model_path, "constraints": { "ep": self.accelerator_spec.execution_provider, - "device": target_attrs.get("target_device", str(self.accelerator_spec.accelerator_type).upper()), + "device": target_attrs.get("device", str(self.accelerator_spec.accelerator_type).upper()), "architecture": target_attrs.get("architecture", target_name), }, } diff --git a/olive/passes/openvino/encapsulation.py b/olive/passes/openvino/encapsulation.py index 59f000fa26..6a788a2412 100644 --- a/olive/passes/openvino/encapsulation.py +++ b/olive/passes/openvino/encapsulation.py @@ -150,7 +150,7 @@ def _run_multi_target( targets = [] target_names = [] for ov_ver in ov_version_list: - target_name = f"ov_{ov_ver}" + target_name = f"ov_{ov_ver.replace('.', '_')}" target_output_path = str(output_dir / target_name) single_config = deepcopy(config) diff --git a/olive/passes/openvino/optimum_intel.py b/olive/passes/openvino/optimum_intel.py index e051ec05ef..d898e665eb 100644 --- a/olive/passes/openvino/optimum_intel.py +++ b/olive/passes/openvino/optimum_intel.py @@ -503,6 +503,7 @@ def 
_run_for_config( # Set tempdir to output path so temp files are on the same filesystem as the cache. import tempfile + Path(output_model_path).mkdir(parents=True, exist_ok=True) original_tmpdir = os.environ.get("TMPDIR") original_tempdir = tempfile.tempdir os.environ["TMPDIR"] = output_model_path diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py index 6e5d9c5b31..9d181d9cfb 100644 --- a/test/cli/test_model_package.py +++ b/test/cli/test_model_package.py @@ -69,9 +69,9 @@ def test_merge_two_targets(self, tmp_path): [ "model-package", "--source", - f"soc_60={soc_60}", + str(soc_60), "--source", - f"soc_73={soc_73}", + str(soc_73), "-o", str(output_dir), ] @@ -87,13 +87,13 @@ def test_merge_two_targets(self, tmp_path): assert len(manifest["components"]) == 2 assert manifest["name"] == "output" assert manifest["components"][0]["variant_name"] == "soc_60" - assert manifest["components"][0]["file"] == "soc_60/" + assert manifest["components"][0]["file"] == str(soc_60) assert manifest["components"][0]["constraints"]["ep"] == "QNNExecutionProvider" assert manifest["components"][0]["constraints"]["device"] == "NPU" assert manifest["components"][0]["constraints"]["architecture"] == "60" assert manifest["components"][0]["constraints"]["precision"] == "int4" assert manifest["components"][1]["variant_name"] == "soc_73" - assert manifest["components"][1]["file"] == "soc_73/" + assert manifest["components"][1]["file"] == str(soc_73) assert manifest["components"][1]["constraints"]["architecture"] == "73" # Check files were copied @@ -129,8 +129,8 @@ def test_merge_infer_name_from_dir(self, tmp_path): with open(output_dir / "manifest.json") as f: manifest = json.load(f) - assert manifest["components"][0]["file"] == "soc_60/" - assert manifest["components"][1]["file"] == "soc_73/" + assert manifest["components"][0]["file"] == str(soc_60) + assert manifest["components"][1]["file"] == str(soc_73) def test_merge_openvino_targets(self, tmp_path): """Test 
merging OpenVINO context binary outputs.""" @@ -160,9 +160,9 @@ def test_merge_openvino_targets(self, tmp_path): [ "model-package", "--source", - f"ov_2025.1={ov_2025_1}", + str(ov_2025_1), "--source", - f"ov_2025.2={ov_2025_2}", + str(ov_2025_2), "-o", str(output_dir), ] diff --git a/test/passes/onnx/test_multi_target_context_binary.py b/test/passes/onnx/test_multi_target_context_binary.py index e8ba62f379..5620835bbb 100644 --- a/test/passes/onnx/test_multi_target_context_binary.py +++ b/test/passes/onnx/test_multi_target_context_binary.py @@ -199,7 +199,7 @@ def test_packager_device_fallback_from_accelerator(self, tmp_path): def test_packager_device_from_target_device_attr(self, tmp_path): mt = _make_multi_target( tmp_path, - [("t1", {"architecture": "a", "target_device": "GPU"}), ("t2", {"architecture": "b"})], + [("t1", {"architecture": "a", "device": "GPU"}), ("t2", {"architecture": "b"})], ) p = self._create_packager(device="NPU") diff --git a/test/passes/openvino/test_openvino_encapsulation.py b/test/passes/openvino/test_openvino_encapsulation.py index a01da68472..4ae9bf7a34 100644 --- a/test/passes/openvino/test_openvino_encapsulation.py +++ b/test/passes/openvino/test_openvino_encapsulation.py @@ -147,7 +147,7 @@ def side_effect(model, config, output_model_path): result = p.run(input_model, output_path) assert isinstance(result, MultiTargetModelHandler) - assert result.target_names == ["ov_2025.1", "ov_2025.2"] + assert result.target_names == ["ov_2025_1", "ov_2025_2"] assert mock_single.call_count == 2 From 98eca70d17ff7915e77ec977967d05d21a3b5a64 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Fri, 27 Mar 2026 04:53:03 +0000 Subject: [PATCH 4/5] fix format --- test/passes/pytorch/test_selective_mixed_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/passes/pytorch/test_selective_mixed_precision.py b/test/passes/pytorch/test_selective_mixed_precision.py index b30683fc97..db2570a3cf 100644 --- 
a/test/passes/pytorch/test_selective_mixed_precision.py +++ b/test/passes/pytorch/test_selective_mixed_precision.py @@ -17,7 +17,7 @@ def input_model_fixture(tmp_path_factory): save_path = tmp_path_factory.mktemp("selective-mixed-precision-test") model = LlamaForCausalLM( - LlamaConfig( + LlamaConfig( # pylint: disable=unexpected-keyword-arg hidden_size=16, intermediate_size=64, num_hidden_layers=8, From 9db91b8e26c460626fd32d72fea97b792f37e734 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Fri, 27 Mar 2026 23:28:55 +0000 Subject: [PATCH 5/5] update manifest schema --- olive/cli/model_package.py | 32 ++- olive/passes/onnx/model_packager.py | 104 +++++----- test/cli/test_model_package.py | 54 ++--- .../onnx/test_multi_target_context_binary.py | 193 ++++++------------ 4 files changed, 163 insertions(+), 220 deletions(-) diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py index 93631b2e7a..f34b74f705 100644 --- a/olive/cli/model_package.py +++ b/olive/cli/model_package.py @@ -59,36 +59,48 @@ def run(self): output_dir.mkdir(parents=True, exist_ok=True) model_name = self.args.model_name or output_dir.name - manifest = {"name": model_name, "components": []} + # Create component model directory + component_dir = output_dir / model_name + component_dir.mkdir(parents=True, exist_ok=True) + + model_variants = {} for target_name, source_path in sources: - # Read model_config.json from source model_config = self._read_model_config(source_path) model_attrs = model_config.get("config", {}).get("model_attributes") or {} - # Copy source directory to output/{target_name}/ - target_dir = output_dir / target_name + # Copy source directory into component_dir/{target_name}/ + target_dir = component_dir / target_name hardlink_copy_dir(source_path, target_dir) constraints = {} - for key in ("ep", "device", "architecture", "precision", "sdk_version"): + for key in ("ep", "device", "architecture", "ep_compatibility_info"): if model_attrs.get(key) is not None: constraints[key] 
= model_attrs[key] - entry = { - "variant_name": target_name, + model_variants[target_name] = { "file": model_config.get("config", {}).get("model_path", f"{target_name}/"), "constraints": constraints, } - manifest["components"].append(entry) - - # Write manifest.json + # Write metadata.json in component directory + metadata = {"name": model_name, "model_variants": model_variants} + with open(component_dir / "metadata.json", "w") as f: + json.dump(metadata, f, indent=2) + + # Write manifest.json at package root + manifest = { + "name": model_name, + "component_models": { + model_name: {"model_variants": model_variants}, + }, + } manifest_path = output_dir / "manifest.json" with open(manifest_path, "w") as f: json.dump(manifest, f, indent=2) print(f"Merged {len(sources)} targets into {output_dir}") + print(f"Manifest written to {manifest_path}") def _parse_sources(self) -> list[tuple[str, Path]]: sources = [] diff --git a/olive/passes/onnx/model_packager.py b/olive/passes/onnx/model_packager.py index e33d300a45..9911c410f5 100644 --- a/olive/passes/onnx/model_packager.py +++ b/olive/passes/onnx/model_packager.py @@ -18,19 +18,19 @@ class ModelPackager(Pass): - """Generate a manifest.json metadata file for multi-target EP context binaries. + """Generate an ORT model package with manifest.json and per-component metadata.json. This pass takes a MultiTargetModelHandler (produced by EPContextBinaryGenerator with - a list of provider_options) and generates a manifest.json file describing each target's - context binary with metadata required by ONNX Runtime. 
- - The manifest includes: - - ep: execution provider name - - device_type: CPU, NPU, or GPU - - architecture: hardware architecture (e.g., SoC model) - - precision: model precision (from model_attributes) - - sdk_version: optional SDK version - - compile_options: optional compilation options + a list of provider_options) and generates a model package following the ORT spec: + + - manifest.json at package root with component_models and model_variants + - metadata.json per component model directory with variant descriptors + + Variant constraints include: + - ep (required): execution provider name + - device (optional): target device type (cpu, gpu, npu) + - architecture (optional): hardware architecture hint + - ep_compatibility_info (optional): EP-specific compatibility string """ _accepts_composite_model = True @@ -44,16 +44,6 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon default_value=None, description="Model name for the manifest. If not set, derived from the output directory name.", ), - "sdk_version": PassConfigParam( - type_=str, - default_value=None, - description="SDK version string (e.g., 'qnn_sdk_2.28').", - ), - "compile_options": PassConfigParam( - type_=dict, - default_value=None, - description="Additional compile options to include in the manifest (e.g., dynamic shape, batch size).", - ), } @staticmethod @@ -71,57 +61,57 @@ def _run_for_config( output_dir = Path(output_model_path).with_suffix("") output_dir.mkdir(parents=True, exist_ok=True) - # Derive model name from config or output directory model_name = config.model_name or output_dir.name - manifest = {"name": model_name, "components": []} + # Build model_variants dict and copy files into component directory + component_dir = output_dir / model_name + component_dir.mkdir(parents=True, exist_ok=True) + model_variants = {} for target_name, target_model in model.get_target_models(): target_attrs = target_model.model_attributes or {} - # Copy target model files 
to output directory - self._copy_target_model(target_name, target_model, output_dir) - - # Determine the model path relative to output directory - model_path = self._get_relative_model_path(target_name, target_model) - - entry = { - "variant_name": target_name, - "file": model_path, - "constraints": { - "ep": self.accelerator_spec.execution_provider, - "device": target_attrs.get("device", str(self.accelerator_spec.accelerator_type).upper()), - "architecture": target_attrs.get("architecture", target_name), - }, - } - - # Add precision from model_attributes if available - precision = target_attrs.get("precision") - if precision: - entry["constraints"]["precision"] = precision - - # Add sdk_version from model_attributes or config - sdk_version = target_attrs.get("sdk_version") or config.sdk_version - if sdk_version: - entry["constraints"]["sdk_version"] = sdk_version - if config.compile_options: - entry["constraints"]["compile_options"] = config.compile_options - - manifest["components"].append(entry) - - # Write manifest.json + self._copy_target_model(target_name, target_model, component_dir) + + file_path = self._get_relative_model_path(target_name, target_model) + + constraints = {"ep": self.accelerator_spec.execution_provider} + device = target_attrs.get("device") + if device: + constraints["device"] = device + architecture = target_attrs.get("architecture") + if architecture: + constraints["architecture"] = architecture + ep_compat = target_attrs.get("ep_compatibility_info") + if ep_compat: + constraints["ep_compatibility_info"] = ep_compat + + model_variants[target_name] = {"file": file_path, "constraints": constraints} + + # Write metadata.json in the component directory + metadata = {"name": model_name, "model_variants": model_variants} + metadata_path = component_dir / "metadata.json" + with open(metadata_path, "w") as f: + json.dump(metadata, f, indent=2) + logger.info("Generated metadata at %s", metadata_path) + + # Write manifest.json at package root + 
manifest = { + "name": model_name, + "component_models": { + model_name: {"model_variants": model_variants}, + }, + } manifest_path = output_dir / "manifest.json" with open(manifest_path, "w") as f: json.dump(manifest, f, indent=2) logger.info("Generated manifest at %s", manifest_path) - # Update model_attributes to include manifest path - # Remove additional_files since each target subfolder already contains its own tokenizer/config files + # Update model_attributes new_model_attributes = model.model_attributes or {} new_model_attributes = {**new_model_attributes, "manifest_path": str(manifest_path)} new_model_attributes.pop("additional_files", None) - # Return the same MultiTargetModelHandler with updated attributes and path return MultiTargetModelHandler( [target_model for _, target_model in model.get_target_models()], [target_name for target_name, _ in model.get_target_models()], diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py index 9d181d9cfb..b074f660dc 100644 --- a/test/cli/test_model_package.py +++ b/test/cli/test_model_package.py @@ -84,21 +84,24 @@ def test_merge_two_targets(self, tmp_path): with open(manifest_path) as f: manifest = json.load(f) - assert len(manifest["components"]) == 2 assert manifest["name"] == "output" - assert manifest["components"][0]["variant_name"] == "soc_60" - assert manifest["components"][0]["file"] == str(soc_60) - assert manifest["components"][0]["constraints"]["ep"] == "QNNExecutionProvider" - assert manifest["components"][0]["constraints"]["device"] == "NPU" - assert manifest["components"][0]["constraints"]["architecture"] == "60" - assert manifest["components"][0]["constraints"]["precision"] == "int4" - assert manifest["components"][1]["variant_name"] == "soc_73" - assert manifest["components"][1]["file"] == str(soc_73) - assert manifest["components"][1]["constraints"]["architecture"] == "73" - - # Check files were copied - assert (output_dir / "soc_60" / "model_ctx.onnx").exists() - assert 
(output_dir / "soc_73" / "model_ctx.onnx").exists() + assert "output" in manifest["component_models"] + variants = manifest["component_models"]["output"]["model_variants"] + assert "soc_60" in variants + assert "soc_73" in variants + assert variants["soc_60"]["file"] == str(soc_60) + assert variants["soc_60"]["constraints"]["ep"] == "QNNExecutionProvider" + assert variants["soc_60"]["constraints"]["device"] == "NPU" + assert variants["soc_60"]["constraints"]["architecture"] == "60" + assert variants["soc_73"]["constraints"]["architecture"] == "73" + + # Check metadata.json in component directory + metadata_path = output_dir / "output" / "metadata.json" + assert metadata_path.exists() + + # Check files were copied into component dir + assert (output_dir / "output" / "soc_60" / "model_ctx.onnx").exists() + assert (output_dir / "output" / "soc_73" / "model_ctx.onnx").exists() def test_merge_infer_name_from_dir(self, tmp_path): """Test that target name is inferred from directory name when not specified.""" @@ -129,8 +132,9 @@ def test_merge_infer_name_from_dir(self, tmp_path): with open(output_dir / "manifest.json") as f: manifest = json.load(f) - assert manifest["components"][0]["file"] == str(soc_60) - assert manifest["components"][1]["file"] == str(soc_73) + variants = manifest["component_models"]["output"]["model_variants"] + assert variants["soc_60"]["file"] == str(soc_60) + assert variants["soc_73"]["file"] == str(soc_73) def test_merge_openvino_targets(self, tmp_path): """Test merging OpenVINO context binary outputs.""" @@ -171,10 +175,11 @@ def test_merge_openvino_targets(self, tmp_path): with open(output_dir / "manifest.json") as f: manifest = json.load(f) - assert len(manifest["components"]) == 2 - assert manifest["components"][0]["constraints"]["ep"] == "OpenVINOExecutionProvider" - assert manifest["components"][0]["constraints"]["sdk_version"] == "2025.1" - assert manifest["components"][1]["constraints"]["sdk_version"] == "2025.2" + variants = 
manifest["component_models"]["output"]["model_variants"] + assert len(variants) == 2 + assert variants["ov_2025.1"]["constraints"]["ep"] == "OpenVINOExecutionProvider" + assert variants["ov_2025.1"]["constraints"]["device"] == "NPU" + assert variants["ov_2025.2"]["constraints"]["device"] == "NPU" def test_merge_rejects_single_source(self, tmp_path): """Test that merging with a single source raises an error.""" @@ -269,7 +274,8 @@ def test_merge_optional_fields_omitted(self, tmp_path): with open(output_dir / "manifest.json") as f: manifest = json.load(f) - # precision, sdk_version, architecture should not be present - assert "precision" not in manifest["components"][0]["constraints"] - assert "sdk_version" not in manifest["components"][0]["constraints"] - assert "architecture" not in manifest["components"][0]["constraints"] + variants = manifest["component_models"]["output"]["model_variants"] + for v in variants.values(): + # architecture, ep_compatibility_info should not be present + assert "architecture" not in v["constraints"] + assert "ep_compatibility_info" not in v["constraints"] diff --git a/test/passes/onnx/test_multi_target_context_binary.py b/test/passes/onnx/test_multi_target_context_binary.py index 5620835bbb..afee2160ba 100644 --- a/test/passes/onnx/test_multi_target_context_binary.py +++ b/test/passes/onnx/test_multi_target_context_binary.py @@ -52,8 +52,8 @@ def test_packager_generates_manifest(self, tmp_path): mt = _make_multi_target( tmp_path, [ - ("soc_60", {"architecture": "60", "precision": "int4"}), - ("soc_73", {"architecture": "73", "precision": "int4"}), + ("soc_60", {"architecture": "60", "device": "NPU"}), + ("soc_73", {"architecture": "73", "device": "NPU"}), ], ) @@ -61,73 +61,53 @@ def test_packager_generates_manifest(self, tmp_path): output_path = str(tmp_path / "output.onnx") result = p.run(mt, output_path) - # Result is still a MultiTargetModelHandler assert isinstance(result, MultiTargetModelHandler) - # manifest.json exists + # 
manifest.json at package root manifest_path = tmp_path / "output" / "manifest.json" assert manifest_path.exists() with open(manifest_path) as f: manifest = json.load(f) - assert len(manifest["components"]) == 2 - assert manifest["components"][0]["variant_name"] == "soc_60" - assert manifest["components"][0]["constraints"]["architecture"] == "60" - assert manifest["components"][0]["constraints"]["precision"] == "int4" - assert manifest["components"][1]["variant_name"] == "soc_73" + assert manifest["name"] == "output" + assert "output" in manifest["component_models"] + variants = manifest["component_models"]["output"]["model_variants"] + assert "soc_60" in variants + assert "soc_73" in variants + assert variants["soc_60"]["constraints"]["architecture"] == "60" + assert variants["soc_73"]["constraints"]["architecture"] == "73" + assert variants["soc_60"]["constraints"]["ep"] == "QNNExecutionProvider" - def test_packager_with_sdk_version(self, tmp_path): - mt = _make_multi_target( - tmp_path, - [ - ("soc_60", {"architecture": "60", "sdk_version": "qnn_2.28"}), - ("soc_73", {"architecture": "73", "sdk_version": "qnn_2.28"}), - ], - ) + # metadata.json in component directory + metadata_path = tmp_path / "output" / "output" / "metadata.json" + assert metadata_path.exists() - p = self._create_packager() - output_path = str(tmp_path / "output.onnx") - p.run(mt, output_path) - - manifest_path = tmp_path / "output" / "manifest.json" - with open(manifest_path) as f: - manifest = json.load(f) + with open(metadata_path) as f: + metadata = json.load(f) - assert manifest["components"][0]["constraints"]["sdk_version"] == "qnn_2.28" + assert metadata["name"] == "output" + assert metadata["model_variants"] == variants - def test_packager_sdk_version_from_config(self, tmp_path): - """sdk_version from pass config is used when model_attributes doesn't have it.""" + def test_packager_ep_compatibility_info(self, tmp_path): mt = _make_multi_target( tmp_path, - [("soc_60", {"architecture": 
"60"}), ("soc_73", {"architecture": "73"})], - ) - - p = self._create_packager(config={"sdk_version": "qnn_2.30"}) - output_path = str(tmp_path / "output.onnx") - p.run(mt, output_path) - - manifest_path = tmp_path / "output" / "manifest.json" - with open(manifest_path) as f: - manifest = json.load(f) - - assert manifest["components"][0]["constraints"]["sdk_version"] == "qnn_2.30" - - def test_packager_compile_options(self, tmp_path): - mt = _make_multi_target( - tmp_path, - [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})], + [ + ("soc_60", {"architecture": "60", "ep_compatibility_info": "device=npu;soc=60"}), + ("soc_73", {"architecture": "73", "ep_compatibility_info": "device=npu;soc=73"}), + ], ) - p = self._create_packager(config={"compile_options": {"dynamic_shape": True}}) + p = self._create_packager() output_path = str(tmp_path / "output.onnx") p.run(mt, output_path) - manifest_path = tmp_path / "output" / "manifest.json" - with open(manifest_path) as f: + with open(tmp_path / "output" / "manifest.json") as f: manifest = json.load(f) - assert manifest["components"][0]["constraints"]["compile_options"] == {"dynamic_shape": True} + variants = manifest["component_models"]["output"]["model_variants"] + assert variants["soc_60"]["constraints"]["ep_compatibility_info"] == "device=npu;soc=60" + assert variants["soc_73"]["constraints"]["ep_compatibility_info"] == "device=npu;soc=73" def test_packager_custom_model_name(self, tmp_path): mt = _make_multi_target( @@ -139,11 +119,16 @@ def test_packager_custom_model_name(self, tmp_path): output_path = str(tmp_path / "output.onnx") p.run(mt, output_path) - manifest_path = tmp_path / "output" / "manifest.json" - with open(manifest_path) as f: + with open(tmp_path / "output" / "manifest.json") as f: manifest = json.load(f) assert manifest["name"] == "my_model" + assert "my_model" in manifest["component_models"] + + # metadata.json under my_model/ + with open(tmp_path / "output" / "my_model" / 
"metadata.json") as f: + metadata = json.load(f) + assert metadata["name"] == "my_model" def test_packager_rejects_non_multi_target(self, tmp_path): handler = _make_onnx_handler(tmp_path, "single") @@ -162,9 +147,9 @@ def test_packager_copies_files(self, tmp_path): output_path = str(tmp_path / "output.onnx") p.run(mt, output_path) - # Check files were copied - assert (tmp_path / "output" / "soc_60").is_dir() - assert (tmp_path / "output" / "soc_73").is_dir() + # Files are under output/// + assert (tmp_path / "output" / "output" / "soc_60").is_dir() + assert (tmp_path / "output" / "output" / "soc_73").is_dir() def test_packager_default_model_name_from_dir(self, tmp_path): mt = _make_multi_target( @@ -181,22 +166,7 @@ def test_packager_default_model_name_from_dir(self, tmp_path): assert manifest["name"] == "my_package" - def test_packager_device_fallback_from_accelerator(self, tmp_path): - mt = _make_multi_target( - tmp_path, - [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})], - ) - - p = self._create_packager(device="NPU") - output_path = str(tmp_path / "output.onnx") - p.run(mt, output_path) - - with open(tmp_path / "output" / "manifest.json") as f: - manifest = json.load(f) - - assert manifest["components"][0]["constraints"]["device"] == "NPU" - - def test_packager_device_from_target_device_attr(self, tmp_path): + def test_packager_device_only_when_present(self, tmp_path): mt = _make_multi_target( tmp_path, [("t1", {"architecture": "a", "device": "GPU"}), ("t2", {"architecture": "b"})], @@ -209,29 +179,14 @@ def test_packager_device_from_target_device_attr(self, tmp_path): with open(tmp_path / "output" / "manifest.json") as f: manifest = json.load(f) - assert manifest["components"][0]["constraints"]["device"] == "GPU" - assert manifest["components"][1]["constraints"]["device"] == "NPU" - - def test_packager_architecture_fallback_to_target_name(self, tmp_path): - mt = _make_multi_target( - tmp_path, - [("soc_60", {}), ("soc_73", {})], - ) - - p = 
self._create_packager() - output_path = str(tmp_path / "output.onnx") - p.run(mt, output_path) - - with open(tmp_path / "output" / "manifest.json") as f: - manifest = json.load(f) - - assert manifest["components"][0]["constraints"]["architecture"] == "soc_60" - assert manifest["components"][1]["constraints"]["architecture"] == "soc_73" + variants = manifest["component_models"]["output"]["model_variants"] + assert variants["t1"]["constraints"]["device"] == "GPU" + assert "device" not in variants["t2"]["constraints"] - def test_packager_precision_omitted_when_absent(self, tmp_path): + def test_packager_optional_fields_omitted_when_absent(self, tmp_path): mt = _make_multi_target( tmp_path, - [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})], + [("t1", {}), ("t2", {})], ) p = self._create_packager() @@ -241,8 +196,11 @@ def test_packager_precision_omitted_when_absent(self, tmp_path): with open(tmp_path / "output" / "manifest.json") as f: manifest = json.load(f) - assert "precision" not in manifest["components"][0]["constraints"] - assert "precision" not in manifest["components"][1]["constraints"] + variants = manifest["component_models"]["output"]["model_variants"] + for v in variants.values(): + assert "device" not in v["constraints"] + assert "architecture" not in v["constraints"] + assert "ep_compatibility_info" not in v["constraints"] def test_packager_manifest_path_in_result_attributes(self, tmp_path): mt = _make_multi_target( @@ -263,24 +221,23 @@ def test_packager_copy_skips_existing_dest(self, tmp_path): [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})], ) - p = self._create_packager() + p = self._create_packager(config={"model_name": "mdl"}) output_path = str(tmp_path / "output.onnx") output_dir = tmp_path / "output" - output_dir.mkdir(parents=True) + component_dir = output_dir / "mdl" + component_dir.mkdir(parents=True) # Pre-create dest with a marker file - (output_dir / "t1").mkdir() - (output_dir / "t1" / 
"marker.txt").write_text("pre-existing") + (component_dir / "t1").mkdir() + (component_dir / "t1" / "marker.txt").write_text("pre-existing") p.run(mt, output_path) - # marker.txt should still be there (not overwritten by copytree) - assert (output_dir / "t1" / "marker.txt").read_text() == "pre-existing" + assert (component_dir / "t1" / "marker.txt").read_text() == "pre-existing" def test_packager_with_composite_model_handler(self, tmp_path): from olive.model import CompositeModelHandler - # Create composite model targets comp_dir_1 = tmp_path / "comp1" comp_dir_1.mkdir() (comp_dir_1 / "model.onnx").write_text("dummy") @@ -314,13 +271,13 @@ def test_packager_with_composite_model_handler(self, tmp_path): with open(tmp_path / "output" / "manifest.json") as f: manifest = json.load(f) - # CompositeModelHandler should use directory path (target_name/) - assert manifest["components"][0]["file"] == "soc_60/" - assert manifest["components"][1]["file"] == "soc_73/" + variants = manifest["component_models"]["output"]["model_variants"] + assert variants["soc_60"]["file"] == "soc_60/" + assert variants["soc_73"]["file"] == "soc_73/" - # Files should be copied - assert (tmp_path / "output" / "soc_60" / "model.onnx").exists() - assert (tmp_path / "output" / "soc_73" / "model.onnx").exists() + # Files under component dir + assert (tmp_path / "output" / "output" / "soc_60" / "model.onnx").exists() + assert (tmp_path / "output" / "output" / "soc_73" / "model.onnx").exists() assert isinstance(result, MultiTargetModelHandler) @@ -329,7 +286,6 @@ def test_packager_onnx_model_uses_filename_in_file_field(self, tmp_path): tmp_path, [("soc_60", {"architecture": "60"})], ) - # Add a second target to satisfy multi-target requirement h2 = _make_onnx_handler(tmp_path, name="soc_73", model_attributes={"architecture": "73"}) mt = MultiTargetModelHandler( [next(t for _, t in mt.get_target_models()), h2], @@ -344,30 +300,9 @@ def test_packager_onnx_model_uses_filename_in_file_field(self, 
tmp_path): with open(tmp_path / "output" / "manifest.json") as f: manifest = json.load(f) - # ONNXModelHandler should include the filename - assert manifest["components"][0]["file"] == "soc_60/soc_60.onnx" - assert manifest["components"][1]["file"] == "soc_73/soc_73.onnx" - - def test_packager_sdk_version_attr_takes_precedence_over_config(self, tmp_path): - mt = _make_multi_target( - tmp_path, - [ - ("t1", {"architecture": "a", "sdk_version": "from_attrs"}), - ("t2", {"architecture": "b"}), - ], - ) - - p = self._create_packager(config={"sdk_version": "from_config"}) - output_path = str(tmp_path / "output.onnx") - p.run(mt, output_path) - - with open(tmp_path / "output" / "manifest.json") as f: - manifest = json.load(f) - - # t1 has sdk_version in attrs → use that - assert manifest["components"][0]["constraints"]["sdk_version"] == "from_attrs" - # t2 has no sdk_version in attrs → fall back to config - assert manifest["components"][1]["constraints"]["sdk_version"] == "from_config" + variants = manifest["component_models"]["output"]["model_variants"] + assert variants["soc_60"]["file"] == "soc_60/soc_60.onnx" + assert variants["soc_73"]["file"] == "soc_73/soc_73.onnx" # ===========================================================================