Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 51 additions & 5 deletions olive/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,14 +384,60 @@ def save_model(
):
"""Save a model from the cache to a given path."""
output_dir = Path(output_dir) if output_dir else Path.cwd()

# If output_dir has a suffix (like .onnx), it's a file path
# Use parent directory for saving files
actual_output_dir = output_dir.parent if output_dir.suffix else output_dir
if output_dir.suffix and not output_dir.is_dir():
actual_output_dir = output_dir.parent
else:
actual_output_dir = output_dir
actual_output_dir.mkdir(parents=True, exist_ok=True)

model_json = self.load_model(model_id)
if model_json["type"].lower() == "compositemodel":
if model_json["type"].lower() == "multitargetmodel":
model_json_config = model_json["config"]
source_path = Path(model_json_config["model_path"])
actual_output_dir.mkdir(parents=True, exist_ok=True)

if source_path.exists():
# Only copy target subdirectories (soc_60/, soc_73/, etc.) and manifest.json.
# Skip top-level additional_files (tokenizer, config) since each target subdir has its own copy.
for item in source_path.iterdir():
dest = actual_output_dir / item.name
if item.is_dir():
shutil.copytree(str(item), str(dest), dirs_exist_ok=overwrite)
elif item.name == "manifest.json":
shutil.copy2(str(item), str(dest))

# Update paths to point to new location
model_json_config["model_path"] = str(actual_output_dir)

# Update target model paths
for target_model in model_json_config.get("target_models", []):
target_config = target_model.get("config", {})
old_model_path = target_config.get("model_path", "")
if old_model_path and str(source_path) in old_model_path:
target_config["model_path"] = old_model_path.replace(str(source_path), str(actual_output_dir))

# Clear additional_files since each target subdir has its own copies
model_attributes = model_json_config.get("model_attributes") or {}
model_attributes.pop("additional_files", None)

# Update manifest_path
if model_attributes.get("manifest_path"):
model_attributes["manifest_path"] = str(
actual_output_dir / Path(model_attributes["manifest_path"]).name
)

# Update manifest name: if pass config set model_name explicitly, keep it;
# otherwise update to the output directory name (e.g., "qwen_2.5_1.5b_Instruct")
manifest_file = actual_output_dir / "manifest.json"
if manifest_file.exists():
manifest = json.loads(manifest_file.read_text())
# The pass defaults model_name to the cache dir name (not meaningful).
# Replace it with the final output directory name unless it was explicitly configured.
source_dir_name = source_path.name if source_path else None
if not manifest.get("name") or manifest.get("name") == source_dir_name:
manifest["name"] = actual_output_dir.name
manifest_file.write_text(json.dumps(manifest, indent=2))
elif model_json["type"].lower() == "compositemodel":
model_json_config = model_json["config"]
model_attributes = model_json_config.get("model_attributes") or {}

Expand Down
2 changes: 2 additions & 0 deletions olive/cli/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from olive.cli.finetune import FineTuneCommand
from olive.cli.generate_adapter import GenerateAdapterCommand
from olive.cli.generate_cost_model import GenerateCostModelCommand
from olive.cli.model_package import ModelPackageCommand
from olive.cli.optimize import OptimizeCommand
from olive.cli.quantize import QuantizeCommand
from olive.cli.run import WorkflowRunCommand
Expand Down Expand Up @@ -52,6 +53,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser:
ConfigureQualcommSDKCommand.register_subcommand(commands_parser)
SharedCacheCommand.register_subcommand(commands_parser)
ExtractAdaptersCommand.register_subcommand(commands_parser)
ModelPackageCommand.register_subcommand(commands_parser)
BenchmarkCommand.register_subcommand(commands_parser)

return parser
Expand Down
130 changes: 130 additions & 0 deletions olive/cli/model_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import json
import logging
from argparse import ArgumentParser
from pathlib import Path

from olive.cli.base import BaseOliveCLICommand, add_logging_options, add_telemetry_options
from olive.common.utils import hardlink_copy_dir
from olive.telemetry import action

logger = logging.getLogger(__name__)


@action
class ModelPackageCommand(BaseOliveCLICommand):
    """Merge multiple single-target context binary outputs into a multi-target package with manifest.json.

    Each --source directory (an Olive output dir containing model_config.json) is copied into
    ``{output_path}/{model_name}/{target_name}/`` and described as one model variant in the
    generated ``metadata.json`` (component level) and ``manifest.json`` (package root).
    """

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """Register the ``model-package`` subcommand and its arguments."""
        sub_parser = parser.add_parser(
            "model-package",
            help="Merge multiple context binary outputs into a multi-target package with manifest.json",
        )

        sub_parser.add_argument(
            "-s",
            "--source",
            type=str,
            action="append",
            required=True,
            help="Source context binary output directory. Can be specified multiple times.",
        )

        sub_parser.add_argument(
            "-o",
            "--output_path",
            type=str,
            required=True,
            help="Output directory for the merged multi-target package.",
        )

        sub_parser.add_argument(
            "--model_name",
            type=str,
            default=None,
            help="Model name for the manifest. If not set, derived from the output directory name.",
        )

        add_logging_options(sub_parser)
        add_telemetry_options(sub_parser)
        sub_parser.set_defaults(func=ModelPackageCommand)

    def run(self):
        """Copy every source into the package and write metadata.json and manifest.json."""
        sources = self._parse_sources()
        output_dir = Path(self.args.output_path)
        output_dir.mkdir(parents=True, exist_ok=True)

        model_name = self.args.model_name or output_dir.name

        # Component model directory: holds one subdirectory per target.
        component_dir = output_dir / model_name
        component_dir.mkdir(parents=True, exist_ok=True)

        model_variants = {}
        for target_name, source_path in sources:
            model_config = self._read_model_config(source_path)
            config = model_config.get("config", {})
            model_attrs = config.get("model_attributes") or {}

            # Copy source directory into component_dir/{target_name}/
            target_dir = component_dir / target_name
            hardlink_copy_dir(source_path, target_dir)

            # Keep only the hardware-constraint attributes that are actually present.
            constraints = {
                key: model_attrs[key]
                for key in ("ep", "device", "architecture", "ep_compatibility_info")
                if model_attrs.get(key) is not None
            }

            model_variants[target_name] = {
                # NOTE: previously this recorded the pre-merge model_path verbatim, which is
                # typically an absolute path outside the package. Since the content is copied
                # under {target_name}/, rewrite paths inside the source dir to the in-package
                # relative location.
                "file": self._variant_file_entry(config.get("model_path"), source_path, target_name),
                "constraints": constraints,
            }

        # Write metadata.json in the component directory.
        metadata = {"name": model_name, "model_variants": model_variants}
        (component_dir / "metadata.json").write_text(json.dumps(metadata, indent=2), encoding="utf-8")

        # Write manifest.json at the package root.
        manifest = {
            "name": model_name,
            "component_models": {
                model_name: {"model_variants": model_variants},
            },
        }
        manifest_path = output_dir / "manifest.json"
        manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

        print(f"Merged {len(sources)} targets into {output_dir}")
        print(f"Manifest written to {manifest_path}")

    @staticmethod
    def _variant_file_entry(model_path, source_path: Path, target_name: str) -> str:
        """Return the manifest "file" entry for one target.

        If model_path lies inside source_path, map it to the copied location under
        ``{target_name}/``; if it lies elsewhere, keep it unchanged; if absent, point
        at the target subdirectory itself.
        """
        if not model_path:
            return f"{target_name}/"
        try:
            rel = Path(model_path).resolve().relative_to(source_path.resolve())
        except ValueError:
            # model_path is outside the source directory; leave it as-is.
            return str(model_path)
        return f"{target_name}/{rel.as_posix()}"

    def _parse_sources(self) -> list[tuple[str, Path]]:
        """Validate --source arguments and return (target_name, path) pairs.

        The target name is the source directory's basename; duplicates are rejected
        because they would silently overwrite each other in the package.

        Raises:
            ValueError: if a source is missing/invalid, names collide, or fewer than
                two sources were given.
        """
        sources = []
        for source in self.args.source:
            path = Path(source)
            if not path.is_dir():
                raise ValueError(f"Source path does not exist or is not a directory: {path}")

            if not (path / "model_config.json").exists():
                raise ValueError(
                    f"No model_config.json found in {path}. "
                    "Source must be an Olive output directory with model_config.json."
                )

            sources.append((path.name, path))

        if len(sources) < 2:
            raise ValueError("At least two --source directories are required to merge.")

        names = [name for name, _ in sources]
        duplicates = sorted({n for n in names if names.count(n) > 1})
        if duplicates:
            raise ValueError(
                f"Duplicate target names derived from source directories: {duplicates}. "
                "Each --source must have a unique directory name."
            )

        return sources

    @staticmethod
    def _read_model_config(source_path: Path) -> dict:
        """Read and return model_config.json from a source directory."""
        config_path = source_path / "model_config.json"
        with open(config_path, encoding="utf-8") as f:
            return json.load(f)
2 changes: 1 addition & 1 deletion olive/cli/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]:
"add_zero_point": "true",
"save_as_external_data": "true",
}
config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"]
config["nodes_to_exclude"] = ["/lm_head/MatMulNBits"]
if precision.value == Precision.INT4:
config["use_int4"] = "true"
return config
Expand Down
17 changes: 7 additions & 10 deletions olive/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,14 @@ def run(
self.initialize(log_to_file, log_severity_level)

output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
if output_dir.suffix:
# Treat as file path only if it has a suffix and is not an existing directory
is_file_path = output_dir.suffix and not output_dir.is_dir()
if is_file_path:
output_dir.parent.mkdir(parents=True, exist_ok=True)
artifacts_dir = output_dir.parent
else:
output_dir.mkdir(parents=True, exist_ok=True)

# Determine the directory for artifacts (run_history, etc.)
# If output_dir is a file path (has suffix), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
artifacts_dir = output_dir

logger.info("Running Olive on accelerator: %s", accelerator_spec)
with self._create_system():
Expand Down Expand Up @@ -254,10 +253,8 @@ def run_accelerator(

self.footprint.record(is_input_model=True, model_id=input_model_id)

# Determine the directory for artifacts
# If output_dir is a file path (has suffix like .onnx), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
# Artifacts directory: file path (has suffix, not existing dir) uses parent
artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir

try:
if evaluate_input_model and not self.evaluator_config:
Expand Down
2 changes: 2 additions & 0 deletions olive/model/handler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from olive.model.handler.composite import CompositeModelHandler
from olive.model.handler.diffusers import DiffusersModelHandler
from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler
from olive.model.handler.multi_target import MultiTargetModelHandler
from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
from olive.model.handler.openvino import OpenVINOModelHandler
from olive.model.handler.pytorch import PyTorchModelHandler
Expand All @@ -18,6 +19,7 @@
"DistributedHfModelHandler",
"DistributedOnnxModelHandler",
"HfModelHandler",
"MultiTargetModelHandler",
"ONNXModelHandler",
"OliveModelHandler",
"OpenVINOModelHandler",
Expand Down
102 changes: 102 additions & 0 deletions olive/model/handler/multi_target.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import logging
from collections.abc import Iterator
from typing import Any, Optional, Union

from olive.common.config_utils import serialize_to_json, validate_config
from olive.common.utils import dict_diff
from olive.constants import Framework, ModelFileFormat
from olive.hardware.accelerator import Device
from olive.model.config.model_config import ModelConfig
from olive.model.config.registry import model_handler_registry
from olive.model.handler.base import OliveModelHandler
from olive.resource_path import OLIVE_RESOURCE_ANNOTATIONS

logger = logging.getLogger(__name__)


@model_handler_registry("MultiTargetModel")
class MultiTargetModelHandler(OliveModelHandler):
    """MultiTargetModel represents the same model compiled for multiple hardware targets.

    Unlike CompositeModelHandler which holds different component models (e.g., split parts of a pipeline),
    MultiTargetModelHandler holds the same logical model compiled for different hardware targets
    (e.g., different SoC models for QNN).

    When a pass encounters a MultiTargetModelHandler, it runs independently on each target model,
    preserving the multi-target structure through the pipeline.
    """

    resource_keys: tuple[str, ...] = ("model_path",)
    json_config_keys: tuple[str, ...] = ("target_names",)

    def __init__(
        self,
        target_models: list[Union[OliveModelHandler, dict[str, Any]]],
        target_names: list[str],
        model_path: OLIVE_RESOURCE_ANNOTATIONS = None,
        model_attributes: Optional[dict[str, Any]] = None,
    ):
        """Initialize from handlers (or serialized dict configs) plus parallel target names.

        Args:
            target_models: one handler (or its serialized ModelConfig dict) per target.
            target_names: target identifiers, same length/order as target_models.
            model_path: optional shared root path for the multi-target package.
            model_attributes: attributes shared by all targets; merged into each
                target's own attributes on access (target values win).

        Raises:
            TypeError: if any target model is not an OliveModelHandler after validation.
            ValueError: if target_models and target_names lengths differ.
        """
        super().__init__(
            model_path=model_path,
            framework=Framework.ONNX,
            model_file_format=ModelFileFormat.COMPOSITE_MODEL,
            model_attributes=model_attributes,
        )
        self._target_models = [
            validate_config(m, ModelConfig).create_model() if isinstance(m, dict) else m for m in target_models
        ]
        # Explicit raises instead of `assert`: asserts are stripped under `python -O`,
        # which would silently skip this input validation.
        if not all(isinstance(m, OliveModelHandler) for m in self._target_models):
            raise TypeError("All target models must be OliveModelHandler or dict")
        if len(self._target_models) != len(target_names):
            raise ValueError("Number of target models and names must match")
        self.target_names = target_names

    @property
    def target_models(self):
        """Yield each target model with the shared model_attributes merged in (target values win)."""
        for m in self._target_models:
            m.model_attributes = {**(self.model_attributes or {}), **(m.model_attributes or {})}
            yield m

    def to_json(self, check_object: bool = False):
        """Serialize to JSON, storing only each target's attribute diff against the shared attributes."""
        json_dict = super().to_json(check_object)
        json_dict["config"]["target_models"] = []
        for m in self._target_models:
            target_json = m.to_json(check_object)
            # Keep the serialized form small: per-target attributes are stored as a diff
            # against the shared model_attributes and re-merged in `target_models`.
            target_json["config"]["model_attributes"] = dict_diff(
                target_json["config"]["model_attributes"], self.model_attributes
            )
            json_dict["config"]["target_models"].append(target_json)
        return serialize_to_json(json_dict, check_object)

    def get_target_models(self) -> Iterator[tuple[str, OliveModelHandler]]:
        """Iterate over (target_name, target_model) pairs."""
        return zip(self.target_names, self.target_models)

    def load_model(self, rank: Optional[int] = None, cache_model: bool = True):
        """Not supported: there is no single underlying model to load."""
        raise NotImplementedError

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk."""
        raise NotImplementedError

    def prepare_session(
        self,
        inference_settings: Optional[dict[str, Any]] = None,
        device: Device = Device.CPU,
        execution_providers: Union[str, list[str]] = None,
        rank: Optional[int] = None,
    ):
        """Not supported: sessions belong to the individual target models."""
        raise RuntimeError("MultiTargetModelHandler doesn't have a session of its own")

    def run_session(
        self,
        session: Any = None,
        inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None,
        **kwargs: dict[str, Any],
    ) -> Any:
        """Not supported: sessions belong to the individual target models."""
        raise RuntimeError("MultiTargetModelHandler doesn't have a session of its own")
9 changes: 9 additions & 0 deletions olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,15 @@
"supported_quantization_encodings": [ ],
"run_on_target": true
},
"ModelPackager": {
"module_path": "olive.passes.onnx.model_packager.ModelPackager",
"supported_providers": [ "QNNExecutionProvider", "OpenVINOExecutionProvider" ],
"supported_accelerators": [ "npu", "gpu", "cpu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"run_on_target": true
},
"ExtractAdapters": {
"module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
"supported_providers": [ "*" ],
Expand Down
Loading
Loading