From 3749c9365bd0a2a7e8ee8b45be515a8c5b44af32 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Fri, 27 Mar 2026 03:50:18 +0000
Subject: [PATCH 1/5] Add multi context binary package support

---
 olive/cache.py                                |  56 ++-
 olive/cli/launcher.py                         |   2 +
 olive/cli/model_package.py                    | 119 +++++
 olive/cli/optimize.py                         |   2 +-
 olive/engine/engine.py                        |  15 +-
 olive/model/handler/__init__.py               |   2 +
 olive/model/handler/multi_target.py           | 101 +++++
 olive/olive_config.json                       |   9 +
 olive/passes/olive_pass.py                    |  18 +
 olive/passes/onnx/context_binary.py           |  93 +++-
 olive/passes/onnx/ep_context_packager.py      | 166 +++++++
 olive/passes/onnx/model_builder.py            |   4 +-
 olive/passes/openvino/encapsulation.py        |  78 +++-
 olive/passes/openvino/optimum_intel.py        |  19 +-
 olive/systems/system_config.py                |   4 +-
 test/cli/test_model_package.py                | 275 ++++++++++++
 test/model/test_multi_target_model.py         |  76 ++++
 test/passes/onnx/test_context_binary.py       | 100 +++++
 .../onnx/test_multi_target_context_binary.py  | 407 ++++++++++++++++++
 .../openvino/test_openvino_encapsulation.py   |  88 ++++
 20 files changed, 1604 insertions(+), 30 deletions(-)
 create mode 100644 olive/cli/model_package.py
 create mode 100644 olive/model/handler/multi_target.py
 create mode 100644 olive/passes/onnx/ep_context_packager.py
 create mode 100644 test/cli/test_model_package.py
 create mode 100644 test/model/test_multi_target_model.py
 create mode 100644 test/passes/onnx/test_multi_target_context_binary.py

diff --git a/olive/cache.py b/olive/cache.py
index fe351057b9..42e94cdae5 100644
--- a/olive/cache.py
+++ b/olive/cache.py
@@ -385,13 +385,61 @@ def save_model(
         """Save a model from the cache to a given path."""
         output_dir = Path(output_dir) if output_dir else Path.cwd()
 
-        # If output_dir has a suffix (like .onnx), it's a file path
-        # Use parent directory for saving files
-        actual_output_dir = output_dir.parent if output_dir.suffix else output_dir
+        # Check if output_dir is an existing file; otherwise treat as directory
+        if output_dir.is_file():
+            actual_output_dir = output_dir.parent
+        else:
+            actual_output_dir = output_dir
         actual_output_dir.mkdir(parents=True, exist_ok=True)
 
         model_json = self.load_model(model_id)
-        if model_json["type"].lower() == "compositemodel":
+        if model_json["type"].lower() == "multitargetmodel":
+            model_json_config = model_json["config"]
+            source_path = Path(model_json_config["model_path"])
+            actual_output_dir.mkdir(parents=True, exist_ok=True)
+
+            if source_path.exists():
+                # Only copy target subdirectories (soc_60/, soc_73/, etc.) and manifest.json.
+                # Skip top-level additional_files (tokenizer, config) since each target subdir has its own copy.
+                for item in source_path.iterdir():
+                    dest = actual_output_dir / item.name
+                    if item.is_dir():
+                        shutil.copytree(str(item), str(dest), dirs_exist_ok=overwrite)
+                    elif item.name == "manifest.json":
+                        shutil.copy2(str(item), str(dest))
+
+            # Update paths to point to new location
+            model_json_config["model_path"] = str(actual_output_dir)
+
+            # Update target model paths
+            for target_model in model_json_config.get("target_models", []):
+                target_config = target_model.get("config", {})
+                old_model_path = target_config.get("model_path", "")
+                if old_model_path and str(source_path) in old_model_path:
+                    target_config["model_path"] = old_model_path.replace(str(source_path), str(actual_output_dir))
+
+            # Clear additional_files since each target subdir has its own copies
+            model_attributes = model_json_config.get("model_attributes") or {}
+            model_attributes.pop("additional_files", None)
+
+            # Update manifest_path
+            if model_attributes.get("manifest_path"):
+                model_attributes["manifest_path"] = str(
+                    actual_output_dir / Path(model_attributes["manifest_path"]).name
+                )
+
+            # Update manifest name: if pass config set model_name explicitly, keep it;
+            # otherwise update to the output directory name (e.g., "qwen_2.5_1.5b_Instruct")
+            manifest_file = actual_output_dir / "manifest.json"
+            if manifest_file.exists():
+                manifest = json.loads(manifest_file.read_text())
+                # The pass defaults model_name to the cache dir name (not meaningful).
+                # Replace it with the final output directory name unless it was explicitly configured.
+                source_dir_name = source_path.name if source_path else None
+                if not manifest.get("name") or manifest.get("name") == source_dir_name:
+                    manifest["name"] = actual_output_dir.name
+                manifest_file.write_text(json.dumps(manifest, indent=2))
+        elif model_json["type"].lower() == "compositemodel":
             model_json_config = model_json["config"]
             model_attributes = model_json_config.get("model_attributes") or {}
 
diff --git a/olive/cli/launcher.py b/olive/cli/launcher.py
index d9088bc89b..e37bb5248a 100644
--- a/olive/cli/launcher.py
+++ b/olive/cli/launcher.py
@@ -16,6 +16,7 @@
 from olive.cli.finetune import FineTuneCommand
 from olive.cli.generate_adapter import GenerateAdapterCommand
 from olive.cli.generate_cost_model import GenerateCostModelCommand
+from olive.cli.model_package import ModelPackageCommand
 from olive.cli.optimize import OptimizeCommand
 from olive.cli.quantize import QuantizeCommand
 from olive.cli.run import WorkflowRunCommand
@@ -52,6 +53,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser:
     ConfigureQualcommSDKCommand.register_subcommand(commands_parser)
     SharedCacheCommand.register_subcommand(commands_parser)
     ExtractAdaptersCommand.register_subcommand(commands_parser)
+    ModelPackageCommand.register_subcommand(commands_parser)
     BenchmarkCommand.register_subcommand(commands_parser)
 
     return parser
diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py
new file mode 100644
index 0000000000..bad21ff468
--- /dev/null
+++ b/olive/cli/model_package.py
@@ -0,0 +1,119 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import json
+import logging
+from argparse import ArgumentParser
+from pathlib import Path
+
+from olive.cli.base import BaseOliveCLICommand, add_logging_options, add_telemetry_options
+from olive.common.utils import hardlink_copy_dir
+from olive.telemetry import action
+
+logger = logging.getLogger(__name__)
+
+
+@action
+class ModelPackageCommand(BaseOliveCLICommand):
+    """Merge multiple single-target context binary outputs into a multi-target package with manifest.json."""
+
+    @staticmethod
+    def register_subcommand(parser: ArgumentParser):
+        sub_parser = parser.add_parser(
+            "model-package",
+            help="Merge multiple context binary outputs into a multi-target package with manifest.json",
+        )
+
+        sub_parser.add_argument(
+            "-s",
+            "--source",
+            type=str,
+            action="append",
+            required=True,
+            help=("Source context binary output directory. Can be specified multiple times. "),
+        )
+
+        sub_parser.add_argument(
+            "-o",
+            "--output_path",
+            type=str,
+            required=True,
+            help="Output directory for the merged multi-target package.",
+        )
+
+        sub_parser.add_argument(
+            "--model_name",
+            type=str,
+            default=None,
+            help="Model name for the manifest. If not set, derived from the output directory name.",
+        )
+
+        add_logging_options(sub_parser)
+        add_telemetry_options(sub_parser)
+        sub_parser.set_defaults(func=ModelPackageCommand)
+
+    def run(self):
+        """Merge the validated sources into one package directory.
+
+        Each source is copied to ``<output>/<source dir name>/`` and listed in ``<output>/manifest.json``.
+        """
+        sources = self._parse_sources()
+        output_dir = Path(self.args.output_path)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        manifest = {"name": self.args.model_name or output_dir.name, "components": []}
+
+        for target_name, source_path in sources:
+            # model_path and model_attributes are both read from the "config" section of model_config.json
+            model_cfg = self._read_model_config(source_path).get("config") or {}
+            model_attrs = model_cfg.get("model_attributes") or {}
+
+            # Copy source directory to output/{target_name}/
+            target_dir = output_dir / target_name
+            hardlink_copy_dir(source_path, target_dir)
+
+            constraint_keys = ("ep", "device", "architecture", "precision", "sdk_version")
+            constraints = {key: model_attrs[key] for key in constraint_keys if model_attrs.get(key) is not None}
+
+            # "file" must be a JSON string (a set literal makes json.dump raise TypeError), relative to the
+            # package root. Falls back to "<target>/" when no model file is found in the copied directory.
+            candidate = model_cfg.get("onnx_file_name") or Path(model_cfg.get("model_path") or "").name
+            file_name = candidate if (target_dir / candidate).is_file() else ""
+            manifest["components"].append(
+                {"variant_name": target_name, "file": f"{target_name}/{file_name}", "constraints": constraints}
+            )
+
+        manifest_path = output_dir / "manifest.json"
+        with open(manifest_path, "w") as f:
+            json.dump(manifest, f, indent=2)
+
+        print(f"Merged {len(sources)} targets into {output_dir}")
+
+    def _parse_sources(self) -> list[tuple[str, Path]]:
+        """Validate every --source and return (target name, path) pairs; the name is the directory name."""
+        sources = []
+        for source in self.args.source:
+            path = Path(source)
+            if not path.is_dir():
+                raise ValueError(f"Source path does not exist or is not a directory: {path}")
+            if not (path / "model_config.json").exists():
+                raise ValueError(
+                    f"No model_config.json found in {path}. "
+                    "Source must be an Olive output directory with model_config.json."
+                )
+            # Each target is copied to output/<name>/, so a repeated name would silently overwrite a target
+            if any(path.name == name for name, _ in sources):
+                raise ValueError(f"Duplicate source directory name '{path.name}': {path}")
+            sources.append((path.name, path))
+
+        if len(sources) < 2:
+            raise ValueError("At least two --source directories are required to merge.")
+        return sources
+
+    @staticmethod
+    def _read_model_config(source_path: Path) -> dict:
+        """Read and return model_config.json from a source directory."""
+        config_path = source_path / "model_config.json"
+        with open(config_path) as f:
+            return json.load(f)
diff --git a/olive/cli/optimize.py b/olive/cli/optimize.py
index d80392ecf9..db9e23f2e9 100644
--- a/olive/cli/optimize.py
+++ b/olive/cli/optimize.py
@@ -583,7 +583,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]:
             "add_zero_point": "true",
             "save_as_external_data": "true",
         }
-        config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"]
+        config["nodes_to_exclude"] = ["/lm_head/MatMulNBits"]
         if precision.value == Precision.INT4:
             config["use_int4"] = "true"
         return config
diff --git a/olive/engine/engine.py b/olive/engine/engine.py
index de6b7019a3..39805b3b14 100644
--- a/olive/engine/engine.py
+++ b/olive/engine/engine.py
@@ -195,15 +195,13 @@ def run(
             self.initialize(log_to_file, log_severity_level)
 
         output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
-        if output_dir.suffix:
+        # Check if output_dir is an existing file; otherwise treat as directory
+        if output_dir.is_file():
             output_dir.parent.mkdir(parents=True, exist_ok=True)
+            artifacts_dir = output_dir.parent
         else:
             output_dir.mkdir(parents=True, exist_ok=True)
-
-        # Determine the directory for artifacts (run_history, etc.)
-        # If output_dir is a file path (has suffix), use parent directory
-        # Otherwise use output_dir itself
-        artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
+            artifacts_dir = output_dir
 
         logger.info("Running Olive on accelerator: %s", accelerator_spec)
         with self._create_system():
@@ -255,9 +253,8 @@ def run_accelerator(
         self.footprint.record(is_input_model=True, model_id=input_model_id)
 
         # Determine the directory for artifacts
-        # If output_dir is a file path (has suffix like .onnx), use parent directory
-        # Otherwise use output_dir itself
-        artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
+        # If output_dir is an existing file, use its parent; otherwise use output_dir itself
+        artifacts_dir = output_dir.parent if output_dir.is_file() else output_dir
 
         try:
             if evaluate_input_model and not self.evaluator_config:
diff --git a/olive/model/handler/__init__.py b/olive/model/handler/__init__.py
index 35ae6c0d49..b8514209dc 100644
--- a/olive/model/handler/__init__.py
+++ b/olive/model/handler/__init__.py
@@ -6,6 +6,7 @@
 from olive.model.handler.composite import CompositeModelHandler
 from olive.model.handler.diffusers import DiffusersModelHandler
 from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler
+from olive.model.handler.multi_target import MultiTargetModelHandler
 from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
 from olive.model.handler.openvino import OpenVINOModelHandler
 from olive.model.handler.pytorch import PyTorchModelHandler
@@ -18,6 +19,7 @@
     "DistributedHfModelHandler",
     "DistributedOnnxModelHandler",
     "HfModelHandler",
+    "MultiTargetModelHandler",
     "ONNXModelHandler",
     "OliveModelHandler",
     "OpenVINOModelHandler",
diff --git a/olive/model/handler/multi_target.py b/olive/model/handler/multi_target.py
new file mode 100644
index 0000000000..7a8be4c5c6
--- /dev/null
+++ b/olive/model/handler/multi_target.py
@@ -0,0 +1,101 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import logging
+from typing import Any, Optional, Union
+
+from olive.common.config_utils import serialize_to_json, validate_config
+from olive.common.utils import dict_diff
+from olive.constants import Framework, ModelFileFormat
+from olive.hardware.accelerator import Device
+from olive.model.config.model_config import ModelConfig
+from olive.model.config.registry import model_handler_registry
+from olive.model.handler.base import OliveModelHandler
+from olive.resource_path import OLIVE_RESOURCE_ANNOTATIONS
+
+logger = logging.getLogger(__name__)
+
+
+@model_handler_registry("MultiTargetModel")
+class MultiTargetModelHandler(OliveModelHandler):
+    """MultiTargetModel represents the same model compiled for multiple hardware targets.
+
+    Unlike CompositeModelHandler which holds different component models (e.g., split parts of a pipeline),
+    MultiTargetModelHandler holds the same logical model compiled for different hardware targets
+    (e.g., different SoC models for QNN).
+
+    When a pass encounters a MultiTargetModelHandler, it runs independently on each target model,
+    preserving the multi-target structure through the pipeline.
+    """
+
+    resource_keys: tuple[str, ...] = ("model_path",)
+    json_config_keys: tuple[str, ...] = ("target_names",)
+
+    def __init__(
+        self,
+        target_models: list[Union[OliveModelHandler, dict[str, Any]]],
+        target_names: list[str],
+        model_path: OLIVE_RESOURCE_ANNOTATIONS = None,
+        model_attributes: Optional[dict[str, Any]] = None,
+    ):
+        super().__init__(
+            model_path=model_path,
+            framework=Framework.ONNX,
+            model_file_format=ModelFileFormat.COMPOSITE_MODEL,
+            model_attributes=model_attributes,
+        )
+        self._target_models = [
+            validate_config(m, ModelConfig).create_model() if isinstance(m, dict) else m for m in target_models
+        ]
+        assert all(isinstance(m, OliveModelHandler) for m in self._target_models), (
+            "All target models must be OliveModelHandler or dict"
+        )
+        assert len(self._target_models) == len(target_names), "Number of target models and names must match"
+        self.target_names = target_names
+
+    @property
+    def target_models(self):
+        for m in self._target_models:
+            m.model_attributes = {**(self.model_attributes or {}), **(m.model_attributes or {})}
+            yield m
+
+    def to_json(self, check_object: bool = False):
+        json_dict = super().to_json(check_object)
+        json_dict["config"]["target_models"] = []
+        for m in self._target_models:
+            target_json = m.to_json(check_object)
+            target_json["config"]["model_attributes"] = dict_diff(
+                target_json["config"]["model_attributes"], self.model_attributes
+            )
+            json_dict["config"]["target_models"].append(target_json)
+        return serialize_to_json(json_dict, check_object)
+
+    def get_target_models(self) -> list[tuple[str, OliveModelHandler]]:
+        """Return (target_name, target_model) pairs as a list, matching the annotated return type."""
+        return list(zip(self.target_names, self.target_models))
+
+    def load_model(self, rank: int = None, cache_model: bool = True):
+        raise NotImplementedError
+
+    @property
+    def size_on_disk(self) -> int:
+        """Compute size of the model on disk."""
+        raise NotImplementedError
+
+    def prepare_session(
+        self,
+        inference_settings: Optional[dict[str, Any]] = None,
+        device: Device = Device.CPU,
+        execution_providers: Union[str, list[str]] = None,
+        rank: Optional[int] = None,
+    ):
+        raise RuntimeError("MultiTargetModelHandler doesn't have a session of its own")
+
+    def run_session(
+        self,
+        session: Any = None,
+        inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None,
+        **kwargs: dict[str, Any],
+    ) -> Any:
+        raise RuntimeError("MultiTargetModelHandler doesn't have a session of its own")
diff --git a/olive/olive_config.json b/olive/olive_config.json
index 66613a779d..73b05b9bd8 100644
--- a/olive/olive_config.json
+++ b/olive/olive_config.json
@@ -78,6 +78,15 @@
             "supported_quantization_encodings": [  ],
             "run_on_target": true
         },
+        "EPContextBinaryPackager": {
+            "module_path": "olive.passes.onnx.ep_context_packager.EPContextBinaryPackager",
+            "supported_providers": [ "QNNExecutionProvider", "OpenVINOExecutionProvider" ],
+            "supported_accelerators": [ "npu", "gpu", "cpu" ],
+            "supported_precisions": [ "*" ],
+            "supported_algorithms": [  ],
+            "supported_quantization_encodings": [  ],
+            "run_on_target": true
+        },
         "ExtractAdapters": {
             "module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
             "supported_providers": [ "*" ],
diff --git a/olive/passes/olive_pass.py b/olive/passes/olive_pass.py
index 627202a0c7..ebb9cd1c73 100644
--- a/olive/passes/olive_pass.py
+++ b/olive/passes/olive_pass.py
@@ -48,6 +48,9 @@ class Pass(ABC):
     # True if the pass processes a composite model at once. Otherwise, the components of the
     # composite model will be processed individually.
     _accepts_composite_model: bool = False
+    # True if the pass processes a multi-target model at once. Otherwise, each target
+    # will be processed independently.
+    _accepts_multi_target_model: bool = False
 
     @classmethod
     def __init_subclass__(cls, **kwargs) -> None:
@@ -206,6 +209,7 @@ def validate_config(
     def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHandler:
         """Run the pass on the model at a specific point in the search space."""
         from olive.model import CompositeModelHandler, DistributedOnnxModelHandler
+        from olive.model.handler.multi_target import MultiTargetModelHandler
 
         if not self._initialized:
             self._initialize()
@@ -227,6 +231,20 @@ def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHan
                 inference_settings=model.inference_settings,
                 model_attributes=model.model_attributes,
             )
+        elif isinstance(model, MultiTargetModelHandler) and not self._accepts_multi_target_model:
+            # Run the pass independently for each hardware target
+            targets = []
+            target_names = []
+            model_dir = Path(output_model_path).with_suffix("")
+            model_dir.mkdir(parents=True, exist_ok=True)
+            for target_name, target_model in model.get_target_models():
+                target_output_path = model_dir / target_name
+                output_target = self.run(target_model, str(target_output_path))
+                targets.append(output_target)
+                target_names.append(target_name)
+            output_model = MultiTargetModelHandler(
+                targets, target_names, model_path=model_dir, model_attributes=model.model_attributes
+            )
         elif isinstance(model, CompositeModelHandler) and not self._accepts_composite_model:
             components = []
             component_names = []
diff --git a/olive/passes/onnx/context_binary.py b/olive/passes/onnx/context_binary.py
index d802fcc575..bf0f16b443 100644
--- a/olive/passes/onnx/context_binary.py
+++ b/olive/passes/onnx/context_binary.py
@@ -13,6 +13,7 @@
 from olive.hardware.accelerator import AcceleratorSpec, Device
 from olive.hardware.constants import ExecutionProvider
 from olive.model import CompositeModelHandler, ONNXModelHandler
+from olive.model.handler.multi_target import MultiTargetModelHandler
 from olive.model.utils import resolve_onnx_path
 from olive.passes import Pass
 from olive.passes.onnx.common import (
@@ -26,7 +27,11 @@
 
 
 class EPContextBinaryGenerator(Pass):
-    """Generate EP specific context binary for the model."""
+    """Generate EP specific context binary for the model.
+
+    When provider_options is a list of dicts, generates context binaries for each set of provider options
+    (e.g., multiple SoC models) and returns a MultiTargetModelHandler.
+    """
 
     _accepts_composite_model = True
 
@@ -47,9 +52,13 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon
                 ),
             ),
             "provider_options": PassConfigParam(
-                type_=dict,
+                type_=Union[dict, list],
                 default_value=None,
-                description="Provider options for the EP.",
+                description=(
+                    "Provider options for the EP. Can be a single dict or a list of dicts for multi-target"
+                    " generation (e.g., multiple SoC models). When a list is provided, context binaries are"
+                    " generated for each set of options and returned as a MultiTargetModelHandler."
+                ),
             ),
             "session_options": PassConfigParam(
                 type_=dict,
@@ -73,9 +82,7 @@ def _run_for_config(
         model: Union[ONNXModelHandler, CompositeModelHandler],
         config: type[BasePassConfig],
         output_model_path: str,
-    ) -> Union[ONNXModelHandler, CompositeModelHandler]:
-        from onnxruntime import __version__ as OrtVersion
-
+    ) -> Union[ONNXModelHandler, CompositeModelHandler, MultiTargetModelHandler]:
         # session created using providers argument so will use the ort.get_available_providers()
         # TODO(jambayk): consider switching to the new EP API for Windows
         from onnxruntime import get_available_providers
@@ -89,6 +96,80 @@ def _run_for_config(
             f" {get_available_providers()}"
         )
 
+        # Multi-target mode: provider_options is a list of dicts
+        if isinstance(config.provider_options, list):
+            return self._run_multi_target(model, config, output_model_path)
+
+        # Single-target mode: existing behavior
+        result = self._run_single_target(model, config, output_model_path)
+
+        # Populate model_attributes with context binary metadata so it persists in model_config.json
+        result.model_attributes = {**(model.model_attributes or {}), **(result.model_attributes or {})}
+        result.model_attributes["ep"] = self.accelerator_spec.execution_provider
+        result.model_attributes["device"] = str(self.accelerator_spec.accelerator_type).upper()
+        if config.provider_options:
+            result.model_attributes["provider_options"] = config.provider_options
+            result.model_attributes["architecture"] = config.provider_options.get("soc_model")
+
+        return result
+
+    def _run_multi_target(
+        self,
+        model: Union[ONNXModelHandler, CompositeModelHandler],
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> MultiTargetModelHandler:
+        """Generate context binaries for multiple hardware targets.
+
+        Each entry in config.provider_options is one target, compiled via _run_single_target into
+        <output>/soc_<soc_model> (soc_<index> when the entry has no soc_model). The result is a
+        MultiTargetModelHandler wrapping the per-target outputs.
+        """
+        provider_options_list = config.provider_options
+        assert all(isinstance(po, dict) for po in provider_options_list), (
+            "Each entry in provider_options list must be a dict"
+        )
+
+        output_dir = Path(output_model_path).with_suffix("")
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        targets = []
+        target_names = []
+        for idx, provider_options in enumerate(provider_options_list):
+            target_name = f"soc_{provider_options.get('soc_model', idx)}"
+            target_output_path = str(output_dir / target_name)
+
+            # Deep-copy config, then override provider_options with this target's entry
+            single_config = deepcopy(config)
+            object.__setattr__(single_config, "provider_options", provider_options)
+
+            result = self._run_single_target(model, single_config, target_output_path)
+            # Store target-specific metadata on top of the input model's and the result's own attributes
+            result.model_attributes = {**(model.model_attributes or {}), **(result.model_attributes or {})}
+            result.model_attributes["ep"] = self.accelerator_spec.execution_provider
+            result.model_attributes["device"] = str(self.accelerator_spec.accelerator_type).upper()
+            result.model_attributes["provider_options"] = provider_options
+            result.model_attributes["architecture"] = provider_options.get("soc_model")
+
+            targets.append(result)
+            target_names.append(target_name)
+
+        return MultiTargetModelHandler(
+            targets,
+            target_names,
+            model_path=output_dir,
+            model_attributes=model.model_attributes,
+        )
+
+    def _run_single_target(
+        self,
+        model: Union[ONNXModelHandler, CompositeModelHandler],
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> Union[ONNXModelHandler, CompositeModelHandler]:
+        """Generate context binary for a single target. This is the original logic."""
+        from onnxruntime import __version__ as OrtVersion
+
         generate_kwargs = {
             "execution_provider": self.accelerator_spec.execution_provider,
             "provider_options": config.provider_options,
diff --git a/olive/passes/onnx/ep_context_packager.py b/olive/passes/onnx/ep_context_packager.py
new file mode 100644
index 0000000000..27114f84be
--- /dev/null
+++ b/olive/passes/onnx/ep_context_packager.py
@@ -0,0 +1,166 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import json
+import logging
+import shutil
+from pathlib import Path
+from typing import Union
+
+from olive.hardware.accelerator import AcceleratorSpec
+from olive.model import CompositeModelHandler, ONNXModelHandler
+from olive.model.handler.multi_target import MultiTargetModelHandler
+from olive.passes import Pass
+from olive.passes.pass_config import BasePassConfig, PassConfigParam
+
+logger = logging.getLogger(__name__)
+
+
+class EPContextBinaryPackager(Pass):
+    """Generate a manifest.json metadata file for multi-target EP context binaries.
+
+    This pass takes a MultiTargetModelHandler (produced by EPContextBinaryGenerator with
+    a list of provider_options) and generates a manifest.json file describing each target's
+    context binary with metadata required by ONNX Runtime.
+
+    The manifest includes:
+    - ep: execution provider name
+    - device_type: CPU, NPU, or GPU
+    - architecture: hardware architecture (e.g., SoC model)
+    - precision: model precision (from model_attributes)
+    - sdk_version: optional SDK version
+    - compile_options: optional compilation options
+    """
+
+    _accepts_composite_model = True
+    _accepts_multi_target_model = True
+
+    @classmethod
+    def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
+        return {
+            "model_name": PassConfigParam(
+                type_=str,
+                default_value=None,
+                description="Model name for the manifest. If not set, derived from the output directory name.",
+            ),
+            "sdk_version": PassConfigParam(
+                type_=str,
+                default_value=None,
+                description="SDK version string (e.g., 'qnn_sdk_2.28').",
+            ),
+            "compile_options": PassConfigParam(
+                type_=dict,
+                default_value=None,
+                description="Additional compile options to include in the manifest (e.g., dynamic shape, batch size).",
+            ),
+        }
+
+    @staticmethod
+    def is_accelerator_agnostic(accelerator_spec: AcceleratorSpec) -> bool:
+        return False
+
+    def _run_for_config(
+        self,
+        model: MultiTargetModelHandler,
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> MultiTargetModelHandler:
+        assert isinstance(model, MultiTargetModelHandler), (
+            "EPContextBinaryPackager requires a MultiTargetModelHandler as input. "
+            "Use EPContextBinaryGenerator with a list of provider_options to produce one."
+        )
+
+        output_dir = Path(output_model_path).with_suffix("")
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Derive model name from config or output directory
+        model_name = config.model_name or output_dir.name
+
+        manifest = {"name": model_name, "components": []}
+
+        for target_name, target_model in model.get_target_models():
+            target_attrs = target_model.model_attributes or {}
+
+            # Copy target model files to output directory
+            self._copy_target_model(target_name, target_model, output_dir)
+
+            # Determine the model path relative to output directory
+            model_path = self._get_relative_model_path(target_name, target_model)
+
+            entry = {
+                "variant_name": target_name,
+                "file": model_path,
+                "constraints": {
+                    "ep": self.accelerator_spec.execution_provider,
+                    "device": target_attrs.get("target_device", str(self.accelerator_spec.accelerator_type).upper()),
+                    "architecture": target_attrs.get("architecture", target_name),
+                },
+            }
+
+            # Add precision from model_attributes if available
+            precision = target_attrs.get("precision")
+            if precision:
+                entry["constraints"]["precision"] = precision
+
+            # Add sdk_version from model_attributes or config
+            sdk_version = target_attrs.get("sdk_version") or config.sdk_version
+            if sdk_version:
+                entry["constraints"]["sdk_version"] = sdk_version
+            if config.compile_options:
+                entry["constraints"]["compile_options"] = config.compile_options
+
+            manifest["components"].append(entry)
+
+        # Write manifest.json
+        manifest_path = output_dir / "manifest.json"
+        with open(manifest_path, "w") as f:
+            json.dump(manifest, f, indent=2)
+        logger.info("Generated manifest at %s", manifest_path)
+
+        # Update model_attributes to include manifest path
+        # Remove additional_files since each target subfolder already contains its own tokenizer/config files
+        new_model_attributes = model.model_attributes or {}
+        new_model_attributes = {**new_model_attributes, "manifest_path": str(manifest_path)}
+        new_model_attributes.pop("additional_files", None)
+
+        # Return the same MultiTargetModelHandler with updated attributes and path
+        return MultiTargetModelHandler(
+            [target_model for _, target_model in model.get_target_models()],
+            [target_name for target_name, _ in model.get_target_models()],
+            model_path=output_dir,
+            model_attributes=new_model_attributes,
+        )
+
+    @staticmethod
+    def _copy_target_model(
+        target_name: str,
+        target_model: Union[ONNXModelHandler, CompositeModelHandler],
+        output_dir: Path,
+    ) -> None:
+        """Copy target model files to the output directory under target_name/."""
+        dest_dir = output_dir / target_name
+        if dest_dir.exists():
+            return
+
+        if isinstance(target_model, CompositeModelHandler):
+            src_dir = Path(target_model.model_path)
+        else:
+            src_dir = Path(target_model.model_path).parent
+
+        if src_dir.is_dir():
+            shutil.copytree(str(src_dir), str(dest_dir))
+        else:
+            dest_dir.mkdir(parents=True, exist_ok=True)
+            shutil.copy2(str(target_model.model_path), str(dest_dir))
+
+    @staticmethod
+    def _get_relative_model_path(
+        target_name: str,
+        target_model: Union[ONNXModelHandler, CompositeModelHandler],
+    ) -> str:
+        """Get the model path relative to the target name for the manifest."""
+        if isinstance(target_model, ONNXModelHandler):
+            return f"{target_name}/{Path(target_model.model_path).name}"
+        # For CompositeModelHandler or other types, use the directory
+        return f"{target_name}/"
diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py
index 978744ec1c..e2539fecac 100644
--- a/olive/passes/onnx/model_builder.py
+++ b/olive/passes/onnx/model_builder.py
@@ -214,12 +214,12 @@ def _run_for_config(
     ) -> ONNXModelHandler:
         try:
             from onnxruntime_genai.models.builder import create_model
-        except ImportError:
+        except ImportError as e:
             raise ImportError(
                 "onnxruntime-genai package is required to run ModelBuilder pass. Please install the package"
                 " corresponding to your onnxruntime installation using pip. cpu: onnxruntime-genai, cuda:"
                 " onnxruntime-genai-cuda, directml: onnxruntime-genai-directml"
-            ) from None
+            ) from e
         self.maybe_patch_quant()
 
         precision = config.precision
diff --git a/olive/passes/openvino/encapsulation.py b/olive/passes/openvino/encapsulation.py
index c8e24a2b37..59f000fa26 100644
--- a/olive/passes/openvino/encapsulation.py
+++ b/olive/passes/openvino/encapsulation.py
@@ -4,6 +4,7 @@
 # --------------------------------------------------------------------------
 import logging
 import os
+from copy import deepcopy
 from pathlib import Path
 from typing import ClassVar, Union
 
@@ -13,6 +14,7 @@
 from olive.common.utils import hardlink_copy_dir, hardlink_copy_file
 from olive.hardware.accelerator import AcceleratorSpec, Device
 from olive.model import ONNXModelHandler, OpenVINOModelHandler
+from olive.model.handler.multi_target import MultiTargetModelHandler
 from olive.passes import Pass
 from olive.passes.openvino.ov_utils import create_genai_config
 from olive.passes.pass_config import BasePassConfig, PassConfigParam
@@ -21,7 +23,11 @@
 
 
 class OpenVINOEncapsulation(Pass):
-    """Encapsulates OpenVINO models with onnx context nodes."""
+    """Encapsulates OpenVINO models with onnx context nodes.
+
+    When ov_version is a list of strings, generates encapsulated models for each version
+    and returns a MultiTargetModelHandler.
+    """
 
     openvino_to_onnx_dtype: ClassVar[dict] = {
         "f32": TensorProto.FLOAT,
@@ -62,12 +68,14 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon
                 description=("Device the encapsulated model should run on. Available devices are cpu, gpu, npu."),
             ),
             "ov_version": PassConfigParam(
-                type_=str,
+                type_=Union[str, list],
                 default_value=None,
                 required=False,
                 description=(
-                    "Name of the OpenVINO version to override in model SDK version."
-                    "Requires a minimum version of OpenVINO 2025.1"
+                    "OpenVINO version to override in model SDK version. Can be a single string or a list"
+                    " of strings for multi-target generation. When a list is provided, encapsulated models"
+                    " are generated for each version and returned as a MultiTargetModelHandler."
+                    " Requires a minimum version of OpenVINO 2025.1"
                 ),
             ),
             "opset_imports": PassConfigParam(
@@ -114,7 +122,59 @@ def _run_for_config(
         model: Union[OpenVINOModelHandler],
         config: type[BasePassConfig],
         output_model_path: str,
+    ) -> Union[ONNXModelHandler, MultiTargetModelHandler]:
+        # Multi-target mode: ov_version is a list of strings
+        if isinstance(config.ov_version, list):
+            return self._run_multi_target(model, config, output_model_path)
+
+        # Single-target mode: existing behavior
+        return self._run_single_target(model, config, output_model_path)
+
+    def _run_multi_target(
+        self,
+        model: Union[OpenVINOModelHandler],
+        config: type[BasePassConfig],
+        output_model_path: str,
+    ) -> MultiTargetModelHandler:
+        """Encapsulate the model once per OpenVINO version listed in config.ov_version.
+
+        Each version is encapsulated with ov_<version> under the output directory as its
+        output path; the per-version handlers are returned in one MultiTargetModelHandler.
+        """
+        versions = config.ov_version
+        assert all(isinstance(v, str) for v in versions), "Each entry in ov_version list must be a string"
+
+        # Drop any file suffix so the output path can be used as the package directory
+        package_dir = Path(output_model_path).with_suffix("")
+        package_dir.mkdir(parents=True, exist_ok=True)
+
+        handlers = []
+        names = []
+        for version in versions:
+            name = f"ov_{version}"
+            target_dir = str(package_dir / name)
+
+            # The single-target path gets a copy of the config carrying just this version
+            per_version_config = deepcopy(config)
+            object.__setattr__(per_version_config, "ov_version", version)
+
+            handlers.append(self._run_single_target(model, per_version_config, target_dir))
+            names.append(name)
+
+        return MultiTargetModelHandler(
+            handlers,
+            names,
+            model_path=package_dir,
+            model_attributes=model.model_attributes,
+        )
+
+    def _run_single_target(
+        self,
+        model: Union[OpenVINOModelHandler],
+        config: type[BasePassConfig],
+        output_model_path: str,
     ) -> ONNXModelHandler:
+        """Encapsulate a single OpenVINO model. This is the original logic."""
         try:
             import openvino as ov
         except ImportError:
@@ -245,7 +305,15 @@ def _run_for_config(
         # generate the genai_config.json file for GenAI models
         create_genai_config(context_model_output, output_model_path, config)
 
-        return ONNXModelHandler(model_path=output_model_path)
+        # Populate model_attributes with context binary metadata so it persists in model_config.json
+        context_binary_attrs = {
+            **(model.model_attributes or {}),
+            "ep": "OpenVINOExecutionProvider",
+            "device": str(config.target_device).upper(),
+            "sdk_version": ov_version,
+        }
+
+        return ONNXModelHandler(model_path=output_model_path, model_attributes=context_binary_attrs)
 
 
 def extract_shape_list(shape, config, prefix: str = "input_0_") -> list:
diff --git a/olive/passes/openvino/optimum_intel.py b/olive/passes/openvino/optimum_intel.py
index 2105f512f2..e051ec05ef 100644
--- a/olive/passes/openvino/optimum_intel.py
+++ b/olive/passes/openvino/optimum_intel.py
@@ -3,6 +3,7 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 import logging
+import os
 from copy import deepcopy
 from pathlib import Path
 from typing import Any, Optional, Union
@@ -497,6 +498,16 @@ def _run_for_config(
             extra_args.pop("disable_convert_tokenizer", False)
             extra_args["library_name"] = lib_name
             extra_args.pop("library", None)
+
+            # Workaround for optimum-intel using Path.rename() which fails across filesystems.
+            # Set tempdir to output path so temp files are on the same filesystem as the cache.
+            import tempfile
+
+            original_tmpdir = os.environ.get("TMPDIR")
+            original_tempdir = tempfile.tempdir
+            os.environ["TMPDIR"] = output_model_path
+            tempfile.tempdir = output_model_path
+
             export_optimum_intel(
                 model.model_name_or_path,
                 output_model_path,
@@ -516,7 +527,13 @@ def _run_for_config(
                     model_kwargs=model.load_kwargs.__dict__ if model.load_kwargs else None,
                 )
         except Exception as e:
-            raise RuntimeError(f"OpenVINO optimum export failed: {e}") from None
+            raise RuntimeError(f"OpenVINO optimum export failed: {e}") from e
+        finally:
+            tempfile.tempdir = original_tempdir
+            if original_tmpdir is None:
+                os.environ.pop("TMPDIR", None)
+            else:
+                os.environ["TMPDIR"] = original_tmpdir
 
         # check the exported components
         exported_models = [name.stem for name in Path(output_model_path).iterdir() if name.suffix == ".xml"]
diff --git a/olive/systems/system_config.py b/olive/systems/system_config.py
index dab5da3503..5addeadc61 100644
--- a/olive/systems/system_config.py
+++ b/olive/systems/system_config.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from pydantic import ConfigDict, Field, field_validator
+from pydantic import ConfigDict, Field, SerializeAsAny, field_validator
 
 from olive.common.config_utils import ConfigBase, NestedConfig, validate_config
 from olive.systems.common import AcceleratorConfig, SystemType
@@ -88,7 +88,7 @@ def import_system_from_type(system_type: SystemType):
 
 class SystemConfig(NestedConfig):
     type: SystemType
-    config: Optional[TargetUserConfig] = Field(default=None, validate_default=True)
+    config: Optional[SerializeAsAny[TargetUserConfig]] = Field(default=None, validate_default=True)
 
     @field_validator("config", mode="before")
     @classmethod
diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py
new file mode 100644
index 0000000000..6e5d9c5b31
--- /dev/null
+++ b/test/cli/test_model_package.py
@@ -0,0 +1,275 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import json
+
+import pytest
+
+from olive.cli.model_package import ModelPackageCommand
+
+
+def _create_source_dir(tmp_path, name, model_attributes, model_type="ONNXModel"):
+    """Create tmp_path/name with a model_config.json and two dummy model files; return the directory."""
+    source_dir = tmp_path / name
+    source_dir.mkdir(parents=True)
+
+    model_config = {
+        "type": model_type,
+        "config": {
+            "model_path": str(source_dir),
+            "model_attributes": model_attributes,
+        },
+    }
+    (source_dir / "model_config.json").write_text(json.dumps(model_config))
+
+    # Dummy context model and context binary
+    for file_name in ("model_ctx.onnx", "model_ctx_QnnHtp_ctx.bin"):
+        (source_dir / file_name).write_text("dummy")
+
+    return source_dir
+
+
+class TestModelPackageCommand:
+    def _run_command(self, args):
+        from argparse import ArgumentParser
+
+        parser = ArgumentParser()
+        commands_parser = parser.add_subparsers()
+        ModelPackageCommand.register_subcommand(commands_parser)
+        parsed_args, unknown = parser.parse_known_args(args)
+        cmd = parsed_args.func(parser, parsed_args, unknown)  # presumably set by register_subcommand - confirm
+        cmd.run()
+
+    def test_merge_two_targets(self, tmp_path):
+        """Merge two named QNN sources; check manifest entries, constraints and copied model files."""
+        soc_60 = _create_source_dir(
+            tmp_path,
+            "soc_60",
+            {
+                "ep": "QNNExecutionProvider",
+                "device": "NPU",
+                "architecture": "60",
+                "precision": "int4",
+            },
+        )
+        soc_73 = _create_source_dir(
+            tmp_path,
+            "soc_73",
+            {
+                "ep": "QNNExecutionProvider",
+                "device": "NPU",
+                "architecture": "73",
+                "precision": "int4",
+            },
+        )
+
+        output_dir = tmp_path / "output"
+        self._run_command(
+            [
+                "model-package",
+                "--source",
+                f"soc_60={soc_60}",
+                "--source",
+                f"soc_73={soc_73}",
+                "-o",
+                str(output_dir),
+            ]
+        )
+
+        # Check manifest.json
+        manifest_path = output_dir / "manifest.json"
+        assert manifest_path.exists()
+
+        with open(manifest_path) as f:
+            manifest = json.load(f)
+
+        assert len(manifest["components"]) == 2
+        assert manifest["name"] == "output"  # presumably taken from the output directory name - confirm
+        assert manifest["components"][0]["variant_name"] == "soc_60"
+        assert manifest["components"][0]["file"] == "soc_60/"
+        assert manifest["components"][0]["constraints"]["ep"] == "QNNExecutionProvider"
+        assert manifest["components"][0]["constraints"]["device"] == "NPU"
+        assert manifest["components"][0]["constraints"]["architecture"] == "60"
+        assert manifest["components"][0]["constraints"]["precision"] == "int4"
+        assert manifest["components"][1]["variant_name"] == "soc_73"
+        assert manifest["components"][1]["file"] == "soc_73/"
+        assert manifest["components"][1]["constraints"]["architecture"] == "73"
+
+        # Check files were copied
+        assert (output_dir / "soc_60" / "model_ctx.onnx").exists()
+        assert (output_dir / "soc_73" / "model_ctx.onnx").exists()
+
+    def test_merge_infer_name_from_dir(self, tmp_path):
+        """Sources given as bare paths (no name= prefix) get manifest entries named after their directory."""
+        soc_60 = _create_source_dir(
+            tmp_path,
+            "soc_60",
+            {"ep": "QNNExecutionProvider", "device": "NPU"},
+        )
+        soc_73 = _create_source_dir(
+            tmp_path,
+            "soc_73",
+            {"ep": "QNNExecutionProvider", "device": "NPU"},
+        )
+
+        output_dir = tmp_path / "output"
+        self._run_command(
+            [
+                "model-package",
+                "--source",
+                str(soc_60),
+                "--source",
+                str(soc_73),
+                "-o",
+                str(output_dir),
+            ]
+        )
+
+        with open(output_dir / "manifest.json") as f:
+            manifest = json.load(f)
+
+        assert manifest["components"][0]["file"] == "soc_60/"
+        assert manifest["components"][1]["file"] == "soc_73/"
+
+    def test_merge_openvino_targets(self, tmp_path):
+        """Merge two OpenVINO sources; check that ep and sdk_version appear in the constraints."""
+        ov_2025_1 = _create_source_dir(
+            tmp_path,
+            "ov_2025.1",
+            {
+                "ep": "OpenVINOExecutionProvider",
+                "device": "NPU",
+                "sdk_version": "2025.1",
+                "architecture": "NPU",
+            },
+        )
+        ov_2025_2 = _create_source_dir(
+            tmp_path,
+            "ov_2025.2",
+            {
+                "ep": "OpenVINOExecutionProvider",
+                "device": "NPU",
+                "sdk_version": "2025.2",
+                "architecture": "NPU",
+            },
+        )
+
+        output_dir = tmp_path / "output"
+        self._run_command(
+            [
+                "model-package",
+                "--source",
+                f"ov_2025.1={ov_2025_1}",
+                "--source",
+                f"ov_2025.2={ov_2025_2}",
+                "-o",
+                str(output_dir),
+            ]
+        )
+
+        with open(output_dir / "manifest.json") as f:
+            manifest = json.load(f)
+
+        assert len(manifest["components"]) == 2
+        assert manifest["components"][0]["constraints"]["ep"] == "OpenVINOExecutionProvider"
+        assert manifest["components"][0]["constraints"]["sdk_version"] == "2025.1"
+        assert manifest["components"][1]["constraints"]["sdk_version"] == "2025.2"
+
+    def test_merge_rejects_single_source(self, tmp_path):
+        """A single --source is rejected with a ValueError whose message matches 'At least two'."""
+        soc_60 = _create_source_dir(
+            tmp_path,
+            "soc_60",
+            {"ep": "QNNExecutionProvider"},
+        )
+
+        with pytest.raises(ValueError, match="At least two"):
+            self._run_command(
+                [
+                    "model-package",
+                    "--source",
+                    str(soc_60),
+                    "-o",
+                    str(tmp_path / "output"),
+                ]
+            )
+
+    def test_merge_rejects_missing_model_config(self, tmp_path):
+        """A source directory without model_config.json is rejected with a ValueError."""
+        source_dir = tmp_path / "no_config"
+        source_dir.mkdir()
+
+        another = _create_source_dir(
+            tmp_path,
+            "valid",
+            {"ep": "QNNExecutionProvider"},
+        )
+
+        with pytest.raises(ValueError, match="model_config.json"):
+            self._run_command(
+                [
+                    "model-package",
+                    "--source",
+                    str(source_dir),
+                    "--source",
+                    str(another),
+                    "-o",
+                    str(tmp_path / "output"),
+                ]
+            )
+
+    def test_merge_rejects_nonexistent_path(self, tmp_path):
+        """A source path that does not exist is rejected with a ValueError."""
+        valid = _create_source_dir(
+            tmp_path,
+            "valid",
+            {"ep": "QNNExecutionProvider"},
+        )
+
+        with pytest.raises(ValueError, match="does not exist"):
+            self._run_command(
+                [
+                    "model-package",
+                    "--source",
+                    "/nonexistent/path",
+                    "--source",
+                    str(valid),
+                    "-o",
+                    str(tmp_path / "output"),
+                ]
+            )
+
+    def test_merge_optional_fields_omitted(self, tmp_path):
+        """Constraints omit precision, sdk_version and architecture when model_attributes lacks them."""
+        soc_60 = _create_source_dir(
+            tmp_path,
+            "soc_60",
+            {"ep": "QNNExecutionProvider", "device": "NPU"},
+        )
+        soc_73 = _create_source_dir(
+            tmp_path,
+            "soc_73",
+            {"ep": "QNNExecutionProvider", "device": "NPU"},
+        )
+
+        output_dir = tmp_path / "output"
+        self._run_command(
+            [
+                "model-package",
+                "--source",
+                str(soc_60),
+                "--source",
+                str(soc_73),
+                "-o",
+                str(output_dir),
+            ]
+        )
+
+        with open(output_dir / "manifest.json") as f:
+            manifest = json.load(f)
+
+        # Only the first component is inspected: none of the optional keys may be present
+        assert "precision" not in manifest["components"][0]["constraints"]
+        assert "sdk_version" not in manifest["components"][0]["constraints"]
+        assert "architecture" not in manifest["components"][0]["constraints"]
diff --git a/test/model/test_multi_target_model.py b/test/model/test_multi_target_model.py
new file mode 100644
index 0000000000..ebfdfe3cd5
--- /dev/null
+++ b/test/model/test_multi_target_model.py
@@ -0,0 +1,76 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import pytest
+
+from olive.model import ONNXModelHandler
+from olive.model.handler.multi_target import MultiTargetModelHandler
+
+
+def _make_onnx_handler(tmp_path, name="model", model_attributes=None):
+    """Write a dummy <name>/<name>.onnx under tmp_path and wrap it in an ONNXModelHandler."""
+    onnx_file = tmp_path / name / f"{name}.onnx"
+    (tmp_path / name).mkdir(parents=True, exist_ok=True)
+    onnx_file.write_text("dummy")
+    return ONNXModelHandler(model_path=str(onnx_file), model_attributes=model_attributes)
+
+
+class TestMultiTargetModelHandler:
+    def test_create_multi_target_handler(self, tmp_path):
+        h1 = _make_onnx_handler(tmp_path, "t1")
+        h2 = _make_onnx_handler(tmp_path, "t2")
+
+        mt = MultiTargetModelHandler([h1, h2], ["t1", "t2"], model_path=tmp_path)
+
+        assert mt.target_names == ["t1", "t2"]
+        pairs = list(mt.get_target_models())  # (target_name, target_model) pairs, in construction order
+        assert len(pairs) == 2
+        assert pairs[0][0] == "t1"
+        assert pairs[1][0] == "t2"
+
+    def test_multi_target_handler_inherits_attributes(self, tmp_path):
+        """Parent-level model_attributes are visible on each target; target-specific ones are kept."""
+        h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"})
+        h2 = _make_onnx_handler(tmp_path, "t2", model_attributes={"architecture": "73"})
+
+        mt = MultiTargetModelHandler(
+            [h1, h2],
+            ["t1", "t2"],
+            model_path=tmp_path,
+            model_attributes={"ep": "QNNExecutionProvider", "device": "NPU"},
+        )
+
+        for _, target in mt.get_target_models():
+            # Attributes given only to the parent handler must be readable from every target
+            assert target.model_attributes["ep"] == "QNNExecutionProvider"
+            assert target.model_attributes["device"] == "NPU"
+
+        # Attributes given to the individual targets must survive alongside the parent's
+        pairs = list(mt.get_target_models())
+        assert pairs[0][1].model_attributes["architecture"] == "60"
+        assert pairs[1][1].model_attributes["architecture"] == "73"
+
+    def test_multi_target_handler_to_json(self, tmp_path):
+        h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"})
+        h2 = _make_onnx_handler(tmp_path, "t2", model_attributes={"architecture": "73"})
+
+        mt = MultiTargetModelHandler(
+            [h1, h2],
+            ["t1", "t2"],
+            model_path=tmp_path,
+            model_attributes={"ep": "QNNExecutionProvider"},
+        )
+
+        json_dict = mt.to_json()
+
+        assert json_dict["type"].lower() == "multitargetmodel"
+        assert json_dict["config"]["target_names"] == ["t1", "t2"]
+        assert len(json_dict["config"]["target_models"]) == 2
+        # Parent-level "ep" is serialized in the parent config (per-target content is not checked here)
+        assert json_dict["config"]["model_attributes"]["ep"] == "QNNExecutionProvider"
+
+    def test_multi_target_handler_mismatched_names_raises(self, tmp_path):
+        h1 = _make_onnx_handler(tmp_path, "t1")
+        with pytest.raises(AssertionError, match="Number of target models and names must match"):
+            MultiTargetModelHandler([h1], ["t1", "t2"], model_path=tmp_path)  # one model, two names
diff --git a/test/passes/onnx/test_context_binary.py b/test/passes/onnx/test_context_binary.py
index deee87c550..d760296459 100644
--- a/test/passes/onnx/test_context_binary.py
+++ b/test/passes/onnx/test_context_binary.py
@@ -9,6 +9,7 @@
 
 from olive.hardware.accelerator import AcceleratorSpec
 from olive.model import CompositeModelHandler, ONNXModelHandler
+from olive.model.handler.multi_target import MultiTargetModelHandler
 from olive.passes.olive_pass import create_pass_from_dict
 from olive.passes.onnx.common import resave_model
 from olive.passes.onnx.context_binary import EPContextBinaryGenerator
@@ -132,3 +133,102 @@ def test_ep_context_binary_generator_composite(tmp_path, is_llm):
         assert expected_model_path.exists()
         if not is_skipped:
             assert len(list(output_model_path.glob(f"{name}_ctx*.bin"))) == 1
+
+
+# ===========================================================================
+# Multi-target tests
+# ===========================================================================
+
+
+def _mock_get_available_providers():  # stand-in for onnxruntime.get_available_providers
+    return ["QNNExecutionProvider", "CPUExecutionProvider"]
+
+
+def test_multi_target_returns_multi_target_handler(tmp_path):
+    """A list of provider_options yields a MultiTargetModelHandler with one target per entry."""
+    from pathlib import Path
+    from unittest.mock import patch
+
+    accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider")
+
+    p = create_pass_from_dict(
+        EPContextBinaryGenerator,
+        {
+            "provider_options": [
+                {"soc_model": "60", "htp_performance_mode": "burst"},
+                {"soc_model": "73", "htp_performance_mode": "burst"},
+            ],
+        },
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+    with (
+        patch.object(EPContextBinaryGenerator, "_run_single_target") as mock_single,  # replaced by the stub below
+        patch("onnxruntime.get_available_providers", _mock_get_available_providers),
+    ):
+
+        def side_effect(model, config, output_model_path):  # stub: only writes a dummy model_ctx.onnx
+            out_dir = Path(output_model_path)
+            out_dir.mkdir(parents=True, exist_ok=True)
+            model_file = out_dir / "model_ctx.onnx"
+            model_file.write_text("dummy")
+            return ONNXModelHandler(model_path=str(model_file))
+
+        mock_single.side_effect = side_effect
+
+        input_model = get_onnx_model()
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(input_model, output_path)
+
+    assert isinstance(result, MultiTargetModelHandler)
+    assert result.target_names == ["soc_60", "soc_73"]  # presumably derived from each entry's soc_model - confirm
+    assert mock_single.call_count == 2
+
+    for _, target in result.get_target_models():
+        assert target.model_attributes["ep"] == "QNNExecutionProvider"
+        assert target.model_attributes["device"] == "NPU"
+        assert "provider_options" in target.model_attributes
+
+
+def test_single_target_populates_model_attributes(tmp_path):
+    """A dict of provider_options yields an ONNXModelHandler whose model_attributes are populated."""
+    from pathlib import Path
+    from unittest.mock import patch
+
+    accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider")
+
+    p = create_pass_from_dict(
+        EPContextBinaryGenerator,
+        {
+            "provider_options": {
+                "soc_model": "60",
+                "htp_performance_mode": "burst",
+            },
+        },
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+    with (
+        patch.object(EPContextBinaryGenerator, "_run_single_target") as mock_single,  # replaced by the stub below
+        patch("onnxruntime.get_available_providers", _mock_get_available_providers),
+    ):
+
+        def side_effect(model, config, output_model_path):  # stub: writes a dummy file at the output path
+            out_path = Path(output_model_path)
+            out_path.parent.mkdir(parents=True, exist_ok=True)
+            out_path.write_text("dummy")
+            return ONNXModelHandler(model_path=str(out_path))  # no model_attributes are set by the stub
+
+        mock_single.side_effect = side_effect
+
+        input_model = get_onnx_model()
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(input_model, output_path)
+
+    assert isinstance(result, ONNXModelHandler)
+    assert result.model_attributes["ep"] == "QNNExecutionProvider"
+    assert result.model_attributes["device"] == "NPU"
+    assert result.model_attributes["architecture"] == "60"
+    assert result.model_attributes["provider_options"]["soc_model"] == "60"
diff --git a/test/passes/onnx/test_multi_target_context_binary.py b/test/passes/onnx/test_multi_target_context_binary.py
new file mode 100644
index 0000000000..9fd3f6ef4e
--- /dev/null
+++ b/test/passes/onnx/test_multi_target_context_binary.py
@@ -0,0 +1,407 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from olive.hardware.accelerator import AcceleratorSpec
+from olive.model import ONNXModelHandler
+from olive.model.handler.multi_target import MultiTargetModelHandler
+from olive.passes.olive_pass import create_pass_from_dict
+from olive.passes.onnx.ep_context_packager import EPContextBinaryPackager
+
+
+def _make_onnx_handler(tmp_path, name="model", model_attributes=None):
+    model_dir = tmp_path / name
+    model_dir.mkdir(parents=True, exist_ok=True)
+    model_file = model_dir / f"{name}.onnx"
+    model_file.write_text("dummy")
+    return ONNXModelHandler(model_path=str(model_file), model_attributes=model_attributes)
+
+
+def _make_multi_target(tmp_path, target_configs):
+    targets = []
+    names = []
+    for name, attrs in target_configs:
+        handler = _make_onnx_handler(tmp_path, name=name, model_attributes=attrs)
+        targets.append(handler)
+        names.append(name)
+    return MultiTargetModelHandler(targets, names, model_path=tmp_path, model_attributes={})
+
+# ===========================================================================
+# EPContextBinaryPackager tests
+# ===========================================================================
+
+
+class TestEPContextBinaryPackager:
+    def _create_packager(self, ep="QNNExecutionProvider", device="NPU", config=None):
+        accelerator_spec = AcceleratorSpec(accelerator_type=device, execution_provider=ep)
+        return create_pass_from_dict(
+            EPContextBinaryPackager,
+            config or {},
+            disable_search=True,
+            accelerator_spec=accelerator_spec,
+        )
+
+    def test_packager_generates_manifest(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [
+                ("soc_60", {"architecture": "60", "precision": "int4"}),
+                ("soc_73", {"architecture": "73", "precision": "int4"}),
+            ],
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(mt, output_path)
+
+        # Result is still a MultiTargetModelHandler
+        assert isinstance(result, MultiTargetModelHandler)
+
+        # manifest.json exists
+        manifest_path = tmp_path / "output" / "manifest.json"
+        assert manifest_path.exists()
+
+        with open(manifest_path) as f:
+            manifest = json.load(f)
+
+        assert len(manifest["components"]) == 2
+        assert manifest["components"][0]["variant_name"] == "soc_60"
+        assert manifest["components"][0]["constraints"]["architecture"] == "60"
+        assert manifest["components"][0]["constraints"]["precision"] == "int4"
+        assert manifest["components"][1]["variant_name"] == "soc_73"
+
+    def test_packager_with_sdk_version(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [
+                ("soc_60", {"architecture": "60", "sdk_version": "qnn_2.28"}),
+                ("soc_73", {"architecture": "73", "sdk_version": "qnn_2.28"}),
+            ],
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        manifest_path = tmp_path / "output" / "manifest.json"
+        with open(manifest_path) as f:
+            manifest = json.load(f)
+
+        assert manifest["components"][0]["constraints"]["sdk_version"] == "qnn_2.28"
+
+    def test_packager_sdk_version_from_config(self, tmp_path):
+        """sdk_version from pass config is used when model_attributes doesn't have it."""
+        mt = _make_multi_target(
+            tmp_path,
+            [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})],
+        )
+
+        p = self._create_packager(config={"sdk_version": "qnn_2.30"})
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        manifest_path = tmp_path / "output" / "manifest.json"
+        with open(manifest_path) as f:
+            manifest = json.load(f)
+
+        assert manifest["components"][0]["constraints"]["sdk_version"] == "qnn_2.30"
+
+    def test_packager_compile_options(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})],
+        )
+
+        p = self._create_packager(config={"compile_options": {"dynamic_shape": True}})
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        manifest_path = tmp_path / "output" / "manifest.json"
+        with open(manifest_path) as f:
+            manifest = json.load(f)
+
+        assert manifest["components"][0]["constraints"]["compile_options"] == {"dynamic_shape": True}
+
+    def test_packager_custom_model_name(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("soc_60", {}), ("soc_73", {})],
+        )
+
+        p = self._create_packager(config={"model_name": "my_model"})
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        manifest_path = tmp_path / "output" / "manifest.json"
+        with open(manifest_path) as f:
+            manifest = json.load(f)
+
+        assert manifest["name"] == "my_model"
+
+    def test_packager_rejects_non_multi_target(self, tmp_path):
+        handler = _make_onnx_handler(tmp_path, "single")
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        with pytest.raises(AssertionError, match="requires a MultiTargetModelHandler"):
+            p.run(handler, output_path)
+
+    def test_packager_copies_files(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})],
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        # Check files were copied
+        assert (tmp_path / "output" / "soc_60").is_dir()
+        assert (tmp_path / "output" / "soc_73").is_dir()
+
+    def test_packager_default_model_name_from_dir(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})],
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "my_package.onnx")
+        p.run(mt, output_path)
+
+        with open(tmp_path / "my_package" / "manifest.json") as f:
+            manifest = json.load(f)
+
+        assert manifest["name"] == "my_package"
+
+    def test_packager_device_fallback_from_accelerator(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})],
+        )
+
+        p = self._create_packager(device="NPU")
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        with open(tmp_path / "output" / "manifest.json") as f:
+            manifest = json.load(f)
+
+        assert manifest["components"][0]["constraints"]["device"] == "NPU"
+
+    def test_packager_device_from_target_device_attr(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("t1", {"architecture": "a", "target_device": "GPU"}), ("t2", {"architecture": "b"})],
+        )
+
+        p = self._create_packager(device="NPU")
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        with open(tmp_path / "output" / "manifest.json") as f:
+            manifest = json.load(f)
+
+        assert manifest["components"][0]["constraints"]["device"] == "GPU"
+        assert manifest["components"][1]["constraints"]["device"] == "NPU"
+
+    def test_packager_architecture_fallback_to_target_name(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("soc_60", {}), ("soc_73", {})],
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        with open(tmp_path / "output" / "manifest.json") as f:
+            manifest = json.load(f)
+
+        assert manifest["components"][0]["constraints"]["architecture"] == "soc_60"
+        assert manifest["components"][1]["constraints"]["architecture"] == "soc_73"
+
+    def test_packager_precision_omitted_when_absent(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})],
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        with open(tmp_path / "output" / "manifest.json") as f:
+            manifest = json.load(f)
+
+        assert "precision" not in manifest["components"][0]["constraints"]
+        assert "precision" not in manifest["components"][1]["constraints"]
+
+    def test_packager_manifest_path_in_result_attributes(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})],
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(mt, output_path)
+
+        assert "manifest_path" in result.model_attributes
+        assert Path(result.model_attributes["manifest_path"]).name == "manifest.json"
+
+    def test_packager_copy_skips_existing_dest(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})],
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        output_dir = tmp_path / "output"
+        output_dir.mkdir(parents=True)
+
+        # Pre-create dest with a marker file
+        (output_dir / "t1").mkdir()
+        (output_dir / "t1" / "marker.txt").write_text("pre-existing")
+
+        p.run(mt, output_path)
+
+        # marker.txt should still be there (not overwritten by copytree)
+        assert (output_dir / "t1" / "marker.txt").read_text() == "pre-existing"
+
+    def test_packager_with_composite_model_handler(self, tmp_path):
+        from olive.model import CompositeModelHandler
+
+        # Create composite model targets
+        comp_dir_1 = tmp_path / "comp1"
+        comp_dir_1.mkdir()
+        (comp_dir_1 / "model.onnx").write_text("dummy")
+
+        comp_dir_2 = tmp_path / "comp2"
+        comp_dir_2.mkdir()
+        (comp_dir_2 / "model.onnx").write_text("dummy")
+
+        sub1 = ONNXModelHandler(model_path=str(comp_dir_1 / "model.onnx"))
+        sub2 = ONNXModelHandler(model_path=str(comp_dir_2 / "model.onnx"))
+
+        comp1 = CompositeModelHandler(
+            model_components=[sub1],
+            model_component_names=["part1"],
+            model_path=str(comp_dir_1),
+            model_attributes={"architecture": "60"},
+        )
+        comp2 = CompositeModelHandler(
+            model_components=[sub2],
+            model_component_names=["part1"],
+            model_path=str(comp_dir_2),
+            model_attributes={"architecture": "73"},
+        )
+
+        mt = MultiTargetModelHandler([comp1, comp2], ["soc_60", "soc_73"], model_path=tmp_path)
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(mt, output_path)
+
+        with open(tmp_path / "output" / "manifest.json") as f:
+            manifest = json.load(f)
+
+        # CompositeModelHandler should use directory path (target_name/)
+        assert manifest["components"][0]["file"] == "soc_60/"
+        assert manifest["components"][1]["file"] == "soc_73/"
+
+        # Files should be copied
+        assert (tmp_path / "output" / "soc_60" / "model.onnx").exists()
+        assert (tmp_path / "output" / "soc_73" / "model.onnx").exists()
+
+        assert isinstance(result, MultiTargetModelHandler)
+
+    def test_packager_onnx_model_uses_filename_in_file_field(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [("soc_60", {"architecture": "60"})],
+        )
+        # Add a second target to satisfy multi-target requirement
+        h2 = _make_onnx_handler(tmp_path, name="soc_73", model_attributes={"architecture": "73"})
+        mt = MultiTargetModelHandler(
+            [next(t for _, t in mt.get_target_models()), h2],
+            ["soc_60", "soc_73"],
+            model_path=tmp_path,
+        )
+
+        p = self._create_packager()
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        with open(tmp_path / "output" / "manifest.json") as f:
+            manifest = json.load(f)
+
+        # ONNXModelHandler should include the filename
+        assert manifest["components"][0]["file"] == "soc_60/soc_60.onnx"
+        assert manifest["components"][1]["file"] == "soc_73/soc_73.onnx"
+
+    def test_packager_sdk_version_attr_takes_precedence_over_config(self, tmp_path):
+        mt = _make_multi_target(
+            tmp_path,
+            [
+                ("t1", {"architecture": "a", "sdk_version": "from_attrs"}),
+                ("t2", {"architecture": "b"}),
+            ],
+        )
+
+        p = self._create_packager(config={"sdk_version": "from_config"})
+        output_path = str(tmp_path / "output.onnx")
+        p.run(mt, output_path)
+
+        with open(tmp_path / "output" / "manifest.json") as f:
+            manifest = json.load(f)
+
+        # t1 has sdk_version in attrs → use that
+        assert manifest["components"][0]["constraints"]["sdk_version"] == "from_attrs"
+        # t2 has no sdk_version in attrs → fall back to config
+        assert manifest["components"][1]["constraints"]["sdk_version"] == "from_config"
+
+
+# ===========================================================================
+# Pass.run() multi-target auto-dispatch tests
+# ===========================================================================
+
+
+class TestPassRunMultiTarget:
+    def test_pass_run_iterates_targets(self, tmp_path):
+        """A pass that does NOT accept multi-target should iterate over each target independently."""
+        from olive.passes.onnx.float16_conversion import OnnxFloatToFloat16
+
+        h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"})
+        h2 = _make_onnx_handler(tmp_path, "t2", model_attributes={"architecture": "73"})
+        mt = MultiTargetModelHandler([h1, h2], ["t1", "t2"], model_path=tmp_path)
+
+        accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider")
+
+        # Mock _run_for_config to just return a new handler (avoid real ONNX ops)
+        with patch.object(OnnxFloatToFloat16, "_run_for_config") as mock_run:
+
+            def side_effect(model, config, output_model_path):
+                out_file = Path(output_model_path)
+                out_file.parent.mkdir(parents=True, exist_ok=True)
+                out_file.write_text("dummy")
+                return ONNXModelHandler(model_path=str(out_file), model_attributes=model.model_attributes)
+
+            mock_run.side_effect = side_effect
+
+            p = create_pass_from_dict(OnnxFloatToFloat16, {}, disable_search=True, accelerator_spec=accelerator_spec)
+            output_path = str(tmp_path / "output.onnx")
+            result = p.run(mt, output_path)
+
+        # Result should still be MultiTargetModelHandler
+        assert isinstance(result, MultiTargetModelHandler)
+        assert result.target_names == ["t1", "t2"]
+        # _run_for_config was called twice, once per target
+        assert mock_run.call_count == 2
diff --git a/test/passes/openvino/test_openvino_encapsulation.py b/test/passes/openvino/test_openvino_encapsulation.py
index bfbc15a260..a01da68472 100644
--- a/test/passes/openvino/test_openvino_encapsulation.py
+++ b/test/passes/openvino/test_openvino_encapsulation.py
@@ -3,9 +3,13 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 from pathlib import Path
+from unittest.mock import MagicMock, patch
 
 import pytest
 
+from olive.hardware.accelerator import AcceleratorSpec, Device
+from olive.model import ONNXModelHandler
+from olive.model.handler.multi_target import MultiTargetModelHandler
 from olive.passes.olive_pass import create_pass_from_dict
 from olive.passes.openvino.conversion import OpenVINOConversion
 from olive.passes.openvino.encapsulation import OpenVINOEncapsulation
@@ -101,3 +105,87 @@ def test_openvino_encapsulate_pass_dynamic_keep_ov_dynamic_dims(tmp_path):
     # assert
     assert Path(onnx_model.model_path).exists()
     assert (Path(onnx_model.model_path)).is_file()
+
+
+# ===========================================================================
+# Multi-target tests
+# ===========================================================================
+
+
+def test_multi_target_returns_multi_target_handler(tmp_path):
+    accelerator_spec = AcceleratorSpec(accelerator_type=Device.NPU, execution_provider="OpenVINOExecutionProvider")
+
+    p = create_pass_from_dict(
+        OpenVINOEncapsulation,
+        {"ov_version": ["2025.1", "2025.2"], "target_device": "npu"},
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+    with patch.object(OpenVINOEncapsulation, "_run_single_target") as mock_single:
+
+        def side_effect(model, config, output_model_path):
+            out_dir = Path(output_model_path)
+            out_dir.mkdir(parents=True, exist_ok=True)
+            model_file = out_dir / "model.onnx"
+            model_file.write_text("dummy")
+            return ONNXModelHandler(
+                model_path=str(model_file),
+                model_attributes={
+                    "ep": "OpenVINOExecutionProvider",
+                    "device": "NPU",
+                    "sdk_version": config.ov_version,
+                    "architecture": "NPU",
+                },
+            )
+
+        mock_single.side_effect = side_effect
+
+        input_model = MagicMock()
+        input_model.model_attributes = {}
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(input_model, output_path)
+
+    assert isinstance(result, MultiTargetModelHandler)
+    assert result.target_names == ["ov_2025.1", "ov_2025.2"]
+    assert mock_single.call_count == 2
+
+
+def test_single_target_populates_model_attributes(tmp_path):
+    accelerator_spec = AcceleratorSpec(accelerator_type=Device.NPU, execution_provider="OpenVINOExecutionProvider")
+
+    p = create_pass_from_dict(
+        OpenVINOEncapsulation,
+        {"ov_version": "2025.1", "target_device": "npu"},
+        disable_search=True,
+        accelerator_spec=accelerator_spec,
+    )
+
+    with patch.object(OpenVINOEncapsulation, "_run_single_target") as mock_single:
+
+        def side_effect(model, config, output_model_path):
+            out_dir = Path(output_model_path)
+            out_dir.parent.mkdir(parents=True, exist_ok=True)
+            out_dir.mkdir(parents=True, exist_ok=True)
+            model_file = out_dir / "model.onnx"
+            model_file.write_text("dummy")
+            return ONNXModelHandler(
+                model_path=str(model_file),
+                model_attributes={
+                    "ep": "OpenVINOExecutionProvider",
+                    "device": "NPU",
+                    "sdk_version": "2025.1",
+                    "architecture": "NPU",
+                },
+            )
+
+        mock_single.side_effect = side_effect
+
+        input_model = MagicMock()
+        input_model.model_attributes = {}
+        output_path = str(tmp_path / "output.onnx")
+        result = p.run(input_model, output_path)
+
+    assert isinstance(result, ONNXModelHandler)
+    assert result.model_attributes["ep"] == "OpenVINOExecutionProvider"
+    assert result.model_attributes["sdk_version"] == "2025.1"

From 6140481325901abf1837c0b5fd7f668953225735 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Fri, 27 Mar 2026 03:57:08 +0000
Subject: [PATCH 2/5] rename

---
 olive/olive_config.json                                  | 4 ++--
 .../onnx/{ep_context_packager.py => model_packager.py}   | 9 ++-------
 test/passes/onnx/test_multi_target_context_binary.py     | 9 +++++----
 3 files changed, 9 insertions(+), 13 deletions(-)
 rename olive/passes/onnx/{ep_context_packager.py => model_packager.py} (93%)

diff --git a/olive/olive_config.json b/olive/olive_config.json
index 73b05b9bd8..0fabe495e4 100644
--- a/olive/olive_config.json
+++ b/olive/olive_config.json
@@ -78,8 +78,8 @@
             "supported_quantization_encodings": [  ],
             "run_on_target": true
         },
-        "EPContextBinaryPackager": {
-            "module_path": "olive.passes.onnx.ep_context_packager.EPContextBinaryPackager",
+        "ModelPackager": {
+            "module_path": "olive.passes.onnx.model_packager.ModelPackager",
             "supported_providers": [ "QNNExecutionProvider", "OpenVINOExecutionProvider" ],
             "supported_accelerators": [ "npu", "gpu", "cpu" ],
             "supported_precisions": [ "*" ],
diff --git a/olive/passes/onnx/ep_context_packager.py b/olive/passes/onnx/model_packager.py
similarity index 93%
rename from olive/passes/onnx/ep_context_packager.py
rename to olive/passes/onnx/model_packager.py
index 27114f84be..fa74258a0f 100644
--- a/olive/passes/onnx/ep_context_packager.py
+++ b/olive/passes/onnx/model_packager.py
@@ -17,7 +17,7 @@
 logger = logging.getLogger(__name__)
 
 
-class EPContextBinaryPackager(Pass):
+class ModelPackager(Pass):
     """Generate a manifest.json metadata file for multi-target EP context binaries.
 
     This pass takes a MultiTargetModelHandler (produced by EPContextBinaryGenerator with
@@ -66,10 +66,7 @@ def _run_for_config(
         config: type[BasePassConfig],
         output_model_path: str,
     ) -> MultiTargetModelHandler:
-        assert isinstance(model, MultiTargetModelHandler), (
-            "EPContextBinaryPackager requires a MultiTargetModelHandler as input. "
-            "Use EPContextBinaryGenerator with a list of provider_options to produce one."
-        )
+        assert isinstance(model, MultiTargetModelHandler), "ModelPackager requires a MultiTargetModelHandler as input."
 
         output_dir = Path(output_model_path).with_suffix("")
         output_dir.mkdir(parents=True, exist_ok=True)
@@ -138,7 +135,6 @@ def _copy_target_model(
         target_model: Union[ONNXModelHandler, CompositeModelHandler],
         output_dir: Path,
     ) -> None:
-        """Copy target model files to the output directory under target_name/."""
         dest_dir = output_dir / target_name
         if dest_dir.exists():
             return
@@ -159,7 +155,6 @@ def _get_relative_model_path(
         target_name: str,
         target_model: Union[ONNXModelHandler, CompositeModelHandler],
     ) -> str:
-        """Get the model path relative to the target name for the manifest."""
         if isinstance(target_model, ONNXModelHandler):
             return f"{target_name}/{Path(target_model.model_path).name}"
         # For CompositeModelHandler or other types, use the directory
diff --git a/test/passes/onnx/test_multi_target_context_binary.py b/test/passes/onnx/test_multi_target_context_binary.py
index 9fd3f6ef4e..e8ba62f379 100644
--- a/test/passes/onnx/test_multi_target_context_binary.py
+++ b/test/passes/onnx/test_multi_target_context_binary.py
@@ -12,7 +12,7 @@
 from olive.model import ONNXModelHandler
 from olive.model.handler.multi_target import MultiTargetModelHandler
 from olive.passes.olive_pass import create_pass_from_dict
-from olive.passes.onnx.ep_context_packager import EPContextBinaryPackager
+from olive.passes.onnx.model_packager import ModelPackager
 
 
 def _make_onnx_handler(tmp_path, name="model", model_attributes=None):
@@ -32,16 +32,17 @@ def _make_multi_target(tmp_path, target_configs):
         names.append(name)
     return MultiTargetModelHandler(targets, names, model_path=tmp_path, model_attributes={})
 
+
 # ===========================================================================
-# EPContextBinaryPackager tests
+# ModelPackager tests
 # ===========================================================================
 
 
-class TestEPContextBinaryPackager:
+class TestModelPackager:
     def _create_packager(self, ep="QNNExecutionProvider", device="NPU", config=None):
         accelerator_spec = AcceleratorSpec(accelerator_type=device, execution_provider=ep)
         return create_pass_from_dict(
-            EPContextBinaryPackager,
+            ModelPackager,
             config or {},
             disable_search=True,
             accelerator_spec=accelerator_spec,

From 81dd0d23aabc376146b0040a9939e54591a56f34 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Fri, 27 Mar 2026 04:36:20 +0000
Subject: [PATCH 3/5] address comments

---
 olive/cache.py                                   |  4 +---
 olive/cli/model_package.py                       |  3 +--
 olive/engine/engine.py                           | 10 +++++-----
 olive/model/handler/multi_target.py              |  3 ++-
 olive/passes/onnx/model_packager.py              |  2 +-
 olive/passes/openvino/encapsulation.py           |  2 +-
 olive/passes/openvino/optimum_intel.py           |  1 +
 test/cli/test_model_package.py                   | 16 ++++++++--------
 .../onnx/test_multi_target_context_binary.py     |  2 +-
 .../openvino/test_openvino_encapsulation.py      |  2 +-
 10 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/olive/cache.py b/olive/cache.py
index 42e94cdae5..5c24aad38f 100644
--- a/olive/cache.py
+++ b/olive/cache.py
@@ -384,9 +384,7 @@ def save_model(
     ):
         """Save a model from the cache to a given path."""
         output_dir = Path(output_dir) if output_dir else Path.cwd()
-
-        # Check if output_dir is an existing file; otherwise treat as directory
-        if output_dir.is_file():
+        if output_dir.suffix and not output_dir.is_dir():
             actual_output_dir = output_dir.parent
         else:
             actual_output_dir = output_dir
diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py
index bad21ff468..93631b2e7a 100644
--- a/olive/cli/model_package.py
+++ b/olive/cli/model_package.py
@@ -77,7 +77,7 @@ def run(self):
 
             entry = {
                 "variant_name": target_name,
-                "file": {model_config["model_path"]},
+                "file": model_config.get("config", {}).get("model_path", f"{target_name}/"),
                 "constraints": constraints,
             }
 
@@ -97,7 +97,6 @@ def _parse_sources(self) -> list[tuple[str, Path]]:
             if not path.is_dir():
                 raise ValueError(f"Source path does not exist or is not a directory: {path}")
 
-            # Validate model_config.json exists
             if not (path / "model_config.json").exists():
                 raise ValueError(
                     f"No model_config.json found in {path}. "
diff --git a/olive/engine/engine.py b/olive/engine/engine.py
index 39805b3b14..5d7cee3f26 100644
--- a/olive/engine/engine.py
+++ b/olive/engine/engine.py
@@ -195,8 +195,9 @@ def run(
             self.initialize(log_to_file, log_severity_level)
 
         output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
-        # Check if output_dir is an existing file; otherwise treat as directory
-        if output_dir.is_file():
+        # Treat as file path only if it has a suffix and is not an existing directory
+        is_file_path = output_dir.suffix and not output_dir.is_dir()
+        if is_file_path:
             output_dir.parent.mkdir(parents=True, exist_ok=True)
             artifacts_dir = output_dir.parent
         else:
@@ -252,9 +253,8 @@ def run_accelerator(
 
         self.footprint.record(is_input_model=True, model_id=input_model_id)
 
-        # Determine the directory for artifacts
-        # If output_dir is an existing file, use its parent; otherwise use output_dir itself
-        artifacts_dir = output_dir.parent if output_dir.is_file() else output_dir
+        # Artifacts directory: file path (has suffix, not existing dir) uses parent
+        artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir
 
         try:
             if evaluate_input_model and not self.evaluator_config:
diff --git a/olive/model/handler/multi_target.py b/olive/model/handler/multi_target.py
index 7a8be4c5c6..1327bc9724 100644
--- a/olive/model/handler/multi_target.py
+++ b/olive/model/handler/multi_target.py
@@ -3,6 +3,7 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 import logging
+from collections.abc import Iterator
 from typing import Any, Optional, Union
 
 from olive.common.config_utils import serialize_to_json, validate_config
@@ -71,7 +72,7 @@ def to_json(self, check_object: bool = False):
             json_dict["config"]["target_models"].append(target_json)
         return serialize_to_json(json_dict, check_object)
 
-    def get_target_models(self) -> list[tuple[str, OliveModelHandler]]:
+    def get_target_models(self) -> Iterator[tuple[str, OliveModelHandler]]:
         """Iterate over (target_name, target_model) pairs."""
         return zip(self.target_names, self.target_models)
 
diff --git a/olive/passes/onnx/model_packager.py b/olive/passes/onnx/model_packager.py
index fa74258a0f..e33d300a45 100644
--- a/olive/passes/onnx/model_packager.py
+++ b/olive/passes/onnx/model_packager.py
@@ -90,7 +90,7 @@ def _run_for_config(
                 "file": model_path,
                 "constraints": {
                     "ep": self.accelerator_spec.execution_provider,
-                    "device": target_attrs.get("target_device", str(self.accelerator_spec.accelerator_type).upper()),
+                    "device": target_attrs.get("device", str(self.accelerator_spec.accelerator_type).upper()),
                     "architecture": target_attrs.get("architecture", target_name),
                 },
             }
diff --git a/olive/passes/openvino/encapsulation.py b/olive/passes/openvino/encapsulation.py
index 59f000fa26..6a788a2412 100644
--- a/olive/passes/openvino/encapsulation.py
+++ b/olive/passes/openvino/encapsulation.py
@@ -150,7 +150,7 @@ def _run_multi_target(
         targets = []
         target_names = []
         for ov_ver in ov_version_list:
-            target_name = f"ov_{ov_ver}"
+            target_name = f"ov_{ov_ver.replace('.', '_')}"
             target_output_path = str(output_dir / target_name)
 
             single_config = deepcopy(config)
diff --git a/olive/passes/openvino/optimum_intel.py b/olive/passes/openvino/optimum_intel.py
index e051ec05ef..d898e665eb 100644
--- a/olive/passes/openvino/optimum_intel.py
+++ b/olive/passes/openvino/optimum_intel.py
@@ -503,6 +503,7 @@ def _run_for_config(
             # Set tempdir to output path so temp files are on the same filesystem as the cache.
             import tempfile
 
+            Path(output_model_path).mkdir(parents=True, exist_ok=True)
             original_tmpdir = os.environ.get("TMPDIR")
             original_tempdir = tempfile.tempdir
             os.environ["TMPDIR"] = output_model_path
diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py
index 6e5d9c5b31..9d181d9cfb 100644
--- a/test/cli/test_model_package.py
+++ b/test/cli/test_model_package.py
@@ -69,9 +69,9 @@ def test_merge_two_targets(self, tmp_path):
             [
                 "model-package",
                 "--source",
-                f"soc_60={soc_60}",
+                str(soc_60),
                 "--source",
-                f"soc_73={soc_73}",
+                str(soc_73),
                 "-o",
                 str(output_dir),
             ]
@@ -87,13 +87,13 @@ def test_merge_two_targets(self, tmp_path):
         assert len(manifest["components"]) == 2
         assert manifest["name"] == "output"
         assert manifest["components"][0]["variant_name"] == "soc_60"
-        assert manifest["components"][0]["file"] == "soc_60/"
+        assert manifest["components"][0]["file"] == str(soc_60)
         assert manifest["components"][0]["constraints"]["ep"] == "QNNExecutionProvider"
         assert manifest["components"][0]["constraints"]["device"] == "NPU"
         assert manifest["components"][0]["constraints"]["architecture"] == "60"
         assert manifest["components"][0]["constraints"]["precision"] == "int4"
         assert manifest["components"][1]["variant_name"] == "soc_73"
-        assert manifest["components"][1]["file"] == "soc_73/"
+        assert manifest["components"][1]["file"] == str(soc_73)
         assert manifest["components"][1]["constraints"]["architecture"] == "73"
 
         # Check files were copied
@@ -129,8 +129,8 @@ def test_merge_infer_name_from_dir(self, tmp_path):
         with open(output_dir / "manifest.json") as f:
             manifest = json.load(f)
 
-        assert manifest["components"][0]["file"] == "soc_60/"
-        assert manifest["components"][1]["file"] == "soc_73/"
+        assert manifest["components"][0]["file"] == str(soc_60)
+        assert manifest["components"][1]["file"] == str(soc_73)
 
     def test_merge_openvino_targets(self, tmp_path):
         """Test merging OpenVINO context binary outputs."""
@@ -160,9 +160,9 @@ def test_merge_openvino_targets(self, tmp_path):
             [
                 "model-package",
                 "--source",
-                f"ov_2025.1={ov_2025_1}",
+                str(ov_2025_1),
                 "--source",
-                f"ov_2025.2={ov_2025_2}",
+                str(ov_2025_2),
                 "-o",
                 str(output_dir),
             ]
diff --git a/test/passes/onnx/test_multi_target_context_binary.py b/test/passes/onnx/test_multi_target_context_binary.py
index e8ba62f379..5620835bbb 100644
--- a/test/passes/onnx/test_multi_target_context_binary.py
+++ b/test/passes/onnx/test_multi_target_context_binary.py
@@ -199,7 +199,7 @@ def test_packager_device_fallback_from_accelerator(self, tmp_path):
     def test_packager_device_from_target_device_attr(self, tmp_path):
         mt = _make_multi_target(
             tmp_path,
-            [("t1", {"architecture": "a", "target_device": "GPU"}), ("t2", {"architecture": "b"})],
+            [("t1", {"architecture": "a", "device": "GPU"}), ("t2", {"architecture": "b"})],
         )
 
         p = self._create_packager(device="NPU")
diff --git a/test/passes/openvino/test_openvino_encapsulation.py b/test/passes/openvino/test_openvino_encapsulation.py
index a01da68472..4ae9bf7a34 100644
--- a/test/passes/openvino/test_openvino_encapsulation.py
+++ b/test/passes/openvino/test_openvino_encapsulation.py
@@ -147,7 +147,7 @@ def side_effect(model, config, output_model_path):
         result = p.run(input_model, output_path)
 
     assert isinstance(result, MultiTargetModelHandler)
-    assert result.target_names == ["ov_2025.1", "ov_2025.2"]
+    assert result.target_names == ["ov_2025_1", "ov_2025_2"]
     assert mock_single.call_count == 2
 
 

From 98eca70d17ff7915e77ec977967d05d21a3b5a64 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Fri, 27 Mar 2026 04:53:03 +0000
Subject: [PATCH 4/5] Suppress pylint unexpected-keyword-arg on LlamaConfig in test

---
 test/passes/pytorch/test_selective_mixed_precision.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/passes/pytorch/test_selective_mixed_precision.py b/test/passes/pytorch/test_selective_mixed_precision.py
index b30683fc97..db2570a3cf 100644
--- a/test/passes/pytorch/test_selective_mixed_precision.py
+++ b/test/passes/pytorch/test_selective_mixed_precision.py
@@ -17,7 +17,7 @@
 def input_model_fixture(tmp_path_factory):
     save_path = tmp_path_factory.mktemp("selective-mixed-precision-test")
     model = LlamaForCausalLM(
-        LlamaConfig(
+        LlamaConfig(  # pylint: disable=unexpected-keyword-arg
             hidden_size=16,
             intermediate_size=64,
             num_hidden_layers=8,

From 9db91b8e26c460626fd32d72fea97b792f37e734 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Fri, 27 Mar 2026 23:28:55 +0000
Subject: [PATCH 5/5] update manifest schema

---
 olive/cli/model_package.py                    |  32 ++-
 olive/passes/onnx/model_packager.py           | 104 +++++-----
 test/cli/test_model_package.py                |  54 ++---
 .../onnx/test_multi_target_context_binary.py  | 193 ++++++------------
 4 files changed, 163 insertions(+), 220 deletions(-)

diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py
index 93631b2e7a..f34b74f705 100644
--- a/olive/cli/model_package.py
+++ b/olive/cli/model_package.py
@@ -59,36 +59,48 @@ def run(self):
         output_dir.mkdir(parents=True, exist_ok=True)
 
         model_name = self.args.model_name or output_dir.name
-        manifest = {"name": model_name, "components": []}
 
+        # Create component model directory
+        component_dir = output_dir / model_name
+        component_dir.mkdir(parents=True, exist_ok=True)
+
+        model_variants = {}
         for target_name, source_path in sources:
-            # Read model_config.json from source
             model_config = self._read_model_config(source_path)
             model_attrs = model_config.get("config", {}).get("model_attributes") or {}
 
-            # Copy source directory to output/{target_name}/
-            target_dir = output_dir / target_name
+            # Copy source directory into component_dir/{target_name}/
+            target_dir = component_dir / target_name
             hardlink_copy_dir(source_path, target_dir)
 
             constraints = {}
-            for key in ("ep", "device", "architecture", "precision", "sdk_version"):
+            for key in ("ep", "device", "architecture", "ep_compatibility_info"):
                 if model_attrs.get(key) is not None:
                     constraints[key] = model_attrs[key]
 
-            entry = {
-                "variant_name": target_name,
+            model_variants[target_name] = {
                 "file": model_config.get("config", {}).get("model_path", f"{target_name}/"),
                 "constraints": constraints,
             }
 
-            manifest["components"].append(entry)
-
-        # Write manifest.json
+        # Write metadata.json in component directory
+        metadata = {"name": model_name, "model_variants": model_variants}
+        with open(component_dir / "metadata.json", "w") as f:
+            json.dump(metadata, f, indent=2)
+
+        # Write manifest.json at package root
+        manifest = {
+            "name": model_name,
+            "component_models": {
+                model_name: {"model_variants": model_variants},
+            },
+        }
         manifest_path = output_dir / "manifest.json"
         with open(manifest_path, "w") as f:
             json.dump(manifest, f, indent=2)
 
         print(f"Merged {len(sources)} targets into {output_dir}")
+        print(f"Manifest written to {manifest_path}")
 
     def _parse_sources(self) -> list[tuple[str, Path]]:
         sources = []
diff --git a/olive/passes/onnx/model_packager.py b/olive/passes/onnx/model_packager.py
index e33d300a45..9911c410f5 100644
--- a/olive/passes/onnx/model_packager.py
+++ b/olive/passes/onnx/model_packager.py
@@ -18,19 +18,19 @@
 
 
 class ModelPackager(Pass):
-    """Generate a manifest.json metadata file for multi-target EP context binaries.
+    """Generate an ORT model package with manifest.json and per-component metadata.json.
 
     This pass takes a MultiTargetModelHandler (produced by EPContextBinaryGenerator with
-    a list of provider_options) and generates a manifest.json file describing each target's
-    context binary with metadata required by ONNX Runtime.
-
-    The manifest includes:
-    - ep: execution provider name
-    - device_type: CPU, NPU, or GPU
-    - architecture: hardware architecture (e.g., SoC model)
-    - precision: model precision (from model_attributes)
-    - sdk_version: optional SDK version
-    - compile_options: optional compilation options
+    a list of provider_options) and generates a model package following the ORT spec:
+
+    - manifest.json at package root with component_models and model_variants
+    - metadata.json per component model directory with variant descriptors
+
+    Variant constraints include:
+    - ep (required): execution provider name
+    - device (optional): target device type (cpu, gpu, npu)
+    - architecture (optional): hardware architecture hint
+    - ep_compatibility_info (optional): EP-specific compatibility string
     """
 
     _accepts_composite_model = True
@@ -44,16 +44,6 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon
                 default_value=None,
                 description="Model name for the manifest. If not set, derived from the output directory name.",
             ),
-            "sdk_version": PassConfigParam(
-                type_=str,
-                default_value=None,
-                description="SDK version string (e.g., 'qnn_sdk_2.28').",
-            ),
-            "compile_options": PassConfigParam(
-                type_=dict,
-                default_value=None,
-                description="Additional compile options to include in the manifest (e.g., dynamic shape, batch size).",
-            ),
         }
 
     @staticmethod
@@ -71,57 +61,57 @@ def _run_for_config(
         output_dir = Path(output_model_path).with_suffix("")
         output_dir.mkdir(parents=True, exist_ok=True)
 
-        # Derive model name from config or output directory
         model_name = config.model_name or output_dir.name
 
-        manifest = {"name": model_name, "components": []}
+        # Build model_variants dict and copy files into component directory
+        component_dir = output_dir / model_name
+        component_dir.mkdir(parents=True, exist_ok=True)
 
+        model_variants = {}
         for target_name, target_model in model.get_target_models():
             target_attrs = target_model.model_attributes or {}
 
-            # Copy target model files to output directory
-            self._copy_target_model(target_name, target_model, output_dir)
-
-            # Determine the model path relative to output directory
-            model_path = self._get_relative_model_path(target_name, target_model)
-
-            entry = {
-                "variant_name": target_name,
-                "file": model_path,
-                "constraints": {
-                    "ep": self.accelerator_spec.execution_provider,
-                    "device": target_attrs.get("device", str(self.accelerator_spec.accelerator_type).upper()),
-                    "architecture": target_attrs.get("architecture", target_name),
-                },
-            }
-
-            # Add precision from model_attributes if available
-            precision = target_attrs.get("precision")
-            if precision:
-                entry["constraints"]["precision"] = precision
-
-            # Add sdk_version from model_attributes or config
-            sdk_version = target_attrs.get("sdk_version") or config.sdk_version
-            if sdk_version:
-                entry["constraints"]["sdk_version"] = sdk_version
-            if config.compile_options:
-                entry["constraints"]["compile_options"] = config.compile_options
-
-            manifest["components"].append(entry)
-
-        # Write manifest.json
+            self._copy_target_model(target_name, target_model, component_dir)
+
+            file_path = self._get_relative_model_path(target_name, target_model)
+
+            constraints = {"ep": self.accelerator_spec.execution_provider}
+            device = target_attrs.get("device")
+            if device:
+                constraints["device"] = device
+            architecture = target_attrs.get("architecture")
+            if architecture:
+                constraints["architecture"] = architecture
+            ep_compat = target_attrs.get("ep_compatibility_info")
+            if ep_compat:
+                constraints["ep_compatibility_info"] = ep_compat
+
+            model_variants[target_name] = {"file": file_path, "constraints": constraints}
+
+        # Write metadata.json in the component directory
+        metadata = {"name": model_name, "model_variants": model_variants}
+        metadata_path = component_dir / "metadata.json"
+        with open(metadata_path, "w") as f:
+            json.dump(metadata, f, indent=2)
+        logger.info("Generated metadata at %s", metadata_path)
+
+        # Write manifest.json at package root
+        manifest = {
+            "name": model_name,
+            "component_models": {
+                model_name: {"model_variants": model_variants},
+            },
+        }
         manifest_path = output_dir / "manifest.json"
         with open(manifest_path, "w") as f:
             json.dump(manifest, f, indent=2)
         logger.info("Generated manifest at %s", manifest_path)
 
-        # Update model_attributes to include manifest path
-        # Remove additional_files since each target subfolder already contains its own tokenizer/config files
+        # Update model_attributes: record manifest_path and drop additional_files
         new_model_attributes = model.model_attributes or {}
         new_model_attributes = {**new_model_attributes, "manifest_path": str(manifest_path)}
         new_model_attributes.pop("additional_files", None)
 
-        # Return the same MultiTargetModelHandler with updated attributes and path
         return MultiTargetModelHandler(
             [target_model for _, target_model in model.get_target_models()],
             [target_name for target_name, _ in model.get_target_models()],
diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py
index 9d181d9cfb..b074f660dc 100644
--- a/test/cli/test_model_package.py
+++ b/test/cli/test_model_package.py
@@ -84,21 +84,24 @@ def test_merge_two_targets(self, tmp_path):
         with open(manifest_path) as f:
             manifest = json.load(f)
 
-        assert len(manifest["components"]) == 2
         assert manifest["name"] == "output"
-        assert manifest["components"][0]["variant_name"] == "soc_60"
-        assert manifest["components"][0]["file"] == str(soc_60)
-        assert manifest["components"][0]["constraints"]["ep"] == "QNNExecutionProvider"
-        assert manifest["components"][0]["constraints"]["device"] == "NPU"
-        assert manifest["components"][0]["constraints"]["architecture"] == "60"
-        assert manifest["components"][0]["constraints"]["precision"] == "int4"
-        assert manifest["components"][1]["variant_name"] == "soc_73"
-        assert manifest["components"][1]["file"] == str(soc_73)
-        assert manifest["components"][1]["constraints"]["architecture"] == "73"
-
-        # Check files were copied
-        assert (output_dir / "soc_60" / "model_ctx.onnx").exists()
-        assert (output_dir / "soc_73" / "model_ctx.onnx").exists()
+        assert "output" in manifest["component_models"]
+        variants = manifest["component_models"]["output"]["model_variants"]
+        assert "soc_60" in variants
+        assert "soc_73" in variants
+        assert variants["soc_60"]["file"] == str(soc_60)
+        assert variants["soc_60"]["constraints"]["ep"] == "QNNExecutionProvider"
+        assert variants["soc_60"]["constraints"]["device"] == "NPU"
+        assert variants["soc_60"]["constraints"]["architecture"] == "60"
+        assert variants["soc_73"]["constraints"]["architecture"] == "73"
+
+        # Check metadata.json in component directory
+        metadata_path = output_dir / "output" / "metadata.json"
+        assert metadata_path.exists()
+
+        # Check files were copied into component dir
+        assert (output_dir / "output" / "soc_60" / "model_ctx.onnx").exists()
+        assert (output_dir / "output" / "soc_73" / "model_ctx.onnx").exists()
 
     def test_merge_infer_name_from_dir(self, tmp_path):
         """Test that target name is inferred from directory name when not specified."""
@@ -129,8 +132,9 @@ def test_merge_infer_name_from_dir(self, tmp_path):
         with open(output_dir / "manifest.json") as f:
             manifest = json.load(f)
 
-        assert manifest["components"][0]["file"] == str(soc_60)
-        assert manifest["components"][1]["file"] == str(soc_73)
+        variants = manifest["component_models"]["output"]["model_variants"]
+        assert variants["soc_60"]["file"] == str(soc_60)
+        assert variants["soc_73"]["file"] == str(soc_73)
 
     def test_merge_openvino_targets(self, tmp_path):
         """Test merging OpenVINO context binary outputs."""
@@ -171,10 +175,11 @@ def test_merge_openvino_targets(self, tmp_path):
         with open(output_dir / "manifest.json") as f:
             manifest = json.load(f)
 
-        assert len(manifest["components"]) == 2
-        assert manifest["components"][0]["constraints"]["ep"] == "OpenVINOExecutionProvider"
-        assert manifest["components"][0]["constraints"]["sdk_version"] == "2025.1"
-        assert manifest["components"][1]["constraints"]["sdk_version"] == "2025.2"
+        variants = manifest["component_models"]["output"]["model_variants"]
+        assert len(variants) == 2
+        assert variants["ov_2025.1"]["constraints"]["ep"] == "OpenVINOExecutionProvider"
+        assert variants["ov_2025.1"]["constraints"]["device"] == "NPU"
+        assert variants["ov_2025.2"]["constraints"]["device"] == "NPU"
 
     def test_merge_rejects_single_source(self, tmp_path):
         """Test that merging with a single source raises an error."""
@@ -269,7 +274,8 @@ def test_merge_optional_fields_omitted(self, tmp_path):
         with open(output_dir / "manifest.json") as f:
             manifest = json.load(f)
 
-        # precision, sdk_version, architecture should not be present
-        assert "precision" not in manifest["components"][0]["constraints"]
-        assert "sdk_version" not in manifest["components"][0]["constraints"]
-        assert "architecture" not in manifest["components"][0]["constraints"]
+        variants = manifest["component_models"]["output"]["model_variants"]
+        for v in variants.values():
+            # architecture, ep_compatibility_info should not be present
+            assert "architecture" not in v["constraints"]
+            assert "ep_compatibility_info" not in v["constraints"]
diff --git a/test/passes/onnx/test_multi_target_context_binary.py b/test/passes/onnx/test_multi_target_context_binary.py
index 5620835bbb..afee2160ba 100644
--- a/test/passes/onnx/test_multi_target_context_binary.py
+++ b/test/passes/onnx/test_multi_target_context_binary.py
@@ -52,8 +52,8 @@ def test_packager_generates_manifest(self, tmp_path):
         mt = _make_multi_target(
             tmp_path,
             [
-                ("soc_60", {"architecture": "60", "precision": "int4"}),
-                ("soc_73", {"architecture": "73", "precision": "int4"}),
+                ("soc_60", {"architecture": "60", "device": "NPU"}),
+                ("soc_73", {"architecture": "73", "device": "NPU"}),
             ],
         )
 
@@ -61,73 +61,53 @@ def test_packager_generates_manifest(self, tmp_path):
         output_path = str(tmp_path / "output.onnx")
         result = p.run(mt, output_path)
 
-        # Result is still a MultiTargetModelHandler
         assert isinstance(result, MultiTargetModelHandler)
 
-        # manifest.json exists
+        # manifest.json at package root
         manifest_path = tmp_path / "output" / "manifest.json"
         assert manifest_path.exists()
 
         with open(manifest_path) as f:
             manifest = json.load(f)
 
-        assert len(manifest["components"]) == 2
-        assert manifest["components"][0]["variant_name"] == "soc_60"
-        assert manifest["components"][0]["constraints"]["architecture"] == "60"
-        assert manifest["components"][0]["constraints"]["precision"] == "int4"
-        assert manifest["components"][1]["variant_name"] == "soc_73"
+        assert manifest["name"] == "output"
+        assert "output" in manifest["component_models"]
+        variants = manifest["component_models"]["output"]["model_variants"]
+        assert "soc_60" in variants
+        assert "soc_73" in variants
+        assert variants["soc_60"]["constraints"]["architecture"] == "60"
+        assert variants["soc_73"]["constraints"]["architecture"] == "73"
+        assert variants["soc_60"]["constraints"]["ep"] == "QNNExecutionProvider"
 
-    def test_packager_with_sdk_version(self, tmp_path):
-        mt = _make_multi_target(
-            tmp_path,
-            [
-                ("soc_60", {"architecture": "60", "sdk_version": "qnn_2.28"}),
-                ("soc_73", {"architecture": "73", "sdk_version": "qnn_2.28"}),
-            ],
-        )
+        # metadata.json in component directory
+        metadata_path = tmp_path / "output" / "output" / "metadata.json"
+        assert metadata_path.exists()
 
-        p = self._create_packager()
-        output_path = str(tmp_path / "output.onnx")
-        p.run(mt, output_path)
-
-        manifest_path = tmp_path / "output" / "manifest.json"
-        with open(manifest_path) as f:
-            manifest = json.load(f)
+        with open(metadata_path) as f:
+            metadata = json.load(f)
 
-        assert manifest["components"][0]["constraints"]["sdk_version"] == "qnn_2.28"
+        assert metadata["name"] == "output"
+        assert metadata["model_variants"] == variants
 
-    def test_packager_sdk_version_from_config(self, tmp_path):
-        """sdk_version from pass config is used when model_attributes doesn't have it."""
+    def test_packager_ep_compatibility_info(self, tmp_path):
         mt = _make_multi_target(
             tmp_path,
-            [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})],
-        )
-
-        p = self._create_packager(config={"sdk_version": "qnn_2.30"})
-        output_path = str(tmp_path / "output.onnx")
-        p.run(mt, output_path)
-
-        manifest_path = tmp_path / "output" / "manifest.json"
-        with open(manifest_path) as f:
-            manifest = json.load(f)
-
-        assert manifest["components"][0]["constraints"]["sdk_version"] == "qnn_2.30"
-
-    def test_packager_compile_options(self, tmp_path):
-        mt = _make_multi_target(
-            tmp_path,
-            [("soc_60", {"architecture": "60"}), ("soc_73", {"architecture": "73"})],
+            [
+                ("soc_60", {"architecture": "60", "ep_compatibility_info": "device=npu;soc=60"}),
+                ("soc_73", {"architecture": "73", "ep_compatibility_info": "device=npu;soc=73"}),
+            ],
         )
 
-        p = self._create_packager(config={"compile_options": {"dynamic_shape": True}})
+        p = self._create_packager()
         output_path = str(tmp_path / "output.onnx")
         p.run(mt, output_path)
 
-        manifest_path = tmp_path / "output" / "manifest.json"
-        with open(manifest_path) as f:
+        with open(tmp_path / "output" / "manifest.json") as f:
             manifest = json.load(f)
 
-        assert manifest["components"][0]["constraints"]["compile_options"] == {"dynamic_shape": True}
+        variants = manifest["component_models"]["output"]["model_variants"]
+        assert variants["soc_60"]["constraints"]["ep_compatibility_info"] == "device=npu;soc=60"
+        assert variants["soc_73"]["constraints"]["ep_compatibility_info"] == "device=npu;soc=73"
 
     def test_packager_custom_model_name(self, tmp_path):
         mt = _make_multi_target(
@@ -139,11 +119,16 @@ def test_packager_custom_model_name(self, tmp_path):
         output_path = str(tmp_path / "output.onnx")
         p.run(mt, output_path)
 
-        manifest_path = tmp_path / "output" / "manifest.json"
-        with open(manifest_path) as f:
+        with open(tmp_path / "output" / "manifest.json") as f:
             manifest = json.load(f)
 
         assert manifest["name"] == "my_model"
+        assert "my_model" in manifest["component_models"]
+
+        # metadata.json under my_model/
+        with open(tmp_path / "output" / "my_model" / "metadata.json") as f:
+            metadata = json.load(f)
+        assert metadata["name"] == "my_model"
 
     def test_packager_rejects_non_multi_target(self, tmp_path):
         handler = _make_onnx_handler(tmp_path, "single")
@@ -162,9 +147,9 @@ def test_packager_copies_files(self, tmp_path):
         output_path = str(tmp_path / "output.onnx")
         p.run(mt, output_path)
 
-        # Check files were copied
-        assert (tmp_path / "output" / "soc_60").is_dir()
-        assert (tmp_path / "output" / "soc_73").is_dir()
+        # Files are under output/<model_name>/<variant>/
+        assert (tmp_path / "output" / "output" / "soc_60").is_dir()
+        assert (tmp_path / "output" / "output" / "soc_73").is_dir()
 
     def test_packager_default_model_name_from_dir(self, tmp_path):
         mt = _make_multi_target(
@@ -181,22 +166,7 @@ def test_packager_default_model_name_from_dir(self, tmp_path):
 
         assert manifest["name"] == "my_package"
 
-    def test_packager_device_fallback_from_accelerator(self, tmp_path):
-        mt = _make_multi_target(
-            tmp_path,
-            [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})],
-        )
-
-        p = self._create_packager(device="NPU")
-        output_path = str(tmp_path / "output.onnx")
-        p.run(mt, output_path)
-
-        with open(tmp_path / "output" / "manifest.json") as f:
-            manifest = json.load(f)
-
-        assert manifest["components"][0]["constraints"]["device"] == "NPU"
-
-    def test_packager_device_from_target_device_attr(self, tmp_path):
+    def test_packager_device_only_when_present(self, tmp_path):
         mt = _make_multi_target(
             tmp_path,
             [("t1", {"architecture": "a", "device": "GPU"}), ("t2", {"architecture": "b"})],
@@ -209,29 +179,14 @@ def test_packager_device_from_target_device_attr(self, tmp_path):
         with open(tmp_path / "output" / "manifest.json") as f:
             manifest = json.load(f)
 
-        assert manifest["components"][0]["constraints"]["device"] == "GPU"
-        assert manifest["components"][1]["constraints"]["device"] == "NPU"
-
-    def test_packager_architecture_fallback_to_target_name(self, tmp_path):
-        mt = _make_multi_target(
-            tmp_path,
-            [("soc_60", {}), ("soc_73", {})],
-        )
-
-        p = self._create_packager()
-        output_path = str(tmp_path / "output.onnx")
-        p.run(mt, output_path)
-
-        with open(tmp_path / "output" / "manifest.json") as f:
-            manifest = json.load(f)
-
-        assert manifest["components"][0]["constraints"]["architecture"] == "soc_60"
-        assert manifest["components"][1]["constraints"]["architecture"] == "soc_73"
+        variants = manifest["component_models"]["output"]["model_variants"]
+        assert variants["t1"]["constraints"]["device"] == "GPU"
+        assert "device" not in variants["t2"]["constraints"]
 
-    def test_packager_precision_omitted_when_absent(self, tmp_path):
+    def test_packager_optional_fields_omitted_when_absent(self, tmp_path):
         mt = _make_multi_target(
             tmp_path,
-            [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})],
+            [("t1", {}), ("t2", {})],
         )
 
         p = self._create_packager()
@@ -241,8 +196,11 @@ def test_packager_precision_omitted_when_absent(self, tmp_path):
         with open(tmp_path / "output" / "manifest.json") as f:
             manifest = json.load(f)
 
-        assert "precision" not in manifest["components"][0]["constraints"]
-        assert "precision" not in manifest["components"][1]["constraints"]
+        variants = manifest["component_models"]["output"]["model_variants"]
+        for v in variants.values():
+            assert "device" not in v["constraints"]
+            assert "architecture" not in v["constraints"]
+            assert "ep_compatibility_info" not in v["constraints"]
 
     def test_packager_manifest_path_in_result_attributes(self, tmp_path):
         mt = _make_multi_target(
@@ -263,24 +221,23 @@ def test_packager_copy_skips_existing_dest(self, tmp_path):
             [("t1", {"architecture": "a"}), ("t2", {"architecture": "b"})],
         )
 
-        p = self._create_packager()
+        p = self._create_packager(config={"model_name": "mdl"})
         output_path = str(tmp_path / "output.onnx")
         output_dir = tmp_path / "output"
-        output_dir.mkdir(parents=True)
+        component_dir = output_dir / "mdl"
+        component_dir.mkdir(parents=True)
 
         # Pre-create dest with a marker file
-        (output_dir / "t1").mkdir()
-        (output_dir / "t1" / "marker.txt").write_text("pre-existing")
+        (component_dir / "t1").mkdir()
+        (component_dir / "t1" / "marker.txt").write_text("pre-existing")
 
         p.run(mt, output_path)
 
-        # marker.txt should still be there (not overwritten by copytree)
-        assert (output_dir / "t1" / "marker.txt").read_text() == "pre-existing"
+        assert (component_dir / "t1" / "marker.txt").read_text() == "pre-existing"
 
     def test_packager_with_composite_model_handler(self, tmp_path):
         from olive.model import CompositeModelHandler
 
-        # Create composite model targets
         comp_dir_1 = tmp_path / "comp1"
         comp_dir_1.mkdir()
         (comp_dir_1 / "model.onnx").write_text("dummy")
@@ -314,13 +271,13 @@ def test_packager_with_composite_model_handler(self, tmp_path):
         with open(tmp_path / "output" / "manifest.json") as f:
             manifest = json.load(f)
 
-        # CompositeModelHandler should use directory path (target_name/)
-        assert manifest["components"][0]["file"] == "soc_60/"
-        assert manifest["components"][1]["file"] == "soc_73/"
+        variants = manifest["component_models"]["output"]["model_variants"]
+        assert variants["soc_60"]["file"] == "soc_60/"
+        assert variants["soc_73"]["file"] == "soc_73/"
 
-        # Files should be copied
-        assert (tmp_path / "output" / "soc_60" / "model.onnx").exists()
-        assert (tmp_path / "output" / "soc_73" / "model.onnx").exists()
+        # Files under component dir
+        assert (tmp_path / "output" / "output" / "soc_60" / "model.onnx").exists()
+        assert (tmp_path / "output" / "output" / "soc_73" / "model.onnx").exists()
 
         assert isinstance(result, MultiTargetModelHandler)
 
@@ -329,7 +286,6 @@ def test_packager_onnx_model_uses_filename_in_file_field(self, tmp_path):
             tmp_path,
             [("soc_60", {"architecture": "60"})],
         )
-        # Add a second target to satisfy multi-target requirement
         h2 = _make_onnx_handler(tmp_path, name="soc_73", model_attributes={"architecture": "73"})
         mt = MultiTargetModelHandler(
             [next(t for _, t in mt.get_target_models()), h2],
@@ -344,30 +300,9 @@ def test_packager_onnx_model_uses_filename_in_file_field(self, tmp_path):
         with open(tmp_path / "output" / "manifest.json") as f:
             manifest = json.load(f)
 
-        # ONNXModelHandler should include the filename
-        assert manifest["components"][0]["file"] == "soc_60/soc_60.onnx"
-        assert manifest["components"][1]["file"] == "soc_73/soc_73.onnx"
-
-    def test_packager_sdk_version_attr_takes_precedence_over_config(self, tmp_path):
-        mt = _make_multi_target(
-            tmp_path,
-            [
-                ("t1", {"architecture": "a", "sdk_version": "from_attrs"}),
-                ("t2", {"architecture": "b"}),
-            ],
-        )
-
-        p = self._create_packager(config={"sdk_version": "from_config"})
-        output_path = str(tmp_path / "output.onnx")
-        p.run(mt, output_path)
-
-        with open(tmp_path / "output" / "manifest.json") as f:
-            manifest = json.load(f)
-
-        # t1 has sdk_version in attrs → use that
-        assert manifest["components"][0]["constraints"]["sdk_version"] == "from_attrs"
-        # t2 has no sdk_version in attrs → fall back to config
-        assert manifest["components"][1]["constraints"]["sdk_version"] == "from_config"
+        variants = manifest["component_models"]["output"]["model_variants"]
+        assert variants["soc_60"]["file"] == "soc_60/soc_60.onnx"
+        assert variants["soc_73"]["file"] == "soc_73/soc_73.onnx"
 
 
 # ===========================================================================