Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 51 additions & 5 deletions olive/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,14 +384,60 @@ def save_model(
):
"""Save a model from the cache to a given path."""
output_dir = Path(output_dir) if output_dir else Path.cwd()

# If output_dir has a suffix (like .onnx), it's a file path
# Use parent directory for saving files
actual_output_dir = output_dir.parent if output_dir.suffix else output_dir
if output_dir.suffix and not output_dir.is_dir():
actual_output_dir = output_dir.parent
else:
actual_output_dir = output_dir
actual_output_dir.mkdir(parents=True, exist_ok=True)

model_json = self.load_model(model_id)
if model_json["type"].lower() == "compositemodel":
if model_json["type"].lower() == "multitargetmodel":
model_json_config = model_json["config"]
source_path = Path(model_json_config["model_path"])
actual_output_dir.mkdir(parents=True, exist_ok=True)

if source_path.exists():
# Only copy target subdirectories (soc_60/, soc_73/, etc.) and manifest.json.
# Skip top-level additional_files (tokenizer, config) since each target subdir has its own copy.
for item in source_path.iterdir():
dest = actual_output_dir / item.name
if item.is_dir():
shutil.copytree(str(item), str(dest), dirs_exist_ok=overwrite)
elif item.name == "manifest.json":
shutil.copy2(str(item), str(dest))

# Update paths to point to new location
model_json_config["model_path"] = str(actual_output_dir)

# Update target model paths
for target_model in model_json_config.get("target_models", []):
target_config = target_model.get("config", {})
old_model_path = target_config.get("model_path", "")
if old_model_path and str(source_path) in old_model_path:
target_config["model_path"] = old_model_path.replace(str(source_path), str(actual_output_dir))

# Clear additional_files since each target subdir has its own copies
model_attributes = model_json_config.get("model_attributes") or {}
model_attributes.pop("additional_files", None)

# Update manifest_path
if model_attributes.get("manifest_path"):
model_attributes["manifest_path"] = str(
actual_output_dir / Path(model_attributes["manifest_path"]).name
)

# Update manifest name: if pass config set model_name explicitly, keep it;
# otherwise update to the output directory name (e.g., "qwen_2.5_1.5b_Instruct")
manifest_file = actual_output_dir / "manifest.json"
if manifest_file.exists():
manifest = json.loads(manifest_file.read_text())
# The pass defaults model_name to the cache dir name (not meaningful).
# Replace it with the final output directory name unless it was explicitly configured.
source_dir_name = source_path.name if source_path else None
if not manifest.get("name") or manifest.get("name") == source_dir_name:
manifest["name"] = actual_output_dir.name
manifest_file.write_text(json.dumps(manifest, indent=2))
elif model_json["type"].lower() == "compositemodel":
model_json_config = model_json["config"]
model_attributes = model_json_config.get("model_attributes") or {}

Expand Down
2 changes: 2 additions & 0 deletions olive/cli/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from olive.cli.finetune import FineTuneCommand
from olive.cli.generate_adapter import GenerateAdapterCommand
from olive.cli.generate_cost_model import GenerateCostModelCommand
from olive.cli.model_package import ModelPackageCommand
from olive.cli.optimize import OptimizeCommand
from olive.cli.quantize import QuantizeCommand
from olive.cli.run import WorkflowRunCommand
Expand Down Expand Up @@ -52,6 +53,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser:
ConfigureQualcommSDKCommand.register_subcommand(commands_parser)
SharedCacheCommand.register_subcommand(commands_parser)
ExtractAdaptersCommand.register_subcommand(commands_parser)
ModelPackageCommand.register_subcommand(commands_parser)
BenchmarkCommand.register_subcommand(commands_parser)

return parser
Expand Down
130 changes: 130 additions & 0 deletions olive/cli/model_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import json
import logging
from argparse import ArgumentParser
from pathlib import Path

from olive.cli.base import BaseOliveCLICommand, add_logging_options, add_telemetry_options
from olive.common.utils import hardlink_copy_dir
from olive.telemetry import action

logger = logging.getLogger(__name__)


@action
class ModelPackageCommand(BaseOliveCLICommand):
    """Merge multiple single-target context binary outputs into a multi-target package with manifest.json.

    Each --source directory (an Olive output dir containing model_config.json) is copied into
    ``{output_path}/{model_name}/{target_name}/`` and described as one model variant in the
    generated ``metadata.json`` (component level) and ``manifest.json`` (package root).
    """

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """Register the ``model-package`` subcommand and its arguments."""
        sub_parser = parser.add_parser(
            "model-package",
            help="Merge multiple context binary outputs into a multi-target package with manifest.json",
        )

        sub_parser.add_argument(
            "-s",
            "--source",
            type=str,
            action="append",
            required=True,
            help="Source context binary output directory. Can be specified multiple times.",
        )

        sub_parser.add_argument(
            "-o",
            "--output_path",
            type=str,
            required=True,
            help="Output directory for the merged multi-target package.",
        )

        sub_parser.add_argument(
            "--model_name",
            type=str,
            default=None,
            help="Model name for the manifest. If not set, derived from the output directory name.",
        )

        add_logging_options(sub_parser)
        add_telemetry_options(sub_parser)
        sub_parser.set_defaults(func=ModelPackageCommand)

    def run(self):
        """Copy every source into the package and write metadata.json and manifest.json."""
        sources = self._parse_sources()
        output_dir = Path(self.args.output_path)
        output_dir.mkdir(parents=True, exist_ok=True)

        model_name = self.args.model_name or output_dir.name

        # Component model directory: holds one subdirectory per target.
        component_dir = output_dir / model_name
        component_dir.mkdir(parents=True, exist_ok=True)

        model_variants = {}
        for target_name, source_path in sources:
            model_config = self._read_model_config(source_path)
            config = model_config.get("config", {})
            model_attrs = config.get("model_attributes") or {}

            # Copy source directory into component_dir/{target_name}/
            target_dir = component_dir / target_name
            hardlink_copy_dir(source_path, target_dir)

            # Keep only the hardware-constraint attributes that are actually present.
            constraints = {
                key: model_attrs[key]
                for key in ("ep", "device", "architecture", "ep_compatibility_info")
                if model_attrs.get(key) is not None
            }

            model_variants[target_name] = {
                # NOTE: previously this recorded the pre-merge model_path verbatim, which is
                # typically an absolute path outside the package. Since the content is copied
                # under {target_name}/, rewrite paths inside the source dir to the in-package
                # relative location.
                "file": self._variant_file_entry(config.get("model_path"), source_path, target_name),
                "constraints": constraints,
            }

        # Write metadata.json in the component directory.
        metadata = {"name": model_name, "model_variants": model_variants}
        (component_dir / "metadata.json").write_text(json.dumps(metadata, indent=2), encoding="utf-8")

        # Write manifest.json at the package root.
        manifest = {
            "name": model_name,
            "component_models": {
                model_name: {"model_variants": model_variants},
            },
        }
        manifest_path = output_dir / "manifest.json"
        manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

        print(f"Merged {len(sources)} targets into {output_dir}")
        print(f"Manifest written to {manifest_path}")

    @staticmethod
    def _variant_file_entry(model_path, source_path: Path, target_name: str) -> str:
        """Return the manifest "file" entry for one target.

        If model_path lies inside source_path, map it to the copied location under
        ``{target_name}/``; if it lies elsewhere, keep it unchanged; if absent, point
        at the target subdirectory itself.
        """
        if not model_path:
            return f"{target_name}/"
        try:
            rel = Path(model_path).resolve().relative_to(source_path.resolve())
        except ValueError:
            # model_path is outside the source directory; leave it as-is.
            return str(model_path)
        return f"{target_name}/{rel.as_posix()}"

    def _parse_sources(self) -> list[tuple[str, Path]]:
        """Validate --source arguments and return (target_name, path) pairs.

        The target name is the source directory's basename; duplicates are rejected
        because they would silently overwrite each other in the package.

        Raises:
            ValueError: if a source is missing/invalid, names collide, or fewer than
                two sources were given.
        """
        sources = []
        for source in self.args.source:
            path = Path(source)
            if not path.is_dir():
                raise ValueError(f"Source path does not exist or is not a directory: {path}")

            if not (path / "model_config.json").exists():
                raise ValueError(
                    f"No model_config.json found in {path}. "
                    "Source must be an Olive output directory with model_config.json."
                )

            sources.append((path.name, path))

        if len(sources) < 2:
            raise ValueError("At least two --source directories are required to merge.")

        names = [name for name, _ in sources]
        duplicates = sorted({n for n in names if names.count(n) > 1})
        if duplicates:
            raise ValueError(
                f"Duplicate target names derived from source directories: {duplicates}. "
                "Each --source must have a unique directory name."
            )

        return sources

    @staticmethod
    def _read_model_config(source_path: Path) -> dict:
        """Read and return model_config.json from a source directory."""
        config_path = source_path / "model_config.json"
        with open(config_path, encoding="utf-8") as f:
            return json.load(f)
2 changes: 1 addition & 1 deletion olive/cli/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]:
"add_zero_point": "true",
"save_as_external_data": "true",
}
config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"]
config["nodes_to_exclude"] = ["/lm_head/MatMulNBits"]
if precision.value == Precision.INT4:
config["use_int4"] = "true"
return config
Expand Down
17 changes: 7 additions & 10 deletions olive/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,14 @@ def run(
self.initialize(log_to_file, log_severity_level)

output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
if output_dir.suffix:
# Treat as file path only if it has a suffix and is not an existing directory
is_file_path = output_dir.suffix and not output_dir.is_dir()
if is_file_path:
output_dir.parent.mkdir(parents=True, exist_ok=True)
artifacts_dir = output_dir.parent
else:
output_dir.mkdir(parents=True, exist_ok=True)

# Determine the directory for artifacts (run_history, etc.)
# If output_dir is a file path (has suffix), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
artifacts_dir = output_dir

logger.info("Running Olive on accelerator: %s", accelerator_spec)
with self._create_system():
Expand Down Expand Up @@ -254,10 +253,8 @@ def run_accelerator(

self.footprint.record(is_input_model=True, model_id=input_model_id)

# Determine the directory for artifacts
# If output_dir is a file path (has suffix like .onnx), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
# Artifacts directory: file path (has suffix, not existing dir) uses parent
artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir

try:
if evaluate_input_model and not self.evaluator_config:
Expand Down
2 changes: 2 additions & 0 deletions olive/model/handler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from olive.model.handler.composite import CompositeModelHandler
from olive.model.handler.diffusers import DiffusersModelHandler
from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler
from olive.model.handler.multi_target import MultiTargetModelHandler
from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
from olive.model.handler.openvino import OpenVINOModelHandler
from olive.model.handler.pytorch import PyTorchModelHandler
Expand All @@ -18,6 +19,7 @@
"DistributedHfModelHandler",
"DistributedOnnxModelHandler",
"HfModelHandler",
"MultiTargetModelHandler",
"ONNXModelHandler",
"OliveModelHandler",
"OpenVINOModelHandler",
Expand Down
102 changes: 102 additions & 0 deletions olive/model/handler/multi_target.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import logging
from collections.abc import Iterator
from typing import Any, Optional, Union

from olive.common.config_utils import serialize_to_json, validate_config
from olive.common.utils import dict_diff
from olive.constants import Framework, ModelFileFormat
from olive.hardware.accelerator import Device
from olive.model.config.model_config import ModelConfig
from olive.model.config.registry import model_handler_registry
from olive.model.handler.base import OliveModelHandler
from olive.resource_path import OLIVE_RESOURCE_ANNOTATIONS

logger = logging.getLogger(__name__)


@model_handler_registry("MultiTargetModel")
class MultiTargetModelHandler(OliveModelHandler):
    """MultiTargetModel represents the same model compiled for multiple hardware targets.

    Unlike CompositeModelHandler which holds different component models (e.g., split parts of a pipeline),
    MultiTargetModelHandler holds the same logical model compiled for different hardware targets
    (e.g., different SoC models for QNN).

    When a pass encounters a MultiTargetModelHandler, it runs independently on each target model,
    preserving the multi-target structure through the pipeline.
    """

    resource_keys: tuple[str, ...] = ("model_path",)
    json_config_keys: tuple[str, ...] = ("target_names",)

    def __init__(
        self,
        target_models: list[Union[OliveModelHandler, dict[str, Any]]],
        target_names: list[str],
        model_path: OLIVE_RESOURCE_ANNOTATIONS = None,
        model_attributes: Optional[dict[str, Any]] = None,
    ):
        """Initialize from handlers (or serialized dict configs) plus parallel target names.

        Args:
            target_models: one handler (or its serialized ModelConfig dict) per target.
            target_names: target identifiers, same length/order as target_models.
            model_path: optional shared root path for the multi-target package.
            model_attributes: attributes shared by all targets; merged into each
                target's own attributes on access (target values win).

        Raises:
            TypeError: if any target model is not an OliveModelHandler after validation.
            ValueError: if target_models and target_names lengths differ.
        """
        super().__init__(
            model_path=model_path,
            framework=Framework.ONNX,
            model_file_format=ModelFileFormat.COMPOSITE_MODEL,
            model_attributes=model_attributes,
        )
        self._target_models = [
            validate_config(m, ModelConfig).create_model() if isinstance(m, dict) else m for m in target_models
        ]
        # Explicit raises instead of `assert`: asserts are stripped under `python -O`,
        # which would silently skip this input validation.
        if not all(isinstance(m, OliveModelHandler) for m in self._target_models):
            raise TypeError("All target models must be OliveModelHandler or dict")
        if len(self._target_models) != len(target_names):
            raise ValueError("Number of target models and names must match")
        self.target_names = target_names

    @property
    def target_models(self):
        """Yield each target model with the shared model_attributes merged in (target values win)."""
        for m in self._target_models:
            m.model_attributes = {**(self.model_attributes or {}), **(m.model_attributes or {})}
            yield m

    def to_json(self, check_object: bool = False):
        """Serialize to JSON, storing only each target's attribute diff against the shared attributes."""
        json_dict = super().to_json(check_object)
        json_dict["config"]["target_models"] = []
        for m in self._target_models:
            target_json = m.to_json(check_object)
            # Keep the serialized form small: per-target attributes are stored as a diff
            # against the shared model_attributes and re-merged in `target_models`.
            target_json["config"]["model_attributes"] = dict_diff(
                target_json["config"]["model_attributes"], self.model_attributes
            )
            json_dict["config"]["target_models"].append(target_json)
        return serialize_to_json(json_dict, check_object)

    def get_target_models(self) -> Iterator[tuple[str, OliveModelHandler]]:
        """Iterate over (target_name, target_model) pairs."""
        return zip(self.target_names, self.target_models)

    def load_model(self, rank: Optional[int] = None, cache_model: bool = True):
        """Not supported: there is no single underlying model to load."""
        raise NotImplementedError

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk."""
        raise NotImplementedError

    def prepare_session(
        self,
        inference_settings: Optional[dict[str, Any]] = None,
        device: Device = Device.CPU,
        execution_providers: Union[str, list[str]] = None,
        rank: Optional[int] = None,
    ):
        """Not supported: sessions belong to the individual target models."""
        raise RuntimeError("MultiTargetModelHandler doesn't have a session of its own")

    def run_session(
        self,
        session: Any = None,
        inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None,
        **kwargs: dict[str, Any],
    ) -> Any:
        """Not supported: sessions belong to the individual target models."""
        raise RuntimeError("MultiTargetModelHandler doesn't have a session of its own")
9 changes: 9 additions & 0 deletions olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,15 @@
"supported_quantization_encodings": [ ],
"run_on_target": true
},
"ModelPackager": {
"module_path": "olive.passes.onnx.model_packager.ModelPackager",
"supported_providers": [ "QNNExecutionProvider", "OpenVINOExecutionProvider" ],
"supported_accelerators": [ "npu", "gpu", "cpu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"run_on_target": true
},
"ExtractAdapters": {
"module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
"supported_providers": [ "*" ],
Expand Down
Loading
Loading