Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions ads/aqua/common/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ class AquaMultiModelRef(Serializable):
Number of GPUs required for deployment.
env_var : Optional[Dict[str, Any]]
Optional environment variables to override during deployment.
artifact_location : Optional[str]
Artifact path of model in the multimodel group.
"""

model_id: str = Field(..., description="The model OCID to deploy.")
Expand All @@ -163,6 +165,9 @@ class AquaMultiModelRef(Serializable):
env_var: Optional[dict] = Field(
default_factory=dict, description="The environment variables of the model."
)
artifact_location: Optional[str] = Field(
None, description="Artifact path of model in the multimodel group."
)

class Config:
extra = "ignore"
Expand Down
36 changes: 34 additions & 2 deletions ads/aqua/evaluation/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ads.aqua import logger
from ads.aqua.app import AquaApp
from ads.aqua.common import utils
from ads.aqua.common.entities import AquaMultiModelRef
from ads.aqua.common.enums import (
DataScienceResource,
Resource,
Expand Down Expand Up @@ -97,6 +98,7 @@
from ads.model.model_metadata import (
MetadataTaxonomyKeys,
ModelCustomMetadata,
ModelCustomMetadataItem,
ModelProvenanceMetadata,
ModelTaxonomyMetadata,
)
Expand Down Expand Up @@ -578,6 +580,7 @@ def validate_model_name(

This function verifies that:
- The model group is not empty.
- The multi-model metadata is present in the DataScienceModel custom metadata.
- The user provided a non-empty model name.
- The provided model name exists in the DataScienceModel metadata.
- The deployment configuration contains core metadata required for validation.
Expand Down Expand Up @@ -636,10 +639,39 @@ def validate_model_name(
f"'{create_aqua_evaluation_details.evaluation_source_id}' does not contain any information about deployed models."
)

multi_model_metadata_value = custom_metadata_list.get(
ModelCustomMetadataFields.MULTIMODEL_METADATA,
ModelCustomMetadataItem(key=ModelCustomMetadataFields.MULTIMODEL_METADATA),
).value

if not multi_model_metadata_value:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

error_message = (
        f"Required model metadata is missing for evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}. "
        f"A valid multi-model deployment requires {ModelCustomMetadataFields.MULTIMODEL_METADATA}. "
        "Please recreate the model deployment and retry the evaluation, as an issue occurred during the initialization of the model group."
    )
    logger.debug(error_message)
    raise AquaRuntimeError(error_message)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

error_message = (
f"Required model metadata is missing for evaluation source ID: {evaluation_source.id}. "
f"A valid multi-model deployment requires {ModelCustomMetadataFields.MULTIMODEL_METADATA}. "
"Please recreate the model deployment and retry the evaluation, as an issue occurred during the initialization of the model group."
)
logger.debug(error_message)
raise AquaRuntimeError(error_message)

try:
multi_model_metadata = json.loads(
evaluation_source.dsc_model.get_custom_metadata_artifact(
metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA
).decode("utf-8")
)
except Exception as ex:
error_message = (
f"Error fetching {ModelCustomMetadataFields.MULTIMODEL_METADATA} "
f"from custom metadata for evaluation source ID '{evaluation_source.id}'. "
f"Details: {ex}"
)
logger.error(error_message)
raise AquaRuntimeError(error_message)

# Build the list of valid model names from custom metadata.
model_names = [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should load the metadata back into `AquaMultiModelRef`.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

custom_metadata_list.get(f"model-name-{idx}").value
for idx in range(model_group_count)
AquaMultiModelRef(**metadata).model_name
for metadata in multi_model_metadata
]

# Check if the provided model name is among the valid names.
Expand Down
1 change: 1 addition & 0 deletions ads/aqua/model/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class ModelCustomMetadataFields(ExtendedEnum):
FINETUNE_CONTAINER = "finetune-container"
DEPLOYMENT_CONTAINER_URI = "deployment-container-uri"
MULTIMODEL_GROUP_COUNT = "model_group_count"
MULTIMODEL_METADATA = "multi_model_metadata"


class ModelTask(ExtendedEnum):
Expand Down
62 changes: 20 additions & 42 deletions ads/aqua/model/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import json
import os
import pathlib
from datetime import datetime, timedelta
Expand Down Expand Up @@ -93,6 +94,7 @@
TENANCY_OCID,
)
from ads.model import DataScienceModel
from ads.model.common.utils import MetadataArtifactPathType
from ads.model.model_metadata import (
MetadataCustomCategory,
ModelCustomMetadata,
Expand Down Expand Up @@ -279,9 +281,10 @@ def create_multi(
selected_models_deployment_containers = set()

# Process each model
for idx, model in enumerate(models):
for model in models:
source_model = DataScienceModel.from_id(model.model_id)
display_name = source_model.display_name
# Update model name in user's input model
model.model_name = model.model_name or display_name

# TODO Uncomment the section below, if only service models should be allowed for multi-model deployment
Expand Down Expand Up @@ -310,6 +313,9 @@ def create_multi(
"Please register the model first."
)

# Update model artifact location in user's input model
model.artifact_location = model_artifact_path

artifact_list.append(model_artifact_path)

# Validate deployment container consistency
Expand All @@ -328,47 +334,6 @@ def create_multi(

selected_models_deployment_containers.add(deployment_container)

# Add model-specific metadata
model_custom_metadata.add(
key=f"model-id-{idx}",
value=source_model.id,
description=f"ID of '{display_name}' in the multimodel group.",
category="Other",
)
model_custom_metadata.add(
key=f"model-name-{idx}",
value=display_name,
description=f"Name of '{display_name}' in the multimodel group.",
category="Other",
)
if model.gpu_count:
model_custom_metadata.add(
key=f"model-gpu-count-{idx}",
value=model.gpu_count,
description=f"GPU count of '{display_name}' in the multimodel group.",
category="Other",
)
user_params = (
" ".join(
f"{name} {value}" for name, value in model.env_var.items()
).strip()
if model.env_var
else UNKNOWN
)
if user_params:
model_custom_metadata.add(
key=f"model-user-params-{idx}",
value=user_params,
description=f"User params of '{display_name}' in the multimodel group.",
category="Other",
)
model_custom_metadata.add(
key=f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}",
value=model_artifact_path,
description=f"Artifact path for '{display_name}' in the multimodel group.",
category="Other",
)

# Check if the all models in the group shares same container family
if len(selected_models_deployment_containers) > 1:
raise AquaValueError(
Expand Down Expand Up @@ -430,6 +395,19 @@ def create_multi(
f"Aqua Model '{custom_model.id}' created with models: {', '.join(display_name_list)}."
)

# Upload the multi-model metadata as a custom metadata artifact
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add more debug statements here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

custom_model.create_custom_metadata_artifact(
metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA,
artifact_path_or_content=json.dumps(
[model.model_dump() for model in models]
),
path_type=MetadataArtifactPathType.CONTENT,
)

logger.debug(
f"Multi model metadata uploaded for Aqua model: {custom_model.id}."
)

# Track telemetry event
self.telemetry.record_event_async(
category="aqua/multimodel",
Expand Down
51 changes: 19 additions & 32 deletions ads/aqua/modeldeployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def _create_multi(

container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN).strip()

for idx, model in enumerate(create_deployment_details.models):
for model in create_deployment_details.models:
user_params = build_params_string(model.env_var)
if user_params:
restricted_params = self._find_restricted_params(
Expand Down Expand Up @@ -588,22 +588,13 @@ def _create_multi(
params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
break

artifact_location_key = (
f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}"
)
artifact_path_prefix = aqua_model.custom_metadata_list.get(
artifact_location_key
).value.rstrip("/")
artifact_path_prefix = model.artifact_location.rstrip("/")
if ObjectStorageDetails.is_oci_path(artifact_path_prefix):
os_path = ObjectStorageDetails.from_path(artifact_path_prefix)
artifact_path_prefix = os_path.filepath.rstrip("/")

model_config.append({"params": params, "model_path": artifact_path_prefix})

model_name_key = f"model-name-{idx}"
model_name_list.append(
aqua_model.custom_metadata_list.get(model_name_key).value
)
model_name_list.append(model.model_name)

env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})})

Expand Down Expand Up @@ -959,28 +950,24 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail":
)
aqua_model = DataScienceModel.from_id(aqua_model_id)
custom_metadata_list = aqua_model.custom_metadata_list
model_group_count = int(
custom_metadata_list.get(
ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT
).value
multi_model_metadata_value = custom_metadata_list.get(
ModelCustomMetadataFields.MULTIMODEL_METADATA,
ModelCustomMetadataItem(
key=ModelCustomMetadataFields.MULTIMODEL_METADATA
),
).value
if not multi_model_metadata_value:
raise AquaRuntimeError(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

raise AquaRuntimeError(
        f"Invalid multi-model deployment: {model_deployment_id}. "
        f"Ensure that the required custom metadata `{ModelCustomMetadataFields.MULTIMODEL_METADATA}` is added to the AQUA multi-model `{aqua_model.display_name}` ({aqua_model.id})."
    )

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

f"Invalid multi-model deployment: {model_deployment_id}. "
f"Ensure that the required custom metadata `{ModelCustomMetadataFields.MULTIMODEL_METADATA}` is added to the AQUA multi-model `{aqua_model.display_name}` ({aqua_model.id})."
)
multi_model_metadata = json.loads(
aqua_model.dsc_model.get_custom_metadata_artifact(
metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA
).decode("utf-8")
)
aqua_deployment.models = [
AquaMultiModelRef(
model_id=custom_metadata_list.get(f"model-id-{idx}").value,
model_name=custom_metadata_list.get(f"model-name-{idx}").value,
gpu_count=custom_metadata_list.get(
f"model-gpu-count-{idx}",
ModelCustomMetadataItem(key=f"model-gpu-count-{idx}"),
).value,
env_var=get_params_dict(
custom_metadata_list.get(
f"model-user-params-{idx}",
ModelCustomMetadataItem(key=f"model-user-params-{idx}"),
).value
or UNKNOWN
),
)
for idx in range(model_group_count)
AquaMultiModelRef(**metadata) for metadata in multi_model_metadata
]

return AquaDeploymentDetail(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,66 +4,10 @@ spec:
compartmentId: ocid1.compartment.oc1..<OCID>
customMetadataList:
data:
- category: Other
description: ID of model_one in the multimodel group.
key: model-id-0
value: ocid1.compartment.oc1..<OCID>
- category: Other
description: Name of model_one in the multimodel group.
key: model-name-0
value: model_one
- category: Other
description: GPU count of model_one in the multimodel group.
key: model-gpu-count-0
value: 1
- category: Other
description: User params of model_one in the multimodel group.
key: model-user-params-0
value: --test_key_one test_value_one
- category: Other
description: Artifact path for model_one in the multimodel group.
key: artifact_location-0
value: model_one_path
- category: Other
description: ID of model_two in the multimodel group.
key: model-id-1
value: ocid1.compartment.oc1..<OCID>
- category: Other
description: Name of model_two in the multimodel group.
key: model-name-1
value: model_two
- category: Other
description: GPU count of model_two in the multimodel group.
key: model-gpu-count-1
value: 1
- category: Other
description: User params of model_two in the multimodel group.
key: model-user-params-1
value: --test_key_two test_value_two
- category: Other
description: Artifact path for model_two in the multimodel group.
key: artifact_location-1
value: model_two_path
- category: Other
description: ID of model_three in the multimodel group.
key: model-id-2
value: ocid1.compartment.oc1..<OCID>
- category: Other
description: Name of model_three in the multimodel group.
key: model-name-2
value: model_three
- category: Other
description: GPU count of model_three in the multimodel group.
key: model-gpu-count-2
value: 1
- category: Other
description: User params of model_three in the multimodel group.
key: model-user-params-2
value: --test_key_three test_value_three
- category: Other
description: Artifact path for model_three in the multimodel group.
key: artifact_location-2
value: model_three_path
- category: null
description: null
key: multi_model_metadata
value: Uploaded
- category: Other
description: Inference container mapping for multi_model
key: deployment-container
Expand Down
Loading