Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 75 additions & 30 deletions ads/aqua/modeldeployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
get_params_list,
get_resource_name,
get_restricted_params_by_container,
is_valid_ocid,
load_gpu_shapes_index,
validate_cmd_var,
)
Expand Down Expand Up @@ -125,6 +126,9 @@ class AquaDeploymentApp(AquaApp):
the GPU allocations for all compatible shapes.
list_shapes(self, **kwargs) -> List[Dict]:
Lists the valid model deployment shapes.
recommend_shape(self, **kwargs) -> ShapeRecommendationReport:
Generates a recommendation report or table of valid GPU deployment shapes
for the provided model and configuration.

Note:
Use `ads aqua deployment <method_name> --help` to get more details on the parameters available.
Expand Down Expand Up @@ -1283,56 +1287,97 @@ def validate_deployment_params(
)
return {"valid": True}

@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=1), timer=datetime.now))
def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
"""
For the CLI (set by default, generate_table = True), generates the table (in rich diff) with valid
GPU deployment shapes for the provided model and configuration.
Generates a recommendation report or table of valid GPU deployment shapes
for the provided model and configuration.

For the API (set generate_table = False), generates the JSON with valid
GPU deployment shapes for the provided model and configuration.
For CLI (default `generate_table=True`): generates a rich table.
For API (`generate_table=False`): returns a structured JSON report.
Example: ads aqua deployment recommend_shape --model-id meta-llama/Llama-3.3-70B-Instruct --generate_table false

Validates the input and determines whether recommendations are available.

Parameters
----------
**kwargs
model_ocid : str
(Required) The OCID of the model to recommend feasible compute shapes for.
Args:
model_id : str
(Required) The OCID or Hugging Face model ID to recommend compute shapes for.
generate_table : bool, optional
If True, generate and return a rich-diff table; if False, return a JSON response (default is True).
compartment_id : str, optional
The OCID of the user's compartment to use for the recommendation.
If True, generates and returns a table; if False, returns a structured report (default: True).

Returns
-------
Table (generate_table = True)
If `generate_table` is True, a table displaying the recommendation report with compatible deployment shapes,
or troubleshooting info if no shape is suitable.
Table
If `generate_table=True`, returns a table of shape recommendations.

ShapeRecommendationReport (generate_table = False)
If `generate_table` is False, a structured recommendation report with compatible deployment shapes,
or troubleshooting info and citing the largest shapes if no shape is suitable.
ShapeRecommendationReport
If `generate_table=False`, returns a structured recommendation report.

Raises
------
AquaValueError
If the model type is unsupported and no recommendation report can be generated.
If required parameters are missing or invalid, or if an unexpected error occurs while generating the shape recommendation.
"""
deployment_config = self.get_deployment_config(model_id=kwargs.get("model_id"))
kwargs["deployment_config"] = deployment_config
model_id = kwargs.pop("model_id", None)
if not model_id:
raise AquaValueError(
"The 'model_id' parameter is required to generate shape recommendations. "
"Please provide a valid OCID or Hugging Face model identifier."
)

logger.info(f"Starting shape recommendation for model_id: {model_id}")

self.telemetry.record_event_async(
category="aqua/deployment",
action="recommend_shape",
detail=get_ocid_substring(model_id, key_len=8)
if is_valid_ocid(ocid=model_id)
else model_id,
**kwargs,
)

if is_valid_ocid(ocid=model_id):
logger.debug(
f"Attempting to retrieve deployment configuration for model_id={model_id}"
)
try:
deployment_config = self.get_deployment_config(model_id=model_id)
kwargs["deployment_config"] = deployment_config
logger.debug(
f"Retrieved deployment configuration for model: {model_id}"
)
except Exception as e:
logger.warning(
f"Failed to retrieve deployment configuration for model_id={model_id}: {e}"
)

try:
request = RequestRecommend(**kwargs)
request = RequestRecommend(model_id=model_id, **kwargs)
except ValidationError as e:
custom_error = build_pydantic_error_message(e)
raise AquaValueError( # noqa: B904
f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
logger.error(
f"Validation failed for shape recommendation request: {custom_error}"
)
raise AquaValueError(
f"Invalid input parameters for shape recommendation: {custom_error}"
) from e

shape_recommend = AquaShapeRecommend()
shape_recommend_report = shape_recommend.which_shapes(request)

return shape_recommend_report
try:
shape_recommend = AquaShapeRecommend()
logger.info(
f"Running shape recommendation for model '{model_id}' "
f"with generate_table={getattr(request, 'generate_table', False)}"
)
shape_recommend_report = shape_recommend.which_shapes(request)
logger.info(f"Shape recommendation completed successfully for {model_id}")
return shape_recommend_report
except AquaValueError:
raise
except Exception as e:
logger.exception(
f"Unexpected error while generating shape recommendations: {e}"
)
raise AquaValueError(
f"An unexpected error occurred during shape recommendation: {e}"
) from e

@telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
Expand Down
10 changes: 8 additions & 2 deletions ads/aqua/shaperecommend/recommend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
import os
import re
import shutil
from typing import Dict, List, Optional, Tuple, Union
Expand Down Expand Up @@ -189,7 +188,7 @@ def _fetch_hf_config(self, model_id: str) -> Dict:
"""
try:
config_path = hf_hub_download(repo_id=model_id, filename="config.json")
with open(config_path, "r", encoding="utf-8") as f:
with open(config_path, encoding="utf-8") as f:
return json.load(f)
except HfHubHTTPError as e:
format_hf_custom_error_message(e)
Expand Down Expand Up @@ -281,6 +280,13 @@ def _rich_diff_table(shape_report: ShapeRecommendationReport) -> Table:
if name
else "Model Deployment Recommendations"
)

header = (
f"{header}\n"
"Currently, only the VLLM container is supported. "
"All shape and parameter recommendations will be generated for the VLLM container."
)

logger.debug(f"Table header set to: {header!r}")

if shape_report.troubleshoot:
Expand Down
24 changes: 16 additions & 8 deletions ads/aqua/shaperecommend/shape_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,29 @@ class RequestRecommend(BaseModel):

model_id: str = Field(
...,
description="The OCID or Hugging Face ID of the model to recommend feasible compute shapes.",
description=(
"The OCID or Hugging Face ID of the model for which to recommend feasible compute shapes."
),
)
generate_table: Optional[bool] = (
Field(
True,
description="True - to generate the rich diff Table, False - generate the JSON response",

generate_table: Optional[bool] = Field(
True,
description=(
"If True, generate a rich formatted table as the response. "
"If False, return the recommendation as a JSON structure."
),
)

compartment_id: Optional[str] = Field(
COMPARTMENT_OCID, description="The OCID of user's compartment"
COMPARTMENT_OCID,
description="The OCID of the user's compartment.",
)

deployment_config: Optional[AquaDeploymentConfig] = Field(
deployment_config: Optional["AquaDeploymentConfig"] = Field(
None,
description="The deployment configuration for model (only available for service models).",
description=(
"The deployment configuration for the model (only available for service models)."
),
)

class Config:
Expand Down
Loading