3 changes: 2 additions & 1 deletion .gitignore
@@ -86,7 +86,8 @@ celerybeat-schedule
 *.sage.py
 
 # dotenv
-.env
+.env*
+run_ads.sh
 
 # virtualenv
 .venv
5 changes: 3 additions & 2 deletions ads/aqua/extension/deployment_handler.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# Copyright (c) 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import logging
from urllib.parse import urlparse

from tornado.web import HTTPError
@@ -259,9 +259,10 @@ class AquaDeploymentParamsHandler(AquaAPIhandler):
     def get(self, model_id):
         """Handle GET request."""
         instance_shape = self.get_argument("instance_shape")
+        gpu_count = self.get_argument("gpu_count", default=None)
         return self.finish(
             AquaDeploymentApp().get_deployment_default_params(
-                model_id=model_id, instance_shape=instance_shape
+                model_id=model_id, instance_shape=instance_shape, gpu_count=gpu_count
             )
         )

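For context, a hedged sketch of exercising the new query parameter; the route and host below are hypothetical, and since Tornado's get_argument returns query values as strings, the app layer is assumed to normalize gpu_count to an int before matching it against the integer gpu_count entries in the deployment config.

import requests  # hypothetical client; route, host, and OCID are assumptions

resp = requests.get(
    "http://localhost:8888/aqua/deployments/config/<model_ocid>/params",
    params={"instance_shape": "BM.GPU.A10.4", "gpu_count": 2},
)
print(resp.json())  # default params resolved for this shape and GPU count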
36 changes: 30 additions & 6 deletions ads/aqua/modeldeployment/deployment.py
@@ -660,6 +660,7 @@ def get_deployment_default_params(
         self,
         model_id: str,
         instance_shape: str,
+        gpu_count: int = None,
     ) -> List[str]:
"""Gets the default params set in the deployment configs for the given model and instance shape.

@@ -671,6 +672,9 @@ def get_deployment_default_params(
         instance_shape: (str).
             The shape of the instance used for deployment.
 
+        gpu_count: (int, optional).
+            The number of GPUs used by the Aqua model. Defaults to None.
+
Returns
-------
List[str]:
@@ -679,6 +683,7 @@ def get_deployment_default_params(

"""
         default_params = []
+        config_params = {}
         model = DataScienceModel.from_id(model_id)
         try:
             container_type_key = model.custom_metadata_list.get(
@@ -695,12 +700,31 @@ def get_deployment_default_params(
             and container_type_key in InferenceContainerTypeFamily.values()
         ):
             deployment_config = self.get_deployment_config(model_id)
-            config_params = (
-                deployment_config.get("configuration", UNKNOWN_DICT)
-                .get(instance_shape, UNKNOWN_DICT)
-                .get("parameters", UNKNOWN_DICT)
-                .get(get_container_params_type(container_type_key), UNKNOWN)
-            )
+
+            instance_shape_config = deployment_config.get(
+                "configuration", UNKNOWN_DICT
+            ).get(instance_shape, UNKNOWN_DICT)
+
+            if "multi_model_deployment" in instance_shape_config and gpu_count:
+                gpu_params = instance_shape_config.get(
+                    "multi_model_deployment", UNKNOWN_DICT
+                )
+
+                for gpu_config in gpu_params:
+                    if gpu_config["gpu_count"] == gpu_count:
+                        config_params = gpu_config.get("parameters", UNKNOWN_DICT).get(
+                            get_container_params_type(container_type_key), UNKNOWN
+                        )
+                        break
+
+            else:
+                config_params = (
+                    instance_shape_config.get("parameters", UNKNOWN_DICT)
+                    .get(get_container_params_type(container_type_key), UNKNOWN)
+                )
 
             if config_params:
                 params_list = get_params_list(config_params)
                 restricted_params_set = get_restricted_params_by_container(
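Distilled from the hunk above: when gpu_count is supplied and the shape has a multi_model_deployment section, the matching per-GPU entry wins; otherwise the shape-level parameters block applies. A minimal self-contained sketch of that lookup order (hypothetical helper, not library code; plain dicts and empty strings stand in for UNKNOWN_DICT and UNKNOWN):

from typing import Optional


def resolve_config_params(
    instance_shape_config: dict, gpu_count: Optional[int], params_type: str
) -> str:
    # Prefer the per-GPU override when gpu_count is given.
    if "multi_model_deployment" in instance_shape_config and gpu_count:
        for gpu_config in instance_shape_config.get("multi_model_deployment", []):
            if gpu_config.get("gpu_count") == gpu_count:
                return gpu_config.get("parameters", {}).get(params_type, "")
        return ""  # no entry for this gpu_count, so no defaults
    # Fall back to the shape-level parameters.
    return instance_shape_config.get("parameters", {}).get(params_type, "")


# Exercised against the BM.GPU.A10.4 entry from the new test fixture below:
shape_cfg = {
    "parameters": {"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"},
    "multi_model_deployment": [{"gpu_count": 1}, {"gpu_count": 2}],
}
assert resolve_config_params(shape_cfg, None, "VLLM_PARAMS") == (
    "--trust-remote-code --max-model-len 60000"
)
assert resolve_config_params(shape_cfg, 2, "VLLM_PARAMS") == ""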
4 changes: 2 additions & 2 deletions tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json
@@ -1,6 +1,6 @@
 {
   "configuration": {
-    "VM.GPU.A10.1": {
+    "VM.GPU.A10.4": {
       "parameters": {
         "TGI_PARAMS": "--max-stop-sequences 6",
         "VLLM_PARAMS": "--max-model-len 4096"
@@ -24,7 +24,7 @@
     }
   },
   "shape": [
-    "VM.GPU.A10.1",
+    "VM.GPU.A10.4",
     "VM.Standard.A1.Flex"
   ]
 }
43 changes: 43 additions & 0 deletions tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json
@@ -0,0 +1,43 @@
{
  "shape": [
    "VM.GPU.A10.1",
    "VM.GPU.A10.2",
    "BM.GPU.A10.4",
    "BM.GPU.L40S-NC.4"
  ],
  "configuration": {
    "VM.GPU.A10.2": {
      "parameters": {
        "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
      },
      "multi_model_deployment": [
        {
          "gpu_count": 1
        }
      ]
    },
    "BM.GPU.A10.4": {
      "parameters": {
        "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
      },
      "multi_model_deployment": [
        {
          "gpu_count": 1
        },
        {
          "gpu_count": 2
        }
      ]
    },
    "BM.GPU.L40S-NC.4": {
      "parameters": {
        "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
      },
      "multi_model_deployment": [
        {
          "gpu_count": 2
        }
      ]
    }
  }
}
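Read together with the new branch in deployment.py, this fixture implies two behaviors: with no gpu_count the shape-level parameters apply, and a gpu_count that matches an entry carrying no "parameters" block yields no defaults. A hedged usage sketch (import path and OCID are assumptions; the model's metadata is also assumed to resolve to the vLLM container family):

from ads.aqua.modeldeployment import AquaDeploymentApp  # import path assumed

app = AquaDeploymentApp()
model_ocid = "ocid1.datasciencemodel.oc1..<hypothetical>"

# No gpu_count: falls back to the shape-level "parameters" block.
app.get_deployment_default_params(model_id=model_ocid, instance_shape="BM.GPU.A10.4")
# -> ["--trust-remote-code", "--max-model-len 60000"]

# gpu_count=2 matches a "multi_model_deployment" entry, but that entry carries
# no "parameters" block, so no defaults are returned.
app.get_deployment_default_params(
    model_id=model_ocid, instance_shape="BM.GPU.A10.4", gpu_count=2
)
# -> []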
49 changes: 32 additions & 17 deletions tests/unitary/with_extras/aqua/test_deployment.py
@@ -40,7 +40,9 @@ class TestDataset:
     MODEL_DEPLOYMENT_URL = "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.<region>.<MD_OCID>"
     MODEL_ID = "ocid1.datasciencemodeldeployment.oc1.<region>.<MODEL_OCID>"
     DEPLOYMENT_IMAGE_NAME = "dsmc://image-name:1.0.0.0"
-    DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.1"
+    DEPLOYMENT_SHAPE_NAME = "BM.GPU.A10.4"
+    DEPLOYMENT_GPU_COUNT = 1
+    DEPLOYMENT_GPU_COUNT_B = 2
     DEPLOYMENT_SHAPE_NAME_CPU = "VM.Standard.A1.Flex"

model_deployment_object = [
@@ -762,49 +764,59 @@ def test_create_deployment_for_tei_byoc_embedding_model(
             (
                 "VLLM_PARAMS",
                 "odsc-vllm-serving",
+                2,
                 ["--max-model-len 4096", "--seed 42", "--trust-remote-code"],
                 ["--max-model-len 4096", "--trust-remote-code"],
             ),
             (
                 "VLLM_PARAMS",
                 "odsc-vllm-serving",
-                [],
-                [],
+                None,
+                ["--max-model-len 4096"],
+                ["--max-model-len 4096"],
             ),
             (
                 "TGI_PARAMS",
                 "odsc-tgi-serving",
-                ["--sharded true", "--trust-remote-code", "--max-stop-sequences"],
-                ["--max-stop-sequences"],
+                1,
+                [],
+                [],
             ),
             (
                 "CUSTOM_PARAMS",
                 "custom-container-key",
+                None,
                 ["--max-model-len 4096", "--seed 42", "--trust-remote-code"],
                 ["--max-model-len 4096", "--seed 42", "--trust-remote-code"],
             ),
         ]
     )
@patch("ads.model.datascience_model.DataScienceModel.from_id")
def test_get_deployment_default_params(
self,
container_params_field,
container_type_key,
params,
allowed_params,
mock_from_id,
):
self,
container_params_field,
container_type_key,
gpu_count,
params,
allowed_params,
mock_from_id,
):
"""Test for fetching config details for a given deployment."""

config_json = os.path.join(
self.curr_dir, "test_data/deployment/deployment_config.json"
self.curr_dir, "test_data/deployment/deployment_gpu_config.json"
)
with open(config_json, "r") as _file:
config = json.load(_file)
-        # update config params for testing
-        config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][
-            container_params_field
-        ] = " ".join(params)
+        if gpu_count:
+            # build field for multi_model_deployment
+            config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["multi_model_deployment"] = [{"gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}}]
+        else:
+            # build field for normal deployment
+            config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][
+                container_params_field
+            ] = " ".join(params)

         mock_model = MagicMock()
         custom_metadata_list = ModelCustomMetadata()
@@ -815,10 +827,12 @@ def test_get_deployment_default_params(
         mock_from_id.return_value = mock_model
 
         self.app.get_deployment_config = MagicMock(return_value=config)
+
         result = self.app.get_deployment_default_params(
-            TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME
+            TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count
         )
-        if container_params_field == "CUSTOM_PARAMS":
+
+        if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"):
             assert result == []
         else:
             assert result == allowed_params
@@ -851,6 +865,7 @@ def test_get_deployment_default_params(
             ),
         ]
     )
+
     @patch("ads.model.datascience_model.DataScienceModel.from_id")
     @patch("ads.aqua.modeldeployment.deployment.get_container_config")
     def test_validate_deployment_params(
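Taken together, the gpu_count cases exercise the new multi_model_deployment branch end to end, while the gpu_count=None case still follows the shape-level fallback. The parametrized cases can be run in isolation with: pytest tests/unitary/with_extras/aqua/test_deployment.py -k test_get_deployment_default_params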