diff --git a/.gitignore b/.gitignore index 8abb0d36e..081c68f93 100644 --- a/.gitignore +++ b/.gitignore @@ -86,7 +86,8 @@ celerybeat-schedule *.sage.py # dotenv -.env +.env* +run_ads.sh # virtualenv .venv diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index 2a3e827c3..dd629bbd4 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Copyright (c) 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ - +import logging from urllib.parse import urlparse from tornado.web import HTTPError @@ -259,9 +259,10 @@ class AquaDeploymentParamsHandler(AquaAPIhandler): def get(self, model_id): """Handle GET request.""" instance_shape = self.get_argument("instance_shape") + gpu_count = self.get_argument("gpu_count", default=None) return self.finish( AquaDeploymentApp().get_deployment_default_params( - model_id=model_id, instance_shape=instance_shape + model_id=model_id, instance_shape=instance_shape, gpu_count=gpu_count ) ) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index b7787ea21..bc34abdac 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -660,6 +660,7 @@ def get_deployment_default_params( self, model_id: str, instance_shape: str, + gpu_count: int = None, ) -> List[str]: """Gets the default params set in the deployment configs for the given model and instance shape. @@ -671,6 +672,9 @@ def get_deployment_default_params( instance_shape: (str). The shape of the instance used for deployment. + gpu_count: (int, optional). + The number of GPUs used by the Aqua model. Defaults to None. + Returns ------- List[str]: @@ -679,6 +683,7 @@ def get_deployment_default_params( """ default_params = [] + config_params = {} model = DataScienceModel.from_id(model_id) try: container_type_key = model.custom_metadata_list.get( @@ -695,12 +700,31 @@ def get_deployment_default_params( and container_type_key in InferenceContainerTypeFamily.values() ): deployment_config = self.get_deployment_config(model_id) - config_params = ( - deployment_config.get("configuration", UNKNOWN_DICT) - .get(instance_shape, UNKNOWN_DICT) - .get("parameters", UNKNOWN_DICT) - .get(get_container_params_type(container_type_key), UNKNOWN) - ) + + instance_shape_config = deployment_config.get( + "configuration", UNKNOWN_DICT + ).get(instance_shape, UNKNOWN_DICT) + + if "multi_model_deployment" in instance_shape_config and gpu_count: + gpu_params = ( + instance_shape_config + .get("multi_model_deployment", UNKNOWN_DICT) + ) + + for gpu_config in gpu_params: + if gpu_config["gpu_count"] == gpu_count: + config_params = gpu_config.get("parameters", UNKNOWN_DICT).get( + get_container_params_type(container_type_key), UNKNOWN + ) + break + + else: + config_params = ( + instance_shape_config + .get("parameters", UNKNOWN_DICT) + .get(get_container_params_type(container_type_key), UNKNOWN) + ) + if config_params: params_list = get_params_list(config_params) restricted_params_set = get_restricted_params_by_container( diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json index b60178403..824fa8541 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json @@ -1,6 +1,6 @@ { "configuration": { - "VM.GPU.A10.1": { + "VM.GPU.A10.4": { "parameters": { "TGI_PARAMS": "--max-stop-sequences 6", "VLLM_PARAMS": "--max-model-len 4096" @@ -24,7 +24,7 @@ } }, "shape": [ - "VM.GPU.A10.1", + "VM.GPU.A10.4", "VM.Standard.A1.Flex" ] } diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json new file mode 100644 index 000000000..8764c354b --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json @@ -0,0 +1,43 @@ +{ + "shape": [ + "VM.GPU.A10.1", + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.L40S-NC.4" + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1 + } + ] + }, + "BM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1 + }, + { + "gpu_count": 2 + } + ] + }, + "BM.GPU.L40S-NC.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2 + } + ] + } + } +} diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 74612ac8d..9ee399826 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -40,7 +40,9 @@ class TestDataset: MODEL_DEPLOYMENT_URL = "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." MODEL_ID = "ocid1.datasciencemodeldeployment.oc1.." DEPLOYMENT_IMAGE_NAME = "dsmc://image-name:1.0.0.0" - DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.1" + DEPLOYMENT_SHAPE_NAME = "BM.GPU.A10.4" + DEPLOYMENT_GPU_COUNT = 1 + DEPLOYMENT_GPU_COUNT_B = 2 DEPLOYMENT_SHAPE_NAME_CPU = "VM.Standard.A1.Flex" model_deployment_object = [ @@ -762,24 +764,28 @@ def test_create_deployment_for_tei_byoc_embedding_model( ( "VLLM_PARAMS", "odsc-vllm-serving", + 2, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--trust-remote-code"], ), ( "VLLM_PARAMS", "odsc-vllm-serving", - [], - [], + None, + ["--max-model-len 4096"], + ["--max-model-len 4096"], ), ( "TGI_PARAMS", "odsc-tgi-serving", - ["--sharded true", "--trust-remote-code", "--max-stop-sequences"], - ["--max-stop-sequences"], + 1, + [], + [], ), ( "CUSTOM_PARAMS", "custom-container-key", + None, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ), @@ -787,24 +793,30 @@ def test_create_deployment_for_tei_byoc_embedding_model( ) @patch("ads.model.datascience_model.DataScienceModel.from_id") def test_get_deployment_default_params( - self, - container_params_field, - container_type_key, - params, - allowed_params, - mock_from_id, - ): + self, + container_params_field, + container_type_key, + gpu_count, + params, + allowed_params, + mock_from_id, + ): """Test for fetching config details for a given deployment.""" config_json = os.path.join( - self.curr_dir, "test_data/deployment/deployment_config.json" + self.curr_dir, "test_data/deployment/deployment_gpu_config.json" ) with open(config_json, "r") as _file: config = json.load(_file) # update config params for testing - config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][ + if gpu_count: + # build field for multi_model_deployment + config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["multi_model_deployment"] = [{"gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}}] + else: + # build field for normal deployment + config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][ container_params_field - ] = " ".join(params) + ] = " ".join(params) mock_model = MagicMock() custom_metadata_list = ModelCustomMetadata() @@ -815,10 +827,12 @@ def test_get_deployment_default_params( mock_from_id.return_value = mock_model self.app.get_deployment_config = MagicMock(return_value=config) + result = self.app.get_deployment_default_params( - TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME + TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count ) - if container_params_field == "CUSTOM_PARAMS": + + if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): assert result == [] else: assert result == allowed_params @@ -851,6 +865,7 @@ def test_get_deployment_default_params( ), ] ) + @patch("ads.model.datascience_model.DataScienceModel.from_id") @patch("ads.aqua.modeldeployment.deployment.get_container_config") def test_validate_deployment_params(