From 07eb59d3644f4d334416f8a99f90418675855992 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Tue, 4 Feb 2025 16:21:46 -0800 Subject: [PATCH 1/2] initial AQUA API code changes in get_deployment_default_params --- .gitignore | 3 +- ads/aqua/extension/deployment_handler.py | 5 +- ads/aqua/modeldeployment/deployment.py | 36 +++++++-- .../deployment/deployment_config.json | 4 +- .../deployment/deployment_gpu_config.json | 30 +++++++ .../deployment/deployment_gpu_config2.json | 34 ++++++++ .../with_extras/aqua/test_deployment.py | 78 ++++++++++++++++++- 7 files changed, 178 insertions(+), 12 deletions(-) create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json diff --git a/.gitignore b/.gitignore index 8abb0d36e..081c68f93 100644 --- a/.gitignore +++ b/.gitignore @@ -86,7 +86,8 @@ celerybeat-schedule *.sage.py # dotenv -.env +.env* +run_ads.sh # virtualenv .venv diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index 2a3e827c3..dd629bbd4 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Copyright (c) 2024 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ - +import logging from urllib.parse import urlparse from tornado.web import HTTPError @@ -259,9 +259,10 @@ class AquaDeploymentParamsHandler(AquaAPIhandler): def get(self, model_id): """Handle GET request.""" instance_shape = self.get_argument("instance_shape") + gpu_count = self.get_argument("gpu_count", default=None) return self.finish( AquaDeploymentApp().get_deployment_default_params( - model_id=model_id, instance_shape=instance_shape + model_id=model_id, instance_shape=instance_shape, gpu_count=gpu_count ) ) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index b7787ea21..bc34abdac 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -660,6 +660,7 @@ def get_deployment_default_params( self, model_id: str, instance_shape: str, + gpu_count: int = None, ) -> List[str]: """Gets the default params set in the deployment configs for the given model and instance shape. @@ -671,6 +672,9 @@ def get_deployment_default_params( instance_shape: (str). The shape of the instance used for deployment. + gpu_count: (int, optional). + The number of GPUs used by the Aqua model. Defaults to None. 
+ Returns ------- List[str]: @@ -679,6 +683,7 @@ def get_deployment_default_params( """ default_params = [] + config_params = {} model = DataScienceModel.from_id(model_id) try: container_type_key = model.custom_metadata_list.get( @@ -695,12 +700,31 @@ def get_deployment_default_params( and container_type_key in InferenceContainerTypeFamily.values() ): deployment_config = self.get_deployment_config(model_id) - config_params = ( - deployment_config.get("configuration", UNKNOWN_DICT) - .get(instance_shape, UNKNOWN_DICT) - .get("parameters", UNKNOWN_DICT) - .get(get_container_params_type(container_type_key), UNKNOWN) - ) + + instance_shape_config = deployment_config.get( + "configuration", UNKNOWN_DICT + ).get(instance_shape, UNKNOWN_DICT) + + if "multi_model_deployment" in instance_shape_config and gpu_count: + gpu_params = ( + instance_shape_config + .get("multi_model_deployment", UNKNOWN_DICT) + ) + + for gpu_config in gpu_params: + if gpu_config["gpu_count"] == gpu_count: + config_params = gpu_config.get("parameters", UNKNOWN_DICT).get( + get_container_params_type(container_type_key), UNKNOWN + ) + break + + else: + config_params = ( + instance_shape_config + .get("parameters", UNKNOWN_DICT) + .get(get_container_params_type(container_type_key), UNKNOWN) + ) + if config_params: params_list = get_params_list(config_params) restricted_params_set = get_restricted_params_by_container( diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json index b60178403..824fa8541 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json @@ -1,6 +1,6 @@ { "configuration": { - "VM.GPU.A10.1": { + "VM.GPU.A10.4": { "parameters": { "TGI_PARAMS": "--max-stop-sequences 6", "VLLM_PARAMS": "--max-model-len 4096" @@ -24,7 +24,7 @@ } }, "shape": [ - "VM.GPU.A10.1", + 
"VM.GPU.A10.4", "VM.Standard.A1.Flex" ] } diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json new file mode 100644 index 000000000..7ff81d11a --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json @@ -0,0 +1,30 @@ +{ + "shape": [ + "VM.GPU.A10.2", + "VM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ], + "configuration": { + "VM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code 6" + } + } + ] + } + } +} + diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json new file mode 100644 index 000000000..d470b6130 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json @@ -0,0 +1,34 @@ +{ + "shape": [ + "VM.GPU.A10.1", + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.L40S-NC.4" + ], + "configuration": { + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 1 + } + ] + }, + "VM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 1 + }, + { + "gpu_count": 2 + } + ] + }, + "BM.GPU.L40S-NC.4": { + "multi_model_deployment": [ + { + "gpu_count": 2 + } + ] + } + } +} diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 74612ac8d..7a5f06f41 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -40,7 +40,9 @@ class TestDataset: MODEL_DEPLOYMENT_URL = "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." 
MODEL_ID = "ocid1.datasciencemodeldeployment.oc1.." DEPLOYMENT_IMAGE_NAME = "dsmc://image-name:1.0.0.0" - DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.1" + DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.4" + DEPLOYMENT_GPU_COUNT = 1 + DEPLOYMENT_GPU_COUNT_B = 2 DEPLOYMENT_SHAPE_NAME_CPU = "VM.Standard.A1.Flex" model_deployment_object = [ @@ -818,11 +820,84 @@ def test_get_deployment_default_params( result = self.app.get_deployment_default_params( TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME ) + if container_params_field == "CUSTOM_PARAMS": assert result == [] else: assert result == allowed_params + + # @parameterized.expand( + # [ + # ( + # "VLLM_PARAMS", + # "odsc-vllm-serving", + # 1, + # ["--max-model-len 4096"], + # ["--max-model-len 4096"], + # ), + # ( + # "TGI_PARAMS", + # "odsc-tgi-serving", + # 1, + # [], + # [], + # ), + # ( + # "CUSTOM_PARAMS", + # "custom-container-key", + # None, + # ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], + # ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], + # ), + # ] + # ) + @patch("ads.model.datascience_model.DataScienceModel.from_id") + def test_get_deployment_default_params_multimodel( + self, + # container_params_field, + # container_type_key, + # gpu_count, + # params, + # allowed_params, + mock_from_id, + ): + """Test for fetching config details for a given deployment.""" + + config_json = os.path.join( + self.curr_dir, "test_data/deployment/deployment_gpu_config2.json" + ) + with open(config_json, "r") as _file: + config = json.load(_file) + # update config params for testing + # config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["multi_model_deployment"] = [{"gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}}] + + mock_model = MagicMock() + custom_metadata_list = ModelCustomMetadata() + custom_metadata_list.add( + **{"key": "deployment-container", "value": "odsc-vllm-serving"} + ) + mock_model.custom_metadata_list = custom_metadata_list + 
mock_from_id.return_value = mock_model + + self.app.get_deployment_config = MagicMock(return_value=config) + # result = self.app.get_deployment_default_params( + # TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count + # ) + + result = self.app.get_deployment_default_params( + TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, TestDataset.DEPLOYMENT_GPU_COUNT_B + ) + + assert result == [] + # if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): + # assert result == [] + # else: + # assert result == allowed_params + + + + @parameterized.expand( [ ( @@ -851,6 +926,7 @@ def test_get_deployment_default_params( ), ] ) + @patch("ads.model.datascience_model.DataScienceModel.from_id") @patch("ads.aqua.modeldeployment.deployment.get_container_config") def test_validate_deployment_params( From 02fe8299cfb14e93de9dda6a667002b11989c2e2 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Wed, 5 Feb 2025 11:17:42 -0800 Subject: [PATCH 2/2] combined multimodel and normal model unit tests for test_get_deployment_default_params --- .../deployment/deployment_gpu_config.json | 39 ++++-- .../deployment/deployment_gpu_config2.json | 34 ----- .../with_extras/aqua/test_deployment.py | 117 +++++------------- 3 files changed, 54 insertions(+), 136 deletions(-) delete mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json index 7ff81d11a..8764c354b 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json @@ -1,30 +1,43 @@ { "shape": [ + "VM.GPU.A10.1", "VM.GPU.A10.2", - "VM.GPU.A10.4", - "BM.GPU.A100-v2.8", - "BM.GPU.H100.8" + "BM.GPU.A10.4", + "BM.GPU.L40S-NC.4" ], "configuration": { - "VM.GPU.A10.4": { + "VM.GPU.A10.2": { "parameters": { 
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" }, "multi_model_deployment": [ { - "gpu_count": 1, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + "gpu_count": 1 + } + ] + }, + "BM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1 }, { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code 6" - } + "gpu_count": 2 + } + ] + }, + "BM.GPU.L40S-NC.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2 } ] } } } - diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json deleted file mode 100644 index d470b6130..000000000 --- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "shape": [ - "VM.GPU.A10.1", - "VM.GPU.A10.2", - "BM.GPU.A10.4", - "BM.GPU.L40S-NC.4" - ], - "configuration": { - "VM.GPU.A10.2": { - "multi_model_deployment": [ - { - "gpu_count": 1 - } - ] - }, - "VM.GPU.A10.4": { - "multi_model_deployment": [ - { - "gpu_count": 1 - }, - { - "gpu_count": 2 - } - ] - }, - "BM.GPU.L40S-NC.4": { - "multi_model_deployment": [ - { - "gpu_count": 2 - } - ] - } - } -} diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 7a5f06f41..9ee399826 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -40,7 +40,7 @@ class TestDataset: MODEL_DEPLOYMENT_URL = "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." MODEL_ID = "ocid1.datasciencemodeldeployment.oc1.." 
DEPLOYMENT_IMAGE_NAME = "dsmc://image-name:1.0.0.0" - DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.4" + DEPLOYMENT_SHAPE_NAME = "BM.GPU.A10.4" DEPLOYMENT_GPU_COUNT = 1 DEPLOYMENT_GPU_COUNT_B = 2 DEPLOYMENT_SHAPE_NAME_CPU = "VM.Standard.A1.Flex" @@ -764,24 +764,28 @@ def test_create_deployment_for_tei_byoc_embedding_model( ( "VLLM_PARAMS", "odsc-vllm-serving", + 2, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--trust-remote-code"], ), ( "VLLM_PARAMS", "odsc-vllm-serving", - [], - [], + None, + ["--max-model-len 4096"], + ["--max-model-len 4096"], ), ( "TGI_PARAMS", "odsc-tgi-serving", - ["--sharded true", "--trust-remote-code", "--max-stop-sequences"], - ["--max-stop-sequences"], + 1, + [], + [], ), ( "CUSTOM_PARAMS", "custom-container-key", + None, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ), @@ -789,24 +793,30 @@ def test_create_deployment_for_tei_byoc_embedding_model( ) @patch("ads.model.datascience_model.DataScienceModel.from_id") def test_get_deployment_default_params( - self, - container_params_field, - container_type_key, - params, - allowed_params, - mock_from_id, - ): + self, + container_params_field, + container_type_key, + gpu_count, + params, + allowed_params, + mock_from_id, + ): """Test for fetching config details for a given deployment.""" config_json = os.path.join( - self.curr_dir, "test_data/deployment/deployment_config.json" + self.curr_dir, "test_data/deployment/deployment_gpu_config.json" ) with open(config_json, "r") as _file: config = json.load(_file) # update config params for testing - config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][ + if gpu_count: + # build field for multi_model_deployment + config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["multi_model_deployment"] = [{"gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}}] + else: + # build field for normal 
deployment + config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][ container_params_field - ] = " ".join(params) + ] = " ".join(params) mock_model = MagicMock() custom_metadata_list = ModelCustomMetadata() @@ -817,87 +827,16 @@ def test_get_deployment_default_params( mock_from_id.return_value = mock_model self.app.get_deployment_config = MagicMock(return_value=config) + result = self.app.get_deployment_default_params( - TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME + TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count ) - if container_params_field == "CUSTOM_PARAMS": + if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): assert result == [] else: assert result == allowed_params - - # @parameterized.expand( - # [ - # ( - # "VLLM_PARAMS", - # "odsc-vllm-serving", - # 1, - # ["--max-model-len 4096"], - # ["--max-model-len 4096"], - # ), - # ( - # "TGI_PARAMS", - # "odsc-tgi-serving", - # 1, - # [], - # [], - # ), - # ( - # "CUSTOM_PARAMS", - # "custom-container-key", - # None, - # ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], - # ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], - # ), - # ] - # ) - @patch("ads.model.datascience_model.DataScienceModel.from_id") - def test_get_deployment_default_params_multimodel( - self, - # container_params_field, - # container_type_key, - # gpu_count, - # params, - # allowed_params, - mock_from_id, - ): - """Test for fetching config details for a given deployment.""" - - config_json = os.path.join( - self.curr_dir, "test_data/deployment/deployment_gpu_config2.json" - ) - with open(config_json, "r") as _file: - config = json.load(_file) - # update config params for testing - # config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["multi_model_deployment"] = [{"gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}}] - - mock_model = MagicMock() - custom_metadata_list = ModelCustomMetadata() - custom_metadata_list.add( - 
**{"key": "deployment-container", "value": "odsc-vllm-serving"} - ) - mock_model.custom_metadata_list = custom_metadata_list - mock_from_id.return_value = mock_model - - self.app.get_deployment_config = MagicMock(return_value=config) - # result = self.app.get_deployment_default_params( - # TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count - # ) - - result = self.app.get_deployment_default_params( - TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, TestDataset.DEPLOYMENT_GPU_COUNT_B - ) - - assert result == [] - # if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): - # assert result == [] - # else: - # assert result == allowed_params - - - - @parameterized.expand( [ (