From d6b20d5f916d74ca830c5fce1a9e27b834fbe17e Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 7 Mar 2025 13:33:02 -0500 Subject: [PATCH 1/5] Support deploy single model for multi deployment. --- ads/aqua/modeldeployment/deployment.py | 39 +++++++----- ads/aqua/modeldeployment/entities.py | 12 +++- ads/aqua/modeldeployment/utils.py | 87 ++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index cfabf65b2..830ac86db 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -145,18 +145,10 @@ def create( f"Invalid parameters for creating a model deployment. Error details: {custom_errors}." ) from ex - # If a single model is provided, delegate to `create` method - if ( - not create_deployment_details.model_id - and create_deployment_details.models - and len(create_deployment_details.models) == 1 - ): - single_model = create_deployment_details.models[0] - logger.info( - f"Single model ({single_model.model_id}) provided. " - "Delegating to single model creation method." + if not (create_deployment_details.model_id or create_deployment_details.models): + raise AquaValueError( + "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided." ) - create_deployment_details.model_id = single_model.model_id # Set defaults for compartment and project if not provided. compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID @@ -170,6 +162,10 @@ def create( # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() if create_deployment_details.model_id: + logger.info( + f"Single model ({create_deployment_details.model_id}) provided. " + "Delegating to single model creation method." + ) aqua_model = model_app.create( model_id=create_deployment_details.model_id, compartment_id=compartment_id, @@ -254,6 +250,10 @@ def create( f"Only the following container families are supported: {supported_container_families}." ) + logger.info( + f"Multi models ({model_ids}) provided. Delegating to multi model creation method." + ) + aqua_model = model_app.create_multi( models=create_deployment_details.models, compartment_id=compartment_id, @@ -1051,15 +1051,26 @@ def get_multimodel_deployment_config( ModelDeploymentConfigSummary A summary of the model deployment configurations and GPU allocations. """ + if not model_ids: + raise AquaValueError( + "Invalid or empty parameter `model_ids`. Specify a list of valid model ids to get multi model deployment config." 
+ ) compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) # Get the all model deployment available shapes in a given compartment available_shapes = self.list_shapes(compartment_id=compartment_id) - return MultiModelDeploymentConfigLoader( - deployment_app=self, - ).load( + multi_model_deployment_config_loader = MultiModelDeploymentConfigLoader( + deployment_app=self + ) + + if len(model_ids) == 1: + return multi_model_deployment_config_loader.load_single( + shapes=available_shapes, model_id=model_ids[0] + ) + + return multi_model_deployment_config_loader.load( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index fee4ad44c..f67d820fe 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,7 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator -from ads.aqua import logger +from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,6 +15,7 @@ from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link +from ads.model.datascience_model import DataScienceModel class ShapeInfo(Serializable): @@ -550,6 +551,15 @@ def validate_multimodel_deployment_feasibility( sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] + source_model = DataScienceModel.from_id(model.model_id) + # Validates custom model with deployment config + # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk + if ( + source_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID + and not aqua_deployment_config.configuration + ): + continue + # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index fffd4ddab..32f95ae14 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -134,6 +134,93 @@ def load( summary.gpu_allocation = gpu_allocation return summary + def load_single( + self, + shapes: List[ComputeShapeSummary], + model_id: str, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configuration for single model and allocate all available GPU count to it. + + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_id : str + The OCID for the Aqua model. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + # Fetch deployment configuration concurrently. 
+ logger.debug(f"Loading model deployment configuration for model: {model_id}") + deployment_config = self._fetch_deployment_configs_concurrently([model_id])[ + model_id + ] + + deployment = { + model_id: { + "shape": [shape.upper() for shape in deployment_config.shape], + "configuration": { + shape.upper(): deployment_config.configuration.get( + shape, ConfigurationItem() + ) + for shape in deployment_config.shape + }, + } + } + + # Initialize the summary result with the deployment configurations. + summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Find out the common shapes from deployment config and available deployment shapes + shape = [shape.upper() for shape in deployment_config.shape] + common_shapes = [shape.name.upper() for shape in shapes] + if shape: + common_shapes = list(set(common_shapes).intersection(set(shape))) + + if not common_shapes: + summary.error_message = ( + "The selected model does not have any available deployment shape. " + "Please ensure that chosen model is compatible for multi-model deployment." + ) + logger.debug( + f"No compatible deployment shapes found for selected model: {model_id}" + ) + return summary + + logger.debug(f"Available Common Shapes: {common_shapes}") + + gpu_allocation = {} + for shape in common_shapes: + total_gpus_available = 0 + shape_summary = next( + ( + deployment_shape + for deployment_shape in shapes + if deployment_shape.name.upper() == shape + ), + None, + ) + if shape_summary and shape_summary.gpu_specs: + total_gpus_available = shape_summary.gpu_specs.gpu_count + + if total_gpus_available != 0: + gpu_allocation[shape] = GPUShapeAllocation( + models=[ + GPUModelAllocation( + ocid=model_id, gpu_count=total_gpus_available + ) + ], + total_gpus_available=total_gpus_available, + ) + + summary.gpu_allocation = gpu_allocation + return summary + def _fetch_deployment_configs_concurrently( self, model_ids: List[str] ) -> Dict[str, AquaDeploymentConfig]: From 1b1268b093f05f5ddb70b4a6dd600b5b35f84b00 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 7 Mar 2025 14:10:31 -0500 Subject: [PATCH 2/5] Updated pr. 
--- ads/aqua/modeldeployment/entities.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index f67d820fe..5fff72789 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,7 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator -from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger +from ads.aqua import logger from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,7 +15,6 @@ from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link -from ads.model.datascience_model import DataScienceModel class ShapeInfo(Serializable): @@ -551,13 +550,9 @@ def validate_multimodel_deployment_feasibility( sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] - source_model = DataScienceModel.from_id(model.model_id) # Validates custom model with deployment config # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk - if ( - source_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID - and not aqua_deployment_config.configuration - ): + if not aqua_deployment_config.configuration: continue # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). From 080a2516632d0d6e911a45c2f72fd5a56d05ecb9 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Sun, 9 Mar 2025 19:08:41 -0400 Subject: [PATCH 3/5] Added unit test. 
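
The test covers the new single-model path for a custom model whose deployment
config is empty: `_fetch_deployment_configs_concurrently` is mocked to return an
empty `AquaDeploymentConfig`, so the compatible shapes fall back to `list_shapes`
and each shape with a known GPU count is allocated entirely to the model. A
condensed sketch of what the assertion boils down to (the attribute-style checks
below are illustrative only; the actual test compares `result.model_dump()`
against the full expected dictionary in the diff):

    result = self.app.get_multimodel_deployment_config(["model_a"])
    assert result.gpu_allocation["BM.GPU.H100.8"].total_gpus_available == 8
    assert result.gpu_allocation["BM.GPU.H100.8"].models[0].gpu_count == 8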
--- .../with_extras/aqua/test_deployment.py | 85 ++++++++++++++++++- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index b4fe292e4..c3daf82a1 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -499,10 +499,10 @@ class TestDataset: "deployment_config": { "model_a": { "shape": [ - "BM.GPU.A100-V2.8", - "BM.GPU.H100.8", "VM.GPU.A10.2", "VM.GPU.A10.4", + "BM.GPU.A100-V2.8", + "BM.GPU.H100.8", ], "configuration": { "VM.GPU.A10.2": { @@ -593,6 +593,73 @@ class TestDataset: "error_message": None, } + aqua_deployment_multi_model_config_single_custom = { + "deployment_config": {"model_a": {"shape": [], "configuration": {}}}, + "gpu_allocation": { + "VM.GPU2.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "VM.GPU3.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU2.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU3.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU4.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.A100-V2.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.T1.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU.L40S-NC.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU.A10.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + }, + "error_message": None, + } + aqua_deployment_multi_model_config_summary_hybrid = { "deployment_config": { "model_a": { @@ -1001,7 +1068,7 @@ def test_get_deployment_config(self): "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") - def test_get_multimodel_deployment_config( + def test_get_multimodel_deployment_config_single( self, mock_list_shapes, mock_fetch_deployment_configs_concurrently ): config_json = os.path.join( @@ -1035,6 +1102,18 @@ def test_get_multimodel_deployment_config( == TestDataset.aqua_deployment_multi_model_config_summary ) + # custom model without deployment config + # deployment shape should be collected from `list_shapes`. 
+ mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig() + } + result = self.app.get_multimodel_deployment_config(["model_a"]) + + assert ( + result.model_dump() + == TestDataset.aqua_deployment_multi_model_config_single_custom + ) + @patch( "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) From 5041e699d39fbedeb65ea07ee5f20da4924bc993 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Mar 2025 11:56:36 -0400 Subject: [PATCH 4/5] Updated pr. --- ads/aqua/modeldeployment/deployment.py | 8 ++++---- ads/aqua/modeldeployment/utils.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 830ac86db..bc5673f20 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -162,7 +162,7 @@ def create( # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() if create_deployment_details.model_id: - logger.info( + logger.debug( f"Single model ({create_deployment_details.model_id}) provided. " "Delegating to single model creation method." ) @@ -250,7 +250,7 @@ def create( f"Only the following container families are supported: {supported_container_families}." ) - logger.info( + logger.debug( f"Multi models ({model_ids}) provided. Delegating to multi model creation method." ) @@ -1066,11 +1066,11 @@ def get_multimodel_deployment_config( ) if len(model_ids) == 1: - return multi_model_deployment_config_loader.load_single( + return multi_model_deployment_config_loader.load_model_deployment_configuration( shapes=available_shapes, model_id=model_ids[0] ) - return multi_model_deployment_config_loader.load( + return multi_model_deployment_config_loader.load_multi_model_deployment_configuration( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index fc65b8df2..e6571120d 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -44,7 +44,7 @@ def __init__(self, deployment_app: AquaApp): """ self.deployment_app = deployment_app - def load( + def load_multi_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], model_ids: List[str], @@ -132,7 +132,7 @@ def load( summary.gpu_allocation = gpu_allocation return summary - def load_single( + def load_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], model_id: str, @@ -160,15 +160,15 @@ def load_single( ] deployment = { - model_id: { - "shape": [shape.upper() for shape in deployment_config.shape], - "configuration": { + model_id: AquaDeploymentConfig( + shape=[shape.upper() for shape in deployment_config.shape], + configuration={ shape.upper(): deployment_config.configuration.get( shape, ConfigurationItem() ) for shape in deployment_config.shape }, - } + ) } # Initialize the summary result with the deployment configurations. From 76cc18a284e29c013af46da0c359d6eab6d6f69b Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Mar 2025 18:13:55 -0400 Subject: [PATCH 5/5] Updated pr. 
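
Folds the single-model branch back into `MultiModelDeploymentConfigLoader.load()`
so that `get_multimodel_deployment_config()` no longer branches on
`len(model_ids)` itself, and moves the shared setup into a
`_fetch_model_shape_gpu` helper. A condensed sketch of the resulting control
flow (abridged from the diff below, not the verbatim implementation):

    def load(self, shapes, model_ids, primary_model_id=None):
        if len(model_ids) == 1:
            # Single model: shapes come from the config's `shape` entry and the
            # model gets every GPU available on each compatible shape.
            return self._load_model_deployment_configuration(
                shapes=shapes, model_ids=model_ids
            )
        # Multiple models: shapes come from the `configuration` keys and GPUs
        # are split across models, optionally prioritizing `primary_model_id`.
        return self._load_multi_model_deployment_configuration(
            shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
        )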
--- ads/aqua/modeldeployment/deployment.py | 15 +-- ads/aqua/modeldeployment/utils.py | 132 +++++++++++++++---------- 2 files changed, 85 insertions(+), 62 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index bc5673f20..d60aadc36 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -1053,7 +1053,7 @@ def get_multimodel_deployment_config( """ if not model_ids: raise AquaValueError( - "Invalid or empty parameter `model_ids`. Specify a list of valid model ids to get multi model deployment config." + "Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration." ) compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) @@ -1061,16 +1061,9 @@ def get_multimodel_deployment_config( # Get the all model deployment available shapes in a given compartment available_shapes = self.list_shapes(compartment_id=compartment_id) - multi_model_deployment_config_loader = MultiModelDeploymentConfigLoader( - deployment_app=self - ) - - if len(model_ids) == 1: - return multi_model_deployment_config_loader.load_model_deployment_configuration( - shapes=available_shapes, model_id=model_ids[0] - ) - - return multi_model_deployment_config_loader.load_multi_model_deployment_configuration( + return MultiModelDeploymentConfigLoader( + deployment_app=self, + ).load( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index e6571120d..9d2188872 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -44,14 +44,14 @@ def __init__(self, deployment_app: AquaApp): """ self.deployment_app = deployment_app - def load_multi_model_deployment_configuration( + def load( self, shapes: List[ComputeShapeSummary], model_ids: List[str], primary_model_id: Optional[str] = None, ) -> ModelDeploymentConfigSummary: """ - Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. + Retrieves deployment configurations for multiple/single model and calculates compatible GPU allocations. Parameters ---------- @@ -69,24 +69,48 @@ def load_multi_model_deployment_configuration( A summary of the deployment configurations and GPU allocations. If GPU allocation cannot be determined, an appropriate error message is included in the summary. """ - # Fetch deployment configurations concurrently. - logger.debug(f"Loading model deployment configuration for models: {model_ids}") - deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + if len(model_ids) == 1: + return self._load_model_deployment_configuration( + shapes=shapes, model_ids=model_ids + ) - logger.debug(f"Loaded config: {deployment_configs}") - model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + return self._load_multi_model_deployment_configuration( + shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id + ) - # Initialize the summary result with the deployment configurations. - summary = ModelDeploymentConfigSummary(deployment_config=deployment) + def _load_multi_model_deployment_configuration( + self, + shapes: List[ComputeShapeSummary], + model_ids: List[str], + primary_model_id: Optional[str] = None, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. 
+ + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str], optional + The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model. + Otherwise, GPUs are evenly allocated. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) # Identify common deployment shapes among all models. common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu) logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}") - # Filter out not available shapes - available_shapes = [item.name.upper() for item in shapes] - logger.debug(f"Service Available Shapes: {available_shapes}") - # If all models' shape configs are empty, use default deployment shapes instead common_shapes = ( available_shapes @@ -132,10 +156,10 @@ def load_multi_model_deployment_configuration( summary.gpu_allocation = gpu_allocation return summary - def load_model_deployment_configuration( + def _load_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], - model_id: str, + model_ids: List[str], ) -> ModelDeploymentConfigSummary: """ Retrieves deployment configuration for single model and allocate all available GPU count to it. @@ -144,8 +168,8 @@ def load_model_deployment_configuration( ---------- shapes : List[ComputeShapeSummary] Model deployment available shapes. - model_id : str - The OCID for the Aqua model. + model_ids : List[str] + A list of OCIDs for the Aqua models. Returns ------- @@ -153,30 +177,13 @@ def load_model_deployment_configuration( A summary of the deployment configurations and GPU allocations. If GPU allocation cannot be determined, an appropriate error message is included in the summary. """ - # Fetch deployment configuration concurrently. - logger.debug(f"Loading model deployment configuration for model: {model_id}") - deployment_config = self._fetch_deployment_configs_concurrently([model_id])[ - model_id - ] - - deployment = { - model_id: AquaDeploymentConfig( - shape=[shape.upper() for shape in deployment_config.shape], - configuration={ - shape.upper(): deployment_config.configuration.get( - shape, ConfigurationItem() - ) - for shape in deployment_config.shape - }, - ) - } - - # Initialize the summary result with the deployment configurations. 
- summary = ModelDeploymentConfigSummary(deployment_config=deployment) + model_id = model_ids[0] + _, common_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) # Find out the common shapes from deployment config and available deployment shapes - shape = [shape.upper() for shape in deployment_config.shape] - common_shapes = [shape.name.upper() for shape in shapes] + shape = [shape.upper() for shape in summary.deployment_config[model_id].shape] if shape: common_shapes = list(set(common_shapes).intersection(set(shape))) @@ -219,6 +226,24 @@ def load_model_deployment_configuration( summary.gpu_allocation = gpu_allocation return summary + def _fetch_model_shape_gpu(self, shapes: List[ComputeShapeSummary], model_ids: str): + """Fetches dict of model shape and gpu, list of available shapes and builds `ModelDeploymentConfigSummary` instance.""" + # Fetch deployment configurations concurrently. + logger.debug(f"Loading model deployment configuration for models: {model_ids}") + deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + + logger.debug(f"Loaded config: {deployment_configs}") + model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + + # Initialize the summary result with the deployment configurations. + summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Filter out not available shapes + available_shapes = [item.name.upper() for item in shapes] + logger.debug(f"Service Available Shapes: {available_shapes}") + + return model_shape_gpu, available_shapes, summary + def _fetch_deployment_configs_concurrently( self, model_ids: List[str] ) -> Dict[str, AquaDeploymentConfig]: @@ -241,25 +266,30 @@ def _extract_model_shape_gpu( ): """Extracts shape and GPU count details from deployment configurations. Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config. + Supported shapes for single model deployment will be collected from `shape` entry in deployment config. """ model_shape_gpu = {} deployment = {} + is_single_model = len(deployment_configs) == 1 for model_id, config in deployment_configs.items(): - # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). + # For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. - # multi_deployment_shape = config.shape - multi_deployment_shape = list(config.configuration.keys()) - model_shape_gpu[model_id] = { - shape.upper(): [ - item.gpu_count - for item in config.configuration.get( - shape, ConfigurationItem() - ).multi_model_deployment - ] - for shape in multi_deployment_shape - } + # For single model deployment, we use `config.shape` to find the available shapes. + multi_deployment_shape = ( + config.shape if is_single_model else list(config.configuration.keys()) + ) + if not is_single_model: + model_shape_gpu[model_id] = { + shape.upper(): [ + item.gpu_count + for item in config.configuration.get( + shape, ConfigurationItem() + ).multi_model_deployment + ] + for shape in multi_deployment_shape + } deployment[model_id] = { "shape": [shape.upper() for shape in multi_deployment_shape], "configuration": {
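
For reference, a minimal usage sketch of the API after this series (the model
OCIDs are placeholders and an authenticated ADS/OCI session is assumed):

    from ads.aqua.modeldeployment import AquaDeploymentApp

    app = AquaDeploymentApp()

    # One model id: the single-model branch allocates every GPU on each
    # compatible shape to that model.
    single_summary = app.get_multimodel_deployment_config(["<model_a_ocid>"])

    # Several model ids: GPUs are split across the models, optionally
    # prioritizing one of them.
    multi_summary = app.get_multimodel_deployment_config(
        ["<model_a_ocid>", "<model_b_ocid>"],
        primary_model_id="<model_a_ocid>",
    )

    # Note: an empty `model_ids` list is now rejected up front with AquaValueError.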