diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index cfabf65b2..d60aadc36 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -145,18 +145,10 @@ def create( f"Invalid parameters for creating a model deployment. Error details: {custom_errors}." ) from ex - # If a single model is provided, delegate to `create` method - if ( - not create_deployment_details.model_id - and create_deployment_details.models - and len(create_deployment_details.models) == 1 - ): - single_model = create_deployment_details.models[0] - logger.info( - f"Single model ({single_model.model_id}) provided. " - "Delegating to single model creation method." + if not (create_deployment_details.model_id or create_deployment_details.models): + raise AquaValueError( + "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided." ) - create_deployment_details.model_id = single_model.model_id # Set defaults for compartment and project if not provided. compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID @@ -170,6 +162,10 @@ def create( # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() if create_deployment_details.model_id: + logger.debug( + f"Single model ({create_deployment_details.model_id}) provided. " + "Delegating to single model creation method." + ) aqua_model = model_app.create( model_id=create_deployment_details.model_id, compartment_id=compartment_id, @@ -254,6 +250,10 @@ def create( f"Only the following container families are supported: {supported_container_families}." ) + logger.debug( + f"Multi models ({model_ids}) provided. Delegating to multi model creation method." + ) + aqua_model = model_app.create_multi( models=create_deployment_details.models, compartment_id=compartment_id, @@ -1051,6 +1051,10 @@ def get_multimodel_deployment_config( ModelDeploymentConfigSummary A summary of the model deployment configurations and GPU allocations. """ + if not model_ids: + raise AquaValueError( + "Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration." + ) compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 707b46f05..9d2188872 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -51,7 +51,7 @@ def load( primary_model_id: Optional[str] = None, ) -> ModelDeploymentConfigSummary: """ - Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. + Retrieves deployment configurations for multiple/single model and calculates compatible GPU allocations. Parameters ---------- @@ -69,24 +69,48 @@ def load( A summary of the deployment configurations and GPU allocations. If GPU allocation cannot be determined, an appropriate error message is included in the summary. """ - # Fetch deployment configurations concurrently. - logger.debug(f"Loading model deployment configuration for models: {model_ids}") - deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + if len(model_ids) == 1: + return self._load_model_deployment_configuration( + shapes=shapes, model_ids=model_ids + ) - logger.debug(f"Loaded config: {deployment_configs}") - model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + return self._load_multi_model_deployment_configuration( + shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id + ) - # Initialize the summary result with the deployment configurations. - summary = ModelDeploymentConfigSummary(deployment_config=deployment) + def _load_multi_model_deployment_configuration( + self, + shapes: List[ComputeShapeSummary], + model_ids: List[str], + primary_model_id: Optional[str] = None, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. + + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str], optional + The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model. + Otherwise, GPUs are evenly allocated. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) # Identify common deployment shapes among all models. common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu) logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}") - # Filter out not available shapes - available_shapes = [item.name.upper() for item in shapes] - logger.debug(f"Service Available Shapes: {available_shapes}") - # If all models' shape configs are empty, use default deployment shapes instead common_shapes = ( available_shapes @@ -132,6 +156,94 @@ def load( summary.gpu_allocation = gpu_allocation return summary + def _load_model_deployment_configuration( + self, + shapes: List[ComputeShapeSummary], + model_ids: List[str], + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configuration for single model and allocate all available GPU count to it. + + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_ids : List[str] + A list of OCIDs for the Aqua models. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + model_id = model_ids[0] + _, common_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) + + # Find out the common shapes from deployment config and available deployment shapes + shape = [shape.upper() for shape in summary.deployment_config[model_id].shape] + if shape: + common_shapes = list(set(common_shapes).intersection(set(shape))) + + if not common_shapes: + summary.error_message = ( + "The selected model does not have any available deployment shape. " + "Please ensure that chosen model is compatible for multi-model deployment." + ) + logger.debug( + f"No compatible deployment shapes found for selected model: {model_id}" + ) + return summary + + logger.debug(f"Available Common Shapes: {common_shapes}") + + gpu_allocation = {} + for shape in common_shapes: + total_gpus_available = 0 + shape_summary = next( + ( + deployment_shape + for deployment_shape in shapes + if deployment_shape.name.upper() == shape + ), + None, + ) + if shape_summary and shape_summary.gpu_specs: + total_gpus_available = shape_summary.gpu_specs.gpu_count + + if total_gpus_available != 0: + gpu_allocation[shape] = GPUShapeAllocation( + models=[ + GPUModelAllocation( + ocid=model_id, gpu_count=total_gpus_available + ) + ], + total_gpus_available=total_gpus_available, + ) + + summary.gpu_allocation = gpu_allocation + return summary + + def _fetch_model_shape_gpu(self, shapes: List[ComputeShapeSummary], model_ids: str): + """Fetches dict of model shape and gpu, list of available shapes and builds `ModelDeploymentConfigSummary` instance.""" + # Fetch deployment configurations concurrently. + logger.debug(f"Loading model deployment configuration for models: {model_ids}") + deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + + logger.debug(f"Loaded config: {deployment_configs}") + model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + + # Initialize the summary result with the deployment configurations. + summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Filter out not available shapes + available_shapes = [item.name.upper() for item in shapes] + logger.debug(f"Service Available Shapes: {available_shapes}") + + return model_shape_gpu, available_shapes, summary + def _fetch_deployment_configs_concurrently( self, model_ids: List[str] ) -> Dict[str, AquaDeploymentConfig]: @@ -154,25 +266,30 @@ def _extract_model_shape_gpu( ): """Extracts shape and GPU count details from deployment configurations. Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config. + Supported shapes for single model deployment will be collected from `shape` entry in deployment config. """ model_shape_gpu = {} deployment = {} + is_single_model = len(deployment_configs) == 1 for model_id, config in deployment_configs.items(): - # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). + # For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. - # multi_deployment_shape = config.shape - multi_deployment_shape = list(config.configuration.keys()) - model_shape_gpu[model_id] = { - shape.upper(): [ - item.gpu_count - for item in config.configuration.get( - shape, ConfigurationItem() - ).multi_model_deployment - ] - for shape in multi_deployment_shape - } + # For single model deployment, we use `config.shape` to find the available shapes. + multi_deployment_shape = ( + config.shape if is_single_model else list(config.configuration.keys()) + ) + if not is_single_model: + model_shape_gpu[model_id] = { + shape.upper(): [ + item.gpu_count + for item in config.configuration.get( + shape, ConfigurationItem() + ).multi_model_deployment + ] + for shape in multi_deployment_shape + } deployment[model_id] = { "shape": [shape.upper() for shape in multi_deployment_shape], "configuration": { diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index b4fe292e4..c3daf82a1 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -499,10 +499,10 @@ class TestDataset: "deployment_config": { "model_a": { "shape": [ - "BM.GPU.A100-V2.8", - "BM.GPU.H100.8", "VM.GPU.A10.2", "VM.GPU.A10.4", + "BM.GPU.A100-V2.8", + "BM.GPU.H100.8", ], "configuration": { "VM.GPU.A10.2": { @@ -593,6 +593,73 @@ class TestDataset: "error_message": None, } + aqua_deployment_multi_model_config_single_custom = { + "deployment_config": {"model_a": {"shape": [], "configuration": {}}}, + "gpu_allocation": { + "VM.GPU2.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "VM.GPU3.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU2.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU3.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU4.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.A100-V2.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.T1.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU.L40S-NC.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU.A10.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + }, + "error_message": None, + } + aqua_deployment_multi_model_config_summary_hybrid = { "deployment_config": { "model_a": { @@ -1001,7 +1068,7 @@ def test_get_deployment_config(self): "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") - def test_get_multimodel_deployment_config( + def test_get_multimodel_deployment_config_single( self, mock_list_shapes, mock_fetch_deployment_configs_concurrently ): config_json = os.path.join( @@ -1035,6 +1102,18 @@ def test_get_multimodel_deployment_config( == TestDataset.aqua_deployment_multi_model_config_summary ) + # custom model without deployment config + # deployment shape should be collected from `list_shapes`. + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig() + } + result = self.app.get_multimodel_deployment_config(["model_a"]) + + assert ( + result.model_dump() + == TestDataset.aqua_deployment_multi_model_config_single_custom + ) + @patch( "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" )