oracle · mrDzurb · Mar 10, 2025 · Mar 7, 2025 · Mar 7, 2025 · Mar 7, 2025
@@ -145,18 +145,10 @@ def create(
                     f"Invalid parameters for creating a model deployment. Error details: {custom_errors}."
                 ) from ex
 
-        # If a single model is provided, delegate to `create` method
-        if (
-            not create_deployment_details.model_id
-            and create_deployment_details.models
-            and len(create_deployment_details.models) == 1
-        ):
-            single_model = create_deployment_details.models[0]
-            logger.info(
-                f"Single model ({single_model.model_id}) provided. "
-                "Delegating to single model creation method."
+        if not (create_deployment_details.model_id or create_deployment_details.models):
+            raise AquaValueError(
+                "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided."
             )
-            create_deployment_details.model_id = single_model.model_id
 
         # Set defaults for compartment and project if not provided.
         compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID
@@ -170,6 +162,10 @@ def create(
         # Create an AquaModelApp instance once to perform the deployment creation.
         model_app = AquaModelApp()
         if create_deployment_details.model_id:
+            logger.debug(
+                f"Single model ({create_deployment_details.model_id}) provided. "
+                "Delegating to single model creation method."
+            )
             aqua_model = model_app.create(
                 model_id=create_deployment_details.model_id,
                 compartment_id=compartment_id,
@@ -254,6 +250,10 @@ def create(
                         f"Only the following container families are supported: {supported_container_families}."
                     )
 
+            logger.debug(
+                f"Multi models ({model_ids}) provided. Delegating to multi model creation method."
+            )
+
             aqua_model = model_app.create_multi(
                 models=create_deployment_details.models,
                 compartment_id=compartment_id,
@@ -1051,6 +1051,10 @@ def get_multimodel_deployment_config(
         ModelDeploymentConfigSummary
             A summary of the model deployment configurations and GPU allocations.
         """
+        if not model_ids:
+            raise AquaValueError(
+                "Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration."
+            )
 
         compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID)
 

@@ -51,7 +51,7 @@ def load(
         primary_model_id: Optional[str] = None,
     ) -> ModelDeploymentConfigSummary:
         """
-        Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
+        Retrieves deployment configurations for one or more models and calculates compatible GPU allocations.
 
         Parameters
         ----------
@@ -69,24 +69,48 @@ def load(
             A summary of the deployment configurations and GPU allocations. If GPU allocation
             cannot be determined, an appropriate error message is included in the summary.
         """
-        # Fetch deployment configurations concurrently.
-        logger.debug(f"Loading model deployment configuration for models: {model_ids}")
-        deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
+        if len(model_ids) == 1:
+            return self._load_model_deployment_configuration(
+                shapes=shapes, model_ids=model_ids
+            )
 
-        logger.debug(f"Loaded config: {deployment_configs}")
-        model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs)
+        return self._load_multi_model_deployment_configuration(
+            shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
+        )
 
-        # Initialize the summary result with the deployment configurations.
-        summary = ModelDeploymentConfigSummary(deployment_config=deployment)
+    def _load_multi_model_deployment_configuration(
+        self,
+        shapes: List[ComputeShapeSummary],
+        model_ids: List[str],
+        primary_model_id: Optional[str] = None,
+    ) -> ModelDeploymentConfigSummary:
+        """
+        Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
+
+        Parameters
+        ----------
+        shapes : List[ComputeShapeSummary]
+            Model deployment available shapes.
+        model_ids : List[str]
+            A list of OCIDs for the Aqua models.
+        primary_model_id : Optional[str], optional
+            The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
+            Otherwise, GPUs are evenly allocated.
+
+        Returns
+        -------
+        ModelDeploymentConfigSummary
+            A summary of the deployment configurations and GPU allocations. If GPU allocation
+            cannot be determined, an appropriate error message is included in the summary.
+        """
+        model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu(
+            shapes=shapes, model_ids=model_ids
+        )
 
         # Identify common deployment shapes among all models.
         common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu)
         logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}")
 
-        # Filter out not available shapes
-        available_shapes = [item.name.upper() for item in shapes]
-        logger.debug(f"Service Available Shapes: {available_shapes}")
-
         # If all models' shape configs are empty, use default deployment shapes instead
         common_shapes = (
             available_shapes
@@ -132,6 +156,94 @@ def load(
         summary.gpu_allocation = gpu_allocation
         return summary
 
+    def _load_model_deployment_configuration(
+        self,
+        shapes: List[ComputeShapeSummary],
+        model_ids: List[str],
+    ) -> ModelDeploymentConfigSummary:
+        """
+        Retrieves the deployment configuration for a single model and allocates
+        the full GPU count of every compatible shape to it.
+
+        Parameters
+        ----------
+        shapes : List[ComputeShapeSummary]
+            Model deployment available shapes.
+        model_ids : List[str]
+            A list of OCIDs for the Aqua models. The first entry is the model
+            that receives the GPU allocation.
+
+        Returns
+        -------
+        ModelDeploymentConfigSummary
+            A summary of the deployment configuration and GPU allocations. If no
+            compatible shape is found, `error_message` is set on the summary and
+            it is returned without a GPU allocation.
+        """
+        model_id = model_ids[0]
+        _, available_shapes, summary = self._fetch_model_shape_gpu(
+            shapes=shapes, model_ids=model_ids
+        )
+
+        # Restrict the service shapes to those listed in the model's deployment
+        # config. An empty list in the config means "no restriction", so every
+        # available shape is kept. Filtering the ordered list (instead of a set
+        # intersection) keeps the result order stable between runs.
+        config_shapes = {
+            name.upper() for name in summary.deployment_config[model_id].shape
+        }
+        if config_shapes:
+            common_shapes = list(
+                dict.fromkeys(
+                    name for name in available_shapes if name in config_shapes
+                )
+            )
+        else:
+            common_shapes = available_shapes
+
+        if not common_shapes:
+            summary.error_message = (
+                "The selected model does not have any available deployment shape. "
+                "Please ensure that chosen model is compatible for multi-model deployment."
+            )
+            logger.debug(
+                f"No compatible deployment shapes found for selected model: {model_id}"
+            )
+            return summary
+
+        logger.debug(f"Available Common Shapes: {common_shapes}")
+
+        # Index the shape summaries by upper-cased name once; `setdefault` keeps
+        # the first occurrence, matching a first-match scan over `shapes`.
+        shape_by_name = {}
+        for deployment_shape in shapes:
+            shape_by_name.setdefault(deployment_shape.name.upper(), deployment_shape)
+
+        gpu_allocation = {}
+        for shape_name in common_shapes:
+            total_gpus_available = 0
+            shape_summary = shape_by_name.get(shape_name)
+            if shape_summary and shape_summary.gpu_specs:
+                total_gpus_available = shape_summary.gpu_specs.gpu_count
+
+            # Shapes without GPU information are left out of the allocation.
+            if total_gpus_available != 0:
+                gpu_allocation[shape_name] = GPUShapeAllocation(
+                    models=[
+                        GPUModelAllocation(
+                            ocid=model_id, gpu_count=total_gpus_available
+                        )
+                    ],
+                    total_gpus_available=total_gpus_available,
+                )
+
+        summary.gpu_allocation = gpu_allocation
+        return summary
+
+    def _fetch_model_shape_gpu(
+        self, shapes: List[ComputeShapeSummary], model_ids: List[str]
+    ):
+        """
+        Fetches the deployment configs for the given models and builds the shared
+        inputs used by the deployment configuration loaders.
+
+        Parameters
+        ----------
+        shapes : List[ComputeShapeSummary]
+            Model deployment available shapes.
+        model_ids : List[str]
+            A list of OCIDs for the Aqua models.
+
+        Returns
+        -------
+        tuple
+            A 3-tuple of:
+            - the model shape/GPU mapping returned by `_extract_model_shape_gpu`,
+            - the upper-cased names of the given `shapes`,
+            - a `ModelDeploymentConfigSummary` initialized with the extracted
+              deployment configurations.
+        """
+        # Fetch deployment configurations concurrently.
+        logger.debug(f"Loading model deployment configuration for models: {model_ids}")
+        deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
+
+        logger.debug(f"Loaded config: {deployment_configs}")
+        model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs)
+
+        # Initialize the summary result with the deployment configurations.
+        summary = ModelDeploymentConfigSummary(deployment_config=deployment)
+
+        # Normalize the service shape names to upper case so callers can compare
+        # them against the upper-cased shape names from the deployment configs.
+        available_shapes = [item.name.upper() for item in shapes]
+        logger.debug(f"Service Available Shapes: {available_shapes}")
+
+        return model_shape_gpu, available_shapes, summary
+
     def _fetch_deployment_configs_concurrently(
         self, model_ids: List[str]
     ) -> Dict[str, AquaDeploymentConfig]:
@@ -154,25 +266,30 @@ def _extract_model_shape_gpu(
     ):
         """Extracts shape and GPU count details from deployment configurations.
         Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config.
+        Supported shapes for single model deployment will be collected from `shape` entry in deployment config.
         """
         model_shape_gpu = {}
         deployment = {}
+        is_single_model = len(deployment_configs) == 1
 
         for model_id, config in deployment_configs.items():
-            # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
+            # For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
             # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2.
             # Our current configuration does not support this flexibility.
-            # multi_deployment_shape = config.shape
-            multi_deployment_shape = list(config.configuration.keys())
-            model_shape_gpu[model_id] = {
-                shape.upper(): [
-                    item.gpu_count
-                    for item in config.configuration.get(
-                        shape, ConfigurationItem()
-                    ).multi_model_deployment
-                ]
-                for shape in multi_deployment_shape
-            }
+            # For single model deployment, we use `config.shape` to find the available shapes.
+            multi_deployment_shape = (
+                config.shape if is_single_model else list(config.configuration.keys())
+            )
+            if not is_single_model:
+                model_shape_gpu[model_id] = {
+                    shape.upper(): [
+                        item.gpu_count
+                        for item in config.configuration.get(
+                            shape, ConfigurationItem()
+                        ).multi_model_deployment
+                    ]
+                    for shape in multi_deployment_shape
+                }
             deployment[model_id] = {
                 "shape": [shape.upper() for shape in multi_deployment_shape],
                 "configuration": {

@@ -499,10 +499,10 @@ class TestDataset:
         "deployment_config": {
             "model_a": {
                 "shape": [
-                    "BM.GPU.A100-V2.8",
-                    "BM.GPU.H100.8",
                     "VM.GPU.A10.2",
                     "VM.GPU.A10.4",
+                    "BM.GPU.A100-V2.8",
+                    "BM.GPU.H100.8",
                 ],
                 "configuration": {
                     "VM.GPU.A10.2": {
@@ -593,6 +593,73 @@ class TestDataset:
         "error_message": None,
     }
 
+    # Expected `model_dump()` of the config summary for a single model ("model_a")
+    # whose deployment config has no shapes and no configuration entries.
+    # Every listed shape assigns its whole GPU count to "model_a"
+    # (gpu_count == total_gpus_available for each entry).
+    aqua_deployment_multi_model_config_single_custom = {
+        "deployment_config": {"model_a": {"shape": [], "configuration": {}}},
+        "gpu_allocation": {
+            "VM.GPU2.1": {
+                "models": [{"ocid": "model_a", "gpu_count": 1}],
+                "total_gpus_available": 1,
+            },
+            "VM.GPU3.1": {
+                "models": [{"ocid": "model_a", "gpu_count": 1}],
+                "total_gpus_available": 1,
+            },
+            "VM.GPU3.2": {
+                "models": [{"ocid": "model_a", "gpu_count": 2}],
+                "total_gpus_available": 2,
+            },
+            "VM.GPU3.4": {
+                "models": [{"ocid": "model_a", "gpu_count": 4}],
+                "total_gpus_available": 4,
+            },
+            "BM.GPU2.2": {
+                "models": [{"ocid": "model_a", "gpu_count": 2}],
+                "total_gpus_available": 2,
+            },
+            "BM.GPU3.8": {
+                "models": [{"ocid": "model_a", "gpu_count": 8}],
+                "total_gpus_available": 8,
+            },
+            "BM.GPU4.8": {
+                "models": [{"ocid": "model_a", "gpu_count": 8}],
+                "total_gpus_available": 8,
+            },
+            "BM.GPU.A100-V2.8": {
+                "models": [{"ocid": "model_a", "gpu_count": 8}],
+                "total_gpus_available": 8,
+            },
+            "BM.GPU.H100.8": {
+                "models": [{"ocid": "model_a", "gpu_count": 8}],
+                "total_gpus_available": 8,
+            },
+            "BM.GPU.T1.2": {
+                "models": [{"ocid": "model_a", "gpu_count": 2}],
+                "total_gpus_available": 2,
+            },
+            "BM.GPU.A10.4": {
+                "models": [{"ocid": "model_a", "gpu_count": 4}],
+                "total_gpus_available": 4,
+            },
+            "VM.GPU.A10.4": {
+                "models": [{"ocid": "model_a", "gpu_count": 4}],
+                "total_gpus_available": 4,
+            },
+            "BM.GPU.L40S-NC.4": {
+                "models": [{"ocid": "model_a", "gpu_count": 4}],
+                "total_gpus_available": 4,
+            },
+            "VM.GPU.A10.1": {
+                "models": [{"ocid": "model_a", "gpu_count": 1}],
+                "total_gpus_available": 1,
+            },
+            "VM.GPU.A10.2": {
+                "models": [{"ocid": "model_a", "gpu_count": 2}],
+                "total_gpus_available": 2,
+            },
+        },
+        "error_message": None,
+    }
+
     aqua_deployment_multi_model_config_summary_hybrid = {
         "deployment_config": {
             "model_a": {
@@ -1001,7 +1068,7 @@ def test_get_deployment_config(self):
         "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently"
     )
     @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes")
-    def test_get_multimodel_deployment_config(
+    def test_get_multimodel_deployment_config_single(
         self, mock_list_shapes, mock_fetch_deployment_configs_concurrently
     ):
         config_json = os.path.join(
@@ -1035,6 +1102,18 @@ def test_get_multimodel_deployment_config(
             == TestDataset.aqua_deployment_multi_model_config_summary
         )
 
+        # custom model without deployment config
+        # deployment shape should be collected from `list_shapes`.
+        mock_fetch_deployment_configs_concurrently.return_value = {
+            "model_a": AquaDeploymentConfig()
+        }
+        result = self.app.get_multimodel_deployment_config(["model_a"])
+
+        assert (
+            result.model_dump()
+            == TestDataset.aqua_deployment_multi_model_config_single_custom
+        )
+
     @patch(
         "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently"
     )