From d6b20d5f916d74ca830c5fce1a9e27b834fbe17e Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 7 Mar 2025 13:33:02 -0500 Subject: [PATCH 1/5] Support deploy single model for multi deployment. --- ads/aqua/modeldeployment/deployment.py | 39 +++++++----- ads/aqua/modeldeployment/entities.py | 12 +++- ads/aqua/modeldeployment/utils.py | 87 ++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index cfabf65b2..830ac86db 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -145,18 +145,10 @@ def create( f"Invalid parameters for creating a model deployment. Error details: {custom_errors}." ) from ex - # If a single model is provided, delegate to `create` method - if ( - not create_deployment_details.model_id - and create_deployment_details.models - and len(create_deployment_details.models) == 1 - ): - single_model = create_deployment_details.models[0] - logger.info( - f"Single model ({single_model.model_id}) provided. " - "Delegating to single model creation method." + if not (create_deployment_details.model_id or create_deployment_details.models): + raise AquaValueError( + "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided." ) - create_deployment_details.model_id = single_model.model_id # Set defaults for compartment and project if not provided. compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID @@ -170,6 +162,10 @@ def create( # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() if create_deployment_details.model_id: + logger.info( + f"Single model ({create_deployment_details.model_id}) provided. " + "Delegating to single model creation method." + ) aqua_model = model_app.create( model_id=create_deployment_details.model_id, compartment_id=compartment_id, @@ -254,6 +250,10 @@ def create( f"Only the following container families are supported: {supported_container_families}." ) + logger.info( + f"Multi models ({model_ids}) provided. Delegating to multi model creation method." + ) + aqua_model = model_app.create_multi( models=create_deployment_details.models, compartment_id=compartment_id, @@ -1051,15 +1051,26 @@ def get_multimodel_deployment_config( ModelDeploymentConfigSummary A summary of the model deployment configurations and GPU allocations. """ + if not model_ids: + raise AquaValueError( + "Invalid or empty parameter `model_ids`. Specify a list of valid model ids to get multi model deployment config." 
+ ) compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) # Get the all model deployment available shapes in a given compartment available_shapes = self.list_shapes(compartment_id=compartment_id) - return MultiModelDeploymentConfigLoader( - deployment_app=self, - ).load( + multi_model_deployment_config_loader = MultiModelDeploymentConfigLoader( + deployment_app=self + ) + + if len(model_ids) == 1: + return multi_model_deployment_config_loader.load_single( + shapes=available_shapes, model_id=model_ids[0] + ) + + return multi_model_deployment_config_loader.load( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index fee4ad44c..f67d820fe 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,7 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator -from ads.aqua import logger +from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,6 +15,7 @@ from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link +from ads.model.datascience_model import DataScienceModel class ShapeInfo(Serializable): @@ -550,6 +551,15 @@ def validate_multimodel_deployment_feasibility( sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] + source_model = DataScienceModel.from_id(model.model_id) + # Validates custom model with deployment config + # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk + if ( + source_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID + and not aqua_deployment_config.configuration + ): + continue + # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index fffd4ddab..32f95ae14 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -134,6 +134,93 @@ def load( summary.gpu_allocation = gpu_allocation return summary + def load_single( + self, + shapes: List[ComputeShapeSummary], + model_id: str, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configuration for single model and allocate all available GPU count to it. + + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_id : str + The OCID for the Aqua model. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + # Fetch deployment configuration concurrently. 
+ logger.debug(f"Loading model deployment configuration for model: {model_id}") + deployment_config = self._fetch_deployment_configs_concurrently([model_id])[ + model_id + ] + + deployment = { + model_id: { + "shape": [shape.upper() for shape in deployment_config.shape], + "configuration": { + shape.upper(): deployment_config.configuration.get( + shape, ConfigurationItem() + ) + for shape in deployment_config.shape + }, + } + } + + # Initialize the summary result with the deployment configurations. + summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Find out the common shapes from deployment config and available deployment shapes + shape = [shape.upper() for shape in deployment_config.shape] + common_shapes = [shape.name.upper() for shape in shapes] + if shape: + common_shapes = list(set(common_shapes).intersection(set(shape))) + + if not common_shapes: + summary.error_message = ( + "The selected model does not have any available deployment shape. " + "Please ensure that chosen model is compatible for multi-model deployment." + ) + logger.debug( + f"No compatible deployment shapes found for selected model: {model_id}" + ) + return summary + + logger.debug(f"Available Common Shapes: {common_shapes}") + + gpu_allocation = {} + for shape in common_shapes: + total_gpus_available = 0 + shape_summary = next( + ( + deployment_shape + for deployment_shape in shapes + if deployment_shape.name.upper() == shape + ), + None, + ) + if shape_summary and shape_summary.gpu_specs: + total_gpus_available = shape_summary.gpu_specs.gpu_count + + if total_gpus_available != 0: + gpu_allocation[shape] = GPUShapeAllocation( + models=[ + GPUModelAllocation( + ocid=model_id, gpu_count=total_gpus_available + ) + ], + total_gpus_available=total_gpus_available, + ) + + summary.gpu_allocation = gpu_allocation + return summary + def _fetch_deployment_configs_concurrently( self, model_ids: List[str] ) -> Dict[str, AquaDeploymentConfig]: From 1b1268b093f05f5ddb70b4a6dd600b5b35f84b00 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 7 Mar 2025 14:10:31 -0500 Subject: [PATCH 2/5] Updated pr. 
--- ads/aqua/modeldeployment/entities.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index f67d820fe..5fff72789 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,7 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator -from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger +from ads.aqua import logger from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,7 +15,6 @@ from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link -from ads.model.datascience_model import DataScienceModel class ShapeInfo(Serializable): @@ -551,13 +550,9 @@ def validate_multimodel_deployment_feasibility( sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] - source_model = DataScienceModel.from_id(model.model_id) # Validates custom model with deployment config # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk - if ( - source_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID - and not aqua_deployment_config.configuration - ): + if not aqua_deployment_config.configuration: continue # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). From 080a2516632d0d6e911a45c2f72fd5a56d05ecb9 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Sun, 9 Mar 2025 19:08:41 -0400 Subject: [PATCH 3/5] Added unit test. 
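
The test covers the new single-model path for a custom model whose deployment
config is empty: `_fetch_deployment_configs_concurrently` is mocked to return an
empty `AquaDeploymentConfig`, so the compatible shapes fall back to `list_shapes`
and each shape with a known GPU count is allocated entirely to the model. A
condensed sketch of what the assertion boils down to (the attribute-style checks
below are illustrative only; the actual test compares `result.model_dump()`
against the full expected dictionary in the diff):

    result = self.app.get_multimodel_deployment_config(["model_a"])
    assert result.gpu_allocation["BM.GPU.H100.8"].total_gpus_available == 8
    assert result.gpu_allocation["BM.GPU.H100.8"].models[0].gpu_count == 8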
--- .../with_extras/aqua/test_deployment.py | 85 ++++++++++++++++++- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index b4fe292e4..c3daf82a1 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -499,10 +499,10 @@ class TestDataset: "deployment_config": { "model_a": { "shape": [ - "BM.GPU.A100-V2.8", - "BM.GPU.H100.8", "VM.GPU.A10.2", "VM.GPU.A10.4", + "BM.GPU.A100-V2.8", + "BM.GPU.H100.8", ], "configuration": { "VM.GPU.A10.2": { @@ -593,6 +593,73 @@ class TestDataset: "error_message": None, } + aqua_deployment_multi_model_config_single_custom = { + "deployment_config": {"model_a": {"shape": [], "configuration": {}}}, + "gpu_allocation": { + "VM.GPU2.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "VM.GPU3.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU2.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU3.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU4.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.A100-V2.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.T1.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU.L40S-NC.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU.A10.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + }, + "error_message": None, + } + aqua_deployment_multi_model_config_summary_hybrid = { "deployment_config": { "model_a": { @@ -1001,7 +1068,7 @@ def test_get_deployment_config(self): "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") - def test_get_multimodel_deployment_config( + def test_get_multimodel_deployment_config_single( self, mock_list_shapes, mock_fetch_deployment_configs_concurrently ): config_json = os.path.join( @@ -1035,6 +1102,18 @@ def test_get_multimodel_deployment_config( == TestDataset.aqua_deployment_multi_model_config_summary ) + # custom model without deployment config + # deployment shape should be collected from `list_shapes`. 
+ mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig() + } + result = self.app.get_multimodel_deployment_config(["model_a"]) + + assert ( + result.model_dump() + == TestDataset.aqua_deployment_multi_model_config_single_custom + ) + @patch( "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) From 5041e699d39fbedeb65ea07ee5f20da4924bc993 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Mar 2025 11:56:36 -0400 Subject: [PATCH 4/5] Updated pr. --- ads/aqua/modeldeployment/deployment.py | 8 ++++---- ads/aqua/modeldeployment/utils.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 830ac86db..bc5673f20 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -162,7 +162,7 @@ def create( # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() if create_deployment_details.model_id: - logger.info( + logger.debug( f"Single model ({create_deployment_details.model_id}) provided. " "Delegating to single model creation method." ) @@ -250,7 +250,7 @@ def create( f"Only the following container families are supported: {supported_container_families}." ) - logger.info( + logger.debug( f"Multi models ({model_ids}) provided. Delegating to multi model creation method." ) @@ -1066,11 +1066,11 @@ def get_multimodel_deployment_config( ) if len(model_ids) == 1: - return multi_model_deployment_config_loader.load_single( + return multi_model_deployment_config_loader.load_model_deployment_configuration( shapes=available_shapes, model_id=model_ids[0] ) - return multi_model_deployment_config_loader.load( + return multi_model_deployment_config_loader.load_multi_model_deployment_configuration( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index fc65b8df2..e6571120d 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -44,7 +44,7 @@ def __init__(self, deployment_app: AquaApp): """ self.deployment_app = deployment_app - def load( + def load_multi_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], model_ids: List[str], @@ -132,7 +132,7 @@ def load( summary.gpu_allocation = gpu_allocation return summary - def load_single( + def load_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], model_id: str, @@ -160,15 +160,15 @@ def load_single( ] deployment = { - model_id: { - "shape": [shape.upper() for shape in deployment_config.shape], - "configuration": { + model_id: AquaDeploymentConfig( + shape=[shape.upper() for shape in deployment_config.shape], + configuration={ shape.upper(): deployment_config.configuration.get( shape, ConfigurationItem() ) for shape in deployment_config.shape }, - } + ) } # Initialize the summary result with the deployment configurations. From 76cc18a284e29c013af46da0c359d6eab6d6f69b Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Mar 2025 18:13:55 -0400 Subject: [PATCH 5/5] Updated pr. 
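
Folds the single-model branch back into `MultiModelDeploymentConfigLoader.load()`
so that `get_multimodel_deployment_config()` no longer branches on
`len(model_ids)` itself, and moves the shared setup into a
`_fetch_model_shape_gpu` helper. A condensed sketch of the resulting control
flow (abridged from the diff below, not the verbatim implementation):

    def load(self, shapes, model_ids, primary_model_id=None):
        if len(model_ids) == 1:
            # Single model: shapes come from the config's `shape` entry and the
            # model gets every GPU available on each compatible shape.
            return self._load_model_deployment_configuration(
                shapes=shapes, model_ids=model_ids
            )
        # Multiple models: shapes come from the `configuration` keys and GPUs
        # are split across models, optionally prioritizing `primary_model_id`.
        return self._load_multi_model_deployment_configuration(
            shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
        )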
--- ads/aqua/modeldeployment/deployment.py | 15 +-- ads/aqua/modeldeployment/utils.py | 132 +++++++++++++++---------- 2 files changed, 85 insertions(+), 62 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index bc5673f20..d60aadc36 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -1053,7 +1053,7 @@ def get_multimodel_deployment_config( """ if not model_ids: raise AquaValueError( - "Invalid or empty parameter `model_ids`. Specify a list of valid model ids to get multi model deployment config." + "Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration." ) compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) @@ -1061,16 +1061,9 @@ def get_multimodel_deployment_config( # Get the all model deployment available shapes in a given compartment available_shapes = self.list_shapes(compartment_id=compartment_id) - multi_model_deployment_config_loader = MultiModelDeploymentConfigLoader( - deployment_app=self - ) - - if len(model_ids) == 1: - return multi_model_deployment_config_loader.load_model_deployment_configuration( - shapes=available_shapes, model_id=model_ids[0] - ) - - return multi_model_deployment_config_loader.load_multi_model_deployment_configuration( + return MultiModelDeploymentConfigLoader( + deployment_app=self, + ).load( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index e6571120d..9d2188872 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -44,14 +44,14 @@ def __init__(self, deployment_app: AquaApp): """ self.deployment_app = deployment_app - def load_multi_model_deployment_configuration( + def load( self, shapes: List[ComputeShapeSummary], model_ids: List[str], primary_model_id: Optional[str] = None, ) -> ModelDeploymentConfigSummary: """ - Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. + Retrieves deployment configurations for multiple/single model and calculates compatible GPU allocations. Parameters ---------- @@ -69,24 +69,48 @@ def load_multi_model_deployment_configuration( A summary of the deployment configurations and GPU allocations. If GPU allocation cannot be determined, an appropriate error message is included in the summary. """ - # Fetch deployment configurations concurrently. - logger.debug(f"Loading model deployment configuration for models: {model_ids}") - deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + if len(model_ids) == 1: + return self._load_model_deployment_configuration( + shapes=shapes, model_ids=model_ids + ) - logger.debug(f"Loaded config: {deployment_configs}") - model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + return self._load_multi_model_deployment_configuration( + shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id + ) - # Initialize the summary result with the deployment configurations. - summary = ModelDeploymentConfigSummary(deployment_config=deployment) + def _load_multi_model_deployment_configuration( + self, + shapes: List[ComputeShapeSummary], + model_ids: List[str], + primary_model_id: Optional[str] = None, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. 
+ + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str], optional + The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model. + Otherwise, GPUs are evenly allocated. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) # Identify common deployment shapes among all models. common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu) logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}") - # Filter out not available shapes - available_shapes = [item.name.upper() for item in shapes] - logger.debug(f"Service Available Shapes: {available_shapes}") - # If all models' shape configs are empty, use default deployment shapes instead common_shapes = ( available_shapes @@ -132,10 +156,10 @@ def load_multi_model_deployment_configuration( summary.gpu_allocation = gpu_allocation return summary - def load_model_deployment_configuration( + def _load_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], - model_id: str, + model_ids: List[str], ) -> ModelDeploymentConfigSummary: """ Retrieves deployment configuration for single model and allocate all available GPU count to it. @@ -144,8 +168,8 @@ def load_model_deployment_configuration( ---------- shapes : List[ComputeShapeSummary] Model deployment available shapes. - model_id : str - The OCID for the Aqua model. + model_ids : List[str] + A list of OCIDs for the Aqua models. Returns ------- @@ -153,30 +177,13 @@ def load_model_deployment_configuration( A summary of the deployment configurations and GPU allocations. If GPU allocation cannot be determined, an appropriate error message is included in the summary. """ - # Fetch deployment configuration concurrently. - logger.debug(f"Loading model deployment configuration for model: {model_id}") - deployment_config = self._fetch_deployment_configs_concurrently([model_id])[ - model_id - ] - - deployment = { - model_id: AquaDeploymentConfig( - shape=[shape.upper() for shape in deployment_config.shape], - configuration={ - shape.upper(): deployment_config.configuration.get( - shape, ConfigurationItem() - ) - for shape in deployment_config.shape - }, - ) - } - - # Initialize the summary result with the deployment configurations. 
- summary = ModelDeploymentConfigSummary(deployment_config=deployment) + model_id = model_ids[0] + _, common_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) # Find out the common shapes from deployment config and available deployment shapes - shape = [shape.upper() for shape in deployment_config.shape] - common_shapes = [shape.name.upper() for shape in shapes] + shape = [shape.upper() for shape in summary.deployment_config[model_id].shape] if shape: common_shapes = list(set(common_shapes).intersection(set(shape))) @@ -219,6 +226,24 @@ def load_model_deployment_configuration( summary.gpu_allocation = gpu_allocation return summary + def _fetch_model_shape_gpu(self, shapes: List[ComputeShapeSummary], model_ids: str): + """Fetches dict of model shape and gpu, list of available shapes and builds `ModelDeploymentConfigSummary` instance.""" + # Fetch deployment configurations concurrently. + logger.debug(f"Loading model deployment configuration for models: {model_ids}") + deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + + logger.debug(f"Loaded config: {deployment_configs}") + model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + + # Initialize the summary result with the deployment configurations. + summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Filter out not available shapes + available_shapes = [item.name.upper() for item in shapes] + logger.debug(f"Service Available Shapes: {available_shapes}") + + return model_shape_gpu, available_shapes, summary + def _fetch_deployment_configs_concurrently( self, model_ids: List[str] ) -> Dict[str, AquaDeploymentConfig]: @@ -241,25 +266,30 @@ def _extract_model_shape_gpu( ): """Extracts shape and GPU count details from deployment configurations. Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config. + Supported shapes for single model deployment will be collected from `shape` entry in deployment config. """ model_shape_gpu = {} deployment = {} + is_single_model = len(deployment_configs) == 1 for model_id, config in deployment_configs.items(): - # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). + # For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. - # multi_deployment_shape = config.shape - multi_deployment_shape = list(config.configuration.keys()) - model_shape_gpu[model_id] = { - shape.upper(): [ - item.gpu_count - for item in config.configuration.get( - shape, ConfigurationItem() - ).multi_model_deployment - ] - for shape in multi_deployment_shape - } + # For single model deployment, we use `config.shape` to find the available shapes. + multi_deployment_shape = ( + config.shape if is_single_model else list(config.configuration.keys()) + ) + if not is_single_model: + model_shape_gpu[model_id] = { + shape.upper(): [ + item.gpu_count + for item in config.configuration.get( + shape, ConfigurationItem() + ).multi_model_deployment + ] + for shape in multi_deployment_shape + } deployment[model_id] = { "shape": [shape.upper() for shape in multi_deployment_shape], "configuration": {
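
For reference, a minimal usage sketch of the API after this series (the model
OCIDs are placeholders and an authenticated ADS/OCI session is assumed):

    from ads.aqua.modeldeployment import AquaDeploymentApp

    app = AquaDeploymentApp()

    # One model id: the single-model branch allocates every GPU on each
    # compatible shape to that model.
    single_summary = app.get_multimodel_deployment_config(["<model_a_ocid>"])

    # Several model ids: GPUs are split across the models, optionally
    # prioritizing one of them.
    multi_summary = app.get_multimodel_deployment_config(
        ["<model_a_ocid>", "<model_b_ocid>"],
        primary_model_id="<model_a_ocid>",
    )

    # Note: an empty `model_ids` list is now rejected up front with AquaValueError.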