Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions ads/aqua/modeldeployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,10 @@ def create(
f"Invalid parameters for creating a model deployment. Error details: {custom_errors}."
) from ex

# If a single model is provided, delegate to `create` method
if (
not create_deployment_details.model_id
and create_deployment_details.models
and len(create_deployment_details.models) == 1
):
single_model = create_deployment_details.models[0]
logger.info(
f"Single model ({single_model.model_id}) provided. "
"Delegating to single model creation method."
if not (create_deployment_details.model_id or create_deployment_details.models):
raise AquaValueError(
"Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided."
)
create_deployment_details.model_id = single_model.model_id

# Set defaults for compartment and project if not provided.
compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID
Expand All @@ -170,6 +162,10 @@ def create(
# Create an AquaModelApp instance once to perform the deployment creation.
model_app = AquaModelApp()
if create_deployment_details.model_id:
logger.debug(
f"Single model ({create_deployment_details.model_id}) provided. "
"Delegating to single model creation method."
)
aqua_model = model_app.create(
model_id=create_deployment_details.model_id,
compartment_id=compartment_id,
Expand Down Expand Up @@ -254,6 +250,10 @@ def create(
f"Only the following container families are supported: {supported_container_families}."
)

logger.debug(
f"Multi models ({model_ids}) provided. Delegating to multi model creation method."
)

aqua_model = model_app.create_multi(
models=create_deployment_details.models,
compartment_id=compartment_id,
Expand Down Expand Up @@ -1051,6 +1051,10 @@ def get_multimodel_deployment_config(
ModelDeploymentConfigSummary
A summary of the model deployment configurations and GPU allocations.
"""
if not model_ids:
raise AquaValueError(
"Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration."
)

compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID)

Expand Down
165 changes: 141 additions & 24 deletions ads/aqua/modeldeployment/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def load(
primary_model_id: Optional[str] = None,
) -> ModelDeploymentConfigSummary:
"""
Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
Retrieves deployment configurations for multiple/single model and calculates compatible GPU allocations.

Parameters
----------
Expand All @@ -69,24 +69,48 @@ def load(
A summary of the deployment configurations and GPU allocations. If GPU allocation
cannot be determined, an appropriate error message is included in the summary.
"""
# Fetch deployment configurations concurrently.
logger.debug(f"Loading model deployment configuration for models: {model_ids}")
deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
if len(model_ids) == 1:
return self._load_model_deployment_configuration(
shapes=shapes, model_ids=model_ids
)

logger.debug(f"Loaded config: {deployment_configs}")
model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs)
return self._load_multi_model_deployment_configuration(
shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
)

# Initialize the summary result with the deployment configurations.
summary = ModelDeploymentConfigSummary(deployment_config=deployment)
def _load_multi_model_deployment_configuration(
self,
shapes: List[ComputeShapeSummary],
model_ids: List[str],
primary_model_id: Optional[str] = None,
) -> ModelDeploymentConfigSummary:
"""
Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.

Parameters
----------
shapes : List[ComputeShapeSummary]
Model deployment available shapes.
model_ids : List[str]
A list of OCIDs for the Aqua models.
primary_model_id : Optional[str], optional
The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
Otherwise, GPUs are evenly allocated.

Returns
-------
ModelDeploymentConfigSummary
A summary of the deployment configurations and GPU allocations. If GPU allocation
cannot be determined, an appropriate error message is included in the summary.
"""
model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu(
shapes=shapes, model_ids=model_ids
)

# Identify common deployment shapes among all models.
common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu)
logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}")

# Filter out not available shapes
available_shapes = [item.name.upper() for item in shapes]
logger.debug(f"Service Available Shapes: {available_shapes}")

# If all models' shape configs are empty, use default deployment shapes instead
common_shapes = (
available_shapes
Expand Down Expand Up @@ -132,6 +156,94 @@ def load(
summary.gpu_allocation = gpu_allocation
return summary

def _load_model_deployment_configuration(
    self,
    shapes: List[ComputeShapeSummary],
    model_ids: List[str],
) -> ModelDeploymentConfigSummary:
    """
    Retrieves the deployment configuration for a single model and allocates
    every GPU of each usable shape to that model.

    Parameters
    ----------
    shapes : List[ComputeShapeSummary]
        Model deployment available shapes.
    model_ids : List[str]
        A list of OCIDs for the Aqua models. Only the first entry is used.

    Returns
    -------
    ModelDeploymentConfigSummary
        A summary of the deployment configuration and GPU allocations. When no
        usable shape is found, `error_message` is set on the summary and no GPU
        allocation is assigned.
    """
    model_id = model_ids[0]
    _, common_shapes, summary = self._fetch_model_shape_gpu(
        shapes=shapes, model_ids=model_ids
    )

    # Shapes listed in the model's deployment config, upper-cased so they can be
    # compared with the (already upper-cased) service shape names.
    supported_shapes = {
        name.upper() for name in summary.deployment_config[model_id].shape
    }
    if supported_shapes:
        # Filter in service order instead of intersecting two sets: set iteration
        # order depends on string hashing, which would make the resulting
        # allocation order (and the debug log below) differ between runs.
        common_shapes = list(
            dict.fromkeys(
                name for name in common_shapes if name in supported_shapes
            )
        )
    # An empty shape list in the config leaves the full service list untouched.

    if not common_shapes:
        summary.error_message = (
            "The selected model does not have any available deployment shape. "
            "Please ensure that chosen model is compatible for multi-model deployment."
        )
        logger.debug(
            f"No compatible deployment shapes found for selected model: {model_id}"
        )
        return summary

    logger.debug(f"Available Common Shapes: {common_shapes}")

    # Build the name -> summary lookup once. `setdefault` keeps the first summary
    # seen for a name, matching a front-to-back scan of `shapes`.
    shape_by_name = {}
    for deployment_shape in shapes:
        shape_by_name.setdefault(deployment_shape.name.upper(), deployment_shape)

    gpu_allocation = {}
    for shape_name in common_shapes:
        total_gpus_available = 0
        shape_summary = shape_by_name.get(shape_name)
        if shape_summary and shape_summary.gpu_specs:
            total_gpus_available = shape_summary.gpu_specs.gpu_count

        # Shapes without GPU specs (or reporting zero GPUs) get no allocation entry.
        if total_gpus_available != 0:
            gpu_allocation[shape_name] = GPUShapeAllocation(
                models=[
                    GPUModelAllocation(
                        ocid=model_id, gpu_count=total_gpus_available
                    )
                ],
                total_gpus_available=total_gpus_available,
            )

    summary.gpu_allocation = gpu_allocation
    return summary

def _fetch_model_shape_gpu(
    self, shapes: List[ComputeShapeSummary], model_ids: List[str]
):
    """
    Fetches the deployment configurations for the given models and prepares the
    pieces needed to build a GPU allocation.

    Parameters
    ----------
    shapes : List[ComputeShapeSummary]
        Model deployment available shapes.
    model_ids : List[str]
        A list of OCIDs for the Aqua models.

    Returns
    -------
    tuple
        A 3-tuple of:
        - the per-model shape/GPU mapping produced by `_extract_model_shape_gpu`,
        - the upper-cased names of the available shapes, in the order given,
        - a `ModelDeploymentConfigSummary` initialized with the deployment configs.
    """
    # Fetch deployment configurations concurrently.
    logger.debug(f"Loading model deployment configuration for models: {model_ids}")
    deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)

    logger.debug(f"Loaded config: {deployment_configs}")
    model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs)

    # Initialize the summary result with the deployment configurations.
    summary = ModelDeploymentConfigSummary(deployment_config=deployment)

    # Upper-case the service shape names so callers can compare them with the
    # upper-cased shape names taken from the deployment configs.
    available_shapes = [item.name.upper() for item in shapes]
    logger.debug(f"Service Available Shapes: {available_shapes}")

    return model_shape_gpu, available_shapes, summary

def _fetch_deployment_configs_concurrently(
self, model_ids: List[str]
) -> Dict[str, AquaDeploymentConfig]:
Expand All @@ -154,25 +266,30 @@ def _extract_model_shape_gpu(
):
"""Extracts shape and GPU count details from deployment configurations.
Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config.
Supported shapes for single model deployment will be collected from `shape` entry in deployment config.
"""
model_shape_gpu = {}
deployment = {}
is_single_model = len(deployment_configs) == 1

for model_id, config in deployment_configs.items():
# We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
# For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
# However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2.
# Our current configuration does not support this flexibility.
# multi_deployment_shape = config.shape
multi_deployment_shape = list(config.configuration.keys())
model_shape_gpu[model_id] = {
shape.upper(): [
item.gpu_count
for item in config.configuration.get(
shape, ConfigurationItem()
).multi_model_deployment
]
for shape in multi_deployment_shape
}
# For single model deployment, we use `config.shape` to find the available shapes.
multi_deployment_shape = (
config.shape if is_single_model else list(config.configuration.keys())
)
if not is_single_model:
model_shape_gpu[model_id] = {
shape.upper(): [
item.gpu_count
for item in config.configuration.get(
shape, ConfigurationItem()
).multi_model_deployment
]
for shape in multi_deployment_shape
}
deployment[model_id] = {
"shape": [shape.upper() for shape in multi_deployment_shape],
"configuration": {
Expand Down
85 changes: 82 additions & 3 deletions tests/unitary/with_extras/aqua/test_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,10 +499,10 @@ class TestDataset:
"deployment_config": {
"model_a": {
"shape": [
"BM.GPU.A100-V2.8",
"BM.GPU.H100.8",
"VM.GPU.A10.2",
"VM.GPU.A10.4",
"BM.GPU.A100-V2.8",
"BM.GPU.H100.8",
],
"configuration": {
"VM.GPU.A10.2": {
Expand Down Expand Up @@ -593,6 +593,73 @@ class TestDataset:
"error_message": None,
}

# Expected summary for a single model ("model_a") whose deployment config has no
# shapes and no configuration: each listed shape hands its whole GPU count to the model.
aqua_deployment_multi_model_config_single_custom = {
    "deployment_config": {"model_a": {"shape": [], "configuration": {}}},
    "gpu_allocation": {
        shape_name: {
            "models": [{"ocid": "model_a", "gpu_count": gpu_total}],
            "total_gpus_available": gpu_total,
        }
        # (shape name, total GPUs on that shape)
        for shape_name, gpu_total in (
            ("VM.GPU2.1", 1),
            ("VM.GPU3.1", 1),
            ("VM.GPU3.2", 2),
            ("VM.GPU3.4", 4),
            ("BM.GPU2.2", 2),
            ("BM.GPU3.8", 8),
            ("BM.GPU4.8", 8),
            ("BM.GPU.A100-V2.8", 8),
            ("BM.GPU.H100.8", 8),
            ("BM.GPU.T1.2", 2),
            ("BM.GPU.A10.4", 4),
            ("VM.GPU.A10.4", 4),
            ("BM.GPU.L40S-NC.4", 4),
            ("VM.GPU.A10.1", 1),
            ("VM.GPU.A10.2", 2),
        )
    },
    "error_message": None,
}

aqua_deployment_multi_model_config_summary_hybrid = {
"deployment_config": {
"model_a": {
Expand Down Expand Up @@ -1001,7 +1068,7 @@ def test_get_deployment_config(self):
"ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently"
)
@patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes")
def test_get_multimodel_deployment_config(
def test_get_multimodel_deployment_config_single(
self, mock_list_shapes, mock_fetch_deployment_configs_concurrently
):
config_json = os.path.join(
Expand Down Expand Up @@ -1035,6 +1102,18 @@ def test_get_multimodel_deployment_config(
== TestDataset.aqua_deployment_multi_model_config_summary
)

# custom model without deployment config
# deployment shape should be collected from `list_shapes`.
mock_fetch_deployment_configs_concurrently.return_value = {
"model_a": AquaDeploymentConfig()
}
result = self.app.get_multimodel_deployment_config(["model_a"])

assert (
result.model_dump()
== TestDataset.aqua_deployment_multi_model_config_single_custom
)

@patch(
"ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently"
)
Expand Down