Skip to content

Commit 76cc18a

Browse files
committed
Updated pr.
1 parent 5041e69 commit 76cc18a

File tree

2 files changed

+85
-62
lines changed

2 files changed

+85
-62
lines changed

ads/aqua/modeldeployment/deployment.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,24 +1053,17 @@ def get_multimodel_deployment_config(
10531053
"""
10541054
if not model_ids:
10551055
raise AquaValueError(
1056-
"Invalid or empty parameter `model_ids`. Specify a list of valid model ids to get multi model deployment config."
1056+
"Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration."
10571057
)
10581058

10591059
compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID)
10601060

10611061
# Get all available model deployment shapes in the given compartment
10621062
available_shapes = self.list_shapes(compartment_id=compartment_id)
10631063

1064-
multi_model_deployment_config_loader = MultiModelDeploymentConfigLoader(
1065-
deployment_app=self
1066-
)
1067-
1068-
if len(model_ids) == 1:
1069-
return multi_model_deployment_config_loader.load_model_deployment_configuration(
1070-
shapes=available_shapes, model_id=model_ids[0]
1071-
)
1072-
1073-
return multi_model_deployment_config_loader.load_multi_model_deployment_configuration(
1064+
return MultiModelDeploymentConfigLoader(
1065+
deployment_app=self,
1066+
).load(
10741067
shapes=available_shapes,
10751068
model_ids=model_ids,
10761069
primary_model_id=primary_model_id,

ads/aqua/modeldeployment/utils.py

Lines changed: 81 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,14 @@ def __init__(self, deployment_app: AquaApp):
4444
"""
4545
self.deployment_app = deployment_app
4646

47-
def load_multi_model_deployment_configuration(
47+
def load(
4848
self,
4949
shapes: List[ComputeShapeSummary],
5050
model_ids: List[str],
5151
primary_model_id: Optional[str] = None,
5252
) -> ModelDeploymentConfigSummary:
5353
"""
54-
Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
54+
Retrieves deployment configurations for a single model or multiple models and calculates compatible GPU allocations.
5555
5656
Parameters
5757
----------
@@ -69,24 +69,48 @@ def load_multi_model_deployment_configuration(
6969
A summary of the deployment configurations and GPU allocations. If GPU allocation
7070
cannot be determined, an appropriate error message is included in the summary.
7171
"""
72-
# Fetch deployment configurations concurrently.
73-
logger.debug(f"Loading model deployment configuration for models: {model_ids}")
74-
deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
72+
if len(model_ids) == 1:
73+
return self._load_model_deployment_configuration(
74+
shapes=shapes, model_ids=model_ids
75+
)
7576

76-
logger.debug(f"Loaded config: {deployment_configs}")
77-
model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs)
77+
return self._load_multi_model_deployment_configuration(
78+
shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
79+
)
7880

79-
# Initialize the summary result with the deployment configurations.
80-
summary = ModelDeploymentConfigSummary(deployment_config=deployment)
81+
def _load_multi_model_deployment_configuration(
82+
self,
83+
shapes: List[ComputeShapeSummary],
84+
model_ids: List[str],
85+
primary_model_id: Optional[str] = None,
86+
) -> ModelDeploymentConfigSummary:
87+
"""
88+
Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
89+
90+
Parameters
91+
----------
92+
shapes : List[ComputeShapeSummary]
93+
Model deployment available shapes.
94+
model_ids : List[str]
95+
A list of OCIDs for the Aqua models.
96+
primary_model_id : Optional[str], optional
97+
The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
98+
Otherwise, GPUs are evenly allocated.
99+
100+
Returns
101+
-------
102+
ModelDeploymentConfigSummary
103+
A summary of the deployment configurations and GPU allocations. If GPU allocation
104+
cannot be determined, an appropriate error message is included in the summary.
105+
"""
106+
model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu(
107+
shapes=shapes, model_ids=model_ids
108+
)
81109

82110
# Identify common deployment shapes among all models.
83111
common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu)
84112
logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}")
85113

86-
# Filter out not available shapes
87-
available_shapes = [item.name.upper() for item in shapes]
88-
logger.debug(f"Service Available Shapes: {available_shapes}")
89-
90114
# If all models' shape configs are empty, use default deployment shapes instead
91115
common_shapes = (
92116
available_shapes
@@ -132,10 +156,10 @@ def load_multi_model_deployment_configuration(
132156
summary.gpu_allocation = gpu_allocation
133157
return summary
134158

135-
def load_model_deployment_configuration(
159+
def _load_model_deployment_configuration(
136160
self,
137161
shapes: List[ComputeShapeSummary],
138-
model_id: str,
162+
model_ids: List[str],
139163
) -> ModelDeploymentConfigSummary:
140164
"""
141165
Retrieves deployment configuration for single model and allocate all available GPU count to it.
@@ -144,39 +168,22 @@ def load_model_deployment_configuration(
144168
----------
145169
shapes : List[ComputeShapeSummary]
146170
Model deployment available shapes.
147-
model_id : str
148-
The OCID for the Aqua model.
171+
model_ids : List[str]
172+
A list of OCIDs for the Aqua models.
149173
150174
Returns
151175
-------
152176
ModelDeploymentConfigSummary
153177
A summary of the deployment configurations and GPU allocations. If GPU allocation
154178
cannot be determined, an appropriate error message is included in the summary.
155179
"""
156-
# Fetch deployment configuration concurrently.
157-
logger.debug(f"Loading model deployment configuration for model: {model_id}")
158-
deployment_config = self._fetch_deployment_configs_concurrently([model_id])[
159-
model_id
160-
]
161-
162-
deployment = {
163-
model_id: AquaDeploymentConfig(
164-
shape=[shape.upper() for shape in deployment_config.shape],
165-
configuration={
166-
shape.upper(): deployment_config.configuration.get(
167-
shape, ConfigurationItem()
168-
)
169-
for shape in deployment_config.shape
170-
},
171-
)
172-
}
173-
174-
# Initialize the summary result with the deployment configurations.
175-
summary = ModelDeploymentConfigSummary(deployment_config=deployment)
180+
model_id = model_ids[0]
181+
_, common_shapes, summary = self._fetch_model_shape_gpu(
182+
shapes=shapes, model_ids=model_ids
183+
)
176184

177185
# Find out the common shapes from deployment config and available deployment shapes
178-
shape = [shape.upper() for shape in deployment_config.shape]
179-
common_shapes = [shape.name.upper() for shape in shapes]
186+
shape = [shape.upper() for shape in summary.deployment_config[model_id].shape]
180187
if shape:
181188
common_shapes = list(set(common_shapes).intersection(set(shape)))
182189

@@ -219,6 +226,24 @@ def load_model_deployment_configuration(
219226
summary.gpu_allocation = gpu_allocation
220227
return summary
221228

229+
def _fetch_model_shape_gpu(self, shapes: List[ComputeShapeSummary], model_ids: str):
230+
"""Fetches the model shape/GPU mapping and the list of available shapes, and builds a `ModelDeploymentConfigSummary` instance."""
231+
# Fetch deployment configurations concurrently.
232+
logger.debug(f"Loading model deployment configuration for models: {model_ids}")
233+
deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
234+
235+
logger.debug(f"Loaded config: {deployment_configs}")
236+
model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs)
237+
238+
# Initialize the summary result with the deployment configurations.
239+
summary = ModelDeploymentConfigSummary(deployment_config=deployment)
240+
241+
# Filter out not available shapes
242+
available_shapes = [item.name.upper() for item in shapes]
243+
logger.debug(f"Service Available Shapes: {available_shapes}")
244+
245+
return model_shape_gpu, available_shapes, summary
246+
222247
def _fetch_deployment_configs_concurrently(
223248
self, model_ids: List[str]
224249
) -> Dict[str, AquaDeploymentConfig]:
@@ -241,25 +266,30 @@ def _extract_model_shape_gpu(
241266
):
242267
"""Extracts shape and GPU count details from deployment configurations.
243268
Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config.
269+
Supported shapes for single model deployment will be collected from `shape` entry in deployment config.
244270
"""
245271
model_shape_gpu = {}
246272
deployment = {}
273+
is_single_model = len(deployment_configs) == 1
247274

248275
for model_id, config in deployment_configs.items():
249-
# We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
276+
# For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
250277
# However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2.
251278
# Our current configuration does not support this flexibility.
252-
# multi_deployment_shape = config.shape
253-
multi_deployment_shape = list(config.configuration.keys())
254-
model_shape_gpu[model_id] = {
255-
shape.upper(): [
256-
item.gpu_count
257-
for item in config.configuration.get(
258-
shape, ConfigurationItem()
259-
).multi_model_deployment
260-
]
261-
for shape in multi_deployment_shape
262-
}
279+
# For single model deployment, we use `config.shape` to find the available shapes.
280+
multi_deployment_shape = (
281+
config.shape if is_single_model else list(config.configuration.keys())
282+
)
283+
if not is_single_model:
284+
model_shape_gpu[model_id] = {
285+
shape.upper(): [
286+
item.gpu_count
287+
for item in config.configuration.get(
288+
shape, ConfigurationItem()
289+
).multi_model_deployment
290+
]
291+
for shape in multi_deployment_shape
292+
}
263293
deployment[model_id] = {
264294
"shape": [shape.upper() for shape in multi_deployment_shape],
265295
"configuration": {

0 commit comments

Comments
 (0)