@@ -44,14 +44,14 @@ def __init__(self, deployment_app: AquaApp):
         """
         self.deployment_app = deployment_app
 
-    def load_multi_model_deployment_configuration(
+    def load(
         self,
         shapes: List[ComputeShapeSummary],
         model_ids: List[str],
         primary_model_id: Optional[str] = None,
     ) -> ModelDeploymentConfigSummary:
         """
-        Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
+        Retrieves deployment configurations for one or more models and calculates compatible GPU allocations.
 
         Parameters
         ----------
@@ -69,24 +69,48 @@ def load_multi_model_deployment_configuration(
             A summary of the deployment configurations and GPU allocations. If GPU allocation
             cannot be determined, an appropriate error message is included in the summary.
         """
-        # Fetch deployment configurations concurrently.
-        logger.debug(f"Loading model deployment configuration for models: {model_ids}")
-        deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
+        if len(model_ids) == 1:
+            return self._load_model_deployment_configuration(
+                shapes=shapes, model_ids=model_ids
+            )
 
-        logger.debug(f"Loaded config: {deployment_configs}")
-        model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs)
+        return self._load_multi_model_deployment_configuration(
+            shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id
+        )
 
-        # Initialize the summary result with the deployment configurations.
-        summary = ModelDeploymentConfigSummary(deployment_config=deployment)
+    def _load_multi_model_deployment_configuration(
+        self,
+        shapes: List[ComputeShapeSummary],
+        model_ids: List[str],
+        primary_model_id: Optional[str] = None,
+    ) -> ModelDeploymentConfigSummary:
+        """
+        Retrieves deployment configurations for multiple models and calculates compatible GPU allocations.
+
+        Parameters
+        ----------
+        shapes : List[ComputeShapeSummary]
+            Model deployment available shapes.
+        model_ids : List[str]
+            A list of OCIDs for the Aqua models.
+        primary_model_id : Optional[str], optional
+            The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model.
+            Otherwise, GPUs are evenly allocated.
+
+        Returns
+        -------
+        ModelDeploymentConfigSummary
+            A summary of the deployment configurations and GPU allocations. If GPU allocation
+            cannot be determined, an appropriate error message is included in the summary.
+        """
+        model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu(
+            shapes=shapes, model_ids=model_ids
+        )
 
         # Identify common deployment shapes among all models.
         common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu)
         logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}")
 
-        # Filter out not available shapes
-        available_shapes = [item.name.upper() for item in shapes]
-        logger.debug(f"Service Available Shapes: {available_shapes}")
-
         # If all models' shape configs are empty, use default deployment shapes instead
         common_shapes = (
             available_shapes
@@ -132,10 +156,10 @@ def load_multi_model_deployment_configuration(
         summary.gpu_allocation = gpu_allocation
         return summary
 
-    def load_model_deployment_configuration(
+    def _load_model_deployment_configuration(
         self,
         shapes: List[ComputeShapeSummary],
-        model_id: str,
+        model_ids: List[str],
     ) -> ModelDeploymentConfigSummary:
         """
         Retrieves deployment configuration for single model and allocate all available GPU count to it.
@@ -144,39 +168,22 @@ def load_model_deployment_configuration(
         ----------
         shapes : List[ComputeShapeSummary]
             Model deployment available shapes.
-        model_id : str
-            The OCID for the Aqua model.
+        model_ids : List[str]
+            A list of OCIDs for the Aqua models.
 
         Returns
         -------
         ModelDeploymentConfigSummary
             A summary of the deployment configurations and GPU allocations. If GPU allocation
             cannot be determined, an appropriate error message is included in the summary.
         """
-        # Fetch deployment configuration concurrently.
-        logger.debug(f"Loading model deployment configuration for model: {model_id}")
-        deployment_config = self._fetch_deployment_configs_concurrently([model_id])[
-            model_id
-        ]
-
-        deployment = {
-            model_id: AquaDeploymentConfig(
-                shape=[shape.upper() for shape in deployment_config.shape],
-                configuration={
-                    shape.upper(): deployment_config.configuration.get(
-                        shape, ConfigurationItem()
-                    )
-                    for shape in deployment_config.shape
-                },
-            )
-        }
-
-        # Initialize the summary result with the deployment configurations.
-        summary = ModelDeploymentConfigSummary(deployment_config=deployment)
+        model_id = model_ids[0]
+        _, common_shapes, summary = self._fetch_model_shape_gpu(
+            shapes=shapes, model_ids=model_ids
+        )
 
         # Find out the common shapes from deployment config and available deployment shapes
-        shape = [shape.upper() for shape in deployment_config.shape]
-        common_shapes = [shape.name.upper() for shape in shapes]
+        shape = [shape.upper() for shape in summary.deployment_config[model_id].shape]
         if shape:
             common_shapes = list(set(common_shapes).intersection(set(shape)))
 
@@ -219,6 +226,24 @@ def load_model_deployment_configuration(
         summary.gpu_allocation = gpu_allocation
         return summary
 
+    def _fetch_model_shape_gpu(self, shapes: List[ComputeShapeSummary], model_ids: List[str]):
+        """Fetches the dict of model shapes and GPU counts plus the list of available shapes, and builds a `ModelDeploymentConfigSummary` instance."""
+        # Fetch deployment configurations concurrently.
+        logger.debug(f"Loading model deployment configuration for models: {model_ids}")
+        deployment_configs = self._fetch_deployment_configs_concurrently(model_ids)
+
+        logger.debug(f"Loaded config: {deployment_configs}")
+        model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs)
+
+        # Initialize the summary result with the deployment configurations.
+        summary = ModelDeploymentConfigSummary(deployment_config=deployment)
+
+        # Filter out unavailable shapes
+        available_shapes = [item.name.upper() for item in shapes]
+        logger.debug(f"Service Available Shapes: {available_shapes}")
+
+        return model_shape_gpu, available_shapes, summary
+
     def _fetch_deployment_configs_concurrently(
         self, model_ids: List[str]
     ) -> Dict[str, AquaDeploymentConfig]:
@@ -241,25 +266,30 @@ def _extract_model_shape_gpu(
     ):
         """Extracts shape and GPU count details from deployment configurations.
         Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config.
+        Supported shapes for single model deployment will be collected from `shape` entry in deployment config.
         """
         model_shape_gpu = {}
         deployment = {}
+        is_single_model = len(deployment_configs) == 1
 
         for model_id, config in deployment_configs.items():
-            # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
+            # For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1).
             # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2.
             # Our current configuration does not support this flexibility.
-            # multi_deployment_shape = config.shape
-            multi_deployment_shape = list(config.configuration.keys())
-            model_shape_gpu[model_id] = {
-                shape.upper(): [
-                    item.gpu_count
-                    for item in config.configuration.get(
-                        shape, ConfigurationItem()
-                    ).multi_model_deployment
-                ]
-                for shape in multi_deployment_shape
-            }
+            # For single model deployment, we use `config.shape` to find the available shapes.
+            multi_deployment_shape = (
+                config.shape if is_single_model else list(config.configuration.keys())
+            )
+            if not is_single_model:
+                model_shape_gpu[model_id] = {
+                    shape.upper(): [
+                        item.gpu_count
+                        for item in config.configuration.get(
+                            shape, ConfigurationItem()
+                        ).multi_model_deployment
+                    ]
+                    for shape in multi_deployment_shape
+                }
             deployment[model_id] = {
                 "shape": [shape.upper() for shape in multi_deployment_shape],
                 "configuration": {
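
Review note: a minimal usage sketch of the unified `load` entry point introduced above. The loader class name (`MultiModelDeploymentConfigLoader`), its import path, and the way `deployment_app` and `shapes` are obtained are illustrative assumptions, not part of this diff; only the `load(shapes=..., model_ids=..., primary_model_id=...)` signature and its single-vs-multi dispatch come from the changed code.

# Illustrative sketch only -- class and import path below are assumptions.
from typing import List, Optional

# from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader  # assumed path

def resolve_gpu_allocation(
    deployment_app,                      # assumed AquaApp instance (see __init__ in this diff)
    shapes,                              # List[ComputeShapeSummary] from a shape-listing call
    model_ids: List[str],
    primary_model_id: Optional[str] = None,
):
    """Return the GPU allocation summary for one or more Aqua models."""
    loader = MultiModelDeploymentConfigLoader(deployment_app)  # hypothetical class name
    # `load` dispatches internally: a single model id goes through
    # _load_model_deployment_configuration, several ids go through
    # _load_multi_model_deployment_configuration.
    summary = loader.load(
        shapes=shapes,
        model_ids=model_ids,
        primary_model_id=primary_model_id,
    )
    return summary.gpu_allocation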