diff --git a/charts/model-engine/templates/balloon_cpu_deployment.yaml b/charts/model-engine/templates/balloon_cpu_deployment.yaml index 1fd9e6c1..a7be9011 100644 --- a/charts/model-engine/templates/balloon_cpu_deployment.yaml +++ b/charts/model-engine/templates/balloon_cpu_deployment.yaml @@ -34,7 +34,7 @@ spec: resources: limits: memory: 28Gi - cpu: 8 + cpu: 6 command: - /bin/bash - -c diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py index 26bbcf2d..fd18b7b1 100644 --- a/clients/python/llmengine/model.py +++ b/clients/python/llmengine/model.py @@ -41,10 +41,10 @@ def create( quantize: Optional[Quantization] = None, checkpoint_path: Optional[str] = None, # General endpoint fields - cpus: int = 32, - memory: str = "192Gi", + cpus: int = 8, + memory: str = "40Gi", storage: str = "96Gi", - gpus: int = 4, + gpus: int = 1, min_workers: int = 0, max_workers: int = 1, per_worker: int = 10, diff --git a/model-engine/model_engine_server/infra/services/image_cache_service.py b/model-engine/model_engine_server/infra/services/image_cache_service.py index 53b14980..b6343dcc 100644 --- a/model-engine/model_engine_server/infra/services/image_cache_service.py +++ b/model-engine/model_engine_server/infra/services/image_cache_service.py @@ -128,7 +128,9 @@ async def execute(self, endpoint_infra_states: Dict[str, Tuple[bool, ModelEndpoi if state.resource_state.gpus == 0 and ( ( state.image not in images_to_cache_priority["cpu"] - or last_updated_at + or last_updated_at.replace( + tzinfo=images_to_cache_priority["cpu"][state.image].last_updated_at.tzinfo + ) > images_to_cache_priority["cpu"][state.image].last_updated_at ) and self.docker_repository.image_exists(image_tag, repository_name) @@ -143,7 +145,11 @@ async def execute(self, endpoint_infra_states: Dict[str, Tuple[bool, ModelEndpoi if state.resource_state.gpu_type == gpu_type and ( ( state.image not in images_to_cache_priority[key] - or last_updated_at + or last_updated_at.replace( + tzinfo=images_to_cache_priority[key][ + state.image + ].last_updated_at.tzinfo + ) > images_to_cache_priority[key][state.image].last_updated_at ) and self.docker_repository.image_exists(image_tag, repository_name)