From 47ba8b017c63ae45fd492356d50d490a3311c5a2 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 27 Mar 2025 11:42:24 -0400 Subject: [PATCH] fix: Don't retry for non-recoverable server http errors This is specifically addressing the issue where a server returning Not Implemented (code 501) would receive two more attempts for the same request, even though there's no reason to expect it to serve the request any better on further attempts. This patch reduces the number of >=500 codes that would be retried to those where there seems to be a chance of recovery on further attempts. These codes are now explicitly listed instead of a broad >=500 filter. For all possible server codes, see e.g.: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status#server_error_responses Signed-off-by: Ihar Hrachyshka --- src/llama_stack_client/_base_client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_client/_base_client.py index 5a0376e6..c7d89a69 100644 --- a/src/llama_stack_client/_base_client.py +++ b/src/llama_stack_client/_base_client.py @@ -734,7 +734,11 @@ def _should_retry(self, response: httpx.Response) -> bool: return True # Retry internal errors. - if response.status_code >= 500: + if response.status_code in ( + 502, # Bad Gateway + 503, # Service Unavailable + 504, # Gateway Timeout + ): log.debug("Retrying due to status code %i", response.status_code) return True