From f79df3cf01974dd520a79edf73bf21209f51c0df Mon Sep 17 00:00:00 2001 From: Alon Maor <48641682+alonmr@users.noreply.github.com> Date: Sun, 2 Jun 2024 14:32:57 +0300 Subject: [PATCH] [Iguazio] Retry on session verification (#5673) --- server/api/utils/clients/iguazio.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/server/api/utils/clients/iguazio.py b/server/api/utils/clients/iguazio.py index 489c30996a4..e579a13f7c8 100644 --- a/server/api/utils/clients/iguazio.py +++ b/server/api/utils/clients/iguazio.py @@ -139,11 +139,22 @@ class Client( ): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) + retry_on_exception = ( + mlrun.mlconf.httpdb.projects.retry_leader_request_on_exception + == mlrun.common.schemas.HTTPSessionRetryMode.enabled.value + ) self._session = mlrun.utils.HTTPSessionWithRetry( - retry_on_exception=mlrun.mlconf.httpdb.projects.retry_leader_request_on_exception - == mlrun.common.schemas.HTTPSessionRetryMode.enabled.value, + retry_on_exception=retry_on_exception, verbose=True, ) + self._retry_on_post_session = None + if retry_on_exception: + self._retry_on_post_session = mlrun.utils.HTTPSessionWithRetry( + retry_on_exception=mlrun.mlconf.httpdb.projects.retry_leader_request_on_exception + == mlrun.common.schemas.HTTPSessionRetryMode.enabled.value, + retry_on_post=True, + verbose=True, + ) self._api_url = mlrun.mlconf.iguazio_api_url # The job is expected to be completed in less than 5 seconds. If 10 seconds have passed and the job # has not been completed, increase the interval to retry every 5 seconds @@ -172,6 +183,7 @@ def verify_request_session( "authorization": request.headers.get("authorization"), "cookie": request.headers.get("cookie"), }, + retry_on_post=True, ) return self._generate_auth_info_from_session_verification_response( response.headers, response.json() @@ -618,11 +630,20 @@ def _get_job_from_iguazio(self, session: str, job_id: str) -> dict: return response.json() def _send_request_to_api( - self, method, path, error_message: str, session=None, **kwargs + self, + method, + path, + error_message: str, + session=None, + retry_on_post=False, + **kwargs, ): url = f"{self._api_url}/api/{path}" self._prepare_request_kwargs(session, path, kwargs=kwargs) - response = self._session.request( + http_session = self._session + if retry_on_post and self._retry_on_post_session: + http_session = self._retry_on_post_session + response = http_session.request( method, url, verify=mlrun.mlconf.httpdb.http.verify, **kwargs ) if not response.ok: