-
Notifications
You must be signed in to change notification settings - Fork 8
Retry logic improvements + Fix Tests warnings #50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d1ac49c
6074335
b9c7a1f
908cce8
e1b87c8
0228bb4
e255450
59f4587
f69abd7
bc12b9f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ | |
|
|
||
| from __future__ import annotations | ||
|
|
||
| import random | ||
| import time | ||
| from typing import Any, Optional | ||
|
|
||
|
|
@@ -38,17 +39,26 @@ def __init__( | |
| retries: Optional[int] = None, | ||
| backoff: Optional[float] = None, | ||
| timeout: Optional[float] = None, | ||
| max_backoff: Optional[float] = None, | ||
| jitter: bool = True, | ||
| retry_transient_errors: bool = True, | ||
| ) -> None: | ||
| self.max_attempts = retries if retries is not None else 5 | ||
| self.base_delay = backoff if backoff is not None else 0.5 | ||
| self.max_backoff = max_backoff if max_backoff is not None else 60.0 | ||
| self.default_timeout: Optional[float] = timeout | ||
| self.jitter = jitter | ||
| self.retry_transient_errors = retry_transient_errors | ||
|
|
||
| # Transient HTTP status codes that should be retried | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should probably use the TRANSIENT_STATUS defined in error_codes.py for this |
||
| self.transient_status_codes = {429, 502, 503, 504} | ||
|
|
||
| def request(self, method: str, url: str, **kwargs: Any) -> requests.Response: | ||
| """ | ||
| Execute an HTTP request with automatic retry logic and timeout management. | ||
|
|
||
| Applies default timeouts based on HTTP method (120s for POST/DELETE, 10s for others) | ||
| and retries on network errors with exponential backoff. | ||
| and retries on transient network errors and HTTP status codes with exponential backoff. | ||
|
|
||
| :param method: HTTP method (GET, POST, PUT, DELETE, etc.). | ||
| :type method: str | ||
|
|
@@ -68,13 +78,117 @@ def request(self, method: str, url: str, **kwargs: Any) -> requests.Response: | |
| m = (method or "").lower() | ||
| kwargs["timeout"] = 120 if m in ("post", "delete") else 10 | ||
|
|
||
| # Small backoff retry on network errors only | ||
| # Enhanced retry logic with exponential backoff, jitter, and HTTP status retries | ||
| for attempt in range(self.max_attempts): | ||
| try: | ||
| return requests.request(method, url, **kwargs) | ||
| response = requests.request(method, url, **kwargs) | ||
|
|
||
| # Check if we should retry based on HTTP status code | ||
| if (self.retry_transient_errors and | ||
| response.status_code in self.transient_status_codes and | ||
| attempt < self.max_attempts - 1): | ||
|
|
||
| delay = self._calculate_retry_delay(attempt, response) | ||
| time.sleep(delay) | ||
| continue | ||
|
|
||
| return response | ||
|
|
||
| except requests.exceptions.RequestException: | ||
| if attempt == self.max_attempts - 1: | ||
| raise | ||
| delay = self.base_delay * (2 ** attempt) | ||
| delay = self._calculate_retry_delay(attempt) | ||
| time.sleep(delay) | ||
| continue | ||
|
|
||
| # This should never be reached due to the logic above | ||
| raise RuntimeError("Unexpected end of retry loop") | ||
suyask-msft marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| def _calculate_retry_delay(self, attempt: int, response: Optional[requests.Response] = None) -> float: | ||
| """ | ||
| Calculate the delay before the next retry attempt using exponential backoff with jitter. | ||
|
|
||
| This method implements an intelligent retry delay strategy that prioritizes server-provided | ||
| guidance (Retry-After header) over client-calculated delays, uses exponential backoff to | ||
| reduce load on failing services, and applies jitter to prevent thundering herd problems | ||
| when multiple clients retry simultaneously. | ||
|
|
||
| The delay calculation follows this priority order: | ||
| 1. **Retry-After header**: If present and valid, use the server-specified delay (capped at max_backoff) | ||
| 2. **Exponential backoff**: Calculate base_delay * (2^attempt), capped at max_backoff | ||
| 3. **Jitter**: If enabled, add ±25% random variation to prevent synchronized retries | ||
|
|
||
| :param attempt: Zero-based retry attempt number (0 = first retry, 1 = second retry, etc.). | ||
| :type attempt: int | ||
| :param response: Optional HTTP response object containing headers. Used to extract Retry-After | ||
| header when available. If None, only exponential backoff calculation is performed. | ||
| :type response: requests.Response or None | ||
|
|
||
| :return: Delay in seconds before the next retry attempt. Always >= 0, capped at max_backoff. | ||
| :rtype: float | ||
|
|
||
| :raises: Does not raise exceptions. Invalid Retry-After values fall back to exponential backoff. | ||
|
|
||
| .. note:: | ||
| **Retry-After Header Handling (RFC 7231):** | ||
|
|
||
| - Supports integer seconds format: ``Retry-After: 120`` | ||
| - Invalid formats (non-integer, HTTP-date) fall back to exponential backoff | ||
| - Server-provided delays are capped at ``max_backoff`` to prevent excessive waits | ||
|
|
||
| .. note:: | ||
| **Exponential Backoff Formula:** | ||
|
|
||
| - Base calculation: ``delay = base_delay * (2^attempt)`` | ||
| - Example with ``base_delay=0.5``: 0.5s, 1.0s, 2.0s, 4.0s, 8.0s, 16.0s... | ||
| - Always capped at ``max_backoff`` (default 60s) to prevent unbounded delays | ||
|
|
||
| .. note:: | ||
| **Jitter Application:** | ||
|
|
||
| - When ``jitter=True``, adds random variation of ±25% of calculated delay | ||
| - Prevents thundering herd when multiple clients retry simultaneously | ||
| - Example: 4.0s delay becomes random value between 3.0s and 5.0s | ||
| - Final result is always >= 0 even with negative jitter | ||
|
|
||
| Example: | ||
| Calculate delays for successive retry attempts:: | ||
|
|
||
| client = HttpClient(backoff=1.0, max_backoff=30.0, jitter=True) | ||
|
|
||
| # Exponential backoff examples (without jitter for predictable values) | ||
| client_no_jitter = HttpClient(backoff=0.5, jitter=False) | ||
|
|
||
| # Attempt 0: 0.5s (0.5 * 2^0) | ||
| delay0 = client_no_jitter._calculate_retry_delay(0) | ||
|
|
||
| # Attempt 2: 2.0s (0.5 * 2^2) | ||
| delay2 = client_no_jitter._calculate_retry_delay(2) | ||
|
|
||
| # With jitter enabled, delays vary randomly within ±25% | ||
| # Attempt 1 with jitter: ~1.0s ± 0.25s = 0.75s - 1.25s | ||
| delay_with_jitter = client._calculate_retry_delay(1) | ||
| """ | ||
|
|
||
| # Check for Retry-After header (RFC 7231) | ||
| if response and "Retry-After" in response.headers: | ||
| try: | ||
| retry_after = int(response.headers["Retry-After"]) | ||
| # Respect Retry-After but cap it at max_backoff | ||
| return min(retry_after, self.max_backoff) | ||
| except (ValueError, TypeError): | ||
| # If Retry-After is not a valid integer, fall back to exponential backoff | ||
| pass | ||
|
|
||
| # Exponential backoff: base_delay * (2^attempt) | ||
| delay = self.base_delay * (2 ** attempt) | ||
|
|
||
| # Cap the delay at max_backoff | ||
| delay = min(delay, self.max_backoff) | ||
|
|
||
| # Add jitter to prevent thundering herd (±25% of delay) | ||
| if self.jitter: | ||
| jitter_range = delay * 0.25 | ||
| jitter_offset = random.uniform(-jitter_range, jitter_range) | ||
| delay = max(0, delay + jitter_offset) | ||
|
|
||
| return delay | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,7 +47,10 @@ def __init__( | |
| self._http = HttpClient( | ||
| retries=self.config.http_retries, | ||
| backoff=self.config.http_backoff, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we unify the format used here? |
||
| max_backoff=getattr(self.config, 'http_max_backoff', None), | ||
| timeout=self.config.http_timeout, | ||
| jitter=getattr(self.config, 'http_jitter', True), | ||
| retry_transient_errors=getattr(self.config, 'http_retry_transient_errors', True), | ||
| ) | ||
| # Cache: logical name -> entity set name (plural) resolved from metadata | ||
| self._logical_to_entityset_cache: dict[str, str] = {} | ||
|
|
@@ -903,6 +906,35 @@ def _normalize_picklist_label(self, label: str) -> str: | |
| norm = re.sub(r"\s+", " ", norm).strip().lower() | ||
| return norm | ||
|
|
||
| def _request_metadata_with_retry(self, method: str, url: str, **kwargs) -> Any: | ||
suyask-msft marked this conversation as resolved.
Show resolved
Hide resolved
suyask-msft marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| """ | ||
| Make HTTP request with metadata-specific retry logic. | ||
|
|
||
| Retries 404 errors for metadata operations since attribute metadata | ||
| may not be immediately available after creation or schema changes. | ||
suyask-msft marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| :param method: HTTP method (e.g., 'get', 'post'). | ||
| :type method: str | ||
| :param url: Target URL for the metadata request. | ||
| :type url: str | ||
| :param kwargs: Additional arguments passed to the underlying request method. | ||
| :return: HTTP response object. | ||
| :rtype: Any | ||
| :raises HttpError: If the request fails after all retry attempts. | ||
| """ | ||
| last_error = None | ||
| for attempt in range(3): # 3 attempts for metadata operations | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this would be on top of the http retries we already have, so by default could retry 3 * 5 times? is this intended? |
||
| try: | ||
| return self._request(method, url, **kwargs) | ||
| except HttpError as err: | ||
| last_error = err | ||
| if getattr(err, "status_code", None) == 404 and attempt < 2: | ||
| # Exponential backoff: 0.4s, 0.8s for metadata propagation delays | ||
| delay = 0.4 * (2 ** attempt) | ||
| time.sleep(delay) | ||
| continue | ||
| raise | ||
|
|
||
| def _optionset_map(self, logical_name: str, attr_logical: str) -> Optional[Dict[str, int]]: | ||
| """Build or return cached mapping of normalized label -> value for a picklist attribute. | ||
|
|
||
|
|
@@ -930,23 +962,14 @@ def _optionset_map(self, logical_name: str, attr_logical: str) -> Optional[Dict[ | |
| f"?$filter=LogicalName eq '{attr_esc}'&$select=LogicalName,AttributeType" | ||
| ) | ||
| # Retry up to 3 times on 404 (new or not-yet-published attribute metadata). If still 404, raise. | ||
| r_type = None | ||
| for attempt in range(3): | ||
| try: | ||
| r_type = self._request("get", url_type) | ||
| break | ||
| except HttpError as err: | ||
| if getattr(err, "status_code", None) == 404: | ||
| if attempt < 2: | ||
| # Exponential-ish backoff: 0.4s, 0.8s | ||
| time.sleep(0.4 * (2 ** attempt)) | ||
| continue | ||
| raise RuntimeError( | ||
| f"Picklist attribute metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)" | ||
| ) from err | ||
| raise | ||
| if r_type is None: | ||
| raise RuntimeError("Failed to retrieve attribute metadata due to repeated request failures.") | ||
| try: | ||
| r_type = self._request_metadata_with_retry("get", url_type) | ||
| except HttpError as err: | ||
| if getattr(err, "status_code", None) == 404: | ||
| raise RuntimeError( | ||
| f"Picklist attribute metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)" | ||
| ) from err | ||
| raise | ||
|
|
||
| body_type = r_type.json() | ||
| items = body_type.get("value", []) if isinstance(body_type, dict) else [] | ||
|
|
@@ -964,22 +987,14 @@ def _optionset_map(self, logical_name: str, attr_logical: str) -> Optional[Dict[ | |
| "Microsoft.Dynamics.CRM.PicklistAttributeMetadata?$select=LogicalName&$expand=OptionSet($select=Options)" | ||
| ) | ||
| # Step 2 fetch with retries: expanded OptionSet (cast form first) | ||
| r_opts = None | ||
| for attempt in range(3): | ||
| try: | ||
| r_opts = self._request("get", cast_url) | ||
| break | ||
| except HttpError as err: | ||
| if getattr(err, "status_code", None) == 404: | ||
| if attempt < 2: | ||
| time.sleep(0.4 * (2 ** attempt)) # 0.4s, 0.8s | ||
| continue | ||
| raise RuntimeError( | ||
| f"Picklist OptionSet metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)" | ||
| ) from err | ||
| raise | ||
| if r_opts is None: | ||
| raise RuntimeError("Failed to retrieve picklist OptionSet metadata due to repeated request failures.") | ||
| try: | ||
| r_opts = self._request_metadata_with_retry("get", cast_url) | ||
| except HttpError as err: | ||
| if getattr(err, "status_code", None) == 404: | ||
| raise RuntimeError( | ||
| f"Picklist OptionSet metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)" | ||
| ) from err | ||
| raise | ||
|
|
||
| attr_full = {} | ||
| try: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is default value added in code but not in signatures?