Skip to content
Open
1 change: 0 additions & 1 deletion examples/advanced/complete_walkthrough.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,6 @@ def _status_value_for_index(idx: int, use_french: bool):
# 4) Query records via SQL (?sql parameter))
print("Query (SQL via ?sql query parameter):")
try:
import time
pause("Execute SQL Query")

def _run_query():
Expand Down
2 changes: 0 additions & 2 deletions examples/advanced/pandas_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,6 @@ def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403
# 4) Query records via SQL (Web API ?sql=)
print("(Pandas) Query (SQL via Web API ?sql=):")
try:
import time

def _run_query():
id_key = f"{logical}id"
cols = f"{id_key}, {attr_prefix}_code, {attr_prefix}_amount, {attr_prefix}_when"
Expand Down
28 changes: 20 additions & 8 deletions src/PowerPlatform/Dataverse/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,28 @@ class DataverseConfig:

:param language_code: LCID (Locale ID) for localized labels and messages. Default is 1033 (English - United States).
:type language_code: int
:param http_retries: Optional maximum number of retry attempts for transient HTTP errors. Reserved for future use.
:param http_retries: Maximum number of retry attempts for HTTP requests (default: 5).
:type http_retries: int or None
:param http_backoff: Optional backoff multiplier (in seconds) between retry attempts. Reserved for future use.
:param http_backoff: Base delay in seconds for exponential backoff (default: 0.5).
:type http_backoff: float or None
:param http_timeout: Optional request timeout in seconds. Reserved for future use.
:param http_max_backoff: Maximum delay between retry attempts in seconds (default: 60.0).
:type http_max_backoff: float or None
:param http_timeout: Request timeout in seconds (default: method-dependent).
:type http_timeout: float or None
:param http_jitter: Whether to add jitter to retry delays to prevent thundering herd (default: True).
:type http_jitter: bool or None
:param http_retry_transient_errors: Whether to retry transient HTTP errors like 429, 502, 503, 504 (default: True).
:type http_retry_transient_errors: bool or None
"""
language_code: int = 1033

# Optional HTTP tuning (not yet wired everywhere; reserved for future use)
# HTTP retry and resilience configuration
http_retries: Optional[int] = None
http_backoff: Optional[float] = None
http_max_backoff: Optional[float] = None
http_timeout: Optional[float] = None
http_jitter: Optional[bool] = None
http_retry_transient_errors: Optional[bool] = None

@classmethod
def from_env(cls) -> "DataverseConfig":
Expand All @@ -36,10 +45,13 @@ def from_env(cls) -> "DataverseConfig":
:return: Configuration instance with default values.
:rtype: ~PowerPlatform.Dataverse.core.config.DataverseConfig
"""
# Environment-free defaults
# Environment-free defaults with enhanced retry configuration
return cls(
language_code=1033,
http_retries=None,
http_backoff=None,
http_timeout=None,
http_retries=None, # Will default to 5 in HttpClient
http_backoff=None, # Will default to 0.5 in HttpClient
http_max_backoff=None, # Will default to 60.0 in HttpClient
http_timeout=None, # Will use method-dependent defaults in HttpClient
http_jitter=None, # Will default to True in HttpClient
http_retry_transient_errors=None, # Will default to True in HttpClient
)
2 changes: 1 addition & 1 deletion src/PowerPlatform/Dataverse/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(
self.details = details or {}
self.source = source or "client"
self.is_transient = is_transient
self.timestamp = _dt.datetime.utcnow().isoformat() + "Z"
self.timestamp = _dt.datetime.now(_dt.timezone.utc).isoformat() + "Z"

def to_dict(self) -> Dict[str, Any]:
"""
Expand Down
124 changes: 119 additions & 5 deletions src/PowerPlatform/Dataverse/core/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from __future__ import annotations

import random
import time
from typing import Any, Optional

Expand Down Expand Up @@ -38,17 +39,26 @@ def __init__(
retries: Optional[int] = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is default value added in code but not in signatures?

backoff: Optional[float] = None,
timeout: Optional[float] = None,
max_backoff: Optional[float] = None,
jitter: bool = True,
retry_transient_errors: bool = True,
) -> None:
self.max_attempts = retries if retries is not None else 5
self.base_delay = backoff if backoff is not None else 0.5
self.max_backoff = max_backoff if max_backoff is not None else 60.0
self.default_timeout: Optional[float] = timeout
self.jitter = jitter
self.retry_transient_errors = retry_transient_errors

# Transient HTTP status codes that should be retried
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably use the TRANSIENT_STATUS defined in error_codes.py for this

self.transient_status_codes = {429, 502, 503, 504}

def request(self, method: str, url: str, **kwargs: Any) -> requests.Response:
"""
Execute an HTTP request with automatic retry logic and timeout management.

Applies default timeouts based on HTTP method (120s for POST/DELETE, 10s for others)
and retries on network errors with exponential backoff.
and retries on transient network errors and HTTP status codes with exponential backoff.

:param method: HTTP method (GET, POST, PUT, DELETE, etc.).
:type method: str
Expand All @@ -68,13 +78,117 @@ def request(self, method: str, url: str, **kwargs: Any) -> requests.Response:
m = (method or "").lower()
kwargs["timeout"] = 120 if m in ("post", "delete") else 10

# Small backoff retry on network errors only
# Enhanced retry logic with exponential backoff, jitter, and HTTP status retries
for attempt in range(self.max_attempts):
try:
return requests.request(method, url, **kwargs)
response = requests.request(method, url, **kwargs)

# Check if we should retry based on HTTP status code
if (self.retry_transient_errors and
response.status_code in self.transient_status_codes and
attempt < self.max_attempts - 1):

delay = self._calculate_retry_delay(attempt, response)
time.sleep(delay)
continue

return response

except requests.exceptions.RequestException:
if attempt == self.max_attempts - 1:
raise
delay = self.base_delay * (2 ** attempt)
delay = self._calculate_retry_delay(attempt)
time.sleep(delay)
continue

# This should never be reached due to the logic above
raise RuntimeError("Unexpected end of retry loop")

def _calculate_retry_delay(self, attempt: int, response: Optional[requests.Response] = None) -> float:
"""
Calculate the delay before the next retry attempt using exponential backoff with jitter.

This method implements an intelligent retry delay strategy that prioritizes server-provided
guidance (Retry-After header) over client-calculated delays, uses exponential backoff to
reduce load on failing services, and applies jitter to prevent thundering herd problems
when multiple clients retry simultaneously.

The delay calculation follows this priority order:
1. **Retry-After header**: If present and valid, use the server-specified delay (capped at max_backoff)
2. **Exponential backoff**: Calculate base_delay * (2^attempt), capped at max_backoff
3. **Jitter**: If enabled, add ±25% random variation to prevent synchronized retries

:param attempt: Zero-based retry attempt number (0 = first retry, 1 = second retry, etc.).
:type attempt: int
:param response: Optional HTTP response object containing headers. Used to extract Retry-After
header when available. If None, only exponential backoff calculation is performed.
:type response: requests.Response or None

:return: Delay in seconds before the next retry attempt. Always >= 0, capped at max_backoff.
:rtype: float

:raises: Does not raise exceptions. Invalid Retry-After values fall back to exponential backoff.

.. note::
**Retry-After Header Handling (RFC 7231):**

- Supports integer seconds format: ``Retry-After: 120``
- Invalid formats (non-integer, HTTP-date) fall back to exponential backoff
- Server-provided delays are capped at ``max_backoff`` to prevent excessive waits

.. note::
**Exponential Backoff Formula:**

- Base calculation: ``delay = base_delay * (2^attempt)``
- Example with ``base_delay=0.5``: 0.5s, 1.0s, 2.0s, 4.0s, 8.0s, 16.0s...
- Always capped at ``max_backoff`` (default 60s) to prevent unbounded delays

.. note::
**Jitter Application:**

- When ``jitter=True``, adds random variation of ±25% of calculated delay
- Prevents thundering herd when multiple clients retry simultaneously
- Example: 4.0s delay becomes random value between 3.0s and 5.0s
- Final result is always >= 0 even with negative jitter

Example:
Calculate delays for successive retry attempts::

client = HttpClient(backoff=1.0, max_backoff=30.0, jitter=True)

# Exponential backoff examples (without jitter for predictable values)
client_no_jitter = HttpClient(backoff=0.5, jitter=False)

# Attempt 0: 0.5s (0.5 * 2^0)
delay0 = client_no_jitter._calculate_retry_delay(0)

# Attempt 2: 2.0s (0.5 * 2^2)
delay2 = client_no_jitter._calculate_retry_delay(2)

# With jitter enabled, delays vary randomly within ±25%
# Attempt 1 with jitter: ~1.0s ± 0.25s = 0.75s - 1.25s
delay_with_jitter = client._calculate_retry_delay(1)
"""

# Check for Retry-After header (RFC 7231)
if response and "Retry-After" in response.headers:
try:
retry_after = int(response.headers["Retry-After"])
# Respect Retry-After but cap it at max_backoff
return min(retry_after, self.max_backoff)
except (ValueError, TypeError):
# If Retry-After is not a valid integer, fall back to exponential backoff
pass

# Exponential backoff: base_delay * (2^attempt)
delay = self.base_delay * (2 ** attempt)

# Cap the delay at max_backoff
delay = min(delay, self.max_backoff)

# Add jitter to prevent thundering herd (±25% of delay)
if self.jitter:
jitter_range = delay * 0.25
jitter_offset = random.uniform(-jitter_range, jitter_range)
delay = max(0, delay + jitter_offset)

return delay
81 changes: 48 additions & 33 deletions src/PowerPlatform/Dataverse/data/odata.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ def __init__(
self._http = HttpClient(
retries=self.config.http_retries,
backoff=self.config.http_backoff,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we unify the format used here?

max_backoff=getattr(self.config, 'http_max_backoff', None),
timeout=self.config.http_timeout,
jitter=getattr(self.config, 'http_jitter', True),
retry_transient_errors=getattr(self.config, 'http_retry_transient_errors', True),
)
# Cache: logical name -> entity set name (plural) resolved from metadata
self._logical_to_entityset_cache: dict[str, str] = {}
Expand Down Expand Up @@ -903,6 +906,35 @@ def _normalize_picklist_label(self, label: str) -> str:
norm = re.sub(r"\s+", " ", norm).strip().lower()
return norm

def _request_metadata_with_retry(self, method: str, url: str, **kwargs) -> Any:
"""
Make HTTP request with metadata-specific retry logic.

Retries 404 errors for metadata operations since attribute metadata
may not be immediately available after creation or schema changes.

:param method: HTTP method (e.g., 'get', 'post').
:type method: str
:param url: Target URL for the metadata request.
:type url: str
:param kwargs: Additional arguments passed to the underlying request method.
:return: HTTP response object.
:rtype: Any
:raises HttpError: If the request fails after all retry attempts.
"""
last_error = None
for attempt in range(3): # 3 attempts for metadata operations
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this would be on top of the http retries we already have, so by default could retry 3 * 5 times? is this intended?

try:
return self._request(method, url, **kwargs)
except HttpError as err:
last_error = err
if getattr(err, "status_code", None) == 404 and attempt < 2:
# Exponential backoff: 0.4s, 0.8s for metadata propagation delays
delay = 0.4 * (2 ** attempt)
time.sleep(delay)
continue
raise

def _optionset_map(self, logical_name: str, attr_logical: str) -> Optional[Dict[str, int]]:
"""Build or return cached mapping of normalized label -> value for a picklist attribute.

Expand Down Expand Up @@ -930,23 +962,14 @@ def _optionset_map(self, logical_name: str, attr_logical: str) -> Optional[Dict[
f"?$filter=LogicalName eq '{attr_esc}'&$select=LogicalName,AttributeType"
)
# Retry up to 3 times on 404 (new or not-yet-published attribute metadata). If still 404, raise.
r_type = None
for attempt in range(3):
try:
r_type = self._request("get", url_type)
break
except HttpError as err:
if getattr(err, "status_code", None) == 404:
if attempt < 2:
# Exponential-ish backoff: 0.4s, 0.8s
time.sleep(0.4 * (2 ** attempt))
continue
raise RuntimeError(
f"Picklist attribute metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)"
) from err
raise
if r_type is None:
raise RuntimeError("Failed to retrieve attribute metadata due to repeated request failures.")
try:
r_type = self._request_metadata_with_retry("get", url_type)
except HttpError as err:
if getattr(err, "status_code", None) == 404:
raise RuntimeError(
f"Picklist attribute metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)"
) from err
raise

body_type = r_type.json()
items = body_type.get("value", []) if isinstance(body_type, dict) else []
Expand All @@ -964,22 +987,14 @@ def _optionset_map(self, logical_name: str, attr_logical: str) -> Optional[Dict[
"Microsoft.Dynamics.CRM.PicklistAttributeMetadata?$select=LogicalName&$expand=OptionSet($select=Options)"
)
# Step 2 fetch with retries: expanded OptionSet (cast form first)
r_opts = None
for attempt in range(3):
try:
r_opts = self._request("get", cast_url)
break
except HttpError as err:
if getattr(err, "status_code", None) == 404:
if attempt < 2:
time.sleep(0.4 * (2 ** attempt)) # 0.4s, 0.8s
continue
raise RuntimeError(
f"Picklist OptionSet metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)"
) from err
raise
if r_opts is None:
raise RuntimeError("Failed to retrieve picklist OptionSet metadata due to repeated request failures.")
try:
r_opts = self._request_metadata_with_retry("get", cast_url)
except HttpError as err:
if getattr(err, "status_code", None) == 404:
raise RuntimeError(
f"Picklist OptionSet metadata not found after retries: entity='{logical_name}' attribute='{attr_logical}' (404)"
) from err
raise

attr_full = {}
try:
Expand Down
2 changes: 1 addition & 1 deletion src/PowerPlatform/Dataverse/utils/pandas_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def update(self, logical_name: str, record_id: str, entity_data: pd.Series) -> N
raise TypeError("entity_data must be a pandas Series")
payload = {k: v for k, v in entity_data.items()}
if not payload:
return # nothing to send
return None # nothing to send
self._c.update(logical_name, record_id, payload)

# ---------------------------- Delete ---------------------------------
Expand Down
Loading
Loading