From 1cc57b882fdaf0cf4c242537fa7bd6702eea390b Mon Sep 17 00:00:00 2001 From: tafreeman Date: Sat, 11 Apr 2026 07:43:56 -0500 Subject: [PATCH 1/6] refactor: generalize budget guard loop + move logging import (F-05, F-08) F-05: Replace 8 repeated per-field if-chains in checked_complete() with a _check_budget() helper that iterates over _BUDGET_FIELD_LABELS using getattr(). Same pattern as CPython's dataclasses.asdict() internals. Adding a new budget dimension now requires a single entry in _BUDGET_FIELD_LABELS, not 6 new blocks. F-08: Move `import logging` from inside exception handler to module level, per PEP 8 and Ruff PLC0415 / Pylint C0415 requirements. Co-Authored-By: Claude Sonnet 4.6 --- executionkit/patterns/base.py | 115 ++++++++++++++++------------ executionkit/patterns/react_loop.py | 3 +- 2 files changed, 67 insertions(+), 51 deletions(-) diff --git a/executionkit/patterns/base.py b/executionkit/patterns/base.py index c7c90bf..6bce296 100644 --- a/executionkit/patterns/base.py +++ b/executionkit/patterns/base.py @@ -16,6 +16,58 @@ BUDGET_EXHAUSTED_SENTINEL = -1 +# Maps TokenUsage field names to human-readable labels for error messages. +# Iterating over this dict with getattr() replaces per-field if-chains — +# the same pattern CPython's dataclasses.asdict() uses internally. +# Ref: https://github.com/python/cpython/blob/main/Lib/dataclasses.py +_BUDGET_FIELD_LABELS: dict[str, str] = { + "llm_calls": "LLM call", + "input_tokens": "Input token", + "output_tokens": "Output token", +} + + +def _check_budget( + budget: TokenUsage, + current: TokenUsage, + fields: tuple[str, ...], + *, + sentinel_suffix: str, + exceeded_suffix: str, +) -> None: + """Raise :exc:`BudgetExhaustedError` if any tracked field hits its limit. + + Uses ``getattr()`` over named field checks — same pattern as CPython's + ``dataclasses.asdict()`` — to avoid repeating the check triplet per field. 
+ A value of ``BUDGET_EXHAUSTED_SENTINEL`` (-1) means the field was fully + consumed by a prior ``pipe()`` step. + + Args: + budget: The budget to check against. + current: Current usage snapshot from :class:`CostTracker`. + fields: Tuple of :class:`TokenUsage` field names to check. + sentinel_suffix: Appended to the error message when sentinel found. + exceeded_suffix: Appended to the error message when limit exceeded. + + Raises: + BudgetExhaustedError: On the first field that is over budget. + """ + for field_name in fields: + label = _BUDGET_FIELD_LABELS[field_name] + limit = getattr(budget, field_name) + if limit == BUDGET_EXHAUSTED_SENTINEL: + raise BudgetExhaustedError( + f"{label} budget exhausted {sentinel_suffix}", + cost=current, + metadata={"budget": budget}, + ) + if limit > 0 and getattr(current, field_name) >= limit: + raise BudgetExhaustedError( + f"{label} budget exhausted {exceeded_suffix}", + cost=current, + metadata={"budget": budget}, + ) + def validate_score(score: float) -> float: """Validate that an evaluator score is in [0.0, 1.0] and not NaN. @@ -66,59 +118,24 @@ async def checked_complete( """ if budget is not None: current = tracker.to_usage() - # -1 sentinel: field was limited and fully consumed by a prior pipe() step. 
- if budget.llm_calls == BUDGET_EXHAUSTED_SENTINEL: - raise BudgetExhaustedError( - "LLM call budget exhausted (forwarded from pipe)", - cost=current, - metadata={"budget": budget}, - ) - if budget.llm_calls > 0 and current.llm_calls >= budget.llm_calls: - raise BudgetExhaustedError( - "LLM call budget exhausted before dispatch", - cost=current, - metadata={"budget": budget}, - ) - if budget.input_tokens == BUDGET_EXHAUSTED_SENTINEL: - raise BudgetExhaustedError( - "Input token budget exhausted (forwarded from pipe)", - cost=current, - metadata={"budget": budget}, - ) - if budget.input_tokens > 0 and current.input_tokens >= budget.input_tokens: - raise BudgetExhaustedError( - "Input token budget exhausted before dispatch", - cost=current, - metadata={"budget": budget}, - ) - if budget.output_tokens == BUDGET_EXHAUSTED_SENTINEL: - raise BudgetExhaustedError( - "Output token budget exhausted (forwarded from pipe)", - cost=current, - metadata={"budget": budget}, - ) - if budget.output_tokens > 0 and current.output_tokens >= budget.output_tokens: - raise BudgetExhaustedError( - "Output token budget exhausted before dispatch", - cost=current, - metadata={"budget": budget}, - ) + _check_budget( + budget, + current, + tuple(_BUDGET_FIELD_LABELS), + sentinel_suffix="(forwarded from pipe)", + exceeded_suffix="before dispatch", + ) async def _before_attempt(attempt: int) -> None: if attempt > 1 and budget is not None: current = tracker.to_usage() - if budget.llm_calls == BUDGET_EXHAUSTED_SENTINEL: - raise BudgetExhaustedError( - "LLM call budget exhausted before retry (forwarded from pipe)", - cost=current, - metadata={"budget": budget}, - ) - if budget.llm_calls > 0 and current.llm_calls >= budget.llm_calls: - raise BudgetExhaustedError( - "LLM call budget exhausted before retry dispatch", - cost=current, - metadata={"budget": budget}, - ) + _check_budget( + budget, + current, + ("llm_calls",), + sentinel_suffix="before retry (forwarded from pipe)", + 
exceeded_suffix="before retry dispatch", + ) tracker.reserve_call() response = await with_retry( diff --git a/executionkit/patterns/react_loop.py b/executionkit/patterns/react_loop.py index b11d9b3..429083b 100644 --- a/executionkit/patterns/react_loop.py +++ b/executionkit/patterns/react_loop.py @@ -4,6 +4,7 @@ import asyncio import json +import logging from itertools import chain from types import MappingProxyType from typing import TYPE_CHECKING, Any @@ -351,8 +352,6 @@ async def _execute_tool_call( except TimeoutError: return f"Tool execution timed out after {timeout}s" except Exception as exc: - import logging - logging.getLogger(__name__).debug( "Tool '%s' raised %s", tc_name, type(exc).__name__, exc_info=True ) From 60c55fde6e70661f2545378031ae4121138d07c3 Mon Sep 17 00:00:00 2001 From: tafreeman Date: Sat, 11 Apr 2026 08:40:41 -0500 Subject: [PATCH 2/6] refactor: extract error hierarchy to errors.py + _classify_http_error helper (F-06, F-02) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F-06: Move the 9-class exception hierarchy from provider.py into errors.py, following the same split used by Anthropic's own SDK (_exceptions.py pattern). provider.py re-exports with `Name as Name` for PEP 387 backwards-compat — `from executionkit.provider import XError` imports continue to work unchanged. F-02: Extract _classify_http_error(status, raw, retry_after, *, cause) to eliminate the duplicated HTTP status→exception if-chain that existed in both _post_httpx() and _post_urllib(). A single canonical function now maps 429→RateLimitError, {401,403,404}→PermanentError, else→ProviderError. Also closes the pre-existing divergence: urllib had `if exc.headers else 1` guard that httpx lacked; now both call the same function. 
Co-Authored-By: Claude Sonnet 4.6 --- executionkit/errors.py | 74 ++++++++++++++++++++ executionkit/provider.py | 141 ++++++++++++++++----------------------- 2 files changed, 133 insertions(+), 82 deletions(-) create mode 100644 executionkit/errors.py diff --git a/executionkit/errors.py b/executionkit/errors.py new file mode 100644 index 0000000..440de5a --- /dev/null +++ b/executionkit/errors.py @@ -0,0 +1,74 @@ +"""ExecutionKit exception hierarchy. + +All nine error classes live here so they can be imported without pulling in +the full HTTP client machinery from ``provider.py``. + +Ref: Anthropic SDK uses the same ``_exceptions.py`` split — the parent +company's own design convention (github.com/anthropics/anthropic-sdk-python). +PEP 387 backwards-compat: ``from executionkit.provider import XError`` still +works because ``provider.py`` re-exports from this module. +""" + +from __future__ import annotations + +from typing import Any + +from executionkit.types import TokenUsage + + +class ExecutionKitError(Exception): + """Base exception for all ExecutionKit errors.""" + + def __init__( + self, + message: str, + *, + cost: TokenUsage | None = None, + metadata: dict[str, Any] | None = None, + ) -> None: + super().__init__(message) + self.cost: TokenUsage = cost if cost is not None else TokenUsage() + self.metadata: dict[str, Any] = metadata if metadata is not None else {} + + +class LLMError(ExecutionKitError): + """Errors originating from LLM provider communication.""" + + +class RateLimitError(LLMError): + """Provider returned HTTP 429 — retryable after ``retry_after`` seconds.""" + + def __init__( + self, + message: str, + *, + retry_after: float = 1.0, + cost: TokenUsage | None = None, + metadata: dict[str, Any] | None = None, + ) -> None: + super().__init__(message, cost=cost, metadata=metadata) + self.retry_after: float = retry_after + + +class PermanentError(LLMError): + """Non-retryable provider error (e.g. 
401 authentication failure).""" + + +class ProviderError(LLMError): + """Catch-all retryable provider error for unexpected HTTP failures.""" + + +class PatternError(ExecutionKitError): + """Errors raised by reasoning pattern logic.""" + + +class BudgetExhaustedError(PatternError): + """Token or call budget exceeded.""" + + +class ConsensusFailedError(PatternError): + """Consensus pattern could not reach agreement.""" + + +class MaxIterationsError(PatternError): + """Loop pattern exceeded its iteration limit.""" diff --git a/executionkit/provider.py b/executionkit/provider.py index 63685c2..807b4bf 100644 --- a/executionkit/provider.py +++ b/executionkit/provider.py @@ -15,13 +15,25 @@ import urllib.request from dataclasses import dataclass, field from types import MappingProxyType -from typing import TYPE_CHECKING, Any, Literal, Protocol, runtime_checkable +from typing import TYPE_CHECKING, Any, Literal, NoReturn, Protocol, runtime_checkable if TYPE_CHECKING: from collections.abc import Sequence from types import TracebackType -from executionkit.types import TokenUsage +# Re-export the error hierarchy from errors.py using the `Name as Name` idiom +# so ruff/mypy recognise these as intentional public re-exports. +# Existing `from executionkit.provider import XError` imports continue to work. 
+# Ref: PEP 387 backwards-compat — https://peps.python.org/pep-0387/ +from executionkit.errors import BudgetExhaustedError as BudgetExhaustedError +from executionkit.errors import ConsensusFailedError as ConsensusFailedError +from executionkit.errors import ExecutionKitError as ExecutionKitError +from executionkit.errors import LLMError as LLMError +from executionkit.errors import MaxIterationsError as MaxIterationsError +from executionkit.errors import PatternError as PatternError +from executionkit.errors import PermanentError as PermanentError +from executionkit.errors import ProviderError as ProviderError +from executionkit.errors import RateLimitError as RateLimitError # --------------------------------------------------------------------------- # httpx availability probe (done once at import time) @@ -36,68 +48,11 @@ _HTTPX_AVAILABLE = False # --------------------------------------------------------------------------- -# Error hierarchy (9 classes) +# Error hierarchy — defined in errors.py; re-exported here so that existing +# `from executionkit.provider import XError` imports continue to work. 
+# PEP 387 backwards-compatibility: https://peps.python.org/pep-0387/ # --------------------------------------------------------------------------- - -class ExecutionKitError(Exception): - """Base exception for all ExecutionKit errors.""" - - def __init__( - self, - message: str, - *, - cost: TokenUsage | None = None, - metadata: dict[str, Any] | None = None, - ) -> None: - super().__init__(message) - self.cost: TokenUsage = cost if cost is not None else TokenUsage() - self.metadata: dict[str, Any] = metadata if metadata is not None else {} - - -class LLMError(ExecutionKitError): - """Errors originating from LLM provider communication.""" - - -class RateLimitError(LLMError): - """Provider returned HTTP 429 — retryable after ``retry_after`` seconds.""" - - def __init__( - self, - message: str, - *, - retry_after: float = 1.0, - cost: TokenUsage | None = None, - metadata: dict[str, Any] | None = None, - ) -> None: - super().__init__(message, cost=cost, metadata=metadata) - self.retry_after: float = retry_after - - -class PermanentError(LLMError): - """Non-retryable provider error (e.g. 
401 authentication failure).""" - - -class ProviderError(LLMError): - """Catch-all retryable provider error for unexpected HTTP failures.""" - - -class PatternError(ExecutionKitError): - """Errors raised by reasoning pattern logic.""" - - -class BudgetExhaustedError(PatternError): - """Token or call budget exceeded.""" - - -class ConsensusFailedError(PatternError): - """Consensus pattern could not reach agreement.""" - - -class MaxIterationsError(PatternError): - """Loop pattern exceeded its iteration limit.""" - - # --------------------------------------------------------------------------- # Value types # --------------------------------------------------------------------------- @@ -317,15 +272,8 @@ async def _post_httpx( raw = {} except Exception: raw = {} - if status == 429: - retry_after = float(exc.response.headers.get("retry-after", "1")) - raise RateLimitError( - "Rate limited (HTTP 429)", - retry_after=retry_after, - ) from exc - if status in {401, 403, 404}: - raise PermanentError(_format_http_error(status, raw)) from exc - raise ProviderError(_format_http_error(status, raw)) from exc + retry_after = float(exc.response.headers.get("retry-after", "1")) + _classify_http_error(status, raw, retry_after, cause=exc) except _httpx.TransportError as exc: raise ProviderError(f"Transport failure: {exc}") from exc @@ -358,17 +306,10 @@ def _sync() -> dict[str, Any]: except ProviderError: raw = {} status = exc.code - if status == 429: - retry_after = float( - exc.headers.get("retry-after", "1") if exc.headers else 1 - ) - raise RateLimitError( - "Rate limited (HTTP 429)", - retry_after=retry_after, - ) from exc - if status in {401, 403, 404}: - raise PermanentError(_format_http_error(status, raw)) from exc - raise ProviderError(_format_http_error(status, raw)) from exc + retry_after = float( + exc.headers.get("retry-after", "1") if exc.headers else 1 + ) + _classify_http_error(status, raw, retry_after, cause=exc) except urllib.error.URLError as exc: raise 
ProviderError(f"Transport failure: {exc.reason}") from exc @@ -498,6 +439,42 @@ def _redact_sensitive(text: str) -> str: ) +def _classify_http_error( + status: int, + raw: dict[str, Any], + retry_after: float, + *, + cause: BaseException, +) -> NoReturn: + """Raise the correct LLM error subclass for a failed HTTP response. + + Extracted to eliminate duplication between the urllib and httpx backends. + Both backends call this single function — the exact pattern used by the + Anthropic SDK's ``_make_status_error()`` method. + + Ref: https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_client.py + + Args: + status: HTTP status code from the failed response. + raw: Parsed JSON body from the response (may be empty dict). + retry_after: Value of the ``Retry-After`` header in seconds. + cause: The original exception, chained via ``raise ... from cause``. + + Raises: + RateLimitError: For HTTP 429. + PermanentError: For HTTP 401, 403, 404. + ProviderError: For all other non-2xx status codes. 
+ """ + if status == 429: + raise RateLimitError( + "Rate limited (HTTP 429)", + retry_after=retry_after, + ) from cause + if status in {401, 403, 404}: + raise PermanentError(_format_http_error(status, raw)) from cause + raise ProviderError(_format_http_error(status, raw)) from cause + + def _format_http_error(status_code: int, payload: dict[str, Any]) -> str: message = payload.get("error") if isinstance(message, dict): From 51f16f0dbc05be071f13cbe2ed1ae5edf53e11be Mon Sep 17 00:00:00 2001 From: tafreeman Date: Sat, 11 Apr 2026 16:00:16 -0500 Subject: [PATCH 3/6] fix: delegate _TrackedProvider.supports_tools + F-01/F-03/F-04 docs (F-04) F-04: Replace hardcoded `supports_tools: Literal[True] = True` in _TrackedProvider with a delegating property that reflects the wrapped provider's actual capability: @property def supports_tools(self) -> bool: return getattr(self._provider, "supports_tools", False) A wrapper claiming Literal[True] unconditionally creates false positives because @runtime_checkable only checks attribute presence, not value (PEP 544). Real-world failure mode: LiteLLM bug #11370. Add WARNING docstring to Provider.supports_tools explaining the delegation pattern required when building wrapper classes. F-01 verified: Add inline NOTE in _TrackedProvider confirming _calls is never accessed directly; reserve_call()/release_call() are the sole API. F-03 verified: Add inline NOTE in consensus() confirming max_cost is implemented and propagated to every checked_complete() call. 
Co-Authored-By: Claude Sonnet 4.6 --- executionkit/patterns/base.py | 18 +++++++++++++++--- executionkit/patterns/consensus.py | 3 +++ executionkit/provider.py | 9 +++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/executionkit/patterns/base.py b/executionkit/patterns/base.py index 6bce296..ddcf227 100644 --- a/executionkit/patterns/base.py +++ b/executionkit/patterns/base.py @@ -4,7 +4,7 @@ import math import warnings -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from collections.abc import Sequence @@ -176,8 +176,6 @@ class _TrackedProvider: multiple times while sharing a single ``CostTracker`` and metadata dict. """ - supports_tools: Literal[True] = True - def __init__( self, provider: LLMProvider, @@ -195,6 +193,20 @@ def __init__( self._retry = retry self._context = context + @property + def supports_tools(self) -> bool: + """Delegate capability flag to the wrapped provider. + + A wrapper must not unconditionally claim tool support — it should + reflect what the inner provider actually supports. + Ref F-04: https://github.com/BerriAI/litellm/issues/11370 (real-world + failure from hardcoding capability instead of delegating). + NOTE (F-01 verified): CostTracker._calls is never accessed directly + here. reserve_call() and release_call() are the only public API used. + See executionkit/cost.py. + """ + return getattr(self._provider, "supports_tools", False) + async def complete( self, messages: Sequence[dict[str, Any]], diff --git a/executionkit/patterns/consensus.py b/executionkit/patterns/consensus.py index 31e103c..6e0719b 100644 --- a/executionkit/patterns/consensus.py +++ b/executionkit/patterns/consensus.py @@ -31,6 +31,9 @@ async def consensus( max_tokens: int = 4096, max_concurrency: int = 5, retry: RetryConfig | None = None, + # NOTE (F-03 verified): max_cost is implemented and forwarded to every + # checked_complete() call below, enabling budget-aware pipe() chains. 
+ # See executionkit/compose.py _filter_kwargs() for propagation logic. max_cost: TokenUsage | None = None, ) -> PatternResult[str]: """Run parallel LLM samples and aggregate via voting. diff --git a/executionkit/provider.py b/executionkit/provider.py index 807b4bf..e880527 100644 --- a/executionkit/provider.py +++ b/executionkit/provider.py @@ -165,6 +165,15 @@ class Provider: default_temperature: float = 0.7 default_max_tokens: int = 4096 timeout: float = 120.0 + # supports_tools is Literal[True] for this concrete HTTP client because + # it always speaks the OpenAI tool-calling wire format. + # WARNING (F-04): If you build a *wrapper* around Provider, do NOT copy + # this attribute verbatim — delegate instead: + # @property + # def supports_tools(self) -> bool: return self._inner.supports_tools + # Hardcoding True in a wrapper causes isinstance(wrapper, ToolCallingProvider) + # to return True even when the inner provider cannot handle tools. + # Ref: PEP 544 runtime_checkable only checks presence, not value. supports_tools: Literal[True] = field(default=True, init=False) # Derived state — excluded from repr/eq/hash; initialized only in __post_init__ _client: Any = field( From 95772769d777ebc0c553176b32190f369cfcd943 Mon Sep 17 00:00:00 2001 From: tafreeman Date: Sat, 11 Apr 2026 17:28:58 -0500 Subject: [PATCH 4/6] test: add regression tests for _classify_http_error, _check_budget, supports_tools delegation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _classify_http_error (F-02): 8 tests covering 429→RateLimitError, {401,403,404}→PermanentError, 500/503→ProviderError, exception chaining via __cause__, and retry_after value propagation. _check_budget (F-05): 5 tests covering unlimited-sentinel (0), call limit, input token limit, sentinel (-1) forwarded from pipe, and error metadata (cost + budget attached to BudgetExhaustedError). 
_TrackedProvider.supports_tools delegation (F-04): 2 tests — delegates True from MockProvider (which has supports_tools=True) and delegates False from a minimal LLMProvider that has no supports_tools attribute. Co-Authored-By: Claude Sonnet 4.6 --- tests/test_patterns.py | 143 +++++++++++++++++++++++++++++++++++++++++ tests/test_provider.py | 66 +++++++++++++++++++ 2 files changed, 209 insertions(+) diff --git a/tests/test_patterns.py b/tests/test_patterns.py index dd9d569..508ebe0 100644 --- a/tests/test_patterns.py +++ b/tests/test_patterns.py @@ -1568,3 +1568,146 @@ async def leaky_execute(query: str) -> str: assert "hunter2" not in observation assert "ValueError" in observation + + +# --------------------------------------------------------------------------- +# _check_budget regression tests (F-05) +# Ref: field-loop pattern from CPython dataclasses.asdict() eliminates +# per-field if-chain repetition. +# --------------------------------------------------------------------------- + + +class TestCheckBudget: + """_check_budget raises BudgetExhaustedError on the first exceeded field.""" + + def test_no_error_when_all_limits_zero(self) -> None: + """0 is the unlimited sentinel — should never raise.""" + from executionkit.patterns.base import _check_budget + from executionkit.types import TokenUsage + + _check_budget( + TokenUsage(llm_calls=0, input_tokens=0, output_tokens=0), + TokenUsage(llm_calls=999, input_tokens=999, output_tokens=999), + ("llm_calls", "input_tokens", "output_tokens"), + sentinel_suffix="(pipe)", + exceeded_suffix="before dispatch", + ) + + def test_raises_on_call_limit_hit(self) -> None: + from executionkit.errors import BudgetExhaustedError + from executionkit.patterns.base import _check_budget + from executionkit.types import TokenUsage + + with pytest.raises(BudgetExhaustedError) as exc_info: + _check_budget( + TokenUsage(llm_calls=3, input_tokens=0, output_tokens=0), + TokenUsage(llm_calls=3, input_tokens=100, output_tokens=100), + 
("llm_calls", "input_tokens", "output_tokens"), + sentinel_suffix="(pipe)", + exceeded_suffix="before dispatch", + ) + assert "LLM call" in str(exc_info.value) + assert "before dispatch" in str(exc_info.value) + + def test_raises_on_sentinel_minus_one(self) -> None: + from executionkit.errors import BudgetExhaustedError + from executionkit.patterns.base import BUDGET_EXHAUSTED_SENTINEL, _check_budget + from executionkit.types import TokenUsage + + with pytest.raises(BudgetExhaustedError) as exc_info: + _check_budget( + TokenUsage( + llm_calls=BUDGET_EXHAUSTED_SENTINEL, + input_tokens=0, + output_tokens=0, + ), + TokenUsage(llm_calls=0, input_tokens=0, output_tokens=0), + ("llm_calls",), + sentinel_suffix="before retry (forwarded from pipe)", + exceeded_suffix="before retry dispatch", + ) + assert "forwarded from pipe" in str(exc_info.value) + + def test_raises_on_input_token_limit(self) -> None: + from executionkit.errors import BudgetExhaustedError + from executionkit.patterns.base import _check_budget + from executionkit.types import TokenUsage + + # llm_calls=0 means unlimited; only input_tokens limit is set. + # current input_tokens (500) exceeds budget (100) → Input token error. 
+ with pytest.raises(BudgetExhaustedError) as exc_info: + _check_budget( + TokenUsage(llm_calls=0, input_tokens=100, output_tokens=0), + TokenUsage(llm_calls=10, input_tokens=500, output_tokens=500), + ("llm_calls", "input_tokens", "output_tokens"), + sentinel_suffix="(pipe)", + exceeded_suffix="before dispatch", + ) + assert "Input token" in str(exc_info.value) + + def test_error_carries_cost_and_budget_metadata(self) -> None: + from executionkit.errors import BudgetExhaustedError + from executionkit.patterns.base import _check_budget + from executionkit.types import TokenUsage + + budget = TokenUsage(llm_calls=1, input_tokens=0, output_tokens=0) + current = TokenUsage(llm_calls=1, input_tokens=0, output_tokens=0) + with pytest.raises(BudgetExhaustedError) as exc_info: + _check_budget( + budget, + current, + ("llm_calls",), + sentinel_suffix="(pipe)", + exceeded_suffix="before dispatch", + ) + assert exc_info.value.cost == current + assert exc_info.value.metadata["budget"] == budget + + +# --------------------------------------------------------------------------- +# _TrackedProvider.supports_tools delegation tests (F-04) +# Ref: @runtime_checkable only checks presence, not value — a wrapper must +# delegate the capability flag to the inner provider. 
+# PEP 544: https://peps.python.org/pep-0544/ +# --------------------------------------------------------------------------- + + +class TestTrackedProviderSupportsDelegation: + """_TrackedProvider.supports_tools delegates to the wrapped provider.""" + + def test_delegates_true_from_tool_capable_provider(self) -> None: + from executionkit._mock import MockProvider + from executionkit.cost import CostTracker + from executionkit.patterns.base import _TrackedProvider + + inner = MockProvider(responses=["ok"]) + # MockProvider has supports_tools = True + tp = _TrackedProvider( + inner, + CostTracker(), + {}, + budget=None, + retry=None, + context="test", + ) + assert tp.supports_tools is True + + def test_delegates_false_from_non_tool_provider(self) -> None: + """A plain LLMProvider without supports_tools must yield False.""" + from executionkit.cost import CostTracker + from executionkit.patterns.base import _TrackedProvider + from executionkit.provider import LLMResponse + + class MinimalProvider: + async def complete(self, messages, **kwargs): # type: ignore[no-untyped-def] + return LLMResponse(content="ok") + + tp = _TrackedProvider( + MinimalProvider(), # type: ignore[arg-type] + CostTracker(), + {}, + budget=None, + retry=None, + context="test", + ) + assert tp.supports_tools is False diff --git a/tests/test_provider.py b/tests/test_provider.py index 7f69873..f32a5db 100644 --- a/tests/test_provider.py +++ b/tests/test_provider.py @@ -618,3 +618,69 @@ def test_provider_client_is_set_post_init(self) -> None: """_use_httpx is set to a bool by __post_init__ regardless of httpx.""" provider = Provider("https://api.openai.com/v1", model="gpt-4o-mini") assert isinstance(provider._use_httpx, bool) + + +# --------------------------------------------------------------------------- +# _classify_http_error regression tests (F-02) +# Ref: extracted to eliminate duplication between urllib and httpx backends. +# Anthropic SDK uses same pattern in _make_status_error(). 
+# --------------------------------------------------------------------------- + + +class TestClassifyHttpError: + """_classify_http_error maps HTTP status codes to the correct exception.""" + + def test_429_raises_rate_limit_error(self) -> None: + from executionkit.provider import _classify_http_error + + cause = Exception("original") + with pytest.raises(RateLimitError) as exc_info: + _classify_http_error(429, {}, 5.0, cause=cause) + assert exc_info.value.retry_after == 5.0 + assert exc_info.value.__cause__ is cause + + def test_429_default_retry_after_is_propagated(self) -> None: + from executionkit.provider import _classify_http_error + + with pytest.raises(RateLimitError) as exc_info: + _classify_http_error(429, {}, 2.5, cause=Exception()) + assert exc_info.value.retry_after == 2.5 + + def test_401_raises_permanent_error(self) -> None: + from executionkit.provider import _classify_http_error + + with pytest.raises(PermanentError): + _classify_http_error(401, {}, 1.0, cause=Exception()) + + def test_403_raises_permanent_error(self) -> None: + from executionkit.provider import _classify_http_error + + with pytest.raises(PermanentError): + _classify_http_error(403, {}, 1.0, cause=Exception()) + + def test_404_raises_permanent_error(self) -> None: + from executionkit.provider import _classify_http_error + + with pytest.raises(PermanentError): + _classify_http_error(404, {}, 1.0, cause=Exception()) + + def test_500_raises_provider_error(self) -> None: + from executionkit.provider import _classify_http_error + + with pytest.raises(ProviderError): + _classify_http_error(500, {}, 1.0, cause=Exception()) + + def test_503_raises_provider_error(self) -> None: + from executionkit.provider import _classify_http_error + + with pytest.raises(ProviderError): + _classify_http_error(503, {}, 1.0, cause=Exception()) + + def test_exception_is_chained_via_cause(self) -> None: + """raise ... 
from cause must set __cause__, not just __context__.""" + from executionkit.provider import _classify_http_error + + original = ValueError("root cause") + with pytest.raises(ProviderError) as exc_info: + _classify_http_error(500, {}, 1.0, cause=original) + assert exc_info.value.__cause__ is original From 9ae790c81c3717f651a307831c07271bc6ef6439 Mon Sep 17 00:00:00 2001 From: tafreeman Date: Sat, 11 Apr 2026 17:44:02 -0500 Subject: [PATCH 5/6] docs: update architecture, API reference, and C4 diagrams post-refactor Synchronize all documentation with the F-02/F-04/F-05/F-06/F-08 changes: CLAUDE.md: - Add errors.py to module responsibilities table - Update provider.py and patterns/base.py descriptions - Add _classify_http_error and _check_budget to design invariants docs/architecture.md: - Add errors.py to module map and dependency graph - Document _classify_http_error as single HTTP error classification point - Update Error Handling Architecture section docs/api-reference.md: - Add _check_budget() and _classify_http_error() internal helper entries - Add module-location note for error hierarchy (errors.py) docs/c4/ (5 files): - c4-code-src-executionkit.md: Add errors.py module, update Mermaid diagram - c4-code-src-executionkit-patterns.md: Add _check_budget, update _TrackedProvider - c4-component.md: Update Provider Layer to include errors.py - c4-component-provider-layer.md: Add errors.py, _classify_http_error, update diagram - c4-component-reasoning-patterns.md: Add _check_budget, supports_tools delegation Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 9 +- docs/api-reference.md | 43 ++++++++ docs/architecture.md | 28 ++++- docs/c4/c4-code-src-executionkit-patterns.md | 29 ++++- docs/c4/c4-code-src-executionkit.md | 79 ++++++++++---- docs/c4/c4-component-provider-layer.md | 105 +++++++++++-------- docs/c4/c4-component-reasoning-patterns.md | 8 +- docs/c4/c4-component.md | 4 +- 8 files changed, 224 insertions(+), 81 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md 
index 311cb0c..78a0e37 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -37,10 +37,11 @@ ExecutionKit is a minimal library for LLM reasoning patterns — it fills the ga | Module | Role | |--------|------| -| `provider.py` | `LLMProvider` protocol, `Provider` HTTP client, `LLMResponse`, 9-class error hierarchy | +| `errors.py` | 9-class exception hierarchy (`ExecutionKitError` → `LLMError`, `PatternError` subtrees); extracted from `provider.py` (F-06) | +| `provider.py` | `LLMProvider` protocol, `Provider` HTTP client, `LLMResponse`; re-exports error classes from `errors.py` for backwards compatibility; `_classify_http_error()` is the single HTTP status→exception mapping point shared by both backends (F-02) | | `types.py` | Frozen value types: `TokenUsage`, `PatternResult[T]`, `Tool`, `VotingStrategy`, `Evaluator` | | `cost.py` | `CostTracker` — mutable accumulator with two-phase accounting (`reserve_call` + `record_without_call`) | -| `patterns/base.py` | `checked_complete()` — shared budget guard + retry entry point for all patterns | +| `patterns/base.py` | `checked_complete()` — shared budget guard + retry entry point; `_check_budget()` helper uses `getattr()` field loop replacing per-field if-chains (F-05/F-08); `_TrackedProvider.supports_tools` delegates to wrapped provider via `getattr` instead of hardcoding `Literal[True]` (F-04) | | `patterns/consensus.py` | Parallel sampling, majority/unanimous voting, agreement metadata | | `patterns/refine_loop.py` | Iterative improvement with `ConvergenceDetector`; default evaluator uses XML sandboxing | | `patterns/react_loop.py` | Think-act-observe loop; validates tool args against JSON Schema; caps context via `max_history_messages` | @@ -55,7 +56,9 @@ ExecutionKit is a minimal library for LLM reasoning patterns — it fills the ga **Two-phase cost accounting** — `reserve_call()` pre-increments the call counter before `await` (TOCTOU-safe for concurrent patterns); `record_without_call(response)` adds token counts after 
success. -**Budget guards** — `checked_complete()` in `patterns/base.py` checks token/call budget before every LLM call and raises `BudgetExhaustedError` (with accumulated cost snapshot) if exceeded. +**Budget guards** — `checked_complete()` in `patterns/base.py` checks token/call budget before every LLM call and raises `BudgetExhaustedError` (with accumulated cost snapshot) if exceeded. The internal `_check_budget()` helper iterates over field names using `getattr()` rather than repeating an if-block per field (F-05/F-08). + +**Centralised HTTP error mapping** — `_classify_http_error()` in `provider.py` is the single function that converts HTTP status codes to the appropriate error subclass. Both the `_post_httpx` and `_post_urllib` backends call it, eliminating the duplicated mapping logic that previously existed in each (F-02). **Structural typing** — `LLMProvider` and `ToolCallingProvider` are `@runtime_checkable` protocols, not base classes. Any object matching the interface works. diff --git a/docs/api-reference.md b/docs/api-reference.md index e63abc8..8ca1155 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -1089,10 +1089,53 @@ Validate that an evaluator score is in [0.0, 1.0] and not NaN. --- +### `_check_budget()` (internal) + +```python +def _check_budget( + budget: TokenUsage, + current: TokenUsage, + fields: tuple[str, ...], + *, + sentinel_suffix: str, + exceeded_suffix: str, +) -> None +``` + +Internal helper used by `checked_complete()` (F-05/F-08). Iterates over the named `TokenUsage` fields using `getattr()` and raises `BudgetExhaustedError` on the first field that is either sentinel-exhausted (value `-1`, set by `pipe()` propagation) or over its limit. This replaces the previous per-field if-block repetition and follows the same pattern as CPython's `dataclasses.asdict()`. + +**Location:** `executionkit/patterns/base.py` + +**Raises:** `BudgetExhaustedError` on the first exhausted field. 
+ +--- + +### `_classify_http_error()` (internal) + +```python +def _classify_http_error( + status: int, + raw: dict[str, Any], + retry_after: float, + *, + cause: BaseException, +) -> NoReturn +``` + +Internal helper in `provider.py` (F-02). Centralises the HTTP status code → exception mapping that is shared by both the `_post_httpx` and `_post_urllib` backends. Raises the correct typed exception — `RateLimitError` for HTTP 429, `PermanentError` for 401/403/404, `ProviderError` for all other non-2xx codes — and chains `cause` as the original exception. Both HTTP backends call this single function rather than duplicating the mapping logic. + +**Location:** `executionkit/provider.py` + +**Raises:** `RateLimitError`, `PermanentError`, or `ProviderError` (always raises; return type is `NoReturn`). + +--- + ## Error Hierarchy All exceptions carry `cost: TokenUsage` and `metadata: dict[str, Any]` attributes set at raise time. +> **Module location (F-06):** The full 9-class hierarchy is defined in `executionkit/errors.py`. `provider.py` re-exports every class under the same name so that `from executionkit.provider import XError` imports remain valid. 
+ ``` ExecutionKitError ├── LLMError — provider communication errors diff --git a/docs/architecture.md b/docs/architecture.md index 5caa002..655d0ad 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -37,15 +37,21 @@ shape every design decision: executionkit/ ├── __init__.py — public API surface; sync wrappers ├── types.py — frozen value types: PatternResult, TokenUsage, Tool, VotingStrategy, Evaluator +├── errors.py — 9-class exception hierarchy (F-06: extracted from provider.py) ├── provider.py — LLMProvider protocol, ToolCallingProvider protocol, -│ Provider concrete class, LLMResponse, ToolCall, -│ and the 9-class error hierarchy +│ Provider concrete class, LLMResponse, ToolCall; +│ re-exports error classes from errors.py for backwards compatibility; +│ _classify_http_error() is the single HTTP status→exception mapping +│ point for both urllib and httpx backends (F-02) ├── cost.py — CostTracker mutable accumulator ├── compose.py — pipe() composition helper, PatternStep protocol ├── kit.py — Kit session facade (provider + cumulative usage) ├── _mock.py — MockProvider test double (satisfies both protocols) ├── patterns/ -│ ├── base.py — checked_complete(), validate_score(), _TrackedProvider +│ ├── base.py — checked_complete(), validate_score(), _TrackedProvider; +│ │ _check_budget() uses getattr() field loop replacing per-field +│ │ if-chains (F-05/F-08); _TrackedProvider.supports_tools delegates +│ │ to wrapped provider via getattr (F-04) │ ├── consensus.py — parallel majority/unanimous voting │ ├── refine_loop.py — iterative score-guided refinement │ └── react_loop.py — tool-calling think-act-observe loop @@ -66,7 +72,8 @@ patterns/base ──► cost, engine/retry, provider, types patterns/consensus ──► cost, engine/parallel, engine/retry, patterns/base, provider, types patterns/refine_loop ──► cost, engine/convergence, engine/retry, patterns/base, provider, types patterns/react_loop ──► cost, engine/retry, patterns/base, provider, types -provider ──► 
types +provider ──► types, errors (re-exports all 9 error classes from errors.py) +errors ──► types cost ──► types engine/* ──► provider (retry only) ``` @@ -172,8 +179,13 @@ directly. Its snapshot is emitted as an immutable `TokenUsage` via `to_usage()`. ## Error Handling Architecture +The full 9-class exception hierarchy lives in `executionkit/errors.py` (F-06). +`provider.py` re-exports all nine classes under the same names so that existing +`from executionkit.provider import XError` imports continue to work without +modification (PEP 387 backwards compatibility). + ``` -ExecutionKitError +ExecutionKitError ← executionkit/errors.py ├── LLMError ← provider communication failures │ ├── RateLimitError ← HTTP 429; carries retry_after float │ ├── PermanentError ← HTTP 401/403/404; do not retry @@ -188,6 +200,12 @@ All errors carry `cost: TokenUsage` so callers can see what was spent before the failure. `pipe()` augments errors with the cumulative cross-step cost before re-raising. +**HTTP error classification:** `_classify_http_error()` in `provider.py` is the +single function responsible for mapping HTTP status codes to the correct error +subclass. Both the `_post_httpx` and `_post_urllib` backends call it, eliminating +duplicated mapping logic (F-02). This mirrors the pattern used by the Anthropic +SDK's `_make_status_error()`. + **Retry boundary:** `with_retry()` in `engine/retry.py` only retries `RateLimitError` and `ProviderError`. `PermanentError` propagates immediately. `asyncio.CancelledError` is always re-raised without retry. 
diff --git a/docs/c4/c4-code-src-executionkit-patterns.md b/docs/c4/c4-code-src-executionkit-patterns.md index 8102785..97e6447 100644 --- a/docs/c4/c4-code-src-executionkit-patterns.md +++ b/docs/c4/c4-code-src-executionkit-patterns.md @@ -22,9 +22,9 @@ - **Raises**: `ValueError` if score is NaN or outside [0.0, 1.0] range #### `checked_complete(provider: LLMProvider, messages: Sequence[dict[str, Any]], tracker: CostTracker, budget: TokenUsage | None, retry: RetryConfig | None, **kwargs: Any) -> LLMResponse` -- **Description**: Makes a budget-aware LLM API call with retry logic. Checks token and LLM call budgets before dispatching and records usage in the cost tracker. +- **Description**: Makes a budget-aware LLM API call with retry logic. Checks token and LLM call budgets before dispatching (via `_check_budget`) and records usage in the cost tracker. - **Location**: `base.py:24-55` -- **Dependencies**: `LLMProvider`, `CostTracker`, `BudgetExhaustedError`, `with_retry`, `DEFAULT_RETRY`, `TokenUsage`, `RetryConfig`, `LLMResponse` +- **Dependencies**: `LLMProvider`, `CostTracker`, `BudgetExhaustedError`, `with_retry`, `DEFAULT_RETRY`, `TokenUsage`, `RetryConfig`, `LLMResponse`, `_check_budget`, `_BUDGET_FIELD_LABELS` - **Parameters**: - `provider: LLMProvider` - The LLM provider to use - `messages: Sequence[dict[str, Any]]` - Messages to send to the LLM @@ -35,6 +35,23 @@ - **Return Type**: `LLMResponse` - Response from the LLM provider - **Raises**: `BudgetExhaustedError` if any budget constraint would be exceeded +#### `_check_budget(budget: TokenUsage, current: TokenUsage, fields: tuple[str, ...], *, sentinel_suffix: str, exceeded_suffix: str) -> None` +- **Description**: Raises `BudgetExhaustedError` on the first of the named `fields` whose budget value is the `-1` exhausted sentinel, or whose positive limit has already been reached by `current`. Replaces 8 per-field `if`-blocks with a single loop that reads each field with `getattr()` and takes its message label from `_BUDGET_FIELD_LABELS`. 
+- **Location**: `base.py` +- **Dependencies**: `TokenUsage`, `BudgetExhaustedError`, `BUDGET_EXHAUSTED_SENTINEL`, `_BUDGET_FIELD_LABELS` +- **Parameters**: + - `budget: TokenUsage`, `current: TokenUsage` - Budget limits and the current usage snapshot they are compared against + - `fields: tuple[str, ...]`, `sentinel_suffix: str`, `exceeded_suffix: str` - `TokenUsage` field names to check, plus the keyword-only message suffixes for the sentinel and limit-reached cases +- **Return Type**: `None` +- **Raises**: `BudgetExhaustedError` whose message starts with the field's label (e.g., "Input token", "LLM call") + +#### `_BUDGET_FIELD_LABELS` +- **Description**: Module-level dict mapping `TokenUsage` field names to human-readable label strings used in `BudgetExhaustedError` messages. Drives the field-loop in `_check_budget`, making it easy to add new budget dimensions without modifying control flow. +- **Location**: `base.py` +- **Type**: `dict[str, str]` +- **Example entries**: `{"llm_calls": "LLM call", "input_tokens": "Input token", "output_tokens": "Output token"}` +- **Dependencies**: None + #### `_note_truncation(response: LLMResponse, metadata: dict[str, Any], context: str) -> None` - **Description**: Logs a warning and increments truncation counter in metadata if the LLM response was truncated (finish_reason indicates truncation). 
- **Location**: `base.py:58-66` @@ -185,7 +202,8 @@ - `_budget: TokenUsage | None` - Optional token budget constraints - `_retry: RetryConfig | None` - Retry configuration - `_context: str` - Context string for error messages - - `supports_tools: bool = True` - Class attribute indicating tool support capability +- **Properties**: + - `supports_tools: bool` - Delegates to the wrapped provider's `supports_tools` attribute rather than hardcoding `Literal[True]`; this allows `_TrackedProvider` to accurately reflect the capabilities of the underlying provider at runtime - **Methods**: - `__init__(provider: LLMProvider, tracker: CostTracker, metadata: dict[str, Any], *, budget: TokenUsage | None, retry: RetryConfig | None, context: str) -> None` - Initializes the wrapper with dependencies - `complete(messages: Sequence[dict[str, Any]], *, temperature: float | None = None, max_tokens: int | None = None, tools: Sequence[dict[str, Any]] | None = None, **kwargs: Any) -> LLMResponse` - Wraps provider.complete() with budget and truncation checks @@ -228,6 +246,7 @@ None - executionkit has zero external runtime dependencies as specified in `pypr ### Standard Library Dependencies - `asyncio` - For async/await support and task management (react_loop) +- `logging` - Module-level import in `react_loop.py` for structured diagnostic logging - `collections.Counter` - For vote counting in consensus - `json` - For serializing tool arguments (react_loop) - `math` - For NaN checking in score validation @@ -270,7 +289,7 @@ classDiagram class TrackedProvider { <> - +supports_tools: bool + +supports_tools: bool (property, delegates to _provider) -_provider: LLMProvider -_tracker: CostTracker -_metadata: dict @@ -285,6 +304,8 @@ classDiagram <> +validate_score(score) float +checked_complete(provider, messages, ...) 
LLMResponse + -_check_budget(budget, current, fields, ...) None + -_BUDGET_FIELD_LABELS dict -_note_truncation(response, metadata, context) void -_TrackedProvider TrackedProvider } diff --git a/docs/c4/c4-code-src-executionkit.md b/docs/c4/c4-code-src-executionkit.md index 75e2964..8eda740 100644 --- a/docs/c4/c4-code-src-executionkit.md +++ b/docs/c4/c4-code-src-executionkit.md @@ -98,7 +98,26 @@ - `async complete(messages: Sequence[dict[str, Any]], *, temperature: float | None = None, max_tokens: int | None = None, tools: Sequence[dict[str, Any]] | None = None, **kwargs: Any) -> LLMResponse` - Sends request to provider and parses response - `async _post(endpoint: str, payload: dict[str, Any]) -> dict[str, Any]` - Low-level HTTP POST with error handling - `_parse_response(data: dict[str, Any]) -> LLMResponse` - Parses provider response into LLMResponse -- **Dependencies**: `LLMResponse`, `ToolCall`, `RateLimitError`, `PermanentError`, `ProviderError` +- **Dependencies**: `LLMResponse`, `ToolCall`, `RateLimitError`, `PermanentError`, `ProviderError` (all exception types now imported from `errors.py`; also uses `_classify_http_error` internally to map HTTP status codes to exceptions) + +### Module: `errors.py` + +- **Location**: `executionkit/errors.py` +- **Purpose**: Exception hierarchy for all ExecutionKit errors, extracted from `provider.py` to give errors a dedicated module with a single responsibility +- **Exports**: + - `ExecutionKitError` — base exception carrying `cost: TokenUsage` and `metadata: dict` + - `LLMError` — base for provider/transport failures + - `RateLimitError` — HTTP 429; includes `retry_after: float | None` + - `PermanentError` — non-retryable errors (auth failure, 404) + - `ProviderError` — retryable errors (5xx, network timeout) + - `PatternError` — base for pattern-level failures + - `BudgetExhaustedError` — token/call budget exceeded before next dispatch + - `ConsensusFailedError` — voting strategy could not be satisfied + - `MaxIterationsError` — 
iterative pattern hit iteration limit without converging +- **Dependencies**: `types.py` (`TokenUsage`) +- **Backwards compatibility**: `provider.py` re-exports all nine names using the `Name as Name` idiom so existing imports from `executionkit.provider` continue to work without change + +--- #### `CostTracker` - **Type**: Regular class @@ -211,9 +230,11 @@ ### Exception Classes +> **Note on module location**: All nine exception classes were extracted from `provider.py` into `executionkit/errors.py`. `provider.py` re-exports all of them using the `Name as Name` idiom (e.g., `from executionkit.errors import ExecutionKitError as ExecutionKitError`) to preserve backwards compatibility. Import paths through `provider.py` or directly from `errors.py` are both supported. + #### `ExecutionKitError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:13-23` +- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Base exception for all ExecutionKit errors; carries cost and metadata - **Attributes**: - `cost: TokenUsage` - Token cost of failed operation (default: empty TokenUsage) @@ -224,14 +245,14 @@ #### `LLMError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:26-27` +- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Base class for provider and transport failures (network, protocol, auth) - **Parent**: `ExecutionKitError` - **Dependencies**: `ExecutionKitError` #### `RateLimitError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:30-40` +- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Raised for HTTP 429 rate limit responses; includes retry timing info - **Attributes**: - `retry_after: float | None` - Seconds to wait before retry @@ -242,42 +263,42 @@ 
#### `PermanentError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:43-44` +- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Non-retryable provider error (authentication failure, 404, etc.) - **Parent**: `LLMError` - **Dependencies**: `LLMError` #### `ProviderError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:47-48` +- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Retryable provider or transport error (5xx, network timeout, etc.) - **Parent**: `LLMError` - **Dependencies**: `LLMError` #### `PatternError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:51-52` +- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Base class for pattern-level execution failures (e.g., convergence issues) - **Parent**: `ExecutionKitError` - **Dependencies**: `ExecutionKitError` #### `BudgetExhaustedError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:55-56` +- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Raised when remaining token budget is insufficient for next dispatch - **Parent**: `PatternError` - **Dependencies**: `PatternError` #### `ConsensusFailedError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:59-60` +- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Raised when consensus cannot be established among LLM outputs - **Parent**: `PatternError` - **Dependencies**: `PatternError` #### `MaxIterationsError` - **Type**: Exception subclass -- **Location**: `executionkit/provider.py:63-64` +- **Location**: `executionkit/errors.py` 
(re-exported from `executionkit/provider.py` for backwards compatibility) - **Description**: Raised when iterative pattern (e.g., refine_loop) fails to converge within iteration limit - **Parent**: `PatternError` - **Dependencies**: `PatternError` @@ -410,6 +431,16 @@ #### Helper Functions in `provider.py` (private utilities) +##### `_classify_http_error(status: int, raw: dict[str, Any], retry_after: float, *, cause: BaseException) -> NoReturn` +- **Location**: `executionkit/provider.py` +- **Description**: Centralizes HTTP status code → exception mapping; raises the appropriate typed exception for an HTTP error response (`RateLimitError` for 429, `PermanentError` for 401/403/404, `ProviderError` for all other non-2xx codes), chained to `cause`. Previously this logic was duplicated inside both `_post_httpx` and `_post_urllib`; extracting it eliminates the duplication and ensures consistent error semantics regardless of which HTTP backend is used. +- **Parameters**: + - `status: int` - HTTP response status code + - `raw: dict[str, Any]` - Parsed response body + - `retry_after: float`, `cause: BaseException` (keyword-only) - Retry delay in seconds carried by the `RateLimitError` raised for 429; original exception set as `__cause__` of the raised error +- **Returns**: Never returns (`NoReturn`); always raises `RateLimitError`, `PermanentError`, or `ProviderError` +- **Dependencies**: `RateLimitError`, `PermanentError`, `ProviderError`, `_format_http_error`, `_parse_retry_after` + ##### `_first_choice(data: dict[str, Any]) -> dict[str, Any]` - **Location**: `executionkit/provider.py:195-202` - **Description**: Extracts first choice from provider response dict @@ -448,7 +479,8 @@ ### Internal Dependencies - **From other executionkit modules**: - - `executionkit.provider`: `LLMProvider`, `ToolCallingProvider`, `ExecutionKitError`, `LLMError`, `LLMResponse`, `RateLimitError`, `PermanentError`, `ProviderError`, `PatternError`, `BudgetExhaustedError`, `ConsensusFailedError`, `MaxIterationsError`, `ToolCall`, `Provider` + - `executionkit.errors`: `ExecutionKitError`, `LLMError`, `RateLimitError`, `PermanentError`, `ProviderError`, `PatternError`, 
`BudgetExhaustedError`, `ConsensusFailedError`, `MaxIterationsError` (canonical source; `provider.py` re-exports all of these via `Name as Name`) + - `executionkit.provider`: `LLMProvider`, `ToolCallingProvider`, `LLMResponse`, `ToolCall`, `Provider` (exception names also importable here for backwards compatibility) - `executionkit.types`: `TokenUsage`, `PatternResult`, `Tool`, `VotingStrategy`, `Evaluator` - `executionkit.compose`: `PatternStep`, `pipe` - `executionkit.cost`: `CostTracker` @@ -496,13 +528,8 @@ classDiagram } namespace ProviderAbstraction { - class provider { + class errors { <> - +LLMProvider Protocol - +ToolCallingProvider Protocol - +LLMResponse - +ToolCall - +Provider Class +ExecutionKitError Exception +LLMError Exception +RateLimitError Exception @@ -513,6 +540,16 @@ classDiagram +ConsensusFailedError Exception +MaxIterationsError Exception } + class provider { + <> + +LLMProvider Protocol + +ToolCallingProvider Protocol + +LLMResponse + +ToolCall + +Provider Class + +_classify_http_error Function + ~re-exports errors via Name as Name + } } namespace CostManagement { @@ -561,14 +598,18 @@ classDiagram } } - %% Provider depends on types + %% errors depends on types (TokenUsage used by ExecutionKitError) + errors --> types: imports TokenUsage + + %% Provider depends on types and errors provider --> types: imports TokenUsage + provider --> errors: imports exception hierarchy (re-exports via Name as Name) %% Cost depends on types and provider cost --> types: imports TokenUsage cost --> provider: imports LLMResponse - %% Compose depends on types and provider + %% Compose depends on types and provider/errors compose --> types: imports PatternResult, TokenUsage compose --> provider: imports LLMProvider, ExecutionKitError diff --git a/docs/c4/c4-component-provider-layer.md b/docs/c4/c4-component-provider-layer.md index c6760cb..f4a9185 100644 --- a/docs/c4/c4-component-provider-layer.md +++ b/docs/c4/c4-component-provider-layer.md @@ -8,15 +8,17 @@ | 
**Type** | Component | | **Technology** | Python 3.10+, stdlib `urllib` (HTTP), `typing.Protocol` | | **Purpose** | Defines the contract every LLM backend must fulfil, ships a generic OpenAI-compatible HTTP implementation, and declares the full error hierarchy used across the library | +| **Files** | `provider.py`, `errors.py`, `types.py` | ## Software Features - **LLMProvider protocol** — runtime-checkable duck-type interface for any LLM backend; requires only `async complete()` - **ToolCallingProvider protocol** — narrows `LLMProvider` to backends that expose function/tool calling (`supports_tools: Literal[True]`) - **Provider class** — zero-dependency HTTP client built on `urllib`; handles OpenAI-compatible chat-completions, tool-call parsing, rate-limit detection, and response normalisation +- **`_classify_http_error()` function** — centralises HTTP status code → exception mapping for all HTTP backends (`_post_httpx` and `_post_urllib`); eliminates previously duplicated branching logic and ensures consistent error semantics regardless of transport - **LLMResponse dataclass** — structured, immutable view of one completion: text content, tool calls, finish reason, token usage, and raw provider data - **ToolCall dataclass** — single tool invocation (id, name, parsed arguments) -- **Exception hierarchy** — seven typed exceptions from `ExecutionKitError` root, giving callers precise error semantics for retry decisions, budget accounting, and pattern-level failures +- **Exception hierarchy** (`errors.py`) — nine typed exceptions from `ExecutionKitError` root, extracted into a dedicated module; `provider.py` re-exports all nine via `Name as Name` for backwards compatibility; gives callers precise error semantics for retry decisions, budget accounting, and pattern-level failures ## Code Elements @@ -25,17 +27,18 @@ | `LLMProvider` | Protocol (runtime-checkable) | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:67-77` | | `ToolCallingProvider` 
| Protocol | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:80-82` | | `Provider` | Dataclass / HTTP client | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:121-192` | +| `_classify_http_error` | Private function | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py` | | `LLMResponse` | Frozen dataclass | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:92-118` | | `ToolCall` | Frozen dataclass | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:85-89` | -| `ExecutionKitError` | Base exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:13-23` | -| `LLMError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:26-27` | -| `RateLimitError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:30-40` | -| `PermanentError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:43-44` | -| `ProviderError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:47-48` | -| `PatternError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:51-52` | -| `BudgetExhaustedError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:55-56` | -| `ConsensusFailedError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:59-60` | -| `MaxIterationsError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:63-64` | +| `ExecutionKitError` | Base exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | +| `LLMError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | +| `RateLimitError` | Exception | 
[c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | +| `PermanentError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | +| `ProviderError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | +| `PatternError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | +| `BudgetExhaustedError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | +| `ConsensusFailedError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | +| `MaxIterationsError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) | | `TokenUsage` | Frozen dataclass | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `types.py:14-25` | | `VotingStrategy` | StrEnum | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `types.py:58-60` | | `Tool` | Frozen dataclass | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `types.py:39-55` | @@ -148,7 +151,40 @@ class Tool: title: C4 Component — Provider Layer --- classDiagram - namespace ProviderLayer { + namespace ErrorsModule { + class ExecutionKitError { + <> + +cost: TokenUsage + +metadata: dict + } + class LLMError { + <> + } + class RateLimitError { + <> + +retry_after: float | None + } + class PermanentError { + <> + } + class ProviderError { + <> + } + class PatternError { + <> + } + class BudgetExhaustedError { + <> + } + class ConsensusFailedError { + <> + } + class MaxIterationsError { + <> + } + } + + namespace ProviderModule { class LLMProvider { <> +complete(messages, ...) 
LLMResponse @@ -185,6 +221,9 @@ classDiagram +name: str +arguments: dict } + } + + namespace TypesModule { class PatternResult~T~ { <> +value: T @@ -213,42 +252,9 @@ classDiagram MAJORITY UNANIMOUS } - class ExecutionKitError { - <> - +cost: TokenUsage - +metadata: dict - } - class LLMError { - <> - } - class RateLimitError { - <> - +retry_after: float | None - } - class PermanentError { - <> - } - class ProviderError { - <> - } - class PatternError { - <> - } - class BudgetExhaustedError { - <> - } - class ConsensusFailedError { - <> - } - class MaxIterationsError { - <> - } } - ToolCallingProvider --|> LLMProvider : extends - Provider ..|> ToolCallingProvider : implements - LLMResponse *-- ToolCall : contains - PatternResult *-- TokenUsage : contains + %% errors.py hierarchy ExecutionKitError <|-- LLMError ExecutionKitError <|-- PatternError LLMError <|-- RateLimitError @@ -258,4 +264,13 @@ classDiagram PatternError <|-- ConsensusFailedError PatternError <|-- MaxIterationsError ExecutionKitError *-- TokenUsage : carries cost + + %% provider.py re-exports exceptions from errors.py (Name as Name) + Provider ..|> ToolCallingProvider : implements + ToolCallingProvider --|> LLMProvider : extends + LLMResponse *-- ToolCall : contains + PatternResult *-- TokenUsage : contains + + %% provider.py imports from errors.py + Provider ..> ExecutionKitError : re-exports via Name as Name ``` diff --git a/docs/c4/c4-component-reasoning-patterns.md b/docs/c4/c4-component-reasoning-patterns.md index 620df32..27ff03b 100644 --- a/docs/c4/c4-component-reasoning-patterns.md +++ b/docs/c4/c4-component-reasoning-patterns.md @@ -14,7 +14,7 @@ - **Consensus pattern** (`consensus.py`) — generates `num_samples` parallel LLM completions at high temperature, then applies `MAJORITY` or `UNANIMOUS` voting to select the winner; reports agreement ratio in metadata - **Refine loop pattern** (`refine_loop.py`) — wraps the provider in `_TrackedProvider` for per-call budget enforcement, iterates 
generate → score → refine until quality target is met, `ConvergenceDetector` provides patience-based early stopping, default evaluator uses an LLM to score on 0–10 scale - **ReAct loop pattern** (`react_loop.py`) — maintains a growing message history, dispatches tool calls with timeout enforcement, truncates observations to fit context window, returns the first response that contains no tool calls as the final answer -- **Base utilities** (`base.py`) — `checked_complete` performs pre-call budget validation then records usage; `_TrackedProvider` wraps any `LLMProvider` with budget+retry+truncation-warning logic; `validate_score` guards against NaN and out-of-range scores +- **Base utilities** (`base.py`) — `checked_complete` performs pre-call budget validation (via `_check_budget`) then records usage; `_check_budget` loops over the requested `TokenUsage` field names, reading each limit with `getattr()` and its message label from `_BUDGET_FIELD_LABELS`, replacing 8 per-field `if`-blocks; `_TrackedProvider` wraps any `LLMProvider` with budget+retry+truncation-warning logic and exposes `supports_tools` as a property delegating to the wrapped provider rather than a hardcoded `Literal[True]`; `validate_score` guards against NaN and out-of-range scores ## Code Elements @@ -24,7 +24,9 @@ | `refine_loop` | Async function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `refine_loop.py:18-95` | | `react_loop` | Async function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `react_loop.py:16-88` | | `checked_complete` | Async function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py:24-55` | -| `_TrackedProvider` | Class | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py:69-110` | +| `_check_budget` | Private function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py` | +| `_BUDGET_FIELD_LABELS` | Module-level dict | 
[c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py` | +| `_TrackedProvider` | Class (`supports_tools` now a property) | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py:69-110` | | `validate_score` | Function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py:18-21` | | `_default_evaluator` | Private async function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `refine_loop.py:98-116` | | `_parse_score` | Private function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `refine_loop.py:119-135` | @@ -117,7 +119,7 @@ title: C4 Component — Reasoning Patterns --- flowchart TB subgraph ReasoningPatterns["Reasoning Patterns Component"] - BASE["base.py\nchecked_complete()\n_TrackedProvider\nvalidate_score()"] + BASE["base.py\nchecked_complete()\n_check_budget() / _BUDGET_FIELD_LABELS\n_TrackedProvider (supports_tools: property)\nvalidate_score()"] CONS["consensus.py\nconsensus()"] REFINE["refine_loop.py\nrefine_loop()\n_default_evaluator()\n_parse_score()\n_build_refinement_prompt()"] REACT["react_loop.py\nreact_loop()\n_execute_tool_call()\n_truncate()"] diff --git a/docs/c4/c4-component.md b/docs/c4/c4-component.md index e169ab4..faf4334 100644 --- a/docs/c4/c4-component.md +++ b/docs/c4/c4-component.md @@ -6,7 +6,7 @@ This document is the master index for all C4 Component-level documentation for t | # | Component | Files | Responsibility | |---|-----------|-------|----------------| -| 1 | [Provider Layer](c4-component-provider-layer.md) | `provider.py`, `types.py` | LLM provider protocols, concrete HTTP client, all data types, error hierarchy | +| 1 | [Provider Layer](c4-component-provider-layer.md) | `provider.py`, `errors.py`, `types.py` | LLM provider protocols, concrete HTTP client, all data types, error hierarchy | | 2 | [Execution Engine](c4-component-execution-engine.md) | 
`engine/retry.py`, `engine/parallel.py`, `engine/convergence.py`, `engine/json_extraction.py` | Retry/backoff, bounded concurrency, convergence detection, JSON extraction | | 3 | [Reasoning Patterns](c4-component-reasoning-patterns.md) | `patterns/consensus.py`, `patterns/refine_loop.py`, `patterns/react_loop.py`, `patterns/base.py` | Three composable LLM reasoning strategies with shared budget/cost base | | 4 | [Composition & Session](c4-component-composition-session.md) | `compose.py`, `kit.py`, `cost.py`, `__init__.py` (sync wrappers) | Pipeline chaining, session defaults, cost tracking, sync convenience API | @@ -27,7 +27,7 @@ flowchart TB direction TB subgraph PL["Provider Layer"] - PLcore["LLMProvider / ToolCallingProvider\nProvider (HTTP client)\nLLMResponse / ToolCall\nPatternResult / TokenUsage / Tool\nVotingStrategy / Evaluator\nException hierarchy"] + PLcore["LLMProvider / ToolCallingProvider\nProvider (HTTP client) / _classify_http_error\nLLMResponse / ToolCall\nPatternResult / TokenUsage / Tool\nVotingStrategy / Evaluator\nerrors.py: Exception hierarchy (9 classes)\nprovider.py re-exports exceptions via Name as Name"] end subgraph EE["Execution Engine"] From e443e33d755e6890b9142f65ae14d2582a4e565a Mon Sep 17 00:00:00 2001 From: "anthropic-code-agent[bot]" <242468646+Claude@users.noreply.github.com> Date: Sat, 11 Apr 2026 22:55:37 +0000 Subject: [PATCH 6/6] docs: fix documentation inconsistencies per code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Corrects 5 documentation issues identified by @copilot-pull-request-reviewer: 1. _classify_http_error signature: Updated from incorrect (status_code, payload, headers) → ExecutionKitError to actual (status: int, raw: dict, retry_after: float, *, cause: BaseException) → NoReturn. Removed non-existent _parse_retry_after dependency. 2. 
_check_budget signature: Updated from incorrect (tracker: CostTracker, budget: TokenUsage) to actual (budget: TokenUsage, current: TokenUsage, fields: tuple[str, ...], *, sentinel_suffix: str, exceeded_suffix: str). 3. RateLimitError.retry_after type: Changed from float | None to float in both c4-component-provider-layer.md and c4-code-src-executionkit.md to match implementation (defaults to 1.0, never None). 4. isinstance comment in provider.py: Clarified that @runtime_checkable only checks attribute presence, not value. Simplified explanation to avoid implying isinstance reflects tool capability. All changes verified against actual implementations in errors.py, provider.py, and patterns/base.py. Agent-Logs-Url: https://github.com/tafreeman/executionkit/sessions/9d19e401-43ac-4ffc-a415-7a20d7589d23 Co-authored-by: tafreeman <72930272+tafreeman@users.noreply.github.com> --- docs/c4/c4-code-src-executionkit-patterns.md | 13 ++++++++----- docs/c4/c4-code-src-executionkit.md | 17 +++++++++-------- docs/c4/c4-component-provider-layer.md | 2 +- executionkit/provider.py | 7 ++++--- 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/docs/c4/c4-code-src-executionkit-patterns.md b/docs/c4/c4-code-src-executionkit-patterns.md index 97e6447..c499ae0 100644 --- a/docs/c4/c4-code-src-executionkit-patterns.md +++ b/docs/c4/c4-code-src-executionkit-patterns.md @@ -35,15 +35,18 @@ - **Return Type**: `LLMResponse` - Response from the LLM provider - **Raises**: `BudgetExhaustedError` if any budget constraint would be exceeded -#### `_check_budget(tracker: CostTracker, budget: TokenUsage) -> None` -- **Description**: Validates that the current accumulated cost does not exceed any field of the budget constraint. Replaces 8 per-field `if`-blocks with a single field loop using `getattr()` over `_BUDGET_FIELD_LABELS`. Raises `BudgetExhaustedError` with a descriptive message naming the exceeded field if any constraint is violated. 
+#### `_check_budget(budget: TokenUsage, current: TokenUsage, fields: tuple[str, ...], *, sentinel_suffix: str, exceeded_suffix: str) -> None` +- **Description**: Validates selected `TokenUsage` fields by comparing the configured `budget` against the current accumulated `TokenUsage`. Iterates over the supplied `fields` and raises `BudgetExhaustedError` on the first field whose budget value is the exhausted sentinel (`-1`) or whose current usage has reached its positive limit; a limit of `0` is not enforced. - **Location**: `base.py` -- **Dependencies**: `CostTracker`, `TokenUsage`, `BudgetExhaustedError`, `_BUDGET_FIELD_LABELS` +- **Dependencies**: `TokenUsage`, `BudgetExhaustedError`, `_BUDGET_FIELD_LABELS` - **Parameters**: - - `tracker: CostTracker` - Current accumulated cost tracker - `budget: TokenUsage` - Maximum allowed token/call counts + - `current: TokenUsage` - Current accumulated token/call usage to validate against the budget + - `fields: tuple[str, ...]` - Names of the `TokenUsage` fields to check + - `sentinel_suffix: str` - Message suffix used when a budget field is already at its sentinel/exhausted value + - `exceeded_suffix: str` - Message suffix used when the current usage has reached or exceeded the configured budget - **Return Type**: `None` -- **Raises**: `BudgetExhaustedError` naming the exceeded field (e.g., "input_tokens", "llm_calls") +- **Raises**: `BudgetExhaustedError` whose message names the affected field by its human-readable label from `_BUDGET_FIELD_LABELS` (e.g., "Input token", "LLM call") #### `_BUDGET_FIELD_LABELS` - **Description**: Module-level dict mapping `TokenUsage` field names to human-readable label strings used in `BudgetExhaustedError` messages. Drives the field-loop in `_check_budget`, making it easy to add new budget dimensions without modifying control flow. 
diff --git a/docs/c4/c4-code-src-executionkit.md b/docs/c4/c4-code-src-executionkit.md index 8eda740..94e8981 100644 --- a/docs/c4/c4-code-src-executionkit.md +++ b/docs/c4/c4-code-src-executionkit.md @@ -107,7 +107,7 @@ - **Exports**: - `ExecutionKitError` — base exception carrying `cost: TokenUsage` and `metadata: dict` - `LLMError` — base for provider/transport failures - - `RateLimitError` — HTTP 429; includes `retry_after: float | None` + - `RateLimitError` — HTTP 429; includes `retry_after: float` - `PermanentError` — non-retryable errors (auth failure, 404) - `ProviderError` — retryable errors (5xx, network timeout) - `PatternError` — base for pattern-level failures @@ -431,15 +431,16 @@ #### Helper Functions in `provider.py` (private utilities) -##### `_classify_http_error(status_code: int, payload: dict[str, Any], headers: Any) -> ExecutionKitError` +##### `_classify_http_error(status: int, raw: dict[str, Any], retry_after: float, *, cause: BaseException) -> NoReturn` - **Location**: `executionkit/provider.py` -- **Description**: Centralizes HTTP status code → exception mapping; converts an HTTP error response into the appropriate typed exception (`RateLimitError` for 429, `PermanentError` for 4xx, `ProviderError` for 5xx). Previously this logic was duplicated inside both `_post_httpx` and `_post_urllib`; extracting it eliminates the duplication and ensures consistent error semantics regardless of which HTTP backend is used. +- **Description**: Centralizes HTTP status code → exception mapping by raising the appropriate typed exception for an HTTP failure (`RateLimitError` for 429, `PermanentError` for other 4xx responses, `ProviderError` for 5xx responses). This helper is used by the HTTP backends so they share identical error classification behavior, and it preserves the original triggering exception by chaining the required keyword-only `cause` argument onto the raised error. 
- **Parameters**: - - `status_code: int` - HTTP response status code - - `payload: dict[str, Any]` - Parsed response body - - `headers: Any` - Response headers (used to extract `Retry-After` for 429 responses) -- **Returns**: A typed `ExecutionKitError` subclass instance (never raises) -- **Dependencies**: `RateLimitError`, `PermanentError`, `ProviderError`, `_format_http_error`, `_parse_retry_after` + - `status: int` - HTTP response status code + - `raw: dict[str, Any]` - Parsed JSON body from the response (may be empty dict) + - `retry_after: float` - Value of the `Retry-After` header in seconds + - `cause: BaseException` - Original exception to chain as the raised error's cause +- **Raises**: Always raises a typed `ExecutionKitError` subclass; does not return +- **Dependencies**: `RateLimitError`, `PermanentError`, `ProviderError`, `_format_http_error` ##### `_first_choice(data: dict[str, Any]) -> dict[str, Any]` - **Location**: `executionkit/provider.py:195-202` diff --git a/docs/c4/c4-component-provider-layer.md b/docs/c4/c4-component-provider-layer.md index f4a9185..0f28fb8 100644 --- a/docs/c4/c4-component-provider-layer.md +++ b/docs/c4/c4-component-provider-layer.md @@ -162,7 +162,7 @@ classDiagram } class RateLimitError { <> - +retry_after: float | None + +retry_after: float } class PermanentError { <> diff --git a/executionkit/provider.py b/executionkit/provider.py index e880527..953e32d 100644 --- a/executionkit/provider.py +++ b/executionkit/provider.py @@ -171,9 +171,10 @@ class Provider: # this attribute verbatim — delegate instead: # @property # def supports_tools(self) -> bool: return self._inner.supports_tools - # Hardcoding True in a wrapper causes isinstance(wrapper, ToolCallingProvider) - # to return True even when the inner provider cannot handle tools. - # Ref: PEP 544 runtime_checkable only checks presence, not value. 
+ # For @runtime_checkable protocols, isinstance(wrapper, ToolCallingProvider) + # only checks that the required attribute exists, not whether its value is + # True. Delegating keeps the wrapper's reported capability aligned with the + # inner provider's actual tool support. supports_tools: Literal[True] = field(default=True, init=False) # Derived state — excluded from repr/eq/hash; initialized only in __post_init__ _client: Any = field(