From 1cc57b882fdaf0cf4c242537fa7bd6702eea390b Mon Sep 17 00:00:00 2001
From: tafreeman <knowlesjim287@gmail.com>
Date: Sat, 11 Apr 2026 07:43:56 -0500
Subject: [PATCH 1/6] refactor: generalize budget guard loop + move logging
 import (F-05, F-08)

F-05: Replace 8 repeated per-field if-chains in checked_complete() with a
_check_budget() helper that iterates over _BUDGET_FIELD_LABELS using getattr().
Same pattern as CPython's dataclasses.asdict() internals. Adding a new budget
dimension now requires a single entry in _BUDGET_FIELD_LABELS, not a new pair
of sentinel/limit if-blocks.

F-08: Move `import logging` from inside exception handler to module level,
per PEP 8 and Ruff PLC0415 / Pylint C0415 requirements.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 executionkit/patterns/base.py       | 115 ++++++++++++++++------------
 executionkit/patterns/react_loop.py |   3 +-
 2 files changed, 67 insertions(+), 51 deletions(-)

diff --git a/executionkit/patterns/base.py b/executionkit/patterns/base.py
index c7c90bf..6bce296 100644
--- a/executionkit/patterns/base.py
+++ b/executionkit/patterns/base.py
@@ -16,6 +16,58 @@
 
 BUDGET_EXHAUSTED_SENTINEL = -1
 
+# Maps TokenUsage field names to human-readable labels for error messages.
+# Iterating over this dict with getattr() replaces per-field if-chains —
+# the same pattern CPython's dataclasses.asdict() uses internally.
+# Ref: https://github.com/python/cpython/blob/main/Lib/dataclasses.py
+_BUDGET_FIELD_LABELS: dict[str, str] = {
+    "llm_calls": "LLM call",
+    "input_tokens": "Input token",
+    "output_tokens": "Output token",
+}
+
+
+def _check_budget(
+    budget: TokenUsage,
+    current: TokenUsage,
+    fields: tuple[str, ...],
+    *,
+    sentinel_suffix: str,
+    exceeded_suffix: str,
+) -> None:
+    """Raise :exc:`BudgetExhaustedError` if any tracked field hits its limit.
+
+    Uses ``getattr()`` over named field checks — same pattern as CPython's
+    ``dataclasses.asdict()`` — to avoid repeating the check triplet per field.
+    A value of ``BUDGET_EXHAUSTED_SENTINEL`` (-1) means the field was fully
+    consumed by a prior ``pipe()`` step.
+
+    Args:
+        budget: The budget to check against.
+        current: Current usage snapshot from :class:`CostTracker`.
+        fields: Tuple of :class:`TokenUsage` field names to check.
+        sentinel_suffix: Appended to the error message when sentinel found.
+        exceeded_suffix: Appended to the error message when limit exceeded.
+
+    Raises:
+        BudgetExhaustedError: First field spent (-1) or at/over a positive limit.
+    """
+    for field_name in fields:
+        label = _BUDGET_FIELD_LABELS[field_name]
+        limit = getattr(budget, field_name)
+        if limit == BUDGET_EXHAUSTED_SENTINEL:
+            raise BudgetExhaustedError(
+                f"{label} budget exhausted {sentinel_suffix}",
+                cost=current,
+                metadata={"budget": budget},
+            )
+        if limit > 0 and getattr(current, field_name) >= limit:
+            raise BudgetExhaustedError(
+                f"{label} budget exhausted {exceeded_suffix}",
+                cost=current,
+                metadata={"budget": budget},
+            )
+
 
 def validate_score(score: float) -> float:
     """Validate that an evaluator score is in [0.0, 1.0] and not NaN.
@@ -66,59 +118,24 @@ async def checked_complete(
     """
     if budget is not None:
         current = tracker.to_usage()
-        # -1 sentinel: field was limited and fully consumed by a prior pipe() step.
-        if budget.llm_calls == BUDGET_EXHAUSTED_SENTINEL:
-            raise BudgetExhaustedError(
-                "LLM call budget exhausted (forwarded from pipe)",
-                cost=current,
-                metadata={"budget": budget},
-            )
-        if budget.llm_calls > 0 and current.llm_calls >= budget.llm_calls:
-            raise BudgetExhaustedError(
-                "LLM call budget exhausted before dispatch",
-                cost=current,
-                metadata={"budget": budget},
-            )
-        if budget.input_tokens == BUDGET_EXHAUSTED_SENTINEL:
-            raise BudgetExhaustedError(
-                "Input token budget exhausted (forwarded from pipe)",
-                cost=current,
-                metadata={"budget": budget},
-            )
-        if budget.input_tokens > 0 and current.input_tokens >= budget.input_tokens:
-            raise BudgetExhaustedError(
-                "Input token budget exhausted before dispatch",
-                cost=current,
-                metadata={"budget": budget},
-            )
-        if budget.output_tokens == BUDGET_EXHAUSTED_SENTINEL:
-            raise BudgetExhaustedError(
-                "Output token budget exhausted (forwarded from pipe)",
-                cost=current,
-                metadata={"budget": budget},
-            )
-        if budget.output_tokens > 0 and current.output_tokens >= budget.output_tokens:
-            raise BudgetExhaustedError(
-                "Output token budget exhausted before dispatch",
-                cost=current,
-                metadata={"budget": budget},
-            )
+        _check_budget(
+            budget,
+            current,
+            tuple(_BUDGET_FIELD_LABELS),
+            sentinel_suffix="(forwarded from pipe)",
+            exceeded_suffix="before dispatch",
+        )
 
     async def _before_attempt(attempt: int) -> None:
         if attempt > 1 and budget is not None:
             current = tracker.to_usage()
-            if budget.llm_calls == BUDGET_EXHAUSTED_SENTINEL:
-                raise BudgetExhaustedError(
-                    "LLM call budget exhausted before retry (forwarded from pipe)",
-                    cost=current,
-                    metadata={"budget": budget},
-                )
-            if budget.llm_calls > 0 and current.llm_calls >= budget.llm_calls:
-                raise BudgetExhaustedError(
-                    "LLM call budget exhausted before retry dispatch",
-                    cost=current,
-                    metadata={"budget": budget},
-                )
+            _check_budget(
+                budget,
+                current,
+                ("llm_calls",),
+                sentinel_suffix="before retry (forwarded from pipe)",
+                exceeded_suffix="before retry dispatch",
+            )
         tracker.reserve_call()
 
     response = await with_retry(
diff --git a/executionkit/patterns/react_loop.py b/executionkit/patterns/react_loop.py
index b11d9b3..429083b 100644
--- a/executionkit/patterns/react_loop.py
+++ b/executionkit/patterns/react_loop.py
@@ -4,6 +4,7 @@
 
 import asyncio
 import json
+import logging
 from itertools import chain
 from types import MappingProxyType
 from typing import TYPE_CHECKING, Any
@@ -351,8 +352,6 @@ async def _execute_tool_call(
     except TimeoutError:
         return f"Tool execution timed out after {timeout}s"
     except Exception as exc:
-        import logging
-
         logging.getLogger(__name__).debug(
             "Tool '%s' raised %s", tc_name, type(exc).__name__, exc_info=True
         )

From 60c55fde6e70661f2545378031ae4121138d07c3 Mon Sep 17 00:00:00 2001
From: tafreeman <knowlesjim287@gmail.com>
Date: Sat, 11 Apr 2026 08:40:41 -0500
Subject: [PATCH 2/6] refactor: extract error hierarchy to errors.py +
 _classify_http_error helper (F-06, F-02)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

F-06: Move the 9-class exception hierarchy from provider.py into errors.py,
following the same split used by Anthropic's own SDK (_exceptions.py pattern).
provider.py re-exports with `Name as Name` for PEP 387 backwards-compat —
`from executionkit.provider import XError` imports continue to work unchanged.

F-02: Extract _classify_http_error(status, raw, retry_after, *, cause) to
eliminate the duplicated HTTP status→exception if-chain that existed in both
_post_httpx() and _post_urllib(). A single canonical function now maps
429→RateLimitError, {401,403,404}→PermanentError, else→ProviderError.
Both backends now share this one mapping. Each still parses its own
Retry-After header before the call, so urllib keeps its empty-headers guard.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 executionkit/errors.py   |  74 ++++++++++++++++++++
 executionkit/provider.py | 141 ++++++++++++++++-----------------------
 2 files changed, 133 insertions(+), 82 deletions(-)
 create mode 100644 executionkit/errors.py

diff --git a/executionkit/errors.py b/executionkit/errors.py
new file mode 100644
index 0000000..440de5a
--- /dev/null
+++ b/executionkit/errors.py
@@ -0,0 +1,74 @@
+"""ExecutionKit exception hierarchy.
+
+All nine error classes live here so they can be imported without pulling in
+the full HTTP client machinery from ``provider.py``.
+
+Ref: the Anthropic Python SDK keeps its exception classes in a separate
+``_exceptions.py`` module (github.com/anthropics/anthropic-sdk-python).
+PEP 387 backwards-compat: ``from executionkit.provider import XError`` still
+works because ``provider.py`` re-exports from this module.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from executionkit.types import TokenUsage
+
+
+class ExecutionKitError(Exception):
+    """Base exception for all ExecutionKit errors."""
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        cost: TokenUsage | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__(message)
+        self.cost: TokenUsage = cost if cost is not None else TokenUsage()
+        self.metadata: dict[str, Any] = metadata if metadata is not None else {}
+
+
+class LLMError(ExecutionKitError):
+    """Errors originating from LLM provider communication."""
+
+
+class RateLimitError(LLMError):
+    """Provider returned HTTP 429 — retryable after ``retry_after`` seconds."""
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        retry_after: float = 1.0,
+        cost: TokenUsage | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__(message, cost=cost, metadata=metadata)
+        self.retry_after: float = retry_after
+
+
+class PermanentError(LLMError):
+    """Non-retryable provider error (e.g. 401 authentication failure)."""
+
+
+class ProviderError(LLMError):
+    """Catch-all retryable provider error for unexpected HTTP failures."""
+
+
+class PatternError(ExecutionKitError):
+    """Errors raised by reasoning pattern logic."""
+
+
+class BudgetExhaustedError(PatternError):
+    """Token or call budget exceeded."""
+
+
+class ConsensusFailedError(PatternError):
+    """Consensus pattern could not reach agreement."""
+
+
+class MaxIterationsError(PatternError):
+    """Loop pattern exceeded its iteration limit."""
diff --git a/executionkit/provider.py b/executionkit/provider.py
index 63685c2..807b4bf 100644
--- a/executionkit/provider.py
+++ b/executionkit/provider.py
@@ -15,13 +15,25 @@
 import urllib.request
 from dataclasses import dataclass, field
 from types import MappingProxyType
-from typing import TYPE_CHECKING, Any, Literal, Protocol, runtime_checkable
+from typing import TYPE_CHECKING, Any, Literal, NoReturn, Protocol, runtime_checkable
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
     from types import TracebackType
 
-from executionkit.types import TokenUsage
+# Re-export the error hierarchy from errors.py using the `Name as Name` idiom
+# so ruff/mypy recognise these as intentional public re-exports.
+# Existing `from executionkit.provider import XError` imports continue to work.
+# Ref: PEP 387 backwards-compat — https://peps.python.org/pep-0387/
+from executionkit.errors import BudgetExhaustedError as BudgetExhaustedError
+from executionkit.errors import ConsensusFailedError as ConsensusFailedError
+from executionkit.errors import ExecutionKitError as ExecutionKitError
+from executionkit.errors import LLMError as LLMError
+from executionkit.errors import MaxIterationsError as MaxIterationsError
+from executionkit.errors import PatternError as PatternError
+from executionkit.errors import PermanentError as PermanentError
+from executionkit.errors import ProviderError as ProviderError
+from executionkit.errors import RateLimitError as RateLimitError
 
 # ---------------------------------------------------------------------------
 # httpx availability probe (done once at import time)
@@ -36,68 +48,11 @@
     _HTTPX_AVAILABLE = False
 
 # ---------------------------------------------------------------------------
-# Error hierarchy (9 classes)
+# Error hierarchy — defined in errors.py and re-exported by the imports at
+# the top of this module, so `from executionkit.provider import XError`
+# keeps working. PEP 387 backwards-compat: https://peps.python.org/pep-0387/
 # ---------------------------------------------------------------------------
 
-
-class ExecutionKitError(Exception):
-    """Base exception for all ExecutionKit errors."""
-
-    def __init__(
-        self,
-        message: str,
-        *,
-        cost: TokenUsage | None = None,
-        metadata: dict[str, Any] | None = None,
-    ) -> None:
-        super().__init__(message)
-        self.cost: TokenUsage = cost if cost is not None else TokenUsage()
-        self.metadata: dict[str, Any] = metadata if metadata is not None else {}
-
-
-class LLMError(ExecutionKitError):
-    """Errors originating from LLM provider communication."""
-
-
-class RateLimitError(LLMError):
-    """Provider returned HTTP 429 — retryable after ``retry_after`` seconds."""
-
-    def __init__(
-        self,
-        message: str,
-        *,
-        retry_after: float = 1.0,
-        cost: TokenUsage | None = None,
-        metadata: dict[str, Any] | None = None,
-    ) -> None:
-        super().__init__(message, cost=cost, metadata=metadata)
-        self.retry_after: float = retry_after
-
-
-class PermanentError(LLMError):
-    """Non-retryable provider error (e.g. 401 authentication failure)."""
-
-
-class ProviderError(LLMError):
-    """Catch-all retryable provider error for unexpected HTTP failures."""
-
-
-class PatternError(ExecutionKitError):
-    """Errors raised by reasoning pattern logic."""
-
-
-class BudgetExhaustedError(PatternError):
-    """Token or call budget exceeded."""
-
-
-class ConsensusFailedError(PatternError):
-    """Consensus pattern could not reach agreement."""
-
-
-class MaxIterationsError(PatternError):
-    """Loop pattern exceeded its iteration limit."""
-
-
 # ---------------------------------------------------------------------------
 # Value types
 # ---------------------------------------------------------------------------
@@ -317,15 +272,8 @@ async def _post_httpx(
                     raw = {}
             except Exception:
                 raw = {}
-            if status == 429:
-                retry_after = float(exc.response.headers.get("retry-after", "1"))
-                raise RateLimitError(
-                    "Rate limited (HTTP 429)",
-                    retry_after=retry_after,
-                ) from exc
-            if status in {401, 403, 404}:
-                raise PermanentError(_format_http_error(status, raw)) from exc
-            raise ProviderError(_format_http_error(status, raw)) from exc
+            hdr = exc.response.headers.get("retry-after", "1") if status == 429 else "1"
+            _classify_http_error(status, raw, float(hdr), cause=exc)
         except _httpx.TransportError as exc:
             raise ProviderError(f"Transport failure: {exc}") from exc
 
@@ -358,17 +306,10 @@ def _sync() -> dict[str, Any]:
                 except ProviderError:
                     raw = {}
                 status = exc.code
-                if status == 429:
-                    retry_after = float(
-                        exc.headers.get("retry-after", "1") if exc.headers else 1
-                    )
-                    raise RateLimitError(
-                        "Rate limited (HTTP 429)",
-                        retry_after=retry_after,
-                    ) from exc
-                if status in {401, 403, 404}:
-                    raise PermanentError(_format_http_error(status, raw)) from exc
-                raise ProviderError(_format_http_error(status, raw)) from exc
+                has_hdr = status == 429 and exc.headers
+                hdr = exc.headers.get("retry-after", "1") if has_hdr else "1"
+                retry_after = float(hdr)
+                _classify_http_error(status, raw, retry_after, cause=exc)
             except urllib.error.URLError as exc:
                 raise ProviderError(f"Transport failure: {exc.reason}") from exc
 
@@ -498,6 +439,42 @@ def _redact_sensitive(text: str) -> str:
     )
 
 
+def _classify_http_error(
+    status: int,
+    raw: dict[str, Any],
+    retry_after: float,
+    *,
+    cause: BaseException,
+) -> NoReturn:
+    """Raise the correct LLM error subclass for a failed HTTP response.
+
+    Extracted to eliminate duplication between the urllib and httpx backends.
+    Both backends call this single function — comparable to the status
+    dispatch in the Anthropic SDK's ``_make_status_error()`` method.
+
+    Ref: https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_client.py
+
+    Args:
+        status: HTTP status code from the failed response.
+        raw: Parsed JSON body from the response (may be empty dict).
+        retry_after: ``Retry-After`` delay in seconds; only used for HTTP 429.
+        cause: The original exception, chained via ``raise ... from cause``.
+
+    Raises:
+        RateLimitError: For HTTP 429.
+        PermanentError: For HTTP 401, 403, 404.
+        ProviderError: For all other non-2xx status codes.
+    """
+    if status == 429:
+        raise RateLimitError(
+            "Rate limited (HTTP 429)",
+            retry_after=retry_after,
+        ) from cause
+    if status in {401, 403, 404}:
+        raise PermanentError(_format_http_error(status, raw)) from cause
+    raise ProviderError(_format_http_error(status, raw)) from cause
+
+
 def _format_http_error(status_code: int, payload: dict[str, Any]) -> str:
     message = payload.get("error")
     if isinstance(message, dict):

From 51f16f0dbc05be071f13cbe2ed1ae5edf53e11be Mon Sep 17 00:00:00 2001
From: tafreeman <knowlesjim287@gmail.com>
Date: Sat, 11 Apr 2026 16:00:16 -0500
Subject: [PATCH 3/6] fix: delegate _TrackedProvider.supports_tools +
 F-01/F-03/F-04 docs (F-04)

F-04: Replace hardcoded `supports_tools: Literal[True] = True` in
_TrackedProvider with a delegating property that reflects the wrapped
provider's actual capability:
  @property
  def supports_tools(self) -> bool:
      return getattr(self._provider, "supports_tools", False)
A wrapper claiming Literal[True] unconditionally creates false positives
because @runtime_checkable only checks attribute presence, not value
(PEP 544). Real-world failure mode: LiteLLM bug #11370.

Add WARNING docstring to Provider.supports_tools explaining the delegation
pattern required when building wrapper classes.

F-01 verified: Add inline NOTE in _TrackedProvider confirming _calls is
never accessed directly; reserve_call()/release_call() are the sole API.

F-03 verified: Add inline NOTE in consensus() confirming max_cost is
implemented and propagated to every checked_complete() call.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 executionkit/patterns/base.py      | 18 +++++++++++++++---
 executionkit/patterns/consensus.py |  3 +++
 executionkit/provider.py           |  9 +++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/executionkit/patterns/base.py b/executionkit/patterns/base.py
index 6bce296..ddcf227 100644
--- a/executionkit/patterns/base.py
+++ b/executionkit/patterns/base.py
@@ -4,7 +4,7 @@
 
 import math
 import warnings
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -176,8 +176,6 @@ class _TrackedProvider:
     multiple times while sharing a single ``CostTracker`` and metadata dict.
     """
 
-    supports_tools: Literal[True] = True
-
     def __init__(
         self,
         provider: LLMProvider,
@@ -195,6 +193,20 @@ def __init__(
         self._retry = retry
         self._context = context
 
+    @property
+    def supports_tools(self) -> bool:
+        """Delegate capability flag to the wrapped provider.
+
+        A wrapper must not unconditionally claim tool support — it should
+        reflect what the inner provider actually supports. Read the value:
+        the attribute always exists, so presence-only checks still pass.
+        Ref F-04: https://github.com/BerriAI/litellm/issues/11370 (real-world
+        failure from hardcoding capability instead of delegating).
+        NOTE (F-01 verified, class-wide): CostTracker._calls is never accessed
+        directly; only reserve_call()/release_call() are used (see cost.py).
+        """
+        return getattr(self._provider, "supports_tools", False)
+
     async def complete(
         self,
         messages: Sequence[dict[str, Any]],
diff --git a/executionkit/patterns/consensus.py b/executionkit/patterns/consensus.py
index 31e103c..6e0719b 100644
--- a/executionkit/patterns/consensus.py
+++ b/executionkit/patterns/consensus.py
@@ -31,6 +31,9 @@ async def consensus(
     max_tokens: int = 4096,
     max_concurrency: int = 5,
     retry: RetryConfig | None = None,
+    # NOTE (F-03 verified): max_cost is implemented and forwarded to every
+    # checked_complete() call below, enabling budget-aware pipe() chains.
+    # See executionkit/compose.py _filter_kwargs() for propagation logic.
     max_cost: TokenUsage | None = None,
 ) -> PatternResult[str]:
     """Run parallel LLM samples and aggregate via voting.
diff --git a/executionkit/provider.py b/executionkit/provider.py
index 807b4bf..e880527 100644
--- a/executionkit/provider.py
+++ b/executionkit/provider.py
@@ -165,6 +165,15 @@ class Provider:
     default_temperature: float = 0.7
     default_max_tokens: int = 4096
     timeout: float = 120.0
+    # supports_tools is Literal[True] for this concrete HTTP client because
+    # it always speaks the OpenAI tool-calling wire format.
+    # WARNING (F-04): If you build a *wrapper* around Provider, do NOT copy
+    # this attribute verbatim — delegate instead:
+    #   @property
+    #   def supports_tools(self) -> bool: return self._inner.supports_tools
+    # Hardcoding True makes the wrapper report tool support even when the
+    # inner provider lacks it. isinstance(x, ToolCallingProvider) cannot tell:
+    # PEP 544 runtime_checkable only checks presence, so read the value too.
     supports_tools: Literal[True] = field(default=True, init=False)
     # Derived state — excluded from repr/eq/hash; initialized only in __post_init__
     _client: Any = field(

From 95772769d777ebc0c553176b32190f369cfcd943 Mon Sep 17 00:00:00 2001
From: tafreeman <knowlesjim287@gmail.com>
Date: Sat, 11 Apr 2026 17:28:58 -0500
Subject: [PATCH 4/6] test: add regression tests for _classify_http_error,
 _check_budget, supports_tools delegation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_classify_http_error (F-02): 8 tests covering 429→RateLimitError,
{401,403,404}→PermanentError, 500/503→ProviderError, exception chaining
via __cause__, and retry_after value propagation.

_check_budget (F-05): 5 tests covering unlimited-sentinel (0), call limit,
input token limit, sentinel (-1) forwarded from pipe, and error metadata
(cost + budget attached to BudgetExhaustedError).

_TrackedProvider.supports_tools delegation (F-04): 2 tests — delegates True
from MockProvider (which has supports_tools=True) and delegates False from a
minimal LLMProvider that has no supports_tools attribute.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tests/test_patterns.py | 143 +++++++++++++++++++++++++++++++++++++++++
 tests/test_provider.py |  66 +++++++++++++++++++
 2 files changed, 209 insertions(+)

diff --git a/tests/test_patterns.py b/tests/test_patterns.py
index dd9d569..508ebe0 100644
--- a/tests/test_patterns.py
+++ b/tests/test_patterns.py
@@ -1568,3 +1568,146 @@ async def leaky_execute(query: str) -> str:
 
     assert "hunter2" not in observation
     assert "ValueError" in observation
+
+
+# ---------------------------------------------------------------------------
+# _check_budget regression tests (F-05)
+# Ref: field-loop pattern from CPython dataclasses.asdict() eliminates
+# per-field if-chain repetition.
+# ---------------------------------------------------------------------------
+
+
+class TestCheckBudget:
+    """_check_budget raises BudgetExhaustedError on the first exceeded field."""
+
+    def test_no_error_when_all_limits_zero(self) -> None:
+        """0 is the unlimited sentinel — should never raise."""
+        from executionkit.patterns.base import _check_budget
+        from executionkit.types import TokenUsage
+
+        _check_budget(
+            TokenUsage(llm_calls=0, input_tokens=0, output_tokens=0),
+            TokenUsage(llm_calls=999, input_tokens=999, output_tokens=999),
+            ("llm_calls", "input_tokens", "output_tokens"),
+            sentinel_suffix="(pipe)",
+            exceeded_suffix="before dispatch",
+        )
+
+    def test_raises_on_call_limit_hit(self) -> None:
+        from executionkit.errors import BudgetExhaustedError
+        from executionkit.patterns.base import _check_budget
+        from executionkit.types import TokenUsage
+
+        with pytest.raises(BudgetExhaustedError) as exc_info:
+            _check_budget(
+                TokenUsage(llm_calls=3, input_tokens=0, output_tokens=0),
+                TokenUsage(llm_calls=3, input_tokens=100, output_tokens=100),
+                ("llm_calls", "input_tokens", "output_tokens"),
+                sentinel_suffix="(pipe)",
+                exceeded_suffix="before dispatch",
+            )
+        assert "LLM call" in str(exc_info.value)
+        assert "before dispatch" in str(exc_info.value)
+
+    def test_raises_on_sentinel_minus_one(self) -> None:
+        from executionkit.errors import BudgetExhaustedError
+        from executionkit.patterns.base import BUDGET_EXHAUSTED_SENTINEL, _check_budget
+        from executionkit.types import TokenUsage
+
+        with pytest.raises(BudgetExhaustedError) as exc_info:
+            _check_budget(
+                TokenUsage(
+                    llm_calls=BUDGET_EXHAUSTED_SENTINEL,
+                    input_tokens=0,
+                    output_tokens=0,
+                ),
+                TokenUsage(llm_calls=0, input_tokens=0, output_tokens=0),
+                ("llm_calls",),
+                sentinel_suffix="before retry (forwarded from pipe)",
+                exceeded_suffix="before retry dispatch",
+            )
+        assert "forwarded from pipe" in str(exc_info.value)
+
+    def test_raises_on_input_token_limit(self) -> None:
+        from executionkit.errors import BudgetExhaustedError
+        from executionkit.patterns.base import _check_budget
+        from executionkit.types import TokenUsage
+
+        # llm_calls=0 means unlimited; only input_tokens limit is set.
+        # current input_tokens (500) exceeds budget (100) → Input token error.
+        with pytest.raises(BudgetExhaustedError) as exc_info:
+            _check_budget(
+                TokenUsage(llm_calls=0, input_tokens=100, output_tokens=0),
+                TokenUsage(llm_calls=10, input_tokens=500, output_tokens=500),
+                ("llm_calls", "input_tokens", "output_tokens"),
+                sentinel_suffix="(pipe)",
+                exceeded_suffix="before dispatch",
+            )
+        assert "Input token" in str(exc_info.value)
+
+    def test_error_carries_cost_and_budget_metadata(self) -> None:
+        from executionkit.errors import BudgetExhaustedError
+        from executionkit.patterns.base import _check_budget
+        from executionkit.types import TokenUsage
+
+        budget = TokenUsage(llm_calls=1, input_tokens=0, output_tokens=0)
+        current = TokenUsage(llm_calls=1, input_tokens=0, output_tokens=0)
+        with pytest.raises(BudgetExhaustedError) as exc_info:
+            _check_budget(
+                budget,
+                current,
+                ("llm_calls",),
+                sentinel_suffix="(pipe)",
+                exceeded_suffix="before dispatch",
+            )
+        assert exc_info.value.cost == current
+        assert exc_info.value.metadata["budget"] == budget
+
+
+# ---------------------------------------------------------------------------
+# _TrackedProvider.supports_tools delegation tests (F-04)
+# Ref: @runtime_checkable only checks presence, not value — a wrapper must
+# delegate the capability flag to the inner provider.
+# PEP 544: https://peps.python.org/pep-0544/
+# ---------------------------------------------------------------------------
+
+
+class TestTrackedProviderSupportsDelegation:
+    """_TrackedProvider.supports_tools delegates to the wrapped provider."""
+
+    def test_delegates_true_from_tool_capable_provider(self) -> None:
+        from executionkit._mock import MockProvider
+        from executionkit.cost import CostTracker
+        from executionkit.patterns.base import _TrackedProvider
+
+        inner = MockProvider(responses=["ok"])
+        # MockProvider has supports_tools = True
+        tp = _TrackedProvider(
+            inner,
+            CostTracker(),
+            {},
+            budget=None,
+            retry=None,
+            context="test",
+        )
+        assert tp.supports_tools is True
+
+    def test_delegates_false_from_non_tool_provider(self) -> None:
+        """A plain LLMProvider without supports_tools must yield False."""
+        from executionkit.cost import CostTracker
+        from executionkit.patterns.base import _TrackedProvider
+        from executionkit.provider import LLMResponse
+
+        class MinimalProvider:
+            async def complete(self, messages, **kwargs):  # type: ignore[no-untyped-def]
+                return LLMResponse(content="ok")
+
+        tp = _TrackedProvider(
+            MinimalProvider(),  # type: ignore[arg-type]
+            CostTracker(),
+            {},
+            budget=None,
+            retry=None,
+            context="test",
+        )
+        assert tp.supports_tools is False
diff --git a/tests/test_provider.py b/tests/test_provider.py
index 7f69873..f32a5db 100644
--- a/tests/test_provider.py
+++ b/tests/test_provider.py
@@ -618,3 +618,69 @@ def test_provider_client_is_set_post_init(self) -> None:
         """_use_httpx is set to a bool by __post_init__ regardless of httpx."""
         provider = Provider("https://api.openai.com/v1", model="gpt-4o-mini")
         assert isinstance(provider._use_httpx, bool)
+
+
+# ---------------------------------------------------------------------------
+# _classify_http_error regression tests (F-02)
+# Ref: extracted to eliminate duplication between urllib and httpx backends.
+# Anthropic SDK uses same pattern in _make_status_error().
+# ---------------------------------------------------------------------------
+
+
+class TestClassifyHttpError:
+    """_classify_http_error maps HTTP status codes to the correct exception."""
+
+    def test_429_raises_rate_limit_error(self) -> None:
+        from executionkit.provider import _classify_http_error
+
+        cause = Exception("original")
+        with pytest.raises(RateLimitError) as exc_info:
+            _classify_http_error(429, {}, 5.0, cause=cause)
+        assert exc_info.value.retry_after == 5.0
+        assert exc_info.value.__cause__ is cause
+
+    def test_429_default_retry_after_is_propagated(self) -> None:
+        from executionkit.provider import _classify_http_error
+
+        with pytest.raises(RateLimitError) as exc_info:
+            _classify_http_error(429, {}, 2.5, cause=Exception())
+        assert exc_info.value.retry_after == 2.5
+
+    def test_401_raises_permanent_error(self) -> None:
+        from executionkit.provider import _classify_http_error
+
+        with pytest.raises(PermanentError):
+            _classify_http_error(401, {}, 1.0, cause=Exception())
+
+    def test_403_raises_permanent_error(self) -> None:
+        from executionkit.provider import _classify_http_error
+
+        with pytest.raises(PermanentError):
+            _classify_http_error(403, {}, 1.0, cause=Exception())
+
+    def test_404_raises_permanent_error(self) -> None:
+        from executionkit.provider import _classify_http_error
+
+        with pytest.raises(PermanentError):
+            _classify_http_error(404, {}, 1.0, cause=Exception())
+
+    def test_500_raises_provider_error(self) -> None:
+        from executionkit.provider import _classify_http_error
+
+        with pytest.raises(ProviderError):
+            _classify_http_error(500, {}, 1.0, cause=Exception())
+
+    def test_503_raises_provider_error(self) -> None:
+        from executionkit.provider import _classify_http_error
+
+        with pytest.raises(ProviderError):
+            _classify_http_error(503, {}, 1.0, cause=Exception())
+
+    def test_exception_is_chained_via_cause(self) -> None:
+        """raise ... from cause must set __cause__, not just __context__."""
+        from executionkit.provider import _classify_http_error
+
+        original = ValueError("root cause")
+        with pytest.raises(ProviderError) as exc_info:
+            _classify_http_error(500, {}, 1.0, cause=original)
+        assert exc_info.value.__cause__ is original

From 9ae790c81c3717f651a307831c07271bc6ef6439 Mon Sep 17 00:00:00 2001
From: tafreeman <knowlesjim287@gmail.com>
Date: Sat, 11 Apr 2026 17:44:02 -0500
Subject: [PATCH 5/6] docs: update architecture, API reference, and C4 diagrams
 post-refactor

Synchronize all documentation with the F-02/F-04/F-05/F-06/F-08 changes:

CLAUDE.md:
- Add errors.py to module responsibilities table
- Update provider.py and patterns/base.py descriptions
- Add _classify_http_error and _check_budget to design invariants

docs/architecture.md:
- Add errors.py to module map and dependency graph
- Document _classify_http_error as single HTTP error classification point
- Update Error Handling Architecture section

docs/api-reference.md:
- Add _check_budget() and _classify_http_error() internal helper entries
- Add module-location note for error hierarchy (errors.py)

docs/c4/ (5 files):
- c4-code-src-executionkit.md: Add errors.py module, update Mermaid diagram
- c4-code-src-executionkit-patterns.md: Add _check_budget, update _TrackedProvider
- c4-component.md: Update Provider Layer to include errors.py
- c4-component-provider-layer.md: Add errors.py, _classify_http_error, update diagram
- c4-component-reasoning-patterns.md: Add _check_budget, supports_tools delegation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CLAUDE.md                                    |   9 +-
 docs/api-reference.md                        |  43 ++++++++
 docs/architecture.md                         |  28 ++++-
 docs/c4/c4-code-src-executionkit-patterns.md |  29 ++++-
 docs/c4/c4-code-src-executionkit.md          |  79 ++++++++++----
 docs/c4/c4-component-provider-layer.md       | 105 +++++++++++--------
 docs/c4/c4-component-reasoning-patterns.md   |   8 +-
 docs/c4/c4-component.md                      |   4 +-
 8 files changed, 224 insertions(+), 81 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 311cb0c..78a0e37 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -37,10 +37,11 @@ ExecutionKit is a minimal library for LLM reasoning patterns — it fills the ga
 
 | Module | Role |
 |--------|------|
-| `provider.py` | `LLMProvider` protocol, `Provider` HTTP client, `LLMResponse`, 9-class error hierarchy |
+| `errors.py` | 9-class exception hierarchy (`ExecutionKitError` → `LLMError`, `PatternError` subtrees); extracted from `provider.py` (F-06) |
+| `provider.py` | `LLMProvider` protocol, `Provider` HTTP client, `LLMResponse`; re-exports error classes from `errors.py` for backwards compatibility; `_classify_http_error()` is the single HTTP status→exception mapping point shared by both backends (F-02) |
 | `types.py` | Frozen value types: `TokenUsage`, `PatternResult[T]`, `Tool`, `VotingStrategy`, `Evaluator` |
 | `cost.py` | `CostTracker` — mutable accumulator with two-phase accounting (`reserve_call` + `record_without_call`) |
-| `patterns/base.py` | `checked_complete()` — shared budget guard + retry entry point for all patterns |
+| `patterns/base.py` | `checked_complete()` — shared budget guard + retry entry point; `_check_budget()` helper uses `getattr()` field loop replacing per-field if-chains (F-05); `_TrackedProvider.supports_tools` delegates to wrapped provider via `getattr` instead of hardcoding `Literal[True]` (F-04) |
 | `patterns/consensus.py` | Parallel sampling, majority/unanimous voting, agreement metadata |
 | `patterns/refine_loop.py` | Iterative improvement with `ConvergenceDetector`; default evaluator uses XML sandboxing |
 | `patterns/react_loop.py` | Think-act-observe loop; validates tool args against JSON Schema; caps context via `max_history_messages` |
@@ -55,7 +56,9 @@ ExecutionKit is a minimal library for LLM reasoning patterns — it fills the ga
 
 **Two-phase cost accounting** — `reserve_call()` pre-increments the call counter before `await` (TOCTOU-safe for concurrent patterns); `record_without_call(response)` adds token counts after success.
 
-**Budget guards** — `checked_complete()` in `patterns/base.py` checks token/call budget before every LLM call and raises `BudgetExhaustedError` (with accumulated cost snapshot) if exceeded.
+**Budget guards** — `checked_complete()` in `patterns/base.py` checks token/call budget before every LLM call and raises `BudgetExhaustedError` (with accumulated cost snapshot) if exceeded. The internal `_check_budget()` helper iterates over field names using `getattr()` rather than repeating an if-block per field (F-05).
+
+**Centralised HTTP error mapping** — `_classify_http_error()` in `provider.py` is the single function that converts HTTP status codes to the appropriate error subclass. Both the `_post_httpx` and `_post_urllib` backends call it, eliminating the duplicated mapping logic that previously existed in each (F-02).
 
 **Structural typing** — `LLMProvider` and `ToolCallingProvider` are `@runtime_checkable` protocols, not base classes. Any object matching the interface works.
 
diff --git a/docs/api-reference.md b/docs/api-reference.md
index e63abc8..8ca1155 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -1089,10 +1089,53 @@ Validate that an evaluator score is in [0.0, 1.0] and not NaN.
 
 ---
 
+### `_check_budget()` (internal)
+
+```python
+def _check_budget(
+    budget: TokenUsage,
+    current: TokenUsage,
+    fields: tuple[str, ...],
+    *,
+    sentinel_suffix: str,
+    exceeded_suffix: str,
+) -> None
+```
+
+Internal helper used by `checked_complete()` (F-05). Iterates over the named `TokenUsage` fields using `getattr()` and raises `BudgetExhaustedError` on the first field that is either sentinel-exhausted (value `-1`, set by `pipe()` propagation) or over its limit. This replaces the previous per-field if-block repetition and follows the same pattern as CPython's `dataclasses.asdict()`.
+
+**Location:** `executionkit/patterns/base.py`
+
+**Raises:** `BudgetExhaustedError` on the first exhausted field.
+
+---
+
+### `_classify_http_error()` (internal)
+
+```python
+def _classify_http_error(
+    status: int,
+    raw: dict[str, Any],
+    retry_after: float,
+    *,
+    cause: BaseException,
+) -> NoReturn
+```
+
+Internal helper in `provider.py` (F-02). Centralises the HTTP status code → exception mapping that is shared by both the `_post_httpx` and `_post_urllib` backends. Raises the correct typed exception — `RateLimitError` for HTTP 429, `PermanentError` for 401/403/404, `ProviderError` for all other non-2xx codes — and chains `cause` as the original exception. Both HTTP backends call this single function rather than duplicating the mapping logic.
+
+**Location:** `executionkit/provider.py`
+
+**Raises:** `RateLimitError`, `PermanentError`, or `ProviderError` (always raises; return type is `NoReturn`).
+
+---
+
 ## Error Hierarchy
 
 All exceptions carry `cost: TokenUsage` and `metadata: dict[str, Any]` attributes set at raise time.
 
+> **Module location (F-06):** The full 9-class hierarchy is defined in `executionkit/errors.py`. `provider.py` re-exports every class under the same name so that `from executionkit.provider import XError` imports remain valid.
+
 ```
 ExecutionKitError
 ├── LLMError                    — provider communication errors
diff --git a/docs/architecture.md b/docs/architecture.md
index 5caa002..655d0ad 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -37,15 +37,21 @@ shape every design decision:
 executionkit/
 ├── __init__.py          — public API surface; sync wrappers
 ├── types.py             — frozen value types: PatternResult, TokenUsage, Tool, VotingStrategy, Evaluator
+├── errors.py            — 9-class exception hierarchy (F-06: extracted from provider.py)
 ├── provider.py          — LLMProvider protocol, ToolCallingProvider protocol,
-│                          Provider concrete class, LLMResponse, ToolCall,
-│                          and the 9-class error hierarchy
+│                          Provider concrete class, LLMResponse, ToolCall;
+│                          re-exports error classes from errors.py for backwards compatibility;
+│                          _classify_http_error() is the single HTTP status→exception mapping
+│                          point for both urllib and httpx backends (F-02)
 ├── cost.py              — CostTracker mutable accumulator
 ├── compose.py           — pipe() composition helper, PatternStep protocol
 ├── kit.py               — Kit session facade (provider + cumulative usage)
 ├── _mock.py             — MockProvider test double (satisfies both protocols)
 ├── patterns/
-│   ├── base.py          — checked_complete(), validate_score(), _TrackedProvider
+│   ├── base.py          — checked_complete(), validate_score(), _TrackedProvider;
+│   │                      _check_budget() uses getattr() field loop replacing per-field
+│   │                      if-chains (F-05); _TrackedProvider.supports_tools delegates
+│   │                      to wrapped provider via getattr (F-04)
 │   ├── consensus.py     — parallel majority/unanimous voting
 │   ├── refine_loop.py   — iterative score-guided refinement
 │   └── react_loop.py    — tool-calling think-act-observe loop
@@ -66,7 +72,8 @@ patterns/base    ──► cost, engine/retry, provider, types
 patterns/consensus  ──► cost, engine/parallel, engine/retry, patterns/base, provider, types
 patterns/refine_loop ──► cost, engine/convergence, engine/retry, patterns/base, provider, types
 patterns/react_loop  ──► cost, engine/retry, patterns/base, provider, types
-provider  ──► types
+provider  ──► types, errors  (re-exports all 9 error classes from errors.py)
+errors    ──► types
 cost      ──► types
 engine/*  ──► provider (retry only)
 ```
@@ -172,8 +179,13 @@ directly. Its snapshot is emitted as an immutable `TokenUsage` via `to_usage()`.
 
 ## Error Handling Architecture
 
+The full 9-class exception hierarchy lives in `executionkit/errors.py` (F-06).
+`provider.py` re-exports all nine classes under the same names so that existing
+`from executionkit.provider import XError` imports continue to work without
+modification (PEP 387 backwards compatibility).
+
 ```
-ExecutionKitError
+ExecutionKitError              ← executionkit/errors.py
 ├── LLMError                  ← provider communication failures
 │   ├── RateLimitError        ← HTTP 429; carries retry_after float
 │   ├── PermanentError        ← HTTP 401/403/404; do not retry
@@ -188,6 +200,12 @@ All errors carry `cost: TokenUsage` so callers can see what was spent before
 the failure. `pipe()` augments errors with the cumulative cross-step cost before
 re-raising.
 
+**HTTP error classification:** `_classify_http_error()` in `provider.py` is the
+single function responsible for mapping HTTP status codes to the correct error
+subclass. Both the `_post_httpx` and `_post_urllib` backends call it, eliminating
+duplicated mapping logic (F-02). This mirrors the pattern used by the Anthropic
+SDK's `_make_status_error()`.
+
 **Retry boundary:** `with_retry()` in `engine/retry.py` only retries
 `RateLimitError` and `ProviderError`. `PermanentError` propagates immediately.
 `asyncio.CancelledError` is always re-raised without retry.
diff --git a/docs/c4/c4-code-src-executionkit-patterns.md b/docs/c4/c4-code-src-executionkit-patterns.md
index 8102785..97e6447 100644
--- a/docs/c4/c4-code-src-executionkit-patterns.md
+++ b/docs/c4/c4-code-src-executionkit-patterns.md
@@ -22,9 +22,9 @@
 - **Raises**: `ValueError` if score is NaN or outside [0.0, 1.0] range
 
 #### `checked_complete(provider: LLMProvider, messages: Sequence[dict[str, Any]], tracker: CostTracker, budget: TokenUsage | None, retry: RetryConfig | None, **kwargs: Any) -> LLMResponse`
-- **Description**: Makes a budget-aware LLM API call with retry logic. Checks token and LLM call budgets before dispatching and records usage in the cost tracker.
+- **Description**: Makes a budget-aware LLM API call with retry logic. Checks token and LLM call budgets before dispatching (via `_check_budget`) and records usage in the cost tracker.
 - **Location**: `base.py:24-55`
-- **Dependencies**: `LLMProvider`, `CostTracker`, `BudgetExhaustedError`, `with_retry`, `DEFAULT_RETRY`, `TokenUsage`, `RetryConfig`, `LLMResponse`
+- **Dependencies**: `LLMProvider`, `CostTracker`, `BudgetExhaustedError`, `with_retry`, `DEFAULT_RETRY`, `TokenUsage`, `RetryConfig`, `LLMResponse`, `_check_budget`, `_BUDGET_FIELD_LABELS`
 - **Parameters**:
   - `provider: LLMProvider` - The LLM provider to use
   - `messages: Sequence[dict[str, Any]]` - Messages to send to the LLM
@@ -35,6 +35,23 @@
 - **Return Type**: `LLMResponse` - Response from the LLM provider
 - **Raises**: `BudgetExhaustedError` if any budget constraint would be exceeded
 
+#### `_check_budget(budget: TokenUsage, current: TokenUsage, fields: tuple[str, ...], *, sentinel_suffix: str, exceeded_suffix: str) -> None`
+- **Description**: Validates that the current accumulated cost does not exceed any checked field of the budget constraint. Replaces 8 per-field `if`-blocks with a single field loop using `getattr()` over `_BUDGET_FIELD_LABELS`. Raises `BudgetExhaustedError` with a descriptive message naming the exceeded field if any constraint is violated.
+- **Location**: `base.py`
+- **Dependencies**: `CostTracker`, `TokenUsage`, `BudgetExhaustedError`, `_BUDGET_FIELD_LABELS`
+- **Parameters**:
+  - `budget: TokenUsage`, `current: TokenUsage` - Maximum allowed token/call counts and the usage accumulated so far
+  - `fields: tuple[str, ...]` - Names of the `TokenUsage` fields to check; `sentinel_suffix: str`, `exceeded_suffix: str` (keyword-only) - message suffixes for the sentinel-exhausted and limit-exceeded cases
+- **Return Type**: `None`
+- **Raises**: `BudgetExhaustedError` naming the exceeded field (e.g., "input_tokens", "llm_calls")
+
+#### `_BUDGET_FIELD_LABELS`
+- **Description**: Module-level dict mapping `TokenUsage` field names to human-readable label strings used in `BudgetExhaustedError` messages. Drives the field-loop in `_check_budget`, making it easy to add new budget dimensions without modifying control flow.
+- **Location**: `base.py`
+- **Type**: `dict[str, str]`
+- **Example entries**: `{"llm_calls": "LLM call", "input_tokens": "Input token", "output_tokens": "Output token"}`
+- **Dependencies**: None
+
 #### `_note_truncation(response: LLMResponse, metadata: dict[str, Any], context: str) -> None`
 - **Description**: Logs a warning and increments truncation counter in metadata if the LLM response was truncated (finish_reason indicates truncation).
 - **Location**: `base.py:58-66`
@@ -185,7 +202,8 @@
   - `_budget: TokenUsage | None` - Optional token budget constraints
   - `_retry: RetryConfig | None` - Retry configuration
   - `_context: str` - Context string for error messages
-  - `supports_tools: bool = True` - Class attribute indicating tool support capability
+- **Properties**:
+  - `supports_tools: bool` - Delegates to the wrapped provider's `supports_tools` attribute rather than hardcoding `Literal[True]`; this allows `_TrackedProvider` to accurately reflect the capabilities of the underlying provider at runtime
 - **Methods**:
   - `__init__(provider: LLMProvider, tracker: CostTracker, metadata: dict[str, Any], *, budget: TokenUsage | None, retry: RetryConfig | None, context: str) -> None` - Initializes the wrapper with dependencies
   - `complete(messages: Sequence[dict[str, Any]], *, temperature: float | None = None, max_tokens: int | None = None, tools: Sequence[dict[str, Any]] | None = None, **kwargs: Any) -> LLMResponse` - Wraps provider.complete() with budget and truncation checks
@@ -228,6 +246,7 @@ None - executionkit has zero external runtime dependencies as specified in `pypr
 ### Standard Library Dependencies
 
 - `asyncio` - For async/await support and task management (react_loop)
+- `logging` - Module-level import in `react_loop.py` for structured diagnostic logging
 - `collections.Counter` - For vote counting in consensus
 - `json` - For serializing tool arguments (react_loop)
 - `math` - For NaN checking in score validation
@@ -270,7 +289,7 @@ classDiagram
         
         class TrackedProvider {
             <<class>>
-            +supports_tools: bool
+            +supports_tools: bool (property, delegates to _provider)
             -_provider: LLMProvider
             -_tracker: CostTracker
             -_metadata: dict
@@ -285,6 +304,8 @@ classDiagram
             <<module>>
             +validate_score(score) float
             +checked_complete(provider, messages, ...) LLMResponse
+            -_check_budget(budget, current, fields, ...) None
+            -_BUDGET_FIELD_LABELS dict
             -_note_truncation(response, metadata, context) void
             -_TrackedProvider TrackedProvider
         }
diff --git a/docs/c4/c4-code-src-executionkit.md b/docs/c4/c4-code-src-executionkit.md
index 75e2964..8eda740 100644
--- a/docs/c4/c4-code-src-executionkit.md
+++ b/docs/c4/c4-code-src-executionkit.md
@@ -98,7 +98,26 @@
   - `async complete(messages: Sequence[dict[str, Any]], *, temperature: float | None = None, max_tokens: int | None = None, tools: Sequence[dict[str, Any]] | None = None, **kwargs: Any) -> LLMResponse` - Sends request to provider and parses response
   - `async _post(endpoint: str, payload: dict[str, Any]) -> dict[str, Any]` - Low-level HTTP POST with error handling
   - `_parse_response(data: dict[str, Any]) -> LLMResponse` - Parses provider response into LLMResponse
-- **Dependencies**: `LLMResponse`, `ToolCall`, `RateLimitError`, `PermanentError`, `ProviderError`
+- **Dependencies**: `LLMResponse`, `ToolCall`, `RateLimitError`, `PermanentError`, `ProviderError` (all exception types now imported from `errors.py`; also uses `_classify_http_error` internally to map HTTP status codes to exceptions)
+
+### Module: `errors.py`
+
+- **Location**: `executionkit/errors.py`
+- **Purpose**: Exception hierarchy for all ExecutionKit errors, extracted from `provider.py` to give errors a dedicated module with a single responsibility
+- **Exports**:
+  - `ExecutionKitError` — base exception carrying `cost: TokenUsage` and `metadata: dict`
+  - `LLMError` — base for provider/transport failures
+  - `RateLimitError` — HTTP 429; includes `retry_after: float | None`
+  - `PermanentError` — non-retryable errors (auth failure, 404)
+  - `ProviderError` — retryable errors (5xx, network timeout)
+  - `PatternError` — base for pattern-level failures
+  - `BudgetExhaustedError` — token/call budget exceeded before next dispatch
+  - `ConsensusFailedError` — voting strategy could not be satisfied
+  - `MaxIterationsError` — iterative pattern hit iteration limit without converging
+- **Dependencies**: `types.py` (`TokenUsage`)
+- **Backwards compatibility**: `provider.py` re-exports all nine names using the `Name as Name` idiom so existing imports from `executionkit.provider` continue to work without change
+
+---
 
 #### `CostTracker`
 - **Type**: Regular class
@@ -211,9 +230,11 @@
 
 ### Exception Classes
 
+> **Note on module location**: All nine exception classes were extracted from `provider.py` into `executionkit/errors.py`. `provider.py` re-exports all of them using the `Name as Name` idiom (e.g., `from executionkit.errors import ExecutionKitError as ExecutionKitError`) to preserve backwards compatibility. Import paths through `provider.py` or directly from `errors.py` are both supported.
+
 #### `ExecutionKitError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:13-23`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Base exception for all ExecutionKit errors; carries cost and metadata
 - **Attributes**:
   - `cost: TokenUsage` - Token cost of failed operation (default: empty TokenUsage)
@@ -224,14 +245,14 @@
 
 #### `LLMError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:26-27`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Base class for provider and transport failures (network, protocol, auth)
 - **Parent**: `ExecutionKitError`
 - **Dependencies**: `ExecutionKitError`
 
 #### `RateLimitError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:30-40`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Raised for HTTP 429 rate limit responses; includes retry timing info
 - **Attributes**:
   - `retry_after: float | None` - Seconds to wait before retry
@@ -242,42 +263,42 @@
 
 #### `PermanentError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:43-44`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Non-retryable provider error (authentication failure, 404, etc.)
 - **Parent**: `LLMError`
 - **Dependencies**: `LLMError`
 
 #### `ProviderError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:47-48`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Retryable provider or transport error (5xx, network timeout, etc.)
 - **Parent**: `LLMError`
 - **Dependencies**: `LLMError`
 
 #### `PatternError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:51-52`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Base class for pattern-level execution failures (e.g., convergence issues)
 - **Parent**: `ExecutionKitError`
 - **Dependencies**: `ExecutionKitError`
 
 #### `BudgetExhaustedError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:55-56`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Raised when remaining token budget is insufficient for next dispatch
 - **Parent**: `PatternError`
 - **Dependencies**: `PatternError`
 
 #### `ConsensusFailedError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:59-60`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Raised when consensus cannot be established among LLM outputs
 - **Parent**: `PatternError`
 - **Dependencies**: `PatternError`
 
 #### `MaxIterationsError`
 - **Type**: Exception subclass
-- **Location**: `executionkit/provider.py:63-64`
+- **Location**: `executionkit/errors.py` (re-exported from `executionkit/provider.py` for backwards compatibility)
 - **Description**: Raised when iterative pattern (e.g., refine_loop) fails to converge within iteration limit
 - **Parent**: `PatternError`
 - **Dependencies**: `PatternError`
@@ -410,6 +431,16 @@
 
 #### Helper Functions in `provider.py` (private utilities)
 
+##### `_classify_http_error(status: int, raw: dict[str, Any], retry_after: float, *, cause: BaseException) -> NoReturn`
+- **Location**: `executionkit/provider.py`
+- **Description**: Centralizes HTTP status code → exception mapping; raises the appropriate typed exception for an HTTP error response (`RateLimitError` for 429, `PermanentError` for 401/403/404, `ProviderError` for all other non-2xx codes), chained from `cause`. Previously this logic was duplicated inside both `_post_httpx` and `_post_urllib`; extracting it eliminates the duplication and ensures consistent error semantics regardless of which HTTP backend is used.
+- **Parameters**:
+  - `status: int` - HTTP response status code
+  - `raw: dict[str, Any]` - Parsed response body
+  - `retry_after: float` - Retry delay carried on `RateLimitError.retry_after` for 429 responses; `cause: BaseException` (keyword-only) - original exception, set as `__cause__` of the raised error
+- **Raises**: `RateLimitError`, `PermanentError`, or `ProviderError` (always raises; return type is `NoReturn`)
+- **Dependencies**: `RateLimitError`, `PermanentError`, `ProviderError`, `_format_http_error`, `_parse_retry_after`
+
 ##### `_first_choice(data: dict[str, Any]) -> dict[str, Any]`
 - **Location**: `executionkit/provider.py:195-202`
 - **Description**: Extracts first choice from provider response dict
@@ -448,7 +479,8 @@
 ### Internal Dependencies
 
 - **From other executionkit modules**:
-  - `executionkit.provider`: `LLMProvider`, `ToolCallingProvider`, `ExecutionKitError`, `LLMError`, `LLMResponse`, `RateLimitError`, `PermanentError`, `ProviderError`, `PatternError`, `BudgetExhaustedError`, `ConsensusFailedError`, `MaxIterationsError`, `ToolCall`, `Provider`
+  - `executionkit.errors`: `ExecutionKitError`, `LLMError`, `RateLimitError`, `PermanentError`, `ProviderError`, `PatternError`, `BudgetExhaustedError`, `ConsensusFailedError`, `MaxIterationsError` (canonical source; `provider.py` re-exports all of these via `Name as Name`)
+  - `executionkit.provider`: `LLMProvider`, `ToolCallingProvider`, `LLMResponse`, `ToolCall`, `Provider` (exception names also importable here for backwards compatibility)
   - `executionkit.types`: `TokenUsage`, `PatternResult`, `Tool`, `VotingStrategy`, `Evaluator`
   - `executionkit.compose`: `PatternStep`, `pipe`
   - `executionkit.cost`: `CostTracker`
@@ -496,13 +528,8 @@ classDiagram
     }
 
     namespace ProviderAbstraction {
-        class provider {
+        class errors {
             <<module>>
-            +LLMProvider Protocol
-            +ToolCallingProvider Protocol
-            +LLMResponse
-            +ToolCall
-            +Provider Class
             +ExecutionKitError Exception
             +LLMError Exception
             +RateLimitError Exception
@@ -513,6 +540,16 @@ classDiagram
             +ConsensusFailedError Exception
             +MaxIterationsError Exception
         }
+        class provider {
+            <<module>>
+            +LLMProvider Protocol
+            +ToolCallingProvider Protocol
+            +LLMResponse
+            +ToolCall
+            +Provider Class
+            +_classify_http_error Function
+            ~re-exports errors via Name as Name
+        }
     }
 
     namespace CostManagement {
@@ -561,14 +598,18 @@ classDiagram
         }
     }
 
-    %% Provider depends on types
+    %% errors depends on types (TokenUsage used by ExecutionKitError)
+    errors --> types: imports TokenUsage
+
+    %% Provider depends on types and errors
     provider --> types: imports TokenUsage
+    provider --> errors: imports exception hierarchy (re-exports via Name as Name)
 
     %% Cost depends on types and provider
     cost --> types: imports TokenUsage
     cost --> provider: imports LLMResponse
     
-    %% Compose depends on types and provider
+    %% Compose depends on types and provider/errors
     compose --> types: imports PatternResult, TokenUsage
     compose --> provider: imports LLMProvider, ExecutionKitError
     
diff --git a/docs/c4/c4-component-provider-layer.md b/docs/c4/c4-component-provider-layer.md
index c6760cb..f4a9185 100644
--- a/docs/c4/c4-component-provider-layer.md
+++ b/docs/c4/c4-component-provider-layer.md
@@ -8,15 +8,17 @@
 | **Type** | Component |
 | **Technology** | Python 3.10+, stdlib `urllib` (HTTP), `typing.Protocol` |
 | **Purpose** | Defines the contract every LLM backend must fulfil, ships a generic OpenAI-compatible HTTP implementation, and declares the full error hierarchy used across the library |
+| **Files** | `provider.py`, `errors.py`, `types.py` |
 
 ## Software Features
 
 - **LLMProvider protocol** — runtime-checkable duck-type interface for any LLM backend; requires only `async complete()`
 - **ToolCallingProvider protocol** — narrows `LLMProvider` to backends that expose function/tool calling (`supports_tools: Literal[True]`)
 - **Provider class** — zero-dependency HTTP client built on `urllib`; handles OpenAI-compatible chat-completions, tool-call parsing, rate-limit detection, and response normalisation
+- **`_classify_http_error()` function** — centralises HTTP status code → exception mapping for all HTTP backends (`_post_httpx` and `_post_urllib`); eliminates previously duplicated branching logic and ensures consistent error semantics regardless of transport
 - **LLMResponse dataclass** — structured, immutable view of one completion: text content, tool calls, finish reason, token usage, and raw provider data
 - **ToolCall dataclass** — single tool invocation (id, name, parsed arguments)
-- **Exception hierarchy** — seven typed exceptions from `ExecutionKitError` root, giving callers precise error semantics for retry decisions, budget accounting, and pattern-level failures
+- **Exception hierarchy** (`errors.py`) — nine typed exceptions from `ExecutionKitError` root, extracted into a dedicated module; `provider.py` re-exports all nine via `Name as Name` for backwards compatibility; gives callers precise error semantics for retry decisions, budget accounting, and pattern-level failures
 
 ## Code Elements
 
@@ -25,17 +27,18 @@
 | `LLMProvider` | Protocol (runtime-checkable) | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:67-77` |
 | `ToolCallingProvider` | Protocol | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:80-82` |
 | `Provider` | Dataclass / HTTP client | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:121-192` |
+| `_classify_http_error` | Private function | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py` |
 | `LLMResponse` | Frozen dataclass | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:92-118` |
 | `ToolCall` | Frozen dataclass | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:85-89` |
-| `ExecutionKitError` | Base exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:13-23` |
-| `LLMError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:26-27` |
-| `RateLimitError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:30-40` |
-| `PermanentError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:43-44` |
-| `ProviderError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:47-48` |
-| `PatternError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:51-52` |
-| `BudgetExhaustedError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:55-56` |
-| `ConsensusFailedError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:59-60` |
-| `MaxIterationsError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `provider.py:63-64` |
+| `ExecutionKitError` | Base exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
+| `LLMError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
+| `RateLimitError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
+| `PermanentError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
+| `ProviderError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
+| `PatternError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
+| `BudgetExhaustedError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
+| `ConsensusFailedError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
+| `MaxIterationsError` | Exception | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `errors.py` (re-exported from `provider.py`) |
 | `TokenUsage` | Frozen dataclass | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `types.py:14-25` |
 | `VotingStrategy` | StrEnum | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `types.py:58-60` |
 | `Tool` | Frozen dataclass | [c4-code-src-executionkit.md](c4-code-src-executionkit.md) → `types.py:39-55` |
@@ -148,7 +151,40 @@ class Tool:
 title: C4 Component — Provider Layer
 ---
 classDiagram
-    namespace ProviderLayer {
+    namespace ErrorsModule {
+        class ExecutionKitError {
+            <<Exception>>
+            +cost: TokenUsage
+            +metadata: dict
+        }
+        class LLMError {
+            <<Exception>>
+        }
+        class RateLimitError {
+            <<Exception>>
+            +retry_after: float | None
+        }
+        class PermanentError {
+            <<Exception>>
+        }
+        class ProviderError {
+            <<Exception>>
+        }
+        class PatternError {
+            <<Exception>>
+        }
+        class BudgetExhaustedError {
+            <<Exception>>
+        }
+        class ConsensusFailedError {
+            <<Exception>>
+        }
+        class MaxIterationsError {
+            <<Exception>>
+        }
+    }
+
+    namespace ProviderModule {
         class LLMProvider {
             <<Protocol, runtime_checkable>>
             +complete(messages, ...) LLMResponse
@@ -185,6 +221,9 @@ classDiagram
             +name: str
             +arguments: dict
         }
+    }
+
+    namespace TypesModule {
         class PatternResult~T~ {
             <<FrozenDataclass>>
             +value: T
@@ -213,42 +252,9 @@ classDiagram
             MAJORITY
             UNANIMOUS
         }
-        class ExecutionKitError {
-            <<Exception>>
-            +cost: TokenUsage
-            +metadata: dict
-        }
-        class LLMError {
-            <<Exception>>
-        }
-        class RateLimitError {
-            <<Exception>>
-            +retry_after: float | None
-        }
-        class PermanentError {
-            <<Exception>>
-        }
-        class ProviderError {
-            <<Exception>>
-        }
-        class PatternError {
-            <<Exception>>
-        }
-        class BudgetExhaustedError {
-            <<Exception>>
-        }
-        class ConsensusFailedError {
-            <<Exception>>
-        }
-        class MaxIterationsError {
-            <<Exception>>
-        }
     }
 
-    ToolCallingProvider --|> LLMProvider : extends
-    Provider ..|> ToolCallingProvider : implements
-    LLMResponse *-- ToolCall : contains
-    PatternResult *-- TokenUsage : contains
+    %% errors.py hierarchy
     ExecutionKitError <|-- LLMError
     ExecutionKitError <|-- PatternError
     LLMError <|-- RateLimitError
@@ -258,4 +264,13 @@ classDiagram
     PatternError <|-- ConsensusFailedError
     PatternError <|-- MaxIterationsError
     ExecutionKitError *-- TokenUsage : carries cost
+
+    %% provider.py / types.py structural relationships
+    Provider ..|> ToolCallingProvider : implements
+    ToolCallingProvider --|> LLMProvider : extends
+    LLMResponse *-- ToolCall : contains
+    PatternResult *-- TokenUsage : contains
+
+    %% provider.py imports from errors.py
+    Provider ..> ExecutionKitError : re-exports via Name as Name
 ```
diff --git a/docs/c4/c4-component-reasoning-patterns.md b/docs/c4/c4-component-reasoning-patterns.md
index 620df32..27ff03b 100644
--- a/docs/c4/c4-component-reasoning-patterns.md
+++ b/docs/c4/c4-component-reasoning-patterns.md
@@ -14,7 +14,7 @@
 - **Consensus pattern** (`consensus.py`) — generates `num_samples` parallel LLM completions at high temperature, then applies `MAJORITY` or `UNANIMOUS` voting to select the winner; reports agreement ratio in metadata
 - **Refine loop pattern** (`refine_loop.py`) — wraps the provider in `_TrackedProvider` for per-call budget enforcement, iterates generate → score → refine until quality target is met, `ConvergenceDetector` provides patience-based early stopping, default evaluator uses an LLM to score on 0–10 scale
 - **ReAct loop pattern** (`react_loop.py`) — maintains a growing message history, dispatches tool calls with timeout enforcement, truncates observations to fit context window, returns the first response that contains no tool calls as the final answer
-- **Base utilities** (`base.py`) — `checked_complete` performs pre-call budget validation then records usage; `_TrackedProvider` wraps any `LLMProvider` with budget+retry+truncation-warning logic; `validate_score` guards against NaN and out-of-range scores
+- **Base utilities** (`base.py`) — `checked_complete` performs pre-call budget validation (via `_check_budget`) then records usage; `_check_budget` iterates `_BUDGET_FIELD_LABELS` with `getattr()` to validate all budget fields in a single loop, replacing 8 per-field `if`-blocks; `_TrackedProvider` wraps any `LLMProvider` with budget+retry+truncation-warning logic and exposes `supports_tools` as a property delegating to the wrapped provider rather than a hardcoded `Literal[True]`; `validate_score` guards against NaN and out-of-range scores
 
 ## Code Elements
 
@@ -24,7 +24,9 @@
 | `refine_loop` | Async function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `refine_loop.py:18-95` |
 | `react_loop` | Async function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `react_loop.py:16-88` |
 | `checked_complete` | Async function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py:24-55` |
-| `_TrackedProvider` | Class | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py:69-110` |
+| `_check_budget` | Private function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py` |
+| `_BUDGET_FIELD_LABELS` | Module-level dict | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py` |
+| `_TrackedProvider` | Class (`supports_tools` now a property) | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py:69-110` |
 | `validate_score` | Function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `base.py:18-21` |
 | `_default_evaluator` | Private async function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `refine_loop.py:98-116` |
 | `_parse_score` | Private function | [c4-code-src-executionkit-patterns.md](c4-code-src-executionkit-patterns.md) → `refine_loop.py:119-135` |
@@ -117,7 +119,7 @@ title: C4 Component — Reasoning Patterns
 ---
 flowchart TB
     subgraph ReasoningPatterns["Reasoning Patterns Component"]
-        BASE["base.py\nchecked_complete()\n_TrackedProvider\nvalidate_score()"]
+        BASE["base.py\nchecked_complete()\n_check_budget() / _BUDGET_FIELD_LABELS\n_TrackedProvider (supports_tools: property)\nvalidate_score()"]
         CONS["consensus.py\nconsensus()"]
         REFINE["refine_loop.py\nrefine_loop()\n_default_evaluator()\n_parse_score()\n_build_refinement_prompt()"]
         REACT["react_loop.py\nreact_loop()\n_execute_tool_call()\n_truncate()"]
diff --git a/docs/c4/c4-component.md b/docs/c4/c4-component.md
index e169ab4..faf4334 100644
--- a/docs/c4/c4-component.md
+++ b/docs/c4/c4-component.md
@@ -6,7 +6,7 @@ This document is the master index for all C4 Component-level documentation for t
 
 | # | Component | Files | Responsibility |
 |---|-----------|-------|----------------|
-| 1 | [Provider Layer](c4-component-provider-layer.md) | `provider.py`, `types.py` | LLM provider protocols, concrete HTTP client, all data types, error hierarchy |
+| 1 | [Provider Layer](c4-component-provider-layer.md) | `provider.py`, `errors.py`, `types.py` | LLM provider protocols, concrete HTTP client, all data types, error hierarchy |
 | 2 | [Execution Engine](c4-component-execution-engine.md) | `engine/retry.py`, `engine/parallel.py`, `engine/convergence.py`, `engine/json_extraction.py` | Retry/backoff, bounded concurrency, convergence detection, JSON extraction |
 | 3 | [Reasoning Patterns](c4-component-reasoning-patterns.md) | `patterns/consensus.py`, `patterns/refine_loop.py`, `patterns/react_loop.py`, `patterns/base.py` | Three composable LLM reasoning strategies with shared budget/cost base |
 | 4 | [Composition & Session](c4-component-composition-session.md) | `compose.py`, `kit.py`, `cost.py`, `__init__.py` (sync wrappers) | Pipeline chaining, session defaults, cost tracking, sync convenience API |
@@ -27,7 +27,7 @@ flowchart TB
         direction TB
 
         subgraph PL["Provider Layer"]
-            PLcore["LLMProvider / ToolCallingProvider\nProvider (HTTP client)\nLLMResponse / ToolCall\nPatternResult / TokenUsage / Tool\nVotingStrategy / Evaluator\nException hierarchy"]
+            PLcore["LLMProvider / ToolCallingProvider\nProvider (HTTP client) / _classify_http_error\nLLMResponse / ToolCall\nPatternResult / TokenUsage / Tool\nVotingStrategy / Evaluator\nerrors.py: Exception hierarchy (9 classes)\nprovider.py re-exports exceptions via Name as Name"]
         end
 
         subgraph EE["Execution Engine"]

From e443e33d755e6890b9142f65ae14d2582a4e565a Mon Sep 17 00:00:00 2001
From: "anthropic-code-agent[bot]" <242468646+Claude@users.noreply.github.com>
Date: Sat, 11 Apr 2026 22:55:37 +0000
Subject: [PATCH 6/6] docs: fix documentation inconsistencies per code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Corrects 4 documentation issues identified by @copilot-pull-request-reviewer:

1. _classify_http_error signature: Updated from incorrect (status_code, payload, headers) → ExecutionKitError to actual (status: int, raw: dict, retry_after: float, *, cause: BaseException) → NoReturn. Removed non-existent _parse_retry_after dependency.

2. _check_budget signature: Updated from incorrect (tracker: CostTracker, budget: TokenUsage) to actual (budget: TokenUsage, current: TokenUsage, fields: tuple[str, ...], *, sentinel_suffix: str, exceeded_suffix: str).

3. RateLimitError.retry_after type: Changed from float | None to float in both c4-component-provider-layer.md and c4-code-src-executionkit.md to match implementation (defaults to 1.0, never None).

4. isinstance comment in provider.py: Clarified that @runtime_checkable only checks attribute presence, not value. Simplified explanation to avoid implying isinstance reflects tool capability.

All changes verified against actual implementations in errors.py, provider.py, and patterns/base.py.

Agent-Logs-Url: https://github.com/tafreeman/executionkit/sessions/9d19e401-43ac-4ffc-a415-7a20d7589d23

Co-authored-by: tafreeman <72930272+tafreeman@users.noreply.github.com>
---
 docs/c4/c4-code-src-executionkit-patterns.md | 13 ++++++++-----
 docs/c4/c4-code-src-executionkit.md          | 17 +++++++++--------
 docs/c4/c4-component-provider-layer.md       |  2 +-
 executionkit/provider.py                     |  7 ++++---
 4 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/docs/c4/c4-code-src-executionkit-patterns.md b/docs/c4/c4-code-src-executionkit-patterns.md
index 97e6447..c499ae0 100644
--- a/docs/c4/c4-code-src-executionkit-patterns.md
+++ b/docs/c4/c4-code-src-executionkit-patterns.md
@@ -35,15 +35,18 @@
 - **Return Type**: `LLMResponse` - Response from the LLM provider
 - **Raises**: `BudgetExhaustedError` if any budget constraint would be exceeded
 
-#### `_check_budget(tracker: CostTracker, budget: TokenUsage) -> None`
-- **Description**: Validates that the current accumulated cost does not exceed any field of the budget constraint. Replaces 8 per-field `if`-blocks with a single field loop using `getattr()` over `_BUDGET_FIELD_LABELS`. Raises `BudgetExhaustedError` with a descriptive message naming the exceeded field if any constraint is violated.
+#### `_check_budget(budget: TokenUsage, current: TokenUsage, fields: tuple[str, ...], *, sentinel_suffix: str, exceeded_suffix: str) -> None`
+- **Description**: Validates selected `TokenUsage` fields by comparing the configured `budget` against the current accumulated `TokenUsage`. Iterates over the supplied `fields` and raises `BudgetExhaustedError` with a descriptive message if a field has reached a sentinel condition or would exceed its allowed limit.
 - **Location**: `base.py`
-- **Dependencies**: `CostTracker`, `TokenUsage`, `BudgetExhaustedError`, `_BUDGET_FIELD_LABELS`
+- **Dependencies**: `TokenUsage`, `BudgetExhaustedError`, `_BUDGET_FIELD_LABELS`
 - **Parameters**:
-  - `tracker: CostTracker` - Current accumulated cost tracker
   - `budget: TokenUsage` - Maximum allowed token/call counts
+  - `current: TokenUsage` - Current accumulated token/call usage to validate against the budget
+  - `fields: tuple[str, ...]` - Names of the `TokenUsage` fields to check
+  - `sentinel_suffix: str` - Message suffix used when a budget field is already at its sentinel/exhausted value
+  - `exceeded_suffix: str` - Message suffix used when the current usage would exceed the configured budget
 - **Return Type**: `None`
-- **Raises**: `BudgetExhaustedError` naming the exceeded field (e.g., "input_tokens", "llm_calls")
+- **Raises**: `BudgetExhaustedError` whose message identifies the field that hit a sentinel condition or exceeded its budget by its human-readable label from `_BUDGET_FIELD_LABELS` (e.g., "Input token", "LLM call")
 
 #### `_BUDGET_FIELD_LABELS`
 - **Description**: Module-level dict mapping `TokenUsage` field names to human-readable label strings used in `BudgetExhaustedError` messages. Drives the field-loop in `_check_budget`, making it easy to add new budget dimensions without modifying control flow.
diff --git a/docs/c4/c4-code-src-executionkit.md b/docs/c4/c4-code-src-executionkit.md
index 8eda740..94e8981 100644
--- a/docs/c4/c4-code-src-executionkit.md
+++ b/docs/c4/c4-code-src-executionkit.md
@@ -107,7 +107,7 @@
 - **Exports**:
   - `ExecutionKitError` — base exception carrying `cost: TokenUsage` and `metadata: dict`
   - `LLMError` — base for provider/transport failures
-  - `RateLimitError` — HTTP 429; includes `retry_after: float | None`
+  - `RateLimitError` — HTTP 429; includes `retry_after: float`
   - `PermanentError` — non-retryable errors (auth failure, 404)
   - `ProviderError` — retryable errors (5xx, network timeout)
   - `PatternError` — base for pattern-level failures
@@ -431,15 +431,16 @@
 
 #### Helper Functions in `provider.py` (private utilities)
 
-##### `_classify_http_error(status_code: int, payload: dict[str, Any], headers: Any) -> ExecutionKitError`
+##### `_classify_http_error(status: int, raw: dict[str, Any], retry_after: float, *, cause: BaseException) -> NoReturn`
 - **Location**: `executionkit/provider.py`
-- **Description**: Centralizes HTTP status code → exception mapping; converts an HTTP error response into the appropriate typed exception (`RateLimitError` for 429, `PermanentError` for 4xx, `ProviderError` for 5xx). Previously this logic was duplicated inside both `_post_httpx` and `_post_urllib`; extracting it eliminates the duplication and ensures consistent error semantics regardless of which HTTP backend is used.
+- **Description**: Centralizes HTTP status code → exception mapping by raising the appropriate typed exception for an HTTP failure (`RateLimitError` for 429, `PermanentError` for other 4xx responses, `ProviderError` for 5xx responses). This helper is used by the HTTP backends so they share identical error classification behavior, and it chains the original triggering exception, passed through the required keyword-only `cause` argument, as the cause of the raised error.
 - **Parameters**:
-  - `status_code: int` - HTTP response status code
-  - `payload: dict[str, Any]` - Parsed response body
-  - `headers: Any` - Response headers (used to extract `Retry-After` for 429 responses)
-- **Returns**: A typed `ExecutionKitError` subclass instance (never raises)
-- **Dependencies**: `RateLimitError`, `PermanentError`, `ProviderError`, `_format_http_error`, `_parse_retry_after`
+  - `status: int` - HTTP response status code
+  - `raw: dict[str, Any]` - Parsed JSON body from the response (may be empty dict)
+  - `retry_after: float` - Value of the `Retry-After` header in seconds
+  - `cause: BaseException` - Original exception to chain as the raised error's cause
+- **Raises**: Always raises a typed `ExecutionKitError` subclass; does not return
+- **Dependencies**: `RateLimitError`, `PermanentError`, `ProviderError`, `_format_http_error`
 
 ##### `_first_choice(data: dict[str, Any]) -> dict[str, Any]`
 - **Location**: `executionkit/provider.py:195-202`
diff --git a/docs/c4/c4-component-provider-layer.md b/docs/c4/c4-component-provider-layer.md
index f4a9185..0f28fb8 100644
--- a/docs/c4/c4-component-provider-layer.md
+++ b/docs/c4/c4-component-provider-layer.md
@@ -162,7 +162,7 @@ classDiagram
         }
         class RateLimitError {
             <<Exception>>
-            +retry_after: float | None
+            +retry_after: float
         }
         class PermanentError {
             <<Exception>>
diff --git a/executionkit/provider.py b/executionkit/provider.py
index e880527..953e32d 100644
--- a/executionkit/provider.py
+++ b/executionkit/provider.py
@@ -171,9 +171,10 @@ class Provider:
     # this attribute verbatim — delegate instead:
     #   @property
     #   def supports_tools(self) -> bool: return self._inner.supports_tools
-    # Hardcoding True in a wrapper causes isinstance(wrapper, ToolCallingProvider)
-    # to return True even when the inner provider cannot handle tools.
-    # Ref: PEP 544 runtime_checkable only checks presence, not value.
+    # For @runtime_checkable protocols, isinstance(wrapper, ToolCallingProvider)
+    # only checks that the required attribute exists, not whether its value is
+    # True. Delegating keeps the wrapper's reported capability aligned with the
+    # inner provider's actual tool support.
     supports_tools: Literal[True] = field(default=True, init=False)
     # Derived state — excluded from repr/eq/hash; initialized only in __post_init__
     _client: Any = field(