modelcontextprotocol · maxisbey · Jun 30, 2026 · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/docs/advanced/caching.md b/docs/advanced/caching.md
diff --git a/docs/migration.md b/docs/migration.md
@@ -427,6 +427,10 @@ On `ClientSession`, `call_tool` / `get_prompt` / `read_resource` still return th
 
 For protocol 2026-07-28 over Streamable HTTP, a tool's input-schema property may carry an `x-mcp-header` annotation. When a tool the client has listed is called, each annotated argument is mirrored into an `Mcp-Param-<name>` request header (string verbatim, integer as decimal, boolean as `true`/`false`, base64-sentinel-wrapped when not header-safe; `null`/absent arguments are omitted). The argument is also left in the request body. `list_tools` caches a tool's annotations, so list a tool before calling it to enable mirroring; a tool the client never listed emits no `Mcp-Param-*` headers. Other transports ignore the annotation.
 
+### `Client` verbs may serve cached responses ([SEP-2549](https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2549))
+
+On protocol 2026-07-28, servers attach caching hints (`ttlMs`, `cacheScope`) to cacheable results, and `Client` now honors them: `list_tools`, `list_prompts`, `list_resources`, `list_resource_templates`, and `read_resource` may serve a cached response instead of making a round trip, for as long as the server's `ttlMs` says the result is fresh. With the default configuration, servers that send no hints, including every pre-2026 server, see identical call-for-call behavior, because hint-less results are not cached unless `CacheConfig.default_ttl_ms` is set above zero. Pass `Client(..., cache=False)` to disable the cache and restore v1 behavior exactly; per-call control (`cache_mode`) and configuration (`CacheConfig`) are described in [Caching hints](advanced/caching.md).
+
 ### Server extensions API ([SEP-2133](https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2133))
 
 `MCPServer` now accepts opt-in extensions that bundle MCP behaviour behind a

diff --git a/docs_src/caching/tutorial003.py b/docs_src/caching/tutorial003.py
@@ -1,15 +1,40 @@
+from dataclasses import dataclass
+from typing import Any
+
+from mcp_types import ListToolsResult, PaginatedRequestParams, Tool
+
 from mcp import Client
-from mcp.server import CacheHint, MCPServer
+from mcp.client import CacheConfig
+from mcp.server import CacheHint, Server, ServerRequestContext
+
+
+@dataclass
+class DemoState:
+    fetches: int = 0
+    now: float = 1_000_000.0
+
+
+state = DemoState()
+
 
-mcp = MCPServer("Weather", cache_hints={"tools/list": CacheHint(ttl_ms=60_000, scope="public")})
+async def list_tools(ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None) -> ListToolsResult:
+    state.fetches += 1
+    return ListToolsResult(tools=[Tool(name="forecast", input_schema={"type": "object"})])
 
 
-@mcp.tool()
-def forecast(city: str) -> str:
-    return f"Sunny in {city}"
+server = Server(
+    "Weather",
+    on_list_tools=list_tools,
+    cache_hints={"tools/list": CacheHint(ttl_ms=60_000, scope="public")},
+)
 
 
 async def main() -> None:
-    async with Client(mcp) as client:
-        tools = await client.list_tools()
-        print(f"{len(tools.tools)} tools, fresh for {tools.ttl_ms / 1000:.0f}s, scope={tools.cache_scope}")
+    start = state.fetches
+    async with Client(server, cache=CacheConfig(clock=lambda: state.now)) as client:
+        await client.list_tools()  # fetch 1
+        await client.list_tools()  # fresh for 60s: served from the cache
+        state.now += 60.0
+        await client.list_tools()  # the TTL ran out: fetch 2
+        await client.list_tools(cache_mode="refresh")  # skip the cache read: fetch 3
+        print(f"4 calls, {state.fetches - start} fetches")
diff --git a/src/mcp-types/mcp_types/methods.py b/src/mcp-types/mcp_types/methods.py
@@ -13,7 +13,7 @@
 from collections.abc import Mapping
 from functools import cache
 from types import MappingProxyType, UnionType
-from typing import Any, Final, TypeVar
+from typing import Any, Final, Literal, TypeVar, get_args
 
 from pydantic import BaseModel, TypeAdapter
 
@@ -23,9 +23,11 @@
 from mcp_types.version import KNOWN_PROTOCOL_VERSIONS
 
 __all__ = [
+    "CACHEABLE_METHODS",
     "CLIENT_NOTIFICATIONS",
     "CLIENT_REQUESTS",
     "CLIENT_RESULTS",
+    "CacheableMethod",
     "MONOLITH_NOTIFICATIONS",
     "MONOLITH_REQUESTS",
     "MONOLITH_RESULTS",
@@ -404,6 +406,24 @@
 """Monolith result model (or two-arm union) per request method."""
 
 
+CacheableMethod = Literal[
+    "prompts/list",
+    "resources/list",
+    "resources/read",
+    "resources/templates/list",
+    "server/discover",
+    "tools/list",
+]
+"""Methods whose results carry `ttlMs`/`cacheScope`; hand-written Literal, welded to `CACHEABLE_METHODS` by tests."""
+
+CACHEABLE_METHODS: Final[frozenset[str]] = frozenset(
+    method
+    for method, row in MONOLITH_RESULTS.items()
+    if any(issubclass(arm, types.CacheableResult) for arm in (get_args(row) if isinstance(row, UnionType) else (row,)))
+)
+"""Runtime mirror of `CacheableMethod`, derived from `MONOLITH_RESULTS`."""
+
+
 # --- Parse functions ---
 
 # Envelope stubs merged into bodies for surface validation (surface classes are full frames).

diff --git a/src/mcp/client/__init__.py b/src/mcp/client/__init__.py
@@ -2,8 +2,28 @@
 
 from mcp.client._input_required import InputRequiredRoundsExceededError
 from mcp.client._transport import Transport
+from mcp.client.caching import (
+    CacheConfig,
+    CacheEntry,
+    CacheKey,
+    CacheMode,
+    InMemoryResponseCacheStore,
+    ResponseCacheStore,
+)
 from mcp.client.client import Client
 from mcp.client.context import ClientRequestContext
 from mcp.client.session import ClientSession
 
-__all__ = ["Client", "ClientRequestContext", "ClientSession", "InputRequiredRoundsExceededError", "Transport"]
+__all__ = [
+    "CacheConfig",
+    "CacheEntry",
+    "CacheKey",
+    "CacheMode",
+    "Client",
+    "ClientRequestContext",
+    "ClientSession",
+    "InMemoryResponseCacheStore",
+    "InputRequiredRoundsExceededError",
+    "ResponseCacheStore",
+    "Transport",
+]