Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 56 additions & 2 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ Each entry has a fixed slot size determined at creation time. Keys and values th

| Parameter | Default | Description |
|-----------|---------|-------------|
| `max_key_size` | `512` bytes | Maximum pickle size of the key (args tuple) |
| `max_value_size` | `4096` bytes | Maximum pickle size of the return value |
| `max_key_size` | `512` bytes | Maximum serialized size of the key (args tuple) |
| `max_value_size` | `4096` bytes | Maximum serialized size of the return value |

```python
# Large values: increase max_value_size
Expand Down Expand Up @@ -185,6 +185,60 @@ with ThreadPoolExecutor(max_workers=8) as pool:
results = list(pool.map(work, range(100)))
```

## Key serialization behavior

### How cache keys are formed

Both backends build the cache key from `*args` and `**kwargs`:

- **No kwargs (common path):** The `args` tuple is used directly as the key.
- **With kwargs:** Keywords are sorted by name to ensure deterministic ordering,
then combined with args as `(args, tuple(sorted(kwargs.items())))`. This means
`fn(a=1, b=2)` and `fn(b=2, a=1)` always hit the same cache entry.

Arguments must be **hashable** (memory backend) or **serializable** (shared backend).

### Memory backend

Keys are stored as Python objects on the heap — no serialization. Lookups use
Python's built-in `hash()` and `==` via the C API. This is fast but means the
cache is inherently single-process (Python object pointers are not meaningful
across processes).

### Shared backend

Keys and values are serialized to bytes before storage. The serialization uses a
**fast-path binary format** for common primitive types, falling back to **pickle**
for everything else:

| Type | Format | Size |
|------|--------|------|
| `None` | Tag byte | 1 byte |
| `bool` | Tag byte | 1 byte |
| `int` (fits i64) | Tag + little-endian i64 | 9 bytes |
| `float` | Tag + IEEE 754 f64 | 9 bytes |
| `str` | Tag + 4-byte length + UTF-8 | 5 + len bytes |
| `bytes` | Tag + 4-byte length + data | 5 + len bytes |
| Flat tuple of above | Tag + count + elements | varies |
| Everything else | Pickle (protocol 5) | varies |

The fast-path avoids pickle overhead entirely for the most common argument types.
Large integers (outside i64 range), nested structures, dicts, sets, and custom
objects fall back to pickle automatically.

### Cross-process determinism

The shared backend must ensure that the same function arguments produce the same
cache key in every process. Python's `hash()` is randomized per-process for
`str` and `bytes` inputs (`PYTHONHASHSEED`), so the shared backend does **not** use it. Instead:

1. Keys are serialized to a deterministic byte sequence (the binary format above)
2. The bytes are hashed with **ahash** using fixed seeds (same seeds in every process)
3. Lookups verify matches using byte-level comparison (`memcmp`), not Python equality

This makes the shared backend completely immune to `PYTHONHASHSEED` — different
processes with different hash seeds will always agree on cache entries.

## Decorator parameters

| Parameter | Type | Default | Description |
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ classifiers = [
"Programming Language :: Rust",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"typing_extensions>=4.0; python_version < '3.10'",
]

[dependency-groups]
dev = [
Expand Down
11 changes: 9 additions & 2 deletions warp_cache/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
from warp_cache._decorator import cache
from warp_cache._decorator import BaseCacheInfo, CachedCallable, cache
from warp_cache._strategies import Backend
from warp_cache._warp_cache_rs import CacheInfo, SharedCacheInfo

__all__ = ["Backend", "cache", "CacheInfo", "SharedCacheInfo"]
__all__ = [
"Backend",
"BaseCacheInfo",
"CachedCallable",
"CacheInfo",
"SharedCacheInfo",
"cache",
]
40 changes: 35 additions & 5 deletions warp_cache/_decorator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
from __future__ import annotations

import asyncio
import sys
import warnings
from collections.abc import Callable
from typing import Any, TypeVar

if sys.version_info >= (3, 10):
from typing import ParamSpec, Protocol, runtime_checkable
else:
from typing_extensions import ParamSpec, Protocol, runtime_checkable

from warp_cache._strategies import Backend
from warp_cache._warp_cache_rs import (
CachedFunction,
Expand All @@ -14,6 +20,30 @@
)

F = TypeVar("F", bound=Callable[..., Any])
P = ParamSpec("P")
R = TypeVar("R")


@runtime_checkable
class BaseCacheInfo(Protocol):
    """Common interface for cache info objects from both backends.

    Both ``CacheInfo`` (memory backend) and ``SharedCacheInfo`` (shared
    backend) structurally satisfy this protocol, so callers can consume
    ``cache_info()`` results without caring which backend produced them.
    ``@runtime_checkable`` allows ``isinstance(obj, BaseCacheInfo)`` checks,
    which only verify that the attributes exist — not their types.
    """

    # Number of cache lookups that found an existing entry.
    @property
    def hits(self) -> int: ...
    # Number of cache lookups that fell through to the wrapped function.
    @property
    def misses(self) -> int: ...
    # Configured capacity of the cache (entry count).
    @property
    def max_size(self) -> int: ...
    # Number of entries currently stored.
    @property
    def current_size(self) -> int: ...


class CachedCallable(Protocol[P, R]):
    """Protocol for a cached function — preserves the original call signature.

    ``P``/``R`` carry the decorated function's parameter spec and return type
    through the decorator, so type checkers see the wrapped callable with its
    original signature plus the cache-management methods below.
    """

    # Call through the cache with the original function's exact signature.
    def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R: ...
    # Return hit/miss/size statistics (backend-specific object satisfying BaseCacheInfo).
    def cache_info(self) -> BaseCacheInfo: ...
    # Discard all cached entries.
    def cache_clear(self) -> None: ...


class AsyncCachedFunction:
Expand Down Expand Up @@ -48,7 +78,7 @@ def _make_inflight_key(
return args

async def __call__(self, *args: Any, **kwargs: Any) -> Any:
hit, cached = self._inner._probe(*args, **kwargs) # type: ignore[unresolved-attribute]
hit, cached = self._inner._probe(*args, **kwargs)
if hit:
return cached

Expand All @@ -58,7 +88,7 @@ async def __call__(self, *args: Any, **kwargs: Any) -> Any:
event = self._inflight.get(key)
if event is not None:
await event.wait()
hit, cached = self._inner._probe(*args, **kwargs) # type: ignore[unresolved-attribute]
hit, cached = self._inner._probe(*args, **kwargs)
if hit:
return cached
# Leader failed — loop back to check for a new leader
Expand Down Expand Up @@ -108,7 +138,7 @@ def cache(
backend: str | int | Backend = Backend.MEMORY,
max_key_size: int | None = None,
max_value_size: int | None = None,
) -> Callable[[F], F]:
) -> Callable[[Callable[P, R]], CachedCallable[P, R]]:
"""Caching decorator backed by a Rust store.

Supports both sync and async functions. The async detection happens
Expand All @@ -128,7 +158,7 @@ def cache(
"""
resolved_backend = _resolve_backend(backend)

def decorator(fn: F) -> F:
def decorator(fn: Callable[P, R]) -> CachedCallable[P, R]:
if resolved_backend == Backend.SHARED:
inner = SharedCachedFunction(
fn,
Expand All @@ -153,6 +183,6 @@ def decorator(fn: F) -> F:
if asyncio.iscoroutinefunction(fn):
return AsyncCachedFunction(fn, inner) # type: ignore[return-value]

return inner # type: ignore[return-value]
return inner

return decorator
2 changes: 2 additions & 0 deletions warp_cache/_warp_cache_rs.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class CachedFunction:
) -> None: ...
def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
def get(self, *args: Any, **kwargs: Any) -> Any | None: ...
def _probe(self, *args: Any, **kwargs: Any) -> tuple[bool, Any]: ...
def set(self, value: Any, *args: Any, **kwargs: Any) -> None: ...
def cache_info(self) -> CacheInfo: ...
def cache_clear(self) -> None: ...
Expand All @@ -50,6 +51,7 @@ class SharedCachedFunction:
) -> None: ...
def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
def get(self, *args: Any, **kwargs: Any) -> Any | None: ...
def _probe(self, *args: Any, **kwargs: Any) -> tuple[bool, Any]: ...
def set(self, value: Any, *args: Any, **kwargs: Any) -> None: ...
def cache_info(self) -> SharedCacheInfo: ...
def cache_clear(self) -> None: ...
Loading