# Tool Filtering: Token Savings Verification

Verify that filtering tools via middleware actually reduces input tokens.

**Hypothesis**: Hiding unused tools from the LLM saves tokens because tool definitions
aren't included in the request.

**Method**:

1. Create an agent with 10 registered tools
2. Compare input tokens: all tools visible vs. all tools filtered out by middleware
   (this run hides every tool; no `tool_search` tool is registered)
3. Calculate the savings


## Setup


In [1]:
from collections.abc import Callable, Sequence
from typing import Any

from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain.agents.middleware.types import (
    AgentMiddleware,
    AgentState,
    ModelRequest,
    ModelResponse,
)
from langchain_anthropic import ChatAnthropic
from langchain_core.tools import BaseTool, tool
from langgraph.graph.state import CompiledStateGraph

load_dotenv()

True

## Define Many Tools

Create 10 tools with varying description lengths to simulate a real registry.


In [2]:
@tool
def get_weather(city: str) -> str:
    """Get current weather conditions for a specified city including temperature, humidity, and conditions."""
    # Canned reading: the same conditions are reported for every city.
    reading = "Sunny, 22°C, 65% humidity"
    return f"Weather in {city}: {reading}"


@tool
def get_forecast(city: str, days: int = 3) -> str:
    """Get weather forecast for a city for the specified number of days ahead."""
    # Fixed outlook; `days` only changes the label, not the sequence.
    outlook = "Sunny → Cloudy → Rain"
    return f"{days}-day forecast for {city}: {outlook}"


@tool
def send_email(to: str, subject: str, body: str) -> str:
    """Send an email to a recipient with the specified subject and body content."""
    # Nothing is actually sent; `body` is accepted but not echoed in the confirmation.
    header = f"Email sent to {to}"
    return f"{header}: {subject}"


@tool
def read_emails(folder: str = "inbox", limit: int = 10) -> str:
    """Read emails from a specified folder with optional limit on number of results."""
    # Stub summary: reports the requested limit and folder without reading any mail.
    summary = f"{limit} emails from {folder}"
    return summary


@tool
def create_calendar_event(title: str, date: str, duration_minutes: int = 60) -> str:
    """Create a new calendar event with the specified title, date, and duration."""
    # Stub confirmation: echoes the arguments back, no calendar is modified.
    created = f"Event '{title}' created on {date}"
    return f"{created} for {duration_minutes} minutes"


@tool
def list_calendar_events(date: str) -> str:
    """List all calendar events for a specific date."""
    # Canned agenda: the same two events are returned for any date.
    agenda = "Meeting at 10am, Lunch at 12pm"
    return f"Events on {date}: {agenda}"


@tool
def query_sales(region: str, year: int = 2024) -> str:
    """Query sales data for a specific region and year from the database."""
    # Canned figures: no database is queried.
    figures = "$1.2M revenue, 150 deals"
    return f"Sales for {region} in {year}: {figures}"


@tool
def generate_report(report_type: str, format: str = "pdf") -> str:
    """Generate a business report of the specified type in the requested format."""
    # `format` shadows the builtin, but the name is part of the tool's argument
    # list, so it is kept as-is. No report file is produced.
    artifact = f"{report_type} report"
    return f"Generated {artifact} in {format} format"


@tool
def search_documents(query: str, max_results: int = 5) -> str:
    """Search internal documents and knowledge base for relevant information."""
    # Stub result: always claims exactly `max_results` hits, no search is run.
    hit_count = f"Found {max_results} documents"
    return f"{hit_count} matching '{query}'"


@tool
def translate_text(text: str, target_language: str) -> str:
    """Translate text to the specified target language using the translation service."""
    # Stub: no translation happens; the reply echoes a preview of the input.
    # Only append the ellipsis when the preview really cuts the text short, so
    # a short input is not reported as if it had been truncated.
    preview_len = 20
    preview = text if len(text) <= preview_len else f"{text[:preview_len]}..."
    return f"Translated to {target_language}: {preview}"


# Tool registry: every demo tool, keyed by the tool's own `name` attribute.
TOOL_REGISTRY: dict[str, BaseTool] = {
    registered.name: registered
    for registered in (
        get_weather,
        get_forecast,
        send_email,
        read_emails,
        create_calendar_event,
        list_calendar_events,
        query_sales,
        generate_report,
        search_documents,
        translate_text,
    )
}

# Iterating a dict yields its keys in insertion order, so this lists the names.
print(f"Registered {len(TOOL_REGISTRY)} tools: {list(TOOL_REGISTRY)}")

Registered 10 tools: ['get_weather', 'get_forecast', 'send_email', 'read_emails', 'create_calendar_event', 'list_calendar_events', 'query_sales', 'generate_report', 'search_documents', 'translate_text']


## Token Tracking Middleware


In [3]:
class TokenTracker:
    """Accumulate one usage record per model call and report totals."""

    def __init__(self) -> None:
        # Each entry holds "input_tokens", "output_tokens" and "tool_count".
        self.calls: list[dict[str, Any]] = []

    def record(self, input_tokens: int, output_tokens: int, tool_count: int) -> None:
        """Append a usage record for a single model call."""
        entry: dict[str, Any] = dict(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            tool_count=tool_count,
        )
        self.calls.append(entry)

    def total_input_tokens(self) -> int:
        """Return the sum of input tokens over all recorded calls."""
        total = 0
        for entry in self.calls:
            total += entry["input_tokens"]
        return total

    def reset(self) -> None:
        """Drop all records by rebinding `calls` to a fresh list."""
        self.calls = []


# Shared tracker instance used by the experiments below.
tracker = TokenTracker()

In [4]:
def get_tool_name(t: BaseTool | dict[str, Any]) -> str:
    """Return the tool's name, or "?" for a dict spec without a "name" key."""
    if not isinstance(t, BaseTool):
        # Dict-style tool spec: read the top-level "name" entry.
        return str(t.get("name", "?"))
    return t.name


class TokenLoggingMiddleware(AgentMiddleware[AgentState[Any], Any]):
    """Record and print token usage plus the number of tools on each model request."""

    def __init__(self, tracker: TokenTracker):
        self.tracker = tracker
        # Tool count seen by the most recent wrap_model_call; read in after_model.
        self._last_tool_count = 0

    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Note how many tools the request carries, then delegate to `handler` unchanged."""
        visible = len(request.tools)
        self._last_tool_count = visible
        return handler(request)

    def after_model(self, state: AgentState[Any], runtime: Any) -> dict[str, Any] | None:  # noqa: ARG002
        """Record the last message's usage metadata, if any; never updates state."""
        history = state.get("messages", [])
        if not history:
            return None

        # Messages without a (truthy) `usage_metadata` attribute are skipped.
        usage = getattr(history[-1], "usage_metadata", None)
        if not usage:
            return None

        tokens_in = usage.get("input_tokens", 0)
        tokens_out = usage.get("output_tokens", 0)
        tools_seen = self._last_tool_count
        self.tracker.record(
            input_tokens=tokens_in,
            output_tokens=tokens_out,
            tool_count=tools_seen,
        )
        summary = (
            f"  → {tokens_in:,} input tokens, "
            f"{tokens_out:,} output tokens, "
            f"{tools_seen} tools visible"
        )
        print(summary)
        return None

## Filter Middleware


In [5]:
class ToolFilterMiddleware(AgentMiddleware[AgentState[Any], Any]):
    """Restrict the tools passed to the model, simulating tool_search discovery."""

    def __init__(self, visible_tools: set[str] | None = None):
        # None keeps every tool, an empty set hides all of them, and any other
        # set keeps only the tools whose names it contains.
        self.visible_tools = visible_tools

    def _filter_tools(self, tools: Sequence[BaseTool | dict[str, Any]]) -> list[BaseTool | dict[str, Any]]:
        """Return a new list holding only the tools allowed by `visible_tools`."""
        allowed = self.visible_tools
        if allowed is None:
            return list(tools)
        return [candidate for candidate in tools if get_tool_name(candidate) in allowed]

    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Swap the request's tools for the filtered list before calling `handler`."""
        narrowed = request.override(tools=self._filter_tools(request.tools))
        return handler(narrowed)

## Test 1: All Tools Visible

Create agent with all 10 tools visible to the LLM.


In [6]:
# Start from an empty tracker so this test's total reflects only its own calls.
tracker.reset()

model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

# visible_tools=None disables filtering: every registered tool is passed through.
filter_all = ToolFilterMiddleware(visible_tools=None)
logging_middleware = TokenLoggingMiddleware(tracker)

agent_all_tools: CompiledStateGraph[Any] = create_agent(
    model=model,
    tools=[*TOOL_REGISTRY.values()],
    system_prompt=(
        "You are a helpful assistant. "
        "Answer questions directly without using tools unless necessary."
    ),
    # Filter is listed before the logger, matching the second experiment.
    middleware=[filter_all, logging_middleware],
)

print("Agent created with all tools visible")

Agent created with all tools visible


In [7]:
# Ask something trivial so the model can answer without calling any tool.
print("Question: What is 2+2?")
for update in agent_all_tools.stream({"messages": [{"role": "user", "content": "What is 2+2?"}]}):
    if "model" not in update:
        continue
    reply = update["model"]["messages"][0]
    # Only print when the message carries plain string content.
    if isinstance(getattr(reply, "content", None), str):
        print(f"Answer: {reply.content}")

# The tracker was reset before this run, so the total covers this test only.
all_tools_tokens = tracker.total_input_tokens()
print(f"\nTotal input tokens (all tools): {all_tools_tokens:,}")

Question: What is 2+2?


Answer: 2 + 2 = 4
  → 1,292 input tokens, 13 output tokens, 10 tools visible

Total input tokens (all tools): 1,292


## Test 2: No Tools Visible (Filtered)

Create an agent with the same tools registered, but filtered so that none are visible to the LLM.


In [8]:
# Clear the records from the previous test before measuring again.
tracker.reset()

# An empty set hides every tool from the model.
filter_none = ToolFilterMiddleware(visible_tools=set())
logging_middleware2 = TokenLoggingMiddleware(tracker)

agent_no_tools: CompiledStateGraph[Any] = create_agent(
    model=model,
    # Same registration as the first agent; only the filter differs.
    tools=[*TOOL_REGISTRY.values()],
    system_prompt=(
        "You are a helpful assistant. "
        "Answer questions directly without using tools unless necessary."
    ),
    middleware=[filter_none, logging_middleware2],
)

print("Agent created with no tools visible (filtered)")

Agent created with no tools visible (filtered)


In [9]:
# Repeat the identical question so only the tool visibility differs.
print("Question: What is 2+2?")
for update in agent_no_tools.stream({"messages": [{"role": "user", "content": "What is 2+2?"}]}):
    if "model" not in update:
        continue
    reply = update["model"]["messages"][0]
    # Only print when the message carries plain string content.
    if isinstance(getattr(reply, "content", None), str):
        print(f"Answer: {reply.content}")

# The tracker was reset before this run, so the total covers this test only.
no_tools_tokens = tracker.total_input_tokens()
print(f"\nTotal input tokens (no tools): {no_tools_tokens:,}")

Question: What is 2+2?


Answer: 2 + 2 = 4
  → 29 input tokens, 13 output tokens, 0 tools visible

Total input tokens (no tools): 29


## Results


In [10]:
# Input tokens saved by hiding every tool, in absolute terms and as a share
# of the unfiltered run (guarded in case the baseline recorded no tokens).
savings = all_tools_tokens - no_tools_tokens
savings_pct = (savings / all_tools_tokens * 100) if all_tools_tokens > 0 else 0

# Take the tool count from the registry rather than hard-coding it, so the
# label and the per-tool average stay correct if tools are added or removed.
registered_tool_count = len(TOOL_REGISTRY)
# NOTE(review): this spreads the whole difference evenly over the tools; if the
# provider adds fixed overhead whenever any tool is present, the figure is an
# upper bound on the marginal cost of one definition — confirm.
tokens_per_tool = savings // registered_tool_count if registered_tool_count else 0

print("=" * 50)
print("TOKEN USAGE COMPARISON")
print("=" * 50)
print(f"All tools visible ({registered_tool_count} tools):  {all_tools_tokens:,} tokens")
print(f"No tools visible (filtered):   {no_tools_tokens:,} tokens")
print()
print(f"Savings: {savings:,} tokens ({savings_pct:.1f}%)")
print(f"Tokens per tool (avg): ~{tokens_per_tool:,} tokens/tool")
print("=" * 50)

TOKEN USAGE COMPARISON
All tools visible (10 tools):  1,292 tokens
No tools visible (filtered):   29 tokens

Savings: 1,263 tokens (97.8%)
Tokens per tool (avg): ~126 tokens/tool


## Conclusion

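Tool filtering via middleware **does** reduce input tokens. Each tool definition
consumes tokens for:

- Tool name
- Description
- Parameter schemas (names, types, descriptions)

The per-tool figure above is an average over the whole difference between the two
runs. It may include fixed overhead that the provider adds whenever any tool is
present, so treat it as an upper bound on the marginal cost of a single definition.

Because tool definitions are sent with every model call, for agents with many
tools (30-50+) the savings compound significantly across multiple turns in a
conversation.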
