# Agent Middleware Essentials

Add production-ready middleware for message management, limits, fallbacks, and dynamic prompts.

**What you'll learn:**
- Middleware adds production capabilities without changing agent logic
- Trimming messages keeps only the most recent messages so the conversation fits within the context window
- Delete messages removes specific or all messages from state
- SummarizationMiddleware prevents context overflow with summaries
- TodoListMiddleware provides task planning and tracking
- Limits control costs and API usage
- Fallbacks improve reliability
- Dynamic prompts enable context-aware behavior
- ShellToolMiddleware enables command execution
- FilesystemFileSearchMiddleware provides file search capabilities

In [1]:
import sys
# Put the parent directory on the import path — presumably so the
# `scripts` package imported in a later cell resolves; verify the layout.
sys.path.append('../')

import os
from dotenv import load_dotenv
# Load variables from a .env file into the environment (presumably the
# API credentials the model client needs — confirm against the .env file).
load_dotenv()

True

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_agent
from langchain.messages import HumanMessage
from langgraph.checkpoint.sqlite import SqliteSaver
import sqlite3
from scripts import base_tools

In [3]:
# Chat model shared by the agents created in this notebook.
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash')

# Setup checkpointer
# sqlite3.connect() creates the database file but not its parent
# directory, so make sure "db/" exists before connecting.
os.makedirs("db", exist_ok=True)
# check_same_thread=False lets the connection be used from threads other
# than the one that created it.
conn = sqlite3.connect("db/middleware_agent.db", check_same_thread=False)
checkpointer = SqliteSaver(conn)
checkpointer.setup()

## Trim Messages

Keep only the first message and the most recent messages so the conversation fits in the context window.

In [4]:
from langchain.messages import RemoveMessage
from langgraph.graph.message import REMOVE_ALL_MESSAGES
from langchain.agents import AgentState
from langchain.agents.middleware import before_model
from langgraph.runtime import Runtime
from typing import Any

@before_model
def trim_messages(state: AgentState, runtime: Runtime):
    """Shrink the history to its first message plus a short recent tail.

    Histories of three messages or fewer are left alone (returns None).
    Longer histories are rewritten as the very first message followed by
    the last three messages (even-length history) or the last four
    (odd-length history).
    """
    history = state["messages"]
    total = len(history)

    # Short conversations pass through unchanged.
    if total <= 3:
        return None

    tail_size = 3 if total % 2 == 0 else 4
    kept = [history[0], *history[-tail_size:]]

    # The REMOVE_ALL_MESSAGES marker goes first, followed by the messages
    # that should remain in state.
    return {"messages": [RemoveMessage(id=REMOVE_ALL_MESSAGES), *kept]}

agent = create_agent(
    model=model,
    tools=[],
    checkpointer=checkpointer,
    middleware=[trim_messages],
)

config = {"configurable": {"thread_id": "trim_session"}}

# Send the turns one after another on the same thread; `response` ends up
# holding the result of the final turn only.
for user_turn in (
    "hi, my name is Laxmi Kant",
    "write a short poem about cats",
    "now do the same but for dogs",
    "what's my name?",
):
    response = agent.invoke({"messages": user_turn}, config)

# Text of the last message in the final result
response['messages'][-1].content

'Your name is **Laxmi Kant**.'

## Delete Messages

Remove specific messages or clear entire history.

In [5]:
from langchain.agents.middleware import after_model

@after_model
def delete_old_messages(state: AgentState, runtime: Runtime):
    """Drop the two oldest messages once the history holds more than two.

    Returns None when there are two messages or fewer; otherwise returns a
    state update containing one RemoveMessage per message to delete.
    """
    history = state["messages"]

    # Nothing to prune yet.
    if len(history) <= 2:
        return None

    oldest_pair = history[:2]
    return {"messages": [RemoveMessage(id=msg.id) for msg in oldest_pair]}

# Same checkpointer as before, but a dedicated thread id for this demo.
config = {"configurable": {"thread_id": "delete_session"}}

agent = create_agent(
    model=model,
    tools=[],
    checkpointer=checkpointer,
    middleware=[delete_old_messages],
)

introduction = {"messages": "hi! I'm Laxmi Kant"}
follow_up = {"messages": "what's my name?"}

# Two turns on the same thread; only the second result is kept.
agent.invoke(introduction, config)
response = agent.invoke(follow_up, config)

response['messages'][-1].content

'Your name is Laxmi Kant.'

## SummarizationMiddleware

Automatically compress long conversations using summaries.

In [6]:
from langchain.agents.middleware import SummarizationMiddleware

# Summarizer gets its own model instance, separate from the agent's model.
summarizer = SummarizationMiddleware(
    model=ChatGoogleGenerativeAI(model='gemini-2.5-flash'),
    trigger=[("messages", 15)],  # Summarize when > 15 messages
    keep=("messages", 5),  # Keep last 5 unsummarized
)

agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=[summarizer],
    checkpointer=checkpointer,
)

config = {'configurable': {'thread_id': 'summary_session'}}
summary_request = {
    'messages': [HumanMessage("Search for Apple, Microsoft, and Tesla stock news")]
}
response = agent.invoke(summary_request, config)

# How many messages the result contains after the run
len(response['messages'])

8

## TodoListMiddleware

Equip agents with task planning and tracking for complex multi-step tasks.

In [7]:
from langchain.agents.middleware import TodoListMiddleware
from langchain.tools import tool

@tool
def read_file(path: str) -> str:
    """Read file contents.

    Returns the file's text decoded as UTF-8. If the file cannot be opened
    or decoded, returns a message starting with "Error reading file:"
    instead of raising.
    """
    try:
        # Explicit encoding: the platform default differs between systems,
        # which would make the same file read differently per machine.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, ValueError) as e:
        # OSError: missing file, permissions, path is a directory, ...
        # ValueError: undecodable bytes (UnicodeDecodeError) or invalid path.
        # The error is returned as text so the caller gets a result either way.
        return f"Error reading file: {e}"

@tool
def write_file(path: str, content: str) -> str:
    """Write content to file.

    Writes the text as UTF-8, replacing any existing file at the path.
    Returns "Successfully wrote to <path>" on success, or a message
    starting with "Error writing file:" if the write fails.
    """
    try:
        # Explicit encoding: with the platform default, text containing
        # characters outside the local code page could fail to encode.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
    except (OSError, ValueError) as e:
        # OSError: missing parent directory, permissions, ...
        # ValueError: unencodable text (UnicodeEncodeError) or invalid path.
        return f"Error writing file: {e}"
    return f"Successfully wrote to {path}"

# The two file tools defined above plus the todo-list middleware.
file_tools = [read_file, write_file]

agent = create_agent(
    model=model,
    tools=file_tools,
    checkpointer=checkpointer,
    middleware=[TodoListMiddleware()],
)

config = {'configurable': {'thread_id': 'todo_session'}}
file_task = HumanMessage(
    "Create a new file called test.txt with 'Hello World', then read it back"
)
response = agent.invoke({'messages': [file_task]}, config)

response['messages'][-1].content

[{'type': 'text',
  'text': "I created the file `test.txt` with the content 'Hello World' and then read it back. The content of the file is: 'Hello World'.",
  'extras': {'signature': 'CsEBAXLI2nyIB19MrS+AOeEEyL5712YVENaxwfKzYTBxk/JwYQtNvC/OEo0CVr0kxyazjd7NhVQLH8y0NHHbQ56CMcs8YxzaTo+gYZL7iMQbCJg0uaIctsRzi8XWd/KCpSlX4GyACk911lCe0/h37Q+SPvyHp5xhcNrM7OX6V7ebdVA+GEj2xN2XkRhEIooukryEgGztJOLhmu4/BLUgUcSjE2ee2W6C9UyQnRM1gNqNrzfgmFdvPCchqimpmCnJOIjZ+A=='}}]

## ModelCallLimitMiddleware

Prevent runaway costs by limiting model calls.

In [8]:
from langchain.agents.middleware import ModelCallLimitMiddleware

call_limiter = ModelCallLimitMiddleware(
    run_limit=2,  # Max 2 model calls
    exit_behavior="end",  # Stop when limit reached
)

# No checkpointer here: this agent is invoked once without a thread config.
agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=[call_limiter],
)

news_request = HumanMessage("Search for news on 5 different companies")
response = agent.invoke({'messages': [news_request]})

response['messages'][-1].content

[{'type': 'text',
  'text': 'Please tell me the names of 5 companies you would like me to search for news about.',
  'extras': {'signature': 'Cu8EAXLI2nxqGce9c/lBXz1/icZ/QTzPcQRFJZcxYWXmvD0tzFGyDq+meL7EAR/mEolUS8Yv7w+lUa10c323XIJYKpEuRSgX8Uqr13m8aBi47lXLCHlDKBJrDqTX6/hQVnUl19lnKXEtcLNnXTd5KyCvC7L8y/QONGnqeKgptkicDoSU7TbKXekvT/d/0eSVLpF1Whh5AMqp0cqoLsGo07k4V5OPNe+BIILz4IrjF4xB3jgXpg8JWPTNJhMYnExScSsdbLP/0Scq1v7uUUouqw1aX3AR3mi6Wwg17/NjvLGEX/8emU3aWRg9pcHywLmYJ8d+r/7DkrSDlqK38FOUv/OUjc9aKoS0ldNwWf9dJpJm4QPLSWANYkX0Kfp6MNnGZ9JJ2L4T1qAfH7RHR5qsjp+Vo+1A2+aEdl9kZ7R8ayCGGB8MNzsAWV2ZbLiBe8EEzyRqfBpVR/WU/OR64xZ5aMI4r1pys3ZGqyzj9MpEcV/QBmuKRhJlWHKp1CudpwkLDV48W8qLbYmlNFi093tJqZDbqzrv3h5oat2FicP9hUbSU62ByXIfr/b881vWs2RID1EsBsgq6PnLFwI4YNzrIR/8Uq5ZBLNsUguFKckkLjL4hePQy0Z+0oqI1bSp1lWxDjtdpztQ2SInbvjFZ6rJZFTkwXTRcyG92tyxrDi56FYiQB9HqD/Jt5dkc8qkV76Y9405vCJwq0AmO5RWvpgLGQsI1S6jdPhWiuDBa4snyUXz57dTnhY00oY2sp+aYDfVWx8vfbzbAnN/1XDE4iuHzgcORy+34O69yYGYIzVHp+3CLxu6/P1V3fwHwMPrcCIH/J4='}}]

## ToolCallLimitMiddleware

Limit tool executions to manage API usage.

In [9]:
from langchain.agents.middleware import ToolCallLimitMiddleware

tool_limiter = ToolCallLimitMiddleware(
    run_limit=2,
    exit_behavior="continue",  # Continue without more tools
)

agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=[tool_limiter],
)

# Three companies are requested while the tool limit above is 2.
news_request = HumanMessage("Search for Apple, Microsoft, and Google news")
response = agent.invoke({'messages': [news_request]})

response['messages'][-1].content

[{'type': 'text',
  'text': "I was unable to retrieve news for Google due to a tool call limit. I can try again if you'd like.\n\nHere's a summary of the news for Apple and Microsoft:\n\n**Apple News:**\n*   **Apple News+**: Get 3 months free with a new iPhone, iPad, or Mac. New subscribers get 1 month free, then $12.99/month. It's also included in the Apple One Premier plan.\n*   Apple News+ offers access to over 500 leading publications, local, national, and international news, daily puzzles, and audio stories.\n*   You can share your subscription with family, download issues for offline reading, and access it across various Apple devices (iPhone, Mac, iPad, CarPlay, HomePod, Apple Watch).\n*   Apple News uses on-device intelligence for story recommendations and respects user privacy.\n*   To view Apple News Top Stories, you need to open the link on an iPhone or iPad with iOS 9+ or a Mac with macOS 10.14+ and Apple News.\n\n**Microsoft News:**\n*   **AI Capabilities for Retail**: Mic

## ModelFallbackMiddleware

Fall back to an alternate model on failure or for cost optimization.

In [10]:
from langchain.agents.middleware import ModelFallbackMiddleware

# Secondary model handed to the fallback middleware.
fallback_model = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp')
fallback_middleware = ModelFallbackMiddleware(fallback_model)

# The agent is only constructed in this cell; it is not invoked here.
agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=[fallback_middleware],
)

## Dynamic System Prompt

Modify system prompt based on runtime context.

In [11]:
from typing import TypedDict
from langchain.agents.middleware import dynamic_prompt, ModelRequest

class Context(TypedDict):
    """Shape of the per-invocation context passed to the agent."""

    # Role label read by the dynamic prompt; the values "expert" and
    # "beginner" select a specialised system prompt.
    user_role: str

@dynamic_prompt
def user_role_prompt(request: ModelRequest):
    """Generate system prompt based on user role.

    Reads ``user_role`` from the request's runtime context and returns the
    base prompt, extended with role-specific guidance for "expert" and
    "beginner". Any other role, or a missing context, yields the base
    prompt unchanged.
    """
    # Guard against a missing context (e.g. the agent being invoked without
    # a `context=` argument): calling .get() on None would raise.
    context = request.runtime.context or {}
    user_role = context.get("user_role", "user")
    base_prompt = "You are a helpful assistant."

    if user_role == "expert":
        return f"{base_prompt} Provide detailed technical responses."
    if user_role == "beginner":
        return f"{base_prompt} Explain concepts simply and avoid jargon."

    return base_prompt

# The dynamic prompt is registered as middleware; context_schema declares
# the shape of the `context` passed at invoke time.
agent = create_agent(
    model=model,
    context_schema=Context,
    middleware=[user_role_prompt],
    tools=[base_tools.web_search],
)

In [12]:
# Test with expert context
expert_question = {"role": "user", "content": "Explain machine learning"}
response = agent.invoke(
    {"messages": [expert_question]},
    context={"user_role": "expert"},
)

response['messages'][-1].content

[{'type': 'text',
  'text': 'Machine learning is a subset of artificial intelligence (AI) that enables systems to learn from data, identify patterns, and make decisions or predictions with minimal human intervention. Instead of being explicitly programmed for every task, machine learning algorithms are trained on large datasets, allowing them to "learn" and improve their performance over time.\n\nHere\'s a breakdown of its core concepts:\n\n*   **Learning from Data:** The fundamental idea behind machine learning is to build models that can automatically discover insights and relationships within data. These models are exposed to vast amounts of data, which they analyze to find patterns, correlations, and structures.\n*   **Algorithms:** Machine learning relies on various algorithms, which are sets of rules or instructions that the system follows to learn from data. Examples include linear regression, decision trees, support vector machines, neural networks, and k-means clustering.\n*  

In [13]:
# Test with beginner context
beginner_question = {"role": "user", "content": "Explain machine learning"}
response = agent.invoke(
    {"messages": [beginner_question]},
    context={"user_role": "beginner"},
)

response['messages'][-1].content

[{'type': 'text',
  'text': 'Imagine you want to teach a computer to recognize a cat in a picture. Instead of writing a long list of rules for every possible cat feature (like "pointy ears," "whiskers," "furry"), you can use machine learning.\n\nHere\'s the basic idea:\n\n*   **Learning from examples:** You show the computer many pictures, some with cats and some without. For each picture, you tell the computer whether it contains a cat or not.\n*   **Finding patterns:** The computer then analyzes these examples and tries to find patterns and relationships that help it tell the difference between cat pictures and non-cat pictures. It\'s like the computer "learns" what a cat looks like on its own.\n*   **Making predictions:** Once the computer has learned, you can show it a brand new picture it\'s never seen before, and it will use what it learned to predict whether there\'s a cat in that picture.\n\nIn simpler terms, **machine learning is a way of teaching computers to learn from data 

## ShellToolMiddleware

Expose a persistent shell session for command execution.

In [14]:
import shutil

from langchain.agents.middleware import ShellToolMiddleware, HostExecutionPolicy

# The middleware launches a shell executable for its session. When the
# default executable is not present on the host (e.g. on Windows), startup
# fails with FileNotFoundError. Look bash up on PATH and pass it explicitly
# when found; otherwise leave the middleware's default untouched.
# NOTE(review): on Windows this needs a bash (Git Bash / WSL) on PATH — confirm.
bash_path = shutil.which("bash")
shell_options = {"shell_command": bash_path} if bash_path else {}

# Basic shell with host execution
agent = create_agent(
    model=model,
    tools=[],
    middleware=[
        ShellToolMiddleware(
            workspace_root="./workspace",
            execution_policy=HostExecutionPolicy(),
            **shell_options,
        )
    ]
)

response = agent.invoke({
    'messages': [HumanMessage("List files in the current directory")]
})

response['messages'][-1].content

Starting shell session failed; cleaning up resources.
Traceback (most recent call last):
  File "d:\Courses\Udemy\AI Agent Projects\.venv\Lib\site-packages\langchain\agents\middleware\shell_tool.py", line 676, in _create_resources
    session.start()
  File "d:\Courses\Udemy\AI Agent Projects\.venv\Lib\site-packages\langchain\agents\middleware\shell_tool.py", line 141, in start
    self._process = self._policy.spawn(
                    ^^^^^^^^^^^^^^^^^^^
  File "d:\Courses\Udemy\AI Agent Projects\.venv\Lib\site-packages\langchain\agents\middleware\_execution.py", line 136, in spawn
    process = _launch_subprocess(
              ^^^^^^^^^^^^^^^^^^^
  File "d:\Courses\Udemy\AI Agent Projects\.venv\Lib\site-packages\langchain\agents\middleware\_execution.py", line 33, in _launch_subprocess
    return subprocess.Popen(  # noqa: S603
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\laxmi\AppData\Roaming\uv\python\cpython-3.11.14-windows-x86_64-none\Lib\subprocess.py", line 102

FileNotFoundError: [WinError 2] The system cannot find the file specified

In [None]:
import shutil

# Resolve bash from PATH so the shell session does not depend on the
# middleware's default executable existing on this host; when bash is not
# found, the middleware's default is left in place.
# NOTE(review): on Windows this needs a bash (Git Bash / WSL) on PATH — confirm.
bash_path = shutil.which("bash")
shell_options = {"shell_command": bash_path} if bash_path else {}

# Shell with startup commands
agent = create_agent(
    model=model,
    tools=[],
    middleware=[
        ShellToolMiddleware(
            workspace_root="./workspace",
            startup_commands=["echo 'Shell initialized'"],
            execution_policy=HostExecutionPolicy(),
            **shell_options,
        )
    ]
)

response = agent.invoke({
    'messages': [HumanMessage("Create a directory called 'test_dir'")]
})

response['messages'][-1].content

## FilesystemFileSearchMiddleware

Provide glob and grep search tools over the filesystem.

In [None]:
from langchain.agents.middleware import FilesystemFileSearchMiddleware

# Search middleware rooted at the parent directory.
file_search = FilesystemFileSearchMiddleware(
    root_path="../",
    use_ripgrep=True,
    max_file_size_mb=10,
)

agent = create_agent(
    model=model,
    tools=[],
    middleware=[file_search],
)

glob_request = HumanMessage("Find all Python files in this directory")
response = agent.invoke({'messages': [glob_request]})

response['messages'][-1].content

In [None]:
# Search for specific content
grep_request = HumanMessage("Find files containing 'create_agent'")
response = agent.invoke({'messages': [grep_request]})

response['messages'][-1].content

## Combining Multiple Middleware

In [None]:
# Production agent with stacked middleware
# The list is built first so the stack order is visible in one place.
production_middleware = [
    SummarizationMiddleware(
        model=ChatGoogleGenerativeAI(model='gemini-2.5-flash'),
        trigger=[("messages", 15)],
        keep=("messages", 5),
    ),
    TodoListMiddleware(),
    ModelCallLimitMiddleware(run_limit=3, exit_behavior="end"),
    ToolCallLimitMiddleware(run_limit=3, exit_behavior="continue"),
    ModelFallbackMiddleware(fallback_model),
]

agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=production_middleware,
    checkpointer=checkpointer,
)

config = {'configurable': {'thread_id': 'production'}}
analysis_request = HumanMessage("Analyze tech sector trends")
response = agent.invoke({'messages': [analysis_request]}, config)

response['messages'][-1].content