# Agent Middleware Essentials

Add production-ready middleware for message management, limits, fallbacks, and dynamic prompts.

**What you'll learn:**
- Middleware adds production capabilities without changing agent logic
- Trimming messages keeps only the most recent messages so the conversation fits within the context window
- Delete messages removes specific or all messages from state
- SummarizationMiddleware prevents context overflow with summaries
- TodoListMiddleware provides task planning and tracking
- Limits control costs and API usage
- Fallbacks improve reliability
- Dynamic prompts enable context-aware behavior
- ShellToolMiddleware enables command execution
- FilesystemFileSearchMiddleware provides file search capabilities

In [1]:
# Add the parent directory to the import path (presumably so the local
# `scripts` package imported in a later cell can be found — verify against
# the repository layout).
import sys
sys.path.append('../')

import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
load_dotenv()

True

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_agent
from langchain.messages import HumanMessage
from langgraph.checkpoint.sqlite import SqliteSaver
import sqlite3
from scripts import base_tools

In [3]:
# Chat model shared by most agents in this notebook.
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash')

# Setup checkpointer
# sqlite3.connect() creates the database file but not its parent directory,
# so make sure db/ exists; otherwise a fresh checkout fails with
# "unable to open database file".
os.makedirs("db", exist_ok=True)
# check_same_thread=False lets the connection be used from threads other than
# the one that created it.
conn = sqlite3.connect("db/middleware_agent.db", check_same_thread=False)
checkpointer = SqliteSaver(conn)
checkpointer.setup()

## Trim Messages

Keep only the most recent messages so the conversation fits within the context window.

In [4]:
from langchain.messages import RemoveMessage
from langgraph.graph.message import REMOVE_ALL_MESSAGES
from langchain.agents import AgentState
from langchain.agents.middleware import before_model
from langgraph.runtime import Runtime
from typing import Any

@before_model
def trim_messages(state: AgentState, runtime: Runtime):
    """Shrink the history to its first message plus a short recent tail.

    Histories of three messages or fewer are left untouched (returns None).
    Otherwise the returned update replaces the whole message list with the
    first message followed by the last three messages (even-length history)
    or the last four messages (odd-length history).
    """
    history = state["messages"]

    # Trimming only kicks in once there are at least four messages.
    if len(history) > 3:
        # Odd-length histories keep one extra trailing message.
        tail_size = 4 if len(history) % 2 else 3
        kept = [history[0], *history[-tail_size:]]

        # The REMOVE_ALL_MESSAGES marker goes first so the kept messages that
        # follow it become the entire new history.
        return {"messages": [RemoveMessage(id=REMOVE_ALL_MESSAGES), *kept]}

    return None  # Nothing to trim

# Tool-less agent with trim_messages registered as middleware and the SQLite
# checkpointer attached.
agent = create_agent(
    model=model,
    tools=[],
    middleware=[trim_messages],
    checkpointer=checkpointer
)

# Every call below passes this same config, i.e. the same thread_id.
config = {"configurable": {"thread_id": "trim_session"}}

# Four turns on one thread. trim_messages always keeps messages[0], which is
# presumably why the name given in the first turn can still be answered in the
# last one. NOTE(review): the checkpointer writes to a database file, so a
# re-run may start from history left by an earlier run — verify.
agent.invoke({"messages": "hi, my name is Laxmi Kant"}, config)
agent.invoke({"messages": "write a short poem about cats"}, config)
agent.invoke({"messages": "now do the same but for dogs"}, config)
response = agent.invoke({"messages": "what's my name?"}, config)

# Content of the last message in the returned state.
response['messages'][-1].content

'Your name is Laxmi Kant.'

## Delete Messages

Remove specific messages or clear entire history.

In [5]:
from langchain.agents.middleware import after_model

@after_model
def delete_old_messages(state: AgentState, runtime: Runtime):
    """Drop the two oldest messages once the history holds more than two.

    Returns None (no state change) when there are two messages or fewer;
    otherwise returns an update with one RemoveMessage per dropped message,
    addressed by that message's id.
    """
    history = state["messages"]

    # Short histories are left alone.
    if len(history) <= 2:
        return None

    oldest_two = history[:2]
    return {"messages": [RemoveMessage(id=stale.id) for stale in oldest_two]}

# Tool-less agent with delete_old_messages registered as middleware and the
# SQLite checkpointer attached.
agent = create_agent(
    model=model,
    tools=[],
    middleware=[delete_old_messages],
    checkpointer=checkpointer
)

# Both calls below use this thread_id.
config = {"configurable": {"thread_id": "delete_session"}}

# Two turns on the same thread. delete_old_messages is registered with
# @after_model, so (presumably) the model has already answered by the time the
# two oldest messages are removed — TODO confirm hook ordering.
agent.invoke({"messages": "hi! I'm Laxmi Kant"}, config)
response = agent.invoke({"messages": "what's my name?"}, config)

# Content of the last message in the returned state.
response['messages'][-1].content

'Your name is **Laxmi Kant**.'

## SummarizationMiddleware

Automatically compress long conversations using summaries.

In [6]:
from langchain.agents.middleware import SummarizationMiddleware

# Agent with the web_search tool, the SQLite checkpointer, and a
# SummarizationMiddleware that uses its own model instance.
agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    checkpointer=checkpointer,
    middleware=[
        SummarizationMiddleware(
            model=ChatGoogleGenerativeAI(model='gemini-2.5-flash'),
            # NOTE(review): confirm whether the 15-message threshold is
            # inclusive or exclusive in the installed LangChain version.
            trigger=[("messages", 15)],  # Summarize when > 15 messages
            keep=("messages", 5)  # Keep last 5 unsummarized
        )
    ]
)

# Dedicated thread_id for this demo.
config = {'configurable': {'thread_id': 'summary_session'}}
response = agent.invoke({
    'messages': [HumanMessage(
        "Search for Apple, Microsoft, and Tesla stock news"
    )]
}, config)

# Number of messages in the returned state.
len(response['messages'])

6

## TodoListMiddleware

Equip agents with task planning and tracking for complex multi-step tasks.

In [7]:
from langchain.agents.middleware import TodoListMiddleware
from langchain.tools import tool

# NOTE: the docstrings of the two tools below are deliberately left unchanged.
# @tool presumably exposes them to the model as the tool description, which
# would make them runtime text rather than plain documentation.

@tool
def read_file(path: str) -> str:
    """Read file contents."""
    # Returns the file's text, or an "Error reading file: ..." string on
    # failure so the agent receives the problem as a tool result.
    try:
        # Explicit UTF-8 so the result does not depend on the platform's
        # locale encoding (and matches what write_file produces).
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, ValueError) as e:
        # OSError: missing file, permissions, directories, ...
        # ValueError: undecodable bytes (UnicodeDecodeError) or a malformed path.
        return f"Error reading file: {e}"

@tool
def write_file(path: str, content: str) -> str:
    """Write content to file."""
    # Overwrites `path` with `content`. Returns a success message, or an
    # "Error writing file: ..." string on failure.
    try:
        # Explicit UTF-8 for the same reason as in read_file.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
        return f"Successfully wrote to {path}"
    except (OSError, ValueError) as e:
        # OSError: bad directory, permissions, disk errors, ...
        # ValueError: unencodable text (UnicodeEncodeError) or a malformed path.
        return f"Error writing file: {e}"

# Agent with the two file tools defined above, TodoListMiddleware, and the
# SQLite checkpointer.
agent = create_agent(
    model=model,
    tools=[read_file, write_file],
    middleware=[TodoListMiddleware()],
    checkpointer=checkpointer
)

# Dedicated thread_id for this demo.
config = {'configurable': {'thread_id': 'todo_session'}}
# Two-step request (write a file, then read it back).
# NOTE: if the agent follows the prompt, write_file creates test.txt on disk.
response = agent.invoke({
    'messages': [HumanMessage(
        "Create a new file called test.txt with 'Hello World', then read it back"
    )]
}, config)

# Content of the last message in the returned state.
response['messages'][-1].content

''

## Dynamic Model Selection

In [8]:
from langchain.agents.middleware import wrap_model_call, ModelRequest, ModelResponse

# Two candidate models the middleware below switches between
basic_model = ChatGoogleGenerativeAI(model='gemini-2.5-flash')
advanced_model = ChatGoogleGenerativeAI(model='gemini-3-flash-preview')

@wrap_model_call
def dynamic_model_selection(request: ModelRequest, handler):
    """Route the model call by conversation length.

    Uses advanced_model when the state holds more than 10 messages and
    basic_model otherwise, then hands the overridden request to `handler`
    and returns whatever it returns.
    """
    history_length = len(request.state["messages"])
    chosen = advanced_model if history_length > 10 else basic_model
    return handler(request.override(model=chosen))

# Agent that starts from basic_model; dynamic_model_selection may override the
# model on each call. No tools and no checkpointer are configured here.
agent = create_agent(
    model=basic_model,
    middleware=[dynamic_model_selection]
)

# Single-message conversation, so the "> 10 messages" branch is not taken.
response = agent.invoke({'messages': [HumanMessage("What is AI?")]})
response['messages'][-1].text

'**Artificial Intelligence (AI)** is a broad field of computer science dedicated to creating machines that can perform tasks that typically require human intelligence.\n\nIn simpler terms, it\'s about making computers "smart" – enabling them to learn, reason, perceive, understand language, and solve problems much like humans do.\n\nHere\'s a breakdown of what that means:\n\n1.  **The Core Goal: Mimicking Human Intelligence**\n    AI aims to replicate or simulate cognitive functions associated with the human mind, such as:\n    *   **Learning:** Acquiring information and rules for using the information.\n    *   **Reasoning:** Using rules to reach approximate or definite conclusions.\n    *   **Problem-solving:** Finding solutions to complex problems.\n    *   **Perception:** Understanding and interpreting sensory input (like sight and sound).\n    *   **Language Understanding:** Processing and generating human language.\n\n2.  **How AI is Achieved (Key Technologies and Approaches):**

## ModelCallLimitMiddleware

Prevent runaway costs by limiting model calls.

In [9]:
from langchain.agents.middleware import ModelCallLimitMiddleware

# Agent with the web_search tool and a per-run cap on model calls.
# No checkpointer is attached.
agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=[
        ModelCallLimitMiddleware(
            run_limit=2,  # Max 2 model calls
            exit_behavior="end"  # Stop when limit reached
        )
    ]
)

# A request that could take several model/tool round trips.
response = agent.invoke({
    'messages': [HumanMessage("Search for news on 5 different companies")]
})

# Content of the last message in the returned state.
response['messages'][-1].content

[{'type': 'text',
  'text': "Please tell me the names of the 5 companies you'd like me to search for.",
  'extras': {'signature': 'Co0CAXLI2nxV0zDJ+lJsAMuYnkBf118DbyDvf2rfZliJ7IRPXqfVD6JL3wgRVu0DuynLWAgh5hNSp/O61kAsx125BwriwbS0C22kHqYEjLPJizwxMOAOpvZOsB+YalTNg5VCBKfSdxRrYtfwehJXQgjFw/kimKA51iRmDQmYP9JWo4V/lMbj2dvghbHwv0h/QW1bnRcAvGpwsQT7IHoKSpvkRil82bfT1zdEjRASdHDyVUlv6T7sdntZyBMgYc9Frv//kUdf3Frv20GLprdwkr6Yofem1gef5TwJcRHctfqGn13xRwfAmBf3miFI4WOi1Zrm70FYN1ppcs4q+OxgAuMdANM7DUc02Sc3XuzXQZM='}}]

## ToolCallLimitMiddleware

Limit tool executions to manage API usage.

In [10]:
from langchain.agents.middleware import ToolCallLimitMiddleware

# Agent with the web_search tool and a per-run cap of 2 tool calls.
# No checkpointer is attached.
agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=[
        ToolCallLimitMiddleware(
            run_limit=2,
            exit_behavior="continue"  # Continue without more tools
        )
    ]
)

# The prompt names three companies while the limit allows two tool calls.
response = agent.invoke({
    'messages': [HumanMessage("Search for Apple, Microsoft, and Google news")]
})

# Content of the last message in the returned state.
response['messages'][-1].content

[{'type': 'text',
  'text': 'I was able to find some recent news for Microsoft:\n\n*   Microsoft is introducing innovations and programs to support AI-powered teaching and learning.\n*   College students now get 12 months of Microsoft 365 Premium and LinkedIn Premium Career for free.\n*   Microsoft is committed to building community-first AI infrastructure.\n*   The Xbox Developer_Direct broadcast returns on Jan. 22.\n\nUnfortunately, I was unable to retrieve news for Apple and Google at this time due to a tool call limit.',
  'extras': {'signature': 'CuMCAXLI2nxUnW/BJDEuax4ifiJ9B6g19alsBjXkrCp+/QBBBIPGp1yP8Qbhvb3SkEIEaTu5KFsumOKMfAVRIgfDqwnUQjosw4BdICUDBWiEn8soyOqF+Czvv86LY2imZ8sasv1YoelIh46UXK6bIaCMrT6KC+GLzicju4VTH1nmAYtWJA6Lvlzv6CFt3perTqYE+ggxu7I4FhzFGMCxK2Qutz+1o+9OpAZ6Q4XGIXRk7Th7Vh4eOyst1oJ7hsCsYCNzw7pUweRYhst4pgwHXX58MTxvP3KimdGWZQ3AsAsNtjPTDqVNHf1ZE8/hLA+m1TgRUYvzlLSNuj+MEvItkODWNNXWmGbnWZKr9+mn8mN0vMZU4HBfbrAiuIfvszJJRnP/Ag1LArhHJjvziRgKVAfxr4ZDvVuUB57+es/JypVYUFZaHDoJ4Kzhwb

## ModelFallbackMiddleware

Fall back to an alternate model on failure or for cost optimization.

In [11]:
from langchain.agents.middleware import ModelFallbackMiddleware

# Primary model (rebinds the notebook-level `model`) and the model to fall back to.
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash')
fallback_model = ChatGoogleGenerativeAI(model='gemini-3-flash-preview')

# The fallback model is passed positionally to ModelFallbackMiddleware.
agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=[ModelFallbackMiddleware(fallback_model)],
)

## Dynamic System Prompt

Modify system prompt based on runtime context.

In [12]:
from typing import TypedDict
from langchain.agents.middleware import dynamic_prompt, ModelRequest

# Runtime context schema handed to create_agent via context_schema.
class Context(TypedDict):
    # Role label read by user_role_prompt; the values it special-cases are
    # "expert" and "beginner".
    user_role: str

@dynamic_prompt
def user_role_prompt(request: ModelRequest) -> str:
    """Generate system prompt based on user role.

    Reads ``user_role`` from the runtime context and returns the base prompt,
    extended for the "expert" and "beginner" roles. Any other role, a missing
    ``user_role`` key, or a missing context yields the base prompt unchanged.
    """
    # The runtime context is None when the agent is invoked without
    # `context=...`; treat that as an empty context rather than raising
    # AttributeError on `.get`.
    context = request.runtime.context or {}
    user_role = context.get("user_role", "user")
    base_prompt = "You are a helpful assistant."

    if user_role == "expert":
        return f"{base_prompt} Provide detailed technical responses."
    elif user_role == "beginner":
        return f"{base_prompt} Explain concepts simply and avoid jargon."

    return base_prompt

# Agent whose system prompt is produced per call by user_role_prompt.
# context_schema declares the shape of the `context=` argument to invoke().
agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    middleware=[user_role_prompt],
    context_schema=Context
)

In [13]:
# Test with expert context
# user_role="expert" selects the "Provide detailed technical responses." prompt.
response = agent.invoke(
    {"messages": [{"role": "user", "content": "Explain machine learning"}]},
    context={"user_role": "expert"}
)

# Content of the last message in the returned state.
response['messages'][-1].content

[{'type': 'text',
  'text': 'Machine learning is a subset of artificial intelligence (AI) that enables systems to learn from data, identify patterns, and make decisions with minimal human intervention. Instead of being explicitly programmed for every task, machine learning models are "trained" on large datasets, allowing them to adapt and improve their performance over time.\n\nHere\'s a breakdown of key aspects:\n\n**How it Works:**\n\n1.  **Data Input:** Machine learning models are fed with vast amounts of data, which can include images, text, numbers, or any other form of information relevant to the problem.\n2.  **Feature Extraction:** The model identifies relevant features or attributes within the data that are crucial for making predictions or classifications.\n3.  **Training:** During training, the model uses algorithms to learn patterns and relationships within the data. It adjusts its internal parameters to minimize errors between its predictions and the actual outcomes.\n4.  

In [14]:
# Test with beginner context
# user_role="beginner" selects the "Explain concepts simply and avoid jargon." prompt.
response = agent.invoke(
    {"messages": [{"role": "user", "content": "Explain machine learning"}]},
    context={"user_role": "beginner"}
)

# Content of the last message in the returned state.
response['messages'][-1].content

[{'type': 'text',
  'text': 'Machine learning is a way to teach computers to learn from data without being explicitly programmed. Imagine you want a computer to recognize cats in pictures. Instead of writing a rule for every possible cat feature (like pointy ears, whiskers, etc.), you show the computer thousands of pictures labeled "cat" and "not cat."\n\nThe computer then figures out patterns and rules on its own to distinguish cats from other things. The more data it sees, the better it gets at making accurate predictions or decisions. It\'s like how humans learn from experience!',
  'extras': {'signature': 'CucBAXLI2nwFFBeYc/5eaUTYVuoM1dCBrs75JYvicWQSMCeiS2VAH7+vJb0f4c7AZUHMBzDPww7CqQrhC4U+Ar0steKM5EgTBJR0hY6dynrrj1t2u30y7a53kKoW2IC9j1P57yWDFxg/58xV6RvETS++p5/wp1aQUA/PsMde5hutjz3xBxmHaqoBPcdZ92fpAO2764/5xGLodmwzrZWGvxGsKdkHYaHXu15WuByU1NEPXhuThoaF2GXviEkeG+ZKZCByG/XarCnd5hyZlgaFhCmfxa2K2WvLmzmW3xW9JKf+pK5OLEDkLELE'}}]

## FilesystemFileSearchMiddleware

Provide Glob and Grep search tools over the filesystem.

In [23]:
from langchain.agents.middleware import FilesystemFileSearchMiddleware

# Agent with no explicit tools; any search tools come from the middleware.
agent = create_agent(
    model=model,
    tools=[],
    middleware=[
        FilesystemFileSearchMiddleware(
            # Relative root, so searches depend on the notebook's working directory.
            root_path="./",
            # NOTE(review): presumably relies on the `rg` binary being
            # installed — confirm the behavior when it is missing.
            use_ripgrep=True,
            max_file_size_mb=10,
        )
    ]
)

# File-name style query.
response = agent.invoke({
    'messages': [HumanMessage("Find all notebooks files in this directory")]
})

# Print the text of the last message in the returned state.
print(response['messages'][-1].text)

Here are the notebook files in this directory:
/01 agent_fundamentals.ipynb
/02 agent_tools_and_configurations.ipynb
/03 agent_short_term_memory.ipynb
/04 agent_long_term_memory.ipynb
/05 agent_streaming.ipynb
/06 agent_middleware_essentials.ipynb
/07 agent_guardrails_and_HITL.ipynb
/08 agent_prompt_engineering.ipynb


In [24]:
# Search for specific content
# Reuses the file-search agent built in the previous cell.
response = agent.invoke({
    'messages': [HumanMessage("Find files containing 'create_agent'")]
})

# Content of the last message in the returned state.
response['messages'][-1].content

[{'type': 'text',
  'text': "Files containing 'create_agent':\n/01 agent_fundamentals.ipynb\n/05 agent_streaming.ipynb\n/06 agent_middleware_essentials.ipynb\n/07 agent_guardrails_and_HITL.ipynb\n/08 agent_prompt_engineering.ipynb",
  'extras': {'signature': 'Cr4DAXLI2nxa83u/WV8AnCQ2M4OyxkhQkdnFpC0vOS5gMcosT263TDQvDXkGFB+Gd+A1G0Ra+pOAr1/rZWQRodJ6d7yhVE6nt0mhOsvTm9H6dJZadGH2M4LjgPnqy02/vQ5KwI/hcSbfPVC8TFLpzzB5Y7ejlH4QM1dCNdvSLOpE2fP4VNm8VdlG+EQrrzWTwq4U1WZH4W8WV4tnzMxr2Xf5m3p1H7r+l05alGAJ5czbb64YcKXrdoxYkpwjBAX4EdPdr97SHHfL0yODKxq8Em3NQeqV8ggViorzZ8Uvz9FgGXF5DjNmJMfii8HCCvwdvrZStUPsqzxxChLScjMiSnnBX2uJKbHYPvV2T/H6Y78xaCEkYPBPDJrM/F5OnI90X1Cyg5jrydR3rCBeSEHzTWEN0i6hw2K1usO76LTPuWjl36cVIIzZ0H00yppOF7cNZNt8kqZ0EMb9B53HsBEu56852c+SHO2AQxU/IJkba7ChOgWmREYLBnKhc0waXl2zUJzyEutcl3LNWA8S3rDsRICDzLkdrAvfuIUajr2jCMr0OEhiOF5wrXdhKHxvkdi3LTK7ABvOrYK1vLEJBlADvlQ='}}]

## Combining Multiple Middleware

In [25]:
# Production agent with stacked middleware
# Relies on names bound in earlier cells: model, checkpointer, fallback_model
# and the middleware classes imported in their own sections.
agent = create_agent(
    model=model,
    tools=[base_tools.web_search],
    checkpointer=checkpointer,
    middleware=[
        # Summarization uses its own, separate model instance.
        SummarizationMiddleware(
            model=ChatGoogleGenerativeAI(model='gemini-3-pro-preview'),
            trigger=[("messages", 15)],
            keep=("messages", 5)
        ),
        TodoListMiddleware(),
        # Per-run caps: 3 model calls (then end) and 3 tool calls (then continue).
        ModelCallLimitMiddleware(run_limit=3, exit_behavior="end"),
        ToolCallLimitMiddleware(run_limit=3, exit_behavior="continue"),
        ModelFallbackMiddleware(fallback_model)
    ]
)

# Dedicated thread_id for this demo.
config = {'configurable': {'thread_id': 'production'}}
response = agent.invoke({
    'messages': [HumanMessage("Analyze tech sector trends")]
}, config)

# Content of the last message in the returned state.
response['messages'][-1].content

'Model call limits exceeded: run limit (3/3)'