In [9]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware, HumanInTheLoopMiddleware
from langchain_ollama import ChatOllama
model=ChatOllama(model="gpt-oss:120b-cloud")
agent = create_agent(
    model=model,
    tools=[],
    middleware=[
        SummarizationMiddleware(
            model=model,
            max_tokens_before_summary=4000,  # Trigger summarization at 4000 tokens
            messages_to_keep=20,  # Keep last 20 messages after summary
            summary_prompt="Custom prompt for summarization...",  # Optional
        ),
    ]
)

Human-in-the-loop


In [15]:
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langchain.messages import HumanMessage
from langchain.tools import tool

@tool
def send_email(recipient: str, subject: str, body: str) -> str:
    """Send an email."""
    return f"Email sent to {recipient} with subject '{subject}'."

agent = create_agent(
    model=model,
    tools=[send_email],
    middleware=[
        HumanInTheLoopMiddleware(
            interrupt_on={
                # Require approval, editing, or rejection for sending emails
                "send_email": {
                    "allowed_decisions": ["approve", "edit", "reject"],
                },
                # Auto-approve reading emails
                "read_email": False,
            }
        ),
    ],
)
human_message = HumanMessage(
    content="Use the `send_email` tool to send an email to john@example.com with subject 'Hello' and body 'This is a test email.'"
)
response=agent.invoke({"messages": [human_message]})
response

{'messages': [HumanMessage(content="Use the `send_email` tool to send an email to john@example.com with subject 'Hello' and body 'This is a test email.'", additional_kwargs={}, response_metadata={}, id='dbac95eb-60e3-445c-8a64-76f0c8ac5850'),
  AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'gpt-oss:120b-cloud', 'created_at': '2025-10-15T09:38:15.126509415Z', 'done': True, 'done_reason': 'stop', 'total_duration': 884233698, 'load_duration': None, 'prompt_eval_count': 157, 'prompt_eval_duration': None, 'eval_count': 60, 'eval_duration': None, 'model_name': 'gpt-oss:120b-cloud', 'model_provider': 'ollama'}, id='lc_run--2c29bcd6-1cab-44bd-9097-371a87b5e2fc-0', tool_calls=[{'name': 'send_email', 'args': {'body': 'This is a test email.', 'recipient': 'john@example.com', 'subject': 'Hello'}, 'id': 'aff1cc98-cb01-4d12-8ddf-b3545012dff7', 'type': 'tool_call'}], usage_metadata={'input_tokens': 157, 'output_tokens': 60, 'total_tokens': 217})],
 '__interrupt__': [Interrup

anthropic prompt caching

In [None]:
from langchain_anthropic import ChatAnthropic
from langchain.agents.middleware.prompt_caching import AnthropicPromptCachingMiddleware
from langchain.agents import create_agent

LONG_PROMPT = """
Please be a helpful assistant.

<Lots more context ...>
"""

agent = create_agent(
    model=ChatAnthropic(model="claude-sonnet-4-latest"),
    system_prompt=LONG_PROMPT,
    middleware=[AnthropicPromptCachingMiddleware(ttl="5m")],
)

# cache store
agent.invoke({"messages": [HumanMessage("Hi, my name is Bob")]})

# cache hit, system prompt is cached
agent.invoke({"messages": [HumanMessage("What's my name?")]})

model call limit

In [16]:
from langchain.agents import create_agent
from langchain.agents.middleware import ModelCallLimitMiddleware

agent = create_agent(
    model=model,
    tools=[],
    middleware=[
        ModelCallLimitMiddleware(
            thread_limit=10,  # Max 10 calls per thread (across runs)
            run_limit=5,  # Max 5 calls per run (single invocation)
            exit_behavior="end",  # Or "error" to raise exception
        ),
    ],
)

tool call limit

In [20]:
from langchain.agents import create_agent
from langchain.agents.middleware import ToolCallLimitMiddleware
from langchain_ollama import ChatOllama
model=ChatOllama(model="gpt-oss:120b-cloud")
# Limit all tool calls
global_limiter = ToolCallLimitMiddleware(thread_limit=20, run_limit=10)

# Limit specific tool
search_limiter = ToolCallLimitMiddleware(
    tool_name="search",
    thread_limit=5,
    run_limit=3,
)

agent = create_agent(
    model=model,
    tools=[],
    middleware=[global_limiter, search_limiter],
)

model fallback

In [21]:
from langchain.agents import create_agent
from langchain.agents.middleware import ModelFallbackMiddleware

agent = create_agent(
    model="openai:gpt-4o",
    tools=[],
    middleware=[
        ModelFallbackMiddleware(
            first_model=model
        ),
    ],
)
respponse=agent.invoke({"messages": [HumanMessage("Hello!")]})
response

{'messages': [HumanMessage(content="Use the `send_email` tool to send an email to john@example.com with subject 'Hello' and body 'This is a test email.'", additional_kwargs={}, response_metadata={}, id='dbac95eb-60e3-445c-8a64-76f0c8ac5850'),
  AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'gpt-oss:120b-cloud', 'created_at': '2025-10-15T09:38:15.126509415Z', 'done': True, 'done_reason': 'stop', 'total_duration': 884233698, 'load_duration': None, 'prompt_eval_count': 157, 'prompt_eval_duration': None, 'eval_count': 60, 'eval_duration': None, 'model_name': 'gpt-oss:120b-cloud', 'model_provider': 'ollama'}, id='lc_run--2c29bcd6-1cab-44bd-9097-371a87b5e2fc-0', tool_calls=[{'name': 'send_email', 'args': {'body': 'This is a test email.', 'recipient': 'john@example.com', 'subject': 'Hello'}, 'id': 'aff1cc98-cb01-4d12-8ddf-b3545012dff7', 'type': 'tool_call'}], usage_metadata={'input_tokens': 157, 'output_tokens': 60, 'total_tokens': 217})],
 '__interrupt__': [Interrup

Pll detection

In [24]:
from langchain.agents import create_agent
from langchain.agents.middleware import PIIMiddleware
from langchain_ollama import ChatOllama
model=ChatOllama(model="gpt-oss:120b-cloud")
agent = create_agent(
    model=model,
    tools=[],
    middleware=[
        # Redact emails in user input
        PIIMiddleware("email", strategy="redact", apply_to_input=True),
        # Mask credit cards (show last 4 digits)
        PIIMiddleware("credit_card", strategy="mask", apply_to_input=True),
        # Custom PII type with regex
        PIIMiddleware(
            "api_key",
            detector=r"sk-[a-zA-Z0-9]{32}",
            strategy="block",  # Raise error if detected
        ),
    ],
)
response=agent.invoke({"messages": [HumanMessage("My email is bob@example.com,my api key is sk-1234567890abcdef1234567890abcdef")]})
response

PIIDetectionError: Detected 1 instance(s) of api_key in message content

planning

In [2]:
from langchain.agents import create_agent
from langchain.agents.middleware import PlanningMiddleware
from langchain_ollama import ChatOllama
from langchain.messages import HumanMessage
model=ChatOllama(model="gpt-oss:120b-cloud")
agent = create_agent(
    model=model,
    tools=[],
    middleware=[
        PlanningMiddleware(),
    ]
)
result=agent.invoke({"messages": [HumanMessage("Help me refactor my codebase.")]})
result["todos"]

KeyError: 'todos'

LLM tool selector


In [None]:
from langchain.agents import create_agent
from langchain.agents.middleware import  LLMToolSelectorMiddleware
from langchain_ollama import ChatOllama
model=ChatOllama(model="gpt-oss:120b-cloud")
agent=create_agent(
    model=model,
    tools=[],
    middleware=[
        LLMToolSelectorMiddleware(
            model=model,
            max_tools=3,
            always_include=["search"]  # 
        )
    ]
)

context editing

In [None]:
from langchain.agents import create_agent
from langchain.agents.middleware import ContextEditingMiddleware,ClearToolUsesEdit
agent = create_agent(
    model=model,
    tools=[],
    middleware=[
        ContextEditingMiddleware(edits=[ClearToolUsesEdit(max_tokens=1000)]),        
    ]
)