# Managing long conversations

In [None]:
from langchain.agents import AgentState, create_agent
from langchain.agents.middleware import SummarizationMiddleware, before_agent
from langchain.chat_models import init_chat_model
from langchain.messages import AIMessage, HumanMessage, RemoveMessage, ToolMessage
from langgraph.runtime import Runtime

from chain_reaction.config import APIKeys, ModelBehavior, ModelName
from chain_reaction.utils import get_messages

# Instantiate the project's API key container; `api_keys.anthropic` is passed to both models below.
api_keys = APIKeys()

# Summarization

Summarization during long conversations serves several critical purposes in agent systems:

### 1. Context Window Constraints
LLMs have fixed context windows, and as conversations grow, you face a hard ceiling. Without summarization, you either truncate (losing important early context) or hit token limits entirely. Summarization lets you compress historical context while preserving the semantic essence—turning 50k tokens of conversation into 2k tokens of distilled information.

### 2. Attention Degradation
Even within the context window, LLM attention isn't uniform. Models tend to weight recent tokens more heavily and can "lose track" of information buried in the middle of long contexts (the "lost in the middle" problem). By summarizing older exchanges, you're essentially promoting that information back into a form the model attends to more reliably.

### 3. Output Quality & Coherence
Long raw contexts introduce noise. Every tangent, correction, and back-and-forth gets included, making it harder for the model to identify what's actually relevant to the current task. Summarization acts as a relevance filter—the summary naturally emphasizes what mattered and drops what didn't.

### 4. Cost & Latency
Token count directly impacts API costs and response latency. A 100k token context is expensive and slow. Compressing to 10k tokens with good summarization gives you similar task performance at a fraction of the cost and with faster responses.

In [None]:
# Initialize the chat model that backs the agents created below
chat_model = init_chat_model(
    model=ModelName.CLAUDE_HAIKU,
    timeout=None,
    max_retries=2,
    api_key=api_keys.anthropic,
    # Keyword arguments produced by the project's `ModelBehavior.deterministic()` preset
    **ModelBehavior.deterministic().model_dump(),
)

# Initialize the summarization model.
# It is configured identically to the chat model here, but it is a separate instance
# that is handed to the summarization middleware rather than to the agent itself.
summarization_model = init_chat_model(
    model=ModelName.CLAUDE_HAIKU,
    timeout=None,
    max_retries=2,
    api_key=api_keys.anthropic,
    # Same preset as the chat model
    **ModelBehavior.deterministic().model_dump(),
)

# Define the summarization middleware.
# The thresholds are tiny compared with the nine-message conversation invoked below,
# so summarization is expected to run on that input.
summarization_middleware = SummarizationMiddleware(
    model=summarization_model,
    trigger=[
        ("tokens", 100),  # trigger summarization when 100 tokens are exceeded
        ("messages", 3),  # or trigger summarization when 3 messages are exceeded
    ],
    keep=("messages", 1),  # Keep only the last message in the conversation history (+ summary)
)

# Create an agent whose only middleware is the summarization middleware
agent = create_agent(
    model=chat_model,
    middleware=[summarization_middleware],
)

In [None]:
# Build a "long" conversation that ends with a user request still awaiting a reply
conversation = [
    HumanMessage(content="Hello!"),
    AIMessage(content="Hi there! How can I assist you today?"),
    HumanMessage(content="Can you tell me a joke?"),
    AIMessage(content="Sure! Why did the scarecrow win an award? Because he was outstanding in his field!"),
    HumanMessage(content="Haha, that's a good one! Can you tell me another joke?"),
    AIMessage(content="Of course! Why don't scientists trust atoms? Because they make up everything!"),
    HumanMessage(content="LOL! You're on a roll. One more, please?"),
    AIMessage(content="Alright! Why did the bicycle fall over? Because it was two-tired!"),
    HumanMessage(content="These are great! Thanks for the laughs. Can you tell me a fun fact now?"),
]

# Run the agent over the whole conversation
response = agent.invoke({"messages": conversation})

# Pull the resulting message list out of the response
messages = get_messages(response)

# Exactly 3 messages are expected: the summary, the last user message, and the AI's reply to it
assert len(messages) == 3, f"Expected 3 messages, got {len(messages)}"  # noqa: S101

# Print the summary (index 0), then the AI's reply to the last user message (index 2)
for heading, position in (
    ("Summary Message:", 0),
    ("\nAI's Response to Last User Message:", 2),
):
    print(heading)
    print(messages[position].content)

# Trim messages

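Rather than summarizing a long sequence of messages, we can use a custom middleware function to filter out messages we no longer need. The example below removes every `ToolMessage` from the history before the agent runs.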

In [None]:
@before_agent
def remove_tool_messages(state: AgentState, runtime: Runtime) -> dict[str, list[RemoveMessage]]:  # noqa: ARG001
    """Build a state update that drops every ToolMessage from the message history.

    Registered as a ``before_agent`` hook. ``runtime`` is accepted but not used.

    Returns:
        A mapping with a single ``"messages"`` key holding one ``RemoveMessage`` per
        ``ToolMessage`` found in the state. Tool messages whose ``id`` is ``None`` are
        skipped, and the list is empty when there is nothing to remove.
    """
    removals: list[RemoveMessage] = []

    for message in get_messages(state):
        # Only tool messages are candidates for removal
        if not isinstance(message, ToolMessage):
            continue
        # A removal marker is keyed by id, so messages without one are left alone
        if message.id is None:
            continue
        removals.append(RemoveMessage(id=message.id))

    return {"messages": removals}


# Create an agent with message trimming middleware.
# NOTE: this rebinds `agent`, replacing the summarization agent created earlier,
# so re-running the earlier invocation cell after this one would use this agent instead.
agent = create_agent(
    model=chat_model,
    middleware=[remove_tool_messages],
)

In [None]:
# Build a "long" conversation interlaced with ToolMessages
conversation = [
    HumanMessage(content="Hello!"),
    AIMessage(content="Hi there! How can I assist you today?"),
    HumanMessage(content="Can you tell me a joke?"),
    ToolMessage(content="Invoking joke tool...", tool_call_id="1"),
    AIMessage(content="Sure! Why did the scarecrow win an award? Because he was outstanding in his field!"),
    HumanMessage(content="Haha, that's a good one! Can you tell me another joke?"),
    ToolMessage(content="Invoking feedback tool...", tool_call_id="2"),
    ToolMessage(content="Invoking joke tool...", tool_call_id="3"),
    AIMessage(content="Of course! Why don't scientists trust atoms? Because they make up everything!"),
    HumanMessage(content="LOL! You're on a roll. One more, please?"),
    ToolMessage(content="Invoking feedback tool...", tool_call_id="4"),
    ToolMessage(content="Invoking joke tool...", tool_call_id="5"),
    AIMessage(content="Alright! Why did the bicycle fall over? Because it was two-tired!"),
    HumanMessage(content="These are great! Thanks for the laughs. Can you tell me a fun fact now?"),
]

# Run the agent on that conversation
response = agent.invoke({"messages": conversation})

# Pull the resulting message list out of the response
messages = get_messages(response)

# Verify that no ToolMessage is left in the returned history
assert not any(isinstance(msg, ToolMessage) for msg in messages), "ToolMessages not removed"  # noqa: S101