In [34]:
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langgraph.checkpoint.memory import MemorySaver
from langgraph.config import get_config
from langgraph.func import entrypoint
from langgraph.prebuilt import create_react_agent
from langgraph.store.memory import InMemoryStore
from langgraph.utils.config import get_store
from langmem import (
    ReflectionExecutor,
    create_manage_memory_tool,
    create_memory_manager,
    create_memory_store_manager,
    create_multi_prompt_optimizer,
    create_prompt_optimizer,
    create_search_memory_tool,
)
from pydantic import BaseModel, Field

In [35]:
# Load variables from a local .env file into the process environment
# (presumably the OpenAI API key the models below rely on — TODO confirm)
load_dotenv()

True

# ReAct Agent with memory 

In [3]:
# Set up storage: an in-memory store configured with an embedding index
store = InMemoryStore(
    index=dict(dims=1536, embed="openai:text-embedding-3-small"),
)

In [4]:
# Create a ReAct agent whose only tools are the two LangMem memory tools
agent = create_react_agent(
    "openai:gpt-4.1",
    tools=[
        # Both tools are scoped to the ("memories",) namespace
        create_manage_memory_tool(namespace=("memories",)),
        create_search_memory_tool(namespace=("memories",)),
    ],
    # Store instance handed to the agent; no checkpointer is configured here
    store=store,
)

In [5]:
# Store a new memory. This agent has no checkpointer, so each invoke is an
# independent conversation; anything recalled later must come from the store.
agent.invoke(
    {"messages": [{"role": "user", "content": "Remember that I prefer dark mode."}]}
)

{'messages': [HumanMessage(content='Remember that I prefer dark mode.', additional_kwargs={}, response_metadata={}, id='c09b46a5-d44e-4d68-82c8-84d4cb68f8b8'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_PNCVrTjAhBanekwdD32qtFQP', 'function': {'arguments': '{"content":"User prefers dark mode.","action":"create"}', 'name': 'manage_memory'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 245, 'total_tokens': 266, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': None, 'id': 'chatcmpl-BvnGvrcJeY5ubXKj2foiOrweZ5QkD', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--e656d1e0-a1f3-40c7-b9e7-8e2d1ddf59c5-0', tool_calls=[{'name': 'manage_memory', 'args

In [6]:
# Retrieve the stored memory in a separate invocation
# (the result is kept in `response`; this cell displays nothing by itself)
response = agent.invoke(
    {"messages": [{"role": "user", "content": "What are my lighting preferences?"}]}
)

# Hot Path Memory

In [13]:
def prompt(state):
    """Build the LLM input: a memory-aware system message plus the chat history."""
    # The store given to `create_react_agent` is reachable through the run context
    memory_store = get_store()
    # Query the agent's namespace with the text of the most recent message
    recalled = memory_store.search(
        ("memories",),
        query=state["messages"][-1].content,
    )
    system_msg = f"""You are a helpful assistant.

## Memories
<memories>
{recalled}
</memories>
"""
    system_turn = {"role": "system", "content": system_msg}
    return [system_turn, *state["messages"]]


# Long-term memory: store (with an embedding index) for extracted memories
store = InMemoryStore(
    index=dict(dims=1536, embed="openai:text-embedding-3-small"),
)
# Short-term memory: checkpoints of the graph state
checkpointer = MemorySaver()

agent = create_react_agent(
    "openai:gpt-4.1",
    tools=[
        # "manage_memory" lets the agent create, update, and delete
        # memories by ID. Namespaces scope memories; to scope them
        # per user, use ("memories", "{user_id}").
        create_manage_memory_tool(namespace=("memories",)),
    ],
    # Injects stored memories into the system message on every model call
    prompt=prompt,
    # Memories are written to this BaseStore instance
    store=store,
    # Graph state is checkpointed after each node completes, which tracks
    # the chat history and enables durable execution
    checkpointer=checkpointer,
)

In [None]:
# Calls that reuse this config share the thread_id "thread-a"
config = {"configurable": {"thread_id": "thread-a"}}

# Use the agent. The agent hasn't saved any memories yet,
# so it doesn't know about us
response = agent.invoke(
    {"messages": [{"role": "user", "content": "Know which display mode I prefer?"}]},
    config=config,
)

I currently do not know which display mode you prefer. If you let me know your preference (such as light mode or dark mode), I can remember it for future interactions!


In [15]:
# Answer the agent's question and ask it to save the preference
agent.invoke(
    {"messages": [{"role": "user", "content": "dark. Remember that."}]},
    # Continue the same conversation (thread-a) by reusing the config
    # that carries its thread_id
    config=config,
)

{'messages': [HumanMessage(content='Know which display mode I prefer?', additional_kwargs={}, response_metadata={}, id='e9c6a78d-984e-4d58-94a5-30f53276bbf6'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_4aQS5SL2toLaIYjH2Q57D4lh', 'function': {'arguments': '{"content":"User\'s display mode preference is unknown.","action":"create"}', 'name': 'manage_memory'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 206, 'total_tokens': 230, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': None, 'id': 'chatcmpl-BvnJoiTfzhXydjrfpBQcfMQs9wUpy', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--4d111111-08a2-43d3-bb4d-fae6a2c94d84-0', tool_calls=[{'name': 'm

In [16]:
# New thread = new conversation!
new_config = {"configurable": {"thread_id": "thread-b"}}
# The agent will only be able to recall
# whatever it explicitly saved using the `manage_memory` tool
response = agent.invoke(
    {
        "messages": [
            {
                "role": "user",
                "content": "Hey there. Do you remember me? What are my preferences?",
            }
        ]
    },
    config=new_config,
)

Hello! Yes, I remember you. One of your preferences is that you prefer dark display mode. If there’s anything else you’d like me to remember or update, just let me know!


# Background Memory

In [None]:
# Fresh store for this section (rebinds the notebook-level `store` name)
store = InMemoryStore(
    index={
        "dims": 1536,
        "embed": "openai:text-embedding-3-small",
    }
)
llm = init_chat_model("openai:gpt-4.1")

# Create memory manager Runnable to extract memories from conversations
memory_manager = create_memory_store_manager(
    "openai:gpt-4.1",
    # Store memories in the "memories" namespace (aka directory)
    namespace=("memories",),
)

# NOTE(review): in the visible cells `executor` is only referenced from
# commented-out code inside `chat` — confirm it is still needed.
executor = ReflectionExecutor(memory_manager)


@entrypoint(store=store)  # Create a LangGraph workflow
async def chat(message: str):
    """Answer ``message`` and extract memories from the exchange.

    Args:
        message: The user's message.

    Returns:
        ``response.content`` of the chat model's reply.
    """
    # Use the async API: a blocking `llm.invoke` inside a coroutine would
    # stall the event loop for the duration of the model call
    response = await llm.ainvoke(message)

    # memory_manager extracts memories from conversation history.
    # We provide the exchange in OpenAI's message format.
    to_process = {"messages": [{"role": "user", "content": message}, response]}

    # Extraction runs inline here: the reply is only returned once it finishes
    await memory_manager.ainvoke(to_process)

    # Alternative (disabled): defer extraction through the ReflectionExecutor.
    # If new messages arrive before the delay elapses:
    # 1. Cancel pending processing task
    # 2. Reschedule with new messages included
    # delay = 0.5 # In practice would choose longer (30-60 min)
    # depending on app context.
    # executor.submit(to_process, after_seconds=delay)

    return response.content

In [None]:
# Run conversation as normal
response = await chat.ainvoke(
    "I like dogs. My dog's name is Fido.",
)

In [23]:
# Print the items found under the ("memories",) namespace (search without a query)
for item in store.search(("memories",)):
    print(item)

Item(namespace=['memories'], key='5fd6443b-9473-414f-996c-676950d1c440', value={'kind': 'Memory', 'content': {'content': 'The user likes dogs and owns a dog named Fido. (p≈1.0, direct statement)'}}, created_at='2025-07-22T04:33:19.455254+00:00', updated_at='2025-07-22T04:33:19.455262+00:00', score=None)
Item(namespace=['memories'], key='52e8ffb4-9f32-4384-be89-77992aecefde', value={'kind': 'Memory', 'content': {'content': "No prior user-specific memories about pet preferences or dog ownership should be retained if contradicted by the user's clear statement in this session. (p=0.99, memory consistency check)"}}, created_at='2025-07-22T04:33:19.510119+00:00', updated_at='2025-07-22T04:33:19.510124+00:00', score=None)
Item(namespace=['memories'], key='830e532a-4ebb-4f4a-978d-4d8d6d60b8ff', value={'kind': 'Memory', 'content': {'content': "Avoid storing redundant facts about the user's dog beyond name and strong preference unless provided—further details (such as breed or quirks) should be 

# Extract Semantic Memories

## Without Storage

In [7]:
class Triple(BaseModel):
    """Store all new facts, preferences, and relationships as triples."""

    # NOTE(review): the docstring above and the field names are presumably
    # exposed to the model as the extraction schema — treat them as prompt
    # text, not as ordinary documentation.
    subject: str  # e.g. 'Alice' in the outputs below
    predicate: str  # e.g. 'manages'
    object: str  # e.g. 'ML team' (shadows the builtin name inside the class body only)
    context: str | None = None  # optional; defaults to None


# Configure extraction. In this section the caller passes prior memories in
# via "existing" and keeps the returned list itself.
manager = create_memory_manager(
    "openai:gpt-4.1",
    schemas=[Triple],
    instructions="Extract user preferences and any other useful information",
    enable_inserts=True,
    # Deletions show up as RemoveDoc entries (see the third conversation's output)
    enable_deletes=True,
)

In [8]:
# First conversation - extract triples (no "existing" memories are supplied)
conversation1 = [
    {
        "role": "user",
        "content": "Alice manages the ML team and mentors Bob, who is also on the team.",
    }
]
memories = manager.invoke({"messages": conversation1})
print("After first conversation:")
for m in memories:
    print(m)

After first conversation:
ExtractedMemory(id='b9ceb238-72d4-42ea-bed3-85f5eb2b6280', content=Triple(subject='Alice', predicate='manages', object='ML team', context=None))
ExtractedMemory(id='6ede4107-be96-414f-9eed-a7b972f18f73', content=Triple(subject='Alice', predicate='mentors', object='Bob', context=None))
ExtractedMemory(id='7dd8d962-3f36-40de-a794-8b5918ca8cd1', content=Triple(subject='Bob', predicate='memberOf', object='ML team', context=None))
ExtractedMemory(id='bbfc81f0-5c06-4a2b-bf9b-4ab75c5ae644', content=Triple(subject='Bob', predicate='menteeOf', object='Alice', context=None))


In [9]:
# Second conversation - hand back the earlier memories so they can be
# updated and extended
conversation2 = [
    dict(role="user", content="Bob now leads the ML team and the NLP project.")
]
update = manager.invoke(dict(messages=conversation2, existing=memories))
print("After second conversation:")
for m in update:
    print(m)

# Carry forward only the entries whose payload is a Triple
existing = list(filter(lambda mem: isinstance(mem.content, Triple), update))

After second conversation:
ExtractedMemory(id='b9ceb238-72d4-42ea-bed3-85f5eb2b6280', content=Triple(subject='Bob', predicate='manages', object='ML team', context=None))
ExtractedMemory(id='0ba16da7-cfbb-4114-bcb5-a63b1fe76b58', content=Triple(subject='Bob', predicate='leads', object='NLP project', context=None))
ExtractedMemory(id='6ede4107-be96-414f-9eed-a7b972f18f73', content=Triple(subject='Alice', predicate='mentors', object='Bob', context=None))
ExtractedMemory(id='7dd8d962-3f36-40de-a794-8b5918ca8cd1', content=Triple(subject='Bob', predicate='memberOf', object='ML team', context=None))
ExtractedMemory(id='bbfc81f0-5c06-4a2b-bf9b-4ab75c5ae644', content=Triple(subject='Bob', predicate='menteeOf', object='Alice', context=None))


In [11]:
# Show the memories that will be passed as "existing" to the next call
for m in existing:
    print(m)

ExtractedMemory(id='b9ceb238-72d4-42ea-bed3-85f5eb2b6280', content=Triple(subject='Bob', predicate='manages', object='ML team', context=None))
ExtractedMemory(id='0ba16da7-cfbb-4114-bcb5-a63b1fe76b58', content=Triple(subject='Bob', predicate='leads', object='NLP project', context=None))
ExtractedMemory(id='6ede4107-be96-414f-9eed-a7b972f18f73', content=Triple(subject='Alice', predicate='mentors', object='Bob', context=None))
ExtractedMemory(id='7dd8d962-3f36-40de-a794-8b5918ca8cd1', content=Triple(subject='Bob', predicate='memberOf', object='ML team', context=None))
ExtractedMemory(id='bbfc81f0-5c06-4a2b-bf9b-4ab75c5ae644', content=Triple(subject='Bob', predicate='menteeOf', object='Alice', context=None))


In [12]:
# Delete triples about an entity. In the output below, deletions are
# returned as RemoveDoc entries keyed by the removed memory's id.
conversation3 = [{"role": "user", "content": "Alice left the company."}]
final = manager.invoke({"messages": conversation3, "existing": existing})
print("After third conversation:")
for m in final:
    print(m)

After third conversation:
ExtractedMemory(id='98c2cb67-3e22-4a1f-875c-a7d8ffd0201d', content=Triple(subject='Alice', predicate='employmentStatus', object='former employee', context=None))
ExtractedMemory(id='6ede4107-be96-414f-9eed-a7b972f18f73', content=RemoveDoc(json_doc_id='6ede4107-be96-414f-9eed-a7b972f18f73'))
ExtractedMemory(id='bbfc81f0-5c06-4a2b-bf9b-4ab75c5ae644', content=RemoveDoc(json_doc_id='bbfc81f0-5c06-4a2b-bf9b-4ab75c5ae644'))
ExtractedMemory(id='b9ceb238-72d4-42ea-bed3-85f5eb2b6280', content=Triple(subject='Bob', predicate='manages', object='ML team', context=None))
ExtractedMemory(id='0ba16da7-cfbb-4114-bcb5-a63b1fe76b58', content=Triple(subject='Bob', predicate='leads', object='NLP project', context=None))
ExtractedMemory(id='7dd8d962-3f36-40de-a794-8b5918ca8cd1', content=Triple(subject='Bob', predicate='memberOf', object='ML team', context=None))


## With Storage

In [16]:
# Set up store and models
store = InMemoryStore(
    index={
        "dims": 1536,
        "embed": "openai:text-embedding-3-small",
    }
)
manager = create_memory_store_manager(
    "openai:gpt-4.1",
    # "{user_id}" is a template slot; the cells below supply it through
    # config["configurable"]["user_id"]
    namespace=("chat", "{user_id}", "triples"),
    schemas=[Triple],
    instructions="Extract all user information and events as triples.",
    enable_inserts=True,
    enable_deletes=True,
)
my_llm = init_chat_model("openai:gpt-4.1")

In [18]:
# Define app with store context


@entrypoint(store=store)
def app(messages: list):
    """Reply to ``messages`` and extract triples from them."""
    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant.",
    }
    response = my_llm.invoke([system_turn, *messages])

    # The store manager uses the store from the @entrypoint context;
    # only the incoming messages are analysed, not the reply
    manager.invoke({"messages": messages})
    return response

In [19]:
# First conversation. The user_id given here fills the "{user_id}" slot of
# the manager's namespace.
app.invoke(
    [
        {
            "role": "user",
            "content": "Alice manages the ML team and mentors Bob, who is also on the team.",
        },
    ],
    config={"configurable": {"user_id": "user123"}},
)

AIMessage(content='Thank you for sharing this information. If you have any questions or need help related to Alice managing the ML team, mentoring Bob, or anything else about their roles or interactions, please let me know!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 40, 'prompt_tokens': 33, 'total_tokens': 73, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_b3f1157249', 'id': 'chatcmpl-Bw9UOLjAFGlOPIOxy3sflXM8fX8Xk', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--ecbc0be1-749c-4ea4-a5b2-fbebeb47336d-0', usage_metadata={'input_tokens': 33, 'output_tokens': 40, 'total_tokens': 73, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [20]:
# Second conversation (same user_id, so the same namespace is updated)
app.invoke(
    [
        {"role": "user", "content": "Bob now leads the ML team and the NLP project."},
    ],
    config={"configurable": {"user_id": "user123"}},
)

AIMessage(content="Got it! To confirm, Bob is currently leading both the ML team and the NLP project. If you need this documented, included in meeting minutes, or communicated to others, let me know how you'd like to proceed!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 44, 'prompt_tokens': 28, 'total_tokens': 72, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_799e4ca3f1', 'id': 'chatcmpl-Bw9VbjahqzwM8SV8k6jwUl89cB8Ed', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--1ec1ed26-1c78-4189-aff8-e979cfc5f083-0', usage_metadata={'input_tokens': 28, 'output_tokens': 44, 'total_tokens': 72, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}

In [21]:
# Third conversation (same user_id, so the same namespace is updated)
app.invoke(
    [
        {"role": "user", "content": "Alice left the company."},
    ],
    config={"configurable": {"user_id": "user123"}},
)

AIMessage(content='Thank you for letting me know. Would you like any assistance with updating records, notifying team members, or handling tasks/projects that Alice was responsible for? Please let me know how I can help.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 22, 'total_tokens': 61, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_51e1070cf2', 'id': 'chatcmpl-Bw9VmdnVU5oqNDRluLEALj36GnrGK', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--0e55156e-3f41-431e-b5c8-8ed5db3f0b4f-0', usage_metadata={'input_tokens': 22, 'output_tokens': 39, 'total_tokens': 61, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [22]:
# Check stored triples. Searching the ("chat", "user123") prefix returns the
# items stored under the nested ("chat", "user123", "triples") namespace.
for item in store.search(("chat", "user123")):
    print(item.namespace, item.value)

('chat', 'user123', 'triples') {'kind': 'Triple', 'content': {'subject': 'Bob', 'predicate': 'member_of', 'object': 'ML team', 'context': 'session_e48eaf98-2f6c-4000-81c7-8dbaa7bab5e9'}}
('chat', 'user123', 'triples') {'kind': 'Triple', 'content': {'subject': 'Bob', 'predicate': 'leads', 'object': 'NLP project', 'context': 'session_8810f103-dc24-426e-8695-83963e9913af'}}
('chat', 'user123', 'triples') {'kind': 'Triple', 'content': {'subject': 'Bob', 'predicate': 'leads', 'object': 'ML team', 'context': 'session_8810f103-dc24-426e-8695-83963e9913af'}}
('chat', 'user123', 'triples') {'kind': 'Triple', 'content': {'subject': 'Alice', 'predicate': 'left_company', 'object': 'True', 'context': 'session_78185331-02e0-480e-8488-1ab834297d27'}}


## Memory Manager Agent

In [23]:
# Set up store and chat model (no checkpointer is created in this section)
store = InMemoryStore(
    index={
        "dims": 1536,
        "embed": "openai:text-embedding-3-small",
    }
)
my_llm = init_chat_model("openai:gpt-4.1")


def prompt(state):
    """Prepare messages with context from existing memories.

    Args:
        state: Agent state; ``state["messages"][-1].content`` is used as the
            search query.

    Returns:
        A list holding the system message followed by the messages in ``state``.
    """
    # Resolve the store from the run context (as the hot-path `prompt` does)
    # instead of reading the notebook-global `store`, which several cells
    # rebind; this keeps the search on the store the current run is bound to.
    memories = get_store().search(
        ("memories",),
        query=state["messages"][-1].content,
    )
    system_msg = f"""You are a memory manager. Extract and manage all important knowledge, rules, and events using the provided tools.



Existing memories:
<memories>
{memories}
</memories>

Use the manage_memory tool to update and contextualize existing memories, create new ones, or delete old ones that are no longer valid.
You can also expand your search of existing memories to augment using the search tool."""
    return [{"role": "system", "content": system_msg}, *state["messages"]]


# Create the memory extraction agent. No `store=` is passed here;
# presumably the tools use the store of the entrypoint that invokes it — TODO confirm.
manager = create_react_agent(
    "openai:gpt-4.1",
    prompt=prompt,
    tools=[
        # Agent can create/update/delete memories
        create_manage_memory_tool(namespace=("memories",)),
        # ...and search them beyond what `prompt` already injects
        create_search_memory_tool(namespace=("memories",)),
    ],
)


# Reply to the user, then run the memory-manager agent on the same messages
@entrypoint(store=store)
def app(messages: list):
    """Answer ``messages`` and let the memory agent record what it learns."""
    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant.",
    }
    response = my_llm.invoke([system_turn, *messages])

    # Extract and store memories (uses the store from the @entrypoint context);
    # the call is synchronous and its result is discarded
    manager.invoke({"messages": messages})
    return response

In [24]:
# Send one message through the app; the memory agent runs as part of the call
app.invoke(
    [
        {
            "role": "user",
            "content": "Alice manages the ML team and mentors Bob, who is also on the team.",
        }
    ]
)

AIMessage(content='Got it! Alice manages the ML team and mentors Bob, who is also on that team. How can I assist you regarding Alice, Bob, or the team?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 33, 'total_tokens': 66, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_b3f1157249', 'id': 'chatcmpl-Bw9cAnXj0YfKkdLWkYWzr0jjSMtRG', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--b5fb7ec2-6c75-4ec3-8a2e-2f66ca70bca3-0', usage_metadata={'input_tokens': 33, 'output_tokens': 33, 'total_tokens': 66, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [27]:
# Inspect what the memory agent saved under the ("memories",) namespace
for m in store.search(("memories",)):
    print(m)

Item(namespace=['memories'], key='db4e81d4-6d2e-4d49-a3a2-98d11fb8c7c3', value={'content': 'Alice manages the ML team and mentors Bob, who is also on the team.'}, created_at='2025-07-22T15:54:41.798262+00:00', updated_at='2025-07-22T15:54:41.798269+00:00', score=None)


# Manage User Profiles

## Basic Usage

In [7]:
# Define profile structure


class UserProfile(BaseModel):
    """Represents the full representation of a user."""

    # NOTE(review): the docstring and field names are presumably exposed to the
    # model as the extraction schema — treat them as prompt text.
    # Every field is optional and defaults to None.
    name: str | None = None
    language: str | None = None
    timezone: str | None = None  # e.g. 'America/Los_Angeles' in the output below


# Configure extraction
manager = create_memory_manager(
    "openai:gpt-4.1",
    schemas=[UserProfile],  # (optional) customize schema
    instructions="Extract user profile information",
    enable_inserts=False,  # Profiles update in-place
)

# First conversation (no "existing" profile is supplied yet)
conversation1 = [{"role": "user", "content": "I'm Alice from California"}]
memories = manager.invoke({"messages": conversation1})
for memory in memories:
    print(memory)

ExtractedMemory(id='03b0984a-b77b-411c-9a9c-4a16e9c6dd4f', content=UserProfile(name='Alice', language=None, timezone='America/Los_Angeles'))


In [8]:
# Second conversation updates existing profile: the earlier result is passed
# back as "existing", and the output keeps the same memory id
conversation2 = [{"role": "user", "content": "I speak Spanish too!"}]
updates = manager.invoke({"messages": conversation2, "existing": memories})
for update in updates:
    print(update)

ExtractedMemory(id='03b0984a-b77b-411c-9a9c-4a16e9c6dd4f', content=UserProfile(name='Alice', language='Spanish', timezone='America/Los_Angeles'))


## With LangGraph's Long-term Memory Store

In [11]:
# Set up store and models
store = InMemoryStore(
    index={
        "dims": 1536,
        "embed": "openai:text-embedding-3-small",
    }
)
my_llm = init_chat_model("openai:gpt-4.1")

# Create profile manager
manager = create_memory_store_manager(
    "openai:gpt-4.1",
    # "{user_id}" is a template slot; the cells below supply it through
    # config["configurable"]["user_id"]
    namespace=("users", "{user_id}", "profile"),  # Isolate profiles by user
    schemas=[UserProfile],
    enable_inserts=False,  # Update existing profile only
)


@entrypoint(store=store)
def chat(messages: list):
    """Reply to ``messages`` using the stored user profile, then update that profile.

    Args:
        messages: Chat messages to send to the model after the system message.

    Returns:
        The chat model's reply.

    Raises:
        KeyError: If the run config has no ``configurable["user_id"]``.
    """
    # Get user's profile for personalization
    user_id = get_config()["configurable"]["user_id"]
    # Use the store bound to this entrypoint rather than the notebook-global
    # `store` name, which several cells rebind
    results = get_store().search(("users", user_id, "profile"))

    # Start from the base prompt and append the profile only when one exists;
    # interpolating a `None` placeholder would put the text "None" in the prompt
    system_prompt = "You are a helpful assistant."
    if results:
        system_prompt += f"""

<User Profile>:

{results[0].value}
</User Profile>
"""

    # Use profile in system message
    response = my_llm.invoke(
        [
            {"role": "system", "content": system_prompt},
            *messages,
        ]
    )

    # Update profile with any new information
    manager.invoke({"messages": messages})
    return response

In [12]:
# Example usage: the user_id selects which profile namespace is read and updated
await chat.ainvoke(
    [{"role": "user", "content": "I'm Alice from California"}],
    config={"configurable": {"user_id": "user-123"}},
)

AIMessage(content="Hi Alice! It's great to meet you. How can I help you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 21, 'total_tokens': 37, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_799e4ca3f1', 'id': 'chatcmpl-BwgrB7yX4JPQNm8YUTYtTvvG7E4iq', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--c62a049e-49a3-4aea-bde2-d71339e9b2b7-0', usage_metadata={'input_tokens': 21, 'output_tokens': 16, 'total_tokens': 37, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [13]:
# Second message for the same user_id; the reply below addresses the user by
# name, which this message alone does not contain
await chat.ainvoke(
    [{"role": "user", "content": "I just passed the N1 exam!"}],
    config={"configurable": {"user_id": "user-123"}},
)

AIMessage(content='That’s incredible, Alice—congratulations on passing the N1! That’s the highest level of the Japanese Language Proficiency Test, and it’s a huge achievement. All your hard work has paid off! What are you planning to do next with your Japanese skills?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 55, 'prompt_tokens': 59, 'total_tokens': 114, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_b3f1157249', 'id': 'chatcmpl-BwgrUwfpw9b3Js4VQzMEk59RIEFpI', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--e7570446-a429-444d-9338-464e7af1f708-0', usage_metadata={'input_tokens': 59, 'output_tokens': 55, 'total_tokens': 114, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_

In [15]:
# Inspect the profile stored for user-123
for memory in store.search(("users", "user-123", "profile")):
    print(memory)

Item(namespace=['users', 'user-123', 'profile'], key='44269228-4798-41fd-a66a-cf09a54717d0', value={'kind': 'UserProfile', 'content': {'name': 'Alice', 'language': 'Japanese (advanced/N1)', 'timezone': None}}, created_at='2025-07-24T03:24:52.871577+00:00', updated_at='2025-07-24T03:24:52.871582+00:00', score=None)


# Extract Episodic Memories

## Without storage

In [17]:
class Episode(BaseModel):
    """Write the episode from the perspective of the agent within it. Use the benefit of hindsight to record the memory, saving the agent's key internal thought process so it can learn over time."""

    # NOTE(review): the docstring above and the Field descriptions below are
    # presumably sent to the model as the extraction schema — treat them as
    # prompt text. All four fields are required (`...` as the default).
    observation: str = Field(..., description="The context and setup - what happened")
    thoughts: str = Field(
        ...,
        description="Internal reasoning process and observations of the agent in the episode that let it arrive"
        ' at the correct action and result. "I ..."',
    )
    action: str = Field(
        ...,
        description="What was done, how, and in what format. (Include whatever is salient to the success of the action). I ..",
    )
    result: str = Field(
        ...,
        description="Outcome and retrospective. What did you do well? What could you do better next time? I ...",
    )


# Episode extractor for this section; results are returned to the caller
manager = create_memory_manager(
    "openai:gpt-4.1",
    schemas=[Episode],
    instructions="Extract examples of successful explanations, capturing the full chain of reasoning. Be concise in your explanations and precise in the logic of your reasoning.",
    enable_inserts=True,
)

In [18]:
# A short exchange in which the user's follow-up signals that the
# explanation worked
conversation = [
    {
        "role": "user",
        "content": "What's a binary tree? I work with family trees if that helps",
    },
    {
        "role": "assistant",
        "content": "A binary tree is like a family tree, but each parent has at most 2 children. Here's a simple example:\n   Bob\n  /  \\\nAmy  Carl\n\nJust like in family trees, we call Bob the 'parent' and Amy and Carl the 'children'.",
    },
    {
        "role": "user",
        "content": "Oh that makes sense! So in a binary search tree, would it be like organizing a family by age?",
    },
]

# Extract episodes from the whole conversation and print each one
episodes = manager.invoke({"messages": conversation})
for episodic in episodes:
    print(episodic)

ExtractedMemory(id='dc32c087-c02d-4d28-a74e-81b813a7106f', content=Episode(observation="A user asked what a binary tree is, mentioning familiarity with family trees. I explained that a binary tree is like a family tree where each parent has at most 2 children, providing a simple textual example and drawing parallels to the 'parent' and 'children' terminology.", thoughts="I recognized the user's domain knowledge (family trees) and mapped the concept of a binary tree onto that, using familiar terms and a clear visual analogy. That ensured immediate intuitive understanding.", action="Explained the binary tree by using an analogy to family trees, relating the terms 'parent' and 'children', and providing a simple example.", result='The user quickly grasped the concept, demonstrating understanding in the next question. The explanation was successful because it activated their prior knowledge and used accessible language and structure.'))
ExtractedMemory(id='c2c543f8-4e64-4ec2-b276-f963fba1a4

## With storage

In [19]:
# Set up vector store for similarity search
store = InMemoryStore(
    index={
        "dims": 1536,
        "embed": "openai:text-embedding-3-small",
    }
)

# Configure memory manager with storage
manager = create_memory_store_manager(
    "openai:gpt-4.1",
    # Same namespace that `app` searches for similar episodes
    namespace=("memories", "episodes"),
    schemas=[Episode],
    instructions="Extract exceptional examples of noteworthy problem-solving scenarios, including what made them effective.",
    enable_inserts=True,
)

# Chat model that produces the user-facing replies
llm = init_chat_model("openai:gpt-4.1")


@entrypoint(store=store)
def app(messages: list):
    """Answer ``messages`` with help from a similar past episode, then record this one.

    Args:
        messages: Chat history as role/content dicts; the last entry's
            "content" is used as the similarity-search query.

    Returns:
        The chat model's reply.
    """
    # Step 1: Find similar past episodes.
    # Use the store bound to this entrypoint rather than the notebook-global
    # `store` name, which several cells rebind.
    similar = get_store().search(
        ("memories", "episodes"),
        query=messages[-1]["content"],
        limit=1,
    )

    # Step 2: Build system message with relevant experience
    system_message = "You are a helpful assistant."
    if similar:
        system_message += "\n\n### EPISODIC MEMORY:"
        for i, item in enumerate(similar, start=1):
            # Stored values wrap the Episode fields under the "content" key
            episode = item.value["content"]
            system_message += f"""

Episode {i}:
When: {episode['observation']}
Thought: {episode['thoughts']}
Did: {episode['action']}
Result: {episode['result']}
        """

    # Step 3: Generate response using past experience
    response = llm.invoke([{"role": "system", "content": system_message}, *messages])

    # Step 4: Store this interaction. The reply is included so the extractor
    # sees the whole exchange, not only the user's side of it.
    manager.invoke({"messages": [*messages, response]})
    return response

In [20]:
# First question: the user volunteers their own domain (family trees)
binary_tree_question = {
    "role": "user",
    "content": "What's a binary tree? I work with family trees if that helps",
}
app.invoke([binary_tree_question])

AIMessage(content='Great context! A **binary tree** is a concept from computer science, but it actually has some similarities to how family trees are structured.\n\n### In Short:\n- **Binary Tree:** Each "parent" in the tree can have up to **two** "children."\n\n### Compared to Family Trees:\n- If you imagine your family tree, a typical **binary tree** would be like showing just two children per person—a left child and a right child.\n- In real family trees, people might have more (or fewer) than two children, but in a binary tree, it\'s always at most two.\n\n### More Details:\n- **Nodes:** Each person (or item) in the tree is called a **node**.\n- **Edges:** The connections (parent to child) are called **edges**.\n- **Root:** The "top" node (like a common ancestor) is called the **root**.\n- **Leaves:** Nodes with no children (family members without descendants).\n\n### Visualization Example:\n```\n     Annie\n    /     \\\n Beth     Carl\n```\nHere, Annie is the root, with two child

In [21]:
# Inspect the stored episodes that match a tree-related query
tree_matches = store.search(("memories", "episodes"), query="Trees")
for stored_item in tree_matches:
    print(stored_item)

Item(namespace=['memories', 'episodes'], key='497a515c-dd87-4d3e-8f10-9bfc8b0f5d39', value={'kind': 'Episode', 'content': {'observation': 'User asked for the definition of a binary tree, mentioning their familiarity with family trees. I recognized an opportunity to tailor the example by relating it to their experience, which can aid comprehension.', 'thoughts': "I considered that by leveraging the user's knowledge of family trees, I could explain binary trees more intuitively. Connecting new concepts to the user's expertise increases the chance of successful understanding. The key is to translate a technical structure (binary tree) into terms the user already grasps.", 'action': "I explained a binary tree as a kind of tree structure where each node has at most two 'children,' similar to how a person in a family tree might only list two children. I used analogies to family trees to illustrate left/right child concepts and related terminology like 'node' and 'root.'", 'result': 'The user

In [22]:
# Follow-up question on a different topic, asked after an episode has been stored
linked_list_question = {
    "role": "user",
    "content": "What's a linked list?",
}
app.invoke([linked_list_question])

AIMessage(content='A linked list is a way of organizing data so that each item (called a "node") contains two parts:\n\n1. The actual data (like a name, number, etc.)\n2. A reference (or "link") to the next item in the list.\n\nImagine a treasure hunt: each clue you find contains the next location, and so on, forming a chain. In a linked list, every node points to the next node, and the chain continues until the end (which is often marked by a link that says "null" or "none").\n\n**Main points:**\n- Linked lists are made up of nodes linked together one after another.\n- Unlike an array, the elements aren’t stored in a row in memory; they just point to the next.\n- You can easily add or remove items without reorganizing the whole structure.\n\n**Example using a real-life analogy:**\nThink of a linked list like a family photo album where each photo has a sticky note on the back with “where to find the next photo.” To look at every photo, you start at the beginning and follow the notes on

# How to Use Memory Tools

## Basic Usage

In [24]:
# Set up an in-memory store with an embedding index for similarity search
embedding_index = {
    "dims": 1536,
    "embed": "openai:text-embedding-3-small",
}
store = InMemoryStore(index=embedding_index)

In [25]:
# Namespace template shared by both memory tools; the "{user_id}" placeholder
# is what makes the namespace configurable at runtime
user_namespace = ("memories", "{user_id}")

memory_tools = [
    create_manage_memory_tool(namespace=user_namespace),
    create_search_memory_tool(namespace=user_namespace),
]

# Agent equipped with the memory tools and backed by the store
agent = create_react_agent(
    "openai:gpt-4.1",
    store=store,
    tools=memory_tools,
)

## Shared Storage

In [26]:
# Both agents search the same team-level namespace, but each one writes
# into its own sub-namespace beneath it.
team_namespace = ("memories", "team_a")

# Agent A: writes to its own namespace, reads from the shared team namespace
agent_a_tools = [
    create_manage_memory_tool(namespace=(*team_namespace, "agent_a")),
    create_search_memory_tool(namespace=team_namespace),
]
agent_a = create_react_agent(
    "openai:gpt-4.1",
    prompt="You are a research assistant",
    tools=agent_a_tools,
    store=store,
)

# Agent B: same shared read access, different write namespace and prompt
agent_b_tools = [
    create_manage_memory_tool(namespace=(*team_namespace, "agent_b")),
    create_search_memory_tool(namespace=team_namespace),
]
agent_b = create_react_agent(
    "openai:gpt-4.1",
    prompt="You are a report writer.",
    tools=agent_b_tools,
    store=store,
)

In [27]:
# Quick check that agent B responds to a plain greeting
greeting = {"role": "user", "content": "Hi"}
agent_b.invoke({"messages": [greeting]})

{'messages': [HumanMessage(content='Hi', additional_kwargs={}, response_metadata={}, id='f94b0780-4c38-4bad-b09e-6cd3941a6bf6'),
  AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 245, 'total_tokens': 255, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_b3f1157249', 'id': 'chatcmpl-BwhPhkq0NJPMiof9wE3653tVmMenD', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--3f703006-a9c2-4bf8-a339-cd876d63c892-0', usage_metadata={'input_tokens': 245, 'output_tokens': 10, 'total_tokens': 255, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]}

# How to Optimize a Prompt

In [30]:
# Multiple conversations showing what to improve
trajectories = [
    # Conversation with no annotations (just the conversation)
    (
        [
            {"role": "user", "content": "Tell me about Mars"},
            {"role": "assistant", "content": "Mars is the fourth planet..."},
            {"role": "user", "content": "I wanted more about its moons"},
        ],
        None,
    ),
    (
        [
            {"role": "user", "content": "What are Mars' moons?"},
            {
                "role": "assistant",
                "content": "Mars has two moons: Phobos and Deimos...",
            },
        ],
        {
            "score": 0.9,
            "comment": "Should include more details and recommended follow-up questions",
        },
    ),
    # Annotations can be of different types, such as edits/revisions!
    (
        [
            {"role": "user", "content": "Compare Mars and Earth"},
            {"role": "assistant", "content": "Mars and Earth have many differences..."},
        ],
        {"revised": "Earth and Mars have many similarities and differences..."},
    ),
]

In [31]:
# Metaprompt optimizer, capped at a single reflection step
metaprompt_config = {"max_reflection_steps": 1, "min_reflection_steps": 0}
optimizer = create_prompt_optimizer(
    "openai:gpt-4.1",
    config=metaprompt_config,
    kind="metaprompt",
)

# Improve the starting prompt based on the annotated trajectories
optimizer_input = {
    "trajectories": trajectories,
    "prompt": "You are a planetary science expert",
}
updated = optimizer.invoke(optimizer_input)
print(updated)

You are a planetary science expert. For every question, provide clear, detailed, and informative answers. Where relevant, include interesting facts, detailed explanations, and comparisons. Always suggest possible follow-up questions or related topics the user might find intriguing.


## Gradient Optimizer

In [32]:
# Gradient optimizer (2-10 LLM calls): between one and three improvement cycles
gradient_config = {
    "max_reflection_steps": 3,  # Max improvement cycles
    "min_reflection_steps": 1,  # Min improvement cycles
}
optimizer = create_prompt_optimizer(
    "openai:gpt-4.1",
    config=gradient_config,
    kind="gradient",
)

# Improve the starting prompt based on the annotated trajectories
optimizer_input = {
    "trajectories": trajectories,
    "prompt": "You are a planetary science expert",
}
updated = optimizer.invoke(optimizer_input)
print(updated)

You are a planetary science expert. When answering planetary science queries, provide not only the direct fact or answer but also extra context, interesting related details, and offer 1-2 follow-up questions or avenues for deeper exploration if relevant. Avoid overly terse or generic responses.


## Prompt Memory Optimizer

In [33]:
# Prompt-memory optimizer (1 LLM call), using its default configuration
optimizer = create_prompt_optimizer("openai:gpt-4.1", kind="prompt_memory")

# Improve the starting prompt based on the annotated trajectories
optimizer_input = {
    "trajectories": trajectories,
    "prompt": "You are a planetary science expert",
}
updated = optimizer.invoke(optimizer_input)
print(updated)

You are a planetary science expert. When answering questions, provide detailed and informative responses. Where appropriate, include interesting facts, comparisons, and context. After your main answer, suggest relevant follow-up questions the user might be interested in to encourage further exploration of the topic.


# How to Optimize Multiple Prompts

In [36]:
# Example team: researcher finds information, writer creates reports
conversations = [
    (
        [
            {"role": "user", "content": "Research quantum computing advances"},
            {
                "role": "assistant",
                "content": "Found several papers on quantum supremacy...",
            },
            {
                "role": "assistant",
                "content": "Recent quantum computing developments show...",
            },
            {"role": "user", "content": "The report is missing implementation details"},
        ],
        # No explicit feedback provided but the optimizer can infer from the conversation
        None,
    ),
    (
        [
            {"role": "user", "content": "Analyze new ML models"},
            {"role": "assistant", "content": "Key findings on architecture: ..."},
            {"role": "assistant", "content": "Based on the research, these models..."},
            {"role": "user", "content": "Great report, very thorough"},
        ],
        # Numeric score for the team as a whole
        {"score": 0.95},
    ),
]

# Define prompts for each role
prompts = [
    {
        "name": "researcher",
        "prompt": "You analyze technical papers and extract key findings",
    },
    {"name": "writer", "prompt": "You write clear reports based on research findings"},
]

# Multi-prompt optimizer; the gradient kind is best for team dynamics
multi_prompt_config = {"max_reflection_steps": 3}
optimizer = create_multi_prompt_optimizer(
    "openai:gpt-4.1",
    config=multi_prompt_config,
    kind="gradient",
)

In [38]:
# Update all prompts based on team performance
updated = optimizer.invoke({"trajectories": conversations, "prompts": prompts})
for u in updated:
    print(u)

{'name': 'researcher', 'prompt': 'You analyze technical papers and extract key findings'}
{'name': 'writer', 'prompt': 'You write clear reports based on research findings. When writing about technical subjects, always include relevant implementation details or practical aspects necessary for understanding or applying the findings.'}
