In [1]:
from dotenv import find_dotenv, load_dotenv

# Look up the .env file first, then load it; the load call stays the last
# expression so the notebook still displays its return value
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [None]:
# Import core components
from langgraph.prebuilt import create_react_agent
from langgraph.store.memory import InMemoryStore
from langmem import create_manage_memory_tool, create_search_memory_tool

# In-memory store configured with an embedding index
embedding_index = {
    "dims": 1536,
    "embed": "openai:text-embedding-3-small",
}
store = InMemoryStore(index=embedding_index)

# Both memory tools are pointed at the same ("memories",) namespace
memory_tools = [
    create_manage_memory_tool(namespace=("memories",)),
    create_search_memory_tool(namespace=("memories",)),
]

# ReAct agent that receives the memory tools and the store defined above
agent = create_react_agent("openai:gpt-4o-mini", tools=memory_tools, store=store)

In [None]:
# First call: ask the agent to remember a preference
remember_request = {"messages": [{"role": "user", "content": "Remember that I prefer dark mode."}]}
agent.invoke(remember_request)

# Second call: ask about that preference and print the last message's content
recall_request = {"messages": [{"role": "user", "content": "What are my lighting preferences?"}]}
response = agent.invoke(recall_request)
print(response["messages"][-1].content)
# Output: "You've told me that you prefer dark mode."

In [25]:
import time

from langchain.chat_models import init_chat_model
from langgraph.func import entrypoint
from langgraph.store.memory import InMemoryStore
from langmem import create_memory_store_manager

# In-memory store configured with an embedding index
store = InMemoryStore(index=dict(dims=1536, embed="openai:text-embedding-3-small"))

# Chat model used to answer the user
llm = init_chat_model("openai:gpt-4o-mini")

# Memory manager Runnable that extracts memories from conversations;
# it is scoped to the "memories" namespace (aka directory)
memory_manager = create_memory_store_manager("openai:gpt-4o-mini", namespace=("memories",))


@entrypoint(store=store)  # Create a LangGraph workflow
async def chat(message: str):
    """Answer ``message`` with the LLM, then extract memories from the exchange.

    The user message and the model's reply are handed to ``memory_manager``,
    and the time spent in that memory-extraction step is printed.

    Args:
        message: The user's message text.

    Returns:
        The ``content`` of the LLM response.
    """
    print("Calling llm")
    # Await the async API: a synchronous invoke() inside a coroutine would
    # block the event loop for the whole duration of the model call
    response = await llm.ainvoke(message)
    print("LLM response: ", response)

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() can
    start = time.perf_counter()
    # memory_manager extracts memories from conversation history.
    # The user turn is given in OpenAI's message format, followed by the reply.
    to_process = {"messages": [{"role": "user", "content": message}, response]}

    print("Calling memory manager")
    await memory_manager.ainvoke(to_process)
    end = time.perf_counter()
    print(f"Time taken: {end - start} seconds")
    return response.content

In [None]:
# Run conversation as normal
response = await chat.ainvoke(
    "I like dogs. My dog's name is Fido.",
)

print(response)
# Output: That's nice! Dogs make wonderful companions. Fido is a classic dog name. What kind of dog is Fido?

In [None]:
# Print whatever the store returns for the ("memories",) namespace
memories_namespace = ("memories",)
stored_memories = store.search(memories_namespace)
print(stored_memories)

In [2]:
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langmem.short_term import RunningSummary, summarize_messages

# Chat model that answers the user
model = ChatOpenAI(model="gpt-4o")

# NOTE: the summarization variant also caps max output tokens. Keep this
# value equal to max_summary_tokens in `summarize_messages` for better
# token budget estimates.
summary_output_cap = {"max_tokens": 128}
summarization_model = model.bind(**summary_output_cap)


# Graph state: everything MessagesState provides, plus the running summary
# that call_model reads and updates between conversation turns
class SummaryState(MessagesState):
    # Most recent running summary; the annotation allows None
    summary: RunningSummary | None


# Graph node: summarize the history if needed, then call the LLM
def call_model(state: SummaryState) -> SummaryState:
    """Call the chat model on the (possibly summarized) message history.

    Args:
        state: Graph state holding ``messages`` and, optionally, ``summary``.

    Returns:
        A state update containing the model's reply under ``messages`` and,
        when ``summarize_messages`` reports a running summary, that summary
        under ``summary`` (overwriting any earlier one).
    """
    # Summarization is attempted before every LLM call. If state["messages"]
    # fits into the max tokens budget, the messages come back as they are;
    # otherwise the result is [summary_message] + remaining_messages.
    summarized = summarize_messages(
        state["messages"],
        # IMPORTANT: hand over the existing running summary, if any, so the
        # same messages are not re-summarized on every conversation turn
        running_summary=state.get("summary"),
        # An LLM-specific token counter is used here; by default
        # approximate token counting would be used
        token_counter=model.get_num_tokens_from_messages,
        model=summarization_model,
        max_tokens=256,
        max_summary_tokens=128,
    )
    reply = model.invoke(summarized.messages)

    # Only include the summary key when a summary is available
    if summarized.running_summary:
        return {"messages": [reply], "summary": summarized.running_summary}
    return {"messages": [reply]}


# Assemble a one-node graph: START -> call_model
checkpointer = InMemorySaver()
builder = StateGraph(SummaryState)
builder.add_node(call_model)
builder.add_edge(START, "call_model")
# Compiling with a checkpointer matters: without it the graph
# won't remember previous conversation turns
graph = builder.compile(checkpointer=checkpointer)

# Every turn uses the same thread id
config = {"configurable": {"thread_id": "1"}}
for earlier_turn in (
    "hi, my name is bob",
    "write a short poem about cats",
    "now do the same but for dogs",
):
    graph.invoke({"messages": earlier_turn}, config)
# Kept as the bare last expression so the notebook displays the final state
graph.invoke({"messages": "what's my name?"}, config)

{'messages': [HumanMessage(content='hi, my name is bob', additional_kwargs={}, response_metadata={}, id='c900d1a8-12e2-43f4-a50e-08997fb477d6'),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 13, 'total_tokens': 24, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_6ec83003ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-d36879da-fc8c-44aa-af51-33b286a1d2dd-0', usage_metadata={'input_tokens': 13, 'output_tokens': 11, 'total_tokens': 24, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),
  HumanMessage(content='write a short poem about cats', additional_kwargs={}, response_metadata