### LangChain Tutorial: Chatbot with Persistent Memory

https://python.langchain.com/docs/tutorials/chatbot/

In [20]:
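# Install packages if not already installed.
# The version specifier is quoted so the shell does not treat ">" as an output redirect.
# %pip install -qU "langchain[google-genai]" langchain-core "langgraph>0.2.27"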

In [21]:
# API keys and environment variables
import getpass
import os

# LangSmith is optional tooling for debugging, testing and monitoring the
# application; tracing is switched on unconditionally here.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt (without echoing the input) only for credentials that are not already set.
for env_name, prompt_text in (
    ("LANGSMITH_API_KEY", "Enter API key for LangSmith: "),
    ("GOOGLE_API_KEY", "Enter API key for Google Gemini: "),
):
    if not os.environ.get(env_name):
        os.environ[env_name] = getpass.getpass(prompt_text)

In [22]:
# Load model
from langchain.chat_models import init_chat_model

# Build the Gemini chat model through LangChain's provider-agnostic factory.
model = init_chat_model(
    model="gemini-2.5-flash",
    model_provider="google_genai",
)

In [23]:
# Define a prompt template with a system message and input
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# The system message fixes the persona; the placeholder is filled at invoke
# time with whatever is passed under the "messages" key.
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You talk like a pirate. Answer all questions to the best of your ability."),
    MessagesPlaceholder(variable_name="messages"),
])

In [24]:
# Add an in-memory checkpointer by wrapping the model in a simple LangGraph application

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Start a new graph whose state uses LangGraph's MessagesState schema
workflow = StateGraph(MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    """Run the chat model on the conversation held in ``state``.

    Args:
        state: Graph state. It is passed as-is to ``prompt_template``, which
            fills its ``messages`` placeholder from it.

    Returns:
        A state update of the form ``{"messages": [response]}``. The reply is
        wrapped in a list so that this node returns the same shape as the
        later ``call_model`` definitions in this notebook.
    """
    prompt = prompt_template.invoke(state)
    response = model.invoke(prompt)
    return {"messages": [response]}


# Register the (single) node, then route the graph's entry point to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with an in-memory checkpointer so state is saved between invocations
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [25]:
# Invoke the application
from langchain_core.messages import HumanMessage

# The thread_id identifies the conversation; it is reused by the next cell
config = {"configurable": {"thread_id": "abc345"}}
query = "Hi! My Name is PK"

input_messages = [HumanMessage(content=query)]
output = app.invoke({"messages": input_messages}, config)

# The returned state contains all messages; print only the newest one
output["messages"][-1].pretty_print()


Ahoy there, PK! A fine name for a landlubber, or perhaps even a future scallywag! Welcome aboard, matey! What be yer business here today, eh?


In [26]:
# Ask a follow-up with the same config (same thread_id) to check that the
# chatbot remembers the previous turn
query = "What is my name?"

input_messages = [HumanMessage(content=query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


Shiver me timbers! Yer name, ye just told me, be **PK**, didn't ye? No need to be forgettin' it so soon, me hearty!


In [27]:
# Rebuild the prompt so that the system message takes a {language} input variable
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer all questions to the best of your ability in {language}."),
    MessagesPlaceholder(variable_name="messages"),
])

In [28]:
# Update the application
from typing import Sequence

from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict


class State(TypedDict):
    """Graph state: the chat history plus the language used in the system prompt."""
    # Conversation history; add_messages is attached as annotation metadata
    # (presumably used by LangGraph as the merge function for updates to this
    # key — confirm against the langgraph docs).
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Fills the {language} variable of prompt_template.
    language: str


# New graph built on the custom State schema (messages + language)
workflow = StateGraph(State)


def call_model(state: State):
    """Render the prompt from ``state`` and return the model's reply as a state update."""
    rendered_prompt = prompt_template.invoke(state)
    reply = model.invoke(rendered_prompt)
    return {"messages": [reply]}


# Register the node first, then connect the entry point to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Fresh in-memory checkpointer for the recompiled application
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [29]:
# The application now takes two inputs: the messages and the reply language
config = {"configurable": {"thread_id": "abc1234"}}
query = "Hi! I'm PK."
language = "Italian"

input_messages = [HumanMessage(content=query)]
output = app.invoke({"messages": input_messages, "language": language}, config)
output["messages"][-1].pretty_print()


Ciao PK! P
Piacere di conoscerti. Come posso aiutarti oggi?


In [30]:
# "language" is omitted here; the reply below is still Italian, so the value
# stored for this thread by the previous call is evidently reused
query = "What is my name?"

input_messages = [HumanMessage(content=query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


Il tuo nome è PK, come mi hai detto prima.


In [31]:
# Dump the whole returned state: every message in the thread, with its metadata
print(output)

{'messages': [HumanMessage(content="Hi! I'm PK.", additional_kwargs={}, response_metadata={}, id='57855016-e609-448b-932f-8cc0b39657df'), AIMessage(content='Ciao PK! P\nPiacere di conoscerti. Come posso aiutarti oggi?', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--0ce6aaeb-1d6c-4521-97c8-63345f964b17-0', usage_metadata={'input_tokens': 26, 'output_tokens': 97, 'total_tokens': 123, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 79}}), HumanMessage(content='What is my name?', additional_kwargs={}, response_metadata={}, id='fb47eac9-e52f-45f6-bc47-34d9a454dedd'), AIMessage(content='Il tuo nome è PK, come mi hai detto prima.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id

### Managing Conversation History

In [44]:
# Add a message trimmer.
# The trimmer should run BEFORE the prompt template but AFTER the previous messages are loaded from the message history.
from langchain_core.messages import SystemMessage, trim_messages
from langchain_core.messages import AIMessage

# No messages are passed here, so this builds a reusable trimmer that is
# applied later with trimmer.invoke(...)
trimmer = trim_messages(
    strategy="last",        # keep the most recent messages
    max_tokens=50,          # token budget for the trimmed history
    token_counter=model,    # count tokens with the chat model
    include_system=True,    # keep the leading system message
    start_on="human",       # trimmed history begins on a human message
    allow_partial=False,    # never cut a message in half
)

# Sample conversation used to exercise the trimmer
messages = [
    SystemMessage("you're a good assistant"),
    HumanMessage("hi! I'm PK"),
    AIMessage("hi!"),
    HumanMessage("I like vanilla ice cream"),
    AIMessage("nice"),
    HumanMessage("whats 2 + 2"),
    AIMessage("4"),
    HumanMessage("thanks"),
    AIMessage("no problem!"),
    HumanMessage("having fun?"),
    AIMessage("yes!"),
]

# Last expression of the cell, so the trimmed list is displayed
trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}),
 AIMessage(content='nice', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='no problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes!', additional_kwargs={}, response_metadata={})]

In [45]:
# To use the trimmer in the chain, the node defined below runs it on the
# stored messages before they are passed to the prompt.
workflow = StateGraph(State)


def call_model(state: State):
    """Trim the history, render the prompt, and return the model's reply.

    Only the messages kept by ``trimmer`` are sent to the model; the returned
    state update holds the single new reply.
    """
    recent_messages = trimmer.invoke(state["messages"])
    prompt_inputs = {"messages": recent_messages, "language": state["language"]}
    reply = model.invoke(prompt_template.invoke(prompt_inputs))
    return {"messages": [reply]}


# Register the trimming node, then connect the entry point to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Fresh in-memory checkpointer for the recompiled application
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [46]:
# The name appears only in the oldest sample messages, which the trimmer
# drops, so the model should no longer know it
config = {"configurable": {"thread_id": "abc567"}}
query = "What is my name?"
language = "English"

# Send the sample history followed by the new question
input_messages = [*messages, HumanMessage(content=query)]
output = app.invoke({"messages": input_messages, "language": language}, config)
output["messages"][-1].pretty_print()


I don't know your name. You haven't told me yet!

If you'd like me to know it, feel free to tell me.


In [47]:
# Information that is still inside the trimmed window is remembered
config = {"configurable": {"thread_id": "abc678"}}
query = "What math problem did I ask?"
language = "English"

# Send the sample history followed by the new question
input_messages = [*messages, HumanMessage(content=query)]
output = app.invoke({"messages": input_messages, "language": language}, config)
output["messages"][-1].pretty_print()


You asked "what's 2 + 2".


### Streaming

In [48]:
# LLMs can take a while to respond, so most applications stream the reply back
# as it is generated; this lets the user see progress instead of waiting.
config = {"configurable": {"thread_id": "abc789"}}
query = "Hi I'm Todd, please tell me a joke."
language = "English"

input_messages = [HumanMessage(content=query)]
for chunk, metadata in app.stream(
    {"messages": input_messages, "language": language},
    config,
    stream_mode="messages",
):
    # Skip everything that is not a model response
    if not isinstance(chunk, AIMessage):
        continue
    print(chunk.content, end="|")

Hi Todd! Here's one for you:

Why don't scientists trust atoms?

Because they make up everything!

Hope you liked it!|