# Build a Chatbot with memory
Source: https://python.langchain.com/docs/tutorials/chatbot/ (accessed on 19 April 2025)

In [1]:
import os
from dotenv import load_dotenv

In [None]:
# Load the .env file. Set DOTENV_PATH to point at a specific file; when it is
# unset, load_dotenv(None) falls back to python-dotenv's own search for a
# ".env" file, so no machine-specific absolute path is baked into the notebook.
load_dotenv(os.getenv("DOTENV_PATH"))

# Fail early with a readable message when a required key is missing.
# Assigning os.getenv(name) back into os.environ would raise an opaque
# TypeError for an unset variable, so the check is made explicitly instead.
missing_vars = [
    name
    for name in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY")
    if os.getenv(name) is None
]
if missing_vars:
    raise RuntimeError(
        "Missing environment variable(s): "
        + ", ".join(missing_vars)
        + ". Add them to your .env file or export them before running the notebook."
    )

# Switch on tracing for the runs in this notebook
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [3]:
from langchain.chat_models import init_chat_model

# Chat model shared by every cell below: gpt-4o-mini served by the "openai" provider
model = init_chat_model("gpt-4o-mini", model_provider="openai")

In [4]:
from langchain_core.messages import HumanMessage

# Single call: the only input the model receives is this one message
model.invoke([HumanMessage(content="Hi! I'm Bob")])

AIMessage(content='Hi Bob! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 11, 'total_tokens': 22, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0392822090', 'id': 'chatcmpl-BOhQMOX0rzlk1JxTOB6qU9OddF3XG', 'finish_reason': 'stop', 'logprobs': None}, id='run-11cfaa22-1ae3-4636-866b-22e51c3a63c5-0', usage_metadata={'input_tokens': 11, 'output_tokens': 11, 'total_tokens': 22, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [5]:
# Separate call whose input does not contain the earlier "Hi! I'm Bob" message
model.invoke([HumanMessage(content="What's my name?")])

AIMessage(content="I don't have access to personal information about individuals unless it has been shared with me in the course of our conversation. Therefore, I don't know your name. If you'd like me to know your name, feel free to share it!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 47, 'prompt_tokens': 11, 'total_tokens': 58, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0392822090', 'id': 'chatcmpl-BOhRMP0kSPghN0ELee8LBubKzmbQT', 'finish_reason': 'stop', 'logprobs': None}, id='run-838a84cd-f087-4081-bdbd-7315c5220fae-0', usage_metadata={'input_tokens': 11, 'output_tokens': 47, 'total_tokens': 58, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [6]:
from langchain_core.messages import AIMessage

# Send the whole conversation (both human turns and the AI reply) in one call
model.invoke(
    [
        HumanMessage(content="Hi! I'm Bob"),
        AIMessage(content="Hello Bob! How can I assist you today?"),
        HumanMessage(content="What's my name?"),
    ]
)

AIMessage(content='Your name is Bob. How can I help you today, Bob?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 33, 'total_tokens': 48, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_dbaca60df0', 'id': 'chatcmpl-BOhRuLibBpPk2dW4w9kX8dkA2rSdE', 'finish_reason': 'stop', 'logprobs': None}, id='run-3aed23dd-21d8-4f22-b6e5-8abacb81cc35-0', usage_metadata={'input_tokens': 33, 'output_tokens': 15, 'total_tokens': 48, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In LangChain, we can use a Message History class to wrap our model and make it stateful. This will keep track of the inputs and outputs of the model and store them in a datastore. Future interactions will then load those messages and pass them into the chain as part of the input. Let's see how to use this!

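First, let's make sure to install langchain-community, as it provides integrations for storing message history:
`pip install langchain_community`
(Note: the code below imports `InMemoryChatMessageHistory` from `langchain_core`, so this install only matters if you swap in one of the community-provided history stores.)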

After that, we can import the relevant classes and set up our chain, which wraps the model and adds in this message history. A key part here is the function we pass in as `get_session_history`. This function is expected to take in a `session_id` and return a Message History object. The `session_id` is used to distinguish between separate conversations and should be passed in as part of the config when calling the new chain (we'll show how to do that).

In [7]:
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory

# Maps each session_id to its chat history object; entries are added by get_session_history
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict, so repeated calls with
    the same ``session_id`` hand back the same history object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register an empty in-memory history
        history = InMemoryChatMessageHistory()
        store[session_id] = history
        return history


# Wrap the model so that each call looks up its message history through get_session_history
with_message_history = RunnableWithMessageHistory(model, get_session_history)

In [None]:
# List the message contents held for session "abc2".
# Note: get_session_history registers the session in `store` if it does not exist yet.
my_list = [message.content for message in get_session_history("abc2").messages]
print(my_list)

[]


We now need to create a config that we pass into the runnable every time. This config contains information that is not part of the input directly, but is still useful. In this case, we want to include a session_id. This should look like:

In [9]:
# Per-call config: the session_id selects which stored conversation is used
config = {"configurable": {"session_id": "abc2"}}

In [10]:
# First turn in the session selected by `config`
response = with_message_history.invoke(
    [HumanMessage(content="Hi! I'm Bob")],
    config=config,
)

# Show only the text of the reply
response.content

'Hi Bob! How can I assist you today?'

In [11]:
# Follow-up turn in the same session (same `config`), sent without repeating the name
response = with_message_history.invoke(
    [HumanMessage(content="What's my name?")],
    config=config,
)

# Show only the text of the reply
response.content

'Your name is Bob! How can I help you today?'

In [13]:
# Display the complete response object rather than just its .content
response

AIMessage(content='Your name is Bob! How can I help you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 33, 'total_tokens': 46, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_dbaca60df0', 'id': 'chatcmpl-BOhT9tgNYqXFmYqmOpJWrIgenZhqU', 'finish_reason': 'stop', 'logprobs': None}, id='run-76bcaa09-49ee-4ddb-9c72-a90e37ed2842-0', usage_metadata={'input_tokens': 33, 'output_tokens': 13, 'total_tokens': 46, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [14]:
# Session "abc3": both human messages are submitted together in a single call
config = {"configurable": {"session_id": "abc3"}}
response = with_message_history.invoke(
    [
        HumanMessage(content="Hi! I'm Bob"),
        HumanMessage(content="What's my name?"),
    ],
    config=config,
)

response

AIMessage(content='Your name is Bob! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 19, 'total_tokens': 32, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_dbaca60df0', 'id': 'chatcmpl-BOhUbigxDrA4B98ur2HUB60t5VG90', 'finish_reason': 'stop', 'logprobs': None}, id='run-78b04033-4841-42b4-8065-e7588ecd7ea1-0', usage_metadata={'input_tokens': 19, 'output_tokens': 13, 'total_tokens': 32, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [18]:
# Session "abc6": a hand-written AI message is placed between the two human turns
config = {"configurable": {"session_id": "abc6"}}
response = with_message_history.invoke(
    [
        HumanMessage(content="Hi! I'm Bob"),
        AIMessage(content="You should actually stop calling people by their names"),
        HumanMessage(content="What's my name?"),
    ],
    config=config,
)

response

AIMessage(content='Your name is Bob! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 32, 'total_tokens': 45, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f7d56a8a2c', 'id': 'chatcmpl-BOhXGV2nnbRYDnz3TURILaecVA3ep', 'finish_reason': 'stop', 'logprobs': None}, id='run-e6c417df-747d-4374-b429-c8f5b699587a-0', usage_metadata={'input_tokens': 32, 'output_tokens': 13, 'total_tokens': 45, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

Great! Our chatbot now remembers things about us. If we change the config to reference a different session_id, we can see that it starts the conversation fresh.

In [68]:
# Run with a session id that has not been used before. "abc3" already holds
# the "Hi! I'm Bob" exchange from an earlier cell, so reusing it would not
# start the conversation fresh when the notebook is run top to bottom.
config = {"configurable": {"session_id": "abc4"}}

response = with_message_history.invoke(
    [HumanMessage(content="What's my name?")],
    config=config,
)

response

AIMessage(content='I don’t know your name yet. If you\'d like to share it, I can refer to you by that name, and I will add "Corinthiano" to it!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 180, 'total_tokens': 217, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0392822090', 'id': 'chatcmpl-BO1xMtXuzUkFwpIzCeLgLU0M71T2D', 'finish_reason': 'stop', 'logprobs': None}, id='run-2e6a6700-4032-4cf7-8d7d-2ed5f9d1204e-0', usage_metadata={'input_tokens': 180, 'output_tokens': 37, 'total_tokens': 217, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

### Prompt templates 
Prompt Templates help to turn raw user information into a format that the LLM can work with. In this case, the raw user input is just a message, which we are passing to the LLM. Let's now make that a bit more complicated. First, let's add in a system message with some custom instructions (but still taking messages as input). Next, we'll add in more input besides just the messages.

First, let's add in a system message. To do this, we will create a ChatPromptTemplate. We will utilize MessagesPlaceholder to pass all the messages in.

In [24]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction first, followed by whatever is supplied under "messages"
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", 'For each name that you receive from a human, add "Corinthiano" to it'),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Feed the formatted prompt into the chat model
chain = prompt | model

Note that this slightly changes the input type: rather than passing in a list of messages, we now pass in a dictionary with a `messages` key that contains a list of messages.

In [None]:
# The chain takes a dictionary; the conversation goes under the "messages" key
response = chain.invoke(
    {
        "messages": [
            HumanMessage(content="hi! I'm bob"),
            AIMessage(content=""),
            HumanMessage(content="What's my name?"),
        ]
    }
)

response.content

'Your name is Bob Corinthiano.'

We can now wrap this in the same Message History object as before.

In [25]:
# Re-wrap: message history now surrounds the prompt-plus-model chain instead of the bare model
with_message_history = RunnableWithMessageHistory(chain, get_session_history)

In [None]:
# Session "bbc2", called with the dictionary form of the input
config = {"configurable": {"session_id": "bbc2"}}
with_message_history.invoke(
    {"messages": [HumanMessage(content="Why are you calling me Bob Corinthiano?")]},
    config=config,
)

AIMessage(content='I’m calling you Bob Corinthiano because you mentioned your name is Bob, and I added "Corinthiano" to it as per your request! If you\'d like to share a different name, just let me know!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 116, 'total_tokens': 161, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f7d56a8a2c', 'id': 'chatcmpl-BOhcl2hhqzbulPEn3mCteJm8c7lBU', 'finish_reason': 'stop', 'logprobs': None}, id='run-9dabee70-9536-4578-8906-9ca1a7c89556-0', usage_metadata={'input_tokens': 116, 'output_tokens': 45, 'total_tokens': 161, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [58]:
# Pass the message under the "messages" key so this call uses the same
# dictionary input as the other calls to the prompt-based chain.
response = with_message_history.invoke(
    {"messages": [HumanMessage(content="Hi! I'm Jim")]},
    config=config,
)

response.content

'Hello, Jim Corinthiano! How can I assist you today?'

In [61]:
# Pass the message under the "messages" key so this call uses the same
# dictionary input as the other calls to the prompt-based chain.
response = with_message_history.invoke(
    {"messages": [HumanMessage(content="Do you know what Corinthiano means?")]},
    config=config,
)

response.content

'"Corinthiano" typically refers to someone from Corinth or associated with the city of Corinth in Greece. It can also denote an affiliation with certain cultural or sports contexts, such as the Corinthians football club in Brazil. Would you like to know more about its significance or context?'

Awesome! Let's now make our prompt a little bit more complicated. Let's assume that the prompt template now looks something like this:

In [30]:
# The system text has two template variables, {state} and {language},
# in addition to the "messages" placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            'You are a helpful assistant. If a user gives you their name, add "de {state}" '
            "to the user's name. Answer all questions to the best of your ability in {language}.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Feed the formatted prompt into the chat model
chain = prompt | model

Note that we have added two new inputs to the prompt: `language` and `state`. We can now invoke the chain and pass in a language and a state of our choice.

In [None]:
# Every template variable is supplied alongside the messages
response = chain.invoke(
    {
        "messages": [HumanMessage(content="hi! I'm bob")],
        "language": "German",
        "state": "Paraiba",
    }
)

response.content

'Hallo, Bob de Paraiba! Wie kann ich Ihnen helfen?'

The reply is in German and the name carries the "de Paraiba" suffix, which shows that both the `language` and `state` inputs reached the prompt.

Let's now wrap this more complicated chain in a Message History class. This time, because there are multiple keys in the input, we need to specify the correct key to use to save the chat history.

In [None]:
chain = prompt | model
config = {"configurable": {"session_id": "dbc1"}}

# With several keys in the input, name the one that carries the chat messages
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)
with_message_history.invoke(
    {
        "messages": [HumanMessage(content="Hi, my name is Bob")],
        "language": "Spanish",
        "state": "Paraíba",
    },
    config=config,
)

AIMessage(content='Hola, Bob de Paraíba. ¿En qué puedo ayudarte hoy?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 82, 'total_tokens': 97, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f7d56a8a2c', 'id': 'chatcmpl-BOi9VTlfSl6db21jcceFpBMygRQul', 'finish_reason': 'stop', 'logprobs': None}, id='run-6ca67185-655d-4124-b554-b94827167456-0', usage_metadata={'input_tokens': 82, 'output_tokens': 15, 'total_tokens': 97, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

## Managing Conversation History
One important concept to understand when building chatbots is how to manage conversation history. If left unmanaged, the list of messages will grow unbounded and potentially overflow the context window of the LLM. Therefore, it is important to add a step that limits the size of the messages you are passing in.

Importantly, you will want to do this BEFORE the prompt template but AFTER you load previous messages from Message History. I.e. you load the previous messages, apply the limits, and then call the prompt template.

We can do this by adding a simple step in front of the prompt that modifies the messages key appropriately, and then wrap that new chain in the Message History class.

LangChain comes with a few built-in helpers for managing a list of messages. In this case we'll use the trim_messages helper to reduce how many messages we're sending to the model. The trimmer allows us to specify how many tokens we want to keep, along with other parameters like if we want to always keep the system message and whether to allow partial messages:

In [65]:
from langchain_core.messages import SystemMessage, trim_messages

# Reusable trimmer; it is applied later with trimmer.invoke(...) and piped inside a chain
trimmer = trim_messages(
    token_counter=model,  # presumably the model is used to count tokens; confirm in the trim_messages docs
    max_tokens=65,  # how many tokens we want to keep
    strategy="last",
    start_on="human",
    include_system=True,  # always keep the system message
    allow_partial=False,  # do not allow partial messages
)

# Sample conversation: one system message followed by alternating human / AI turns
messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks"),
    AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"),
    AIMessage(content="yes!"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4")
]

# Run the trimmer on the sample conversation to see which messages survive
trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='no problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={})]

To use it in our chain, we just need to run the trimmer before we pass the messages input to our prompt.

Now if we try asking the model our name, it won't know it since we trimmed that part of the chat history:

In [61]:
from operator import itemgetter

from langchain_core.runnables import RunnablePassthrough

# Trim the "messages" entry first, then format the prompt and call the model
chain = RunnablePassthrough.assign(
    messages=itemgetter("messages") | trimmer
) | prompt | model

response = chain.invoke(
    {
        "messages": [*messages, HumanMessage(content="what's my name?")],
        "language": "English",
        "state": "Rio de Janeiro",
    }
)
response.content

'You haven\'t told me your name yet. If you do, I\'ll add "de Rio de Janeiro" to it!'

In [66]:
# Ask about a message that sits near the end of the sample conversation
response = chain.invoke(
    {
        "messages": [*messages, HumanMessage(content="What math problem did I ask?")],
        "language": "English",
        "state": "Rio de Janeiro",
    }
)
response.content

'You asked for the sum of 2 + 2.'

In [75]:
# Trimmed chain, rebuilt here so the cell does not depend on which earlier
# definition of `chain` was executed last.
chain = (
    RunnablePassthrough.assign(messages=itemgetter("messages") | trimmer)
    | prompt
    | model
)
config = {"configurable": {"session_id": "ebc1"}}
with_message_history = RunnableWithMessageHistory(
    chain, get_session_history, input_messages_key="messages"
)

# Introduce ourselves first. On a fresh kernel the "ebc1" history is empty,
# so without this turn the question below has nothing to recall.
with_message_history.invoke(
    {
        "messages": [HumanMessage(content="Hi, my name is John")],
        "language": "English",
        "state": "São Paulo",
    },
    config=config,
)

# Now ask the model to recall the name from the stored (and trimmed) history
with_message_history.invoke(
    {
        "messages": [HumanMessage(content="What is my name?")],
        "language": "English",
        "state": "São Paulo",
    },
    config=config,
)

AIMessage(content='Your name is John de São Paulo.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 86, 'total_tokens': 95, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0392822090', 'id': 'chatcmpl-BOnWYPHFCmSMUYAXSGI0wHTDVG58z', 'finish_reason': 'stop', 'logprobs': None}, id='run-a7f40f8b-7051-4df6-b27f-37b483c634be-0', usage_metadata={'input_tokens': 86, 'output_tokens': 9, 'total_tokens': 95, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [73]:
# Apply the trimmer to the messages stored for session "ebc1"
trimmer.invoke(store["ebc1"].messages)

[HumanMessage(content='Please multiply 12 by 12', additional_kwargs={}, response_metadata={}),
 AIMessage(content='12 multiplied by 12 equals 144.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 83, 'total_tokens': 93, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0392822090', 'id': 'chatcmpl-BOnUPmIHDpSRxcIfUc7b0Adp3Ooys', 'finish_reason': 'stop', 'logprobs': None}, id='run-c98b2467-0234-4cbf-b88d-40d1c34d834e-0', usage_metadata={'input_tokens': 83, 'output_tokens': 10, 'total_tokens': 93, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),
 HumanMessage(content='What is my name?', additional_kwargs={}, response_metadata={}),
 AIMessage(co

## Streaming

Now we've got a functioning chatbot. However, one really important UX consideration for chatbot applications is streaming. LLMs can sometimes take a while to respond, and so in order to improve the user experience one thing that most applications do is stream back each token as it is generated. This allows the user to see progress.

It's actually super easy to do this!

All chains expose a .stream method, and ones that use message history are no different. We can simply use that method to get back a streaming response.

In [None]:
# Stream the reply for session "abc15" instead of waiting for the full response
config = {"configurable": {"session_id": "abc15"}}
for r in with_message_history.stream(
    {
        "messages": [HumanMessage(content="hi! I'm todd. tell me a joke")],
        "language": "English",
        "state": "Minas"
    },
    config=config,
):
    # "|" is printed after every streamed piece so the boundaries are visible
    print(r.content, end="|")

|Hi|,| Todd| de| Minas|!| Here's| a| joke| for| you|:

|Why| don't| scientists| trust| atoms|?

|Because| they| make| up| everything|!||

Trying streaming with the previous example (the trimmed chain with message history):

In [84]:
# Trimmed chain, rebuilt here so the cell does not depend on which earlier
# definition of `chain` was executed last.
chain = (
    RunnablePassthrough.assign(messages=itemgetter("messages") | trimmer)
    | prompt
    | model
)
config = {"configurable": {"session_id": "fbc1"}}
with_message_history = RunnableWithMessageHistory(
    chain, get_session_history, input_messages_key="messages"
)

# Introduce ourselves first. On a fresh kernel the "fbc1" history is empty,
# so without this turn the streamed question below has nothing to recall.
with_message_history.invoke(
    {
        "messages": [HumanMessage(content="Hi, my name is Bob")],
        "language": "English",
        "state": "São Paulo",
    },
    config=config,
)

# Stream the answer; "!" is printed after every streamed piece
for r in with_message_history.stream(
    {
        "messages": [HumanMessage(content="What is my name?")],
        "language": "English",
        "state": "São Paulo",
    },
    config=config,
):
    print(r.content, end="!")

!Your! name! is! Bob! de! São! Paulo!.! How! can! I! assist! you! further!?!!