In [1]:
import os
from dotenv import load_dotenv
## Load variables from a local .env file into the process environment before the model client is built.
## NOTE(review): presumably this is where the Google API key comes from - confirm against your .env file.
load_dotenv()
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_google_genai import ChatGoogleGenerativeAI
import warnings
## Silence every Python warning for the rest of the session.
## NOTE(review): this also hides deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")

In [2]:
## Build the chat model that every later cell reuses.
## model: "gemini-2.0-flash" selects the Gemini 2.0 Flash model.
## convert_system_message_to_human=True asks the wrapper to deliver system messages as human messages.
## NOTE(review): newer langchain-google-genai releases may flag this option as deprecated - confirm.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    convert_system_message_to_human=True,
)
llm

ChatGoogleGenerativeAI(model='models/gemini-2.0-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001EE06F17990>, default_metadata=(), convert_system_message_to_human=True, model_kwargs={})

In [3]:
## Introduce the user to the model with a single human message.
llm.invoke(
    [
        HumanMessage(content="Hi, my name is Ash and I am a GenAI Engineer."),
    ]
)

AIMessage(content="Hi Ash! It's nice to meet you. Being a GenAI Engineer sounds like a fascinating and cutting-edge field. \n\nWhat kind of GenAI projects are you working on these days? I'm always interested in learning more about the applications and challenges in the field.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-12c9e412-45cb-491d-a640-827ccd3c8646-0', usage_metadata={'input_tokens': 14, 'output_tokens': 59, 'total_tokens': 73, 'input_token_details': {'cache_read': 0}})

In [4]:
## Pass a whole conversation (human -> AI -> human) in a single call.
## Nothing is remembered between calls here, so the earlier turns are resent as context
## and the model answers the final HumanMessage using them.
llm.invoke(
    [
        HumanMessage(content="Hi, my name is Ash and I am a GenAI Engineer"),
        AIMessage(
            content="Hi Ash, nice to meet you! It's great to connect with a fellow GenAI Engineer. The field is incredibly exciting and rapidly evolving.\n\nWhat kind of GenAI projects are you working on? I'm always interested in learning about what others are doing in the space. Are you focusing on specific modalities (text, image, audio, etc.) or applications?\n\nLet me know if there's anything I can help you with, whether it's brainstorming ideas, sharing resources, or just discussing the latest trends."
        ),
        HumanMessage(content="Hey what is my name and what do I do?"),
    ]
)

AIMessage(content='Your name is Ash, and you are a GenAI Engineer.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-a1bf050d-adf8-4460-9ca2-90675585a103-0', usage_metadata={'input_tokens': 132, 'output_tokens': 14, 'total_tokens': 146, 'input_token_details': {'cache_read': 0}})

### Message History
We can use a Message History class to wrap our model and make it stateful. This will keep track of the inputs and outputs of the model and store them in some data source. Future interactions will then load those messages and pass them into the chain as part of the input. Let's see how to use this!

In [5]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

## In-memory registry mapping each session ID to its chat message history.
## A production application would normally back this with durable storage (e.g., a database).
store = {}

## Lookup helper handed to RunnableWithMessageHistory so it can find (or start) a session's history.
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat message history stored under ``session_id``.

    The first request for an unknown session ID registers a fresh, empty
    ChatMessageHistory in ``store`` and returns it; later requests return
    that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history

## Wrap the bare LLM so that each call uses the history of the session named in its config.
## get_session_history supplies the per-session history object.
with_message_history = RunnableWithMessageHistory(
    llm,
    get_session_history,
)


In [6]:
## Config that routes calls to the conversation stored under session ID "chat1".
## It only names the session; no messages are added until the chain is invoked.
config = {
    "configurable": {
        "session_id": "chat1",
    },
}

In [7]:
## First turn of session "chat1": the session ID travels in the config dictionary.
response = with_message_history.invoke(
    [
        HumanMessage(content="Hi, my name is Ash and I am a GenAI Engineer"),
    ],
    config=config,
)

In [8]:
## Inspect the history object stored for session "chat1".
## After the first turn it should hold the human message followed by the model's reply.
get_session_history("chat1")

InMemoryChatMessageHistory(messages=[HumanMessage(content='Hi, my name is Ash and I am a GenAI Engineer', additional_kwargs={}, response_metadata={}), AIMessage(content="Hi Ash, it's great to meet you! It's exciting to connect with another GenAI Engineer. What kind of projects are you working on currently? I'm always interested in learning about the innovative applications of generative AI.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-55d61d5d-5e5f-4fc5-9bcb-55ac2ee48302-0', usage_metadata={'input_tokens': 13, 'output_tokens': 49, 'total_tokens': 62, 'input_token_details': {'cache_read': 0}})])

In [9]:
## response is the message returned for the latest turn; .content is only its reply text.
## The accumulated conversation is not in the response - it lives in the session history for "chat1".
response.content

"Hi Ash, it's great to meet you! It's exciting to connect with another GenAI Engineer. What kind of projects are you working on currently? I'm always interested in learning about the innovative applications of generative AI."

In [10]:
## Second turn in the same session: the stored history lets the model recall the name given earlier.
with_message_history.invoke(
    [
        HumanMessage(content="What is my name"),
    ],
    config=config,
)

AIMessage(content='Your name is Ash.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-75a494dc-1601-4d43-b081-bb4c3d8152f2-0', usage_metadata={'input_tokens': 65, 'output_tokens': 6, 'total_tokens': 71, 'input_token_details': {'cache_read': 0}})

In [11]:
## Switch to a brand-new session ID, "chat2", to show that histories are kept per session.
config2 = {"configurable": {"session_id": "chat2"}}

## Ask the same question under the new session ID (again passed through the config dictionary).
response = with_message_history.invoke(
    [
        HumanMessage(content="What is my name"),
    ],
    config=config2,
)

## "chat2" has its own, separate history, so the turns recorded under "chat1" are not visible here.
response.content


"As a large language model, I have no memory of past conversations. Therefore, I don't know your name. You haven't told me!"

## Using Prompt Template

Prompt Templates help to turn raw user information into a format that the LLM can work with. In this case, the raw user input is just a message, which we are passing to the LLM. Let's now make that a bit more complicated. First, let's add in a system message with some custom instructions (but still taking messages as input). Next, we'll add in more input besides just the messages.

In [12]:
## Build a prompt that puts a fixed system instruction in front of the conversation.
## ChatPromptTemplate assembles the prompt; MessagesPlaceholder marks the spot where the
## list supplied under the "messages" key is inserted at invoke time.
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all the questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

## Pipe the rendered prompt into the model.
chain = prompt | llm

In [13]:
## The prompt contains a MessagesPlaceholder, so the input is a dict whose key matches
## the placeholder's variable name ("messages").
chain.invoke({"messages": [HumanMessage(content="Hi, my name is Ashutosh")]})

AIMessage(content="Hi Ashutosh, it's nice to meet you! How can I help you today?", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-4f593a97-779f-4b58-95e8-49ad2c1d2bee-0', usage_metadata={'input_tokens': 25, 'output_tokens': 21, 'total_tokens': 46, 'input_token_details': {'cache_read': 0}})

In [14]:
## RunnableWithMessageHistory can wrap a whole chain, not only a bare model,
## so the prompt + model pipeline gets per-session history as well.
with_message_history2 = RunnableWithMessageHistory(
    chain,
    get_session_history,
)

In [15]:
## Talk to the history-aware chain under its own session ID, "chat3".
config3 = {"configurable": {"session_id": "chat3"}}

## As before, the config dictionary tells the wrapper which session's history to use.
response = with_message_history2.invoke(
    [
        HumanMessage(content="Hi, my name is Ashutosh and I am a GenAI Engineer"),
    ],
    config=config3,
)

In [16]:
## Show only the reply text of the most recent response.
response.content

"Hi Ashutosh, it's nice to meet you! As a GenAI Engineer, you're working in a really exciting and cutting-edge field. How can I help you today? I'm ready to answer your questions, assist with brainstorming, or provide information to the best of my abilities. Let me know what you need!"

In [17]:
## Make the prompt more flexible: the system instruction now contains a {language} variable,
## so the caller chooses the reply language at invoke time.
prompt2 = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all the questions to the best of your ability in {language}.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

## Same pipeline shape as before: rendered prompt -> model.
chain2 = prompt2 | llm

In [18]:
## The input now carries two keys, one per template variable: "messages" and "language".
response = chain2.invoke(
    dict(
        messages=[HumanMessage(content="Hi my name is Ashutosh.")],
        language="Hindi",
    )
)

In [19]:
## Show the reply text; it should come back in the language requested above (Hindi).
response.content

'नमस्ते आशुतोष, मैं आपकी कैसे मदद कर सकता हूँ?'

Now we will wrap this chain in a Message History class. Because the input now has multiple keys, we need to specify which key holds the messages so that the chat history is saved correctly.

In [20]:
## The chain input now has several keys, so input_messages_key tells RunnableWithMessageHistory
## which one carries the new message(s) for the session history.
## It is set to "messages" here - the same name that prompt2's MessagesPlaceholder reads and the same
## key used when invoking the chain - so keep it in sync with the prompt template rather than
## choosing an unrelated name.
with_message_history3 = RunnableWithMessageHistory(
    chain2,
    get_session_history,
    input_messages_key="messages"
)

In [None]:
## Use a separate session, "chat4", for the multilingual chain.
config4 = {"configurable": {"session_id": "chat4"}}

## "messages" holds the new turn; "language" fills the {language} slot of the system prompt.
response = with_message_history3.invoke(
    {
        "messages": [
            HumanMessage(content="What is the new LLM model launched by Google."),
        ],
        "language": "Hindi",
    },
    config=config4,
)

response.content

'गूगल द्वारा लॉन्च किया गया नया LLM मॉडल **Gemini 1.5 Pro** है।'

## Managing Conversation History

One of the important concepts to understand when building chatbots is how to manage conversation history. If not managed properly, the list of messages will grow unbounded and potentially overflow the context window of the LLM. Therefore, it is important to add a step that limits the size of the messages you are passing in.

We will be using the **trim_messages** helper to reduce the number of messages we are sending to our model. This trimmer allows us to specify how many tokens we want to keep, along with other parameters such as whether to always keep the system message and whether to allow partial messages.

In [None]:
from langchain_core.messages import trim_messages

## Build a reusable trimmer that caps how much history is forwarded to the model.
## No messages are passed here; the resulting object is applied later with .invoke(messages).
trimmer = trim_messages(
    strategy="last",      ## keep the most recent messages and drop older ones
    max_tokens=200,       ## token budget for the trimmed history
    token_counter=llm,    ## let the LLM itself count the tokens
    include_system=True,  ## keep the system message in the trimmed output
    start_on="human",     ## the kept history starts with a human message
    allow_partial=False,  ## do not keep partial messages to fit the budget
)

In [34]:
## Sample conversation used to demonstrate trimming: one system message followed by
## alternating human / AI turns.
## NOTE(review): "Youa" in the system message looks like a typo for "You"; it is left unchanged
## because it is part of the text that is actually sent to the model.
messages = [
    SystemMessage(content="Youa are a helpful assistant."),
    HumanMessage(content="Hi, my name is Ash and I am a GenAI Engineer"),
    AIMessage(content="Hi Ash, nice to meet you!"),
    HumanMessage(content="I like to play football."),
    AIMessage(content="That's great! Football is a fun sport. Do you play it often?"),
    HumanMessage(content="Yes, I play it every weekend."),
    AIMessage(content="That's awesome! Playing sports is a great way to stay active and have fun. Do you play in a team or just for fun?"),
    HumanMessage(content="I play in a team. We have a match every Saturday."),
    AIMessage(content="That sounds exciting! Playing in a team adds a whole new level of fun and competition. How's your team doing? Are you winning a lot of matches?"),
    HumanMessage(content="We are doing great! We have won 5 matches in a row."),
    AIMessage(content="That's impressive! Winning 5 matches in a row is no small feat. It sounds like you and your team are really in sync and playing well together. Keep up the great work!"),
    HumanMessage(content="Thanks! We are really enjoying it."),
    AIMessage(content="That's the most important part! Enjoying the game and having fun with your teammates is what it's all about. Do you have any favorite players or teams that you look up to?"),
]

In [None]:
## Apply the trimmer to the sample conversation so that it fits the 200-token budget.
## The system message is retained, the oldest turns are dropped, and the kept history starts on a human message.
## This preserves the most recent context while keeping the message history manageable.
trimmer.invoke(messages)

[SystemMessage(content='Youa are a helpful assistant.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='I play in a team. We have a match every Saturday.', additional_kwargs={}, response_metadata={}),
 AIMessage(content="That sounds exciting! Playing in a team adds a whole new level of fun and competition. How's your team doing? Are you winning a lot of matches?", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='We are doing great! We have won 5 matches in a row.', additional_kwargs={}, response_metadata={}),
 AIMessage(content="That's impressive! Winning 5 matches in a row is no small feat. It sounds like you and your team are really in sync and playing well together. Keep up the great work!", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Thanks! We are really enjoying it.', additional_kwargs={}, response_metadata={}),
 AIMessage(content="That's the most important part! Enjoying the game and having fun with your teammates is

### Passing Trimmer in a Chain

In [None]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

## Trim first, then prompt, then model:
##   itemgetter("messages") pulls the message list out of the input dict,
##   the trimmer shortens it, and RunnablePassthrough.assign stores the result back
##   under "messages" next to the other input keys (e.g. "language").
## NOTE: this rebinds the name `chain`, which was defined earlier as `prompt | llm`.
chain = (
    RunnablePassthrough.assign(
        messages=itemgetter("messages") | trimmer,
    )
    | prompt2
    | llm
)

## Ask a follow-up about the sample conversation; the reply language is chosen through "language".
response = chain.invoke(
    {
        "messages": [*messages, HumanMessage(content="How many matches did we won?")],
        "language": "English",
    }
)


response.content

'You said you won 5 matches in a row!'

### Wrapping Trimmer in the Message History

In [None]:
## Give the trimming chain per-session memory by wrapping it in RunnableWithMessageHistory.
## input_messages_key names the input key that carries the new message(s).
with_message_history4 = RunnableWithMessageHistory(
    chain, get_session_history, input_messages_key="messages"
)

## Session used for the history-aware trimming chain.
config5 = {"configurable": {"session_id": "chat5"}}

In [43]:
## Ask a question under session "chat5".
## NOTE(review): the sample `messages` list is never added to this session, so presumably its
## history is empty here, and the question says "messages" where "matches" was probably intended -
## confirm what this cell is meant to demonstrate.
response = with_message_history4.invoke(
    dict(
        messages=[HumanMessage(content="Hi how many messages did I won.")],
        language="English",
    ),
    config=config5,
)

In [44]:
## Show only the reply text of the most recent response.
response.content

'I do not have access to your personal messages or any information about your activity. Therefore, I cannot tell you how many messages you won.'