In [1]:
from langchain_core.runnables import RunnablePassthrough,RunnableLambda, Runnable, RunnableParallel,RunnableConfig,RunnableGenerator
from langchain_core.messages import AIMessage, HumanMessage,SystemMessage,ToolMessage,trim_messages
from dotenv import load_dotenv,find_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain.tools import tool
from langchain.prompts import ChatPromptTemplate,SystemMessagePromptTemplate, HumanMessagePromptTemplate,MessagesPlaceholder
from langchain_core.output_parsers import JsonOutputParser,StrOutputParser
from operator import itemgetter
from langchain.embeddings import SentenceTransformerEmbeddings
import json
from langchain_community.vectorstores import FAISS,Chroma
from operator import itemgetter
import time
import grandalf
from typing import Iterator,List,AsyncIterator
from langchain_core.runnables import ConfigurableField
from langchain.runnables.hub import HubRunnable
from langchain_community.chat_message_histories import ChatMessageHistory,RedisChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [2]:
# Locate the .env file via the relative name "../.env" and load its variables
# into the process environment; the cell output is load_dotenv's return value.
load_dotenv(dotenv_path=find_dotenv(filename="../.env"))

True

In [14]:
# Chat models used in this notebook. Credentials are presumably read from the
# environment populated by load_dotenv — TODO confirm the variable names in .env.
llmGemini = ChatGoogleGenerativeAI(model="gemini-2.5-flash-preview-04-17")

# The OpenAI model id is "gpt-4o-mini" (letter "o"). The previous value
# "gpt-40-mini" (digit zero) is not a valid model name and would only fail
# once the model is first invoked.
llmGPT = ChatOpenAI(model="gpt-4o-mini")

In [15]:
# Sample conversation used by the trimming examples: one system prompt followed
# by alternating human/AI turns, ending on a human question.
messages=[
    SystemMessage(content="You are a good assistant, you always respond with a joke."),
    HumanMessage(content="I Wonder why it's called Langchain"),
    AIMessage(content="Well, I guess they thought 'WordRope' and 'SentenceString' just didn't have the same ring to it!"),
    HumanMessage(content="And who is Harrison chasing anyways?"),
    AIMessage(content="Hmm let me think. \n\nWhy, he's probably chasing after the last cup of coffee in the office!"),
    HumanMessage(content="What do you call a speechless parrot")
]

<h3> Trimming Based on Token Count </h3>

In [16]:
# Token-based trimming: keep the most recent part of the conversation that fits
# within the token budget, measured with the Gemini model's token counter.
trim_messages(
    messages=messages,
    max_tokens=60,             # token budget for the trimmed history
    token_counter=llmGemini,   # count tokens using the chat model
    strategy="last",           # keep the tail of the conversation
    include_system=True,       # retain the leading SystemMessage
    start_on="human",          # trimmed history starts with a HumanMessage
    end_on=("human", "tool"),  # ...and ends on a HumanMessage or ToolMessage
    allow_partial=True,        # a message may be split up to fit the budget
)

[SystemMessage(content='You are a good assistant, you always respond with a joke.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='And who is Harrison chasing anyways?', additional_kwargs={}, response_metadata={}),
 AIMessage(content="Hmm let me think. \n\nWhy, he's probably chasing after the last cup of coffee in the office!", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='What do you call a speechless parrot', additional_kwargs={}, response_metadata={})]

<h3>Trimming Based on Message Count</h3>

In [17]:
# Message-count trimming: with `len` as the counter, the "token" count is the
# number of messages, so max_tokens acts as a cap on how many messages are kept.
trim_messages(
    messages=messages,
    max_tokens=3,              # maximum number of messages allowed
    token_counter=len,         # count messages instead of tokens
    strategy="last",           # keep the tail of the conversation
    include_system=True,       # retain the leading SystemMessage
    start_on="human",          # trimmed history starts with a HumanMessage
    end_on=("human", "tool"),  # ...and ends on a HumanMessage or ToolMessage
    allow_partial=False,       # never split a message
)

[SystemMessage(content='You are a good assistant, you always respond with a joke.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='What do you call a speechless parrot', additional_kwargs={}, response_metadata={})]

<h3> Chaining </h3>

In [None]:
# Calling trim_messages without `messages` yields a runnable trimmer that can be
# piped into a chain; the messages are supplied when the chain is invoked.
trimMessages = trim_messages(
    max_tokens=60,             # token budget for the trimmed history
    token_counter=llmGemini,   # count tokens using the chat model
    strategy="last",           # keep the tail of the conversation
    include_system=True,       # retain the leading SystemMessage
    start_on="human",          # trimmed history starts with a HumanMessage
    end_on=("human", "tool"),  # ...and ends on a HumanMessage or ToolMessage
    allow_partial=True,        # a message may be split up to fit the budget
)

In [22]:
# Display the trimmer object to show what trim_messages returned.
trimMessages

RunnableLambda(...)

In [19]:
# Trim the incoming message list first, then hand the result to Gemini.
chain = trimMessages | llmGemini

In [20]:
# Run the full sample conversation through the trim -> model chain.
chain.invoke(messages)

AIMessage(content="That's an easy one!\n\nYou call a speechless parrot a **mute point**!\n\nHope that didn't ruffle any feathers!", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'models/gemini-2.5-flash-preview-04-17', 'safety_ratings': []}, id='run--b483b398-7ec2-4d59-9741-6af7211226d8-0', usage_metadata={'input_tokens': 54, 'output_tokens': 29, 'total_tokens': 659, 'input_token_details': {'cache_read': 0}})

<h3>With Chat Message History</h3>

In [23]:
# In-memory registry mapping a session id to its chat history object.
store = {}


def getSessionHistory(sessionId: str) -> BaseChatMessageHistory:
    """Return the chat history for ``sessionId``, creating it on first use.

    Args:
        sessionId: Key identifying the conversation in ``store``.

    Returns:
        The history object stored under ``sessionId``; a new, empty
        ``ChatMessageHistory`` is registered if none exists yet.
    """
    try:
        return store[sessionId]
    except KeyError:
        # First time this session is seen: register a fresh history.
        history = store[sessionId] = ChatMessageHistory()
        return history

In [24]:
# Prompt layout: system instruction, then the prior conversation injected under
# the "history" variable, then the user's current question.
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(
            "You're an assistant who's good at {ability}. Respond in 20 words or fewer"
        ),
        MessagesPlaceholder(variable_name="history"),
        HumanMessagePromptTemplate.from_template("{question}"),
    ]
)

In [25]:
# Runnable trimmer for the history-aware chain; rebinds `trimMessages` with a
# larger token budget than the earlier example.
trimMessages = trim_messages(
    max_tokens=90,             # token budget for the trimmed history
    token_counter=llmGemini,   # count tokens using the chat model
    strategy="last",           # keep the tail of the conversation
    include_system=True,       # retain the leading SystemMessage
    start_on="human",          # trimmed history starts with a HumanMessage
    end_on=("human", "tool"),  # ...and ends on a HumanMessage or ToolMessage
    allow_partial=True,        # a message may be split up to fit the budget
)

In [26]:
# Render the prompt, trim the resulting messages, then call Gemini.
chain = prompt | trimMessages | llmGemini

In [27]:
# Wrap the chain so that each call loads the session's history into the
# "history" prompt variable and treats "question" as the new input message.
chainWithMessageHistory = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=getSessionHistory,
    input_messages_key="question",
    history_messages_key="history",
)

In [28]:
# Session key under which this conversation's history is stored.
sessionID = "ritish"

In [29]:
# RunnableWithMessageHistory reads the session id from config["configurable"].
# Pass it there explicitly at invoke time rather than as a top-level config key,
# which only works when the installed langchain_core relocates unknown keys.
chainWithMessageHistory.invoke(
    input={
        "ability": "geography",
        "question": "Which is the smallest country in the world?",
    },
    config={"configurable": {"session_id": sessionID}},
)

AIMessage(content='Vatican City is the smallest country in the world by area.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'models/gemini-2.5-flash-preview-04-17', 'safety_ratings': []}, id='run--db50c5a1-1f2e-4c9c-80ea-938f0bab3481-0', usage_metadata={'input_tokens': 30, 'output_tokens': 14, 'total_tokens': 179, 'input_token_details': {'cache_read': 0}})

In [30]:
# Follow-up question in the same session; "it" is resolved from stored history.
# The session id is passed under config["configurable"], the documented location
# for RunnableWithMessageHistory, instead of as a top-level config key.
chainWithMessageHistory.invoke(
    input={
        "ability": "geography",
        "question": "Where is it located?",
    },
    config={"configurable": {"session_id": sessionID}},
)

AIMessage(content="It's an independent city-state entirely enclosed within Rome, Italy.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'models/gemini-2.5-flash-preview-04-17', 'safety_ratings': []}, id='run--1bf20f69-7e8e-4363-9df0-5f17a67aa67e-0', usage_metadata={'input_tokens': 51, 'output_tokens': 15, 'total_tokens': 118, 'input_token_details': {'cache_read': 0}})

In [31]:
# Another follow-up in the same session. The session id is passed under
# config["configurable"], the documented location for RunnableWithMessageHistory,
# instead of as a top-level config key.
chainWithMessageHistory.invoke(
    input={
        "ability": "geography",
        "question": "How far is it from Israel?",
    },
    config={"configurable": {"session_id": sessionID}},
)

AIMessage(content='Roughly 2,300 kilometers (1,400 miles) southeast across the Mediterranean Sea.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'models/gemini-2.5-flash-preview-04-17', 'safety_ratings': []}, id='run--471f05bf-7d54-47b2-b82f-3ee61dbade5b-0', usage_metadata={'input_tokens': 75, 'output_tokens': 23, 'total_tokens': 261, 'input_token_details': {'cache_read': 0}})

In [32]:
# Final follow-up in the same session. The session id is passed under
# config["configurable"], the documented location for RunnableWithMessageHistory,
# instead of as a top-level config key.
chainWithMessageHistory.invoke(
    input={
        "ability": "geography",
        "question": "In Which Continent is it located?",
    },
    config={"configurable": {"session_id": sessionID}},
)

AIMessage(content="It's located within the continent of Europe.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'models/gemini-2.5-flash-preview-04-17', 'safety_ratings': []}, id='run--89507d6d-2aa5-4fe7-8fc1-d8171ccd1851-0', usage_metadata={'input_tokens': 82, 'output_tokens': 10, 'total_tokens': 177, 'input_token_details': {'cache_read': 0}})