In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

groq_api_key=os.getenv("GROQ_API_KEY")


In [2]:
from langchain_groq import ChatGroq
model = ChatGroq(api_key=groq_api_key, model="llama-3.1-8b-instant")
model

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000017725166600>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000017725284EF0>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [4]:
from langchain_core.messages import HumanMessage
model.invoke([HumanMessage(content="Hi, This is Ravin. I am a Data & AI Architect.")])

AIMessage(content='Nice to meet you, Ravin. As a Data & AI Architect, you must be working on exciting projects that involve designing and implementing data management and AI solutions. What areas of Data & AI are you most interested in or currently working on?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 50, 'prompt_tokens': 50, 'total_tokens': 100, 'completion_time': 0.098783484, 'prompt_time': 0.002815443, 'queue_time': 0.053451317, 'total_time': 0.101598927}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_33e8adf159', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--c889179e-d2c3-4dea-b6f8-6b6673b98097-0', usage_metadata={'input_tokens': 50, 'output_tokens': 50, 'total_tokens': 100})

In [5]:
from langchain_core.messages import HumanMessage
model.invoke([HumanMessage(content="Hi, what is my name?")])

AIMessage(content="I'm happy to chat with you, but I don't have any information about your name. Our conversation has just started, and I'm a large language model, I don't retain any information about individual users. If you'd like to share your name, I'd be happy to know it!", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 61, 'prompt_tokens': 42, 'total_tokens': 103, 'completion_time': 0.082807932, 'prompt_time': 0.002699074, 'queue_time': 0.051569896, 'total_time': 0.085507006}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_ab04adca7d', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--76d0c379-cd8e-4396-930d-9a94c9e5c9c1-0', usage_metadata={'input_tokens': 42, 'output_tokens': 61, 'total_tokens': 103})

In [8]:
from langchain_core.messages import AIMessage
model.invoke([
    HumanMessage(content="Hi, This is Ravin. I am a Data & AI Architect.")
    ,AIMessage(content="Hi, Nice to meet you Ravin!")
    ,HumanMessage(content="Hey, what is my name?")
    ])

AIMessage(content="Your name is Ravin. You're a Data & AI Architect.", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 76, 'total_tokens': 91, 'completion_time': 0.025150857, 'prompt_time': 0.004266195, 'queue_time': 0.052271655, 'total_time': 0.029417052}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_33e8adf159', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--686f9fa7-3ca9-445c-9be2-3ca356d6b6ec-0', usage_metadata={'input_tokens': 76, 'output_tokens': 15, 'total_tokens': 91})

#### Message History
We will maintain a message history to keep track of the conversation context by using LangChain's ChatMessageHistory, BaseChatMessageHistory and RunnableWithMessageHistory classes. This will allow the chatbot to remember previous interactions and provide more coherent responses.

In [9]:
from langchain_community.chat_message_histories import ChatMessageHistory #Stores messages in memory (RAM) UC: Simple chatbot session memory
from langchain_core.chat_history import BaseChatMessageHistory #Abstract interface for custom message stores UC: Persistent storage in DB
from langchain_core.runnables import RunnableWithMessageHistory #Adds automatic history tracking to runnables UC: Multi-turn chat or session-based pipelines

store = {}  # In-memory store for simplicity

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

with_message_history = RunnableWithMessageHistory(
    model,
    get_session_history
)


In [10]:
config = {"configurable":{"session_id":"chat1"}}

In [11]:
response = with_message_history.invoke([HumanMessage(content="Hi, This is Ravin. I am a Data & AI Architect.")],config=config)

In [12]:
response.content

"Hello Ravin, nice to meet you. As a Data & AI Architect, that's a fascinating role. You probably work on designing and implementing data and AI solutions that drive business value and innovation. What specific areas of data and AI are you focused on, such as machine learning, data engineering, or business intelligence?"

In [14]:
# using the previous chat lets ask LLM to see if it remembers the context
response = with_message_history.invoke([HumanMessage(content="Hey, what is my name?")],config=config)
response.content

'Your name is Ravin.'

In [15]:
# using the previous chat lets ask LLM to see if it remembers the context but provide a new session
config1 = {"configurable":{"session_id":"chat2"}}
response = with_message_history.invoke([HumanMessage(content="Hey, what is my name?")],config=config1)
response.content

"I'm happy to chat with you, but I'm a large language model, I don't have any information about your personal details, including your name. Each time you interact with me, it's a new conversation, and I don't retain any information from previous conversations. If you'd like to share your name with me, I'd be happy to use it in our conversation."

In [16]:
#provide context again in new session
response = with_message_history.invoke([HumanMessage(content="Hi, This is Ravin. I am a Data & AI Architect.")],config=config1)
response.content

'Nice to meet you, Ravin. As a Data & AI Architect, you must be working with various technologies and tools to design and implement data-driven solutions and artificial intelligence systems. What specific areas of Data & AI are you most interested in or currently working on?'

In [None]:
#check it remembers the previous context in config1 session
response = with_message_history.invoke([HumanMessage(content="Hey, what is my name?")],config=config1)
response.content

"We've already established that, Ravin. Your name is Ravin, and you're a Data & AI Architect. I remember!"

#### Prompt templates

Prompt templateshelp to structure the input to the language model in a consistent way. They allow you to define a template with placeholders that can be filled with dynamic content at runtime. This is particularly useful for ensuring that the model receives the necessary context and instructions for generating accurate responses.

In [19]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="history")
])

chain=prompt|model

In [20]:
chain.invoke([HumanMessage(content="Hi, This is Ravin. I am a Data & AI Architect.")])

AIMessage(content='Nice to meet you, Ravin. As a Data & AI Architect, you must be involved in designing and implementing data-driven solutions that utilize artificial intelligence and machine learning. What kind of projects have you been working on recently?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 46, 'prompt_tokens': 50, 'total_tokens': 96, 'completion_time': 0.08607078, 'prompt_time': 0.00378288, 'queue_time': 0.05036581, 'total_time': 0.08985366}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_ab04adca7d', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--1cd23e73-4fb8-4b4e-a1b2-79fc0bd70128-0', usage_metadata={'input_tokens': 50, 'output_tokens': 46, 'total_tokens': 96})

In [21]:
with_message_history = RunnableWithMessageHistory(chain,get_session_history)

In [23]:
config = { "configurable": {"session_id": "chat3"} }   
response = with_message_history.invoke([HumanMessage(content="Hi, This is Ravin. I am a Data & AI Architect.")],config=config)
response

AIMessage(content="Nice to meet you, Ravin. As a Data & AI Architect, you must be working on designing and implementing AI and data-driven solutions. What kind of projects are you currently working on or have you recently completed? I'm here to listen and help if I can.", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 56, 'prompt_tokens': 50, 'total_tokens': 106, 'completion_time': 0.102728051, 'prompt_time': 0.002504282, 'queue_time': 0.047374498, 'total_time': 0.105232333}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_e32974efee', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--2cfda197-c687-407b-bb62-dc9d91f1a74a-0', usage_metadata={'input_tokens': 50, 'output_tokens': 56, 'total_tokens': 106})

In [24]:
response = with_message_history.invoke(
    [HumanMessage(content="Hello, who am I?")],config=config)
response.content

'You are Ravin, a Data & AI Architect.'

In [32]:
## Add more complexity by adding system prompts

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer all questions to the best of your ability in {language}."),
    MessagesPlaceholder(variable_name="message")
])

chain=prompt|model

In [35]:
response= chain.invoke({"message":[HumanMessage(content="Hi, This is Ravin. I am a Data & AI Architect.")],"language":"Hindi"})
response.content

'नमस्ते रविन जी, आपका स्वागत है! मैं आपकी सहायता के लिए तैयार हूँ। डेटा और AI प्रौद्योगिकियों के क्षेत्र में आपकी विशेषज्ञता के कारण, मुझे लगता है कि हम एक दिलचस्प और ज्ञानवर्धक चर्चा की शुरुआत कर सकते हैं। क्या आप अपने काम में कुछ चुनौतियाँ या कार्यों पर चर्चा करना चाहते हैं?'

#### Manage Conversation History with Sessions and context window limit size
To manage conversation history effectively, especially in scenarios where the context window of the language model is limited, we can implement session-based history management. This involves creating unique sessions for each user interaction and maintaining a history of messages within those sessions. By doing so, we can ensure that the chatbot retains relevant context while adhering to the constraints of the model's context window.


In [None]:
from langchain_core.messages import SystemMessage, trim_messages
trimmer = trim_messages(strategy="last",
                         max_tokens=150,
                         token_counter=model,
                        # Most chat models expect that chat history starts with either:
                        # (1) a HumanMessage or
                        # (2) a SystemMessage followed by a HumanMessage
                        start_on="human",
                        # Usually, we want to keep the SystemMessage
                        # if it's present in the original history.
                        # The SystemMessage has special instructions for the model.
                        include_system=True,
                        allow_partial=False,
                         )
messages = [
    SystemMessage(
        "You're a friendly and witty assistant who always adds a touch of humor to your replies."
    ),
    HumanMessage("Hey there! I really love coffee. What about you?"),
    AIMessage("Ah, coffee — the magical bean juice that powers humanity! I’d love it too, if I had taste buds."),
    HumanMessage("Haha! What’s something you don’t like then?"),
    AIMessage("Probably battery drain. It's like my version of a bad mood."),
    HumanMessage("Alright smarty, let’s test your math — what’s 15 * 7?"),
    AIMessage("Easy! 15 times 7 is 105 — just like my IQ after a firmware update."),
    HumanMessage("Haha, nice one!"),
    AIMessage("Thanks! Humor is my favorite algorithm."),
]

trimmer.invoke(messages)


[SystemMessage(content="You're a friendly and witty assistant who always adds a touch of humor to your replies.", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Haha! What’s something you don’t like then?', additional_kwargs={}, response_metadata={}),
 AIMessage(content="Probably battery drain. It's like my version of a bad mood.", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Alright smarty, let’s test your math — what’s 15 * 7?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Easy! 15 times 7 is 105 — just like my IQ after a firmware update.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Haha, nice one!', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Thanks! Humor is my favorite algorithm.', additional_kwargs={}, response_metadata={})]

In [44]:
# Lets see what LLM remembers now with context window limit size set
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

chain = (
    RunnablePassthrough.assign(messages=RunnablePassthrough() | trimmer)
    | prompt
    | model
)

response = chain.invoke({"message": messages + [HumanMessage(content="What do I like?")]
                         , "language": "English"})

response.content

"You like coffee, and I'm guessing you liked our little math problem-solving session too?"

In [47]:
chain.invoke({"message": messages + [HumanMessage(content="What was the math problem?")]
                         , "language": "English"}).content

'Right! The math problem was 15 * 7, and I (hopefully) correctly answered 105.'