## Imports:

In [None]:
# Chat:
from operator import itemgetter
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
# History
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.messages import trim_messages
from langchain_core.runnables import RunnableWithMessageHistory, RunnablePassthrough

In [None]:
from IPython.display import Markdown
from llm import get_response_stream, get_response

In [None]:
# Quick smoke test of the local `llm` helper in dummy mode: print every piece
# it yields back-to-back on one line.
# NOTE(review): `get_response_stream` is imported but unused here - confirm
# that `get_response` is the helper meant to be iterated.
dummy_reply = get_response("hello", dummy=True)
for piece in dummy_reply:
    print(piece, end="", flush=True)

### Chat:

In [None]:
# Prompt for the answering chain.
# The system and human entries are (role, template) tuples so that `{context}`
# and `{input}` are filled in when the prompt is formatted. Ready-made
# SystemMessage/HumanMessage objects are passed through verbatim, which would
# send the literal text "{context}" / "{input}" to the model.
template_chat = ChatPromptTemplate.from_messages(
    messages=[
        (
            "system",
            "You are a helpful assistant. You answer the question asked based on the chat history and also the Documents attached in context. Answer factually and clearly. State the source in answer wherever possible. Use various markdown features in response. \n<CONTEXT>\n{context}\n</CONTEXT>",
        ),
        # Earlier turns of the conversation, supplied under the "messages" key.
        MessagesPlaceholder(variable_name="messages"),
        ("human", "{input}"),
    ]
)
template_chat

### Summarize:

In [None]:
# Prompt for the query-rewriting ("summarize") chain: it turns the chat history
# plus the latest user message into one standalone retrieval question.
# The human entry is a (role, template) tuple so that `{input}` is substituted
# when the prompt is formatted; a ready-made HumanMessage("{input}") would be
# sent to the model as the literal text "{input}".
template_summarize = ChatPromptTemplate.from_messages(
    messages=[
        (
            "system",
            "You are a Summarizing expert. You are given with a complete chat history and the latest user message in end of it. The latest message might have some content which refers to some part in history. You have to compile everything and return a single prompt, which will have a standalone question which can be completely understood without any chat history. So, give me a single prompt which will be helpful in retrieving the most relevant docs to latest message.",
        ),
        # Earlier turns of the conversation, supplied under the "messages" key.
        MessagesPlaceholder(variable_name="messages"),
        ("human", "{input}"),
    ]
)
# Echo the template defined in this cell (the original echoed `template_chat`).
template_summarize

## Chat Message History:

In [None]:
# In-memory store of chat histories, keyed by session id. Entries are created
# lazily by `get_session_history` and live only as long as the kernel does.
chat_histories = {}

In [None]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history stored for ``session_id``.

    The first request for an unknown session creates an empty
    ``ChatMessageHistory``, registers it in ``chat_histories`` and prints a
    short notice; later requests return that same object.
    """
    try:
        return chat_histories[session_id]
    except KeyError:
        # Unknown session: start a fresh history and remember it.
        fresh_history = ChatMessageHistory()
        chat_histories[session_id] = fresh_history
        print(f"Created chat hist for session id: `{session_id}`")
        return fresh_history

# On a fresh kernel this first lookup creates the "abv" history and prints the
# creation notice.
get_session_history("abv")

In [None]:
# Repeat lookup: once "abv" exists, the stored history is returned and no
# creation notice is printed.
get_session_history("abv")

## LLM:

In [None]:
from langchain_ollama import ChatOllama
# Gemma3 context size -> 128K (131,072)
# 30k -> 91% RAM, 91% GPU
# 25k -> 82% RAM, 89% GPU
# 15k -> 66% RAM, 87% GPU

# Local Ollama chat model shared by the trimmers and both chains below.
llm = ChatOllama(
    model="gemma3:latest",
    # Pass the temperature as a number; the original string "1" only worked
    # through implicit coercion.
    temperature=1.0,
    #  num_predict=MAX_OUTPUT_TOKENS,
    num_gpu=35,
    num_ctx=20000,
)

In [None]:
# Markdown(llm.invoke("write a story").content)

## Trimmer:

In [None]:
from langchain_core.messages import trim_messages

# Options common to both trimmers: keep the most recent messages, start the
# kept window on a human turn, allow a message to be included partially, and
# use `llm` as the token counter.
_trim_options = dict(
    strategy="last",
    token_counter=llm,
    start_on="human",
    allow_partial=True,  # include_system=True,
)

# Summary budget: 15k chat + 1k system and all
trim_summary = trim_messages(max_tokens=15000, **_trim_options)

# Chat budget: 10k chat + 5*1k docs + 1k system and all
trim_chat = trim_messages(max_tokens=10000, **_trim_options)

## Summarizer:

In [None]:
# Query-rewriting chain: trim the history, fill the summarize prompt, call the
# model and return plain text.
# The history is trimmed with `trim_summary` (the 15k-token budget defined for
# summarizing); the original used the smaller chat trimmer here, leaving
# `trim_summary` unused.
chain = (
    RunnablePassthrough.assign(messages=itemgetter("messages") | trim_summary)
    | template_summarize
    | llm
    | StrOutputParser()
)

# Wrap the chain so the session's history is loaded into "messages" and the
# user text is read from "input".
# NOTE(review): RunnableWithMessageHistory also appends each call's input and
# output to the session history, so the rewritten question lands in the same
# store the chat chain reads from - confirm this is intended.
summarizer_llm = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="messages",
)

In [None]:
# Seed session 10 with a short sample conversation so the summarizer has
# earlier turns to resolve the follow-up question against.
chat_histories[10] = ChatMessageHistory(
    messages=[
        HumanMessage("Hello, I'm Bhushan, What is your name?"),
        AIMessage("I am an AI assistant. I am not a human like you."),
        HumanMessage("What is Artificial General Intelligence?"),
        AIMessage("Artificial General Intelligence (AGI) refers to highly autonomous systems that outperform humans at most economically valuable work."),
    ]
)

In [None]:
# Ask a follow-up that only makes sense with the seeded history of session 10;
# the cell output is the rewritten question returned by the chain.
summary_request = {"input": "So it's not achieved yet?"}
summary_config = {"configurable": {"session_id": 10}}
summarizer_llm.invoke(input=summary_request, config=summary_config)

In [None]:
# Inspect what session 10's history contains after the summarizer call.
chat_histories[10].messages

## Runnable With History:

In [None]:
# Answering chain: trim the history, fill the chat prompt, call the model and
# return plain text.
# `RunnablePassthrough.assign` is a classmethod that builds a new runnable, so
# a name given to a throwaway `RunnablePassthrough(name=...)` instance is lost.
# The trimming step is labelled with `with_config(run_name=...)` instead.
chain = (
    RunnablePassthrough.assign(messages=itemgetter("messages") | trim_chat)
    .with_config(run_name="Trim Chat History")
    | template_chat
    | llm
    | StrOutputParser()
)

# Wrap the chain so the session's history is loaded into "messages" and the
# user text is read from "input".
chat_llm = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="messages",
)

In [None]:
# First turn of session 15: introduce the user and pass a placeholder document
# as context.
first_turn = {
    "input": "Hello, I'm Bhushan, What is your name?",
    "context": "This is some random document which contains some random information.",
}
first_turn_config = {"configurable": {"session_id": 15}}
chat_llm.invoke(input=first_turn, config=first_turn_config)

In [None]:
# Second turn of session 15: a question that can only be answered from the
# stored chat history.
second_turn = {
    "input": "What did we discuss?",
    "context": "There is no context available for this question.",
}
second_turn_config = {"configurable": {"session_id": 15}}
chat_llm.invoke(input=second_turn, config=second_turn_config)

- If () add an option to paste a link and scrape the whole content from there.