<a href="https://colab.research.google.com/github/quantranvr/all-in-one/blob/main/LangChain_QA_w_RAG_part_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Adding logic for incorporating historical messages

To incorporate historical messages, we need to **update** our **prompt** so that it accepts the chat history as an input, and add a sub-chain that takes the latest user question and **reformulates** it in the **context** of the past messages.

Tutorial @ https://python.langchain.com/docs/use_cases/question_answering/chat_history

This notebook contains 2 core parts:
1. **Reproduce** the [tutorial](https://python.langchain.com/docs/use_cases/question_answering/chat_history)'s example
2. **Apply** the knowledge learned to a similar problem

# Installation

In [None]:
!pip install --upgrade --quiet langchain langchain-openai langchainhub langchain-community chromadb bs4

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.6/803.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m509.0/509.0 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m229.5/229.5 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.3/49.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.4/223.4 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m51.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m71.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━

# Part 1: Reproduce

In [None]:
import getpass
import os

# Ask for the key only when it is not already set, so re-running this cell
# (or running in an environment that already exports the key) does not block
# on input or overwrite a valid key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [None]:
# load
import bs4
from langchain_community.document_loaders import WebBaseLoader
# split
from langchain.text_splitter import RecursiveCharacterTextSplitter
# index
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
# retrieve & generate
from langchain import hub
from langchain_openai import ChatOpenAI
# chain
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# contextualize question
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage

In [None]:
# Load the blog post. The strainer is handed to the loader as `parse_only`,
# restricting parsing to elements whose CSS class is one of the listed ones.
web_paths = ("https://lilianweng.github.io/posts/2023-06-23-agent/",)
bs4_strainer = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(web_paths=web_paths, bs_kwargs={"parse_only": bs4_strainer})
docs = loader.load()

# Quick size check of the first loaded document.
print(f"Loaded document has {len(docs[0].page_content)} characters")

Loaded document has 42824 characters


In [None]:
# Cut the loaded documents into overlapping chunks before indexing them.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
splits = splitter.split_documents(docs)

print(f"Number of chunks = {len(splits)}")

Number of chunks = 66


In [None]:
# Embed the chunks and index them in an in-memory Chroma store.
# An explicit collection name keeps this index isolated: when no name is given,
# Chroma falls back to a default collection name, and in-memory stores created
# in the same kernel under the same name share their contents. Any other
# `Chroma.from_documents` call in this kernel that relies on the default would
# otherwise be mixed with these chunks.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
    collection_name="part1_agent_blog_post",
)

In [None]:
# retrieve & generate
# Building blocks of the basic RAG chain assembled later in this cell:
# - retriever: the vector store exposed through the retriever interface
# - prompt: the "rlm/rag-prompt" template pulled from the LangChain hub
# - llm: the chat model (temperature 0, presumably to keep answers repeatable)
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Each item must expose a ``page_content`` attribute. The texts are joined
    in the order given, separated by a blank line.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

# Basic RAG chain (no chat history): the incoming question is sent to the
# retriever to build "context" and is also passed through unchanged as
# "question"; the filled prompt goes to the model and the reply is parsed
# into a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# answer question
# The chain takes the bare question string; the returned answer is the cell's output.
rag_chain.invoke("What is Task Decomposition?")

"Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It can be done through various methods such as using prompting techniques, task-specific instructions, or human inputs. The goal is to make the task more manageable and facilitate the interpretation of the model's thinking process."

In [None]:
# Sub-chain that rewrites a follow-up question into a standalone question.
# The trailing backslashes join the source lines, so the system prompt is a
# single line of text with no embedded newlines.
contextualize_q_system_prompt = """\
Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is.\
"""

# Message order: instructions, then the prior turns, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# Prompt -> model -> plain string (the reformulated question).
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [None]:
# Sample history: one completed question/answer exchange.
chat_history = [HumanMessage(content="What does LLM stand for?"), AIMessage(content="Large language model")]

In [None]:
# Render the prompt on its own (the model is not part of this call) to inspect
# the message list it produces.
contextualize_q_prompt.invoke(
    {"chat_history": chat_history, "question": "What is meant by large"}
)

ChatPromptValue(messages=[SystemMessage(content='Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.'), HumanMessage(content='What does LLM stand for?'), AIMessage(content='Large language model'), HumanMessage(content='What is meant by large')])

In [None]:
# Run the full sub-chain: same inputs as above, but now the model rewrites the
# follow-up into a standalone question.
contextualize_q_chain.invoke(
    {"chat_history": chat_history, "question": "What is meant by large"}
)

'What is the definition of "large" in the context of a language model?'

In [None]:
# Answering prompt for the history-aware chain.
# The backslash after "concise." swallows one line break, so "{context}" ends
# up on the line directly below the instructions.
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""

# Message order: instructions plus retrieved context, prior turns, new question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])


def contextualized_question(input: dict):
    """Pick what should be fed to the retriever for this turn.

    With no (or an empty) ``chat_history`` the raw ``question`` string is
    returned as-is. Otherwise the ``contextualize_q_chain`` object itself is
    returned (not its result), leaving it to the enclosing chain to run it.
    """
    if not input.get("chat_history"):
        return input["question"]
    return contextualize_q_chain


# History-aware RAG chain: "context" is added to the input dict by retrieving
# with the (possibly reformulated) question, then the prompt is filled and sent
# to the model. There is no output parser at the end, so the chain yields the
# model's message object rather than a string.
rag_chain = (
    RunnablePassthrough.assign(
        context=(
            contextualized_question
            | retriever
            | format_docs
        ),
    )
    | qa_prompt
    | llm
)

In [None]:
# First turn: ask with an empty history, then record both sides of the exchange.
chat_history = []
question = "What is Task Decomposition?"
ai_msg = rag_chain.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    }
)
chat_history.append(HumanMessage(content=question))
chat_history.append(ai_msg)

# Second turn: the follow-up says "it", which only makes sense given the history.
second_question = "What are common ways of doing it?"
rag_chain.invoke(
    {
        "question": second_question,
        "chat_history": chat_history,
    }
)

AIMessage(content='Common ways of task decomposition include:\n1. Using techniques like Chain of Thought (CoT) or Tree of Thoughts, where the task is broken down into multiple manageable steps, allowing the model to think step by step and explore different reasoning possibilities.\n2. Providing task-specific instructions or prompts to guide the model in decomposing the task. For example, asking the model to outline a story for writing a novel or asking for subgoals to achieve a specific task.\n3. Involving human inputs, where humans provide guidance or input to help decompose the task into smaller steps. This can be done through collaboration or by leveraging human expertise in the task domain.')

# Part 2: **Apply**

Problem:

A LangChain learner wants to dive deeper into certain concepts by asking a series of related questions.

Build a chatbot that can answer each of the learner's questions based on the official LangChain docs and on the learner's chat history with the chatbot.

In [None]:
import getpass
import os

# Ask for the key only when it is not already set, so running this cell after
# the key has been provided earlier in the session does not prompt again or
# overwrite it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

··········


In [None]:
# load docs
from langchain_community.document_loaders import WebBaseLoader
import bs4
# split into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
# store and index
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
# retrieve and generate
from langchain_openai import ChatOpenAI
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [None]:
# Load the LangChain "agents" documentation pages.
web_paths = (
    # overview
    "https://python.langchain.com/docs/modules/agents/",
    "https://python.langchain.com/docs/modules/agents/quick_start",
    "https://python.langchain.com/docs/modules/agents/concepts",
    # agent types
    "https://python.langchain.com/docs/modules/agents/agent_types/",
    "https://python.langchain.com/docs/modules/agents/agent_types/openai_functions_agent",
    "https://python.langchain.com/docs/modules/agents/agent_types/openai_tools",
    "https://python.langchain.com/docs/modules/agents/agent_types/xml_agent",
    "https://python.langchain.com/docs/modules/agents/agent_types/json_agent",
    "https://python.langchain.com/docs/modules/agents/agent_types/structured_chat",
    "https://python.langchain.com/docs/modules/agents/agent_types/react",
    "https://python.langchain.com/docs/modules/agents/agent_types/self_ask_with_search",
    # how-to guides
    "https://python.langchain.com/docs/modules/agents/how_to/custom_agent",
    "https://python.langchain.com/docs/modules/agents/how_to/streaming",
    "https://python.langchain.com/docs/modules/agents/how_to/agent_iter",
    "https://python.langchain.com/docs/modules/agents/how_to/agent_structured",
    "https://python.langchain.com/docs/modules/agents/how_to/handle_parsing_errors",
    "https://python.langchain.com/docs/modules/agents/how_to/intermediate_steps",
    "https://python.langchain.com/docs/modules/agents/how_to/max_iterations",
    "https://python.langchain.com/docs/modules/agents/how_to/max_time_limit",
    "https://python.langchain.com/docs/modules/agents/how_to/streaming_events",
    # tools
    "https://python.langchain.com/docs/modules/agents/tools/",
    "https://python.langchain.com/docs/modules/agents/tools/toolkits",
    "https://python.langchain.com/docs/modules/agents/tools/custom_tools",
    "https://python.langchain.com/docs/modules/agents/tools/tools_as_openai_functions",
)

# The class filter is one string holding two space-separated class names
# (it is not a tuple of alternatives).
bs4_strainer = bs4.SoupStrainer(class_="theme-doc-markdown markdown")
loader = WebBaseLoader(web_paths=web_paths, bs_kwargs={"parse_only": bs4_strainer})
docs = loader.load()

print(f"Number of docs = {len(docs)}")

Number of docs = 24


In [None]:
# Cut the loaded pages into overlapping chunks before indexing them.
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100, add_start_index=True)
splits = splitter.split_documents(docs)

print(f"Number of chunks = {len(splits)}")

Number of chunks = 360


In [None]:
# Embed the chunks and index them in an in-memory Chroma store.
# An explicit collection name keeps this index isolated: when no name is given,
# Chroma falls back to a default collection name, and in-memory stores created
# in the same kernel under the same name share their contents. Chunks indexed
# by any earlier `Chroma.from_documents` call that relied on the default would
# otherwise show up in this retriever's results.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
    collection_name="part2_langchain_agents_docs",
)

In [None]:
# retrieve and generate
# - retriever: the vector store exposed through the retriever interface
# - prompt: the "rlm/rag-prompt" template pulled from the LangChain hub
#   NOTE(review): no later cell visible in this part references `prompt`
#   (the chain below uses `qa_prompt`) -- confirm before removing.
# - llm: the chat model (temperature 0, presumably to keep answers repeatable)
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

def format_docs(docs):
    """Build one context string from the ``page_content`` of each document.

    The texts keep their input order and are separated by a blank line.
    """
    texts = []
    for document in docs:
        texts.append(document.page_content)
    return "\n\n".join(texts)

In [None]:
# Sub-chain that rewrites a follow-up question into a standalone question.
# The trailing backslashes join the source lines; the last text line has none,
# so the system prompt ends with a newline.
contextualize_q_system_prompt = """\
Given a chat history and the latest user question \
which might reference context in the chat history, \
formulate a standalone question which can be understood \
without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is.
"""

# Message order: instructions, then the prior turns, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# Prompt -> model -> plain string (the reformulated question).
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [None]:
# Smoke-test the reformulation sub-chain with a one-exchange history and a
# follow-up question that depends on it.
contextualize_q_chain.invoke({
    "chat_history": [
        HumanMessage(content="What does LLM stand for?"),
        AIMessage(content="Large language model"),
    ],
    "question": "What is meant by large",
})

'What is the definition of "large" in the context of a language model?'

In [None]:
# Answering prompt for the history-aware chain.
# The backslash after "concise." swallows one line break, so "{context}" ends
# up on the line directly below the instructions; the backslash after
# "{context}" removes the final newline.
qa_system_prompt = """\
You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}\
"""

# Message order: instructions plus retrieved context, prior turns, new question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

def contextualized_question(input: dict):
    """Choose the retriever input for the current turn.

    Returns the raw ``question`` string when ``chat_history`` is missing or
    empty. Otherwise returns the ``contextualize_q_chain`` object itself (not
    its output), leaving it to the enclosing chain to run it.
    """
    has_history = bool(input.get("chat_history"))
    return contextualize_q_chain if has_history else input["question"]

# History-aware RAG chain: "context" is added to the input dict by retrieving
# with the (possibly reformulated) question, then the prompt is filled and sent
# to the model. There is no output parser at the end of the chain.
rag_chain = (
    RunnablePassthrough.assign(context=contextualized_question | retriever | format_docs)
    | qa_prompt
    | llm
)

In [None]:
# Start from an empty conversation and ask the questions one after another;
# from the second question on, each one refers back to earlier turns ("it", "them").
chat_history = []

question_series = [
    "What is LangChain?",
    "Could I use it with Python?",
    "What are other programming languages that I could use it with?",
    "How do I benefit from it?",
    "What are key concepts of it?",
    "What is the definition of each of them?",
]

for question in question_series:
    ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})

    # Record both sides of the exchange so the next question can build on it.
    chat_history.append(HumanMessage(content=question))
    chat_history.append(ai_msg)

In [None]:
# Display the accumulated conversation (alternating human questions and model answers).
chat_history

[HumanMessage(content='What is LangChain?'),
 AIMessage(content='LangChain is an open-source orchestration framework for building applications using large language models (LLMs) such as chatbots and virtual agents. It simplifies the programming and integration process with external data sources and software workflows. It supports Python and JavaScript languages and offers integrations for various LLM providers.'),
 HumanMessage(content='Could I use it with Python?'),
 AIMessage(content='Yes, LangChain supports Python as one of its supported languages. You can use LangChain to build applications and integrate with Python-based workflows and data sources.'),
 HumanMessage(content='What are other programming languages that I could use it with?'),
 AIMessage(content='LangChain supports Python and JavaScript as the programming languages for building applications.'),
 HumanMessage(content='How do I benefit from it?'),
 AIMessage(content='By using LangChain, you can leverage the power of larg