# 2.6 Conversational RAG - Adding chat history

## Setup

### Install dependencies

In [None]:
%pip install python-dotenv~=1.0 docarray~=0.40.0 pypdf~=5.1 --upgrade --quiet
%pip install chromadb~=0.5.18 sentence-transformers~=3.3 --upgrade --quiet 
%pip install langchain~=0.3.7 langchain_openai~=0.2.6 langchain_community~=0.3.5 --upgrade --quiet

# If running locally, you can do this instead:
#%pip install -r ../requirements.txt

### Load environment variables

In [None]:
import os
from dotenv import find_dotenv, load_dotenv

# Locate the .env file first, then load its entries as environment variables.
# Assigning the result to `_` keeps the cell from echoing the return value.
dotenv_file = find_dotenv()
_ = load_dotenv(dotenv_file)

# If running in Google Colab, you can use this code instead:
# from google.colab import userdata
# os.environ["AZURE_OPENAI_API_KEY"] = userdata.get("AZURE_OPENAI_API_KEY")
# os.environ["AZURE_OPENAI_ENDPOINT"] = userdata.get("AZURE_OPENAI_ENDPOINT")

### Setup models

In [None]:
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings

# Both Azure OpenAI clients below are pinned to the same API version.
api_version = "2024-10-01-preview"

# Chat model used by the chains in this notebook (temperature 0.0).
llm = AzureChatOpenAI(
    deployment_name="gpt-4o",
    temperature=0.0,
    openai_api_version=api_version,
)

# Embedding model used when indexing documents into the vector store.
embedding_model = AzureOpenAIEmbeddings(
    model="text-embedding-3-large",
    openai_api_version=api_version,
)

### Setup LangSmith tracing for this notebook

In [None]:
import os

# The LangSmith API key etc. is read from the .env file; uncomment the lines below to enable tracing.
# my_name = "Totoro"
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_PROJECT"] = f"tokyo24-test-{my_name}"

### Setup path to data 

In [None]:
# Directory (relative path) that holds the lecture PDFs loaded in the "LOAD DOCS" cell.
data_path = "../data"

## Initialize VectorDB - like before

Let's load our vectorDB and set it up like in previous chapters. _If you already have a persisted vectorDB, you can skip to "Vector DB" below._

In [None]:
# LOAD DOCS
from langchain.document_loaders import PyPDFLoader

# PDFs to index. Each file is listed exactly once: loading the same lecture
# twice would index identical chunks twice, and the retriever could then
# return the same passage several times among its top results.
# NOTE(review): if a MachineLearning-Lecture02.pdf exists in the data folder,
# it probably belongs in this list as well - confirm.
pdf_files = [
    "MachineLearning-Lecture01.pdf",
    "MachineLearning-Lecture03.pdf",
]
loaders = [PyPDFLoader(f"{data_path}/{pdf_file}") for pdf_file in pdf_files]

# Collect the documents produced by every loader into one flat list.
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [None]:
# SPLIT DOCS
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with a chunk size of 1500 and an overlap of 150
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)

# Break the loaded documents into chunks for indexing.
splits = text_splitter.split_documents(docs)

In [None]:
# Vector DB - Indexing / Store
from langchain.vectorstores import Chroma

# Where the database would be saved if persistence is switched on below
persist_directory = './db/chroma-ML-docs/'

# Embed the chunks and store them in the "ml_docs" collection.
# NOTE(review): re-running this cell in the same kernel may add the chunks to
# the collection a second time - check the count printed in the next cell.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
    collection_name="ml_docs",
    #persist_directory=persist_directory # Optionally persist the database
)

# Retriever with default settings, used by the chains further down.
retriever = vectordb.as_retriever()

In [None]:
# Sanity check: number of entries in the collection behind the vector store.
# Note that `_collection` is a private attribute of the Chroma wrapper.
print(vectordb._collection.count())

## Create a RAG chain

### Prompt

In [None]:
from langchain.prompts import ChatPromptTemplate

# System instructions for the answering step; {context} is the placeholder
# where the retrieved document text is inserted.
q_and_a_system_template = """Use the following pieces of context to answer the question. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. 
<context>
{context}
</context>
"""

# Two-message prompt: the system instructions, then the user's question.
q_and_a_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", q_and_a_system_template),
        ("human", "{input}"),
    ]
)


### Start with a simple RAG chain

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Step 1: chain that puts the retrieved documents into the prompt and calls the LLM.
combine_docs_chain = create_stuff_documents_chain(llm=llm, prompt=q_and_a_prompt)

# Step 2: put the retriever in front, so the "input" question drives retrieval.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [None]:
# Ask a single question through the simple RAG chain. `question` is reused
# as the first conversational turn in the "Run it!" cell.
question = "Is probability a class topic?"
result = rag_chain.invoke({"input": question})

In [None]:
# Show the "answer" entry of the chain result
result["answer"]

In [None]:
# Show the first entry of the "context" list in the chain result,
# i.e. the first source document
result["context"][0]

### Add history aware retrieval 

The chain we have built uses the input query directly to retrieve relevant context. But in a conversational setting, the user query might require conversational context to be understood. For example, consider this exchange:

> Human: "Is probability a class topic?"
>
> AI: "Yes, probability is a class topic, as the course assumes familiarity with basic probability and statistics."
>
> Human: "Why are those prerequisites needed?"

In order to answer the second question, our system needs to understand that "those" refers to "probability and statistics."

We'll need to update two things about our existing app:

1. **Prompt**: Update our prompt to support historical messages as an input.
2. **Contextualizing questions**: Add a sub-chain that takes the latest user question and reformulates it in the context of the chat history. This can be thought of simply as building a new "history aware" retriever. Whereas before we had:

   `query -> retriever`

   Now we will have:

   `(query, conversation history) -> LLM -> rephrased query -> retriever`

In [None]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# Instructions for the rephrasing step: turn the latest user message into a
# question that can be understood without the earlier conversation.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message order: instructions, the prior conversation, then the new question.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# Retriever built from the LLM, the plain retriever and the rephrasing prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

### Your task: Put it together! 
Build the final chain using history_aware_retriever. Hint: look at how the basic RAG chain was put together. And look here for further reference: 
https://python.langchain.com/docs/tutorials/qa_chat_history/#adding-chat-history 

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# TODO: Your task - redefine q_and_a_prompt to take history into account.
# Hint: compared with the first q_and_a_prompt, the message list also needs a
# slot for the "chat_history" messages that the "Run it!" cell passes in.
q_and_a_prompt = ChatPromptTemplate.from_messages(
    [
        # TODO: ...
    ]
)

# TODO: Your task - create the final RAG chain.
# Hint: same two steps as the simple chain earlier in this notebook, but
# retrieve with history_aware_retriever instead of retriever.
# Until rag_chain is reassigned here, the "Run it!" cell still calls the
# simple chain defined earlier.
# rag_chain =

### Set up the simplest possible store for message history
**_We'll improve on this in the next (agent) section._**

In [None]:
from typing import List
# HumanMessage and AIMessage are imported here, in the cell that uses them, so
# add_to_history does not depend on an import made in a later cell.
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage

# In-memory conversation log shared by the cells below.
# Re-running this cell resets it to an empty list.
chat_history: List[BaseMessage] = []


def add_to_history(human_message: str, ai_message: str) -> None:
    """Append one question/answer exchange to the global ``chat_history``.

    Args:
        human_message: Text of the user's message.
        ai_message: Text of the model's reply to that message.

    The two messages are appended in order (human first, then AI), so the
    list keeps the chronological order of the conversation.
    """
    chat_history.extend(
        [
            HumanMessage(content=human_message),
            AIMessage(content=ai_message),
        ]
    )

## Run it!

In [None]:
from langchain_core.messages import AIMessage, HumanMessage

# Turn 1: ask the opening question, then record the exchange in the history.
first_turn_inputs = {"input": question, "chat_history": chat_history}
ai_msg_1 = rag_chain.invoke(first_turn_inputs)
add_to_history(human_message=question, ai_message=ai_msg_1["answer"])

# Turn 2: a follow-up that only makes sense together with turn 1.
second_question = "Why are those prerequisites needed?"
second_turn_inputs = {"input": second_question, "chat_history": chat_history}
ai_msg_2 = rag_chain.invoke(second_turn_inputs)

print(ai_msg_2["answer"])

#### Have a look at the trace in LangSmith
Example: https://smith.langchain.com/public/7cfa0ffd-90ae-4aa3-8a5a-be7479010a17/r