# Naive Approach

# Langchain Approach

In [1]:
%pip install langchain openai faiss-cpu tiktoken

Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp311-cp311-win_amd64.whl (10.8 MB)
     ---------------------------------------- 0.0/10.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/10.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/10.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/10.8 MB ? eta -:--:--
     --------------------------------------- 0.1/10.8 MB 469.7 kB/s eta 0:00:23
     --------------------------------------- 0.1/10.8 MB 573.4 kB/s eta 0:00:19
      -------------------------------------- 0.1/10.8 MB 655.8 kB/s eta 0:00:17
      -------------------------------------- 0.1/10.8 MB 655.8 kB/s eta 0:00:17
      -------------------------------------- 0.2/10.8 MB 477.7 kB/s eta 0:00:23
     - ------------------------------------- 0.3/10.8 MB 679.5 kB/s eta 0:00:16
     - ------------------------------------- 0.3/10.8 MB 679.5 kB/s eta 0:00:16
     - ------------------------------------- 0.3/10.8 MB 634.9 kB

In [2]:
from operator import itemgetter

from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.vectorstores import FAISS


In [4]:
from dotenv import load_dotenv
# NOTE(review): presumably this supplies the OpenAI credentials used by the
# embeddings/chat clients below from a local .env file — confirm which
# variables are expected.
load_dotenv()

True

In [5]:
# Index a single example sentence with OpenAI embeddings and expose the
# resulting FAISS store through the retriever interface.
vectorstore = FAISS.from_texts(
    ["harrison worked at kensho"], embedding=OpenAIEmbeddings()
)
retriever = vectorstore.as_retriever()

# Prompt with two slots: {context} and {question}.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chat model constructed with library defaults (no arguments passed).
model = ChatOpenAI()

In [6]:
# Pipeline: the input feeds the retriever (-> "context") and is also forwarded
# as "question"; that dict fills the prompt, the model answers, and the output
# parser turns the reply into a string (see the output of the next cell).
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [7]:
# This chain takes the bare question string as its input.
chain.invoke("where did harrison work?")

'Harrison worked at Kensho.'

In [8]:
# Same prompt text as the first example plus a {language} slot.
# Note: this cell rebinds `template`, `prompt` and `chain` from earlier cells.
template = """Answer the question based only on the following context:
{context}

Question: {question}

Answer in the following language: {language}
"""
prompt = ChatPromptTemplate.from_template(template)

# The input is now a dict, so every prompt slot selects its own key with
# itemgetter; only the "question" value is piped into the retriever.
chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        "language": itemgetter("language"),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [9]:
# The input dict must carry both keys the chain selects: "question" and "language".
chain.invoke({"question": "where did harrison work", "language": "italian"})

'Harrison ha lavorato a Kensho.'

## Conversational Retrieval Chain

In [10]:
from langchain.schema import format_document
from langchain.schema.runnable import RunnableMap

In [11]:
from langchain.prompts.prompt import PromptTemplate

# Prompt that asks the model to rewrite a follow-up question as a standalone
# question, given the formatted {chat_history} and the new {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [12]:
# Final answering prompt with {context} and {question} slots.
# Note: rebinds `template`, which earlier cells also assigned; the text is the
# same as the first prompt in this notebook.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [13]:
# Default per-document prompt: renders a document as its page content only.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Render every document with ``document_prompt`` and join the results.

    Args:
        docs: Iterable of documents to render.
        document_prompt: Prompt passed to ``format_document`` for each document.
        document_separator: String inserted between rendered documents.

    Returns:
        The rendered documents joined by ``document_separator``.
    """
    rendered = (format_document(document, document_prompt) for document in docs)
    return document_separator.join(rendered)

In [14]:
from typing import List, Tuple


def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "Human: " + dialogue_turn[0]
        ai = "Assistant: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer

In [15]:
# Step 1: condense (question, chat_history) into a standalone question.
# The assign step replaces the raw chat_history with its formatted string
# before the condense prompt is filled; the result is exposed under the
# single key "standalone_question".
_inputs = RunnableMap(
    standalone_question=RunnablePassthrough.assign(
        chat_history=lambda x: _format_chat_history(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser(),
)
# Step 2: retrieve documents for the standalone question, merge them into one
# context string, and forward the standalone question as "question".
_context = {
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    "question": lambda x: x["standalone_question"],
}
# Full chain. There is no output parser at the end, so invoke() returns the
# chat model's message object (see the AIMessage outputs below).
conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | ChatOpenAI()

In [16]:
# First turn: no prior conversation, so chat_history is an empty list.
conversational_qa_chain.invoke(
    {
        "question": "where did harrison work?",
        "chat_history": [],
    }
)

AIMessage(content='Harrison was employed at Kensho.')

In [17]:
# Follow-up turn: "he" is only resolvable through the chat history, which is
# supplied here as a list of (human, ai) tuples.
conversational_qa_chain.invoke(
    {
        "question": "where did he work?",
        "chat_history": [("Who wrote this notebook?", "Harrison")],
    }
)

AIMessage(content='Harrison worked at Kensho.')

## With Memory and returning source documents

In [18]:
from operator import itemgetter

from langchain.memory import ConversationBufferMemory

In [19]:
# Conversation memory for the chain below. With return_messages=True the
# loaded history is a list of message objects (see the load_memory_variables
# output further down); input_key/output_key match the "question"/"answer"
# keys passed to save_context later.
memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)

In [20]:
# First we add a step to load memory
# This adds a "chat_history" key to the input object (taken from the memory's
# "history" variable)
# NOTE(review): because the memory uses return_messages=True, chat_history is
# a list of message objects rather than (human, ai) tuples, so
# _format_chat_history has to cope with that shape — verify with a second turn
# after save_context.
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)
# Now we calculate the standalone question
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: _format_chat_history(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser(),
}
# Now we retrieve the documents
# ("question" is replaced by the standalone question from here on)
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}
# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}
# And finally, we do the part that returns the answers
# (the retrieved docs are passed through next to the model's answer)
answer = {
    "answer": final_inputs | ANSWER_PROMPT | ChatOpenAI(),
    "docs": itemgetter("docs"),
}
# And now we put it all together!
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [21]:
# Only "question" is supplied; chat_history is filled in from memory by the
# chain's first step.
inputs = {"question": "where did harrison work?"}
result = final_chain.invoke(inputs)
result

{'answer': AIMessage(content='Harrison was employed at Kensho.'),
 'docs': [Document(page_content='harrison worked at kensho')]}

In [22]:
# Note that the memory does not save automatically
# This will be improved in the future
# For now you need to save it yourself
# (result["answer"] is a message object, so its text is stored via .content)
memory.save_context(inputs, {"answer": result["answer"].content})

In [23]:
# Inspect the stored history after the manual save above.
memory.load_memory_variables({})

{'history': [HumanMessage(content='where did harrison work?'),
  AIMessage(content='Harrison was employed at Kensho.')]}

# Advanced RAG (Optimized)

## Using Llama API