# RAG
(retrieval-augmented generation)

In [1]:
import os
from dotenv import load_dotenv

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [2]:
from operator import itemgetter

from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [3]:
text_input = "Liam Everest, born in Crestwood, showed early curiosity in tech and nature. \
        A Mechatronics graduate from Crestwood University, he excelled in eco-friendly tech. \
        Liam aspires to blend technology and nature for a harmonious future."

In [5]:
persist_directory = './Data/chroma/'

In [6]:
vectorstore = Chroma.from_texts(
    [text_input], embedding=OpenAIEmbeddings(), persist_directory=persist_directory
)
retriever = vectorstore.as_retriever()

In [7]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

model = ChatOpenAI()

In [8]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [10]:
chain.invoke("where did Liam study?")

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


'Liam studied at Crestwood University.'

In [11]:
template = """Answer the question based only on the following context:
{context}

Question: {question}

Answer in the following language: {language}
"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        "language": itemgetter("language"),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [12]:
chain.invoke({"question": "where was Liam born", "language": "german"})

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


'Liam wurde in Crestwood geboren.'

## Converstational Retrieval Chain

In [13]:
from langchain.schema import format_document
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.runnables import RunnableParallel

In [14]:
from langchain.prompts.prompt import PromptTemplate

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [15]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [16]:
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

In [17]:
_inputs = RunnableParallel(
    standalone_question=RunnablePassthrough.assign(
        chat_history=lambda x: get_buffer_string(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser(),
)
_context = {
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    "question": lambda x: x["standalone_question"],
}
conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | ChatOpenAI()

In [18]:
conversational_qa_chain.invoke(
    {
        "question": "what is Liam's dream?",
        "chat_history": [],
    }
)

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


AIMessage(content='Liam dreams of blending technology and nature for a harmonious future.')

In [19]:
conversational_qa_chain.invoke(
    {
        "question": "where did he work?",
        "chat_history": [
            HumanMessage(content="Who wrote this notebook?"),
            AIMessage(content="Harrison"),
        ],
    }
)

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


AIMessage(content='Based on the given context, there is no mention of Harrison or where he worked.')

## With Memory and returning source documents

In [22]:
from operator import itemgetter

from langchain.memory import ConversationBufferMemory

In [24]:
memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)

In [25]:
# First we add a step to load memory
# This adds a "memory" key to the input object
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

In [26]:
# Now we calculate the standalone question
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser(),
}

In [27]:
# Now we retrieve the documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}

In [28]:
# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

In [29]:
# And finally, we do the part that returns the answers
answer = {
    "answer": final_inputs | ANSWER_PROMPT | ChatOpenAI(),
    "docs": itemgetter("docs"),
}

In [30]:
# And now we put it all together!
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [31]:
inputs = {"question": "where did harrison work?"}
result = final_chain.invoke(inputs)
result

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


{'answer': AIMessage(content="The context does not provide any information about Harrison's employment."),
 'docs': [Document(page_content='Liam Everest, born in Crestwood, showed early curiosity in tech and nature.         A Mechatronics graduate from Crestwood University, he excelled in eco-friendly tech.         Liam aspires to blend technology and nature for a harmonious future.')]}

In [32]:
# Note that the memory does not save automatically
# This will be improved in the future
# For now you need to save it yourself
memory.save_context(inputs, {"answer": result["answer"].content})

In [33]:
memory.load_memory_variables({})

{'history': [HumanMessage(content='where did harrison work?'),
  AIMessage(content="The context does not provide any information about Harrison's employment.")]}

In [34]:
inputs = {"question": "but where did he really work?"}
result = final_chain.invoke(inputs)
result

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


{'answer': AIMessage(content='There is no information provided about where Liam Everest actually works.'),
 'docs': [Document(page_content='Liam Everest, born in Crestwood, showed early curiosity in tech and nature.         A Mechatronics graduate from Crestwood University, he excelled in eco-friendly tech.         Liam aspires to blend technology and nature for a harmonious future.')]}