In [1]:
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from dotenv import load_dotenv

load_dotenv()

embedding_function = OpenAIEmbeddings()

docs = [
    Document(
        page_content="the dog loves to eat pizza", metadata={"source": "animal.txt"}
    ),
    Document(
        page_content="the cat loves to eat lasagna", metadata={"source": "animal.txt"}
    ),
]


db = Chroma.from_documents(docs, embedding_function)
llm = ChatOpenAI(model="gpt-4o-mini")
retriever = db.as_retriever()

In [2]:
retriever.invoke("What exactly?")

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[Document(id='82c30936-870f-4691-9347-6a1e963be6a5', metadata={'source': 'animal.txt'}, page_content='the dog loves to eat pizza'),
 Document(id='67103371-e59a-4bb9-9819-82eb6af25744', metadata={'source': 'animal.txt'}, page_content='the cat loves to eat lasagna')]

In [3]:
from langchain.chains import ConversationalRetrievalChain
from langchain_core.prompts import ChatPromptTemplate


qa_template = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer
the question. If you don't know the answer, say that you
don't know. Use three sentences maximum and keep the
answer concise.

Chat History:
{chat_history}

Other context:
{context}

Question: {question}
"""

qa_prompt = ChatPromptTemplate.from_template(qa_template)

convo_qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever,
    return_source_documents=True,
    combine_docs_chain_kwargs={
        "prompt": qa_prompt,
    },
)

convo_qa_chain(
    {
        "question": "What kind of food does the cat like?",
        "chat_history": "",
    }
)

  convo_qa_chain(
Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'question': 'What kind of food does the cat like?',
 'chat_history': '',
 'answer': 'The cat loves to eat lasagna.',
 'source_documents': [Document(id='67103371-e59a-4bb9-9819-82eb6af25744', metadata={'source': 'animal.txt'}, page_content='the cat loves to eat lasagna'),
  Document(id='82c30936-870f-4691-9347-6a1e963be6a5', metadata={'source': 'animal.txt'}, page_content='the dog loves to eat pizza')]}

In [4]:
from langchain.prompts.prompt import PromptTemplate

rephrase_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
REPHRASE_TEMPLATE = PromptTemplate.from_template(rephrase_template)

In [5]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

rephrase_chain = REPHRASE_TEMPLATE | ChatOpenAI(temperature=0) | StrOutputParser()

In [6]:
rephrase_chain.invoke(
    {
        "question": "No, really?",
        "chat_history": [
            HumanMessage(content="What does the dog like to eat?"),
            AIMessage(content="Thuna!"),
        ],
    }
)

'Is that really what the dog likes to eat?'

In [7]:
from langchain_core.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [8]:
from langchain_core.runnables import RunnablePassthrough

retrieval_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | ANSWER_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
)

In [9]:
final_chain = rephrase_chain | retrieval_chain

In [None]:
final_chain.invoke(
    {
        "question": "No, really?",
        "chat_history": [
            HumanMessage(content="What does the dog like to eat?"),
            AIMessage(content="Thuna!"),
        ],
    }
)

### Chat with returning documents

In [11]:
retrieved_documents = {"docs": retriever, "question": RunnablePassthrough()}
final_inputs = {
    "context": lambda x: "\n".join(doc.page_content for doc in x["docs"]),
    "question": lambda x: x["question"],
}
answer = {
    "answer": final_inputs | ANSWER_PROMPT | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser(),
    "docs": lambda x: x["docs"],
}

final_chain = rephrase_chain | retrieved_documents | answer

In [12]:
result = final_chain.invoke(
    {
        "question": "No, really?",
        "chat_history": [
            HumanMessage(content="What does the dog like to eat?"),
            AIMessage(content="Thuna!"),
        ],
    }
)
print(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'answer': 'Yes, the dog really likes to eat pizza.', 'docs': [Document(id='d9d0de87-14a9-452f-8fd8-c70a4633ea07', metadata={'source': 'animal.txt'}, page_content='the dog loves to eat pizza'), Document(id='f0ceb2da-de7f-4429-bfa6-2f54d6509143', metadata={'source': 'animal.txt'}, page_content='the cat loves to eat lasagna')]}


In [13]:
result["answer"]

'Yes, the dog really likes to eat pizza.'

In [14]:
result["docs"]

[Document(id='d9d0de87-14a9-452f-8fd8-c70a4633ea07', metadata={'source': 'animal.txt'}, page_content='the dog loves to eat pizza'),
 Document(id='f0ceb2da-de7f-4429-bfa6-2f54d6509143', metadata={'source': 'animal.txt'}, page_content='the cat loves to eat lasagna')]