In [1]:
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
import os
from dotenv import load_dotenv

# Load environment variables from <cwd>/app/.env before the embedding client
# is constructed (presumably this supplies the OpenAI API key - confirm).
app_dir = os.path.join(os.getcwd(), "app")
load_dotenv(dotenv_path=os.path.join(app_dir, ".env"))

embedding_function = OpenAIEmbeddings()

# Two tiny example documents that share the same source metadata.
docs = [
    Document(page_content=text, metadata={"source": "animal.txt"})
    for text in (
        "the dog loves to eat pizza",
        "the cat loves to eat lasagna",
    )
]

# Index the documents in a Chroma vector store and expose it as a retriever.
db = Chroma.from_documents(documents=docs, embedding=embedding_function)
retriever = db.as_retriever()

In [2]:
# Sanity check: call the retriever on its own with a question string and
# display the documents it returns.
retriever.invoke("What exactly?")

[Document(id='2fe4bc4f-69b3-4e3e-86ea-0820465adab4', metadata={'source': 'animal.txt'}, page_content='the dog loves to eat pizza'),
 Document(id='b3f936bf-e2ab-4bc1-80fd-82595486d5c9', metadata={'source': 'animal.txt'}, page_content='the cat loves to eat lasagna')]

In [3]:
from langchain.prompts.prompt import PromptTemplate

# Prompt that turns a follow-up question plus the chat history into a
# standalone question. Placeholders: {chat_history} and {question}.
rephrase_template = (
    "Given the following conversation and a follow up question, "
    "rephrase the follow up question to be a standalone question, "
    "in its original language.\n"
    "\n"
    "Chat History:\n"
    "{chat_history}\n"
    "Follow Up Input: {question}\n"
    "Standalone question:"
)
REPHRASE_TEMPLATE = PromptTemplate.from_template(template=rephrase_template)

In [4]:
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser


def _render_chat_history(inputs):
    """Return the ``chat_history`` entry of ``inputs`` as plain prompt text.

    REPHRASE_TEMPLATE is a string prompt, so a list of message objects would
    otherwise be interpolated as its Python repr. Message lists are rendered
    with ``get_buffer_string`` (one "Human: ..." / "AI: ..." line per message).

    Args:
        inputs: The dict passed to ``rephrase_chain.invoke``. ``chat_history``
            may be a list of messages, an already rendered string, or absent.

    Returns:
        The chat history as a single string ("" when there is no history).
    """
    history = inputs.get("chat_history") or []
    if isinstance(history, str):
        # Already rendered by the caller - pass it through unchanged.
        return history
    return get_buffer_string(history)


# Chain: {"question", "chat_history"} -> standalone question (str).
# The leading dict is coerced to a parallel step that prepares the two prompt
# variables; only chat_history needs transforming.
rephrase_chain = (
    {
        "question": lambda inputs: inputs["question"],
        "chat_history": _render_chat_history,
    }
    | REPHRASE_TEMPLATE
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
)

In [5]:
# Try the rephrasing step alone: a short follow-up plus the prior exchange.
rephrase_chain.invoke(
    dict(
        chat_history=[
            HumanMessage(content="What does the dog like to eat?"),
            AIMessage(content="Thuna!"),
        ],
        question="No, really?",
    )
)

'Is that really what the dog likes to eat?'

In [6]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt that asks the model to answer {question} using only {context}.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
ANSWER_PROMPT = ChatPromptTemplate.from_template(template=template)

In [7]:
from langchain_core.runnables import RunnablePassthrough

def _join_page_contents(documents):
    """Join the ``page_content`` of each retrieved document with newlines.

    Without this step the prompt's {context} slot receives the raw list of
    Document objects, so ids and metadata reprs end up in the prompt text.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single newline-separated string ("" for no documents).
    """
    return "\n".join(document.page_content for document in documents)


# Chain: question (str) -> answer (str).
# The question is used both to fetch documents for the context and, passed
# through unchanged, as the question in the answer prompt.
retrieval_chain = (
    {
        "context": retriever | _join_page_contents,
        "question": RunnablePassthrough(),
    }
    | ANSWER_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
)

In [8]:
# Compose the two chains: the standalone question string produced by
# rephrase_chain becomes the input of retrieval_chain.
final_chain = rephrase_chain | retrieval_chain

In [9]:
# Run the composed chain end to end on the same follow-up and history.
final_chain.invoke(
    input={
        "chat_history": [
            HumanMessage(content="What does the dog like to eat?"),
            AIMessage(content="Thuna!"),
        ],
        "question": "No, really?",
    }
)

'No, the dog loves to eat pizza.'

### Chat that also returns the retrieved documents

In [11]:
def _context_from_docs(state):
    """Build the prompt context: one retrieved page_content per line."""
    return "\n".join([doc.page_content for doc in state["docs"]])


def _question_of(state):
    """Pick the question out of the intermediate state dict."""
    return state["question"]


def _docs_of(state):
    """Pick the retrieved documents out of the intermediate state dict."""
    return state["docs"]


# Step 1: fetch documents for the question while keeping the question itself.
retrieved_documents = {"docs": retriever, "question": RunnablePassthrough()}

# Step 2: turn that state into the variables ANSWER_PROMPT expects.
final_inputs = {"context": _context_from_docs, "question": _question_of}

# Step 3: produce the answer and hand the documents back next to it.
answer = {
    "answer": (
        final_inputs
        | ANSWER_PROMPT
        | ChatOpenAI(model="gpt-4o-mini")
        | StrOutputParser()
    ),
    "docs": _docs_of,
}

# Rebinds final_chain: this version yields a dict with "answer" and "docs".
final_chain = rephrase_chain | retrieved_documents | answer

In [12]:
# Run the docs-returning chain and keep the result for the cells below.
result = final_chain.invoke(
    input=dict(
        chat_history=[
            HumanMessage(content="What does the dog like to eat?"),
            AIMessage(content="Thuna!"),
        ],
        question="No, really?",
    )
)
print(result)

{'answer': 'Yes, according to the context, the dog loves to eat pizza.', 'docs': [Document(id='2fe4bc4f-69b3-4e3e-86ea-0820465adab4', metadata={'source': 'animal.txt'}, page_content='the dog loves to eat pizza'), Document(id='b3f936bf-e2ab-4bc1-80fd-82595486d5c9', metadata={'source': 'animal.txt'}, page_content='the cat loves to eat lasagna')]}


In [13]:
# Show only the generated answer from the chain result.
result["answer"]

'Yes, according to the context, the dog loves to eat pizza.'

In [14]:
# Show the documents that were returned alongside the answer.
result["docs"]

[Document(id='2fe4bc4f-69b3-4e3e-86ea-0820465adab4', metadata={'source': 'animal.txt'}, page_content='the dog loves to eat pizza'),
 Document(id='b3f936bf-e2ab-4bc1-80fd-82595486d5c9', metadata={'source': 'animal.txt'}, page_content='the cat loves to eat lasagna')]