## 1. 初始化Retriever

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Load the story file. The encoding is pinned explicitly so decoding does not
# depend on the platform's default locale encoding.
# NOTE(review): assumes ./data/stories.txt is UTF-8 encoded -- confirm.
loader = TextLoader("./data/stories.txt", encoding="utf-8")
docs = loader.load()

# Split into overlapping chunks; add_start_index=True records each chunk's
# character offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

# The collection is persisted on disk, so without stable ids every re-run of
# this cell would append the same chunks again under fresh random ids.
# Position-based ids make the write idempotent: re-running overwrites the
# existing entries instead of duplicating them.
split_ids = [f"stories-{i}" for i in range(len(all_splits))]

vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(),
    ids=split_ids,
    persist_directory="./chroma_persist",
    collection_name="qingming",
)
retriever = vectorstore.as_retriever()

## 2. 问题上下文

In [2]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction asking the model to rewrite the latest user question as a
# standalone question (without answering it), using the chat history only to
# resolve references.
contextualize_q_system_prompt = """我给你一个聊天记录和最新的用户问题，新的用户问题中可能引用聊天记录中的上下文。
你需要制定一个独立问题，该问题无需聊天记录即可理解。不要回答问题，只需根据需求进行重新表述，否则将问题按原样返回。
"""

# Message order: system instruction, then the prior turns, then the new input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    messages=[
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# temperature=0 requests the least random completions from the model.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

# Wrap the base retriever so the question is passed through the
# contextualising prompt + LLM before retrieval.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

## 3. 对话上下文

In [3]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# System instruction for the answering step; {context} is the slot the
# stuff-documents chain fills with the retrieved documents.
qa_system_prompt = """使用以下上下文来回答最后的问题。
如果你不知道答案，就说你不知道，不要试图编造答案。
最多使用三句话，并尽可能简洁地回答。
在答案的最后一定要说“谢谢询问！”

{context}"""

# Message order: system instruction (with context), prior turns, new question.
qa_prompt = ChatPromptTemplate.from_messages(
    messages=[
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Answering chain: feeds the retrieved documents and the question to the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [6]:
from langchain_core.messages import HumanMessage, AIMessage

def ask(user_question: str, history: list) -> dict:
    """Run one conversational turn through ``rag_chain`` and record it.

    Invokes the chain with the question and the history so far, prints the
    answer, then appends the human/AI message pair to ``history`` in place so
    the next turn can refer back to this one.

    Args:
        user_question: The question for this turn.
        history: Mutable list of prior chat messages; extended in place.

    Returns:
        The raw result dict returned by ``rag_chain.invoke``.
    """
    result = rag_chain.invoke({"input": user_question, "chat_history": history})
    print(result["answer"])
    history.extend(
        [HumanMessage(content=user_question), AIMessage(content=result["answer"])]
    )
    return result


chat_history = []

question = "晴明在山洞里遇到了谁?"
ai_msg_1 = ask(question, chat_history)

# The follow-up refers to "this person", so it relies on the first turn being
# present in chat_history. Unlike the previous copy-pasted version, this turn
# is recorded too, keeping the history complete for any further questions.
second_question = "晴明与这个人聊了些什么?"
ai_msg_2 = ask(second_question, chat_history)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


晴明在山洞里遇到了一个神秘的老者。谢谢询问！


Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


晴明与这个神秘老者聊了关于他成为仙侠的命运和如何获得仙侠力量的话题。谢谢询问！
