In [None]:
import os
# Provide API credentials through environment variables.
# setdefault() keeps a key that is already exported in the environment instead of
# overwriting it with the "*" placeholder; replace the placeholder (or export the
# variable) before running the cells that call the Google API.
# NOTE(review): later cells construct OpenAIEmbeddings / ChatOpenAI, which presumably
# need OPENAI_API_KEY -- export it, or enable the line below, if those cells are run.
# os.environ.setdefault("OPENAI_API_KEY", "*")
os.environ.setdefault("GOOGLE_API_KEY", "*")


# 1.Load 导入Document Loaders
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.document_loaders import TextLoader

# Load every supported file found in the OneFlower directory into `documents`.
DOCS_DIR = './OneFlower'

# Lower-cased file extension -> loader class used for that kind of file.
# NOTE(review): '.doc' is routed to Docx2txtLoader exactly as before; confirm that
# loader can really parse legacy .doc files.
LOADERS_BY_EXTENSION = {
    '.pdf': PyPDFLoader,
    '.docx': Docx2txtLoader,
    '.doc': Docx2txtLoader,
    '.txt': TextLoader,
}

documents = []
# sorted() gives a reproducible load order; os.listdir() returns names in arbitrary order.
for file in sorted(os.listdir(DOCS_DIR)):
    # Match the extension case-insensitively so names such as "REPORT.PDF" are not skipped.
    lowered_name = file.lower()
    loader_cls = next(
        (cls for ext, cls in LOADERS_BY_EXTENSION.items() if lowered_name.endswith(ext)),
        None,
    )
    if loader_cls is None:
        continue  # not a supported document type
    loader = loader_cls(os.path.join(DOCS_DIR, file))
    documents.extend(loader.load())


In [None]:
# 2. Split: cut the loaded Documents into chunks for the embedding and
#    vector-storage steps that follow.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings, kept together so they are easy to tune.
splitter_options = {"chunk_size": 200, "chunk_overlap": 10}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunked_documents = text_splitter.split_documents(documents)

In [None]:
# 3. Store: embed the chunks and keep them in a Qdrant vector database.
from langchain.vectorstores import Qdrant
from langchain.embeddings import OpenAIEmbeddings
# (A Google alternative was left disabled here:
#  from langchain_google_genai import GoogleGenerativeAIEmbeddings)

# OpenAI embedding model used to embed each chunk.
embedding_model = OpenAIEmbeddings()

# Where and under which name the vectors are stored.
qdrant_settings = {
    "location": ":memory:",             # in-memory storage
    "collection_name": "my_documents",  # name of the collection
}

vectorstore = Qdrant.from_documents(
    documents=chunked_documents,  # the chunked documents
    embedding=embedding_model,
    **qdrant_settings,
)

In [None]:
# 4. Retrieval: prepare the chat model and the retrieval chain (OpenAI variant).
import logging  # logging utilities
from langchain.chat_models import ChatOpenAI  # ChatOpenAI lives in chat_models
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.retrievers.multi_query import MultiQueryRetriever  # same module whose logger is configured below
from langchain.chains import RetrievalQA  # RetrievalQA chain

# Configure logging and raise the multi-query retriever's logger to INFO.
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)

# Chat model: OpenAI gpt-3.5-turbo with temperature 0.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

# Wrap the vector-store retriever in a MultiQueryRetriever driven by the same LLM.
retriever_from_llm = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)

# Build the RetrievalQA chain on top of the multi-query retriever. Passing the plain
# vectorstore retriever here would leave retriever_from_llm (and the logging set up
# for it above) unused.
qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever_from_llm)

In [None]:
# 4. Retrieval: prepare the chat model and the retrieval chain (Gemini variant).
import logging  # logging utilities
from langchain_google_genai import ChatGoogleGenerativeAI  # ChatGoogleGenerativeAI model
from langchain.retrievers.multi_query import MultiQueryRetriever  # same module whose logger is configured below
from langchain.chains import RetrievalQA  # RetrievalQA chain

# Configure logging and raise the multi-query retriever's logger to INFO.
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)

# Chat model: gemini-pro.
llm = ChatGoogleGenerativeAI(model="gemini-pro")

# Wrap the vector-store retriever in a MultiQueryRetriever driven by the same LLM.
retriever_from_llm = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)

# Build the RetrievalQA chain on top of the multi-query retriever. Passing the plain
# vectorstore retriever here would leave retriever_from_llm (and the logging set up
# for it above) unused.
qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever_from_llm)

In [None]:
# 5.问答展示
def ask_question(query):
    """Pass ``query`` to the module-level ``qa_chain`` and return its response unchanged."""
    return qa_chain(query)

# Interactive loop: keep answering questions until the user types 'exit'.
while True:
    try:
        # Read the user's question; surrounding whitespace is dropped so that
        # " exit " still quits and padded questions are sent clean.
        user_query = input("有什么可以帮助您的，请提问（输入'exit'退出）：").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or execution interrupted: end the session cleanly.
        break

    # Leave the loop when the user types 'exit' (any letter case).
    if user_query.lower() == 'exit':
        break

    # Do not send a blank question to the chain; just prompt again.
    if not user_query:
        continue

    # Get the answer through ask_question and print it.
    answer = ask_question(user_query)
    print('回复：', answer)

print("谢谢使用 QA 系统")