In [35]:
import os
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOllama
from langchain.document_loaders import CSVLoader, DataFrameLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import HuggingFaceEmbeddings

In [36]:
# Path to the product catalog CSV (relative to the current working directory).
file = "OutdoorClothingCatalog_1000.csv"

# Build the CSV loader, then read the file's contents into `data`.
loader = CSVLoader(file_path=file)
data = loader.load()

In [37]:
# Pin the embedding model explicitly instead of relying on the library's
# implicit default, so the vectors (and therefore retrieval results) stay
# reproducible even if that default changes in a later release.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Embed the documents produced by `loader` and store them in an in-memory
# vector store wrapped by the index.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

  embeddings = HuggingFaceEmbeddings()


In [38]:
# Chat model used both for answering and, later, for generating examples.
llm = ChatOllama(model="qwen2.5:7b", temperature=0.0)

# Create a retrieval question-answering chain.
qa = RetrievalQA.from_chain_type(
    llm=llm,  # the ChatOllama model defined above
    chain_type="stuff",  # "stuff" combines the documents into a single string
    retriever=index.vectorstore.as_retriever(),  # retriever backed by the vector store
    verbose=True,  # enable verbose output
    chain_type_kwargs={"document_separator": "<<<<>>>>>"},  # separator placed between documents
)

要评估模型，需要先准备一些数据点（即用于测试的问答样例）。

In [39]:
# QAGenerateChain automatically generates question/answer pairs, which helps us evaluate the QA system's performance.
# Given document content, it produces questions together with their corresponding answers.
# This is very useful for building a test dataset and assessing how the QA model performs.
from langchain.evaluation.qa import QAGenerateChain

In [40]:
# Build the QA-pair generator on the same chat model that the QA chain uses.
example_gen_chain = QAGenerateChain.from_llm(llm)

In [41]:
# Wrap each of the first five loaded documents under the "doc" key.
doc_inputs = [{"doc": doc} for doc in data[:5]]

# Generate and parse one example per input document.
new_examples = example_gen_chain.apply_and_parse(doc_inputs)

# Show the first generated example.
new_examples[0]



{'qa_pairs': {'query': 'What are the key features of the Alpine Explorer Jacket as described in the document?',
  'answer': 'The key features of the Alpine Explorer Jacket include:'}}

In [42]:
# The generated examples may be nested ({"qa_pairs": {"query": ..., "answer": ...}})
# or flat ({"query": ..., "answer": ...}); the shape appears to vary between setups.
# Accept both so the query is extracted correctly either way.
first_example = new_examples[0]
qa_pair = first_example.get("qa_pairs", first_example)

# Ask the retrieval QA chain the generated question and show its answer.
response = qa.run(qa_pair["query"])
print(response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
The key features of the Alpine Explorer Jacket include:

- It is designed for serious mountaineers.
- Offers maximum protection against harsh weather conditions.
- Made with Gore-Tex material.
- Waterproof.
