In [13]:
import os
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOllama
from langchain.document_loaders import CSVLoader, DataFrameLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import HuggingFaceEmbeddings

In [14]:
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file)
data = loader.load()

In [15]:
embeddings = HuggingFaceEmbeddings()
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings
).from_loaders([loader])

  embeddings = HuggingFaceEmbeddings()


In [16]:
llm = ChatOllama(
    model="llama3.2",
    temperature=0.0
)
# 创建一个检索问答链
# llm: 使用已定义的ChatOllama模型
# chain_type: 使用"stuff"方法将文档组合成单个字符串
# retriever: 使用向量存储的检索器
# verbose: 启用详细输出
# document_separator: 定义文档之间的分隔符
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=index.vectorstore.as_retriever(),
    verbose=True,
    chain_type_kwargs={
        "document_separator": "<<<<>>>>>"
    }
)

想要测评模型需要设定一些数据点

In [17]:
# QAGenerateChain 用于自动生成问答对，帮助我们评估QA系统的性能
# 它可以基于给定的文档内容，自动生成问题和对应的答案
# 这对于创建测试数据集和评估QA模型的表现非常有用
from langchain.evaluation.qa import QAGenerateChain

In [18]:
example_gen_chain = QAGenerateChain.from_llm(llm=llm)

In [19]:
new_examples = example_gen_chain.apply_and_parse([{"doc": t} for t in data[:5]])
new_examples[0]



{'qa_pairs': {'query': 'What is the product category of the Alpine Explorer Jacket?',
  'answer': 'The product category of the Alpine Explorer Jacket is Jackets.'}}

In [20]:
response = qa.run(new_examples[0]["qa_pairs"]["query"]) # 这里要注意new_examples的格式是否是嵌套型的字典，这样才能提取出正确的query，似乎不同llm的输出格式不一样
print(response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
The product category of the Alpine Explorer Jacket is Jackets.


In [21]:
import langchain
langchain.debug = True

In [22]:
response = qa.run(new_examples[0]["qa_pairs"]["query"]) # 这里要注意new_examples的格式是否是嵌套型的字典，这样才能提取出正确的query，似乎不同llm的输出格式不一样
print(response)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What is the product category of the Alpine Explorer Jacket?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the product category of the Alpine Explorer Jacket?",
  "context": "product_id: OC001\nname: Alpine Explorer Jacket\ncategory: Jackets\ngender: Men\nprice: 249.99\ncolor: Blue\nsize: M\nmaterial: Gore-Tex\nwaterproof: Yes\ndescription: Designed for serious mountaineers, this jacket offers maximum protection against harsh weather conditions.<<<<>>>>>product_id: OC066\nname: Alpine Technical Hoodie\ncategory: Jackets\ngender: Men\nprice: 119.99\ncolor: Gray\nsize: L\nmaterial: Polyester/Elastane\nwaterproof: No\ndescription: A technical hoodi

In [23]:
langchain.debug = False

In [24]:
predictions = qa.apply(new_examples[0]["qa_pairs"]["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m

In [25]:
from langchain.evaluation.qa import QAEvalChain