In [None]:
!pip install langchain openai --upgrade --quiet

<a href = "https://colab.research.google.com/github/qxr777/llm-application-code/blob/master/function_call/function-call-based-on-langchain.ipynb"
target = "_parent"> Open In Colab </a>

In [None]:
# Load OPENAI_API_KEY -- pick exactly ONE of the three options below.

# Option 1: running locally, with a .env file present
# from dotenv import load_dotenv
# load_dotenv()

# Option 2: running locally, without a .env file
# import os
# os.environ['OPENAI_API_KEY'] = 'sk-123456789'

# Option 3 (the active one): running on Google Colab; the key must be
# configured as a Colab secret named OPENAI_API_KEY
import os
from google.colab import userdata

# Copy the Colab secret into the process environment.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

In [2]:
from langchain.chains import RetrievalQA  #检索QA链，在文档上进行检索
from langchain.chat_models import ChatOpenAI  #openai模型
from langchain.document_loaders import CSVLoader #文档加载器，采用csv格式存储
from langchain.vectorstores import DocArrayInMemorySearch  #向量存储
from IPython.display import display, Markdown #在jupyter显示信息的工具
import pandas as pd

# Path of the product catalog CSV, relative to the notebook's working directory.
file = './data/OutdoorClothingCatalog_1000.csv'

# Create a LangChain document loader for the CSV file.
loader = CSVLoader(file_path=file)

# Read the same file with pandas, keeping only the columns at positions 1 and 2,
# purely to preview the data in the notebook.
data = pd.read_csv(file,usecols=[1, 2])
data.head()

Unnamed: 0,name,description
0,Women's Campside Oxfords,This ultracomfortable lace-to-toe Oxford boast...
1,"Recycled Waterhog Dog Mat, Chevron Weave",Protect your floors from spills and splashing ...
2,Infant and Toddler Girls' Coastal Chill Swimsu...,"She'll love the bright colors, ruffles and exc..."
3,"Refresh Swimwear, V-Neck Tankini Contrasts",Whether you're going for a swim or heading out...
4,EcoFlex 3L Storm Pants,Our new TEK O2 technology makes our four-seaso...


In [3]:
#导入向量存储索引创建器
from langchain.indexes import VectorstoreIndexCreator 

# Configure the index creator with the in-memory DocArray vector store class,
# then build the index from a list of document loaders (only the CSV loader here).
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
).from_loaders([loader])

  warn_deprecated(
2024-03-15 15:32:57.068572: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# The query (in Chinese) asks for a markdown table listing every shirt with
# sun protection, with a summary of each shirt's description.
query ="请用markdown表格的方式列出所有具有防晒功能的衬衫，对每件衬衫描述进行总结"

# Run the query against the index to produce a response.
response = index.query(query)

# Render the returned text as Markdown.
display(Markdown(response))



| 衬衫名称 | 描述总结 |
| --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | 100% polyester, UPF 50+ sun protection, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported |
| Men's Plaid Tropic Shirt, Short-Sleeve | 52% polyester, 48% nylon, UPF 50+ sun protection, SunSmart technology, wrinkle-free, front and back cape venting, two front bellows pockets, imported |
| Girls' Ocean Breeze Long-Sleeve Stripe Shirt | Nylon Lycra®-elastane blend, UPF 50+ sun protection, quick-drying, fade-resistant, durable seawater-resistant fabric, coordinates with swimsuits, imported |

In [8]:
# Create a document loader for the CSV file and load its documents.
file = './data/OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
docs = loader.load()

# Inspect a single document; each document corresponds to one row of the CSV.
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': './data/OutdoorClothingCatalog_1000.csv', 'row': 0})

In [9]:
# Use the OpenAIEmbeddings class.
from langchain.embeddings import OpenAIEmbeddings 

embeddings = OpenAIEmbeddings() 

# The documents are short, so no chunking is needed; they can be embedded directly.
# embed_query on the OpenAIEmbeddings instance creates the vector representation of a text.
embed = embeddings.embed_query("你好呀，我的名字叫小可爱")

# Show the length of the resulting vector.
print("\n\033[32m向量表征的长度: \033[0m \n", len(embed))

# Each element is a numeric value; together they form the text's vector representation.
print("\n\033[32m向量表征前5个元素: \033[0m \n", embed[:5])


[32m向量表征的长度: [0m 
 1536

[32m向量表征前5个元素: [0m 
 [-0.01931950839094474, -0.006699711893054907, -0.007517367872624899, -0.02442827792540216, -0.026596016709149642]


In [10]:
# Embed the catalog documents and keep the vectors in an in-memory vector store.
# DocArrayInMemorySearch.from_documents takes the list of documents and the
# embedding model as input.
# The documents are loaded fresh from `loader` instead of being read from `docs`:
# `docs` is rebound to the search results further down, so reusing it here would
# make a re-run of this cell index only those few results, not the whole catalog.
db = DocArrayInMemorySearch.from_documents(loader.load(), embeddings)

query = "请推荐一件具有防晒功能的衬衫"
# Look up the documents most similar to the query; the result is a list of documents.
# The name `docs` is kept on purpose because later cells read the search results from it.
docs = db.similarity_search(query)
print("\n\033[32m返回文档的个数: \033[0m \n", len(docs))
print("\n\033[32m第一个文档: \033[0m \n", docs[0])


[32m返回文档的个数: [0m 
 4

[32m第一个文档: [0m 
 page_content=": 535\nname: Men's TropicVibe Shirt, Short-Sleeve\ndescription: This Men’s sun-protection shirt with built-in UPF 50+ has the lightweight feel you want and the coverage you need when the air is hot and the UV rays are strong. Size & Fit: Traditional Fit: Relaxed through the chest, sleeve and waist. Fabric & Care: Shell: 71% Nylon, 29% Polyester. Lining: 100% Polyester knit mesh. UPF 50+ rated – the highest rated sun protection possible. Machine wash and dry. Additional Features: Wrinkle resistant. Front and back cape venting lets in cool breezes. Two front bellows pockets. Imported.\n\nSun Protection That Won't Wear Off: Our high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun's harmful rays." metadata={'source': './data/OutdoorClothingCatalog_1000.csv', 'row': 535}


In [15]:
# Chat model. Per the original author's note, the default gpt-3.5-turbo model
# produced 504 server timeouts, so gpt-3.5-turbo-0301 is used instead.
# NOTE(review): confirm this dated model snapshot is still served by the API.
llm = ChatOpenAI(model_name="gpt-3.5-turbo-0301", temperature=0.0)

# Concatenate the text of the retrieved similar documents into one string.
qdocs = "".join(doc.page_content for doc in docs)

# Append the question (list all sun-protection shirts as a markdown table and
# summarize each description) to the concatenated documents and send the prompt
# to the chat model. `invoke` is used in place of the deprecated `call_as_llm`;
# it returns a message object whose `.content` carries the reply text.
response = llm.invoke(
    f"{qdocs}问题：请用markdown表格的方式列出所有具有防晒功能的衬衫，对每件衬衫描述进行总结"
).content

display(Markdown(response))

| 衣服名称 | 描述总结 |
| --- | --- |
| Men's TropicVibe Shirt, Short-Sleeve | 男士短袖衬衫，内置UPF 50+防晒功能，轻盈舒适，前后通风口，两个前口袋，防皱，最高级别的防晒保护。 |
| Men's Tropical Plaid Short-Sleeve Shirt | 男士短袖衬衫，UPF 50+防晒，100%聚酯纤维，防皱，前后通风口，两个前口袋，最高级别的防晒保护。 |
| Men's Plaid Tropic Shirt, Short-Sleeve | 男士短袖衬衫，UPF 50+防晒，52%聚酯纤维和48%尼龙，防皱，前后通风口，两个前口袋，最高级别的防晒保护。 |
| Girls' Ocean Breeze Long-Sleeve Stripe Shirt | 女童长袖衬衫，UPF 50+防晒，尼龙Lycra®-弹性纤维混纺，快干，耐褪色，耐海水，与我们的泳衣系列搭配，最高级别的防晒保护。 |

### 2.5 使用检索问答链来回答问题

通过LangChain创建一个检索问答链，对检索到的文档进行问题回答。检索问答链的输入包含以下参数：
- `llm`: 语言模型，进行文本生成
- `chain_type`: 传入链类型，这里使用stuff，将所有查询得到的文档组合成一个文档传入下一步。其他的方式包括：
    - Map Reduce： 将所有块与问题一起传递给语言模型，获取回复，再使用另一次语言模型调用将所有单独的回复总结成最终答案。它可以在任意数量的文档上运行，可以并行处理单个问题，但同时也需要更多的调用。它将所有文档视为独立的。
    - Refine： 用于循环许多文档，实际上是迭代的，建立在先前文档的答案之上，非常适合前后因果信息并随时间逐步构建答案，依赖于先前调用的结果。它通常需要更长的时间，并且基本上需要与Map Reduce一样多的调用
    - Map Re-rank： 对每个文档进行单个语言模型调用，要求它返回一个分数，选择最高分，这依赖于语言模型知道分数应该是什么，需要告诉它，如果它与文档相关，则应该是高分，并在那里精细调整说明，可以批量处理它们相对较快，但是更加昂贵
  
![](../../../figures/docs/C3/3_additional%20methods.png)
  
- `retriever`:检索器

In [14]:
# Create a retriever backed by the vector store.
retriever = db.as_retriever()

# Retrieval QA chain using the "stuff" chain type: all retrieved documents are
# combined into a single prompt. verbose=True prints the chain's progress.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)

# The query asks for a markdown table of all shirts with sun protection, with a
# summary of each shirt's description.
query = "请用markdown表格的方式列出所有具有防晒功能的衬衫，对每件衬衫描述进行总结"

# `invoke` is used in place of the deprecated `Chain.run`. It takes the chain's
# input under the "query" key and returns a dict; the answer text is stored
# under the "result" key.
response = qa_stuff.invoke({"query": query})["result"]

display(Markdown(response))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


| 编号 | 名称 | 描述 |
| --- | --- | --- |
| 618 | Men's Tropical Plaid Short-Sleeve Shirt | 100%聚酯纤维制成，轻便，防皱，前后背部有通风口，两个前面的扩张袋口，UPF 50+防晒等级，可阻挡98%的紫外线 |
| 374 | Men's Plaid Tropic Shirt, Short-Sleeve | 52%聚酯纤维和48%尼龙制成，轻便，防皱，前后背部有通风口，两个前面的扩张袋口，UPF 50+防晒等级，可阻挡98%的紫外线 |
| 293 | Girls' Ocean Breeze Long-Sleeve Stripe Shirt | 尼龙Lycra®-弹性纤维混纺，长袖，UPF 50+防晒等级，可阻挡98%的紫外线，快干，不褪色，耐海水，建议由皮肤癌基金会作为有效的紫外线防护剂 |
| 535 | Men's TropicVibe Shirt, Short-Sleeve | 71%尼龙和29%聚酯纤维制成，轻便，防皱，前后背部有通风口，两个前面的扩张袋口，UPF 50+防晒等级，可阻挡98%的紫外线 |

总结：这四件衬衫都具有防晒功能，防晒等级为UPF 50+，可阻挡98%的紫外线。它们都是轻便的，防皱的，有前后背部通风口和前面的扩张袋口。其中三件衬衫是短袖的，一件是长袖的，适合不同的季节和场合。其中一件女孩的衬衫由皮肤癌基金会推荐作为有效的紫外线防护剂。