In [25]:
import settings
import os
# Publish the OpenAI API key from the local settings module as an environment
# variable for this process.
os.environ.update({"OPENAI_API_KEY": settings.configration["OPENAI_API_KEY"]})

### 使用 LangChain 完成一次问答

In [4]:
from langchain_openai import ChatOpenAI
# Build a chat model with the library defaults (no arguments are passed).
llm = ChatOpenAI()
# Ask a single question ("How would you evaluate artificial intelligence?").
# As the last expression of the cell, the returned message is displayed.
llm.invoke("怎么评价人工智能?")

AIMessage(content='人工智能是一种革命性的技术，它在许多领域都取得了巨大的成就，为人类带来了许多便利和提升。但同时也存在一些问题，如数据隐私、就业岗位减少等。因此人工智能需要在技术发展的同时注重伦理和社会影响，才能更好地造福人类。总的来说，人工智能是一种有着巨大潜力和挑战的技术。', response_metadata={'finish_reason': 'stop', 'logprobs': None})

### 通过 Google 搜索并返回答案

In [26]:
import settings
import os
# Publish the SerpAPI key from the local settings module as an environment
# variable for this process.
os.environ.update({"SERPAPI_API_KEY": settings.configration["SERPAPI_API_KEY"]})

In [29]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.llms import OpenAI
from langchain.agents import AgentType

# Load the OpenAI completion model.
llm = OpenAI(temperature=0, max_tokens=2048)

# Load the serpapi search tool.
tools = load_tools(["serpapi"])
# Loaded tools have to be wrapped in an agent before use; verbose=True prints
# the full execution trace.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Run the agent. invoke() replaces the deprecated run(); it returns a dict, so
# select the "output" entry to keep displaying only the final answer text.
agent.invoke(
    {"input": "What's the date today? What great events have taken place today in history?"}
)["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use a search engine to find the answer and then search for historical events that happened on this date.
Action: Search
Action Input: "today's date"[0m
Observation: [36;1m[1;3mThursday, March 14, 2024[0m
Thought:[32;1m[1;3m Now I know the date, I can search for historical events that happened on this date.
Action: Search
Action Input: "historical events on March 14"[0m
Observation: [36;1m[1;3m['Historical Events on March 14 · Battle of Montiel · Battle of Ivry · Convention to Pick Scottish Monarch · Cotton Gin Machine · Pope Pius VII · Henry Addington ...', "Discover what happened on March 14 with HISTORY's summaries of major events, anniversaries, famous births and notable deaths.", 'The first Pan-American conference convened in Panama with representatives from Mexico, Colombia, Peru, and Central America in attendance. 1794.', 'On this day - March 14 ; 1991 The Birmingham Six are released ; 1979 Factory Pl

"Today's date is Thursday, March 14, 2024. Some significant historical events that have taken place on this date include the first Pan-American conference in 1794, the execution of Irish republicans in 1921 and 1923, and the patenting of the cotton gin machine in 1794. Additionally, on this day in 1991, the Birmingham Six were released and in 1979, there was a factory plane crash in China."

### 对超长文本进行总结

In [32]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import OpenAI

# Point the loader at the text file to summarise.
loader = UnstructuredFileLoader("./data/lg_test.txt")
# Convert the text into Document objects.
document = loader.load()
print(f'documents:{len(document)}')

# Set up the text splitter.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0
)

# Split the text. NOTE: this second print reports the number of chunks, even
# though it reuses the "documents:" label.
split_documents = text_splitter.split_documents(document)
print(f'documents:{len(split_documents)}')

# Load the LLM.
llm = OpenAI(max_tokens=1500)

# Build the summarisation chain.
chain = load_summarize_chain(llm, chain_type="refine", verbose=True)

# Run the chain (only the first 5 chunks, to keep the demo fast).
# invoke() replaces the deprecated run(); the summary text is returned under
# the "output_text" key, which keeps the displayed value a plain string.
chain.invoke({"input_documents": split_documents[:5]})["output_text"]

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


documents:1
documents:319


[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"声明：本书为爱奇电子书(www.i7wu.cn)的用户上传至其在本站的存储空间，本站只提供TXT全集电子书存储服务以及免费下载服务，以下作品内容之版权与本站无任何关系。

---------------------------用户上传之内容开始--------------------------------

《地藏心经》

作者：铸剑师无名

正文

第一第十五章 天下势，渡江（一）

“渝州陆家？！”

虽然原本的那个秦逸，每日只知道苦读诗书，从未与商贾们打过交道，但是渝州陆家的名声，他还是知道。

陆家三代为官，官至两江总督，五代经商，百年经营，家私何止千万，直至今朝，俨然已是江南一等士族大户。渝州陆氏以皮货起家，乃是西北之地数得上号的商户，西北之地所产的皮货，有三成经他们之手卖往江南。

若只是如此，陆氏也不过是一头肥硕的羔羊，只待他人宰杀。

陆氏三代家主都极具雄韬伟略，以千金买官，以万金开路，更是在蛮夷南侵之时，倾尽家资招兵买马，拒十万蛮夷铁骑于侯关外，短短三年间，便一手扶持起了都护大将军——苏和，抗夷大将军——邓昌。"


CONCISE SUMMARY:[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYour job is to produce a final summary.
We have provided an existing summary up to a certain point: 
This is a disclaimer stating that the book "Di Zang Xin Jing" was uploaded by a user on

'\n\nThe website www.i7wu.cn provides storage and free download services for the book "Di Zang Xin Jing" but makes no claim to owning the copyright. The book follows the powerful Lu family in the city of Yuzhou, who have risen to prominence through strategic marriages and military alliances. They are constantly threatened by rival factions and maintain a strong army, including the renowned Black Iron Cavalry. The protagonist, Qin Yi, faces difficulties in his own family as they are not as influential as the Lu family. When a middle-aged merchant recognizes Qin Yi as a member of the prestigious Qin family, he eagerly tries to establish a connection. Despite being from a noble family, Qin Yi remains humble and agrees to help the merchant. As he travels, Qin Yi begins to realize the reputation and influence of his family, who have been known for their benevolence for generations. As he waits for the Lu family\'s caravan to pass, he is approached by a group of people led by a bearded man h

### 本地知识库

In [35]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI,VectorDBQA
from langchain.document_loaders import DirectoryLoader
from langchain.chains import RetrievalQA

# Load every .txt file under the data folder.
loader = DirectoryLoader('./data/', glob='**/*.txt')
# Convert the data into Document objects; each file becomes one Document.
documents = loader.load()

# Set up the text splitter (not a loader, despite the original comment).
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
# Split the loaded documents into chunks.
split_docs = text_splitter.split_documents(documents)

# Create the OpenAI embeddings object.
embeddings = OpenAIEmbeddings()
# Compute an embedding vector for every chunk and store them temporarily in a
# Chroma vector store, to be matched against queries below.
docsearch = Chroma.from_documents(split_docs, embeddings)

# Build the question-answering chain. RetrievalQA (already imported by this
# cell) replaces the deprecated VectorDBQA; it takes a retriever rather than
# the vector store itself.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
)
# Ask the question. invoke() replaces the deprecated direct call on the chain;
# the result dict still has 'query', 'result' and 'source_documents'.
result = qa.invoke({"query": "科大讯飞今年第一季度收入是多少？"})
print(result)

Created a chunk of size 106, which is longer than the specified 100
Created a chunk of size 104, which is longer than the specified 100
Created a chunk of size 101, which is longer than the specified 100
Created a chunk of size 128, which is longer than the specified 100
Created a chunk of size 116, which is longer than the specified 100
Created a chunk of size 109, which is longer than the specified 100
Created a chunk of size 109, which is longer than the specified 100
Created a chunk of size 113, which is longer than the specified 100
Created a chunk of size 106, which is longer than the specified 100
Created a chunk of size 143, which is longer than the specified 100
Created a chunk of size 111, which is longer than the specified 100
Created a chunk of size 214, which is longer than the specified 100
Created a chunk of size 102, which is longer than the specified 100
Created a chunk of size 106, which is longer than the specified 100
Created a chunk of size 119, which is longer tha

{'query': '科大讯飞今年第一季度收入是多少？', 'result': ' 126.5亿人民币', 'source_documents': [Document(page_content='虽然收入增加了不少，但在这一点上，科大讯飞的高层也坦言，与过去几个财季相比，第一个财季的收入增加谏度明显慢了下来，这其中最重要的一个因到了病毒的冲击，导致了国内不少工程进度推迟，在讯飞的智能教育商业计划中，第一季度有18个左右的延期，合约金额接近16个，另外，安徽皆方面，调，在第三季度内，大部分大型工程的投标都已经停止，这也是造成今年第三季度，科大讯飞收入的增幅低于预计的主要因素。', metadata={'source': 'data\\keda.txt'}), Document(page_content='科大讯飞前三季营收曝光，用AI赋能企业转型推出了多款Saas产品', metadata={'source': 'data\\keda.txt'}), Document(page_content='而在十月二十八日晚上，在一份关于科技的报告中，复旦讯飞公布了2022年第三季度的财报。据财报，今年第一季度科技讯飞的收入为126.5亿人民币公司在应对新冠肺炎疫情的过程中，在其核心产业领域，持续取得了较好的发展。', metadata={'source': 'data\\keda.txt'}), Document(page_content='据该份财报，截至今年第二季度，科技讯飞的营运资金净流入率较去年增加百分之三十二，虽然受到了严峻的经济形势影响，但科技讯飞的营运资金和营势头。', metadata={'source': 'data\\keda.txt'})]}


### 执行多个chain

In [36]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import SimpleSequentialChain

# Location chain: suggests a classic dish for the given place.
llm = OpenAI(temperature=1)
template = """Your job is to come up with a classic dish from the area that the users suggests.
% USER LOCATION
{user_location}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_location"], template=template)
location_chain = LLMChain(llm=llm, prompt=prompt_template)

# Meal chain: produces a short recipe for the given dish.
# `template` and `prompt_template` are deliberately rebound here, as in the
# original cell; location_chain already holds its own prompt object.
template = """Given a meal, give a short and simple recipe on how to make that dish at home.
% MEAL
{user_meal}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_meal"], template=template)
meal_chain = LLMChain(llm=llm, prompt=prompt_template)

# Link both chains with SimpleSequentialChain: the first chain's answer is
# substituted for user_meal in the second prompt, which is then asked.
overall_chain = SimpleSequentialChain(chains=[location_chain, meal_chain], verbose=True)
# invoke() replaces the deprecated run(); the final text is returned under the
# "output" key, so `review` is still a plain string.
review = overall_chain.invoke({"input": "Rome"})["output"]



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mOne classic dish from Rome is Cacio e Pepe, a simple yet flavorful pasta dish made with only three ingredients: pasta, pecorino cheese, and black pepper. The name literally translates to "cheese and pepper" in Italian. The cheese is melted into a creamy sauce using the starchy pasta water, and the black pepper gives a spicy punch to the dish. Cacio e Pepe is a staple in Roman cuisine and can be found at many traditional trattorias in the city.[0m
[33;1m[1;3mIngredients:
- 1 pound of spaghetti or bucatini pasta
- 1 cup of grated pecorino cheese
- 1-2 tablespoons of freshly ground black pepper

Instructions:
1. Cook the pasta in a large pot of boiling salted water according to package instructions until al dente.
2. While the pasta is cooking, heat a large pan over medium heat and add the freshly ground black pepper. Toast for 1-2 minutes until fragrant.
3. Reserve 1 cup of the starchy pasta water before draining the

### 结构化输出

In [39]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

llm = OpenAI(temperature=0.5)

# Declare which fields the generated content must contain and what each field
# means.
response_schemas = [
    ResponseSchema(name="bad_string", description="This a poorly formatted user input string"),
    ResponseSchema(name="good_string", description="This is your response, a reformatted response")
]

# Build the parser from the schemas.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Format instructions generated by the parser; they describe a layout like:
# {
#     "bad_string": string  // This a poorly formatted user input string
#     "good_string": string  // This is your response, a reformatted response
# }
format_instructions = output_parser.get_format_instructions()

template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

{format_instructions}

% USER INPUT:
{user_input}

YOUR RESPONSE:
"""

# Embed the format description in the prompt so the LLM knows which output
# format is expected.
prompt = PromptTemplate(
    input_variables=["user_input"],
    partial_variables={"format_instructions": format_instructions},
    template=template
)

promptValue = prompt.format(user_input="welcom to califonya!")
# invoke() replaces the deprecated direct call on the LLM object; it is called
# the same way as on L11 and its result is passed straight to the parser.
llm_output = llm.invoke(promptValue)

# Parse the generated text with the parser; the parsed result is the value
# displayed by the cell.
output_parser.parse(llm_output)

{'bad_string': 'welcom to califonya!', 'good_string': 'Welcome to California!'}

### 网页爬取

In [41]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMRequestsChain, LLMChain

llm = OpenAI(temperature=0)

# Prompt (in Chinese): the fetched page content is placed between >>> and <<<,
# and the model is asked to return the listed fields as JSON. The doubled
# braces are literal braces in the rendered prompt.
template = """在 >>> 和 <<< 之间是网页的返回的HTML内容。
网页是新浪财经A股上市公司的公司简介。
请抽取参数请求的信息。

>>> {requests_result} <<<
请使用如下的JSON格式返回数据
{{
  "company_name":"a",
  "company_english_name":"b",
  "issue_price":"c",
  "date_of_establishment":"d",
  "registered_capital":"e",
  "office_address":"f",
  "Company_profile":"g"

}}
Extracted:"""

prompt = PromptTemplate(
    input_variables=["requests_result"],
    template=template
)

# Chain that fetches the URL and feeds the response into the LLM chain as
# `requests_result`.
chain = LLMRequestsChain(llm_chain=LLMChain(llm=llm, prompt=prompt))
inputs = {
    "url": "https://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/600519.phtml"
}

# invoke() replaces the deprecated direct call on the chain; the extracted
# text is still available under the 'output' key.
response = chain.invoke(inputs)
print(response['output'])


{
  "company_name":"贵州茅台酒股份有限公司",
  "company_english_name":"Kweichow Moutai Co.,Ltd.",
  "issue_price":"31.39",
  "date_of_establishment":"1999-11-20",
  "registered_capital":"125620万元(CNY)",
  "office_address":"贵州省仁怀市茅台镇",
  "Company_profile":"公司是根据贵州省人民政府黔府函〔1999〕291号文,由中国贵州茅台酒厂有限责任公司作为主发起人,联合贵州茅台酒厂技术开发公司、贵州省轻纺集体工业联社、深圳清华大学研究院、中国食品发酵工业研究院、北京市糖业烟酒公司、江苏省糖烟酒总公司、上海


### tools 自定义工具

In [43]:
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.tools import BaseTool
from langchain.llms import OpenAI
from langchain import LLMMathChain, SerpAPIWrapper

llm = OpenAI(temperature=0)

# Set up the search wrapper and the math chain.
search = SerpAPIWrapper()
# from_llm() is the supported constructor; passing `llm=` directly to
# LLMMathChain(...) is deprecated.
llm_math_chain = LLMMathChain.from_llm(llm, verbose=True)

# List the tools this agent may use. Each description tells the agent when the
# tool is appropriate.
tools = [
    Tool(
        name="Search",
        func=search.run,
        description="useful for when you need to answer questions about current events"
    ),
    Tool(
        name="Calculator",
        func=llm_math_chain.run,
        description="useful for when you need to answer questions about math"
    )
]

# Build the agent.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Run the agent. invoke() replaces the deprecated run(); select "output" so
# the cell still displays only the final answer string.
agent.invoke(
    {"input": "Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?"}
)["output"]





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use the search tool to find information about Leo DiCaprio's girlfriend.
Action: Search
Action Input: "Leo DiCaprio girlfriend"[0m
Observation: [36;1m[1;3mModel Vittoria Ceretti, who is dating Leonardo DiCaprio, landed an Elle cover story to talk about modeling and not talk about Leo. The 49-year-old actor only warrants one mention in the piece about his 25-year-old girlfriend.[0m
Thought:[32;1m[1;3m I should use the calculator to calculate her current age raised to the 0.43 power.
Action: Calculator
Action Input: 25^0.43[0m

[1m> Entering new LLMMathChain chain...[0m
25^0.43[32;1m[1;3m```text
25**0.43
```
...numexpr.evaluate("25**0.43")...
[0m
Answer: [33;1m[1;3m3.991298452658078[0m
[1m> Finished chain.[0m

Observation: [33;1m[1;3mAnswer: 3.991298452658078[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: 3.991298452658078[0m

[1m> Finished chain.[0m


'3.991298452658078'

### 带记忆功能的机器人

In [44]:
from langchain.memory import ChatMessageHistory
# Import ChatOpenAI from langchain_openai, as the first LangChain cell of this
# notebook does, instead of the deprecated langchain.chat_models path.
from langchain_openai import ChatOpenAI

chat = ChatOpenAI(temperature=0)

# Create the message history object.
history = ChatMessageHistory()

# Add the conversation so far: an AI greeting ("Hello!") followed by the user's
# question ("What is the capital of China?").
history.add_ai_message("你好！")
history.add_user_message("中国的首都是哪里？")

# Send the whole history to the model. invoke() replaces the deprecated direct
# call on the chat model object.
ai_response = chat.invoke(history.messages)
print(ai_response)

  warn_deprecated(
  warn_deprecated(


content='中国的首都是北京。' response_metadata={'finish_reason': 'stop', 'logprobs': None}
