## Get the key

In [2]:
from apikey import apikey 
import os
os.environ['OPENAI_API_KEY'] = apikey

# 这个配置神了
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"


# LangChain: Models, Prompts and Output Parsers


Outline

 * API calls through LangChain:
   * Prompts
   * Models
   * Output parsers

In [3]:
# 从下面返回来看，OpenAI 和 ChatOpenAI 差别不大。只是模型不同，部分参数不太一样；
# OpenAI 默认为 text-davinci-003，即 基于大模型的文本处理，价格比gpt3.5贵一点
# ChatOpenAI 默认为 gpt-3.5-turbo，即 基于chat的大模型，一般有chat需求调用这个就行。
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
# 不过还是有些些微差别的，见官方文档：https://python.langchain.com/docs/get_started/quickstart

from langchain.prompts import ChatPromptTemplate

## Prompts

In [17]:
# 海盗邮件
customer_email = """
Arrr, I be fuming that me blender lid \
flew off and splattered me kitchen walls \
with smoothie! And to make matters worse,\
the warranty don't cover the cost of \
cleaning up me kitchen. I need yer help \
right now, matey!
"""

customer_style = """American English \
in a calm and respectful tone
"""

service_reply = """Hey there customer, \
the warranty does not cover \
cleaning expenses for your kitchen \
because it's your fault that \
you misused your blender \
by forgetting to put the lid on before \
starting the blender. \
Tough luck! See ya!
"""

service_style_pirate = """\
a polite tone \
that speaks in English Pirate\
"""


In [14]:
# prompt 模板
template_string = """Translate the text \
that is delimited by triple backticks \
into a style that is {style}. \
text: ```{text}```
"""

prompt_template = ChatPromptTemplate.from_template(template_string)


In [15]:
# 传参1
customer_messages = prompt_template.format_messages(
    style = customer_style,
    text = customer_email
)

print(type(customer_messages))
print(type(customer_messages[0]))
print(customer_messages[0])

<class 'list'>
<class 'langchain.schema.HumanMessage'>
content="Translate the text that is delimited by triple backticks into a style that is American English in a calm and respectful tone\n. text: ```\nArrr, I be fuming that me blender lid flew off and splattered me kitchen walls with smoothie! And to make matters worse,the warranty don't cover the cost of cleaning up me kitchen. I need yer help right now, matey!\n```\n" additional_kwargs={} example=False


In [18]:
# 传参2

service_messages = prompt_template.format_messages(
    style = service_style_pirate,
    text = service_reply)

print(type(service_messages))
print(type(service_messages[0]))
print(service_messages[0])



<class 'list'>
<class 'langchain.schema.HumanMessage'>
content="Translate the text that is delimited by triple backticks into a style that is a polite tone that speaks in English Pirate. text: ```Hey there customer, the warranty does not cover cleaning expenses for your kitchen because it's your fault that you misused your blender by forgetting to put the lid on before starting the blender. Tough luck! See ya!\n```\n" additional_kwargs={} example=False


## Model

In [12]:
# 调用大模型对邮件进行风格润色
chat = ChatOpenAI(temperature=0.0)


In [19]:
customer_response = chat(customer_messages)
customer_response.content

"I'm really frustrated that my blender lid flew off and made a mess of my kitchen walls with smoothie! And to make things even worse, the warranty doesn't cover the cost of cleaning up my kitchen. I could really use your help at this moment, my friend!"

In [21]:
service_response = chat(service_messages)
service_response.content

"Ahoy there, matey! I regret to inform ye that the warranty be not coverin' the costs o' cleanin' yer galley, as 'tis yer own fault fer misusin' yer blender by forgettin' to secure the lid afore startin' it. Aye, tough luck, me heartie! Fare thee well!"

## Output Parsers

对输出进行格式化指定，将LLM的输出解析为python 字典类型（json格式）

In [22]:
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

In [35]:
from typing import List
# 先生成键值对的schema

# 步骤1：response_schemas
gift_schema = ResponseSchema(name="gift",
                             description="Was the item purchased\
                             as a gift for someone else? \
                             Answer True if yes,\
                             False if not or unknown.")

delivery_days_schema = ResponseSchema(name="delivery_days",
                                      description="How many days\
                                      did it take for the product\
                                      to arrive? If this \
                                      information is not found,\
                                      output -1.")

price_value_schema = ResponseSchema(name="price_value",
                                    description="Extract any\
                                    sentences about the value or \
                                    price, and output them as a \
                                    comma separated Python list.")

response_schemas = [gift_schema, 
                    delivery_days_schema,
                    price_value_schema]
print("步骤1（生成 response_schemas列表）：",response_schemas)


# 步骤2：构建 output_parser
# 这里通过 StructuredOutputParser 先生成一个解析器，解析器有两个用处：
# 用处1：在生成prompt 的时候会按照 解析器的要求进行生成；
# 用处2：在llm 生成结果的时候，解析器又会对结果进行格式化；（类似于：解析器产生一个标准，llm按照这个标准生成，则解析器就可以直接解析）
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
print("步骤2（构建解析器）：",output_parser)

# 步骤3：生成prompt 提示词
format_instructions = output_parser.get_format_instructions()
print("步骤3（生成prompt 提示词）：",format_instructions)

# 而 format_instructions 是通过langchain StructuredOutputParser对这部分进行了比较好的优化
customer_review = """\
This leaf blower is pretty amazing.  It has four settings:\
candle blower, gentle breeze, windy city, and tornado. \
It arrived in two days, just in time for my wife's \
anniversary present. \
I think my wife liked it so much she was speechless. \
So far I've been the only one using it, and I've been \
using it every other morning to clear the leaves on our lawn. \
It's slightly more expensive than the other leaf blowers \
out there, but I think it's worth it for the extra features.
"""

review_template = """\
For the following text, extract the following information:

gift: Was the item purchased as a gift for someone else? \
Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product\
to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price,\
and output them as a comma separated Python list.

text: {text}

{format_instructions}
"""

# 这样就形成了更加好的prompt模板
prompt = ChatPromptTemplate.from_template(template=review_template)

# 这里把 变量传进去之后，就形成了最终的prompt
messages = prompt.format_messages(text=customer_review, 
                                format_instructions=format_instructions)
print(messages[0].content)


# 步骤4：将 messages 传入，生成返回，直接用解析器解析
response = chat(messages)
output_dict = output_parser.parse(response.content)
print("步骤4（对生成的返回用解析器解析）：",output_dict)
print(type(output_dict))

步骤1（生成 response_schemas列表）： [ResponseSchema(name='gift', description='Was the item purchased                             as a gift for someone else?                              Answer True if yes,                             False if not or unknown.'), ResponseSchema(name='delivery_days', description='How many days                                      did it take for the product                                      to arrive? If this                                       information is not found,                                      output -1.'), ResponseSchema(name='price_value', description='Extract any                                    sentences about the value or                                     price, and output them as a                                     comma separated Python list.')]
步骤2（构建解析器）： response_schemas=[ResponseSchema(name='gift', description='Was the item purchased                             as a gift for someone else?                              Answer True

# LangChain: Memory

## Outline
* ConversationBufferMemory
* ConversationBufferWindowMemory
* ConversationTokenBufferMemory
* ConversationSummaryMemory

## ConversationBufferMemory

In [5]:
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

In [6]:
llm_for_memory = ChatOpenAI(temperature=0.0)
memory = ConversationBufferMemory()
conversation = ConversationChain(
    llm = llm_for_memory,
    memory = memory,
    verbose = True
)

In [7]:
conversation.predict(input="Hi, my name is Andrew")
conversation.predict(input="What is 1+1?")
conversation.predict(input="What is my name?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi, my name is Andrew
AI:[0m

[1m> Finished chain.[0m


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi, my name is Andrew
AI: Hello Andrew! It's nice to meet you. How can I assist you today?
Human: What is 1+1?
AI:[0m

[1m> Finished chain.[0m


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe 

'Your name is Andrew.'

In [63]:
print(memory.buffer)

Human: Hi, my name is Andrew
AI: Hello Andrew, it's nice to meet you. My name is AI. How can I assist you today?
Human: What is 1+1?
AI: The answer to 1+1 is 2.
Human: What is my name?
AI: Your name is Andrew, as you mentioned earlier.


In [64]:
# 实际上返回的就是 memory.buffer，只不过这里转成了字典类型，并加了key 为 history
memory.load_memory_variables({})



{'history': "Human: Hi, my name is Andrew\nAI: Hello Andrew, it's nice to meet you. My name is AI. How can I assist you today?\nHuman: What is 1+1?\nAI: The answer to 1+1 is 2.\nHuman: What is my name?\nAI: Your name is Andrew, as you mentioned earlier."}

In [65]:
memory = ConversationBufferMemory()
memory.save_context({"input": "Hi"}, 
                    {"output": "What's up"})
print(memory.buffer)


Human: Hi
AI: What's up


In [66]:
memory.load_memory_variables({})

{'history': "Human: Hi\nAI: What's up"}

In [67]:
memory.save_context({"input": "Not much, just hanging"}, 
                    {"output": "Cool"})
memory.load_memory_variables({})

{'history': "Human: Hi\nAI: What's up\nHuman: Not much, just hanging\nAI: Cool"}

## ConversationBufferWindowMemory

In [68]:
from langchain.memory import ConversationBufferWindowMemory

In [69]:
memory = ConversationBufferWindowMemory(k=1)

# cc：似乎这里按照k的长度留最近的memory


In [70]:
memory.save_context({"input": "Hi"},
                    {"output": "What's up"})
memory.save_context({"input": "Not much, just hanging"},
                    {"output": "Cool"})

In [71]:
memory.load_memory_variables({})

{'history': 'Human: Not much, just hanging\nAI: Cool'}

In [72]:
memory = ConversationBufferWindowMemory(k=1)
conversation = ConversationChain(
    llm=llm_for_memory, 
    memory = memory,
    verbose=False
)


In [73]:
# 因为这里只保留了1个长度的memory，所以之前对话内容丢掉了。
conversation.predict(input="Hi, my name is Andrew")
conversation.predict(input="What is 1+1?")
conversation.predict(input="What is my name?")

"I'm sorry, I don't have access to that information. Could you please tell me your name?"

## ConversationTokenBufferMemory

In [76]:
!pip list | grep tiktoken
# 注意：这里要求需要 tiktoken 包，可以检查下环境里面有没有，没有的话可以pip install

tiktoken                                 0.4.0


In [74]:
from langchain.memory import ConversationTokenBufferMemory


In [77]:
memory = ConversationTokenBufferMemory(llm= llm_for_memory, max_token_limit=30)
memory.save_context({"input": "AI is what?!"},
                    {"output": "Amazing!"})
memory.save_context({"input": "Backpropagation is what?"},
                    {"output": "Beautiful!"})
memory.save_context({"input": "Chatbots are what?"}, 
                    {"output": "Charming!"})


In [81]:
memory.load_memory_variables({}).get('history')

'AI: Beautiful!\nHuman: Chatbots are what?\nAI: Charming!'

## ConversationSummaryMemory

In [83]:
from langchain.memory import ConversationSummaryBufferMemory


In [87]:
# create a long string
schedule = "There is a meeting at 8am with your product team. \
You will need your powerpoint presentation prepared. \
9am-12pm have time to work on your LangChain \
project which will go quickly because Langchain is such a powerful tool. \
At Noon, lunch at the italian resturant with a customer who is driving \
from over an hour away to meet you to understand the latest in AI. \
Be sure to bring your laptop to show the latest LLM demo."

memory = ConversationSummaryBufferMemory(llm=llm_for_memory, max_token_limit=100)


In [90]:
memory.save_context({"input": "Hello"}, {"output": "What's up"})
memory.save_context({"input": "Not much, just hanging"},
                    {"output": "Cool"})
memory.save_context({"input": "What is on the schedule today?"}, 
                    {"output": f"{schedule}"})

In [91]:
memory.load_memory_variables({})

{'history': 'System: The human and AI engage in small talk before the human asks about their schedule for the day. The AI informs the human of a meeting with their product team at 8am, time to work on their LangChain project, and a lunch meeting with a customer to discuss the latest in AI.'}

In [92]:
conversation = ConversationChain(
    llm=llm_for_memory, 
    memory = memory,
    verbose=True
)

In [93]:
conversation.predict(input="What would be a good demo to show?")




[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
System: The human and AI engage in small talk before the human asks about their schedule for the day. The AI informs the human of a meeting with their product team at 8am, time to work on their LangChain project, and a lunch meeting with a customer to discuss the latest in AI.
Human: What would be a good demo to show?
AI:[0m

[1m> Finished chain.[0m


'Based on our previous conversations with the customer, they seem particularly interested in natural language processing and sentiment analysis. Perhaps we could showcase our latest advancements in those areas. Additionally, they have expressed interest in machine learning algorithms for predictive analytics, so we could also demonstrate our capabilities in that field. Would you like me to gather more information on their specific interests to tailor the demo even further?'

In [94]:
memory.load_memory_variables({})

{'history': 'System: The human and AI engage in small talk before the human asks about their schedule for the day. The AI informs the human of a meeting with their product team at 8am, time to work on their LangChain project, and a lunch meeting with a customer to discuss the latest in AI.\nHuman: What would be a good demo to show?\nAI: Based on our previous conversations with the customer, they seem particularly interested in natural language processing and sentiment analysis. Perhaps we could showcase our latest advancements in those areas. Additionally, they have expressed interest in machine learning algorithms for predictive analytics, so we could also demonstrate our capabilities in that field. Would you like me to gather more information on their specific interests to tailor the demo even further?'}

# Chains in LangChain

## Outline

* LLMChain
* Sequential Chains
  * SimpleSequentialChain
  * SequentialChain
* Router Chain

In [17]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
df = pd.read_csv('data/Data.csv')

## LLMChain

可以作为最小单元子chain

In [8]:
from langchain.chains import LLMChain

In [9]:
llm_for_chains = ChatOpenAI(temperature=0.9)

prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe \
    a company that makes {product}?"
)

chain = LLMChain(llm=llm_for_chains, prompt=prompt)

product = "Queen Size Sheet Set"
chain.run(product)

'RoyalRest'

## SimpleSequentialChain

简单顺序链，上一个chain的返回作为下一个chain的输入，最后只返回最后chain的结果

In [11]:
from langchain.chains import SimpleSequentialChain

In [12]:
# chain 1
first_prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe \
    a company that makes {product}?"
)
chain_one = LLMChain(llm=llm_for_chains, prompt=first_prompt)


# chain 2
second_prompt = ChatPromptTemplate.from_template(
    "Write a 20 words description for the following \
    company:{company_name}"
)
chain_two = LLMChain(llm=llm_for_chains, prompt=second_prompt)

overall_simple_chain = SimpleSequentialChain(chains=[chain_one, chain_two],
                                             verbose=True
                                             )

product = "Queen Size Sheet Set"
overall_simple_chain.run(product)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mRegalRest Bedding[0m
[33;1m[1;3mRegalRest Bedding is a premium bedding company offering luxurious and comfortable sleep solutions for a restful night's sleep.[0m

[1m> Finished chain.[0m


"RegalRest Bedding is a premium bedding company offering luxurious and comfortable sleep solutions for a restful night's sleep."

## SequentialChain

顺序链，可以输出中间任何chain的结果；

In [14]:
from langchain.chains import SequentialChain

In [13]:
# prompt template 1
first_prompt = ChatPromptTemplate.from_template(
    "Translate the following review to english:"
    "\n\n{Review}"
)
# chain 1: input-> Review | output->English_Review
chain_one = LLMChain(llm=llm_for_chains, prompt=first_prompt, 
                     output_key="English_Review"
                    )

# prompt template 2
second_prompt = ChatPromptTemplate.from_template(
    "Can you summarize the following review in 1 sentence:"
    "\n\n{English_Review}"
)
# chain 2: input-> English_Review | output-> summary
chain_two = LLMChain(llm=llm_for_chains, prompt=second_prompt, 
                     output_key="summary"
                    )

# prompt template 3
third_prompt = ChatPromptTemplate.from_template(
    "What language is the following review:\n\n{Review}"
)
# chain 3: input-> Review | output-> language
chain_three = LLMChain(llm=llm_for_chains, prompt=third_prompt,
                       output_key="language"
                      )

# prompt template 4
fourth_prompt = ChatPromptTemplate.from_template(
    "Write a follow up response to the following "
    "summary in the specified language:"
    "\n\nSummary: {summary}\n\nLanguage: {language}"
)
# chain 4: input-> summary, language | output-> followup_message
chain_four = LLMChain(llm=llm_for_chains, prompt=fourth_prompt,
                      output_key="followup_message"
                     )


In [15]:
overall_chain = SequentialChain(
    chains=[chain_one, chain_two, chain_three, chain_four],
    input_variables=['Review'],

    # 这里output 是指通过 SequentialChain 之后总共需要output出来的内容；
    output_variables=['English_Review', 'summary', 'followup_message'],
    verbose=True
)

In [18]:
review = df.Review[5]
review

"Je trouve le goût médiocre. La mousse ne tient pas, c'est bizarre. J'achète les mêmes dans le commerce et le goût est bien meilleur...\nVieux lot ou contrefaçon !?"

In [19]:
overall_chain(review)



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


{'Review': "Je trouve le goût médiocre. La mousse ne tient pas, c'est bizarre. J'achète les mêmes dans le commerce et le goût est bien meilleur...\nVieux lot ou contrefaçon !?",
 'English_Review': "I find the taste mediocre. The foam doesn't last, it's strange. I buy the same ones from the store and the taste is much better...\nOld batch or counterfeit!?",
 'summary': 'The reviewer is disappointed with the taste and lack of foam in the product, suspecting it might be an old batch or counterfeit compared to the ones purchased from the store.',
 'followup_message': "Réponse de suivi : Nous sommes désolés d'apprendre que vous êtes déçu par le goût et l'absence de mousse de notre produit. Nous tenons à vous assurer que tous nos produits sont fabriqués avec soin et avec les ingrédients les plus frais possible. Il est possible que le lot que vous avez reçu soit plus ancien ou qu'il s'agisse d'une contrefaçon, ce qui expliquerait la différence de goût et de texture par rapport à ceux achetés 

## Router Chain


In [21]:
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.prompts import PromptTemplate

In [22]:
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise\
and easy to understand manner. \
When you don't know the answer to a question you admit\
that you don't know.

Here is a question:
{input}"""

math_template = """You are a very good mathematician. \
You are great at answering math questions. \
You are so good because you are able to break down \
hard problems into their component parts, 
answer the component parts, and then put them together\
to answer the broader question.

Here is a question:
{input}"""

history_template = """You are a very good historian. \
You have an excellent knowledge of and understanding of people,\
events and contexts from a range of historical periods. \
You have the ability to think, reflect, debate, discuss and \
evaluate the past. You have a respect for historical evidence\
and the ability to make use of it to support your explanations \
and judgements.

Here is a question:
{input}"""

computerscience_template = """ You are a successful computer scientist.\
You have a passion for creativity, collaboration,\
forward-thinking, confidence, strong problem-solving capabilities,\
understanding of theories and algorithms, and excellent communication \
skills. You are great at answering coding questions. \
You are so good because you know how to solve a problem by \
describing the solution in imperative steps \
that a machine can easily interpret and you know how to \
choose a solution that has a good balance between \
time complexity and space complexity. 

Here is a question:
{input}"""

In [23]:
prompt_infos = [
    {
        "name": "physics", 
        "description": "Good for answering questions about physics", 
        "prompt_template": physics_template
    },
    {
        "name": "math", 
        "description": "Good for answering math questions", 
        "prompt_template": math_template
    },
    {
        "name": "History", 
        "description": "Good for answering history questions", 
        "prompt_template": history_template
    },
    {
        "name": "computer science", 
        "description": "Good for answering computer science questions", 
        "prompt_template": computerscience_template
    }
]

destination_chains = {}

for p_info in prompt_infos:
    name = p_info['name']
    prompt_template = p_info['prompt_template']
    prompt = ChatPromptTemplate.from_template(template=prompt_template)
    chain = LLMChain(llm=llm_for_chains, prompt=prompt)
    destination_chains[name] = chain

destination = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destination_str = "\n".join(destination)

In [24]:
default_prompt = ChatPromptTemplate.from_template("{input}")
default_chain = LLMChain(llm=llm_for_chains, prompt=default_prompt)

In [27]:
MULTI_PROMPT_ROUTER_TEMPLATE = """Given a raw text input to a \
language model select the model prompt best suited for the input. \
You will be given the names of the available prompts and a \
description of what the prompt is best suited for. \
You may also revise the original input if you think that revising\
it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{{{
    "destination": string \ name of the prompt to use or "DEFAULT"
    "next_inputs": string \ a potentially modified version of the original input
}}}}
```

REMEMBER: "destination" MUST be one of the candidate prompt \
names specified below OR it can be "DEFAULT" if the input is not\
well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input \
if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (remember to include the ```json)>>"""

In [28]:
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(
    destinations=destination_str
)

router_prompt = PromptTemplate(
    template=router_template,
    input_variables=['input'],
    output_parser=RouterOutputParser(),
)

router_chain = LLMRouterChain.from_llm(llm=llm_for_chains, prompt=router_prompt)

In [29]:
chain = MultiPromptChain(router_chain=router_chain,
                         destination_chains=destination_chains,
                         default_chain=default_chain,
                         verbose=True
                         )

In [31]:
response1 = chain.run("What is black body radiation?")
response2 = chain.run("what is 2 + 2")

response1, response2



[1m> Entering new MultiPromptChain chain...[0m
physics: {'input': 'What is black body radiation?'}
[1m> Finished chain.[0m


[1m> Entering new MultiPromptChain chain...[0m
math: {'input': 'what is 2 + 2'}
[1m> Finished chain.[0m


('Black body radiation refers to the electromagnetic radiation emitted by an object that absorbs all incident radiation without reflecting or transmitting any. It is called "black body" because an idealized object that perfectly absorbs all radiation across all wavelengths and does not reflect or transmit any would appear completely black.\n\nAccording to Planck\'s law, the intensity and distribution of radiation emitted by a black body depends on its temperature. As the temperature increases, the emitted radiation shifts to shorter wavelengths and becomes more intense. This relationship between temperature and the distribution of emitted radiation is known as Planck\'s black body radiation law.\n\nBlack body radiation has a characteristic spectrum that depends solely on the temperature of the object emitting it. At low temperatures, the radiation is mostly in the infrared range, while at higher temperatures, it extends into visible light and even ultraviolet or higher-energy regions.\

In [32]:
chain.run("Why does every cell in our body contain DNA?")



[1m> Entering new MultiPromptChain chain...[0m
None: {'input': 'Why does every cell in our body contain DNA?'}
[1m> Finished chain.[0m


'Every cell in our body contains DNA because DNA carries the genetic information that determines our physical characteristics and controls the functions of our cells. DNA stands for deoxyribonucleic acid, and it is a long molecule made up of nucleotides. These nucleotides contain the genetic instructions that code for proteins, which are essential for the structure, function, and regulation of our body.\n\nEach cell in our body needs to carry the complete set of genetic information to carry out its specific functions. This includes cells that make up our muscles, skin, organs, blood, and even cells in our immune system. While different cells express different genes, they all have the same basic genetic blueprint stored in the DNA.\n\nDuring the process of cell division, DNA is replicated so that each new cell contains a full copy of the genetic information. This ensures that the newly formed cells inherit the same genetic instructions and can continue to function properly. Additionally


这个的用法还是挺高级的，在多分类任务中用的到。比如在数据分析领域，有几个分类的任务：
- 数据查询；（调用查询API）
- 数据处理；（数据处理这一步因为存在数据泄露风险，还是主要以调接口为主）
- 绘图；（调用绘图API，但是如果统一调用API的话，实际上就不是多分类的任务了）

# LangChain: Q&A over Documents

An example might be a tool that would allow you to query a product catalog for items of interest.

这个功能主要是基于某个大型文档进行问答的，主要的逻辑是将文档进行向量化存储，且有index索引，在文档查询过程中可以快速进行匹配。

不过暂时还用不太上，后面再仔细学习下。

In [2]:
from langchain.chains import RetrievalQA # 可以对文档进行检索
from langchain.document_loaders import CSVLoader

 # 向量存储，这里是用内存中的doc数组搜索向量存储，因为是在内存中的所以不需要链接到任何外部向量数据库，非常容易入门（当然还有其他类型向量存储）
from langchain.vectorstores import DocArrayInMemorySearch

# 向量存储索引创建器，可以帮我们非常容易地创建一个向量存储
from langchain.indexes import VectorstoreIndexCreator
from IPython.display import display, Markdown

In [3]:
file = 'data/OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
loader

<langchain.document_loaders.csv_loader.CSVLoader at 0x1197f2bc0>

In [4]:
index = VectorstoreIndexCreator(
    # 指定向量存储类，这里是 DocArrayInMemorySearch
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])



In [5]:
query ="Please list all your shirts with sun protection \
in a table in markdown and summarize each one."

In [6]:
response = index.query(query)

In [10]:
display(Markdown(response))



| Name | Description |
| --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | UPF 50+ rated, 100% polyester, wrinkle-resistant, front and back cape venting, two front bellows pockets |
| Men's Plaid Tropic Shirt, Short-Sleeve | UPF 50+ rated, 52% polyester and 48% nylon, machine washable and dryable, front and back cape venting, two front bellows pockets |
| Men's TropicVibe Shirt, Short-Sleeve | UPF 50+ rated, 71% Nylon, 29% Polyester, 100% Polyester knit mesh, machine wash and dry, front and back cape venting, two front bellows pockets |
| Sun Shield Shirt by | UPF 50+ rated, 78% nylon, 22% Lycra Xtra Life fiber, handwash, line dry, wicks moisture, fits comfortably over swimsuit, abrasion resistant |

All four shirts provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. The Men's Tropical Plaid Short-Sleeve Shirt is made of 100% polyester and is wrinkle-resistant

In [11]:
docs = loader.load()
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'data/OutdoorClothingCatalog_1000.csv', 'row': 0})

In [12]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [13]:
embed = embeddings.embed_query("Hi my name is Harrison")
print(len(embed))

1536


In [None]:
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embeddings
)

# LangChain: Evaluation

## Outline:

* Example generation
* Manual evaluation (and debuging)
* LLM-assisted evaluation

这部分还没有很好的理解，后期应该会用到，后面回头再来看一下。

In [39]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

## Create our QandA application

In [43]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader, TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter


In [49]:
file = 'data/OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
data = loader.load()


In [51]:
# 不知道这边为啥就崩了
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

# 后来用配置解决了：
# import os
# os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [52]:
llm = ChatOpenAI(temperature = 0.0)
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

### Coming up with test datapoints


In [53]:
data[10]

Document(page_content=": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nAdditional Features\n- Relaxed fit top with raglan sleeves and rounded hem.\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\n\nImported.", metadata={'source': 'data/OutdoorClothingCatalog_1000.csv', 'row': 10})

### Hard-coded examples

In [28]:
examples = [
    {
        "query": "Do the Cozy Comfort Pullover Set\
        have side pockets?",
        "answer": "Yes"
    },
    {
        "query": "What collection is the Ultra-Lofty \
        850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
]

### LLM-Generated examples

In [54]:
from langchain.evaluation.qa import QAGenerateChain

In [55]:
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())

new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)



In [56]:
new_examples[0]

{'query': "What is the weight of one pair of Women's Campside Oxfords?",
 'answer': "The approximate weight of one pair of Women's Campside Oxfords is 1 lb.1 oz."}

In [57]:
data[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'data/OutdoorClothingCatalog_1000.csv', 'row': 0})

### Combine examples

In [58]:
examples += new_examples

In [59]:
qa.run(examples[0]["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'The Cozy Comfort Pullover Set, Stripe has side pockets.'

## Manual Evaluation

In [60]:
import langchain
langchain.debug = True

In [61]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set        have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set        have side pockets?",
  "context": ": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\

'The Cozy Comfort Pullover Set, Stripe has side pockets on the pull-on pants.'

In [62]:
# Turn off the debug mode
langchain.debug = False

# LangChain: Agents

## Outline:

* Using built in LangChain tools: DuckDuckGo search and Wikipedia
* Defining your own tools

代理是可以使用各种工具实现更智能流程上的自动化，这部分暂时还用不上。后面有需求再详细了解。