# 安装 LangChain

In [None]:
!pip install langchain[llms]

Collecting langchain[llms]
  Downloading langchain-0.0.314-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain[llms])
  Downloading dataclasses_json-0.6.1-py3-none-any.whl (27 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain[llms])
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langsmith<0.1.0,>=0.0.43 (from langchain[llms])
  Downloading langsmith-0.0.43-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.0/40.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting clarifai>=9.1.0 (from langchain[llms])
  Downloading clarifai-9.9.2-py3-none-any.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m58.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cohere<5,>=4 (from langchain[llms])
  Downloading cohere-4.27-py3-none-any.whl (47

# 设置 OpenAI Key

In [None]:
import os
from getpass import getpass

# Reuse a key that is already exported in the environment; otherwise prompt for
# it interactively so the secret is never written into the notebook. The
# previous unconditional assignment of '' wiped out any key that was already set.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

# 2. Prompt Templates（提示词模板）

## PromptTemplate

In [None]:
# Import from the same submodules every other cell in this notebook uses,
# instead of the package root.
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

# One template, rendered once per target programming language.
template = '请帮我生成{language}的打印"hello world"的代码示例'
prompt_tpl = PromptTemplate.from_template(template)

llm = OpenAI(temperature=0.1)
for language in ['Python', 'Java', 'C++']:
    prompt = prompt_tpl.format(language=language)
    # Banner line separating the completions for each language.
    print(f'{"=" * 10} {language} {"=" * 10}')
    print(llm(prompt))




print("Hello World")


public class HelloWorld {
    public static void main(String[] args) {
        System.out.println("Hello World!");
    }
}


#include <iostream>

int main()
{
    std::cout << "Hello World!" << std::endl;
    return 0;
}


## Partial Prompt Templates

In [None]:
from langchain.prompts import PromptTemplate

prompt_tpl = PromptTemplate.from_template(
    '请给你我讲{num}个关于{type}的笑话,'
    '并且不要出现{location1}和{location2}'
)

partial_prompt1 = prompt_tpl.partial(num='3')
partial_prompt2 = partial_prompt1.partial(
    location1='办公室',
    location2='学校'
)
print(partial_prompt2.format(type='程序员'))

请给你我讲3个关于程序员的笑话,并且不要出现办公室和学校


In [None]:
from datetime import datetime
from langchain.prompts import PromptTemplate


def get_date():
    """Return today's local date formatted as ``YYYY-MM-DD``."""
    today = datetime.now()
    return today.strftime('%Y-%m-%d')


prompt_tpl = PromptTemplate.from_template(
    '你是一个优秀的{role}助手，你的知识库截止日期是{date}'
)

partial_prompt = prompt_tpl.partial(date=get_date)
print(partial_prompt.format(role='AI'))

你是一个优秀的AI助手，你的知识库截止日期是2023-10-08


In [None]:
from datetime import datetime
from langchain.prompts import PromptTemplate


def get_date():
    """Return the current local date as a ``YYYY-MM-DD`` string."""
    # format() on a datetime delegates to strftime with the given spec.
    return format(datetime.now(), '%Y-%m-%d')


prompt_tpl = PromptTemplate.from_template(
    '你是一个优秀的{role}助手，你的知识库截止日期是{date}',
    partial_variables={'date': get_date}
)

print(prompt_tpl.format(role='AI'))

你是一个优秀的AI助手，你的知识库截止日期是2023-10-08


## PipelinePromptTemplate

In [None]:
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.pipeline import PipelinePromptTemplate

full_template = '''
{expect}
{example}
{question}
'''
full_prompt = PromptTemplate.from_template(full_template)

expect_prompt = PromptTemplate.from_template(
    '请学习我给定的例子，并判断我给出的提问：'
)

example_prompt = PromptTemplate.from_template('''"""
文本：今天天气阳光明媚，真好！
情绪：正向

文本：今天又下雨了，天气真糟糕！
情绪：反向

文本：今天衣服又弄脏了！
情绪：反向
"""''')

question_prompt = PromptTemplate.from_template('''
文本：{input}！
情绪：
''')

input_prompts = [
    ('expect', expect_prompt),
    ('example', example_prompt),
    ('question', question_prompt)
]

pipeline_prompt = PipelinePromptTemplate(
    final_prompt=full_prompt,
    pipeline_prompts=input_prompts
)

print(pipeline_prompt.format(input='今天又被批评了'))


请学习我给定的例子，并判断我给出的提问：
"""
文本：今天天气阳光明媚，真好！
情绪：正向

文本：今天又下雨了，天气真糟糕！
情绪：反向

文本：今天衣服又弄脏了！
情绪：反向
"""

文本：今天又被批评了！
情绪：




## FewShotPromptTemplate

In [None]:
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate

example_prompt = PromptTemplate.from_template(
    '文本：{text}\n情绪：{mood}'
)

examples = [
    {'text': '今天天气阳光明媚，真好！', 'mood': '正向'},
    {'text': '今天又下雨了，天气真糟糕！', 'mood': '反向'},
    {'text': '今天衣服又弄脏了！', 'mood': '反向'},
]

prompt = FewShotPromptTemplate(
    prefix='请学习我给定的例子，并判断我给出的提问：\n"""',
    example_prompt=example_prompt,
    examples=examples,
    suffix='"""\n文本：{input}！\n情绪：',
    input_variables=['input']
)

print(prompt.format(input='今天又被批评了！'))

请学习我给定的例子，并判断我给出的提问：
"""

文本：今天天气阳光明媚，真好！
情绪：正向

文本：今天又下雨了，天气真糟糕！
情绪：反向

文本：今天衣服又弄脏了！
情绪：反向

"""
文本：今天又被批评了！！
情绪：


## 自定义 Prompt Template

In [None]:
import types
import random

from langchain.utils.formatting import formatter
from langchain.prompts.base import StringPromptTemplate


class FunctionPromptTemplate(StringPromptTemplate):
    """Prompt template whose variables may be supplied as callables.

    Any callable value is invoked with no arguments at format time and its
    return value is substituted into ``template``.
    """

    # Format string rendered by ``format``; declared explicitly as a string field.
    template: str = ''

    def format(self, **kwargs) -> str:
        """Render ``template`` after resolving callable values.

        Args:
            **kwargs: Values for the template variables. A callable value is
                called with no arguments and replaced by its result.

        Returns:
            The formatted prompt string.
        """
        merged = self._merge_partial_and_user_variables(**kwargs)

        # Build a fresh mapping instead of rewriting the dict being iterated.
        # ``callable`` also covers builtins, bound methods and
        # ``functools.partial`` objects, which ``types.FunctionType`` misses
        # (they used to be rendered as their repr).
        resolved = {
            key: value() if callable(value) else value
            for key, value in merged.items()
        }

        return formatter.format(self.template, **resolved)

    @property
    def _prompt_type(self) -> str:
        """Identifier string for this prompt type."""
        return 'function_prompt'


def get_num():
    """Return a random integer between 0 and 10, both ends included."""
    # randrange excludes its upper bound, hence 11.
    return random.randrange(0, 11)


prompt = FunctionPromptTemplate(
    template='请给你我讲{num}个笑话',
    input_variables=['num'],
)


print(prompt.format(num=get_num))


请给你我讲7个笑话


## Prompt Template 的序列化与反序列化

In [None]:
from langchain.prompts import PromptTemplate
from langchain.prompts.loading import load_prompt

prompt = PromptTemplate(
    template='请给你我讲{num}个笑话',
    input_variables=['num'],
)
prompt.save('prompt.json')

prompt = load_prompt('prompt.json')
print(prompt)

input_variables=['num'] template='请给你我讲{num}个笑话'


## ChatPromptTemplate

In [None]:
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatMessagePromptTemplate
)

messages = [
    SystemMessagePromptTemplate.from_template(
        '你的名字是{name}'),
    HumanMessagePromptTemplate.from_template(
        '你叫什么名字？')
]

# 或者使用 ChatMessagePromptTemplate.from_template 方法
# 但是需要设置对应的 role 参数
# messages = [
#     ChatMessagePromptTemplate.from_template(
#         '你的名字是{name}', role='system'),
#     ChatMessagePromptTemplate.from_template(
#         '你叫什么名字？', role='human')
# ]

prompt_tpl = ChatPromptTemplate(
    messages=messages,
    input_variables=['name']
)

print(prompt_tpl)

input_variables=['name'] messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['name'], template='你的名字是{name}')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='你叫什么名字？'))]


In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI

messages = [
    ('system', '你的名字是{name}'),
    ('human', '你叫什么名字？')
]

prompt_tpl = ChatPromptTemplate.from_messages(
    messages=messages
)

prompt = prompt_tpl.format_messages(name='小明')

llm = ChatOpenAI()
print(llm(prompt))


content='我是小明。'


## MessagesPlaceholder

In [None]:
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain.schema import AIMessage, HumanMessage
from langchain.chat_models import ChatOpenAI


messages = [
    SystemMessagePromptTemplate.from_template(
        '请用不超过{text_number}个字来总结以下对话'),
    MessagesPlaceholder(variable_name='context'),
    HumanMessagePromptTemplate.from_template(
        '###请开始总结上面的对话')
]

prompt_tpl = ChatPromptTemplate.from_messages(messages)

human_message = HumanMessage(content='如何学好英语？')
ai_message = AIMessage(
    content='学好英语需要每天持续实践，均衡地练习听、说、读、写四大技能，'
            '不断扩充词汇和掌握语法。利用现代技术工具可增强学习效果，'
            '考虑沉浸式学习方法并参与相关课程与学习小组。逐渐增加阅读难度，'
            '模仿优秀的英语说话者，定期反思并调整学习方法，'
            '并始终保持积极的学习态度。'
)

prompt_messages = prompt_tpl.format_messages(
    context=[human_message, ai_message], text_number=20)

for message in prompt_messages:
    print(repr(message))

llm = ChatOpenAI()
print('\n' + repr(llm(prompt_messages)))

SystemMessage(content='请用不超过20个字来总结以下对话')
HumanMessage(content='如何学好英语？')
AIMessage(content='学好英语需要每天持续实践，均衡地练习听、说、读、写四大技能，不断扩充词汇和掌握语法。利用现代技术工具可增强学习效果，考虑沉浸式学习方法并参与相关课程与学习小组。逐渐增加阅读难度，模仿优秀的英语说话者，定期反思并调整学习方法，并始终保持积极的学习态度。')
HumanMessage(content='###请开始总结上面的对话')

AIMessage(content='学好英语：坚持每天实践，全面练习，利用技术工具，沉浸式学习，模仿优秀者，积极态度。')


# 3. Example Selector（示例选择器）

## FewShotChatMessagePromptTemplate

In [None]:
from langchain.prompts import (
    ChatPromptTemplate,
    FewShotChatMessagePromptTemplate
)

examples = [
    {'input': '2+2', 'output': '4'},
    {'input': '2+3', 'output': '5'},
]

example_prompt = ChatPromptTemplate.from_messages(
    [
        ('human', '{input}'),
        ('ai', '{output}'),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)

print(few_shot_prompt.format())


Human: 2+2
AI: 4
Human: 2+3
AI: 5


## LengthBasedExampleSelector

In [None]:
from langchain.prompts import PromptTemplate
from langchain.prompts import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector

example_prompt = PromptTemplate.from_template(
    'Input: {input}\noutput: {output}'
)

examples = [
    {'input': 'happy', 'output': 'sad'},
    {'input': 'tall', 'output': 'short'},
    {'input': 'hot', 'output': 'cold'},
    {'input': 'fast', 'output': 'slow'},
    {'input': 'rich', 'output': 'poor'},
]

example_selector = LengthBasedExampleSelector(
    example_prompt=example_prompt,
    examples=examples,
    max_length=12,
)

prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    prefix='Please learn examples and answer questions:',
    example_prompt=example_prompt,
    suffix='Input: {input}\nOutput:',
    input_variables=['input']
)

print(prompt.format(input='open'))


Please learn examples and answer questions:

Input: happy
output: sad

Input: tall
output: short

Input: open
Output:


## SemanticSimilarityExampleSelector

In [None]:
!pip install chromadb
!pip install tiktoken

Collecting chromadb
  Downloading chromadb-0.4.14-py3-none-any.whl (448 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/448.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.4/448.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m448.1/448.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.103.2-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn[standard]>=0.18.3 (from chrom

In [None]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import SemanticSimilarityExampleSelector
from langchain.prompts import FewShotPromptTemplate, PromptTemplate

examples = [
    {'input': 'happy', 'output': 'sad'},
    {'input': 'tall', 'output': 'short'},
    {'input': 'hot', 'output': 'cold'},
    {'input': 'fast', 'output': 'slow'},
    {'input': 'rich', 'output': 'poor'},
]

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples=examples,
    embeddings=OpenAIEmbeddings(),
    vectorstore_cls=Chroma,
    k=1
)

example_prompt = PromptTemplate.from_template(
    'Input: {input}\noutput: {output}'
)


prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    prefix='Please learn examples and answer questions:',
    example_prompt=example_prompt,
    suffix='Input: {input}\nOutput:',
    input_variables=['input']
)

print(prompt.format(input='sunny'))




Please learn examples and answer questions:

Input: happy
output: sad

Input: sunny
Output:


## MaxMarginalRelevanceExampleSelector

In [None]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import MaxMarginalRelevanceExampleSelector
from langchain.prompts import FewShotPromptTemplate, PromptTemplate

examples = [
    {'input': 'happy', 'output': 'sad'},
    {'input': 'tall', 'output': 'short'},
    {'input': 'hot', 'output': 'cold'},
    {'input': 'fast', 'output': 'slow'},
    {'input': 'rich', 'output': 'poor'},
]

example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
    examples=examples,
    embeddings=OpenAIEmbeddings(),
    vectorstore_cls=Chroma,
    k=2
)

example_prompt = PromptTemplate.from_template(
    'Input: {input}\noutput: {output}'
)


prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    prefix='Please learn examples and answer questions:',
    example_prompt=example_prompt,
    suffix='Input: {input}\nOutput:',
    input_variables=['input']
)

print(prompt.format(input='sunny'))



Please learn examples and answer questions:

Input: happy
output: sad

Input: tall
output: short

Input: sunny
Output:


In [None]:
!pip install nltk



In [None]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.prompts import NGramOverlapExampleSelector


examples = [
    {'input': 'happy', 'output': 'sad'},
    {'input': 'tall', 'output': 'short'},
    {'input': 'hot', 'output': 'cold'},
    {'input': 'fast', 'output': 'slow'},
    {'input': 'rich', 'output': 'poor'},
]

example_prompt = PromptTemplate.from_template(
    'Input: {input}\noutput: {output}'
)

example_selector = NGramOverlapExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    threshold=-1,

)

prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    prefix='Please learn examples and answer questions:',
    example_prompt=example_prompt,
    suffix='Input: {input}\nOutput:',
    input_variables=['input']
)

print(prompt.format(input='sunny'))


Please learn examples and answer questions:

Input: happy
output: sad

Input: tall
output: short

Input: hot
output: cold

Input: fast
output: slow

Input: rich
output: poor

Input: sunny
Output:


In [None]:
from langchain.prompts import NGramOverlapExampleSelector
from langchain.prompts import FewShotPromptTemplate, PromptTemplate

examples = [
    {'input': 'See Spot run.', 'output': 'Ver correr a Spot.'},
    {'input': 'My dog barks.', 'output': 'Mi perro ladra.'},
    {'input': 'Spot can run.', 'output': 'Spot puede correr.'},
]

example_prompt = PromptTemplate.from_template(
    'Input: {input}\noutput: {output}'
)

example_selector = NGramOverlapExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    threshold=-1,

)

prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    prefix='Please learn examples and answer questions:',
    example_prompt=example_prompt,
    suffix='Input: {input}\nOutput:',
    input_variables=['input']
)


for threshold in [-0.1, 0.01, 1.0]:
    print(f'\n======= threshold: {threshold} =======')
    example_selector.threshold = threshold
    print(prompt.format(input='Spot can run fast.'))



Please learn examples and answer questions:

Input: Spot can run.
output: Spot puede correr.

Input: See Spot run.
output: Ver correr a Spot.

Input: My dog barks.
output: Mi perro ladra.

Input: Spot can run fast.
Output:

Please learn examples and answer questions:

Input: Spot can run.
output: Spot puede correr.

Input: See Spot run.
output: Ver correr a Spot.

Input: Spot can run fast.
Output:

Please learn examples and answer questions:

Input: Spot can run fast.
Output:


## 自定义 Example Selector

In [None]:
import random
from typing import Dict, List

from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.prompts.example_selector.base import BaseExampleSelector


class RandomExampleSelector(BaseExampleSelector):
    """Example selector that returns a random subset of the stored examples."""

    def __init__(self, examples: List, count: int = 2):
        """Store the example pool and the number of examples to draw.

        Args:
            examples: Pool of examples to draw from.
            count: How many examples ``select_examples`` should return.
        """
        self.examples = examples
        self.count = count

    def add_example(self, example: Dict) -> None:
        """Append ``example`` to the pool."""
        self.examples.append(example)

    def select_examples(self, input_variables: Dict) -> List:
        """Return up to ``count`` distinct examples chosen at random.

        ``input_variables`` is accepted to satisfy the selector interface but
        is not used for the selection.
        """
        # random.sample raises ValueError when asked for more items than the
        # pool holds (or for a negative number), so clamp the request.
        sample_size = max(0, min(self.count, len(self.examples)))
        return random.sample(self.examples, sample_size)


examples = [
    {'input': 'happy', 'output': 'sad'},
    {'input': 'tall', 'output': 'short'},
    {'input': 'hot', 'output': 'cold'},
    {'input': 'fast', 'output': 'slow'},
    {'input': 'rich', 'output': 'poor'},
]

example_selector = RandomExampleSelector(examples, 3)

example_prompt = PromptTemplate.from_template(
    'Input: {input}\noutput: {output}'
)

prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    prefix='Please learn examples and answer questions:',
    example_prompt=example_prompt,
    suffix='Input: {input}\nOutput:',
    input_variables=['input']
)

print(prompt.format(input='sunny'))


Please learn examples and answer questions:

Input: rich
output: poor

Input: tall
output: short

Input: fast
output: slow

Input: sunny
Output:


# 4. Output parsers（输出解析器）
## CommaSeparatedListOutputParser

In [None]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser

output_parser = CommaSeparatedListOutputParser()
instructions = output_parser.get_format_instructions()

print(f'instructions: {instructions}')

prompt_tpl = PromptTemplate.from_template(
    template='请返回3个最有代表性的{input}.\n{instructions}',
    partial_variables={'instructions': instructions}
)

llm = OpenAI(model_name='gpt-3.5-turbo-instruct')
prompt = prompt_tpl.format(input='编程语言')

output = llm(prompt)
print(f'output: {output}, type: {type(output)}')

output_format = output_parser.parse(output)
print(f'output format: {output_format}, type: {type(output_format)}')


instructions: Your response should be a list of comma separated values, eg: `foo, bar, baz`
output: 

Java, Python, JavaScript, type: <class 'str'>
output format: ['Java', 'Python', 'JavaScript'], type: <class 'list'>


## DatetimeOutputParser

In [None]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import DatetimeOutputParser

output_parser = DatetimeOutputParser()
instructions = output_parser.get_format_instructions()

print(f'instructions: {instructions}')

prompt_tpl = PromptTemplate.from_template(
    template='北京举办奥运会开幕式是哪一年的几点.\n{instructions}',
    partial_variables={'instructions': instructions}
)

llm = OpenAI(model_name='gpt-3.5-turbo-instruct')
prompt = prompt_tpl.format()

output = llm(prompt)
print(f'output: {output}, type: {type(output)}')

output_format = output_parser.parse(output)
print(f'output format: {output_format}, type: {type(output_format)}')


instructions: Write a datetime string that matches the 
            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 369-04-05T20:15:31.537537Z, 1585-09-27T10:20:00.905533Z, 982-11-22T12:53:31.390465Z
output: 


2008-08-08T20:00:00.000000Z, type: <class 'str'>
output format: 2008-08-08 20:00:00, type: <class 'datetime.datetime'>


## EnumOutputParser

In [None]:
from enum import Enum

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import EnumOutputParser


class Color(Enum):
    """Closed set of colour answers handed to ``EnumOutputParser``."""

    # Member values are the lowercase colour names.
    BLUE = 'blue'
    RED = 'red'
    GREEN = 'green'


output_parser = EnumOutputParser(enum=Color)
instructions = output_parser.get_format_instructions()

print(f'instructions: {instructions}')

prompt_tpl = PromptTemplate.from_template(
    template='天空是什么颜色？\n{instructions}',
    partial_variables={'instructions': instructions}
)

llm = OpenAI(model_name='gpt-3.5-turbo-instruct')
prompt = prompt_tpl.format()

output = llm(prompt)
print(f'output: {output}, type: {type(output)}')

output_format = output_parser.parse(output)
print(f'output format: {output_format}, type: {type(output_format)}')


instructions: Select one of the following options: blue, red, green
output: 

blue, type: <class 'str'>
output format: Color.BLUE, type: <enum 'Color'>


In [None]:
# 如果值没有在 Color 对象中
output_format = output_parser.parse('orange')

OutputParserException: ignored

## XMLOutputParser

In [None]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import XMLOutputParser

output_parser = XMLOutputParser(
    tags=['movies', 'movie', 'name', 'director', 'year']
)
instructions = output_parser.get_format_instructions()

print(f'instructions: {instructions}')

prompt_tpl = PromptTemplate.from_template(
    template='请举例2个最有代表性的中国电影，请用中文回答.\n{instructions}',
    partial_variables={'instructions': instructions}
)

llm = OpenAI(model_name='gpt-3.5-turbo-instruct')
prompt = prompt_tpl.format()

output = llm(prompt)
print(f'output: {output}, type: {type(output)}')

output_format = output_parser.parse(output)
print('\noutput_format: ')
# Print each parsed entry under the top-level 'movies' key on its own line.
for movie in output_format['movies']:
    print(movie)


instructions: The output should be formatted as a XML file.
1. Output should conform to the tags below. 
2. If tags are not given, make them on your own.
3. Remember to always open and close all the tags.

As an example, for the tags ["foo", "bar", "baz"]:
1. String "<foo>
   <bar>
      <baz></baz>
   </bar>
</foo>" is a well-formatted instance of the schema. 
2. String "<foo>
   <bar>
   </foo>" is a badly-formatted instance.
3. String "<foo>
   <tag>
   </tag>
</foo>" is a badly-formatted instance.

Here are the output tags:
```
['movies', 'movie', 'name', 'director', 'year']
```
output: 
<movies>
    <movie>
        <name>霸王别姬</name>
        <director>陈凯歌</director>
        <year>1993</year>
    </movie>
    <movie>
        <name>活着</name>
        <director>张艺谋</director>
        <year>1994</year>
    </movie>
</movies>, type: <class 'str'>

output_format: 
{'movie': [{'name': '霸王别姬'}, {'director': '陈凯歌'}, {'year': '1993'}]}
{'movie': [{'name': '活着'}, {'director': '张艺谋'}, {'year': 

## StructuredOutputParser

In [None]:
from langchain.output_parsers import (
    StructuredOutputParser,
    ResponseSchema
)
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

response_schemas = [
    ResponseSchema(
        name='answer',
        description='提问的回答内容。'
    ),
    ResponseSchema(
        name='source',
        description='回答内容的出处网址。'
    )
]
output_parser = StructuredOutputParser.from_response_schemas(
    response_schemas)

instructions = output_parser.get_format_instructions()

print(f'instructions: {instructions}')

prompt_tpl = PromptTemplate.from_template(
    template='请尽可能的回答用户所提的问题。\n{input}\n{instructions}',
    partial_variables={'instructions': instructions}
)

llm = OpenAI(model_name='gpt-3.5-turbo-instruct')
# Question: how many ethnic groups (民族) does China have? The earlier text
# misspelled the word as 名族.
prompt = prompt_tpl.format(input='中国有多少个民族？')

output = llm(prompt)
print(f'output: {output}, type: {type(output)}')

output_format = output_parser.parse(output)
print('\noutput_format:')
for name, value in output_format.items():
    print(name, value)


instructions: The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"answer": string  // 提问的回答内容。
	"source": string  // 回答内容的出处网址。
}
```
output: 


```json
{
	"answer": "中国有56个民族。",
	"source": "https://baike.baidu.com/item/%E4%B8%AD%E5%9B%BD%E6%B0%91%E6%97%8F/111037?fr=aladdin"
}
```
, type: <class 'str'>

output_format:
answer 中国有56个民族。
source https://baike.baidu.com/item/%E4%B8%AD%E5%9B%BD%E6%B0%91%E6%97%8F/111037?fr=aladdin


## PydanticOutputParser

In [None]:
!pip install BeautifulSoup4



In [None]:
from pydantic import BaseModel, Field

import requests
from bs4 import BeautifulSoup

from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate
)
from langchain.chat_models import ChatOpenAI


# Schema of the movie details to extract; handed to PydanticOutputParser in
# chatgpt_parse. Documented with comments rather than a class docstring so the
# schema generated from the model stays exactly as it was.
class MovieInfo(BaseModel):
    name: str = Field(description='电影名称')  # movie title
    director: str = Field(description='电影导演')  # director
    scriptwriter: str = Field(description='电影编剧')  # screenwriter
    language: str = Field(description='电影语言')  # spoken language(s)
    release_date: str = Field(description='电影上映日期')  # release date
    movie_type: str = Field(description='电影类型')  # genre
    rating: float = Field(description="电影评分")  # rating score
    length: str = Field(description='电影片长')  # running time


def get_movie_html(url, timeout=10):
    """Download a movie page and return only the fragments holding its details.

    Args:
        url: Address of the movie page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The concatenated markup of the page's first ``<h1>`` element and of the
        elements with ids ``info`` and ``interest_sectl``, skipping any that
        are absent. An empty string when the response status is not 200.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    response = requests.get(
        url,
        # Browser-like User-Agent sent with the request.
        headers={
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/116.0.0.0 '
                'Safari/537.36'
            )},
        # Without a timeout the call can block indefinitely on a stalled server.
        timeout=timeout,
    )
    if response.status_code != 200:
        return ''

    soup = BeautifulSoup(response.text, 'html.parser')
    fragments = (
        soup.h1,
        soup.find(id='info'),
        soup.find(id='interest_sectl'),
    )
    # A missing element comes back as None; str(None) would inject the literal
    # text "None" into the markup, so those are dropped.
    return ''.join(
        str(fragment) for fragment in fragments if fragment is not None
    )


def chatgpt_parse(html):
    """Ask the chat model to extract movie details from an HTML fragment.

    Args:
        html: Markup inserted into the extraction prompt.

    Returns:
        The result of parsing the model's reply with a
        ``PydanticOutputParser`` built for ``MovieInfo``.
    """
    movie_parser = PydanticOutputParser(pydantic_object=MovieInfo)

    # Single human message: the HTML followed by the parser's format instructions.
    extraction_template = (
        '从以下HTML中提取电影信息:\n{html}.\n'
        '{format_instructions}\n'
    )
    human_message = HumanMessagePromptTemplate.from_template(
        template=extraction_template,
    )
    chat_template = ChatPromptTemplate(messages=[human_message])
    prompt_value = chat_template.format_prompt(
        html=html,
        format_instructions=movie_parser.get_format_instructions(),
    )

    chat_model = ChatOpenAI(temperature=0.0)
    reply = chat_model(prompt_value.to_messages())
    return movie_parser.parse(reply.content)


movie_url = 'https://movie.douban.com/subject/1292063/'
html_str = get_movie_html(movie_url)

data = chatgpt_parse(html_str)

print(f'type: {type(data)}')
for key, value in data.dict().items():
    print(f'{key}: {value}')


type: <class '__main__.MovieInfo'>
name: 美丽人生 La vita è bella
director: 罗伯托·贝尼尼
scriptwriter: 温琴佐·切拉米 / 罗伯托·贝尼尼
language: 意大利语 / 德语 / 英语
release_date: 2020-01-03(中国大陆) / 1997-12-20(意大利)
movie_type: 剧情 / 喜剧 / 爱情 / 战争
rating: 9.6
length: 116分钟(国际版) / 125分钟


## OutputFixingParser

In [None]:
from pydantic import BaseModel, Field

from langchain.llms import OpenAI
from langchain.schema.output_parser import OutputParserException
from langchain.output_parsers import (
    PydanticOutputParser,
    OutputFixingParser,
)


# Two-field schema handed to PydanticOutputParser below. Documented with
# comments rather than a class docstring so the generated schema is unchanged.
class MovieInfo(BaseModel):
    name: str = Field(description='电影名称')  # movie title
    director: str = Field(description='电影导演')  # director


parser = PydanticOutputParser(pydantic_object=MovieInfo)
error_info = "{'name': '美丽人生', 'director': '罗伯托·贝尼尼'}"

try:

    parser.parse(error_info)
except OutputParserException as e:
    print(e)
    fix_parser = OutputFixingParser.from_llm(
        parser=parser, llm=OpenAI()
    )
    print(fix_parser.parse(error_info))


Failed to parse MovieInfo from completion {'name': '美丽人生', 'director': '罗伯托·贝尼尼'}. Got: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
name='美丽人生' director='罗伯托·贝尼尼'


## RetryWithErrorOutputParser

In [None]:
# BaseModel, Field and PromptTemplate are imported here so the cell no longer
# relies on names left over from earlier cells.
from pydantic import BaseModel, Field

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import OutputParserException
from langchain.output_parsers import (
    PydanticOutputParser,
    RetryWithErrorOutputParser,
)


# Schema the parsed response must satisfy; both fields are required.
# Documented with comments rather than a class docstring so the schema in the
# format instructions is unchanged.
class Action(BaseModel):
    action: str = Field(description='要执行的动作')  # action to perform
    action_input: str = Field(description='动作的输入')  # input for the action


parser = PydanticOutputParser(pydantic_object=Action)
prompt = PromptTemplate(
    template='请回答用户的问题.\n{instructions}\n{query}\n',
    input_variables=['query'],
    partial_variables={
        'instructions': parser.get_format_instructions()
    },
)

prompt_value = prompt.format_prompt(query='西游记的作者是谁？')
# Deliberately incomplete response: the ``action_input`` field is missing.
bad_response = '{"action": "search"}'

try:
    parser.parse(bad_response)
except OutputParserException:
    # Hand the failed completion and the original prompt to the retry parser,
    # which asks the LLM for a corrected response.
    retry_parser = RetryWithErrorOutputParser.from_llm(
        parser=parser, llm=OpenAI(temperature=0)
    )
    print(retry_parser.parse_with_prompt(bad_response, prompt_value))


action='search' action_input='西游记的作者是谁？'


## 自定义Output Parser

In [None]:
import re

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import BaseOutputParser


class UMLOutputParser(BaseOutputParser):
    """Output parser that pulls the body out of a fenced ``uml`` code block."""

    def get_format_instructions(self):
        """Return the prompt text telling the model to answer in a ``uml`` fence."""
        return (
            'The output should be a markdown code snippet '
            'formatted in the following schema, '
            'including the leading and trailing "```uml" and "```":\n'
            '```uml\n'
            '@startuml\n'
            '......\n'
            '@enduml\n'
            '```'
        )

    def parse(self, text: str):
        """Return the stripped contents of the first fenced code block in ``text``.

        Args:
            text: Raw completion returned by the model.

        Returns:
            The text between the opening fence (with or without the ``uml``
            tag) and the first closing fence, with surrounding whitespace
            removed.

        Raises:
            OutputParserException: If ``text`` contains no fenced code block.
        """
        # Imported locally so the class does not depend on another cell's imports.
        from langchain.schema.output_parser import OutputParserException

        # Non-greedy capture: stop at the first closing fence instead of
        # swallowing everything up to the last fence in the completion.
        match = re.search(r"```(uml)?(.*?)```", text, re.DOTALL)
        if match is None:
            # Previously this path failed with AttributeError on None.
            raise OutputParserException(
                f'Could not find a fenced code block in output: {text!r}'
            )
        return match.group(2).strip()

    @property
    def _type(self):
        """Identifier string for this parser type."""
        return 'uml'


output_parser = UMLOutputParser()
instructions = output_parser.get_format_instructions()

prompt_tpl = PromptTemplate.from_template(
    template='请画一张{content}。\n{instructions}',
    partial_variables={'instructions': instructions}
)

prompt = prompt_tpl.format(content='采购审批流程图')

llm = OpenAI(model_name='gpt-3.5-turbo-instruct')
output = llm(prompt)

output_format = output_parser.parse(output)
print(output_format)


@startuml
title 采购审批流程图

start
:提交采购申请;
:部门经理审批;
if (金额 <= 5000) then (是)
  :审批通过;
  else (否)
  :提交给总经理;
endif
:总经理审批;
if (金额 <= 10000) then (是)
  :审批通过;
  else (否)
  :提交给财务部;
endif
:财务部审批;
if (金额 <= 20000) then (是)
  :审批通过;
  else (否)
  :提交给采购部;
endif
:采购部审批;
:生成采购订单;
:采购商品;
:收到商品并验收;
:付款;
stop
@enduml
