# OutputParsers

### ENV

In [76]:
# 引入依赖包，这里的 pydantic 版本为 v2
from pydantic import BaseModel, Field, model_validator
from langchain_deepseek import ChatDeepSeek
from langchain.tools import tool
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.output_parsers import JsonOutputParser
from langchain.output_parsers import RetryOutputParser, OutputFixingParser
from typing import Dict
from pydantic import BaseModel, Field

# Configure the DeepSeek chat model used by every cell below.
#
# The API key is read from the DEEPSEEK_API_KEY environment variable rather
# than being hard-coded, so the secret never ends up in the notebook or in
# version control. Any key that was previously committed in plain text should
# be treated as leaked and revoked.
import os

llm = ChatDeepSeek(
    model="deepseek-chat",
    temperature=0,
    # Raises KeyError right here if the variable is not set, instead of
    # failing later with an opaque authentication error.
    api_key=os.environ["DEEPSEEK_API_KEY"],
    api_base="https://api.deepseek.com",
)

In [16]:
# Baseline: ask the bare model (no tools attached) about the current weather.
llm.invoke("北京现在的天气怎么样？")

AIMessage(content='由于我无法实时获取网络数据，无法提供北京当前的天气情况。不过，你可以通过以下方式轻松查询：\n\n1. **天气应用**：打开手机上的天气应用（如苹果天气、墨迹天气等），搜索“北京”即可查看实时天气、温度、空气质量等详细信息。\n\n2. **搜索引擎**：在浏览器中输入“北京实时天气”，搜索结果会直接显示当前天气状况（如晴/雨、温度、风力等）。\n\n3. **气象网站**：访问中国天气网（[www.weather.com.cn](http://www.weather.com.cn)）或中央气象台官网获取权威预报。\n\n**提示**：北京近期处于夏季，常见高温多雨天气，建议出门前关注紫外线指数和降水概率，做好防晒或防雨准备。如需更具体的穿衣或出行建议，可以告诉我你的需求哦！ ☀️🌧️', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 182, 'prompt_tokens': 8, 'total_tokens': 190, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}, 'prompt_cache_hit_tokens': 0, 'prompt_cache_miss_tokens': 8}, 'model_name': 'deepseek-chat', 'system_fingerprint': 'fp_8802369eaa_prod0623_fp8_kvcache', 'id': '1b1b1507-7176-4365-a0c2-76adc7e78b41', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--61b68fb4-9676-430c-a5d5-144362451ff6-0', usage_metadata={'input_tokens': 8, 'output_tokens': 182, 'total_tokens':

### 1 测试 

In [17]:
# =======================
# 1. Define the tool
# =======================
# NOTE(review): `@tool` presumably exposes the docstring below to the model as
# the tool description, so its text is left untouched — confirm before
# rewording or translating it.
@tool
def get_weather(location: str) -> str:
    """
    根据地名返回模拟天气信息。
    """
    # Canned reply: no real weather lookup happens, only the location is
    # interpolated into a fixed sentence.
    return f"天气晴朗，{location}当前温度为22摄氏度"


# =======================
# 2. Build the prompt
# =======================
# A custom tool-calling prompt: system instruction, the user's input, and the
# agent_scratchpad placeholder. No chat_history slot is included.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是一个有用的助手。如果问题涉及天气，请使用工具查询后再回答。"),
        ("human", "{input}"),
        # Per the original author's note this placeholder must be kept.
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# =======================
# 3. Create the agent (model + tools + prompt)
# =======================
agent = create_tool_calling_agent(llm, [get_weather], prompt)

# =======================
# 4. Wrap the agent in an AgentExecutor
# =======================
agent_executor = AgentExecutor(
    tools=[get_weather],
    agent=agent,
    handle_parsing_errors=True,  # let the executor deal with parsing errors
    verbose=True,  # print the intermediate steps
)

# =======================
# 5. Run a test query
# =======================
response = agent_executor.invoke({"input": "北京现在的天气怎么样？"})

print("\n最终回答：", response["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_weather` with `{'location': '北京'}`


[0m[36;1m[1;3m天气晴朗，北京当前温度为22摄氏度[0m[32;1m[1;3m北京现在的天气晴朗，温度为22摄氏度。[0m

[1m> Finished chain.[0m

最终回答： 北京现在的天气晴朗，温度为22摄氏度。


### 2 解析器

In [30]:
# Data model for a two-part joke (setup question + punchline), used as the
# target schema for the output parsers.
#
# NOTE: documented with comments instead of a class docstring on purpose.
# Pydantic copies a model's docstring into the "description" of its JSON
# schema, and the parser's format instructions embed that schema, so adding a
# docstring here would change the prompt sent to the model.
class Joke(BaseModel):
    setup: str = Field(description="笑话中的铺垫问题，必须以？结尾")
    punchline: str = Field(description="笑话中回答铺垫问题的部分，通常是一种抖包袱方式回答铺垫问题，例如谐音、会错意等")

    @model_validator(mode="before")
    @classmethod
    def question_ends_with_question_mark(cls, values: Dict) -> Dict:
        """Reject a ``setup`` that does not end with ``?`` or ``？``.

        This runs before field validation, so ``values`` is the raw input and
        is not guaranteed to be a dict, nor is ``setup`` guaranteed to be a
        string. Anything this validator cannot inspect is passed through
        unchanged so that pydantic's own field validation reports it as a
        normal validation error instead of this method raising
        ``AttributeError``.

        :param values: Raw input handed to the model, normally a dict.
        :return: ``values`` unchanged.
        :raises ValueError: If ``setup`` is a string (including the empty
            string) that does not end with an ASCII or full-width question
            mark.
        """
        if not isinstance(values, dict):
            # e.g. the model produced a JSON list or a bare string.
            return values

        setup = values.get("setup")
        # A missing or non-string setup is left for field validation to report.
        if isinstance(setup, str) and not setup.endswith(("?", "？")):
            raise ValueError("Badly formed question! The setup must end with a question mark (? or ？).")
        return values

#### Pydantic

In [31]:
# Parser bound to the Joke model, plus a prompt that embeds the parser's
# format instructions ahead of the user's query.
parser = PydanticOutputParser(pydantic_object=Joke)
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="回答用户的查询.\n{format_instructions}\n{query}\n",
)

# Compose prompt and model into a simple LCEL chain (no parser yet).
prompt_and_model = prompt | llm

# Call the model and show its raw message.
output = prompt_and_model.invoke({"query": "给我讲一个笑话"})
print("Model output:", output)

# Parse the raw message into a Joke; report any failure instead of raising.
try:
    parsed_output = parser.invoke(output)
    print("Parsed output:", parsed_output)
except Exception as exc:
    print(f"Failed to parse output: {exc}")

Model output: content='```json\n{\n  "setup": "为什么数学书总是很忧郁？",\n  "punchline": "因为它有太多的问题！"\n}\n```' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 227, 'total_tokens': 259, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 192}, 'prompt_cache_hit_tokens': 192, 'prompt_cache_miss_tokens': 35}, 'model_name': 'deepseek-chat', 'system_fingerprint': 'fp_8802369eaa_prod0623_fp8_kvcache', 'id': '8dd76625-b57f-4d99-ba1d-b777781c655c', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None} id='run--91f02c02-5181-49a1-aa0b-9c43e9d00701-0' usage_metadata={'input_tokens': 227, 'output_tokens': 32, 'total_tokens': 259, 'input_token_details': {'cache_read': 192}, 'output_token_details': {}}
Parsed output: setup='为什么数学书总是很忧郁？' punchline='因为它有太多的问题！'


#### JSON

In [32]:
# JSON parser created without a schema, plus a prompt that embeds the parser's
# format instructions ahead of the user's query.
parser = JsonOutputParser()
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="回答用户的查询.\n{format_instructions}\n{query}\n",
)

# Compose prompt and model into a simple LCEL chain (no parser yet).
prompt_and_model = prompt | llm

# Call the model and show its raw message.
output = prompt_and_model.invoke({"query": "给我讲一个笑话"})
print("Model output:", output)

# Parse the message text as JSON; report any failure instead of raising.
try:
    parsed_output = parser.parse(output.content)
    print("Parsed output (JSON format):", parsed_output)
except Exception as exc:
    print(f"Failed to parse output: {exc}")

Model output: content='```json\n{\n  "joke": "为什么电脑经常感冒？因为它总是开着窗户（Windows）！"\n}\n```' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 17, 'total_tokens': 43, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}, 'prompt_cache_hit_tokens': 0, 'prompt_cache_miss_tokens': 17}, 'model_name': 'deepseek-chat', 'system_fingerprint': 'fp_8802369eaa_prod0623_fp8_kvcache', 'id': '7b1b74ce-0114-4cd3-8d20-2eec51ef3d51', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None} id='run--84b0e994-0c80-464e-a3ac-038846b7c7ae-0' usage_metadata={'input_tokens': 17, 'output_tokens': 26, 'total_tokens': 43, 'input_token_details': {'cache_read': 0}, 'output_token_details': {}}
Parsed output (JSON format): {'joke': '为什么电脑经常感冒？因为它总是开着窗户（Windows）！'}


In [58]:
# Compose prompt, model and parser into a single LCEL chain.
chain = prompt | llm | parser

# Stream the chain. Each item the parser yields is a snapshot of everything
# parsed so far, not a text delta, so every snapshot goes on its own line;
# printing them with end="" glues the repeated snapshots into one unreadable
# run-on line.
for s in chain.stream({"query": "给我讲一个关于程序员编程的笑话"}):
    print(s, flush=True)

action='tell_joke' action_input=''action='tell_joke' action_input='Why'action='tell_joke' action_input='Why do'action='tell_joke' action_input='Why do programmers'action='tell_joke' action_input='Why do programmers prefer'action='tell_joke' action_input='Why do programmers prefer dark'action='tell_joke' action_input='Why do programmers prefer dark mode'action='tell_joke' action_input='Why do programmers prefer dark mode?'action='tell_joke' action_input='Why do programmers prefer dark mode? Because'action='tell_joke' action_input='Why do programmers prefer dark mode? Because light'action='tell_joke' action_input='Why do programmers prefer dark mode? Because light attracts'action='tell_joke' action_input='Why do programmers prefer dark mode? Because light attracts bugs'action='tell_joke' action_input='Why do programmers prefer dark mode? Because light attracts bugs!'

### 3 LLM应用容错机制

#### 正常流程

In [59]:
# Prompt text asking the model for an Action / Action Input pair.
# NOTE(review): `template` is not referenced again in the visible code — the
# PromptTemplate created in this cell is given its own inline template string.
# Confirm whether this one was meant to be used.
template = """Based on the user question, provide an Action and Action Input for what step should be taken.
{format_instructions}
Question: {query}
Response:"""

# Schema for the structured reply: the action to take and the input for it.
# Both fields are strings without defaults, so both are required.
class Action(BaseModel):
    action: str = Field(description="action to take")
    action_input: str = Field(description="input to the action")
# Parser bound to the Action model.
parser = PydanticOutputParser(pydantic_object=Action)

# Prompt that embeds the parser's format instructions ahead of the query.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# Render the prompt once and keep the result; it is printed here so the full
# text sent to the model can be inspected.
prompt_values = prompt.format_prompt(query="北京天气如何？")
print(prompt_values)

# Compose prompt and model into a simple LCEL chain (no parser yet).
prompt_and_model = prompt | llm

# Call the model and show its raw message.
output = prompt_and_model.invoke({"query": "给我讲一个笑话"})
print("Model output:", output)

# Parse the message text into an Action; report any failure instead of raising.
try:
    parsed_output = parser.parse(output.content)
    print("Parsed output (JSON format):", parsed_output)
except Exception as exc:
    print(f"Failed to parse output: {exc}")

text='Answer the user query.\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"action": {"description": "action to take", "title": "Action", "type": "string"}, "action_input": {"description": "input to the action", "title": "Action Input", "type": "string"}}, "required": ["action", "action_input"]}\n```\n北京天气如何？\n'
Model output: content='```json\n{\n  "action": "tell_joke",\n  "action_input": "为什么电脑很笨？因为它总是被人‘点’来‘点’去！"\n}\n```' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 200, 'total_tokens': 239, 'co

#### RetryOutputParser

In [72]:
# A completion that lacks the `action_input` field, used to trigger a parse
# failure on purpose.
bad_response = '{"action": "search"}'
try:
    parsed_output = parser.parse(bad_response)
    print("Parsed output (JSON format):", parsed_output)
except Exception as exc:
    print(f"Failed to parse output: {exc}\n")
    # Recovery: wrap the parser in a RetryOutputParser and let it try again
    # with the bad completion and the original prompt. A different llm could
    # be passed here.
    retry_parser = RetryOutputParser.from_llm(llm=llm, parser=parser)
    print(retry_parser)
    print("\n")
    print(retry_parser.parse_with_prompt(bad_response, prompt_values))

Failed to parse output: Failed to parse Action from completion {"action": "search"}. Got: 1 validation error for Action
action_input
  Field required [type=missing, input_value={'action': 'search'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

parser=PydanticOutputParser(pydantic_object=<class '__main__.Action'>) retry_chain=PromptTemplate(input_variables=['completion', 'prompt'], input_types={}, partial_variables={}, template='Prompt:\n{prompt}\nCompletion:\n{completion}\n\nAbove, the Completion did not satisfy the constraints given in the Prompt.\nPlease try again:')
| ChatDeepSeek(client=<openai.resources.chat.completions.completions.Completions object at 0x000001F4D413D280>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001F4D413E660>, root_client=<openai.OpenAI object at 0x000001

#### OutputFixingParser

In [79]:
# A completion that lacks the `action_input` field, used to trigger a parse
# failure on purpose.
bad_response = '{"action": "search"}'
try:
    parsed_output = parser.parse(bad_response)
    print("Parsed output (JSON format):", parsed_output)
except Exception as exc:
    print(f"Failed to parse output: {exc}\n")
    # Recovery: wrap the parser in an OutputFixingParser. A different llm
    # could be passed here.
    fix_parser = OutputFixingParser.from_llm(llm=llm, parser=parser)
    print(fix_parser)

    print("\n")
    print(fix_parser.parse(bad_response))
    print("\n")
    # NOTE(review): the fixing parser's template (printed above) has no slot
    # for the prompt, so this call may behave like the plain parse() — confirm.
    print(fix_parser.parse_with_prompt(bad_response, prompt_values))

Failed to parse output: Failed to parse Action from completion {"action": "search"}. Got: 1 validation error for Action
action_input
  Field required [type=missing, input_value={'action': 'search'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

parser=PydanticOutputParser(pydantic_object=<class '__main__.Action'>) retry_chain=PromptTemplate(input_variables=['completion', 'error', 'instructions'], input_types={}, partial_variables={}, template='Instructions:\n--------------\n{instructions}\n--------------\nCompletion:\n--------------\n{completion}\n--------------\n\nAbove, the Completion did not satisfy the constraints given in the Instructions.\nError:\n--------------\n{error}\n--------------\n\nPlease try again. Please only respond with an answer that satisfies the constraints laid out in the Instructions:')
| ChatDeepSeek(client

#### 自定义解析器

In [85]:
from typing import Any
from langchain_core.output_parsers import BaseOutputParser  # 从 langchain_core 导入
from langchain_core.exceptions import OutputParserException  # 从 langchain_core.exceptions 导入
from pydantic import BaseModel, Field  # 导入 Pydantic 的 BaseModel 和 Field

# Custom parser that maps a YES/NO style answer onto a Python bool.
class BooleanOutputParser(BaseOutputParser[bool]):
    """Custom boolean parser."""

    # Strings that stand for True and False. Matching in parse() ignores case
    # and surrounding whitespace.
    true_val: str = Field(default="YES", description="表示 True 的字符串")
    false_val: str = Field(default="NO", description="表示 False 的字符串")

    def parse(self, text: str) -> bool:
        """
        Convert the given text to a boolean.

        :param text: Raw string to interpret.
        :return: True if the text matches true_val, False if it matches false_val.
        :raises OutputParserException: If the text matches neither value.
        """
        normalized = text.strip().upper()
        truthy = self.true_val.upper()
        falsy = self.false_val.upper()

        if normalized == truthy:
            return True
        if normalized == falsy:
            return False

        # Neither value matched: report the expected values and what was received.
        raise OutputParserException(
            f"BooleanOutputParser expected output value to either be "
            f"{self.true_val} or {self.false_val} (case-insensitive). "
            f"Received {normalized}."
        )

    @property
    def _type(self) -> str:
        """
        Return the type name of this parser.

        :return: The parser's type name.
        """
        return "boolean_output_parser"

# Exercise the parser with two valid inputs and one invalid input.
if __name__ == "__main__":
    parser = BooleanOutputParser(true_val="YES", false_val="NO")

    # "YES" -> True, "NO" -> False, "MEOW" -> OutputParserException.
    for sample in ("YES", "NO", "MEOW"):
        try:
            result = parser.parse(sample)
            print(f"Parsed '{sample}' to: {result}")
        except OutputParserException as e:
            print(f"Error parsing '{sample}': {e}")

Parsed 'YES' to: True
Parsed 'NO' to: False
Error parsing 'MEOW': BooleanOutputParser expected output value to either be YES or NO (case-insensitive). Received MEOW.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 
