## 数据输出格式
除了基本的字符串（string）输出，更多情况下我们需要结构化的数据输出。在 Python 中有两种选择：
首先推荐使用 JSON Schema（structured output），它可以跨语言使用；其次是 Pydantic class，使用起来更方便简易。

In [2]:
import os
from langchain_openai import ChatOpenAI


# ChatOpenAI reads OPENAI_API_KEY from the environment on its own, so the key
# only needs to be validated here. Re-assigning os.environ[...] = os.getenv(...)
# is a no-op when the key exists and raises an opaque TypeError when it does not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this notebook."
    )
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

# Equivalent Pydantic approach (kept for reference)
# from typing import Optional
# from langchain_core.pydantic_v1 import BaseModel, Field

# class Joke(BaseModel):
#     """Joke to tell user."""

#     setup: str = Field(description="The setup of the joke")
#     punchline: str = Field(description="The punchline to the joke")
#     rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10")

# JSON Schema describing the structured "joke" answer requested from the model.
# Only `setup` and `punchline` are mandatory; `rating` may be omitted.
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {"type": "string", "description": "The setup of the joke"},
        "punchline": {"type": "string", "description": "The punchline to the joke"},
        "rating": {"type": "integer", "description": "How funny the joke is, from 1 to 10"},
    },
    "required": ["setup", "punchline"],
}
# Bind the JSON schema to the model. With include_raw=True the result is a dict
# with the keys 'raw', 'parsed' and 'parsing_error' (see the recorded cell
# output that follows this cell).
structured_llm = llm.with_structured_output(json_schema,include_raw=True)

# Last expression of the cell, so the notebook displays the returned dict.
structured_llm.invoke("Tell me a joke about cats")

{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_yvtoVNFFebkKkq4l8PfPm8cI', 'function': {'arguments': '{"setup":"Why was the cat sitting on the computer?","punchline":"To keep an eye on the mouse!","rating":8}', 'name': 'joke'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 91, 'total_tokens': 128}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-3b032ae5-0420-4bfc-8e47-011c329dca75-0', tool_calls=[{'name': 'joke', 'args': {'setup': 'Why was the cat sitting on the computer?', 'punchline': 'To keep an eye on the mouse!', 'rating': 8}, 'id': 'call_yvtoVNFFebkKkq4l8PfPm8cI'}], usage_metadata={'input_tokens': 91, 'output_tokens': 37, 'total_tokens': 128}),
 'parsed': {'setup': 'Why was the cat sitting on the computer?',
  'punchline': 'To keep an eye on the mouse!',
  'rating': 8},
 'parsing_error': None}

当用户的提问并不是要求讲笑话时，我们需要一个后备（fallback）方案来回复普通提问，其输出格式也不必遵循 Joke 的结构。

In [1]:
from typing import Union

from pydantic import BaseModel, Field

from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field


class Joke(BaseModel):
    """Joke to tell user."""

    # NOTE: the class docstring and the Field descriptions are exported as part
    # of the model schema, so they are kept word-for-word.
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Explicit default: the field stays optional regardless of which pydantic
    # flavour `BaseModel`/`Field` resolve to, instead of relying on pydantic v1
    # implicitly defaulting Optional fields to None.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )

# structured_llm = llm.with_structured_output(Joke)

class ConversationalResponse(BaseModel):
    """Respond in a conversational manner. Be kind and helpful."""

    # Output shape for ordinary (non-joke) replies: a single free-text field.
    # The docstring above and the description below are schema text, so they
    # are intentionally left untouched.
    response: str = Field(description="A conversational response to the user's query")


class Response(BaseModel):
    # Wrapper model: `output` holds either a Joke or a ConversationalResponse.
    # Documented with comments rather than a docstring on purpose; a docstring
    # would presumably be exported as the schema description (TODO confirm).
    output: Union[Joke, ConversationalResponse]


# Bind the Union wrapper so the answer can take either of the two output shapes.
# NOTE(review): `llm` is not defined in this cell; the recorded NameError below
# suggests the cell was run before the cell that creates `llm`.
structured_llm = llm.with_structured_output(Response)

# structured_llm.invoke("给我一个关于教育的笑话")
# (The Chinese prompt above reads: "Give me a joke about education".)

# Stream the structured output and print every chunk that is yielded.
for chunk in structured_llm.stream("Tell me a joke about cats"):
    print(chunk)

NameError: name 'llm' is not defined

多层嵌套结构的使用（例如 People 中包含由 Person 组成的列表）

In [None]:
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # `...` (Ellipsis) as the first Field argument marks the field as required.
    # The docstring and the descriptions are schema text and are left untouched.
    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    # Nested model: a list of Person entries.
    people: List[Person]


# Set up a parser built around the People model; it also supplies the format
# instructions that are injected into the prompt.
parser = PydanticOutputParser(pydantic_object=People)

# Prompt: the system message carries the parser's format instructions, the
# human message carries the user query. `{format_instructions}` is pre-filled
# via .partial(), so only `{query}` remains to be supplied at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Wrap the output in `json` tags\n{format_instructions}",
        ),
        ("human", "{query}"),
    ]
).partial(format_instructions=parser.get_format_instructions())

也可以完全自定义输出的解析方式（自行编写提示词和解析函数）

In [13]:
import json
import re
from typing import List

from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # `...` (Ellipsis) as the first Field argument marks the field as required.
    # The docstring and the descriptions end up in the schema that is embedded
    # in the prompt, so they are left untouched.
    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    # Nested model: a list of Person entries.
    people: List[Person]


# Prompt
# The schema is serialised with json.dumps so the text between the ```json
# fences is real JSON. Passing the dict itself would render it via str(), i.e.
# as a Python literal with single quotes, which contradicts the "json" label.
# `{schema}` is pre-filled via .partial(); only `{query}` is supplied at invoke
# time.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Output your answer as JSON that  "
            "matches the given schema: ```json\n{schema}\n```. "
            "Make sure to wrap the answer in ```json and ``` tags",
        ),
        ("human", "{query}"),
    ]
).partial(schema=json.dumps(People.schema()))


# Custom parser
def extract_json(message: AIMessage) -> List[dict]:
    """Parse every ```json ... ``` fenced block found in a model message.

    Parameters:
        message (AIMessage): The model reply; its ``content`` text is searched
            for fenced JSON blocks.

    Returns:
        list: One parsed JSON value per fenced block, in order of appearance.
            Empty when the message contains no fenced block.

    Raises:
        ValueError: If the content of any fenced block is not valid JSON. The
            underlying ``json.JSONDecodeError`` is attached as the cause.
    """
    text = message.content
    # Non-greedy match combined with DOTALL: each fenced block is captured
    # separately and may span several lines.
    pattern = r"```json(.*?)```"
    matches = re.findall(pattern, text, re.DOTALL)

    try:
        # Surrounding whitespace/newlines inside the fence are not part of the JSON.
        return [json.loads(match.strip()) for match in matches]
    except json.JSONDecodeError as err:
        # Chain the decoder error so the offending position stays visible.
        raise ValueError(f"Failed to parse: {message}") from err
    
# print(prompt.invoke({"query": "NBA球星张母思2024年加入洛杉矶湖人队，年薪3000万美元，身高7feet, 年龄19岁"}))

# Compose the pipeline: prompt -> model -> custom parser function.
chain = prompt | llm | extract_json

# Last expression of the cell, so the notebook displays the parsed list.
chain.invoke({"query": "Anna is 23 years old and she is 6 feet tall"})

[{'title': 'People',
  'description': 'Identifying information about all people in a text.',
  'type': 'object',
  'properties': {'people': {'title': 'People',
    'type': 'array',
    'items': {'$ref': '#/definitions/Person'}}},
  'required': ['people'],
  'definitions': {'Person': {'title': 'Person',
    'description': 'Information about a person.',
    'type': 'object',
    'properties': {'name': {'title': 'Name',
      'description': 'The name of the person',
      'type': 'string'},
     'height_in_meters': {'title': 'Height In Meters',
      'description': 'The height of the person expressed in meters.',
      'type': 'number'}},
    'required': ['name', 'height_in_meters']}}}]