### 提取单个实例的实体

In [1]:
from typing import Optional

from pydantic import BaseModel, Field


class Person(BaseModel):
    # Every attribute is optional and defaults to None, so a value that is not
    # present in the text can be left unset.
    # The `description` strings are runtime data and are kept verbatim.
    name: Optional[str] = Field(None, description="名字")
    hair_color: Optional[str] = Field(None, description="该人的头发颜色（如果已知）")
    # Typed as a string, so the value is neither parsed nor unit-checked
    # (the recorded run below returns '170cm').
    height_in_meters: Optional[str] = Field(None, description="身高（米）")

In [2]:
from langchain_core.prompts import ChatPromptTemplate

# Custom prompt that carries the extraction instructions and any extra context.
# 1) Examples can be added to the prompt template to improve extraction quality.
# 2) Extra parameters can be introduced to supply context (for example,
#    metadata about the document the text was extracted from).
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent literals are joined into a single system message.
            "您是专家级提取算法。仅从文本中提取相关信息。"
            "如果您不知道要求提取的属性的值， 返回 null 作为属性的值。",
        ),
        # The text to analyse is substituted for the {text} placeholder.
        ("human", "{text}"),
    ]
)

In [3]:
from langchain_openai import ChatOpenAI

# Chat model client pointed at an endpoint on localhost.
# NOTE(review): port 11434 with a "/v1" path looks like a local Ollama server
# exposing an OpenAI-compatible API — confirm.
# NOTE(review): "EMPTY" is presumably a placeholder key that the local endpoint
# ignores — confirm.
llm = ChatOpenAI(
    model="shmily_006/Qw3",
    base_url="http://localhost:11434/v1",
    api_key="EMPTY",
)

In [4]:
# Wrap the chat model so that invoking it yields a `Person` instance
# (see the recorded output of the next cell).
structured_llm = llm.with_structured_output(schema=Person)

In [7]:
# Sample sentence naming one person, with a height and a hair colour.
text = "艾伦·史密斯 （Alan Smith） 身高 170cm，有一头金发。"
# Fill the {text} slot of the prompt template with the sample sentence.
prompt = prompt_template.invoke({"text": text})
# Last expression of the cell: the structured result is shown as the cell output.
structured_llm.invoke(prompt)

Person(name='艾伦·史密斯', hair_color='金发', height_in_meters='170cm')

### 提取多个实例的实体

In [8]:
from typing import List, Optional

class Person(BaseModel):
    # Redefinition that shadows the earlier `Person` model: the same three
    # optional string fields, but with shorter descriptions.
    # The `description` strings are runtime data and are kept verbatim.
    name: Optional[str] = Field(None, description="名字")
    hair_color: Optional[str] = Field(None, description="头发颜色")
    height_in_meters: Optional[str] = Field(None, description="身高")


class Data(BaseModel):
    """提取的有关人员的数据。"""

    # NOTE(review): the docstring above is left untranslated on purpose — it is
    # presumably forwarded to the model as the schema description; confirm.
    # Wrapper model holding a list, so that several entities can be extracted at once.
    people: List[Person]

In [11]:
# Re-target the structured-output wrapper at `Data` so a list of people is returned.
structured_llm = llm.with_structured_output(schema=Data)
# Two people are mentioned; the second one's hair colour is given only by
# reference to the first.
text = "我叫 Jeff，我的头发是黑色的，身高 170cm。安娜的头发和我一样。"
prompt = prompt_template.invoke({"text": text})
# Last expression of the cell: the structured result is shown as the cell output.
structured_llm.invoke(prompt)

Data(people=[Person(name='Jeff', hair_color='黑色', height_in_meters='1.70'), Person(name='安娜', hair_color='黑色', height_in_meters='1.70')])

### 提供示例

In [13]:
# Arithmetic conversation: two answered user/assistant exchanges followed by
# one open user question.
messages = [
    {"role": role, "content": content}
    for role, content in (
        ("user", "2 + 2"),
        ("assistant", "4"),
        ("user", "2 + 3"),
        ("assistant", "5"),
        ("user", "3 + 4"),
    )
]

# Send the conversation to the plain chat model (no structured output) and
# print the text of its reply.
response = llm.invoke(messages)
print(response.content)

$3 + 4 = 7$


In [14]:
from langchain_core.utils.function_calling import tool_example_to_messages

# Reference examples: each pairs an input text with the `Data` value expected for it.
examples = [
    # No person is mentioned, so the expected extraction is empty.
    ("海洋浩瀚而湛蓝。它有 20,000 多英尺深。", Data(people=[])),
    # One person is named; hair colour and height are not stated.
    (
        "菲奥娜从法国长途跋涉到西班牙。",
        Data(people=[Person(name="菲奥娜", hair_color=None, height_in_meters=None)]),
    ),
]

# Convert every example into chat messages and collect them in order.
messages = []
for txt, tool_call in examples:
    # The closing assistant remark depends on whether the example contains a person.
    ai_response = "检测到人员。" if tool_call.people else "未检测到人员。"
    messages += tool_example_to_messages(txt, [tool_call], ai_response=ai_response)

  messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))


In [15]:
# Print each example message in turn to inspect the generated conversation.
for message in messages:
    message.pretty_print()


海洋浩瀚而湛蓝。它有 20,000 多英尺深。
Tool Calls:
  Data (196ad4f5-14c0-4098-8bd4-73fa600549ff)
 Call ID: 196ad4f5-14c0-4098-8bd4-73fa600549ff
  Args:
    people: []

You have correctly called this tool.

未检测到人员。

菲奥娜从法国长途跋涉到西班牙。
Tool Calls:
  Data (126417e7-47fc-4755-bf3d-868762137561)
 Call ID: 126417e7-47fc-4755-bf3d-868762137561
  Args:
    people: [{'name': '菲奥娜', 'hair_color': None, 'height_in_meters': None}]

You have correctly called this tool.

检测到人员。


In [16]:
# A user message that mentions no person (it is about the solar system and the
# Earth's moon).
message_no_extraction = {
    "role": "user",
    "content": "太阳系很大，但地球只有 1 颗卫星。",
}

# Baseline run: the message is sent on its own, without the example messages.
structured_llm = llm.with_structured_output(schema=Data)
structured_llm.invoke([message_no_extraction])

Data(people=[Person(name='用户', hair_color='黑色', height_in_meters="I don't know"), Person(name='AI助手', hair_color='蓝色', height_in_meters='1.75')])

In [17]:
# Same query, this time preceded by the example messages built earlier.
structured_llm.invoke(messages + [message_no_extraction])

Data(people=[])