## Build an Extraction Chain

 - use tool-calling features of chat models to extract structured information from unstructured text



In [9]:
from typing import Optional

from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    height_in_meters: Optional[str] = Field(
        default=None, description="Height measured in meters"
    )



from langchain_core.prompts import ChatPromptTemplate
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

import os
# è®¾ç½® OpenAI API å¯†é’¥
os.environ["OPENAI_API_KEY"] = "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"

from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o-mini")
structured_llm = llm.with_structured_output(Person)
text = "Alan Smith is 6 feet tall and has blond hair."
prompt = prompt_template.invoke({"text": text})
print(prompt)

result = structured_llm.invoke(prompt)
print(result)


messages=[SystemMessage(content="You are an expert extraction algorithm. Only extract relevant information from the text. If you do not know the value of an attribute asked to extract, return null for the attribute's value.", additional_kwargs={}, response_metadata={}), HumanMessage(content='Alan Smith is 6 feet tall and has blond hair.', additional_kwargs={}, response_metadata={})]
name='Alan Smith' hair_color='blond' height_in_meters='1.83'


In [14]:
from typing import List

class PersonList(BaseModel):
    people:List[Person]

structured_llm = llm.with_structured_output(schema=PersonList)
text = "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."
prompt = prompt_template.invoke({"text": text})
response = structured_llm.invoke(prompt)
print(response)


people=[Person(name='Jeff', hair_color='black', height_in_meters='1.83'), Person(name='Anna', hair_color='black', height_in_meters=None)]


## few-shot prompting.

In [15]:
messages = [
    {"role": "user", "content": "2 ðŸ¦œ 2"},
    {"role": "assistant", "content": "4"},
    {"role": "user", "content": "2 ðŸ¦œ 3"},
    {"role": "assistant", "content": "5"},
    {"role": "user", "content": "3 ðŸ¦œ 4"},
]

response = llm.invoke(messages)
print(response.content)

7


In [27]:
from langchain_core.utils.function_calling import tool_example_to_messages

examples = [
    (
        "The ocean is vast and blue. It's more than 20,000 feet deep.",
        PersonList(people=[]),
    ),
    (
        "Fiona traveled far from France to Spain.",
        PersonList(people=[Person(name="Fiona", height_in_meters=None, hair_color=None)]),
    ),
]
messages = []

for txt,tool_call in examples:
    if tool_call.people:
        ai_repsonse = "Detected People."
    else :
        ai_repsonse = "No people detected."
    messages.extend(tool_example_to_messages(txt,[tool_call], ai_repsonse))
    

for message in messages:
    message.pretty_print()


The ocean is vast and blue. It's more than 20,000 feet deep.
Tool Calls:
  PersonList (0efda4c9-8a27-43df-a943-9d08fec65140)
 Call ID: 0efda4c9-8a27-43df-a943-9d08fec65140
  Args:
    people: []

N

Fiona traveled far from France to Spain.
Tool Calls:
  PersonList (087eeaae-1270-4945-a5d9-97006653999c)
 Call ID: 087eeaae-1270-4945-a5d9-97006653999c
  Args:
    people: [{'name': 'Fiona', 'hair_color': None, 'height_in_meters': None}]

D


In [34]:
message_no_extraction = {
    "role": "user",
    "content": "The solar system is large, but earth has only 1 moon.",
}
message_no_extraction = {
    "role": "user",
    "content": "The solar system is large, but earth has only 1 moon.",
}

structured_llm = llm.with_structured_output(schema=PersonList)
print(type(llm))
print(type(structured_llm))
res = structured_llm.invoke([message_no_extraction])
print(res)

rest = structured_llm.invoke(messages + [message_no_extraction])
print(rest)

<class 'langchain_openai.chat_models.base.ChatOpenAI'>
<class 'langchain_core.runnables.base.RunnableSequence'>
people=[Person(name='Luna', hair_color='None', height_in_meters='None'), Person(name='Apollo', hair_color='None', height_in_meters='None'), Person(name='Graviton', hair_color='None', height_in_meters='None')]
people=[]
