In [None]:
import getpass
import os

# Ask for the OpenAI key interactively only when the environment does not
# already provide one, so the secret is never hard-coded in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    # Store the key under the SAME variable name that the guard checks (the
    # original wrote to a misspelled name, so the key was never picked up).
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OpenAI API key:")

from langchain.chat_models import init_chat_model

# Chat model shared by every extraction cell below.
model = init_chat_model(
    "gpt-4o-mini",
    model_provider="openai",
)

In [None]:
# define schema
from typing import Optional
from pydantic import BaseModel, Field

class Person(BaseModel):
    """Information about a Person."""

    # NOTE: the class docstring and the field descriptions are handed to the
    # model as part of the schema (per LangChain's structured-output docs), so
    # treat them as prompt text rather than ordinary comments.
    #
    # Every field is optional with a None default, which lets the extractor
    # omit attributes that the input text does not mention.
    name: Optional[str] = Field(
        default=None,
        description="Name of the person",
    )
    hair_color: Optional[str] = Field(
        default=None,
        description="Colour of the person's hair if known",
    )
    # Declared as a string, not a number.
    height_in_meters: Optional[str] = Field(
        default=None,
        description="Height of the person measured in meters",
    )

In [None]:
# define extractor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Two-message prompt: fixed system instructions plus the user's raw text,
# which is substituted for the `{text}` placeholder at invoke time.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent string literals are joined with nothing in between, so
            # each sentence carries its own trailing space; without it the
            # sentences run together ("algorithm.Only extract ...").
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value",
        ),
        ("human", "{text}"),
    ]
)

# Extractor that asks the chat model for output matching the Person schema.
structured_llm = model.with_structured_output(schema=Person)

In [None]:
# Single-entity extraction: fill the prompt template with a sample sentence and
# send it to the extractor. `structured_llm` is whatever the most recently run
# cell bound it to; in top-to-bottom order that is the Person-schema extractor.
text = "Alan Smith is 6 feet tall and has blond hair"
prompt = prompt_template.invoke({"text": text})
# Bare last expression so the notebook displays the extraction result.
# NOTE(review): the text gives the height in feet while the schema asks for
# meters; any unit conversion is left to the model — verify the output.
structured_llm.invoke(prompt)

In [None]:
# extracting multiple entities
from typing import List

class Data(BaseModel):
    """Extracted data about people"""

    # Container schema: one Person entry per individual found in the text.
    people: List[Person]

# Rebinds `structured_llm`, `text` and `prompt` from the single-entity cell:
# after this cell runs, `structured_llm` extracts a `Data` container (a list
# of people) instead of a single `Person`.
structured_llm = model.with_structured_output(schema=Data)
text = (
    "My name is Jeff, my hair is black and i am 6 feet tall. "
    "Anna has the same color hair as me."
)
prompt = prompt_template.invoke({"text": text})
# Bare last expression so the notebook displays the extraction result.
structured_llm.invoke(prompt)