In [1]:
import os

# Look up the OpenAI credential in the process environment; the value is None when the variable is unset.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers.json import SimpleJsonOutputParser 

In [3]:
# Chat model configured to request the "json_object" response format,
# so the reply can be handed straight to a JSON output parser.
chat = ChatOpenAI(
    model="gpt-4o-mini",
    model_kwargs={"response_format": {"type": "json_object"}},
)

In [4]:
# Prompt for the JSON-mode extraction chain. It has a single template variable,
# `message_to_extract`, which carries the free text to analyse.
#
# `from_messages` takes a *sequence* of message-like items, so the
# (role, template) pair has to be wrapped in a list. A bare tuple is itself
# read as a sequence of two separate string messages (the word "system" and
# the instructions) instead of one message with the system role.
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", """I want you to extract the person name, age and a description from the following text.
    Here is the JSON schema:
    "name": string
    "age": int
    "description": string
    {message_to_extract}
    --- 
    If there are multiple people, then put them in a 'persons' key, which is a list of the above schema.
    """)
])

In [6]:
# Extraction pipeline: fill the prompt, call the chat model, then parse the reply as JSON.
chain = chat_prompt | chat | SimpleJsonOutputParser()

In [7]:
# Sample passage that mentions three different people, keyed by the
# prompt's only template variable.
extraction_input = {
    "message_to_extract": """Alice is 28 years old and she is a graphic designer. She enjoys painting and hiking in her free time.
    Bob is 45 years old and works as a project manager, he enjoys reading books and travelling.
    Charlie is 32 years old and he is a software developer, he likes playing video games and coding.
    """
}

# Run the JSON extraction chain on the passage and show the parsed result.
multiple_results = chain.invoke(extraction_input)

print(multiple_results)

{'persons': [{'name': 'Alice', 'age': 28, 'description': 'She is a graphic designer. She enjoys painting and hiking in her free time.'}, {'name': 'Bob', 'age': 45, 'description': 'He works as a project manager, he enjoys reading books and travelling.'}, {'name': 'Charlie', 'age': 32, 'description': 'He is a software developer, he likes playing video games and coding.'}]}


In [8]:
from pydantic.v1 import BaseModel
from typing import List
from langchain.chains.openai_tools import create_extraction_chain_pydantic

In [9]:
# Schema for a single extracted person. This class is what gets passed to
# `create_extraction_chain_pydantic` further down in the notebook.
# NOTE(review): the extraction helper presumably forwards the class docstring
# and field names/types to the model as the tool schema — confirm before
# rewording the docstring or renaming fields.
class Person(BaseModel):
    """A person object that we want to extract from the text"""
    name: str  # the person's name
    age: int  # the person's age as an integer

# Previously we had to write a wrapper model like this to get several people back.
# It is kept for comparison only: the extraction chain in this notebook is
# built from `Person` alone.
class Persons(BaseModel):
    # One `Person` entry per individual.
    persons: List[Person]

In [11]:
# The extraction helper relies on tool calling, so pick a recent model that supports tools.
model = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
)

In [16]:
# Build the tool-based extraction chain from the `Person` schema and the chat model.
# Note: this rebinds `chain`, replacing the JSON-mode pipeline defined earlier.
chain = create_extraction_chain_pydantic(
    Person,
    model,
)

In [17]:
# One sentence mentioning two people; the cell's last expression is displayed as its output.
chain.invoke(
    {"input": "Marcell is 31 and Zsolt is 28 years old"}
)

[Person(name='Marcell', age=31), Person(name='Zsolt', age=28)]