In [1]:
import os
import logging
from dotenv import load_dotenv
import json
from pprint import pprint

from typing import Optional, Sequence, List

# from openai import OpenAI
from langsmith import Client

from langchain.chains import create_extraction_chain
from langchain_openai import ChatOpenAI, OpenAI

from langchain.chains import create_extraction_chain_pydantic
from langchain_core.pydantic_v1 import BaseModel as BaseModel1, Field as Field1

from pydantic import BaseModel as BaseModel2, Field as Field2, validator
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

In [2]:
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

load_dotenv()

True

In [3]:
openai_api_key = os.getenv("OPENAI_API_KEY")
serpapi_api_key = os.getenv("SERPAPI_API_KEY")
langchain_api_url = os.getenv("LANGCHAIN_ENDPOINT")
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")

In [4]:
openai_client = OpenAI(api_key=openai_api_key)

In [5]:
langsmith_client = Client(api_url=langchain_api_url, api_key=langchain_api_key)

In [6]:
llm3_turbo = ChatOpenAI(
    openai_api_key=openai_api_key, model_name="gpt-3.5-turbo", temperature=0.0
)

In [7]:
llm3_1106 = ChatOpenAI(
    openai_api_key=openai_api_key, model_name="gpt-3.5-turbo-1106", temperature=0.0
)

In [8]:
llm4_turbo = ChatOpenAI(
    openai_api_key=openai_api_key,
    temperature=0.0,
    model_name="gpt-4-1106-preview"
)

In [9]:
model_names = ["gpt-4-1106-preview", "gpt-3.5-turbo-1106", "gpt-3.5-turbo"]

# OpenAI Function

## Schema Dict

In [10]:
schema = {
    "properties": {
        "name": {"type": "string"},
        "height": {"type": "integer"},
        "hair_color": {"type": "string"},
    },
    "required": ["name", "height"],
}

In [27]:
inp = """Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde."""

In [28]:
chain4 = create_extraction_chain(schema, llm4_turbo)

In [29]:
result = chain4.invoke(inp)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [30]:
for k, v in result.items():
    print(k)
    if isinstance(v, list):
        for item in v:
            print(item)
    else:
        print(v)
    print("\n")

input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.


text
{'name': 'Alex', 'height': 5, 'hair_color': 'blonde'}
{'name': 'Claudia', 'height': 6, 'hair_color': 'brunette'}


In [44]:
chain31 = create_extraction_chain(schema, llm3_1106)

In [45]:
result = chain31.invoke(inp)
for k, v in result.items():
    print(k)
    if isinstance(v, list):
        for item in v:
            print(item)
    else:
        print(v)
    print("\n")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.


text
{'name': 'Alex', 'height': 5}
{'name': 'Claudia', 'height': 6, 'hair_color': 'brunette'}


In [48]:
chain3t = create_extraction_chain(schema, llm3_turbo)

In [49]:
result = chain3t.invoke(inp)
for k, v in result.items():
    print(k)
    if isinstance(v, list):
        for item in v:
            print(item)
    else:
        print(v)
    print("\n")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.


text
{'name': 'Alex', 'height': 5, 'hair_color': 'blonde'}
{'name': 'Claudia', 'height': 6, 'hair_color': 'brunette'}


In [62]:
schema = {
    "properties": {
        "person_name": {"type": "string"},
        "person_height": {"type": "integer"},
        "person_hair_color": {"type": "string"},
        "dog_name": {"type": "string"},
        "dog_breed": {"type": "string"},
    },
    "required": ["person_name", "person_height"],
}

In [63]:
inp = """Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Alex's dog Frosty is a labrador and likes to play hide and seek."""

In [68]:
for model_name in model_names:
    llm = ChatOpenAI(openai_api_key=openai_api_key, model_name=model_name, temperature=0.0)
    result = create_extraction_chain(schema, llm).invoke(inp)
    for k, v in result.items():
        print(k)
        if isinstance(v, list):
            for item in v:
                print(item)
        else:
            print(v)
        print("\n")
    print("\n\n")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Alex's dog Frosty is a labrador and likes to play hide and seek.


text
{'person_name': 'Alex', 'person_height': 60, 'person_hair_color': 'blonde'}
{'person_name': 'Claudia', 'person_height': 72, 'person_hair_color': 'brunette'}
{'person_name': 'Alex', 'dog_name': 'Frosty', 'dog_breed': 'labrador'}


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Alex's dog Frosty is a labrador and likes to play hide and seek.


text
{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde', 'dog_name': 'Frosty', 'dog_breed': 'labrador'}
{'person_name': 'Claudia', 'person_height': 6, 'person_hair_color': 'brunette'}


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Alex's dog Frosty is a labrador and likes to play hide and seek.


text
{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde', 'dog_name': 'Frosty', 'dog_breed': 'labrador'}
{'person_name': 'Claudia', 'person_height': 6, 'person_hair_color': 'brunette'}


In [71]:
schema = {
    "properties": {
        "person_name": {"type": "string"},
        "person_height": {"type": "integer"},
        "person_hair_color": {"type": "string"},
        "dog_name": {"type": "string"},
        "dog_breed": {"type": "string"},
    },
    "required": [],
}
inp = """Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by."""

In [72]:
for model_name in model_names:
    llm = ChatOpenAI(openai_api_key=openai_api_key, model_name=model_name, temperature=0.0)
    result = create_extraction_chain(schema, llm).invoke(inp)
    for k, v in result.items():
        print(k)
        if isinstance(v, list):
            for item in v:
                print(item)
        else:
            print(v)
        print("\n")
    print("\n\n")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by.


text
{'person_name': 'Alex', 'person_height': 60, 'person_hair_color': 'blonde'}
{'person_name': 'Claudia', 'person_height': 72, 'person_hair_color': 'brunette'}
{'dog_name': 'Willow', 'dog_breed': 'German Shepherd'}
{'dog_name': 'Milo', 'dog_breed': 'border collie'}


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by.


text
{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde'}
{'person_name': 'Claudia', 'person_height': 6, 'person_hair_color': 'brunette'}
{'dog_name': 'Willow', 'dog_breed': 'German Shepherd'}
{'dog_name': 'Milo', 'dog_breed': 'border collie'}


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by.


text
{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde'}
{'person_name': 'Claudia', 'person_height': 6, 'person_hair_color': 'brunette'}
{'dog_name': 'Willow', 'dog_breed': 'German Shepherd'}
{'dog_name': 'Milo', 'dog_breed': 'border collie'}


In [73]:
schema = {
    "properties": {
        "person_name": {"type": "string"},
        "person_height": {"type": "integer"},
        "person_hair_color": {"type": "string"},
        "dog_name": {"type": "string"},
        "dog_breed": {"type": "string"},
        "dog_extra_info": {"type": "string"},
    },
}

In [74]:
for model_name in model_names:
    llm = ChatOpenAI(openai_api_key=openai_api_key, model_name=model_name, temperature=0.0)
    result = create_extraction_chain(schema, llm).invoke(inp)
    for k, v in result.items():
        print(k)
        if isinstance(v, list):
            for item in v:
                print(item)
        else:
            print(v)
        print("\n")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by.


text
{'person_name': 'Alex', 'person_height': 60, 'person_hair_color': 'blonde'}
{'person_name': 'Claudia', 'person_height': 72, 'person_hair_color': 'brunette'}
{'dog_name': 'Willow', 'dog_breed': 'German Shepherd', 'dog_extra_info': 'likes to play with other dogs'}
{'dog_name': 'Milo', 'dog_breed': 'border collie', 'dog_extra_info': 'lives close by'}


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by.


text
{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde'}
{'person_name': 'Claudia', 'person_height': 6, 'person_hair_color': 'brunette'}
{'dog_name': 'Willow', 'dog_breed': 'German Shepherd', 'dog_extra_info': 'likes to play with other dogs'}
{'dog_name': 'Milo', 'dog_breed': 'border collie'}


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by.


text
{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde'}
{'person_name': 'Claudia', 'person_height': 6, 'person_hair_color': 'brunette'}
{'dog_name': 'Willow', 'dog_breed': 'German Shepherd', 'dog_extra_info': 'likes to play with other dogs'}
{'dog_name': 'Milo', 'dog_breed': 'border collie', 'dog_extra_info': 'lives close by'}


## Schema Pydantic

In [14]:
class Properties(BaseModel1):
    person_name: str
    person_height: int
    person_hair_color: str
    dog_breed: str | None
    dog_name: str | None

In [15]:
inp = """Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde."""

In [16]:
for model_name in model_names:
    llm = ChatOpenAI(openai_api_key=openai_api_key, model_name=model_name, temperature=0.0)
    chain = create_extraction_chain_pydantic(pydantic_schema=Properties, llm=llm)
    result = chain.invoke(inp)
    for k, v in result.items():
        print(k)
        if isinstance(v, list):
            for item in v:
                print(item)
        else:
            print(v)
        print("\n")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.


text
person_name='Alex' person_height=60 person_hair_color='blonde' dog_breed=None dog_name=None
person_name='Claudia' person_height=72 person_hair_color='brunette' dog_breed=None dog_name=None


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.


text
person_name='Alex' person_height=5 person_hair_color='blonde' dog_breed=None dog_name=None
person_name='Claudia' person_height=6 person_hair_color='brunette' dog_breed=None dog_name=None


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


input
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.


text
person_name='Alex' person_height=5 person_hair_color='blonde' dog_breed=None dog_name=None
person_name='Claudia' person_height=6 person_hair_color='brunette' dog_breed=None dog_name=None


In [17]:
chain3t = create_extraction_chain_pydantic(pydantic_schema=Properties, llm=llm3_turbo, verbose=True)

In [18]:
result = chain3t.invoke(inp)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mHuman: Extract and save the relevant entities mentioned in the following passage together with their properties.

Only extract the properties mentioned in the 'information_extraction' function.

If a property is not present and is not required in the function parameters, do not include it in the output.

Passage:
Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
[0m


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



[1m> Finished chain.[0m


In [19]:
result.get("text")

[Properties(person_name='Alex', person_height=5, person_hair_color='blonde', dog_breed=None, dog_name=None),
 Properties(person_name='Claudia', person_height=6, person_hair_color='brunette', dog_breed=None, dog_name=None)]

In [20]:
for p in result["text"]:
    print(p.dict())

{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde', 'dog_breed': None, 'dog_name': None}
{'person_name': 'Claudia', 'person_height': 6, 'person_hair_color': 'brunette', 'dog_breed': None, 'dog_name': None}


## Schema Pydantic Nested Objects

In [47]:
class Address(BaseModel1):
    street: str | None = Field1(description="Street, e.g. 100 Marlo S",)
    city: str = Field1(description="City, e.g. Boston", enum=["boston", "new York", "la"])
    state: str | None = Field1(description="State, e.g. MA")
    zipcode: str | None = Field1(description="Zip code e.g. 23232")
    country: str | None = Field1(description="A country in the world, e.g. USA")

In [48]:
class Person(BaseModel1):
    name: str = Field1(description="The full name of the person or partial name, e.g. Tom")
    from_address: List[Address] | None = Field1( description="Person moved away from this address")
    to_address: List[Address] | None = Field1(description="Address to which the person is moving")

In [49]:
inp = """Alice Doe and Bob Smith moved from New York to Boston. Andrew was 12 years old. He also moved to Boston. So did Joana and Paul. Betty did the opposite."""

In [50]:
for model_name in model_names:
    print(f"Model: {model_name}")
    llm = ChatOpenAI(openai_api_key=openai_api_key, model_name=model_name, temperature=0.0)
    results = create_extraction_chain_pydantic(pydantic_schema=Person, llm=llm).invoke(inp)
    text_results = results.get("text")
    if isinstance(text_results, list):
        for text_result in text_results:
            print(text_result)
    else:
        print(text_results)
    print("\n")

Model: gpt-4-1106-preview


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


name='Alice Doe' from_address=[Address(street=None, city='new York', state=None, zipcode=None, country=None)] to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Bob Smith' from_address=[Address(street=None, city='new York', state=None, zipcode=None, country=None)] to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Andrew' from_address=None to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Joana' from_address=None to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Paul' from_address=None to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Betty' from_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)] to_address=None


Model: gpt-3.5-turbo-1106


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


name='Alice Doe' from_address=[Address(street=None, city='new York', state=None, zipcode=None, country=None)] to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Bob Smith' from_address=[Address(street=None, city='new York', state=None, zipcode=None, country=None)] to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Andrew' from_address=None to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Joana' from_address=None to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Paul' from_address=None to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Betty' from_address=None to_address=None


Model: gpt-3.5-turbo


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


name='Alice Doe' from_address=[Address(street='', city='new York', state='', zipcode='', country='')] to_address=[Address(street='', city='boston', state='', zipcode='', country='')]
name='Bob Smith' from_address=[Address(street='', city='new York', state='', zipcode='', country='')] to_address=[Address(street='', city='boston', state='', zipcode='', country='')]
name='Andrew' from_address=[] to_address=[Address(street='', city='boston', state='', zipcode='', country='')]
name='Joana' from_address=[] to_address=[Address(street='', city='boston', state='', zipcode='', country='')]
name='Paul' from_address=[] to_address=[Address(street='', city='boston', state='', zipcode='', country='')]
name='Betty' from_address=[] to_address=[Address(street='', city='new York', state='', zipcode='', country='')]



In [21]:
chain4 = create_extraction_chain_pydantic(pydantic_schema=Person, llm=llm4_turbo, verbose=True)

In [22]:
result = chain4.invoke(inp)
result.get("text")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mHuman: Extract and save the relevant entities mentioned in the following passage together with their properties.

Only extract the properties mentioned in the 'information_extraction' function.

If a property is not present and is not required in the function parameters, do not include it in the output.

Passage:
Alice Doe and Bob Smith moved from New York to Boston. Andrew was 12 years old. He also moved to Boston. So did Joana and Paul. Betty did the opposite.
[0m


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



[1m> Finished chain.[0m


[Person(name='Alice Doe', from_address=[Address(street=None, city='New York', state=None, zipcode=None, country=None)], to_address=[Address(street=None, city='Boston', state=None, zipcode=None, country=None)]),
 Person(name='Bob Smith', from_address=[Address(street=None, city='New York', state=None, zipcode=None, country=None)], to_address=[Address(street=None, city='Boston', state=None, zipcode=None, country=None)]),
 Person(name='Andrew', from_address=None, to_address=[Address(street=None, city='Boston', state=None, zipcode=None, country=None)]),
 Person(name='Joana', from_address=None, to_address=[Address(street=None, city='Boston', state=None, zipcode=None, country=None)]),
 Person(name='Paul', from_address=None, to_address=[Address(street=None, city='Boston', state=None, zipcode=None, country=None)]),
 Person(name='Betty', from_address=[Address(street=None, city='Boston', state=None, zipcode=None, country=None)], to_address=None)]

In [45]:
inp = "Alice Doe and Bob Smith moved from New York to Boston. Bob later moved to LA."

In [46]:
for model_name in model_names:
    print(f"Model: {model_name}")
    llm = ChatOpenAI(openai_api_key=openai_api_key, model_name=model_name, temperature=0.0)
    results = create_extraction_chain_pydantic(pydantic_schema=Person, llm=llm).invoke(inp)
    text_results = results.get("text")
    if isinstance(text_results, list):
        for text_result in text_results:
            print(text_result)
    else:
        print(text_results)
    print("\n")

Model: gpt-4-1106-preview


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


name='Alice Doe' from_address=[Address(street=None, city='new York', state=None, zipcode=None, country=None)] to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Bob Smith' from_address=[Address(street=None, city='new York', state=None, zipcode=None, country=None), Address(street=None, city='boston', state=None, zipcode=None, country=None)] to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None), Address(street=None, city='la', state=None, zipcode=None, country=None)]


Model: gpt-3.5-turbo-1106


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


name='Alice Doe' from_address=[Address(street=None, city='new York', state=None, zipcode=None, country=None)] to_address=[Address(street=None, city='boston', state=None, zipcode=None, country=None)]
name='Bob Smith' from_address=[Address(street=None, city='new York', state=None, zipcode=None, country=None)] to_address=[Address(street=None, city='la', state=None, zipcode=None, country=None)]


Model: gpt-3.5-turbo


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


name='Alice Doe' from_address=[Address(street='', city='new York', state='', zipcode='', country='')] to_address=[Address(street='', city='boston', state='', zipcode='', country='')]
name='Bob Smith' from_address=[Address(street='', city='new York', state='', zipcode='', country='')] to_address=[Address(street='', city='boston', state='', zipcode='', country=''), Address(street='', city='la', state='', zipcode='', country='')]



# Parsing

In [10]:
class Person(BaseModel2):
    person_name: str
    person_height: int
    person_hair_color: str
    dog_breed: Optional[str]
    dog_name: Optional[str]


class People(BaseModel2):
    """Identifying information about all people in a text."""

    people: Sequence[Person]

In [11]:
# Run
query = """Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde."""

# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=People)

# Prompt
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Run
_input = prompt.format_prompt(query=query)
model = OpenAI(openai_api_key=openai_api_key, model_name="gpt-3.5-turbo-instruct", temperature=0.0)
output = model(_input.to_string())
parser.parse(output)

  warn_deprecated(
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"


People(people=[Person(person_name='Alex', person_height=5, person_hair_color='blonde', dog_breed=None, dog_name=None), Person(person_name='Claudia', person_height=6, person_hair_color='brunette', dog_breed=None, dog_name=None)])

# Kor

In [39]:
import enum
# from langchain.chat_models import ChatOpenAI
# from langchain.llms import OpenAI
from kor import create_extraction_chain, Object, Text, from_pydantic
from kor.nodes import Object, Text, Number

In [40]:
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    max_tokens=2000,
    model_kwargs = {
        'frequency_penalty':0,
        'presence_penalty':0,
        'top_p':1.0
    }
)

In [113]:
schema = Object(
    id="player",
    description=(
        "User is controlling a music player to select songs, pause or start them or play"
        " music by a particular artist."
    ),
    attributes=[
        Text(
            id="song",
            description="User wants to play this song",
            examples=[],
            many=True,
        ),
        Text(
            id="album",
            description="User wants to play this album",
            examples=[],
            many=True,
        ),
        Text(
            id="artist",
            description="Music by the given artist",
            examples=[("Songs by paul simon", "paul simon")],
            many=True,
        ),
        Text(
            id="action",
            description="Action to take one of: `play`, `stop`, `next`, `previous`.",
            examples=[
                ("Please stop the music", "stop"),
                ("play something", "play"),
                ("play a song", "play"),
                ("next song", "next"),
            ],
        ),
    ],
    many=False,
)

chain = create_extraction_chain(llm, schema, encoder_or_encoder_class='json')
chain.run("play songs by paul simon and led zeppelin and the doors")['data']

INFO:numexpr.utils:NumExpr defaulting to 8 threads.
  warn_deprecated(
  warn_deprecated(
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


{'player': {'artist': ['paul simon', 'led zeppelin', 'the doors']}}

## Nested Objects

In [26]:
from_address = Object(
    id="from_address",
    description="Person moved away from this address",
    attributes=[
        Text(id="street"),
        Text(id="city"),
        Text(id="state"),
        Text(id="zipcode"),
        Text(id="country", description="A country in the world; e.g., France."),
    ],
    examples=[
        (
            "100 Main St, Boston, MA, 23232, USA",
            {
                "street": "100 Marlo St",
                "city": "Boston",
                "state": "MA",
                "zipcode": "23232",
                "country": "USA",
            },
        )
    ],
)

to_address = from_address.replace(
    id="to_address", description="Address to which the person is moving"
)

schema = Object(
    id="information",
    attributes=[
        Text(
            id="person_name",
            description="The full name of the person or partial name",
            examples=[("John Smith was here", "John Smith")],
        ),
        from_address,
        to_address,
    ],
    many=True,
)

In [27]:
chain = create_extraction_chain(
    llm, schema, encoder_or_encoder_class="json", input_formatter=None
)

In [15]:
result = chain.run("Alice Doe moved from New York to Boston, MA while Bob Smith did the opposite.")["data"]
result

  warn_deprecated(
  warn_deprecated(
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


{'information': [{'person_name': 'Alice Doe',
   'from_address': {'city': 'New York'},
   'to_address': {'city': 'Boston', 'state': 'MA'}},
  {'person_name': 'Bob Smith',
   'from_address': {'city': 'Boston', 'state': 'MA'},
   'to_address': {'city': 'New York'}}]}

In [21]:
for info in result["information"]:
    pprint(info)
    print("\n")

{'from_address': {'city': 'New York'},
 'person_name': 'Alice Doe',
 'to_address': {'city': 'Boston', 'state': 'MA'}}


{'from_address': {'city': 'Boston', 'state': 'MA'},
 'person_name': 'Bob Smith',
 'to_address': {'city': 'New York'}}


In [28]:
result = chain.run(
    "Alice Doe and Bob Smith moved from New York to Boston. Andrew was 12 years"
    " old. He also moved to Boston. So did Joana and Paul. Betty did the opposite."
)["data"]

  warn_deprecated(
  warn_deprecated(
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [29]:
for info in result["information"]:
    pprint(info)
    print("\n")

{'from_address': {'city': 'New York'},
 'person_name': 'Alice Doe',
 'to_address': {'city': 'Boston'}}


{'from_address': {'city': 'New York'},
 'person_name': 'Bob Smith',
 'to_address': {'city': 'Boston'}}


{'person_name': 'Andrew', 'to_address': {'city': 'Boston'}}


{'person_name': 'Joana', 'to_address': {'city': 'Boston'}}


{'person_name': 'Paul', 'to_address': {'city': 'Boston'}}


{'from_address': {'city': 'Boston'},
 'person_name': 'Betty',
 'to_address': {'city': 'New York'}}


## Nested Lists

In [41]:
from_address = Object(
    id="from_address",
    description="Person moved away from this address",
    attributes=[
        Text(id="street"),
        Text(id="city"),
        Text(id="state"),
        Text(id="zipcode"),
        Text(id="country", description="A country in the world; e.g., France."),
    ],
    examples=[
        (
            "100 Main St, Boston,MA, 23232, USA",
            {
                "street": "100 Marlo St",
                "city": "Boston",
                "state": "MA",
                "zipcode": "23232",
                "country": "USA",
            },
        )
    ],
    many=True,  # <-- PLEASE NOTE THIS CHANGE
)

to_address = from_address.replace(
    id="to_address", description="Address to which the person is moving"
)

schema = Object(
    id="information",
    attributes=[
        Text(
            id="person_name",
            description="The full name of the person or partial name",
            examples=[("John Smith was here", "John Smith")],
        ),
        from_address,
        to_address,
    ],
    many=True,
)

In [42]:
chain = create_extraction_chain(llm, schema, encoder_or_encoder_class="json")

In [43]:
results = chain.run(
    "Alice Doe and Bob Smith moved from New York to Boston. Bob later moved to LA."
)["data"]

  warn_deprecated(
  warn_deprecated(
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [44]:
for info in results["information"]:
    pprint(info)
    print("\n")

{'from_address': [{'city': 'New York'}],
 'person_name': 'Alice Doe',
 'to_address': [{'city': 'Boston'}]}


{'from_address': [{'city': 'New York'}],
 'person_name': 'Bob Smith',
 'to_address': [{'city': 'Boston'}, {'city': 'LA'}]}
