In [99]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Extraction

The extraction chain uses the OpenAI `functions` parameter to specify a schema for the entities to extract from a document.

In [100]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import create_extraction_chain, create_extraction_chain_pydantic
from langchain.prompts import ChatPromptTemplate

In [101]:
# Chat model shared by the extraction chains in this notebook:
# the gpt-3.5-turbo-0613 snapshot, called with temperature 0.
llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0)

In [102]:
# JSON-schema style description of the entities the chain should extract.
# Every name listed under "required" must be a key of "properties";
# the height field is therefore referenced by its full name, "person_height".
schema = {
    "properties": {
        "person_name": {"type": "string"},
        "person_height": {"type": "integer"},
        "person_hair_color": {"type": "string"},
        "dog_name": {"type": "string"},
        "dog_breed": {"type": "string"},
    },
    "required": ["person_name", "person_height"],
}

In [103]:
# Sample passage passed to the extraction chain. It mentions two people
# (Alex and Claudia) and one dog (Frosty); Claudia's height is given only
# relative to Alex's.
# NOTE(review): "taller Alex" is missing "than"; the text is kept verbatim
# because the recorded outputs in this notebook were produced from it.
inp = """
Alex is 5 feet tall. Claudia is 4 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Alex's dog Frosty is a labrador and likes to play hide and seek.
        """

In [104]:
# Build the extraction chain from the dict schema and the chat model.
chain = create_extraction_chain(schema=schema, llm=llm)

In [105]:
# Run the chain on the sample passage; the returned value is left as the
# last expression so the notebook displays it as the cell output.
chain.run(inp)

[{'person_name': 'Alex',
  'person_height': 5,
  'person_hair_color': 'blonde',
  'dog_name': 'Frosty',
  'dog_breed': 'labrador'},
 {'person_name': 'Claudia',
  'person_height': 9,
  'person_hair_color': 'brunette',
  'dog_name': '',
  'dog_breed': ''}]

## Pydantic example

In [106]:
from typing import Optional, List
from pydantic import BaseModel, Field

In [119]:
# Pydantic model describing one extracted record: a person and, optionally,
# that person's dog. Documented with comments rather than a class docstring
# so the JSON schema pydantic generates from this model stays unchanged.
# NOTE(review): the dict-based schema earlier in this notebook declares
# person_height as an integer, while here it is a string (the recorded output
# shows free text such as '5 feet') -- confirm which type is intended.
class Properties(BaseModel):
    person_name: str
    person_height: str
    person_hair_color: str
    # Explicit None defaults keep the dog fields optional without relying on
    # how the installed pydantic version treats a bare Optional[...] annotation.
    dog_breed: Optional[str] = None
    dog_name: Optional[str] = None

In [126]:
# Build the extraction chain from the pydantic model instead of a dict schema.
chain = create_extraction_chain_pydantic(
    llm=llm,
    pydantic_schema=Properties,
)

[{'name': 'information_extraction', 'description': 'Extracts the relevant information from the passage.', 'parameters': {'type': 'object', 'properties': {'info': {'type': 'array', 'items': {'type': 'object', 'properties': {'info': {'title': 'Info', 'type': 'array', 'items': {'title': 'Properties', 'type': 'object', 'properties': {'person_name': {'title': 'Person Name', 'type': 'string'}, 'person_height': {'title': 'Person Height', 'type': 'string'}, 'person_hair_color': {'title': 'Person Hair Color', 'type': 'string'}, 'dog_breed': {'title': 'Dog Breed', 'type': 'string'}, 'dog_name': {'title': 'Dog Name', 'type': 'string'}}, 'required': ['person_name', 'person_height', 'person_hair_color']}}}, 'required': ['info']}}}, 'required': ['info']}}]


In [127]:
# Same sample passage as in the dict-schema example, redefined here so this
# cell carries its own input. The text is kept verbatim (including the
# missing "than") because the recorded output was produced from it.
inp = """
Alex is 5 feet tall. Claudia is 4 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.
Alex's dog Frosty is a labrador and likes to play hide and seek.
        """
# Last expression of the cell, so the returned list is displayed as output.
chain.run(inp)

[Properties(person_name='Alex', person_height='5 feet', person_hair_color='blonde', dog_breed='labrador', dog_name='Frosty'),
 Properties(person_name='Claudia', person_height='4 feet taller Alex', person_hair_color='brunette', dog_breed=None, dog_name=None)]