# Tagging and Extraction Using OpenAI functions

In [1]:
# NOTE: run this notebook inside the `LCEL_inforetrieve` virtualenv.

import os
from dotenv import load_dotenv, find_dotenv

# Load variables from a discovered .env file into the process environment.
_ = load_dotenv(find_dotenv())

# Read the key (raises KeyError if it is missing) but NEVER print its value:
# cell outputs are saved with the notebook and would leak the secret.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
print("OPENAI_API_KEY loaded")


[REDACTED: an OpenAI API key was printed here. Revoke and rotate that key; do not print secrets in notebooks.]


In [1]:
from langchain_openai import ChatOpenAI
from langchain.schema import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel


In [2]:
# List installed packages whose names contain "langchain", with their versions.
!pip freeze | grep langchain

langchain==0.2.15
langchain-core==0.2.41
langchain-openai==0.1.23
langchain-text-splitters==0.2.4


In [3]:
# List installed packages whose names contain "openai", with their versions.
!pip freeze | grep openai

langchain-openai==0.1.23
openai==1.47.0


In [4]:
# List installed packages whose names contain "pydantic", with their versions.
!pip freeze | grep pydantic

pydantic==2.9.2
pydantic_core==2.23.4


## Tagging

Previously, we used functions to extract specific API parameters from natural-language input.
 
Here, we show that functions are very flexible. 

We use them to easily tag a piece of text with particular info.


In [5]:
from typing import List

from pydantic import BaseModel, Field
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from langchain_core.utils.function_calling import convert_to_openai_function

In [6]:
class Tagging(BaseModel):
    """Tag the piece of text with particular info."""
    # The docstring above and the Field descriptions below are not only documentation:
    # they appear as the 'description' entries of the generated function schema.

    # Plain string; the allowed values are only suggested through the description text.
    sentiment: str = Field(description="sentiment of text, should be `pos`, `neg`, or `neutral`")
    # Plain string; the ISO 639-1 format is requested through the description text.
    language: str = Field(description="language of text (should be ISO 639-1 code)")

In [7]:
# Inspect the OpenAI function schema generated from the Pydantic model.
# `convert_to_openai_function` is the supported replacement for the deprecated
# `convert_pydantic_to_openai_function`.
convert_to_openai_function(Tagging)

  convert_pydantic_to_openai_function(Tagging)


{'name': 'Tagging',
 'description': 'Tag the piece of text with particular info.',
 'parameters': {'properties': {'sentiment': {'description': 'sentiment of text, should be `pos`, `neg`, or `neutral`',
    'type': 'string'},
   'language': {'description': 'language of text (should be ISO 639-1 code)',
    'type': 'string'}},
  'required': ['sentiment', 'language'],
  'type': 'object'}}

In [8]:
from langchain_openai import ChatOpenAI
from langchain.schema import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel


# temperature=0 requests the least random sampling from the model.
model = ChatOpenAI(temperature=0)

# Function schemas offered to the model, derived from the Pydantic class
# (uses the non-deprecated conversion helper).
tagging_functions = [convert_to_openai_function(Tagging)]

prompt = ChatPromptTemplate.from_messages([
    ("system", "Think carefully, and then tag the text as instructed."),
    ("human", "{input}"),
])

# Passing function_call by name makes the model call `Tagging`
# instead of leaving the choice of function (or plain text) to the model.
model_with_functions = model.bind(functions=tagging_functions, function_call={"name":"Tagging"})

# Prompt -> model; the result is an AIMessage carrying the function-call arguments.
tagging_chain = prompt | model_with_functions
tagging_chain.invoke({"input": "I love LangChain"})

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"sentiment":"pos","language":"en"}', 'name': 'Tagging'}, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 108, 'total_tokens': 118, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a490c550-0b72-4bf7-883a-50c97e132b5e-0', usage_metadata={'input_tokens': 108, 'output_tokens': 10, 'total_tokens': 118})

In [9]:
# Same chain on Italian input ("I don't like this food") to exercise the language tag.
tagging_chain.invoke({"input": "non mi piace questo cibo"})

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"sentiment":"neg","language":"it"}', 'name': 'Tagging'}, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 111, 'total_tokens': 121, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-47d8df52-5439-4443-ae42-4091a8a043b1-0', usage_metadata={'input_tokens': 111, 'output_tokens': 10, 'total_tokens': 121})

We can use an output parser to automatically extract the function-call arguments as a Python dict.

In [10]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
# Rebuild the chain with a parser appended so the result is the parsed
# function-call arguments rather than the raw AIMessage.
# Note: this rebinds `tagging_chain` defined in the earlier cell.
tagging_chain = prompt | model_with_functions | JsonOutputFunctionsParser()
tagging_chain.invoke({"input": "non mi piace questo cibo"})

{'sentiment': 'neg', 'language': 'it'}

## Extraction

Extraction is similar to tagging, but used for extracting multiple pieces of information.

In [11]:
from typing import Optional
class Person(BaseModel):
    """Information about a person."""
    # The docstring and Field descriptions are emitted into the function schema
    # as 'description' entries, so they double as instructions to the model.
    name: str = Field(description="person's name")
    # NOTE(review): with Pydantic v2 (2.9.2 is installed here) an Optional field
    # without a default is presumably still required-but-nullable, which matches the
    # explicit `"age": null` in the recorded outputs. Add default=None if the key
    # should be omittable - confirm against the Pydantic docs before changing.
    age: Optional[int] = Field(description="person's age")

In [12]:
class Information(BaseModel):
    """Information to extract."""
    # Container for every Person found in the text; the list is exposed
    # under the "people" key of the function-call arguments.
    people: List[Person] = Field(description="List of info about people")

In [13]:
# Build the function schema for the nested Information model
# (uses the non-deprecated conversion helper).
extraction_functions = [convert_to_openai_function(Information)]

# Passing function_call by name makes the model answer through `Information`.
extraction_model = model.bind(functions=extraction_functions, function_call={"name":"Information"})

# No prompt template yet: the plain string is passed directly to the model.
extraction_model.invoke("Joe is 30. Joe's mom is Martha")

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"people":[{"name":"Joe","age":30},{"name":"Martha","age":null}]}', 'name': 'Information'}, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 96, 'total_tokens': 117, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-6c5368c7-d588-452a-bd5c-0467ccbc7512-0', usage_metadata={'input_tokens': 96, 'output_tokens': 21, 'total_tokens': 117})

Similarly, we can use a separate output parser to pluck out the "people" key, since that is the information we really care about.

In [14]:
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser
# Parse the function-call arguments and keep only the value stored under "people",
# so the chain returns the list of person dicts directly.
extraction_chain = extraction_model | JsonKeyOutputFunctionsParser(key_name="people")
extraction_chain.invoke("Joe is 30. Joe's mom is Martha")

[{'name': 'Joe', 'age': 30}, {'name': 'Martha', 'age': None}]

In [15]:
from langchain.prompts import ChatPromptTemplate
# System instruction tells the model not to invent values that the text does not state.
# NOTE(review): this rebinds `prompt`, replacing the tagging prompt defined earlier;
# re-running the tagging cells after this one would pick up this extraction prompt.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevant information, if not explicitly provided do not guess. Extract partial info"),
    ("human", "{input}")
])

In [16]:
# Full extraction pipeline: prompt -> function-calling model -> parser selecting "people".
# Rebinds `extraction_chain`, which previously had no prompt in front of the model.
extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="people")

In [17]:
# The chain now starts with a prompt template, so the input is a dict keyed by "input".
extraction_chain.invoke({"input": "Joe is 30. Joe's mom is Martha"})

[{'name': 'Joe', 'age': 30}, {'name': 'Martha', 'age': None}]