# Tagging and Extraction Using OpenAI functions

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv(); the return value
# is assigned to `_` so it is simply discarded.
_ = load_dotenv(find_dotenv()) 
# Indexing os.environ (rather than .get) raises KeyError right here if the key
# is not set. NOTE(review): this name is not referenced again in the visible
# cells — presumably ChatOpenAI picks the key up from the environment itself.
OPENAI_API_KEY=os.environ["OPENAI_API_KEY"]


In [2]:
# List the installed versions of every package whose name contains
# "langchain", "openai" or "pydantic".
# `langchain-openai` shows up twice in the output because it matches both the
# `langchain` and the `openai` pattern.
!pip freeze | grep langchain

!pip freeze | grep openai

!pip freeze | grep pydantic


langchain==0.2.15
langchain-community==0.2.13
langchain-core==0.2.41
langchain-openai==0.1.23
langchain-text-splitters==0.2.4
langchain-openai==0.1.23
openai==1.47.0
openapi-schema-pydantic==1.2.4
pydantic==2.9.2
pydantic-settings==2.5.2
pydantic_core==2.23.4


## Tagging

Previously, we used functions to extract specific API parameters from natural-language input.
 
Here, we show that functions are very flexible. 

We use them to easily tag a piece of text with particular info.


In [3]:
from typing import List
from pydantic import BaseModel, Field
from langchain.utils.openai_functions import convert_pydantic_to_openai_function

In [4]:
class Tagging(BaseModel):
    """Tag the piece of text with particular info."""
    # NOTE: the docstring above and the Field descriptions below are copied into
    # the generated function schema (its `description` entries), so they are
    # effectively prompt text sent to the model — edit them with that in mind.

    # The allowed values are stated only in the description; the declared type
    # is plain `str`, so the schema itself does not restrict the value.
    sentiment: str = Field(description="sentiment of text, should be `pos`, `neg`, or `neutral`")
    # Likewise, the ISO 639-1 requirement lives only in the description text.
    language: str = Field(description="language of text (should be ISO 639-1 code)")

In [5]:
# Display the OpenAI function schema generated from the Tagging model.
# NOTE(review): the indented echo of this call in the cell output looks like
# the tail of a deprecation warning. `convert_to_openai_function` is presumably
# the replacement — TODO confirm against the installed langchain-core and
# import it before switching.
convert_pydantic_to_openai_function(Tagging)

  convert_pydantic_to_openai_function(Tagging)


{'name': 'Tagging',
 'description': 'Tag the piece of text with particular info.',
 'parameters': {'properties': {'sentiment': {'description': 'sentiment of text, should be `pos`, `neg`, or `neutral`',
    'type': 'string'},
   'language': {'description': 'language of text (should be ISO 639-1 code)',
    'type': 'string'}},
  'required': ['sentiment', 'language'],
  'type': 'object'}}

In [6]:
from langchain_openai import ChatOpenAI
from langchain.schema import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser


# Chat model with temperature 0, i.e. the least random sampling setting.
model = ChatOpenAI(temperature=0)

# The only function offered to the model: the schema derived from Tagging.
tagging_functions = [convert_pydantic_to_openai_function(Tagging)]

# System instruction plus a single human turn carrying the text to tag.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Think carefully, and then tag the text as instructed."),
        ("human", "{input}"),
    ]
)

# Naming the function in `function_call` forces the model to call Tagging
# instead of leaving the choice (or a plain-text answer) up to the model.
model_with_functions = model.bind(
    functions=tagging_functions,
    function_call={"name": "Tagging"},
)

# Pipeline: fill the prompt -> forced function call -> parse the call's
# arguments into a Python dict.
tagging_chain = (
    prompt
    | model_with_functions
    | JsonOutputFunctionsParser()
)
tagging_chain.invoke({"input": "I love LangChain"})

{'sentiment': 'pos', 'language': 'en'}

In [7]:
# Second example: Italian input, exercising the language tag as well as the sentiment.
tagging_chain.invoke({"input": "non mi piace questo cibo"})

{'sentiment': 'neg', 'language': 'it'}

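We can use an output parser to extract the function-call arguments automatically. The chain above already ends with `JsonOutputFunctionsParser`; the next cell rebuilds the same chain so that the parser step is explicit.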

In [8]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
# Rebuild the pipeline with the parser as the final stage, so the result of
# invoke() is the parsed arguments dict rather than the raw model message.
tagging_chain = (
    prompt
    | model_with_functions
    | JsonOutputFunctionsParser()
)
tagging_chain.invoke({"input": "non mi piace questo cibo"})

{'sentiment': 'neg', 'language': 'it'}