In [1]:
## Tagging means labeling a document with classes such as:
# sentiment, language, style (formal, informal, etc.), covered topics, political tendency
# Tagging has a few components:
#     function: Like extraction, tagging uses functions to specify how the model should tag a document
#     schema: defines how we want to tag the document

In [2]:
from util.config import load_configuration

# Connection settings for the Azure OpenAI deployment are read from a
# properties file rather than being hard-coded in the notebook.
PROP_FILE = 'app.properties'
configs = load_configuration(PROP_FILE)


def _required_setting(key):
    """Return the ``.data`` value stored under ``key`` in ``configs``.

    Args:
        key: Name of the property to look up.

    Returns:
        The ``.data`` attribute of the entry returned by ``configs.get(key)``.

    Raises:
        KeyError: If ``configs.get(key)`` returns ``None``. Without this check
            the lookup fails with an opaque
            "'NoneType' object has no attribute 'data'" error that does not
            say which property is missing.
    """
    # NOTE(review): assumes configs.get() returns None for an absent key
    # (dict-style .get) -- confirm against util.config.load_configuration.
    entry = configs.get(key)
    if entry is None:
        raise KeyError(f"Missing required setting {key!r} in {PROP_FILE}")
    return entry.data


API_TYPE = _required_setting("API_TYPE")
API_VERSION = _required_setting("API_VERSION")
API_KEY = _required_setting("API_KEY")
API_BASE = _required_setting("API_BASE")
LLM_ENGINE_GPT35_16K = _required_setting("LLM_ENGINE_GPT35_16K")
ADA_MODEL = _required_setting("ADA_MODEL")

In [10]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import AzureChatOpenAI

# Instruction text for the tagging call. `{input}` is the template variable
# that is filled with the passage when the chain is invoked.
TAGGING_TEMPLATE = """
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""

tagging_prompt = ChatPromptTemplate.from_template(TAGGING_TEMPLATE)


# Tagging schema: the three properties to extract from a passage.
# NOTE(review): documented with comments rather than a class docstring on
# purpose -- presumably a docstring would be folded into the schema that is
# sent to the model; confirm before adding one.
class Classification(BaseModel):
    # Free-form sentiment label (no fixed vocabulary in this version).
    sentiment: str = Field(
        description="The sentiment of the text",
    )
    # Integer score; the 1-10 range is stated only in the description text.
    aggressiveness: int = Field(
        description="How aggressive the text is on a scale from 1 to 10",
    )
    # Free-form language name.
    language: str = Field(
        description="The language the text is written in",
    )


# LLM
# Azure OpenAI chat model wrapped so that its replies come back as
# `Classification` objects instead of raw chat messages.
# temperature=0 requests the least random sampling, so re-running the
# notebook on the same passage gives labels that are as stable as the
# service allows; the library default is non-zero.
llm = AzureChatOpenAI(
    azure_endpoint=API_BASE,
    api_key=API_KEY,
    azure_deployment=LLM_ENGINE_GPT35_16K,
    openai_api_version=API_VERSION,
    temperature=0,
).with_structured_output(Classification)

# Compose prompt -> model: invoking the chain with {"input": <passage>}
# renders the prompt and sends it to the structured-output model.
tagging_chain = tagging_prompt | llm

In [11]:
# Spanish sample passage (roughly: "I'm incredibly happy to have met you!
# I think we'll be very good friends!").
inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
# Run the tagging chain on the passage; the result is kept in `res` so the
# next cell can inspect it.
res = tagging_chain.invoke({"input": inp})

In [12]:
# Display the tagged result as a plain dict (field name -> value).
res.dict()

{'sentiment': 'positive', 'aggressiveness': 1, 'language': 'Spanish'}

In [14]:
## Finer control: restrict each field to an explicit list of allowed values via `enum`
# Enum-constrained variant of the tagging schema. It rebinds the name
# `Classification`, replacing the free-form schema defined in the earlier cell.
# NOTE(review): `enum=` is passed to Field as an extra keyword; presumably it
# only advertises the allowed values in the generated schema and is not
# enforced when the model is validated -- confirm.
class Classification(BaseModel):
    # One of three coarse sentiment labels.
    sentiment: str = Field(
        ...,
        enum=["happy", "neutral", "sad"],
    )
    # Aggressiveness on a 1-5 scale; higher means more aggressive.
    aggressiveness: int = Field(
        ...,
        description="describes how aggressive the statement is, the higher the number the more aggressive",
        enum=[1, 2, 3, 4, 5],
    )
    # Lower-case language name from a fixed list of five.
    language: str = Field(
        ...,
        enum=["spanish", "english", "french", "german", "italian"],
    )

# Instruction text for the tagging call. `{input}` is the template variable
# that is filled with the passage when the chain is invoked.
TAGGING_TEMPLATE = """
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""

tagging_prompt = ChatPromptTemplate.from_template(TAGGING_TEMPLATE)

# Rebuild the structured-output model so it is bound to the
# `Classification` class defined in this cell.
# temperature=0 requests the least random sampling, so re-running the
# notebook on the same passage gives labels that are as stable as the
# service allows; the library default is non-zero.
llm = AzureChatOpenAI(
    azure_endpoint=API_BASE,
    api_key=API_KEY,
    azure_deployment=LLM_ENGINE_GPT35_16K,
    openai_api_version=API_VERSION,
    temperature=0,
).with_structured_output(Classification)

# Compose prompt -> model: invoking the chain with {"input": <passage>}
# renders the prompt and sends it to the structured-output model.
chain = tagging_prompt | llm

In [15]:
# Same Spanish passage as in the earlier cell, now tagged through `chain`;
# the returned object is the cell's displayed output.
inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
chain.invoke({"input": inp})

Classification(sentiment='happy', aggressiveness=1, language='spanish')

In [16]:
# English passage with a flat, matter-of-fact tone, used as a second sample
# for `chain`; the returned object is the cell's displayed output.
inp = "Weather is ok here, I can go outside without much more than a coat"
chain.invoke({"input": inp})

Classification(sentiment='neutral', aggressiveness=2, language='english')