# Tagging

Following: https://python.langchain.com/v0.2/docs/tutorials/classification/

In [1]:
from utils import load_openai_key

load_openai_key()

In [6]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

tagging_prompt = ChatPromptTemplate.from_template(
    """
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)


class Classification(BaseModel):
    sentiment: str = Field(description="The sentiment of the text")
    aggressiveness: int = Field(
        description="How aggressive the text is on a scale from 1 to 10"
    )
    language: str = Field(description="The language the text is written in")
    critique: list[str] = Field(description="Bullet points of what the movie could have done better")


# LLM
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125").with_structured_output(
    Classification
)

tagging_chain = tagging_prompt | llm


In [7]:
inp = "Ich fande Dune 2 hatte leider viel verschenktes Potential. Aber paar weniger Plotpunkte, dafür aber Detail in dem was übrig bleibt wäre gut gewesen. Die Schauspieler waren gut, aber die Story war nicht so gut."

res = tagging_chain.invoke({"input": inp})
res.dict()

{'sentiment': 'negative',
 'aggressiveness': 3,
 'language': 'German',
 'critique': ['Potential was not fully utilized',
  'Too many plot points',
  'Lack of detail in the remaining plot',
  'Story was not strong'],
 'positive': ['Good actors']}

# Finer control

In [17]:
class Classification(BaseModel):
    sentiment: str = Field(description="The sentiment of the text", enum=['postive', 'negative', 'neutral'])
    aggressiveness: int = Field(
        description="How aggressive the text is. Higher number means more aggressive",
        enum=[1, 2, 3, 4, 5]
    )
    language: str = Field(description="The language the text is written in")
    critique: list[str] = Field(description="Bullet points of what the movie could have done better, in english")
    positive_aspects: list[str] = Field(description="Bullet points of what the movie did well, in english")

In [18]:
# LLM
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125").with_structured_output(
    Classification
)

tagging_chain = tagging_prompt | llm

In [19]:
res = tagging_chain.invoke({"input": inp})
res.dict()

{'sentiment': 'negative',
 'aggressiveness': 3,
 'language': 'German',
 'critique': ['Potential wurde verschenkt',
  'Zu viele Plotpunkte',
  'Story war nicht so gut'],
 'positive_aspects': ['Gute Schauspieler']}

In [20]:
res = tagging_chain.invoke({"input": "Dune 2 was a great movie. I loved the story and the actors were amazing. I would definitely recommend it to my friends."})
res.dict()

{'sentiment': 'postive',
 'aggressiveness': 1,
 'language': 'english',
 'critique': [],
 'positive_aspects': ['Great movie',
  'Loved the story',
  'Amazing actors',
  'Recommend to friends']}

In [21]:
res = tagging_chain.invoke({"input": "Dune 2 var en fantastisk film. Jeg elskede historien, og skuespillerne var fantastiske. Jeg vil helt sikkert anbefale den til mine venner."})
res.dict()

{'sentiment': 'positive',
 'aggressiveness': 1,
 'language': 'Danish',
 'critique': [],
 'positive_aspects': ['Great story',
  'Fantastic actors',
  'Recommendation to friends']}