In [1]:
import os

from dotenv import load_dotenv
from langchain_groq import ChatGroq


# Import Environment Variables
def set_tokens():
    """Load API credentials from the environment into module-level globals.

    Calls ``load_dotenv()`` first, then copies every credential that is
    present in the environment into a global of the same name:

    * ``HUGGINGFACEHUB_API_TOKEN`` (prints ``Token is added`` when found)
    * ``GROQ_API_KEY``
    * ``AZURE_OPENAI_ENDPOINT`` and ``AZURE_OPENAI_API_KEY`` -- only when
      *both* are set, since one without the other is unusable.

    Raises:
        Exception: if none of the providers above is (fully) configured.
    """
    global HUGGINGFACEHUB_API_TOKEN
    global GROQ_API_KEY
    global AZURE_OPENAI_ENDPOINT
    global AZURE_OPENAI_API_KEY
    load_dotenv()

    # Tracks whether at least one provider has usable credentials.
    any_provider_configured = False

    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if hf_token is not None:
        HUGGINGFACEHUB_API_TOKEN = hf_token
        print("Token is added")
        any_provider_configured = True

    groq_key = os.getenv("GROQ_API_KEY")
    if groq_key is not None:
        GROQ_API_KEY = groq_key
        any_provider_configured = True

    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    azure_key = os.getenv("AZURE_OPENAI_API_KEY")
    # Require both values: checking with `or` and then indexing os.environ
    # raised KeyError when only one of the two variables was defined.
    if azure_endpoint is not None and azure_key is not None:
        AZURE_OPENAI_ENDPOINT = azure_endpoint
        AZURE_OPENAI_API_KEY = azure_key
        any_provider_configured = True

    # Fail only when nothing at all is configured, not merely when the Azure
    # credentials are missing.
    if not any_provider_configured:
        raise Exception("No API Token Provided!")


set_tokens()

Token is added


### Image assessment

In [17]:
import base64
from IPython.display import Image
from langchain_core.messages import HumanMessage, AIMessage
from functions import use_huggingface_endpoint


def encode_image(image_path):
    """Return the file at ``image_path`` as a base64-encoded text string.

    The file is read in binary mode; the base64 bytes are decoded as UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    b64_bytes = base64.b64encode(raw_bytes)
    return str(b64_bytes, "utf-8")


def get_image_caption2(
    image_path,
    model_name="meta-llama/Llama-3.2-11B-Vision-Instruct",
    temperature=0.3,
    max_new_tokens=1024,
):
    """Ask a vision-capable chat model to describe the image at ``image_path``.

    The image file is base64-encoded, embedded in a ``data:`` URL and sent
    together with the text instruction "Describe the contents of this image."

    Args:
        image_path: Path of the image file to describe.
        model_name: Model identifier passed to ``use_huggingface_endpoint``.
        temperature: Second argument passed to ``use_huggingface_endpoint``.
        max_new_tokens: Value bound as ``max_tokens`` for the request.

    Returns:
        The ``content`` attribute of the model's response.
    """
    import mimetypes

    encoded_image = encode_image(image_path)

    # A data URL needs a full "type/subtype" media type; the bare "image"
    # used previously is not one. Derive it from the file extension and fall
    # back to PNG when the extension is unknown or not an image type.
    media_type, _ = mimetypes.guess_type(str(image_path))
    if not media_type or not media_type.startswith("image/"):
        media_type = "image/png"

    prompt = [
        # NOTE(review): the instruction is sent with the assistant role
        # (AIMessage) rather than as a system message -- presumably deliberate
        # for this model; confirm before changing.
        AIMessage(content="You are a bot that is good at analyzing images."),
        HumanMessage(content=[
            {"type": "text", "text": "Describe the contents of this image."},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{media_type};base64,{encoded_image}"
                },
            },
        ])
    ]

    llm = use_huggingface_endpoint(model_name, temperature)

    response = llm.bind(max_tokens=max_new_tokens).invoke(prompt)
    return response.content


from IPython.display import display

image_path = "../data/example_aircraft.png"
# Only the last expression of a cell is rendered automatically, so the image
# preview has to be displayed explicitly; the caption remains the cell result.
display(Image(image_path))
get_image_caption2(image_path)

Token is added


"The image shows a white Swiss International Air Lines passenger plane taxiing on the runway, likely preparing for takeoff. The all-white plane features red lettering reading 'SWISS' on its side with a red and white Swiss cross on its red tail. It has a door near its front with several windows down the side. Small wheels protrude from underneath the body of the plane, with black wheels housing those on the very front. There appears to be shadow cast on the ground in front of the plane, possibly from another plane.\n\nThe background features a building, likely an airport. The building is white, with tall, dark windows within. The sky is a clear blue, showing no clouds, and a forest is visible to the left of the building."

### Classification and structured output

In [33]:

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

# Prompt template for the tagging task; `{input}` receives the passage text.
_TAGGING_TEMPLATE = """
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
tagging_prompt = ChatPromptTemplate.from_template(_TAGGING_TEMPLATE)


# Schema used as the structured-output target of the tagging chain.
# Documented with `#` comments rather than a class docstring so that the
# model definition itself stays exactly as it was.
# NOTE(review): `enum` is passed to Field as an extra keyword; whether it is
# enforced at validation time (vs. only advertised in the schema) depends on
# the installed pydantic version -- confirm.
class Classification(BaseModel):
    # Overall mood of the passage: "happy", "neutral" or "sad".
    sentiment: str = Field(..., enum=["happy", "neutral", "sad"])
    # Aggressiveness rating from 1 to 5; higher means more aggressive.
    aggressiveness: int = Field(
        ...,
        description="describes how aggressive the statement is, the higher the number the more aggressive",
        enum=[1, 2, 3, 4, 5],
    )
    # Language of the passage, from the five listed options.
    language: str = Field(
        ..., enum=["spanish", "english", "french", "german", "italian"]
    )


# `tagging_prompt` is defined once, above the `Classification` model; the
# verbatim second definition that used to sit here was a copy-paste duplicate.

# LLM: Groq-hosted Llama model whose replies are returned as `Classification`
# instances.
llm = ChatGroq(temperature=0, model="llama-3.1-8b-instant").with_structured_output(
    Classification
)

# Pipeline: fill the prompt with the passage, then run the structured-output model.
chain = tagging_prompt | llm

In [48]:
# Sample passage to classify. Stored under its own name so the builtin
# `input()` is not shadowed for the rest of the kernel session; the prompt
# variable is still called "input".
sample_passage = "Salut, comment ça va ?"
chain.invoke({"input": sample_passage})

Classification(sentiment='neutral', aggressiveness=1, language='french')