# Structured Output

In [None]:
from devtools import debug
from dotenv import load_dotenv
import os
import sys

# Root of the genai-blueprint checkout. Set the GENAI_BLUEPRINT_ROOT environment variable
# to point at your own clone; otherwise the original default location is used.
# The value is read from the process environment as it is at this point — the .env file
# is only loaded further down in this cell.
project_root = (
    os.environ.get("GENAI_BLUEPRINT_ROOT")
    or '/mnt/c/Users/a884470/prj/genai-blueprint-main'
)
python_dir = os.path.join(project_root, 'python')
# Guard so that re-running the cell does not add the same entry to sys.path again.
if python_dir not in sys.path:
    sys.path.append(python_dir)
# Load variables from a .env file into the process environment (python-dotenv);
# verbose=True asks dotenv to report when no .env file can be found.
# As the last expression of the cell, the call's return value is what the cell displays.
load_dotenv(verbose=True)


#!export PYTHONPATH="/mnt/c/Users/a884470/prj/genai-blueprint-main"

True

### Method #1: provide format instructions in the prompt

In [26]:
from langchain.output_parsers import PydanticOutputParser

from pydantic import BaseModel, Field

In [29]:
"""
The usual "tell me a joke" LLM call.
"""

from ai_core.llm import get_llm  # noqa: E402
from ai_core.prompts import def_prompt # noqa: E402


# Schema of the structured answer requested from the LLM. The field descriptions below
# end up in the JSON schema that is embedded in the prompt, so they double as
# instructions to the model.
# NOTE(review): documented with `#` comments on purpose — a class docstring on a pydantic
# model is presumably exported into that schema as well; confirm before adding one.
class Joke(BaseModel):
    the_joke: str = Field(description="a good joke")
    explanation: str = Field(description="explain why it's funny")
    # Only the description asks for a 0 to 5 range; no numeric bound is declared on the field.
    rate: float = Field(description="rate how the joke is funny between 0 and 5")


# The parser plays two roles: it produces the format instructions injected into the
# prompt, and it turns the model's JSON reply into a Joke instance.
parser = PydanticOutputParser(pydantic_object=Joke)

# User message template; {format_instructions} is filled in once below,
# {topic} is supplied at invocation time.
prompt_with_format = """
    tell me  a joke on {topic}     
    --- 
    {format_instructions}"""

# Build the prompt, then bind the format instructions so that only `topic` remains open.
base_prompt = def_prompt(user=prompt_with_format)
format_instructions = parser.get_format_instructions()
structured_prompt = base_prompt.partial(format_instructions=format_instructions)

# None leaves the model choice to get_llm (the recorded run resolved it to 'gpt_4_azure').
LLM_ID = None
json_llm = get_llm(llm_id=LLM_ID, json_mode=True)

# prompt -> LLM in JSON mode -> pydantic parser
structured_joke = structured_prompt | json_llm | parser

r = structured_joke.invoke({"topic": "cat"})
debug(r)

[32m2024-11-13 17:53:32.154[0m | [1mINFO    [0m | [36mconfig[0m:[36myaml_file_config[0m:[36m43[0m - [1mload /mnt/c/Users/a884470/prj/genai-blueprint-main/app_conf.yaml[0m
[32m2024-11-13 17:53:32.199[0m | [1mINFO    [0m | [36mconfig[0m:[36myaml_file_config[0m:[36m52[0m - [1mOverride config from env. variable: azure[0m
[32m2024-11-13 17:53:32.254[0m | [1mINFO    [0m | [36mai_core.llm[0m:[36mget_llm[0m:[36m409[0m - [1mget LLM:'gpt_4_azure' -json_mode[0m


/tmp/ipykernel_30663/2095564118.py:30 <module>
    r: Joke(
        the_joke='Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!',
        explanation=(
            "This joke is a play on words. Cats are known for chasing mice, and 'mouse' also refers to the computer ac"
            "cessory used to navigate the cursor on the screen. The humor comes from the double meaning of the word 'm"
            "ouse' and the image of a cat literally watching a computer mouse."
        ),
        rate=3.5,
    ) (Joke)


Joke(the_joke='Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!', explanation="This joke is a play on words. Cats are known for chasing mice, and 'mouse' also refers to the computer accessory used to navigate the cursor on the screen. The humor comes from the double meaning of the word 'mouse' and the image of a cat literally watching a computer mouse.", rate=3.5)

In [30]:
# Inspect the prompt template object itself: format_instructions appears as a partial
# variable, and 'topic' is the only remaining input variable.
debug(structured_prompt)

/tmp/ipykernel_30663/2639052080.py:1 <module>
    structured_prompt: ChatPromptTemplate(
        input_variables=['topic'],
        input_types={},
        partial_variables={
            'format_instructions': (
                'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n'
                '\n'
                'As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strin'
                'gs", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\n'
                'the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"propertie'
                's": {"foo": ["bar", "baz"]}} is not well-formatted.\n'
                '\n'
                'Here is the output schema:\n'
                '```\n'
                '{"properties": {"the_joke": {"description": "a good joke", "title": "The Joke", "type": "string"}, "e'
                'xplanation": {"descri

ChatPromptTemplate(input_variables=['topic'], input_types={}, partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"the_joke": {"description": "a good joke", "title": "The Joke", "type": "string"}, "explanation": {"description": "explain why it\'s funny", "title": "Explanation", "type": "string"}, "rate": {"description": "rate how the joke is funny between 0 and 5", "title": "Rate", "type": "number"}}, "required": ["the_joke", "explanation", "rate"]}\n```'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variable

In [31]:
# Render the prompt for a sample topic and print the content of its first message:
# the user request followed by the generated format instructions.
print(structured_prompt.invoke({"topic": "cat"}).messages[0].content)

tell me  a joke on cat     
--- 
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"the_joke": {"description": "a good joke", "title": "The Joke", "type": "string"}, "explanation": {"description": "explain why it's funny", "title": "Explanation", "type": "string"}, "rate": {"description": "rate how the joke is funny between 0 and 5", "title": "Rate", "type": "number"}}, "required": ["the_joke", "explanation", "rate"]}
```


### Method #2: use "with_structured_output" (based on function calls)

In [32]:
# Plain request: this method puts no format instructions in the prompt text.
prompt = "tell me  a joke on {topic}"

# Model identifier handed to get_llm for this method.
# Alternative: MODEL = None
MODEL = "gpt_4_azure"

joke_prompt = def_prompt(prompt)
# Wrapping the model with with_structured_output(Joke) makes the chain return Joke instances.
structured_llm = get_llm(llm_id=MODEL).with_structured_output(Joke)
chain = joke_prompt | structured_llm

debug(chain.invoke(({"topic": "cat"})))

[32m2024-11-13 17:58:56.225[0m | [1mINFO    [0m | [36mai_core.llm[0m:[36mget_llm[0m:[36m409[0m - [1mget LLM:'gpt_4_azure'[0m


/tmp/ipykernel_30663/925907821.py:6 <module>
    chain.invoke(({"topic": "cat"})): Joke(
        the_joke='Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!',
        explanation=(
            "This joke is a play on words. In the context of computers, a 'mouse' is a device used to navigate the cur"
            "sor. However, in the context of cats, a 'mouse' is a small rodent that they often chase. The humor comes "
            "from the double meaning of the word 'mouse' and the image of a cat literally sitting on a computer to wat"
            'ch a computer mouse, as if it were a real mouse.'
        ),
        rate=3.0,
    ) (Joke)


Joke(the_joke='Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!', explanation="This joke is a play on words. In the context of computers, a 'mouse' is a device used to navigate the cursor. However, in the context of cats, a 'mouse' is a small rodent that they often chase. The humor comes from the double meaning of the word 'mouse' and the image of a cat literally sitting on a computer to watch a computer mouse, as if it were a real mouse.", rate=3.0)

## Assignment (Optional)
Rate the above joke.
Use the LangChain enum output parser: https://python.langchain.com/v0.1/docs/modules/model_io/output_parsers/types/enum/


In [33]:
from enum import Enum


# Discrete rating scale for a joke, ordered from worst (0) to best (2).
# NOTE(review): the enum output parser linked in the assignment reportedly accepts only
# enums whose values are strings — confirm before passing this int-valued enum to it.
class JokeRater(Enum):
    NOT_SO_GOOD = 0
    GOOD = 1
    VERY_GOOD = 2