# Building LLM applications: Notebook 02
# Structured outputs

## Initialize

In [1]:
import os
import dotenv
import json

from langchain_ollama import ChatOllama
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser

In [2]:
MODEL = 'llama3.2:3b-instruct-fp16'

In [3]:
# Read fro `.env` file
dotenv.load_dotenv()

OLLAMA_URL = os.getenv('OLLAMA_URL')
print(f"Using Ollama server: {OLLAMA_URL if OLLAMA_URL else 'local'}")

Using Ollama server: http://kqrw311-g5-12xlarge-a.img.astrazeneca.net:8080


## Excercise 01: Structured outputs in `system` prompt (JSON)

In [12]:
messages_template = [
    ('system', "Answer the user's question in JSON format"),
    ('human', "{question}")
]

llm = ChatOllama(model=MODEL, base_url=OLLAMA_URL)
prompt = ChatPromptTemplate.from_messages(messages=messages_template)
runnable = prompt | llm

# Run the 'runnable'
ret = runnable.invoke({"question": "What are the top 3 most popular programming languages?"})
ret.pretty_print()

```
{
  "top_programming_languages": [
    {
      "rank": 1,
      "language": "JavaScript",
      "percentage": "19.42%"
    },
    {
      "rank": 2,
      "language": "Python",
      "percentage": "17.57%"
    },
    {
      "rank": 3,
      "language": "Java",
      "percentage": "9.44%"
    }
  ]
}
```

```
{
  "top_programming_languages": [
    {
      "rank": 1,
      "language": "JavaScript",
      "percentage": "19.42%"
    },
    {
      "rank": 2,
      "language": "Python",
      "percentage": "17.57%"
    },
    {
      "rank": 3,
      "language": "Java",
      "percentage": "9.44%"
    }
  ]
}
```


## Excercise 02: Structured outputs (`format="JSON"`)

In [5]:
prompt_template = """
Please answer the following question:

{question}

"""

In [13]:
llm = ChatOllama(model=MODEL
                 , base_url=OLLAMA_URL
                 , format="json"        # Set the format to JSON
                 )
prompt = PromptTemplate.from_template(prompt_template)
runnable = prompt | llm

# Run the 'runnable'
ret = runnable.invoke({"question": "What are the top 3 most popular programming languages?"})
ret.pretty_print()


{
  "2022": "Python is the leading language, followed by JavaScript and Java",
  "2021": "JavaScript took the top spot, followed by Python and C++",
  "2020": "Python was the clear leader in popularity, with JavaScript and Java following closely behind"

}


## Excercise 03: Structured outputs (Pydantic model)

In [7]:
# Create the LLM object
from pydantic import BaseModel
from enum import Enum


class LanguageType(Enum):
    COMPILED = "compiled"
    INTERPRETED = "interpreted"


class ProgrammingLanguage(BaseModel):
    rank: int
    name: str
    type: LanguageType


class ProgrammingLanguages(BaseModel):
    languages: list[ProgrammingLanguage]


ProgrammingLanguages.model_json_schema()

{'$defs': {'LanguageType': {'enum': ['compiled', 'interpreted'],
   'title': 'LanguageType',
   'type': 'string'},
  'ProgrammingLanguage': {'properties': {'rank': {'title': 'Rank',
     'type': 'integer'},
    'name': {'title': 'Name', 'type': 'string'},
    'type': {'$ref': '#/$defs/LanguageType'}},
   'required': ['rank', 'name', 'type'],
   'title': 'ProgrammingLanguage',
   'type': 'object'}},
 'properties': {'languages': {'items': {'$ref': '#/$defs/ProgrammingLanguage'},
   'title': 'Languages',
   'type': 'array'}},
 'required': ['languages'],
 'title': 'ProgrammingLanguages',
 'type': 'object'}

In [14]:
llm = ChatOllama(model=MODEL
                 , base_url=OLLAMA_URL
                 , format=ProgrammingLanguages.model_json_schema()  # Set the format usning a Pydantic Schema
                )
prompt = PromptTemplate.from_template(prompt_template)
runnable = prompt | llm

# Run the 'runnable'
ret = runnable.invoke({"question": "What are the top 3 most popular programming languages?"})
ret.pretty_print()


{
  "languages": [
    {
      "rank": 1,
      "name": "JavaScript"
    ,"type": "interpreted"
    },
    {
      "rank": 2,
      "name": "Python",
      "type":"interpreted"
    },
    {
      "rank": 3,
      "name": "Java",
      "type":"compiled" }
]
}


## Excercise 04: LLM with structured outputs `llm.with_structured_output(...)`

In [18]:
llm = ChatOllama(model=MODEL, base_url=OLLAMA_URL)
llm_structured = llm.with_structured_output(ProgrammingLanguages.model_json_schema())
prompt = PromptTemplate.from_template(prompt_template)
runnable = prompt | llm_structured

# Run the 'runnable'
ret = runnable.invoke({"question": "What are the top 3 most popular programming languages?"})
print(f"Return type: {type(ret)}\nContent: {ret}")

Return type: <class 'dict'>
Content: {'languages': ['Python', 'Java', 'JavaScript']}


## Excercise 05: Output parsers (JSON)

In [10]:
llm = ChatOllama(model=MODEL, base_url=OLLAMA_URL, format="json")
prompt = PromptTemplate.from_template(prompt_template)
parser = JsonOutputParser()
runnable = prompt | llm | parser

# Run the 'runnable'
ret = runnable.invoke({"question": "What are the top 3 most popular programming languages?"})
print(f"Return type: {type(ret)}\nContent: {json.dumps(ret, indent=2)}")

Return type: <class 'dict'>
Content: {}


## Excercise 06: Output parsers (Pydantic)

In [11]:
llm = ChatOllama(model=MODEL
                 , base_url=OLLAMA_URL
                 , format=ProgrammingLanguages.model_json_schema()
                 )
prompt = PromptTemplate.from_template(prompt_template)
parser = JsonOutputParser(pydantic_object=ProgrammingLanguages)
runnable = prompt | llm | parser

# Run the 'runnable'
ret = runnable.invoke({"question": "What are the top 3 most popular programming languages?"})
print(f"Return type: {type(ret)}\nContent: {json.dumps(ret, indent=2)}")

# Create the Pydantic object
object = ProgrammingLanguages(**ret)
print(f"Type: {type(object)}\nObject: {object}")

Return type: <class 'dict'>
Content: {
  "languages": [
    {
      "rank": 1,
      "name": "JavaScript",
      "type": "interpreted"
    },
    {
      "rank": 2,
      "name": "Python",
      "type": "interpreted"
    },
    {
      "rank": 3,
      "name": "Java",
      "type": "compiled"
    }
  ]
}
Type: <class '__main__.ProgrammingLanguages'>
Object: languages=[ProgrammingLanguage(rank=1, name='JavaScript', type=<LanguageType.INTERPRETED: 'interpreted'>), ProgrammingLanguage(rank=2, name='Python', type=<LanguageType.INTERPRETED: 'interpreted'>), ProgrammingLanguage(rank=3, name='Java', type=<LanguageType.COMPILED: 'compiled'>)]
