# Tutorial - parse_yaml
- Modified: 7 Nov 2025
- Compatible with pydantic v2.12.4
- Parses the LLM output as YAML and converts it to a Python dictionary for easy access
- Uses concise `output_format` to save tokens
- Converts `output_format` into pydantic schema automatically, and uses pydantic to validate output
- Able to process datatypes: `int`, `float`, `str`, `bool`, `list`, `dict`, `date`, `datetime`, `time`, `UUID`, `Decimal`
- Able to process: `None`, `Any`, `Union`, `Optional`
- Default datatype when not specified is `Any`
- Error correction of up to `num_tries` times (default: 3)

In [1]:
import os
from strictjson import parse_yaml, parse_yaml_async

In [2]:
from dotenv import load_dotenv
# put your LLM API keys in your .env
load_dotenv()

True

In [3]:
# OpenRouter model identifier read by `llm` and `llm_async` below.
# Keep exactly one assignment active; the commented lines are alternatives
# that can be swapped in.
# MODEL = "anthropic/claude-3.7-sonnet"
# MODEL = "anthropic/claude-3.5-sonnet"
# MODEL = "openai/o3-mini"
# MODEL = "openai/o1-mini"
# MODEL = "openai/gpt-4o"
# MODEL = "openai/gpt-4o-mini"
# MODEL = "meta-llama/llama-3.3-70b-instruct"
# MODEL = "meta-llama/llama-3.2-90b-vision-instruct"
# MODEL = "meta-llama/llama-3.1-70b-instruct"
# MODEL = "deepseek/deepseek-chat"
MODEL = "google/gemini-2.5-flash"

# Define LLMs
- LLMs take in a `system_prompt` and a `user_prompt` and output a `str`

In [4]:
# !pip install openai

In [5]:
def llm(system_prompt: str, user_prompt: str, **kwargs):
    ''' Send one chat request through OpenRouter and return the reply text.

    Use your favourite LLM here - OpenRouter is used so that various LLMs can
    be tested by changing the global `MODEL`.

    Args:
        system_prompt: Instructions for the model; the system message is
            omitted when this is falsy (e.g. an empty string).
        user_prompt: Content sent as the user message.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        The `content` of the first choice in the completion response.

    Raises:
        KeyError: If `OPENROUTER_API_KEY` is not set in the environment.
    '''
    # the `openai` package must be installed for this import to succeed
    from openai import OpenAI

    openrouter = OpenAI(
        api_key=os.environ["OPENROUTER_API_KEY"],
        base_url="https://openrouter.ai/api/v1",
    )

    user_message = {"role": "user", "content": user_prompt}
    if system_prompt:
        system_message = {
            "role": "system",
            "content": [{"type": "text", "text": system_prompt}],
        }
        chat = [system_message, user_message]
    else:
        chat = [user_message]

    # change the model as you wish (via `MODEL`)
    completion = openrouter.chat.completions.create(messages=chat, model=MODEL)
    return completion.choices[0].message.content

In [6]:
async def llm_async(system_prompt: str, user_prompt: str, **kwargs):
    ''' Awaitable chat request through OpenRouter; returns the reply text.

    Use your favourite LLM here - OpenRouter is used so that various LLMs can
    be tested by changing the global `MODEL`.

    Args:
        system_prompt: Instructions for the model; the system message is
            omitted when this is falsy (e.g. an empty string).
        user_prompt: Content sent as the user message.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        The `content` of the first choice in the completion response.

    Raises:
        KeyError: If `OPENROUTER_API_KEY` is not set in the environment.
    '''
    # the `openai` package must be installed for this import to succeed
    from openai import AsyncOpenAI

    openrouter = AsyncOpenAI(
        api_key=os.environ["OPENROUTER_API_KEY"],
        base_url="https://openrouter.ai/api/v1",
    )

    user_message = {"role": "user", "content": user_prompt}
    if system_prompt:
        system_message = {
            "role": "system",
            "content": [{"type": "text", "text": system_prompt}],
        }
        chat = [system_message, user_message]
    else:
        chat = [user_message]

    # change the model as you wish (via `MODEL`)
    completion = await openrouter.chat.completions.create(messages=chat, model=MODEL)
    return completion.choices[0].message.content

# Basic Syntax
- `system_prompt`: Instructions for the LLM
- `user_prompt`: Content to perform instructions on
- `output_format`: Concise dictionary, with descriptions and types embedded in its values, that specifies the format the LLM should output in. If a type is given, it should be a standalone string or come after a comma (e.g. `"int"` or `"an age, int"` or `"type: int"` or `"an age, type: int"`)
- `llm`: Your llm that takes in `system_prompt` and `user_prompt` and outputs a `str`

# Two main ways to use parse_yaml

### Method 1: Using the pydantic model automatically generated via output_format (preferred)

In [7]:
# Method 1: the concise `output_format` dict is the only schema supplied.
# A one-element list holding a dict describes the shape of every list item.
# debug=True is what prints the schema / LLM-result trace for the call.
parse_yaml(
    system_prompt="You are a helpful assistant",
    user_prompt="Generate a birthday event for Alex",
    output_format={
        "name": "str",
        "date": "date",
        "participants": [{"Name": "str", "Age": "int"}],
    },
    llm=llm,
    debug=True,
)

## Concise YAML format used by parse_yaml:
name: str
date: date
participants:
- Name: str
  Age: int



## Equivalent YAML Schema:
$defs:
  PydanticModelCcde8112d90e4339a1f5a94cebe3385cParticipants:
    additionalProperties: false
    properties:
      Age:
        title: Age
        type: integer
      Name:
        title: Name
        type: string
    required:
    - Name
    - Age
    title: PydanticModelCcde8112d90e4339a1f5a94cebe3385cParticipants
    type: object
additionalProperties: false
properties:
  date:
    format: date
    title: Date
    type: string
  name:
    title: Name
    type: string
  participants:
    items:
      $ref: '#/$defs/PydanticModelCcde8112d90e4339a1f5a94cebe3385cParticipants'
    title: Participants
    type: array
required:
- name
- date
- participants
title: pydantic_model_ccde8112d90e4339a1f5a94cebe3385c
type: object



## LLM Result: ```yaml
name: Birthday Party for Alex
date: 2024-08-15
participants:
- Name: Alex
  Age: 30
- Name: Sarah
  Age: 29
- Name: Mike
  Age: 31
```


{'name': 'Birthday Party for Alex',
 'date': datetime.date(2024, 8, 15),
 'participants': [{'Name': 'Alex', 'Age': 30},
  {'Name': 'Sarah', 'Age': 29},
  {'Name': 'Mike', 'Age': 31}]}

### Method 2: Using the pydantic model specified in `parse_yaml` input (only use if you have nested classes)

In [8]:
# Generate pydantic model
from typing import List, Dict, Union
from pydantic import BaseModel, Field
from datetime import date

# Schema for one entry of `CalendarEvent.participants`: a name and an integer age.
class Participant(BaseModel):
    Name: str
    Age: int

# Top-level model passed to `parse_yaml` through `pydantic_model`; it has the
# same fields as the Method 1 `output_format` (name, date, list of participants).
class CalendarEvent(BaseModel):
    name: str
    # NOTE(review): the field shares its name with the imported `date` type; the
    # constrained version of this model later in the notebook imports the type
    # as `Date` instead.
    date: date
    # list of nested models -> shown as a YAML list of Name/Age mappings
    participants: List[Participant]

In [9]:
# Method 2: hand the pre-built `CalendarEvent` model to parse_yaml instead of
# an `output_format` dict.
parse_yaml(
    system_prompt="You are a helpful assistant",
    user_prompt="Generate a birthday event for Alex",
    pydantic_model=CalendarEvent,
    llm=llm,
    debug=True,
)

## Concise YAML format used by parse_yaml:
name: str
date: date
participants:
- Name: str
  Age: int



## Equivalent YAML Schema:
$defs:
  Participant:
    properties:
      Age:
        title: Age
        type: integer
      Name:
        title: Name
        type: string
    required:
    - Name
    - Age
    title: Participant
    type: object
properties:
  date:
    format: date
    title: Date
    type: string
  name:
    title: Name
    type: string
  participants:
    items:
      $ref: '#/$defs/Participant'
    title: Participants
    type: array
required:
- name
- date
- participants
title: CalendarEvent
type: object



## LLM Result: ```yaml
name: Birthday Party for Alex
date: 2024-10-26
participants:
- Name: Alex
  Age: 30
- Name: Maria
  Age: 29
- Name: David
  Age: 31
```


## Parsed YAML before type checks:
name: Birthday Party for Alex
date: 2024-10-26
participants:
- Name: Alex
  Age: 30
- Name: Maria
  Age: 29
- Name: David
  Age: 31



{'name': 'Birthday Party for Alex',
 'date': datetime.date(2024, 10, 26),
 'participants': [{'Name': 'Alex', 'Age': 30},
  {'Name': 'Maria', 'Age': 29},
  {'Name': 'David', 'Age': 31}]}

# Add in descriptions for fields
- Add in descriptions within the values of the fields

### Method 1: Using the pydantic model automatically generated via output_format (preferred)

In [10]:
# Method 1 with descriptions: each value has the form "<description>, <type>".
# The description text becomes the field's `description` in the generated schema.
# (System prompt spelling corrected to "assistant", matching the earlier cells.)
parse_yaml(
    system_prompt="You are a helpful assistant",
    user_prompt="Generate a birthday event for Alex",
    output_format={
        "name": "Name of birthday party, str",
        "date": "Any date in Mar 2026, date",
        "participants": [
            {
                "Name": "starting with A, str",
                "Age": "between 5 to 12, int",
            },
        ],
    },
    llm=llm,
    debug=True,
)

## Concise YAML format used by parse_yaml:
name: Name of birthday party, str
date: Any date in Mar 2026, date
participants:
- Name: starting with A, str
  Age: between 5 to 12, int



## Equivalent YAML Schema:
$defs:
  PydanticModelDeb369dd76c64a19868ec2a064d2d43aParticipants:
    additionalProperties: false
    properties:
      Age:
        description: between 5 to 12
        title: Age
        type: integer
      Name:
        description: starting with A
        title: Name
        type: string
    required:
    - Name
    - Age
    title: PydanticModelDeb369dd76c64a19868ec2a064d2d43aParticipants
    type: object
additionalProperties: false
properties:
  date:
    description: Any date in Mar 2026
    format: date
    title: Date
    type: string
  name:
    description: Name of birthday party
    title: Name
    type: string
  participants:
    items:
      $ref: '#/$defs/PydanticModelDeb369dd76c64a19868ec2a064d2d43aParticipants'
    title: Participants
    type: array
required:
- name
- date
- participants
title: pydantic_model_deb369dd76c64a19868ec2a064d2d43a
type: object

{'name': "Alex's Birthday Bash",
 'date': datetime.date(2026, 3, 15),
 'participants': [{'Name': 'Alice', 'Age': 7},
  {'Name': 'Aaron', 'Age': 9},
  {'Name': 'Amy', 'Age': 6}]}

### Method 2: Using the pydantic model specified in `parse_yaml` input (only use if you have nested classes)

In [11]:
from typing import List
from pydantic import BaseModel, Field
from datetime import date as Date

# Attendee model with validation constraints attached to each field.
# `...` as the first Field argument marks the field as required.
class Participant(BaseModel):
    # must match the regex ^A.*$, i.e. begin with a capital "A"
    Name: str = Field(..., pattern=r"^A.*$", description="starting with A")
    # inclusive bounds: 5 <= Age <= 12 (ge / le)
    Age: int = Field(..., ge=5, le=12, description="between 5 to 12")

# Top-level event model passed to `parse_yaml` through `pydantic_model`.
# `name` and `date` are required (`...`) and carry a description for the LLM.
class CalendarEvent(BaseModel):
    name: str = Field(..., description="Name of birthday party")
    # `Date` is `datetime.date` imported under an alias - presumably so the field
    # itself can be named `date` without clashing with the type name
    date: Date = Field(..., description="Any date in March 2026")
    # each entry is validated against the constrained `Participant` model
    participants: List[Participant]

In [12]:
# Method 2 with descriptions: the descriptions and constraints come from the
# `Field(...)` definitions on `CalendarEvent` / `Participant`.
# (System prompt spelling corrected to "assistant", matching the earlier cells.)
parse_yaml(
    system_prompt="You are a helpful assistant",
    user_prompt="Generate a birthday event for Alex",
    pydantic_model=CalendarEvent,
    llm=llm,
    debug=True,
)

## Concise YAML format used by parse_yaml:
name: Name of birthday party, str
date: Any date in March 2026, date
participants:
- Name: starting with A, str
  Age: between 5 to 12, int



## Equivalent YAML Schema:
$defs:
  Participant:
    properties:
      Age:
        description: between 5 to 12
        maximum: 12
        minimum: 5
        title: Age
        type: integer
      Name:
        description: starting with A
        pattern: ^A.*$
        title: Name
        type: string
    required:
    - Name
    - Age
    title: Participant
    type: object
properties:
  date:
    description: Any date in March 2026
    format: date
    title: Date
    type: string
  name:
    description: Name of birthday party
    title: Name
    type: string
  participants:
    items:
      $ref: '#/$defs/Participant'
    title: Participants
    type: array
required:
- name
- date
- participants
title: CalendarEvent
type: object



## LLM Result: ```yaml
name: Alex's Birthday Bash
date: 2026-03-15
participants:
- Name: Alex
  Age: 8
- Name: Amy
  Age: 7
- Name: Andrew
  Age: 9
```

{'name': "Alex's Birthday Bash",
 'date': datetime.date(2026, 3, 15),
 'participants': [{'Name': 'Alex', 'Age': 8},
  {'Name': 'Amy', 'Age': 7},
  {'Name': 'Andrew', 'Age': 9}]}

# Async Method
- Note: Make sure your llm is awaitable (async function)

In [13]:
await parse_yaml_async(system_prompt = "You are a helpful assistent",
    user_prompt = "Generate a birthday event for Alex",
    output_format = {"name": "Name of birthday party, str",
                     "date": "Any date in Mar 2026, date",
                     "participants": [{'Name': 'starting with A, str', 
                                       'Age': 'between 5 to 12, int'}]}, 
                    llm = llm_async)

{'name': "Alex's Birthday Bash",
 'date': datetime.date(2026, 3, 15),
 'participants': [{'Name': 'Alex', 'Age': 8},
  {'Name': 'Ashley', 'Age': 7},
  {'Name': 'Andrew', 'Age': 9},
  {'Name': 'Alice', 'Age': 6}]}

# Info Extraction

## Method 1: Output Format

In [14]:
invoice_str = '''
=========================================
                   INVOICE
=========================================
Invoice Number: 1234
Date of Invoice: 23 Mar 1990
-----------------------------------------
Line Items:
-----------------------------------------
| Description    | Qty | Unit Price | Line Total |
-----------------------------------------
| Product A      |  2  |   $10.00   |   $20.00   |
| Product B      |  1  |   $15.00   |   $15.00   |
| Service C      |  3  |    $7.50   |   $22.50   |
| Item D         |  5  |    $4.00   |   $20.00   |
| Service E      |  1  |   $30.00   |   $30.00   |
-----------------------------------------
Subtotal:                         $107.50
Tax (10%):                        $10.75
-----------------------------------------
Total:                            $118.25
=========================================
       Thank you for your business!
=========================================
'''

In [15]:
# Invoice extraction with an `output_format`: keys may contain spaces and
# parentheses and are used verbatim as the keys of the result.
parse_yaml(
    system_prompt="Extract details of the invoice",
    user_prompt=invoice_str,
    output_format={
        "Invoice Number": "int",
        # the invoice date is requested twice: as a date object and as a formatted string
        "Invoice Date (datetime obj)": "date",
        "Invoice Date (str)": "DD-MMM-YYYY, str",
        # Optional[...] allows None when the invoice has no such value
        "Invoice Due Date": "Optional[date]",
        "Line Items": [
            {
                "Description": "str",
                "Quantity": "int",
                "Unit Price": "float",
                "Total Price": "float",
            },
        ],
        "Tax": "Tax amount, if any, Optional[float]",
        "Total Value": "Total value for invoice, float",
    },
    llm=llm,
)

{'Invoice Number': 1234,
 'Invoice Date (datetime obj)': datetime.date(1990, 3, 23),
 'Invoice Date (str)': '23-Mar-1990',
 'Invoice Due Date': None,
 'Line Items': [{'Description': 'Product A',
   'Quantity': 2,
   'Unit Price': 10.0,
   'Total Price': 20.0},
  {'Description': 'Product B',
   'Quantity': 1,
   'Unit Price': 15.0,
   'Total Price': 15.0},
  {'Description': 'Service C',
   'Quantity': 3,
   'Unit Price': 7.5,
   'Total Price': 22.5},
  {'Description': 'Item D',
   'Quantity': 5,
   'Unit Price': 4.0,
   'Total Price': 20.0},
  {'Description': 'Service E',
   'Quantity': 1,
   'Unit Price': 30.0,
   'Total Price': 30.0}],
 'Tax': 10.75,
 'Total Value': 118.25}

## Method 2: Use Pydantic Model

In [16]:
from typing import List, Optional
from datetime import date
from pydantic import BaseModel, Field

# One entry of `Invoice.line_items`.
# Attribute names are plain Python identifiers; each `alias` supplies the key
# (with capitals / spaces) that matches the Method 1 `output_format`.
class LineItem(BaseModel):
    description: str = Field(..., alias="Description")
    quantity: int = Field(..., alias="Quantity")
    unit_price: float = Field(..., alias="Unit Price")
    total_price: float = Field(..., alias="Total Price")


# Whole-invoice model passed to `parse_yaml` through `pydantic_model`.
# The aliases reproduce the exact keys used by the Method 1 `output_format`.
class Invoice(BaseModel):
    invoice_number: int = Field(..., alias="Invoice Number")
    # the invoice date is captured twice: as a `date` object and as a formatted string
    invoice_date_datetime_obj: date = Field(..., alias="Invoice Date (datetime obj)")
    invoice_date_str: str = Field(..., alias="Invoice Date (str)", description="DD-MMM-YYYY")
    # optional: defaults to None when no due date is provided
    invoice_due_date: Optional[date] = Field(None, alias="Invoice Due Date")
    line_items: List[LineItem] = Field(..., alias="Line Items")
    # optional: defaults to None when no tax amount is provided
    tax: Optional[float] = Field(None, alias="Tax", description="Tax amount, if any")
    total_value: float = Field(..., alias="Total Value", description="Total value for invoice")

In [17]:
# Invoice extraction driven by the `Invoice` pydantic model instead of an
# `output_format` dict.
parse_yaml(
    system_prompt="Extract details of the invoice",
    user_prompt=invoice_str,
    pydantic_model=Invoice,
    llm=llm,
)

{'Invoice Number': 1234,
 'Invoice Date (datetime obj)': datetime.date(1990, 3, 23),
 'Invoice Date (str)': '23-Mar-1990',
 'Invoice Due Date': None,
 'Line Items': [{'Description': 'Product A',
   'Quantity': 2,
   'Unit Price': 10.0,
   'Total Price': 20.0},
  {'Description': 'Product B',
   'Quantity': 1,
   'Unit Price': 15.0,
   'Total Price': 15.0},
  {'Description': 'Service C',
   'Quantity': 3,
   'Unit Price': 7.5,
   'Total Price': 22.5},
  {'Description': 'Item D',
   'Quantity': 5,
   'Unit Price': 4.0,
   'Total Price': 20.0},
  {'Description': 'Service E',
   'Quantity': 1,
   'Unit Price': 30.0,
   'Total Price': 30.0}],
 'Tax': 10.75,
 'Total Value': 118.25}

# Long Context processing

In [18]:
text = '''SINGAPORE – Singapore will study the potential deployment of nuclear power here and take further steps to systematically build up capabilities in this area, Prime Minister Lawrence Wong said on Feb 18.

Noting that interest in nuclear energy is increasing worldwide, with several countries within the region planning to include nuclear in their energy mix, PM Wong said Singapore will need new capabilities to evaluate options and consider if there is a solution that the island-state can deploy in a safe, cost-effective way. 

Malaysia and Indonesia, for example, have operated research reactors for some time, he noted. 


ADVERTISING


“These capabilities will also be needed for nuclear safety, which will become more salient given the growing regional interest in nuclear power,” said PM Wong. 

The Government will also pump in another $5 billion into its existing Future Energy Fund to support Singapore’s efforts to secure clean power, he said in his Budget speech.

“Be it electricity imports, hydrogen or nuclear, we will need to make major investments in new infrastructure,” added PM Wong, who is also Finance Minister.


The Future Energy Fund was announced during Budget 2024 with an initial $5 billion investment. It was set up to catalyse investments into clean energy technology that may involve high upfront costs and significant commercial, technological and geopolitical risks. 


Catch up on the news that everyone’s talking about
Enter your e-mail
 Sign up
By signing up, I accept SPH Media's Terms & Conditions and Privacy Policy as amended from time to time.


Yes, I would also like to receive SPH Media Group's SPH Media Limited, its related corporations and affiliates as well as their agents and authorised service providers. marketing and promotions.
The fund is part of Singapore’s efforts to address its resource constraints. 

PM Wong cited how the Republic overcame its water challenges through innovations such as recycling used water to form Newater, and building up its water industry. 


“Today, we face a different challenge. The industries of the future – artificial intelligence, semiconductors, biopharmaceuticals – are highly energy-intensive. To meet these growing energy needs and to bring down our carbon emissions at the same time, we will need more clean power,” he said.

“Expanding access to clean energy is therefore a major national imperative.” 

Singapore has not made a decision to adopt nuclear energy. But given that the Republic has limitations in accessing renewable energy, nuclear is among various low-carbon sources that the country is looking into amid considerations of the nation’s energy security, affordability and carbon footprint. 

“Our options are inherently limited because we do not have the natural resources nor the land to meet our needs using hydro, wind or solar power,” PM Wong said.


Singapore now relies on natural gas, a fossil fuel, for some 95 per cent of its energy needs. The power sector contributes about 40 per cent of the country’s total emissions.

Achieving Singapore’s long-term climate target of reaching net-zero emissions by 2050 would require reducing carbon emissions from this sector. 

On Feb 10, the Republic published its 2035 climate target – to reduce its emissions to between 45 million tonnes and 50 million tonnes, down from the 60 million tonnes it expects to emit in 2030. 

PM Wong said that while Singapore had earlier assessed that conventional nuclear technologies were not suitable for Singapore, the country had continued to keep a close watch on developments in this space to keep its options open. 

“Since then, we have seen significant advancements in nuclear technologies,” he added, citing small modular reactors (SMRs) as one advanced nuclear technology that has better safety features than conventional, large plants. 

SMRs are compact systems that can be factory-assembled and installed in dense urban areas. The power capacity of one SMR is about a third of that of a traditional reactor.

PM Wong added that a few SMRs have been deployed elsewhere, and more could become operational by the end of the decade. 

Over the past couple of years, Singapore’s exploration of nuclear energy has been hotting up. 

It started around 2022, when a local report on future energy scenarios mentioned that emerging energy technologies, including nuclear and geothermal, could potentially supply around 10 per cent of Singapore’s energy needs by 2050.

More on this Topic
PM Wong unveils bumper SG60 Budget for all Singaporeans
Singapore Budget 2025: Read more
In July 2024, the Republic inked the 123 Agreement on Nuclear Cooperation with the US, which will allow Singapore to learn more about nuclear technologies and scientific research from American organisations.

PM Wong noted that Singapore is working on similar cooperation with other countries that have capabilities and experience in civilian nuclear power, particularly SMRs. 

In the nearer-term, PM Wong said one immediate solution to green the country’s energy mix is to import low-carbon electricity from the region, and the Republic has been progressing on this front. 

Singapore has inked deals with Indonesia, Cambodia and Vietnam to import 5.6 gigawatts of low-carbon electricity by 2035, and much of the green electricity is expected to come from solar, hydropower and wind.

Under a pilot that was expanded in 2024, Singapore is importing hydropower from Laos via Thailand and Malaysia. In late 2024, it was said that additional energy supply will come from Malaysia, increasing the total electricity import capacity to 200MW from 100MW. Malaysia’s grid comprises coal and natural gas. 

“By 2035, we expect that about one-third of our projected electricity demand can be met through electricity imports,” said PM Wong. 

On low-carbon hydrogen – an emerging fuel that does not produce planet-warming emissions when burned – PM Wong said that Singapore has been closely evaluating its use. 

But there are inherent challenges in its production, storage and transportation, he said, which makes it hard to scale up in a commercially viable manner.'''

In [19]:
# Long-context extraction over the news article held in `text`.
parse_yaml(
    system_prompt="Extract information from text",
    user_prompt=text,
    output_format={
        "Entities": "organisations only, List[str]",
        "Sentiment": "Enum['Happy', 'Sad', 'Neutral']",
        "News about DeepSeek": "if any, Optional[str]",
        "Summary": "one sentence of no more than 10 words, str",
        # no type given for the next two keys, so the default datatype (Any) applies
        "Code": "code to print out entities",
        "Latex": "latex code to write article in latex",
    },
    llm=llm,
)

{'Entities': ['Singapore',
  'Malaysia',
  'Indonesia',
  'SPH Media',
  'US',
  'Cambodia',
  'Vietnam',
  'Laos',
  'Thailand'],
 'Sentiment': 'Neutral',
 'News about DeepSeek': None,
 'Summary': 'Singapore explores nuclear power, expanding clean energy investments.',
 'Code': "entities = ['Singapore', 'Malaysia', 'Indonesia', 'SPH Media', 'US', 'Cambodia', 'Vietnam', 'Laos', 'Thailand']\nfor entity in entities:\n    print(entity)\n",
 'Latex': '\\documentclass{article}\n\\usepackage{amsmath}\n\\begin{document}\n\nSingapore is studying the potential deployment of nuclear power and systematically building up capabilities in this area, as announced by Prime Minister Lawrence Wong on February 18. This comes amid increasing global interest in nuclear energy, with several regional countries planning to incorporate it into their energy mix.\n\nThe government will inject an additional \\$5 billion into its existing Future Energy Fund to support efforts to secure clean power. This fund, init

# Advanced Tests

# Test 1: Multiple Datatypes

In [20]:
# Sync: one call mixing list, dict, Union and Enum (string and integer) types
parse_yaml(
    system_prompt="Give me 5 names on a topic",
    user_prompt="weather",
    output_format={
        "Names": "Great sounding names, List[str]",
        "Meanings": "Name and meaning, dict",
        "Chinese Meanings": "Name and meaning in chinese, dict",
        "Lucky Name or Number": "List[Union[int, str]]",
        "Code": "Python code to generate 5 names",
        "Sentiment": "Enum['Happy', 'Sad', 'Other']",
        "Sentiment on a Scale of 1 (Bored) to 5 (Excited)": "Enum[1, 2, 3, 4, 5]",
    },
    llm=llm,
)

{'Names': ['Rain', 'Cloud', 'Storm', 'Breeze', 'Sun'],
 'Meanings': {'Rain': 'Water falling from the sky',
  'Cloud': 'Visible mass of water droplets suspended in the atmosphere',
  'Storm': 'Violent disturbance of the atmosphere with strong winds and usually rain, thunder, lightning, or snow',
  'Breeze': 'A gentle wind',
  'Sun': 'The star that the Earth orbits'},
 'Chinese Meanings': {'Rain': '雨 (yǔ)',
  'Cloud': '云 (yún)',
  'Storm': '风暴 (fēngbào)',
  'Breeze': '微风 (wēifēng)',
  'Sun': '太阳 (tàiyáng)'},
 'Lucky Name or Number': ['Sun', 7],
 'Code': 'import random\nweather_names = ["Rain", "Cloud", "Storm", "Breeze", "Sun"]\ngenerated_names = random.sample(weather_names, 5)\nprint(generated_names)\n',
 'Sentiment': 'Happy',
 'Sentiment on a Scale of 1 (Bored) to 5 (Excited)': 4}

In [21]:
# Async
await parse_yaml_async(system_prompt = "Give me 5 names on a topic", 
           user_prompt = "weather",
           output_format = {"Names": "Great sounding names, List[str]",
                            "Meanings": "Name and meaning, dict", 
                            "Chinese Meanings": "Name and meaning in chinese, dict",
                            "Lucky Name or Number": "List[Union[int, str]]",
                            "Code": "Python code to generate 5 names",
                            "Sentiment": "Enum['Happy', 'Sad', 'Other']",
                            "Sentiment on a Scale of 1 (Bored) to 5 (Excited)": "Enum[1, 2, 3, 4, 5]"},
           llm = llm_async)

Retrying...


{'Names': ['Storm', 'Rain', 'Cloud', 'Breeze', 'Sunny'],
 'Meanings': {'Storm': 'A violent disturbance of the atmosphere with strong winds and usually rain, thunder, lightning, or snow.',
  'Rain': 'Moisture condensed from the atmosphere that falls visibly in distinct drops.',
  'Cloud': 'A visible mass of condensed watery vapor floating in the atmosphere, typically high above the general level of the ground.',
  'Breeze': 'A gentle wind.',
  'Sunny': 'Bright with or full of sunshine.'},
 'Chinese Meanings': {'Storm': '暴风 (Bàofēng)',
  'Rain': '雨 (Yǔ)',
  'Cloud': '云 (Yún)',
  'Breeze': '微风 (Wēifēng)',
  'Sunny': '晴朗 (Qínglǎng)'},
 'Lucky Name or Number': ['Storm', 7, 3, 'Rain (Lucky Name)'],
 'Code': 'import random\n\nweather_names = ["Gust", "Mist", "Shine", "Thunder", "Frost"]\ngenerated_names = random.sample(weather_names, 5)\nprint(generated_names)\n',
 'Sentiment': 'Happy',
 'Sentiment on a Scale of 1 (Bored) to 5 (Excited)': 4}

# Test 2: Complicated Special Character Test

In [22]:
# Special-character test: the table headers contain %, $, @ and # and are
# expected back as dictionary keys inside "Datapoints".
# The table lines stay at column 0 because they are part of the prompt string.
parse_yaml(
    system_prompt='''Output the contents of this table
Day | % precipitation over time | $%@#% (a nonsense word) index
23  | 40.2 | 20.4
24  | 50.3 | 23.4
''',
    user_prompt="Output the key-value pairs of the table",
    output_format={
        "Description": "Brief one sentence description, str",
        "Datapoints": "List[dict]",
    },
    llm=llm,
)

{'Description': 'Table with daily precipitation and a nonsense index.',
 'Datapoints': [{'Day': 23,
   '% precipitation over time': 40.2,
   '$%@#% (a nonsense word) index': 20.4},
  {'Day': 24,
   '% precipitation over time': 50.3,
   '$%@#% (a nonsense word) index': 23.4}]}

# How `parse_yaml` works under the hood
- Converts `output_format` into a Pydantic model, then uses Pydantic validation to check the LLM output and drive error correction
- You can also use `convert_schema_to_pydantic` to convert the `output_format` to a Pydantic model, so you can use it for structured outputs

In [23]:
from strictjson import convert_pydantic_to_yaml, convert_schema_to_pydantic
import json

# Nested example schema (descriptions + types, with a list of participant dicts).
# NOTE(review): this value is immediately overwritten by the assignment below,
# so it has no effect on the following cells unless that line is removed.
output_format = {"name": "Name of birthday party, str",
                     "date": "Any date in Mar 2026, date",
                     "participants": [{'Name': 'starting with A, str', 
                                       'Age': 'between 5 to 12, int'}]}

# Flat four-field schema - this is the one the next cells actually convert.
output_format = {"name": "str", "age": "int", "school": "str", "hobby": "str"}

In [24]:
# Turn the concise `output_format` into a pydantic model, then print the size
# and the pretty-printed form of its standard JSON Schema.
model = convert_schema_to_pydantic(output_format)
json_schema = model.model_json_schema()
# the length is taken from str() of the schema dict, not from the indented JSON
print("Length of Normal JSON Schema:", len(str(json_schema)), end="\n\n")
print(json.dumps(json_schema, indent=2))

Length of Normal JSON Schema: 358

{
  "additionalProperties": false,
  "properties": {
    "name": {
      "title": "Name",
      "type": "string"
    },
    "age": {
      "title": "Age",
      "type": "integer"
    },
    "school": {
      "title": "School",
      "type": "string"
    },
    "hobby": {
      "title": "Hobby",
      "type": "string"
    }
  },
  "required": [
    "name",
    "age",
    "school",
    "hobby"
  ],
  "title": "pydantic_model_498838353bec4919ae4b8a20801fe25e",
  "type": "object"
}


In [25]:
# Render the same model in the concise YAML format used by parse_yaml and
# print its length for comparison with the JSON Schema.
# (Label spelling corrected from "YMAL" to "YAML".)
concise_schema = convert_pydantic_to_yaml(model)
print("Length of Concise YAML Schema:", len(concise_schema))
print()
print(concise_schema)

Length of Concise YMAL Schema: 42

name: str
age: int
school: str
hobby: str

