# LLM YAML Parser
- Created: 19 Feb 2025
- Parses the LLM output as YAML and converts it to a `dict`
- Uses concise `output_format` to save tokens
- Automatically converts `output_format` into a pydantic schema and uses pydantic to validate the output
- Able to process datatypes: `int`, `float`, `str`, `bool`, `list`, `dict`, `date`, `datetime`, `time`, `UUID`, `Decimal`
- Able to process: `None`, `Any`, `Union`, `Optional`
- Default datatype when not specified is `Any`
- Retries with error correction up to `num_tries` times (default: 3)

In [1]:
import os
from strictjson import parse_yaml, parse_yaml_async

In [14]:
from dotenv import load_dotenv
# put your secret keys in your .env
# For instance, if you are using OpenAI, your .env should contain
# export OPENAI_API_KEY = "sk-......."
load_dotenv()

True

# Define LLMs
- An LLM takes in a `system_prompt` and a `user_prompt` and outputs a `str`

In [3]:
def llm(system_prompt: str, user_prompt: str) -> str:
    """Send one system/user prompt pair to an OpenAI chat model and return the reply.

    OpenAI is used here purely for illustration; replace the body with your
    own LLM as long as the function still accepts ``system_prompt`` and
    ``user_prompt``.

    Args:
        system_prompt: Sent as the "system" message.
        user_prompt: Sent as the "user" message.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Keep every LLM-specific import inside this function.
    from openai import OpenAI

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Define your own LLM here.
    completion = OpenAI().chat.completions.create(
        model='gpt-4o-mini',
        temperature=0,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [4]:
async def llm_async(system_prompt: str, user_prompt: str) -> str:
    """Await one OpenAI chat completion for a system/user prompt pair.

    OpenAI is used here purely for illustration; replace the body with your
    own async LLM as long as the coroutine still accepts ``system_prompt``
    and ``user_prompt``.

    Args:
        system_prompt: Sent as the "system" message.
        user_prompt: Sent as the "user" message.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Keep every LLM-specific import inside this function.
    from openai import AsyncOpenAI

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Define your own LLM here.
    completion = await AsyncOpenAI().chat.completions.create(
        model='gpt-4o-mini',
        temperature=0,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Basic Syntax
- `system_prompt`: Instructions for the LLM
- `user_prompt`: Content to perform instructions on
- `output_format`: Concise dictionary, with descriptions and types infused into its values, that specifies the format the LLM should output in. If a type is given, specify it either as a standalone string or after a comma (e.g. "int" or "number above 5, int")
- `llm`: Your llm that takes in `system_prompt` and `user_prompt` and outputs a `str`

In [5]:
# Sync: each output_format value is a description, a type, or "description, type".
parse_yaml(
    system_prompt="Give me 5 names on a topic",
    user_prompt="weather",
    output_format={
        "Names": "Great sounding names, List[str]",
        "Meanings": "Name and meaning, dict",
        "Chinese Meanings": "Name and meaning in chinese, dict",
        "Lucky Name or Number": "List[Union[int, str]]",
        "Code": "Python code to generate 5 names",
    },
    llm=llm,
)

{'Names': ['Sunny', 'Stormy', 'Breeze', 'Cloudy', 'Rainy'],
 'Meanings': {'Sunny': 'Bright and cheerful, like the sun.',
  'Stormy': 'Characterized by strong winds and rain.',
  'Breeze': 'A gentle wind, often refreshing.',
  'Cloudy': 'Covered with clouds; overcast.',
  'Rainy': 'Characterized by rain; wet.'},
 'Chinese Meanings': {'Sunny': '阳光明媚',
  'Stormy': '暴风雨',
  'Breeze': '微风',
  'Cloudy': '多云',
  'Rainy': '下雨'},
 'Lucky Name or Number': [7, 'Lucky', 3, 'Fortune', 9],
 'Code': 'import random\n\nnames = ["Sunny", "Stormy", "Breeze", "Cloudy", "Rainy"]\nselected_names = random.sample(names, 5)\nprint(selected_names)'}

In [6]:
# Async
await parse_yaml_async(system_prompt = "Give me 5 names on a topic", 
           user_prompt = "weather",
           output_format = {"Names": "Great sounding names, List[str]",
                            "Meanings": "Name and meaning, dict", 
                            "Chinese Meanings": "Name and meaning in chinese, dict",
                            "Lucky Name or Number": "List[Union[int, str]]",
                            "Code": "Python code to generate 5 names"},
           llm = llm_async)

{'Names': ['Aurora', 'Zephyr', 'Solstice', 'Nimbus', 'Tempest'],
 'Meanings': {'Aurora': 'Dawn',
  'Zephyr': 'Gentle breeze',
  'Solstice': 'Sun standing still',
  'Nimbus': 'Rain cloud',
  'Tempest': 'Storm'},
 'Chinese Meanings': {'Aurora': '曙光',
  'Zephyr': '微风',
  'Solstice': '至日',
  'Nimbus': '雨云',
  'Tempest': '暴风'},
 'Lucky Name or Number': [7, 'Lucky Star', 3, 'Fortune', 9],
 'Code': 'import random\n\ndef generate_weather_names():\n    names = ["Aurora", "Zephyr", "Solstice", "Nimbus", "Tempest"]\n    return random.sample(names, 5)\n\nprint(generate_weather_names())'}

# Long Context processing

In [7]:
text = '''SINGAPORE – Singapore will study the potential deployment of nuclear power here and take further steps to systematically build up capabilities in this area, Prime Minister Lawrence Wong said on Feb 18.

Noting that interest in nuclear energy is increasing worldwide, with several countries within the region planning to include nuclear in their energy mix, PM Wong said Singapore will need new capabilities to evaluate options and consider if there is a solution that the island-state can deploy in a safe, cost-effective way. 

Malaysia and Indonesia, for example, have operated research reactors for some time, he noted. 


ADVERTISING


“These capabilities will also be needed for nuclear safety, which will become more salient given the growing regional interest in nuclear power,” said PM Wong. 

The Government will also pump in another $5 billion into its existing Future Energy Fund to support Singapore’s efforts to secure clean power, he said in his Budget speech.

“Be it electricity imports, hydrogen or nuclear, we will need to make major investments in new infrastructure,” added PM Wong, who is also Finance Minister.


The Future Energy Fund was announced during Budget 2024 with an initial $5 billion investment. It was set up to catalyse investments into clean energy technology that may involve high upfront costs and significant commercial, technological and geopolitical risks. 


Catch up on the news that everyone’s talking about
Enter your e-mail
 Sign up
By signing up, I accept SPH Media's Terms & Conditions and Privacy Policy as amended from time to time.


Yes, I would also like to receive SPH Media Group's SPH Media Limited, its related corporations and affiliates as well as their agents and authorised service providers. marketing and promotions.
The fund is part of Singapore’s efforts to address its resource constraints. 

PM Wong cited how the Republic overcame its water challenges through innovations such as recycling used water to form Newater, and building up its water industry. 


“Today, we face a different challenge. The industries of the future – artificial intelligence, semiconductors, biopharmaceuticals – are highly energy-intensive. To meet these growing energy needs and to bring down our carbon emissions at the same time, we will need more clean power,” he said.

“Expanding access to clean energy is therefore a major national imperative.” 

Singapore has not made a decision to adopt nuclear energy. But given that the Republic has limitations in accessing renewable energy, nuclear is among various low-carbon sources that the country is looking into amid considerations of the nation’s energy security, affordability and carbon footprint. 

“Our options are inherently limited because we do not have the natural resources nor the land to meet our needs using hydro, wind or solar power,” PM Wong said.


Singapore now relies on natural gas, a fossil fuel, for some 95 per cent of its energy needs. The power sector contributes about 40 per cent of the country’s total emissions.

Achieving Singapore’s long-term climate target of reaching net-zero emissions by 2050 would require reducing carbon emissions from this sector. 

On Feb 10, the Republic published its 2035 climate target – to reduce its emissions to between 45 million tonnes and 50 million tonnes, down from the 60 million tonnes it expects to emit in 2030. 

PM Wong said that while Singapore had earlier assessed that conventional nuclear technologies were not suitable for Singapore, the country had continued to keep a close watch on developments in this space to keep its options open. 

“Since then, we have seen significant advancements in nuclear technologies,” he added, citing small modular reactors (SMRs) as one advanced nuclear technology that has better safety features than conventional, large plants. 

SMRs are compact systems that can be factory-assembled and installed in dense urban areas. The power capacity of one SMR is about a third of that of a traditional reactor.

PM Wong added that a few SMRs have been deployed elsewhere, and more could become operational by the end of the decade. 

Over the past couple of years, Singapore’s exploration of nuclear energy has been hotting up. 

It started around 2022, when a local report on future energy scenarios mentioned that emerging energy technologies, including nuclear and geothermal, could potentially supply around 10 per cent of Singapore’s energy needs by 2050.

More on this Topic
PM Wong unveils bumper SG60 Budget for all Singaporeans
Singapore Budget 2025: Read more
In July 2024, the Republic inked the 123 Agreement on Nuclear Cooperation with the US, which will allow Singapore to learn more about nuclear technologies and scientific research from American organisations.

PM Wong noted that Singapore is working on similar cooperation with other countries that have capabilities and experience in civilian nuclear power, particularly SMRs. 

In the nearer-term, PM Wong said one immediate solution to green the country’s energy mix is to import low-carbon electricity from the region, and the Republic has been progressing on this front. 

Singapore has inked deals with Indonesia, Cambodia and Vietnam to import 5.6 gigawatts of low-carbon electricity by 2035, and much of the green electricity is expected to come from solar, hydropower and wind.

Under a pilot that was expanded in 2024, Singapore is importing hydropower from Laos via Thailand and Malaysia. In late 2024, it was said that additional energy supply will come from Malaysia, increasing the total electricity import capacity to 200MW from 100MW. Malaysia’s grid comprises coal and natural gas. 

“By 2035, we expect that about one-third of our projected electricity demand can be met through electricity imports,” said PM Wong. 

On low-carbon hydrogen – an emerging fuel that does not produce planet-warming emissions when burned – PM Wong said that Singapore has been closely evaluating its use. 

But there are inherent challenges in its production, storage and transportation, he said, which makes it hard to scale up in a commercially viable manner.'''

In [8]:
# Long-context extraction: the whole article in `text` is passed as the second argument.
parse_yaml(
    "Extract information from text",
    text,
    output_format={
        "Entities": "organisations only, list[str]",
        "Sentiment": "Enum['Happy', 'Sad', 'Neutral']",
        "News about DeepSeek": "if any, Optional[str]",
        "Summary": "str",
        "Code": "code to print out entities",
        "Latex": "latex code to write article in latex",
    },
    llm=llm,
)

{'Entities': ['Singapore',
  'Malaysia',
  'Indonesia',
  'US',
  'Cambodia',
  'Vietnam',
  'Laos'],
 'Sentiment': 'Neutral',
 'News about DeepSeek': 'None',
 'Summary': 'Singapore is exploring the potential deployment of nuclear power as part of its efforts to secure clean energy and reduce carbon emissions. Prime Minister Lawrence Wong highlighted the need for new capabilities to evaluate nuclear options, citing regional interest and advancements in nuclear technologies, particularly small modular reactors (SMRs). The government is also investing in the Future Energy Fund to support clean energy initiatives and has inked agreements to import low-carbon electricity from neighboring countries.',
 'Code': 'entities = ["Singapore", "Malaysia", "Indonesia", "US", "Cambodia", "Vietnam", "Laos"]\nprint(entities)\n',
 'Latex': "\\documentclass{article}\n\\begin{document}\n\\title{Singapore's Exploration of Nuclear Energy}\n\\author{Author Name}\n\\date{\\today}\n\\maketitle\n\n\\section{Int

# Previous StrictJSON tests

In [9]:
# Classifier with description-only fields (no types specified for any key).
parse_yaml(
    system_prompt="You are a classifier",
    user_prompt="It is a beautiful and sunny day",
    output_format={
        "Sentiment": "Type of Sentiment",
        "Adjectives": "Array of adjectives",
        "Words": "Number of words",
    },
    llm=llm,
)

{'Sentiment': 'Positive', 'Adjectives': ['beautiful', 'sunny'], 'Words': 7}

In [10]:
# Same classifier, now with a "type: ..." suffix on every field.
parse_yaml(
    system_prompt="You are a classifier",
    user_prompt="It is a beautiful and sunny day",
    output_format={
        "Sentiment": 'Type of Sentiment, type: Enum["Pos", "Neg", "Other"]',
        "Adjectives": "Array of Adjectives, type: List[str]",
        "Words": "Number of words, type: int",
        "In English": "Whether sentence is in English, type: bool",
    },
    llm=llm,
)

{'Sentiment': 'Pos',
 'Adjectives': ['beautiful', 'sunny'],
 'Words': 7,
 'In English': True}

In [11]:
# Code generation: the 'C' and 'Python' fields share the same description, 'Code'.
parse_yaml(
    system_prompt="You are a code generator, generating code to fulfil a task",
    user_prompt="Given array p, output a function named func_sum to return its sum",
    output_format={
        "Elaboration": "How you would do it",
        "C": "Code",
        "Python": "Code",
    },
    llm=llm,
)

{'Elaboration': "To create a function named `func_sum` that takes an array `p` as input and returns its sum, we can use Python's built-in `sum()` function. This function will iterate through the elements of the array and calculate the total sum. The function will handle both integer and float values in the array.\n",
 'C': 'def func_sum(p):\n    return sum(p)\n',
 'Python': 'def func_sum(p):\n    return sum(p)'}

In [13]:
# Nested output_format: the 'Words' value is a dict that itself contains a dict.
parse_yaml(
    system_prompt="You are a classifier",
    user_prompt="It is a beautiful and sunny day",
    output_format={
        "Sentiment": "Type of Sentiment",
        "Strength of Sentiment": "Enum[1, 2, 3, 4, 5]",
        "Adjectives": "Name and Description as separate keys, type: List[Dict['Name', 'Description']]",
        "Words": {
            "Number of words": "Word count",
            "Language": {
                "English": "Whether it is English, type: bool",
                "Chinese": "Whether it is Chinese, type: bool",
            },
            "Proper Words": "Whether the words are proper in the native language, type: bool",
        },
    },
    llm=llm,
)

{'Sentiment': 'Positive',
 'Strength of Sentiment': 4,
 'Adjectives': [{'Name': 'Beautiful',
   'Description': 'Pleasing to the senses or mind'},
  {'Name': 'Sunny', 'Description': 'Bright with sunlight'}],
 'Words': {'Number of words': 7,
  'Language': {'English': True, 'Chinese': False},
  'Proper Words': True}}