# Parse_YAML Pipeline Flow

In [1]:
from dotenv import load_dotenv
# Put your secret keys in your .env file.
# For instance, if you are using OpenAI, your .env should contain
# export OPENAI_API_KEY = "sk-......."
# The llm functions in this notebook read OPENROUTER_API_KEY, so set that one the same way.
load_dotenv()

True

In [10]:
from strictjson import parse_yaml, parse_yaml_async
import os

In [15]:
# Model identifier passed as `model=` by llm() and llm_async(), which send their
# requests to the OpenRouter endpoint. Exactly one assignment should be active;
# the commented-out lines are alternatives to swap in.
# MODEL = "anthropic/claude-3.7-sonnet"
# MODEL = "anthropic/claude-3.5-sonnet"
# MODEL = "openai/o3-mini"
# MODEL = "openai/o1-mini"
# MODEL = "openai/gpt-4o"
# MODEL = "openai/gpt-4o-mini" 
# MODEL = "meta-llama/llama-3.3-70b-instruct"
# MODEL = "meta-llama/llama-3.2-90b-vision-instruct"
# MODEL = "meta-llama/llama-3.1-70b-instruct"
# MODEL = "deepseek/deepseek-chat"
MODEL = "google/gemini-2.5-flash"

In [16]:
def llm(system_prompt: str, user_prompt: str, **kwargs):
    ''' Blocking chat-completion call routed through OpenRouter - swap in any provider you prefer.

    Sends the optional system prompt followed by the user prompt to the
    module-level MODEL and returns the message content of the first choice.
    Extra keyword arguments are accepted and ignored.
    '''

    # requires the `openai` package to be installed
    from openai import OpenAI

    openrouter = OpenAI(
        api_key=os.environ["OPENROUTER_API_KEY"],
        base_url="https://openrouter.ai/api/v1",
    )

    user_turn = {"role": "user", "content": user_prompt}
    if system_prompt:
        # the system turn is sent as a list of typed content parts
        system_turn = {
            "role": "system",
            "content": [{"type": "text", "text": system_prompt}],
        }
        conversation = [system_turn, user_turn]
    else:
        # an empty system prompt is left out of the request entirely
        conversation = [user_turn]

    # MODEL is the notebook-level setting; edit it there to switch models
    completion = openrouter.chat.completions.create(
        messages=conversation,
        model=MODEL,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [17]:
async def llm_async(system_prompt: str, user_prompt: str, **kwargs):
    ''' Awaitable chat-completion call routed through OpenRouter - swap in any provider you prefer.

    Sends the optional system prompt followed by the user prompt to the
    module-level MODEL and returns the message content of the first choice.
    Extra keyword arguments are accepted and ignored.
    '''
    from openai import AsyncOpenAI

    openrouter = AsyncOpenAI(
        api_key=os.environ["OPENROUTER_API_KEY"],
        base_url="https://openrouter.ai/api/v1",
    )

    def as_system(text):
        # the system turn is sent as a list of typed content parts
        return {"role": "system", "content": [{"type": "text", "text": text}]}

    # an empty system prompt is left out of the request entirely
    conversation = [as_system(system_prompt)] if system_prompt else []
    conversation += [{"role": "user", "content": user_prompt}]

    # MODEL is the notebook-level setting; edit it there to switch models
    completion = await openrouter.chat.completions.create(
        messages=conversation,
        model=MODEL,
    )
    return completion.choices[0].message.content

# Plan Generator (refine a plan and make it more specific)

- Get LLM to come up with possible recipes given your ingredients
- Get LLM to evaluate which is the healthiest one
- Get LLM to come up with steps for a recipe
- Get LLM to expand on each step for the recipe

In [68]:
# Ingredients on hand, rendered one per line (each line newline-terminated)
# so the text can be dropped straight into a prompt.
ingredients = "".join(
    f"{item}\n"
    for item in (
        "Chilled Tofu",
        "Broccoli",
        "Pasta",
        "Button Mushrooms",
        "Ragu Sauce",
        "Pepper",
        "M&Ms",
        "Egg",
    )
)

In [69]:
# Ask the model for candidate recipes built from `ingredients`.
# output_format requests a "Recipes" list whose items each have a Name and a Description.
res = parse_yaml(system_prompt = "Generate 10 possible recipes given user ingredients",
                 user_prompt = ingredients,
                 output_format = {"Recipes": [{'Name': 'str', 'Description': 'str'}]},
                 llm = llm)

In [70]:
print(res["Recipes"])

[{'Name': 'Chilled Tofu & Broccoli Pasta Salad', 'Description': 'A refreshing pasta salad featuring chilled tofu cubes, blanched broccoli florets, and pasta, tossed with a light vinaigrette and a sprinkle of pepper.'}, {'Name': 'Mushroom & Ragu Pasta with Tofu Crumbles', 'Description': 'Hearty pasta dish with sautéed button mushrooms and Ragu sauce, enhanced with crumbled, pan-fried tofu for extra protein and texture.'}, {'Name': 'Broccoli & Mushroom Omelette', 'Description': 'A simple and nutritious omelette filled with chopped broccoli and sautéed button mushrooms, seasoned with pepper.'}, {'Name': 'Savory Tofu & Broccoli Stir-fry with Pasta', 'Description': 'Diced chilled tofu and broccoli florets stir-fried with a savory sauce, served over pasta and seasoned with pepper.'}, {'Name': 'Ragu Mushroom & Tofu Skillet', 'Description': 'Button mushrooms and chilled tofu cooked in Ragu sauce, seasoned with pepper, and served as a quick and flavorful skillet meal.'}, {'Name': 'Pasta with Cr

In [77]:
# Pass the recipe-generation result `res` back in (stringified) and request a
# 1-5 nutritional score with an explanation for every recipe, plus a separate
# "Healthiest Recipe" entry with the same three fields.
# NOTE(review): the prompt says "Rank" while the schema asks for a 1-5 rating
# per recipe - confirm which is intended.
res2 = parse_yaml(system_prompt = "Rank each of these recipes by nutritional value. 5 is the healthiest. Then output the healthiest",
                 user_prompt = str(res),
                 output_format = {"Recipes": [{'Name': 'str', 
                                               'Explanation for Nutritional Value': 'include the nutrients for each ingredient used, str',
                                               'Nutritional Value': 'Enum[1, 2, 3, 4, 5]'}],
                                  "Healthiest Recipe": 
                                      {'Name': 'str', 
                                               'Explanation for Nutritional Value': 'str',
                                               'Nutritional Value': 'Enum[1, 2, 3, 4, 5]'}
                                 },
                 llm = llm)

In [85]:
res2["Recipes"]

[{'Name': 'Chilled Tofu & Broccoli Pasta Salad',
  'Explanation for Nutritional Value': 'Tofu: Excellent source of protein, essential amino acids, iron, and calcium.\nBroccoli: Rich in vitamins C and K, fiber, and iron.\nPasta: Provides carbohydrates for energy.\nVinaigrette: Can add healthy fats (olive oil) but also calories.\nPepper: Adds flavor, some antioxidants.\n',
  'Nutritional Value': 4},
 {'Name': 'Mushroom & Ragu Pasta with Tofu Crumbles',
  'Explanation for Nutritional Value': 'Mushrooms: Low in calories, good source of B vitamins and selenium.\nRagu Sauce: Can be high in sodium and sometimes sugar, depending on the brand. Tomatoes provide lycopene.\nPasta: Provides carbohydrates for energy.\nTofu: Excellent source of protein, essential amino acids, iron, and calcium.\n',
  'Nutritional Value': 3},
 {'Name': 'Broccoli & Mushroom Omelette',
  'Explanation for Nutritional Value': 'Broccoli: Rich in vitamins C and K, fiber, and iron.\nMushrooms: Low in calories, good source of

In [88]:
res2["Healthiest Recipe"]

{'Name': 'Broccoli & Mushroom Omelette',
 'Explanation for Nutritional Value': "This recipe consists primarily of eggs, broccoli, and mushrooms, making it a powerhouse of lean protein, fiber, vitamins (C, K, D, B12), and minerals. It's naturally low in unhealthy fats and carbohydrates, focusing on nutrient density. The simple preparation and minimal added ingredients further enhance its health profile.",
 'Nutritional Value': 5}

In [89]:
# Ask the model for a high-level list of cooking steps for the chosen recipe.
# The dict lookup inside the f-string uses single quotes: reusing the outer
# double quotes inside a replacement field is a SyntaxError before Python 3.12.
res3 = parse_yaml(system_prompt = "Output a skeleton plan with the steps needed for cooking this recipe",
                  user_prompt = f"Recipe: {res2['Healthiest Recipe']}, Ingredients: {ingredients}",
                  output_format = {"Skeleton Plan": "list"},
                  llm = llm)

In [90]:
res3

{'Skeleton Plan': ['Prep Ingredients: Chop broccoli and mushrooms. Crush tofu.',
  'Cook Vegetables: Sauté broccoli and mushrooms until tender.',
  'Cook Eggs: Whisk eggs and cook in a pan, adding the cooked vegetables and tofu.',
  'Season: Season with pepper.',
  'Serve: Fold the omelette and serve.']}

In [91]:
# Expand every entry of the skeleton plan into detailed, beginner-friendly
# instructions, returned as a list of strings (one per part).
# The dict lookup inside the f-string uses single quotes: reusing the outer
# double quotes inside a replacement field is a SyntaxError before Python 3.12.
res4 = parse_yaml(system_prompt = '''Output the detailed plan for each part of the Skeleton Plan. 
Make it such that there is quantity and time inside the plan, and make it friendly enough for a 12 year old
Be detailed and make sure every step is explained, including how to slice and how to cook''',
                  user_prompt = f"Recipe: {res2['Healthiest Recipe']}\nSkeleton Plan: {res3}\nIngredients: {ingredients}",
                  output_format = {"Detailed Plan for each part": "List[str]"},
                  llm = llm)

In [92]:
print("\n\n".join(res4["Detailed Plan for each part"]))

Prep Ingredients (15 minutes):

  Broccoli: Get out your broccoli! We only need about 1 cup. First, give it a good rinse under the cold water tap to wash off any dirt. Now, grab a grown-up to help you with a knife or use kitchen scissors. We need to cut the broccoli into really tiny, bite-sized pieces, like little green trees! We want about 1 cup of these tiny trees. Put them in a bowl.

  Mushrooms: Time for the mushrooms! Take out about 1 cup of button mushrooms. Just like the broccoli, give them a quick rinse. Now, with a grown-up's help, carefully slice them into thin pieces. Think of them like little mushroom coins. Put them in the same bowl with the broccoli.

  Tofu: Now for the chilled tofu! We only need a quarter of a block. Take it out of its packaging. You can use your hands to gently squeeze and crumble it into tiny pieces in a separate bowl. We want it to be like little soft curds. If you want, you can use a fork to mash it up too. The goal is to make it look like small wh

# Comparison with asking the LLM to generate directly

In [94]:
print(llm("Generate me detailed steps to cook something healthy using the ingredients provided",
    ingredients))

Okay, this is an interesting and... eclectic mix of ingredients! We'll definitely be omitting the M&Ms from the savory dish for health and taste reasons, but hey, you can always enjoy them for a little dessert after your meal.

This recipe will focus on creating a hearty, flavorful, and relatively healthy pasta dish with some Asian-inspired elements from the chilled tofu.

**Dish Title:** **Hearty Tofu & Mushroom Pasta with Roasted Broccoli**

This recipe prioritizes health by:
*   **Lean Protein:** Tofu provides a great plant-based protein source.
*   **Vegetable-Rich:** Plenty of broccoli and mushrooms for fiber and nutrients.
*   **Whole Grains (Optional):** You can easily use whole wheat pasta for an extra health boost.
*   **Reduced Sodium (Optional):** Choose a lower-sodium ragu if available.

---

**Yields:** 2-3 servings
**Prep time:** 15 minutes
**Cook time:** 25-30 minutes

**Ingredients:**

*   **Chilled Tofu:** 1 block (14-16 oz) firm or extra-firm tofu
*   **Broccoli:** 1 

# MCQ Question Generator

In [145]:
import requests
from io import BytesIO
from PyPDF2 import PdfReader

def get_arxiv_pdf_text(arxiv_id = "1706.03762", timeout = 30):
    """
    Download and extract text from an arXiv paper's PDF.
    Example: arxiv_id = "1706.03762"

    Args:
        arxiv_id: arXiv identifier, inserted into
            https://arxiv.org/pdf/{arxiv_id}.pdf
        timeout: seconds handed to requests.get so a stalled connection
            raises instead of blocking the notebook forever. Pass None to
            wait indefinitely (the previous behaviour).

    Returns:
        The text of every page concatenated in page order with no separator.
        A page whose extract_text() result is falsy contributes an empty string.

    Raises:
        requests.exceptions.HTTPError: raised by raise_for_status() when the
            response has an error status code.
        requests.exceptions.Timeout: the server did not respond within `timeout`.
    """
    url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Parse from memory; the PDF is never written to disk.
    with BytesIO(response.content) as pdf_file:
        reader = PdfReader(pdf_file)
        # join once instead of growing a string page by page
        return "".join(page.extract_text() or "" for page in reader.pages)

paper_id = "1706.03762"  # attention is all you need - replace with any arxiv id
text = get_arxiv_pdf_text(paper_id)
print(text[:2000])  # print the first 2000 characters

Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.comNoam Shazeer∗
Google Brain
noam@google.comNiki Parmar∗
Google Research
nikip@google.comJakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.comAidan N. Gomez∗ †
University of Toronto
aidan@cs.toronto.eduŁukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗ ‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple network architecture, the Transformer,
based solely on attention mechanisms, dispensing with recurrence and convolutions
entirely. Experime

In [162]:
# Generate multiple-choice questions from the extracted paper text.
# Each question has four string options, the number (1-4) of the correct
# option under "Answer", and an explanation for that answer.
# NOTE(review): the whole of `text` is sent as the user prompt; presumably it
# fits in the model's context window - confirm for longer papers.
res = parse_yaml(system_prompt = '''Come up with 10 questions, the MCQ options and the right answer and 
the explanation for the answer using user text,
Options should all be in string, including numerical choices''',
                 user_prompt = text,
                 output_format = {"Questions": [{"Question": "str",
                                                 "Option 1": "str",
                                                 "Option 2": "str",
                                                 "Option 3": "str",
                                                 "Option 4": "str",
                                                 "Answer": "Enum[1, 2, 3, 4]",
                                                 "Explanation for Answer": "multi-line block, str"}]},
                 llm = llm)

In [163]:
res

{'Questions': [{'Question': 'What is the main innovation proposed in the paper "Attention Is All You Need"?',
   'Option 1': 'Replacing feed-forward networks with recurrent neural networks.',
   'Option 2': 'Introducing a novel convolutional neural network architecture.',
   'Option 3': 'Proposing the Transformer, a network architecture based solely on attention mechanisms.',
   'Option 4': 'Enhancing traditional encoder-decoder models with more complex recurrent layers.',
   'Answer': 3,
   'Explanation for Answer': 'The abstract clearly states: "We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely." This highlights the paper\'s main innovation.\n'},
  {'Question': 'Which task did the Transformer model achieve a new state-of-the-art BLEU score of 41.8 on?',
   'Option 1': 'WMT 2014 English-to-German translation.',
   'Option 2': 'English constituency parsing.',
   'Option 3': 'WMT 2014

# Tool Calling Agent

- This defines the tools, and then the LLM parser uses them iteratively to decide which tool to call next
- Will be implemented in strictjson soon

In [140]:
def add_numbers(x: int, y: int):
    """Tool: return the sum of x and y."""
    total = x + y
    return total

def multiply_numbers(x: int, y: int):
    """Tool: return the product of x and y."""
    product = x * y
    return product

def reply_user(reply: str):
    """Tool: print the final reply for the user and hand the same value back."""
    label = "Reply to user:"
    print(label, reply)
    return reply

In [141]:
history = []

In [142]:
# Tool-calling loop: ask the model for one tool call at a time, execute it, and
# feed the accumulated results back in through `history` until the model calls
# reply_user.

# Map each tool name the model may emit to the callable that implements it.
tools = {
    "add_numbers": add_numbers,
    "multiply_numbers": multiply_numbers,
    "reply_user": reply_user,
}

for _ in range(5):  # hard cap on steps so the loop always terminates

    res = parse_yaml(system_prompt= f'''You are to evaluate an equation. 

Previous steps: {history}

You have access to the following tools:

This is in the form of tool_name(tool_params): -> output
add_numbers(x: int, y: int) -> int
multiply_numbers(x: int, y: int) -> int
reply_user(reply: str) -> None

Output the next step to perform the calculation. End with reply_user''',
    user_prompt = "Equation: 3+5*7 + (4*6)",
    output_format = {"Tool Name": "str",
                     "Tool Params": "dict"},
    llm = llm)

    tool_call = res["Tool Name"]
    tool_params = res["Tool Params"]

    tool = tools.get(tool_call)
    if tool is None:
        # Unknown tool name: still stop, but say why instead of ending silently.
        print(f"Unknown tool requested: {tool_call!r} - stopping")
        break

    tool_result = tool(**tool_params)
    if tool_call == "reply_user":
        # reply_user is the terminal step; it has already printed the answer
        # and is not recorded in history.
        break

    step_result = f"Tool Call: {tool_call}\nTool Params: {tool_params}\nTool Result: {tool_result}"
    print(step_result)

    history.append(step_result)
else:
    # for/else: reached only when the loop used every step without a break,
    # i.e. the model never called reply_user.
    print("Step limit reached before reply_user was called")

Tool Call: multiply_numbers
Tool Params: {'x': 5, 'y': 7}
Tool Result: 35
Tool Call: multiply_numbers
Tool Params: {'x': 4, 'y': 6}
Tool Result: 24
Tool Call: add_numbers
Tool Params: {'x': 3, 'y': 35}
Tool Result: 38
Tool Call: add_numbers
Tool Params: {'x': 38, 'y': 24}
Tool Result: 62
Reply to user: The answer is 62.


In [143]:
history

["Tool Call: multiply_numbers\nTool Params: {'x': 5, 'y': 7}\nTool Result: 35",
 "Tool Call: multiply_numbers\nTool Params: {'x': 4, 'y': 6}\nTool Result: 24",
 "Tool Call: add_numbers\nTool Params: {'x': 3, 'y': 35}\nTool Result: 38",
 "Tool Call: add_numbers\nTool Params: {'x': 38, 'y': 24}\nTool Result: 62"]