In [1]:
from rich import print as rprint

### Tool Loop


#### Sync


In [None]:
from pydantic import BaseModel
import random
from tinyloop.features.function_calling import Tool
from tinyloop.modules.tool_loop import ToolLoop


def roll_dice():
    """Roll a dice and return the result"""
    # Half-open range [1, 7) yields exactly the faces 1..6, same as randint(1, 6).
    return random.randrange(1, 7)


# Structured final answer requested from the tool loop (passed as `output_format` below).
# Documented with `#` comments so the class definition itself stays unchanged.
class FinalAnswer(BaseModel):
    last_roll: int  # the last roll, as the system prompt asks the model to return
    reached_goal: bool  # whether the number indicated in the prompt was reached


# Instructions given to the model for the dice-rolling tool loop.
# The leading and trailing newlines are part of the literal.
system_prompt = """
You are a dice rolling assistant.
You should roll a dice until you get the number indicated in the prompt.
You should use the function roll_dice to roll the dice.
Before you roll the dice make sure to check if you have reached the goal.

In the end, you should return the last roll.
You should also return a boolean indicating if you reached the number indicated in the prompt or not.
"""

# Tool loop wired to the dice tool; FinalAnswer is the requested structured output.
loop = ToolLoop(
    model="openai/gpt-4.1",
    system_prompt=system_prompt,
    temperature=0.1,
    output_format=FinalAnswer,
    tools=[Tool(roll_dice)],
)

# Synchronous run via the instance's call interface.
# NOTE(review): parallel_tool_calls=False presumably limits the model to one
# tool call per step — confirm against the tinyloop documentation.
response = loop(prompt="Roll a dice until you get a 6", parallel_tool_calls=False)
rprint(response)

#### Async


In [None]:
from pydantic import BaseModel
from tinyloop.features.function_calling import Tool
from tinyloop.modules.tool_loop import ToolLoop


def roll_dice():
    """Roll a dice and return the result"""
    # Imported locally: this cell's import block does not bring in `random`,
    # so on a fresh kernel the function would raise NameError unless the
    # Sync cell (which does import it) had been executed first.
    import random

    return random.randint(1, 6)


# Structured final answer requested from the tool loop (passed as `output_format` below).
# Same fields as the FinalAnswer model defined in the Sync cell.
class FinalAnswer(BaseModel):
    last_roll: int  # the last roll, as the system prompt asks the model to return
    reached_goal: bool  # whether the number indicated in the prompt was reached


# Instructions given to the model for the dice-rolling tool loop (async example).
# The leading and trailing newlines are part of the literal.
system_prompt = """
You are a dice rolling assistant.
You should roll a dice until you get the number indicated in the prompt.
You should use the function roll_dice to roll the dice.
Before you roll the dice make sure to check if you have reached the goal.

In the end, you should return the last roll.
You should also return a boolean indicating if you reached the number indicated in the prompt or not.
"""

loop = ToolLoop(
    model="openai/gpt-4.1",
    system_prompt=system_prompt,
    temperature=0.1,
    output_format=FinalAnswer,
    tools=[
        Tool(
            roll_dice,
        )
    ],
)


response = await loop.acall(
    prompt="Roll a dice until you get a 6",
    parallel_tool_calls=False,
)
rprint(response)

In [None]:
import asyncio
import time

# Different prompts for variety
prompts = [
    "Roll a dice until you get a 6",
    "Roll a dice until you get a 3",
    "Roll a dice until you get a 5",
]

tasks = [loop.acall(prompt=prompt, parallel_tool_calls=False) for prompt in prompts]

# Run all tasks concurrently
start_time = time.time()
results = await asyncio.gather(*tasks)
total_time = time.time() - start_time

### Generate


#### Sync


In [None]:
from tinyloop.modules.generate import Generate
from pydantic import BaseModel
from typing import List


# Schema for a single generated character (all fields are plain strings).
class Character(BaseModel):
    name: str
    description: str
    image: str  # NOTE(review): not shown here whether this is a URL, a path or a text description — confirm


# Top-level structured output: a list of Character entries, passed as `output_format` below.
class Characters(BaseModel):
    characters: List[Character]


# Bind the model, temperature and structured-output schema once on an instance.
generate = Generate(
    model="openai/gpt-4.1-nano",
    temperature=0.1,
    output_format=Characters,
)

# Synchronous request; the result is pretty-printed with rich.
response = generate.call(prompt="Give me 3 harry potter characters")
rprint(response)

In [None]:
from tinyloop.modules.generate import Generate
from pydantic import BaseModel
from typing import List


# Schema for a single generated character (all fields are plain strings).
# NOTE(review): identical to the Character/Characters models in the previous cell;
# redefined here so the cell can be read on its own.
class Character(BaseModel):
    name: str
    description: str
    image: str


# Top-level structured output: a list of Character entries.
class Characters(BaseModel):
    characters: List[Character]


# Called on the class itself: no Generate instance is created in this cell.
# The call is the cell's last expression, so its value is shown as the cell output.
Generate.run(
    model="openai/gpt-4.1-nano",
    temperature=0.1,
    output_format=Characters,
    prompt="Give me 3 harry potter characters",
)

#### Async


In [None]:
from tinyloop.modules.generate import Generate
from pydantic import BaseModel
from typing import List


# Schema for a single generated character (all fields are plain strings).
# NOTE(review): identical to the models defined in the Sync cells of this section.
class Character(BaseModel):
    name: str
    description: str
    image: str


# Top-level structured output: a list of Character entries, passed as `output_format` below.
class Characters(BaseModel):
    characters: List[Character]


generate = Generate(
    model="openai/gpt-4.1-nano", temperature=0.1, output_format=Characters
)

response = await generate.acall(prompt="Give me 3 harry potter characters")
rprint(response)

In [None]:
from tinyloop.modules.generate import Generate
from pydantic import BaseModel
from typing import List


# Schema for a single generated character (all fields are plain strings).
# NOTE(review): identical to the models defined in the earlier Generate cells.
class Character(BaseModel):
    name: str
    description: str
    image: str


# Top-level structured output: a list of Character entries.
class Characters(BaseModel):
    characters: List[Character]


# Async call made on the class itself: no Generate instance is created in this cell.
# The awaited call is the cell's last expression, so its value is shown as the cell output.
await Generate.arun(
    model="openai/gpt-4.1-nano",
    temperature=0.1,
    output_format=Characters,
    prompt="Give me 3 harry potter characters",
)

In [None]:
from tinyloop.modules.generate import Generate
from pydantic import BaseModel
from typing import List
from textwrap import dedent


# NOTE(review): this model is not referenced anywhere else in this cell (the call
# below uses PlannerResponse); it looks like a leftover from the earlier Generate
# examples and is a candidate for removal.
class Character(BaseModel):
    name: str
    description: str
    image: str


INSTRUCTIONS_PLANNING = dedent("""
    You are an expert data visualization architect. Your role is to analyze technical requirements and produce clear, actionable implementation plans.
    These plans will then be carried out by a junior data scientist so you need to be specific and detailed. However do not actually write the code, just explain the plan.

    # Main guidelines
        1. Carefully analyze requirements to identify core functionality and constraints
        2. Define clear technical approach with specific technologies, patterns, design choices, etc.
        3. Break down implementation into concrete, actionable steps at the appropriate level of abstraction
        4. Base yourself on the rubric below to provide the best plan possible.
    
    # The Rubric
    --

    This rubric evaluates key visual aspects of static 2D plots in Matplotlib, with **0 = Not Achieved**, **0.5 = Partially Achieved**, **1 = Achieved**, and an **Extreme Quality** bonus level for maximized excellence in each aspect. The focus is on academic-style, publication-quality visuals.

    ---

    ## Overall Style & Theme Consistency

    ### 0 (Not Achieved)

    No cohesive style; the plot uses default settings or mixes styles inconsistently. Visual elements clash (e.g. one plot uses a dark background, another a light theme) and the overall look is disjointed.

    ### 0.5 (Partially Achieved)

    Some style customization is present (perhaps a style sheet applied or a few rcParams tweaked), but not all elements are unified. For example, colors or fonts might be updated while grid/axes styling remains default, yielding a semi-polished but still inconsistent appearance.

    ### 1 (Achieved)

    A consistent style is applied across the entire plot, either by using a well-designed Matplotlib style sheet or a custom rcParams configuration
    All components (fonts, colors, grid, etc.) adhere to a unified theme – nothing looks out of place. For instance, using Matplotlib’s built-in styles (`plt.style.use`) like “ggplot” gives a coordinated look (gray background, larger fonts, and subtle gridlines).
    The visual presentation is harmonious and professional.

    ### Extreme Quality

    The plot’s theme is maximized for polish and consistency at the highest level. This often means creating a bespoke style sheet for the project or publication, ensuring every detail (down to axis line widths and minor tick styling) is predefined for uniformity
    . The style might build on high-quality defaults (e.g. the Seaborn “ticks” style for a clean, minimalist look
    ) and then refine it further. At this level, the figure looks publication-ready out-of-the-box – comparable to figures in top journals – with a cohesive aesthetic that enhances readability and looks refined.

    ---

    ## Color Palette & Contrast

    ### 0 (Not Achieved)

    Poor color choices undermine the plot. The default or ad-hoc color cycle is used without consideration for clarity or accessibility. This can lead to adjacent elements blending together or harsh contrasts that strain the eyes
    . For example, error bars might be the same color as bar fill, making them nearly invisible
    , or a bright green line over a blue patch is unpleasant and hard to distinguish
    . No thought is given to colorblind-friendly palettes or print (grayscale) legibility.

    ### 0.5 (Partially Achieved)

    Some improvements in color selection are made, but issues remain. Perhaps a basic palette is chosen, yet a few colors still clash or are too close in hue. There may be high saturation default colors that create distracting contrast
    or too many similar tones that confuse categories. The palette is not consistently applied to all plot elements (e.g. one series gets an off-theme color).

    ### 1 (Achieved)

    Colors are chosen deliberately from a harmonious, professional palette. The plot uses either Matplotlib’s improved defaults (e.g. the “tab10” palette) or a known colorblind-friendly set for categorical data
    . The colors are distinguishable yet not garish – often tending towards softer, pastel or muted tones that still contrast well
    . For example, a custom cycle inspired by design guidelines (like Stephen Few’s palette of soft, pastel colors) yields lines and markers that are clearly distinguishable without screaming bright ink
    . Continuous data uses perceptually uniform colormaps (e.g. viridis, inferno) so that intensity differences are reflected accurately and remain visible even in grayscale
    . There is sufficient contrast between data and background (dark text on light background or vice versa) for readability.

    ### Extreme Quality

    Color usage is maximized for both beauty and accessibility. Every color is carefully vetted. Categorical palettes might use expert-designed schemes like Paul Tol’s colorblind-friendly palette to ensure even a dozen categories are distinct and colorblind safe
    . The design is tested with grayscale or colorblind simulators to confirm that critical information is still conveyed without color
    . For sequential data, only perceptually uniform colormaps are used
    , and for divergent data, balanced schemes with a neutral center (avoiding problematic extremes like pure white or black in the midpoint) are selected
    . In short, the plot’s colors not only look elegant but also communicate effectively under various viewing conditions, reflecting an extreme attention to detail.

    ---

    ## Fonts & Text Clarity

    ### 0 (Not Achieved)

    Text is hard to read or inconsistent. The plot might rely on default font settings that yield small, thin labels which become illegible when the figure is scaled down
    . Inconsistency issues may occur, such as mathematical symbols or Greek letters appearing in a different font from the rest of the text (due to default math font)
    . This mismatch and tiny sizing give an unprofessional look and force readers to squint or decipher the labels. Overall, labels and titles do not stand out or match the document’s style.

    ### 0.5 (Partially Achieved)

    There are some font improvements, but not all text meets high standards. Perhaps the font family was changed to a more attractive one or sizes were increased slightly, yet some elements remain in default style or size. You might see axis labels in a nice font, but tick labels or legend still in the old style. Or the main text is updated but math text still uses a conflicting font. The readability is better than default, but full consistency or optimal sizing isn’t achieved – some text may still be a bit too small or off-style.

    ### 1 (Achieved)

    All text on the plot is clear, readable, and consistent in style. A high-quality font (serif or sans-serif as appropriate) is used uniformly for axes labels, titles, legends, and annotations
    . For an academic look, one might use a serif font (e.g. Times New Roman or Computer Modern via LaTeX) to match a paper’s body text, or a clean sans-serif like Helvetica/Arial for slides – but the key is that the choice is deliberate and applied everywhere. Font sizes are chosen so that labels and numbers will be legible at the final figure size: for example, axis titles around ~12 pt, tick labels ~10 pt, and the main title slightly larger (14–18 pt range)
    . Crucially, any special characters or math expressions are formatted to use the same font (or at least look harmonious) as the rest of the text, avoiding the distraction of mismatched fonts
    . The result is text that integrates seamlessly with the figure and the surrounding document.

    ### Extreme Quality

    Typography is taken to the highest level. The plot might use LaTeX rendering for all text (`text.usetex=True`) so that labels exactly match the publication’s font and math notation is beautifully typeset
    . Every text element is finely tuned: kerning, padding, and positioning are adjusted so nothing overlaps or feels cramped. The font is not only consistent but embedded properly in the output (no bitmaps or missing fonts in the PDF, using Type 42/TrueType embedding)
    . At this level, the plot’s text looks like it was produced by a professional graphics program – crisp and perfectly aligned. Even at high zoom or in print, the lettering remains sharp and easy to read. The overall impression is that of a polished figure where text and data complement each other without any readability issues.

    ---

    Axes, Grid & Background
    0 (Not Achieved)
    The axes and background detract from the data. Common pitfalls at this level: heavy or unnecessary chart borders, or a background that is unfitting (e.g. non-white background in a paper where it prints as gray, or a distracting color). Grid lines might be absent when the viewer needs them to gauge values, or present but in a very distracting way (such as thick, high-contrast grid lines that overshadow the data). The plot may have all four axes spines (borders) drawn in a strong line by default, boxing in the data and creating clutter. Tick marks might point inward cluttering the plot, or their default length/width might draw too much attention. Overall, the framing of the plot feels either too sparse (no guidance for the eye) or too busy.

    0.5 (Partially Achieved)
    Some axis tuning is done, but not fully optimized. Perhaps the top and right spines have been removed or lightened, but other elements remain suboptimal. For example, grid lines are added but maybe too many (including minor grids that crowd the plot) or a bit too dark. Or the grid is only on one axis when the context calls for both. The background might be set to white (good for print) but maybe the axes lines are still thick black, creating high contrast that isn’t gentle on the eyes. Tick marks might be adjusted outward or lengthened, but their labels could be too close to the axes or unevenly spaced. The axes and grid are improved but could use further refinement for a truly clean look.

    1 (Achieved)
    The axes frame and grid are styled to enhance readability without stealing focus. Typically, unnecessary spines are removed – for instance, the top and right axis lines are omitted for a cleaner look since they usually aren’t needed
    seaborn.pydata.org. The remaining spines (left and bottom) are kept subtle: thin and a neutral color (often a medium gray rather than stark black) so they outline the data softly
    . Grid lines, if used, are light and thin (e.g. light gray or semi-transparent) and serve to guide the eye to axis ticks without competing with the data curves
    . In an academic plot, a common achieved style is a white or very light background with thin gray gridlines (“whitegrid” or “ticks” style) so that numeric estimation is easy but the grid stays in the background
    . Tick marks are appropriately sized and placed: often facing outward (to avoid cluttering the inside of the plot) and given a bit of padding from the labels for clarity
    . The background color of the plotting area is chosen deliberately: usually white for print (ensuring maximum contrast with colored lines) or a soft neutral tone if using a specific theme (e.g. a light gray in ggplot style) – in all cases, the text and grid colors are adjusted to maintain clear contrast against the background
    jumpingrivers.com. Overall, the axes and grid provide structure in an unobtrusive, elegant way.

    Extreme Quality
    The plot’s framing is fine-tuned to perfection. Every aspect of axes and background serves the data presentation. For example, the plot might use minor tick marks and grid lines effectively: major grid lines are slightly emphasized and minor grids are even lighter or dashed, giving just enough reference without clutter. Axis spines could be subtly offset or broken if it improves readability (a Tufte-inspired detail), and all spines can be precisely calibrated in thickness. Outward tick marks with custom lengths ensure that tick labels never collide with data points
    . At this level, even the background may be optimized – for instance, using a slight off-white or transparent background if the figure will be overlaid on colored paper/slides. The key is that the background and axes feel invisible until you need them: a viewer sees the data clearly and only subconsciously notices the grid or frame as gentle guides. Achieving this might involve custom spine removal (using functions like sns.despine() or manual rcParams) and careful color selection so that the axes are present but understated
    seaborn.pydata.org. In sum, extreme quality axes/grid make the plot look ultra-clean and precise, as if crafted by a meticulous designer, all while improving the viewer’s ability to read the data.

    ---

    Lines & Markers
    0 (Not Achieved)
    The plotted lines and markers suffer from default or poor settings. Lines might be too thin to see clearly (e.g. hairline 0.5 pt width on a high-DPI output) or jagged due to low resolution. Markers (for data points) could be so small they disappear, or so large they dominate the plot. There may be no differentiation between multiple lines beyond color – if printed in grayscale or viewed by a colorblind reader, they become indistinguishable. In dense plots, overplotting is an issue (points all overlap into an illegible blob) because no technique like transparency is used. Overall, the data representation is either hard to see or aesthetically unpleasing.

    0.5 (Partially Achieved)
    Some improvements are made to lines and markers, but not all problems resolved. For example, line widths have been increased a bit for visibility, but perhaps not consistently across all plot elements (maybe error bars or secondary lines remain too thin). Markers might have been given distinct shapes or colors, yet could still be better tuned (some markers might overlap or lack clear outline). There might be partial use of dashing or marker variety, but in a limited way that still poses issues in certain cases (e.g. two dash patterns that look similar). The visual weight of lines and points is better than default, but could be refined further for clarity and polish.

    1 (Achieved)
    Data lines and markers are rendered clearly and attractively. Lines have a appropriate weight – typically slightly thicker than the default, e.g. around 1.5–2.0 pts, to ensure they’re easily visible in print
    . They are anti-aliased (smooth) and use round joins/caps so there are no sharp pixelated corners
    . Multiple lines are well contrasted not just by color but by style: one line might be solid, another dashed or dotted, so that even without color you can tell them apart. Markers (for scatter points or line markers) are chosen to be distinct shapes and sufficiently large to see details. For instance, one series might use circles, another triangles, etc., with a sensible size (e.g. 6–8 point marker size) such that they’re visible but not overcrowding. If markers are filled with color, they often have a subtle outline or edge color to maintain visibility on any background. In bar charts or areas, edges are slightly darker or outlined so that boundaries are clear (using a gray edge instead of a stark black to keep it soft)
    . In cases of high data density, techniques like partial transparency (alpha) or using smaller markers help reveal overlapping points instead of a solid mass. Overall, achieved-level styling means the data lines are easy to follow and distinguish. They look smooth and are emphasized just enough to draw attention without looking chunky.

    Extreme Quality
    At this level, the portrayal of data is meticulously optimized. Every line and marker is polished: line styles and markers are not only distinct, but chosen to best represent the data’s meaning (e.g. using intuitive patterns or shapes for certain categories). The thickness of lines might be adjusted based on importance – primary data series have a slightly bolder line, background reference lines (if any) thinner. Caps and joins are rounded for all lines giving a sleek appearance
    . Markers might have custom styling, for example, using a white edge outline to make them visible even when plotted on top of lines or other filled areas. Error bars and other indicators are given the same careful treatment: they are drawn with enough thickness to be seen and perhaps use a cap size that makes their extent clear. In truly refined cases, the plot might employ bespoke marker symbols or even subtle gradients/visual effects to enhance visibility (though sparingly, in line with scientific clarity). There is also consideration for print vs. screen: e.g. ensuring that if the figure is printed in black-and-white, the combination of line styles and markers still differentiates the data. The end result is that lines look elegant and smooth, and data points are highlighted in just the right way – the plot looks like a high-quality illustration where every stroke is deliberate.

    ---

    Legend & Annotations
    0 (Not Achieved)
    The plot either lacks a needed legend/annotations or presents them poorly. In the worst case, multiple data series are unlabeled, forcing the reader to guess which is which. If a legend exists, it might be placed arbitrarily (e.g. the default “best” location that sometimes covers important data) or uses tiny default fonts that are hard to read. Legends or text boxes might overlap the data points or each other, creating clutter. Annotations (like arrows or notes) if present could be formatted inconsistently or intrusively (e.g. text too small, arrows pointing ambiguously). The viewer is left confused about what certain colors/lines mean, or distracted by awkwardly placed text.

    0.5 (Partially Achieved)
    Legends and annotations are present and somewhat helpful, but not optimal. For example, the legend does label the series, but perhaps uses a not-quite matching style (maybe the line samples in the legend are too thin or small compared to the plot lines, or the legend frame is overly bold). The font size might be marginal – somewhat readable but maybe smaller than ideal
    . The legend placement is improved (not covering data), but might not be the best possible spot (maybe it’s outside the plot but causing a lot of whitespace, or inside but in a corner that’s still a bit tight). Annotations added to highlight data points are partially helpful, yet could be clearer (they might overlap slightly with other elements or use default arrow styles that are too heavy). In short, the information is there, just not fine-tuned.

    1 (Achieved)
    The legend and any annotations complement the plot without overwhelming it. The legend clearly identifies each plot element with labels that are concise and descriptive. It is placed in a non-intrusive location – often just outside the main plotting area or in an empty corner – so that it does not block data
    . The legend’s appearance is refined: fonts are the same style as the axis labels (for consistency) and sized around 10–12 pt for readability
    . The legend symbols (line samples, marker icons) are sized to match what’s on the plot, making the legend a true key. If the legend has a background, it’s a subtle, translucent box or a faint border, just enough to separate it from the plot. Annotations (like text arrows calling out a specific data point) are used sparingly and purposefully. When included, they are formatted in the same font and an appropriate size, and placed to avoid overlap with important data. For example, an annotation arrow might have a softened color or thin line so it’s noticeable but not the first thing the viewer sees. Overall, achieved status means the viewer can quickly decode the plot’s meaning through the legend and notes, with no frustration or guesswork.

    Extreme Quality
    Legends and annotations are masterfully handled. At this level, every explanatory label on the plot is almost an extension of the figure’s storytelling. The legend, if there are many items, might be moved completely outside the plot area or below the plot, possibly laid out in a clean multi-column format to avoid a long list in a tiny box. It could even be integrated into a figure caption or callouts for maximum clarity. If inside the plot, the legend might employ a transparent background to let the grid show through and avoid blocking any data points, yet still ensure text legibility. Fine adjustments are made such as setting custom handlelength and spacing in plt.legend() so that the legend items are perfectly proportioned. Annotations on the plot are done with design flair: for instance, important points might be marked with unobtrusive arrows or labels that follow the curve of the data, or highlighted with a gentle circle/shape that does not upset the plot’s balance. The annotative elements use color and font consistent with the plot’s style (no jarring red text unless it’s intentional to highlight something). There is zero overlap of text with data – labels might be slightly offset or curved to ensure clarity
    medium.com. The overall impression at extreme quality is that the legend and annotations feel like a natural part of the visualization. They guide the reader through the figure like a caption would, all while looking clean and elegant. It’s the kind of meticulous detail you’d see in an infographic or a top-tier journal figure where every label has its perfect place.

    ---

    Figure Layout & Size
    0 (Not Achieved)
    The figure’s size and layout are poorly chosen, leading to readability issues. For example, the plot might be produced at a default size that, when inserted into a paper or slide, makes all elements too small (tiny text, cramped markers). Alternatively, the aspect ratio might be distorted or not appropriate for the data (e.g. a world map drawn in a non-square aspect, or a time series squashed so trends are hard to see). If multiple subplots are present, they may be misaligned, overlapping, or inconsistent in scale. Little to no margin is adjusted, so labels or titles could be cut off or too close to other elements. The figure does not “fit” well in its final context, appearing either oversized or undersized, and generally hard to interpret.

    0.5 (Partially Achieved)
    Some consideration has been given to figure size and layout, but it’s not perfect. The figure dimensions might be adjusted to a reasonable value, but perhaps not exactly the target publication size. There might still be minor issues like a very long axis label that runs off the edge or subplots that aren’t evenly spaced. The aspect ratio could be acceptable but maybe leaves a lot of empty space or slight distortion. If this is for a paper, maybe the figure width is roughly column width, but not precisely, requiring scaling later. If for a presentation, maybe it’s almost full slide width but the text might still be a tad small. In general, it’s better than default – things mostly fit – but some tweaking is needed for a truly balanced layout.

    1 (Achieved)
    The figure is sized and laid out with intent and precision. It is created at the exact dimensions required for its final use, ensuring that fonts and other elements appear at the right scale in the document
    . For an academic paper, this means if the figure is going into a single-column, it’s made ~3–3.5 inches wide (or whatever the journal’s spec is), or ~6–7 inches for double column, with appropriate height so that content isn’t squished. This way, no resizing (and resultant blurriness or disproportionate scaling) is needed later. All components (text, markers, line widths) are chosen with that size in mind, matching the document’s text size and look
    . The layout is clean: margins are adjusted so that labels and titles have breathing room (using plt.tight_layout() or manual subplots_adjust to prevent any cut-off text). If multiple panels (subplots) are present, they are aligned neatly with consistent axes ticks and shared legends if appropriate, giving a sense of unity. There’s sufficient padding for figure captions or annotations. The aspect ratio of each plot is chosen to best display the data (for example, using equal aspect for geographic maps or an aesthetically pleasing ratio for line charts). In sum, an achieved layout feels “just right” – the figure neither wastes space nor crams elements; it will slot into its final medium perfectly.

    Extreme Quality
    The figure’s size and layout are maximized for impact and professionalism. At this level, the author has possibly iterated through mock placements (e.g. designing the figure while previewing it in an actual document or slide to ensure a perfect fit)
    . Fine adjustments are made such that, for instance, all subplot labels (like “(a), (b)” panel indicators or titles) are perfectly aligned and of equal size. The designer might follow advanced guidelines: for instance, keeping total figure width under ~6 inches for print so it fits nicely with caption on the same page
    , or ensuring the figure height leaves room for the caption below without spilling to next page
    . They might use tools or calculations to get precise aspect ratios or to convert units (inches, mm) to match the publication requirements
    . Every element of spacing is controlled – perhaps using a grid or golden-ratio spacing for aesthetic balance. When multiple plots are involved, their scales are synchronized (same axis limits, same color scales) for consistency across the figure. The end result is a layout that feels elegant and well-proportioned. It looks as if the figure was professionally typeset: nothing is cut off, nothing is awkwardly positioned. The figure draws the viewer’s eye through the content smoothly, and fits perfectly in the medium (be it a journal column or a presentation slide) without any last-minute resizing.

    ---

    Export & Output Quality
    0 (Not Achieved)
    The figure output is low-quality, undermining the visual appeal. This includes cases like using a low DPI (dots-per-inch) raster image (perhaps a default 72 DPI PNG or a screenshot) which results in blurry or pixelated graphics when printed or zoomed
    medium.com. Text might appear fuzzy, and fine lines can vanish or look jagged. There may be compression artifacts (in formats like JPEG) or inappropriate file format choices (e.g. a lossy JPG for a line plot). In worst cases, colors might be saved in the wrong profile causing them to appear off. Essentially, the final figure file does not faithfully represent the high-quality plot seen on screen.

    0.5 (Partially Achieved)
    The figure is exported with some attention to quality, but not fully optimal. For instance, a high-resolution PNG (e.g. 300 DPI) might be used – so it’s not blurry – but still raster, which could inflate file size or still not scale ideally in all cases. Or perhaps the figure is saved as a PDF but fonts are not embedded, which can lead to issues like fonts being substituted or edges appearing jagged in some viewers. Another scenario: the plot is nearly perfect, but the chosen output dimensions are slightly off, causing a need to rescale in a document (introducing slight blurriness). There is an improvement from default, but the output could be even better with proper settings or formats.

    1 (Achieved)
    The plot is saved in a publication-quality format that preserves its clarity. Typically, this means exporting as a vector format like PDF or SVG for line art and scatter plots, so that all lines and text remain crisp at any zoom level. In a vector output, fonts are embedded or handled such that text is selectable and renders sharply (e.g. using PDF with Type 42 fonts to avoid blurry text
    . If a raster image is needed (for example, required by a specific submission system or for a high-complexity image), it is exported at a very high resolution (e.g. 300–600 DPI for print) to ensure no pixelation. The figure is also cropped properly (no excessive whitespace unless intended) and uses an appropriate color model (typically RGB for web, CMYK for print if requested, or grayscale for certain print publications). The result is an output file that looks professional: lines are sharp, colors are accurate, and text is crisp when the figure is placed in the final document. Reviewers or viewers can zoom in without quality loss.

    Extreme Quality
    The output settings are maxed out for perfection. At this level, every detail of the file is considered. For example, the figure might be exported as PDF/X or PDF/A for guaranteed compatibility in publishing. All fonts are not only embedded but also subsetted to reduce file size. The figure’s PDF has no extraneous elements – for instance, no hidden layers or unused metadata – ensuring a clean file. If using SVG for web, the code might be simplified or cleaned to remove unnecessary markup. In cases of very complex visuals (dense data or thousands of points), a combination of vector and high-res raster (for specific layers) might be employed to balance fidelity and file size. Additionally, color profiles could be embedded to ensure consistent printing. The creator might even manually adjust the output by editing the vector file (e.g. tweaking line joins or text positions in Adobe Illustrator) to fix any minor issues that automatic rendering can produce. Saving in multiple formats as needed (PDF for print, PNG for slides at 2× resolution) is also common. Ultimately, an extreme-quality export means the figure looks absolutely top-notch in any context – it prints beautifully with razor-sharp detail and displays perfectly on high-resolution screens. This is the kind of output where even upon close inspection, every letter and line is pristine, reflecting the utmost care in the final production step
    

    # IMPORTANT: Do not ask the user if you should implement the changes at the end. Just provide the plan as described above.
    # IMPORTANT: Do not attempt to write the code or use any string modification tools. Just provide the plan.
""")


class PlannerResponse(BaseModel):
    """
    Implementation plan for a plot creation goal, together with the reasoning behind it.
    """

    # The docstring above previously said "Creates a plot", which contradicted the
    # system prompt's instruction to only plan and not write code.
    # NOTE(review): field meanings below are inferred from their names — confirm.
    reasoning: str  # presumably the analysis that leads to the plan
    plan: str  # presumably the step-by-step plan requested by the system prompt


prompt = "The user plot creation goal is: find correlation between the number of people who have a certain disease and the number of people who have a certain disease. You were requested to plan on: creating the plot"

inference_result = await Generate.arun(
    model="openai/gpt-4.1-mini",
    temperature=1,
    response_format=PlannerResponse,
    system_prompt=INSTRUCTIONS_PLANNING,
    prompt=prompt,
)

print(inference_result.response)

In [None]:
from rich import print as rprint

# Pretty-print the whole result object from the planning call, not just its `.response`.
rprint(inference_result)