In [None]:
from enum import Enum

import instructor
import pandas as pd
from pydantic import Field

from cuery import Prompt, Response, Task, pprint

# Create a prompt from a simple string
The `Prompt` class expects a list of (jinja) messages with their roles. But it can also be instantiated from a simple string.

In [2]:
# Jinja template combining a plain variable ({{name}}) with a for-loop over a list.
t = (
    "Hello {{name}}! "
    "{% for item in ingredients %} {{ item }} {% endfor %}"
)
# Build the Prompt from the bare string and pretty-print it.
p = Prompt.from_string(t)
pprint(p)

# Simplified client/model creation

In [None]:
class Recipe(Response):
    # Response model with a single list-valued field: the ingredients of a dish.
    # (Comment rather than docstring: a class docstring may be picked up as the
    # schema description sent to the LLM — TODO confirm.)
    ingredients: list[str] = Field(
        description="A list of ingredients for the dish.",
    )


prompt = Prompt.from_string("Generate a list of recipe ingredients to make '{{dish}}'.")
task = Task(prompt=prompt, response=Recipe)
pprint(task)

model = "openai/gpt-4.1-mini"  # or e.g. "perplexity/sonar"
responses = await task(context=[{"dish": "spaghetti carbonara"}], model=model)
responses.to_pandas(explode=False)

Iterating context:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,dish,ingredients
0,spaghetti carbonara,"[spaghetti, eggs, pancetta or guanciale, Parme..."


# Inspect LLM queries (containing the final prompt sent to the LLM)

Only available (for now) when multiple rows were processed!

Each task maintains a log of errors and the queries to the LLM provider. Note that the structure of what's sent to the provider may be different for each.

In [9]:
# Show the first entry of the task's query log, i.e. the request as it was
# prepared for the LLM provider (messages, model, tool schema).
task.query_log.queries[0]

{'messages': [{'role': 'user',
   'content': "Generate a list of recipe ingredients to make 'spaghetti carbonara'."}],
 'model': 'gpt-4.1-mini',
 'tools': [{'type': 'function',
   'function': {'name': 'Recipe',
    'description': 'Correctly extracted `Recipe` with all the required parameters with correct types',
    'parameters': {'properties': {'ingredients': {'description': 'A list of ingredients for the dish.',
       'items': {'type': 'string'},
       'title': 'Ingredients',
       'type': 'array'}},
     'required': ['ingredients'],
     'type': 'object'}}}],
 'tool_choice': {'type': 'function', 'function': {'name': 'Recipe'}}}

# Choices (enum)
Require LLM to respond with one of N _options_ (fixed categories).

In [None]:
class Role(Enum):
    # Closed set of categories used as the type of `UserDetail.role`.
    # Each member's value is the same string as its name.
    PRINCIPAL = "PRINCIPAL"
    TEACHER = "TEACHER"
    STUDENT = "STUDENT"
    OTHER = "OTHER"  # catch-all member


class UserDetail(Response):
    # Synthetic user profile; `role` is restricted to the members of `Role`.
    # (Comment rather than docstring: a class docstring may be picked up as the
    # schema description sent to the LLM — TODO confirm.)
    age: int
    name: str
    role: Role = Field(
        description="Correctly assign one of the predefined roles to the user.",
    )


prompt = Prompt.from_string("Please a create a synthetic user profile with age, name and role.")
task = Task(prompt=prompt, response=UserDetail)

response = await task(model="openai/gpt-3.5-turbo")
print(response)
response.to_pandas()

[UserDetail(age=25, name='Alice', role=<Role.STUDENT: 'STUDENT'>)]


Unnamed: 0,age,name,role
0,25,Alice,Role.STUDENT


# Simple Multivalued fields
Require LLM to respond with a _list_ of values (unconstrained).

In [None]:
class Ingredients(Response):
    # Response model with one unconstrained, list-valued field.
    # (Comment rather than docstring: a class docstring may be picked up as the
    # schema description sent to the LLM — TODO confirm.)
    items: list[str] = Field(
        description="List of ingredients for the recipe.",
    )


prompt = Prompt.from_string("List the ingredients for the following dish: {{dish}}.")
context = [{"dish": "pasta bolognese"}, {"dish": "chocolate cake"}]

task = Task(prompt=prompt, response=Ingredients)
responses = await task(context=context)
print(responses)

Iterating context:   0%|          | 0/2 [00:00<?, ?it/s]

[Ingredients(items=['pasta', 'bolognese sauce', 'ground beef', 'onion', 'garlic', 'carrot', 'celery', 'tomato paste', 'crushed tomatoes', 'red wine', 'beef broth', 'bay leaves', 'salt', 'pepper', 'olive oil']), Ingredients(items=['chocolate', 'flour', 'sugar', 'eggs', 'butter', 'baking powder', 'vanilla extract', 'salt', 'milk'])]


In [13]:
# Maintain the original structure of the responses: one row per context item,
# with the list-valued field kept as a list in a single cell.
responses.to_pandas(explode=False)

Unnamed: 0,dish,items
0,pasta bolognese,"[pasta, bolognese sauce, ground beef, onion, g..."
1,chocolate cake,"[chocolate, flour, sugar, eggs, butter, baking..."


In [14]:
# Explode the list of ingredients into separate rows: one row per list element,
# with the context column (`dish`) repeated on each row.
responses.to_pandas(explode=True)

Unnamed: 0,dish,items
0,pasta bolognese,pasta
1,pasta bolognese,bolognese sauce
2,pasta bolognese,ground beef
3,pasta bolognese,onion
4,pasta bolognese,garlic
5,pasta bolognese,carrot
6,pasta bolognese,celery
7,pasta bolognese,tomato paste
8,pasta bolognese,crushed tomatoes
9,pasta bolognese,red wine


In [9]:
# Convert to simple Python records: a list of dicts, one per context item,
# each holding the context value(s) and the (unexploded) response fields.
responses.to_records(explode=False)

[{'dish': 'pasta bolognese',
  'items': ['pasta',
   'bolognese sauce',
   'ground beef',
   'onion',
   'garlic',
   'celery',
   'carrot',
   'tomato paste',
   'red wine',
   'beef broth',
   'salt',
   'pepper',
   'olive oil',
   'parmesan cheese']},
 {'dish': 'chocolate cake',
  'items': ['chocolate',
   'flour',
   'sugar',
   'eggs',
   'butter',
   'cocoa powder',
   'baking powder',
   'vanilla extract',
   'salt',
   'milk']}]

# Nested models
Define a more complicated output structure by referencing another response model. 

In this case, the output is a list of bounded length containing instances of a pre-defined response model.

In [None]:
class Sector(Response):
    # One industrial sector/subsector pair plus an automation-potential score.
    # (Comment rather than docstring: a class docstring may be picked up as the
    # schema description sent to the LLM — TODO confirm.)

    # Sector title, constrained to 10-150 characters.
    sector: str = Field(
        description="Human-readable title(!) of the industrial sector (in NAICS taxonomy)",
        min_length=10,
        max_length=150,
    )
    # Subsector title, constrained to 5-150 characters.
    subsector: str = Field(
        description="Human-readable title(!) of the industrial SUBsector (in NAICS taxonomy)",
        min_length=5,
        max_length=150,
    )
    # Lower bound is 1 (was 0) so validation agrees with the "1 to 10" scale
    # stated in the description.
    sector_automation_potential: int = Field(
        description="A score from 1 to 10 indicating the sector's potential for automation",
        ge=1,
        le=10,
    )


class Sectors(Response):
    # Wrapper model: between 1 and 5 `Sector` entries (enforced by
    # min_length/max_length, matching the description).
    # The taxonomy is spelled "NAICS" here to agree with the `Sector` field
    # descriptions (was "NAIC").
    sectors: list[Sector] = Field(
        description="A list of 1 to 5 NAICS industrial sectors with their AI automation potential",
        min_length=1,
        max_length=5,
    )


sectors_prompt = Prompt.from_string(
    "List some industrial sector in the country of {{country}} that have great AI automation potential."
)

context = [{"country": "Germany"}, {"country": "United States"}, {"country": "Japan"}]
sectors_task = Task(prompt=sectors_prompt, response=Sectors)
responses = await sectors_task(context=context)

Iterating context:   0%|          | 0/3 [00:00<?, ?it/s]

In [16]:
responses.to_pandas(explode=True)

Unnamed: 0,country,sector,subsector,sector_automation_potential
0,Germany,Manufacturing,Automobile Manufacturing,8
1,Germany,Healthcare,Medical Devices Manufacturing,7
2,Germany,Transportation and Warehousing,Freight Trucking,9
3,United States,Manufacturing,Automobile Manufacturing,8
4,United States,Healthcare,Medical Imaging,9
5,United States,Retail Trade,E-commerce,7
6,Japan,Manufacturing,Automobile Manufacturing,8
7,Japan,Health Care and Social Assistance,Hospitals,7
8,Japan,Retail Trade,E-commerce,9


# Chain tasks together
Run multiple tasks one after the other, collecting the results in a single DataFrame.

Keep in mind here that the names of inputs of one task must be the same as the names of outputs in the previous one.

Here we first extract some industrial sectors for each input country, and then some job roles within each sector.

In [None]:
# Re-uses "sectors" task from previous code cell (!)

from cuery import Chain


class Job(Response):
    # A single job role with a short description and an automation-potential score.
    # (Comment rather than docstring: a class docstring may be picked up as the
    # schema description sent to the LLM — TODO confirm.)

    # The length limits below are stated only in the descriptions; no
    # min_length/max_length validation is applied to these two fields.
    job_role: str = Field(description="Name of the job role (job title, less than 50 characters)")
    job_description: str = Field(
        description="A short description of the job role (less than 200 characters)"
    )
    # Lower bound is 1 (was 0) so validation agrees with the "1 to 10" scale
    # stated in the description.
    job_automation_potential: int = Field(
        description="A score from 1 to 10 indicating the job's potential for automation",
        ge=1,
        le=10,
    )


class Jobs(Response):
    # Wrapper model: a list of `Job` entries (no length bounds are set).
    # NOTE(review): the description mentions "reasons for that potential", but
    # `Job` defines no such field — confirm whether a field is missing or the
    # description text is stale.
    jobs: list[Job] = Field(
        description="A list of jobs with their AI automation potential and reasons for that potential",
    )


jobs_prompt = Prompt.from_string(
    "List some job roles with great AI automation potential in the country of {{country}} and the sector '{{sector}}'"
)

context = pd.DataFrame(
    {
        "country": ["Germany", "United States", "Japan"],
        "PIB": [4.0, 5.0, 3.5],
    }
)

jobs_task = Task(prompt=jobs_prompt, response=Jobs)
chain = Chain(sectors_task, jobs_task)
responses = await chain(context=context)

Iterating context:   0%|          | 0/3 [00:00<?, ?it/s]

Iterating context:   0%|          | 0/9 [00:00<?, ?it/s]

In [14]:
# Display the combined result of the chained tasks.
responses

Unnamed: 0,sector,country,job_role,job_description,job_automation_potential
0,Manufacturing,Germany,Production Planner,Responsible for planning production schedules ...,8
1,Manufacturing,Germany,Quality Control Inspector,Ensures products meet quality standards throug...,7
2,Manufacturing,Germany,Maintenance Technician,Responsible for repairing and maintaining mach...,9
3,Manufacturing,Germany,Supply Chain Manager,"Manages the flow of goods, information, and fi...",6
4,Manufacturing,Germany,Logistics Coordinator,Coordinates the transportation and delivery of...,7
5,Transportation and Warehousing,Germany,Autonomous Vehicle Engineer,Design and develop autonomous vehicles for var...,8
6,Transportation and Warehousing,Germany,Supply Chain Analyst,Manage and optimize supply chain operations us...,7
7,Transportation and Warehousing,Germany,Warehouse Robotics Engineer,Design and implement robotic systems for wareh...,9
8,Transportation and Warehousing,Germany,Transportation Data Analyst,Analyze transportation data to improve efficie...,6
9,Information,Germany,Data Scientist,Analyzing complex data to extract insights and...,8
