In [3]:
from anthropic import Anthropic
import json
# Module-level client; it is bound as the default `client` argument of get_response_text.
# NOTE(review): constructed with no arguments — presumably the API key is read from the
# environment (e.g. ANTHROPIC_API_KEY); confirm before running on a fresh kernel.
client = Anthropic()

In [2]:
def add_message(role: str, content: str, messages: list[dict[str, str]]) -> None:
    """Append a message to the conversation history in place.

    Args:
        role (str): The role of the message sender, i.e. "user" or "assistant"
        content (str): The content of the message
        messages (list[dict[str, str]]): The conversation history. Each entry is a
            dict with "role" and "content" keys; this list is mutated in place.

    Returns:
        None: The new message is appended to ``messages``; nothing is returned.
    """
    messages.append({"role": role, "content": content})
    
def get_response_text(
    messages:list[dict[str, str]],
    system_prompt:str=None,
    model:str="claude-sonnet-4-0",
    max_tokens:int=1000,
    client:Anthropic=client,
    stop_sequences:list[str]=None) -> str:
    """Send the conversation to the model and return the text of its reply.

    Args:
        messages (list[dict[str, str]]): The conversation history, as dicts with
            "role" and "content" keys (the shape produced by add_message)
        system_prompt (str): Optional system prompt; only sent when truthy
        model (str): The model to use for the response
        max_tokens (int): The maximum number of tokens in the response
        client (Anthropic): The Anthropic client; defaults to the module-level client
        stop_sequences (list[str]): Optional stop sequences; only sent when truthy

    Returns:
        str: The text of the first text block in the response, or an empty
        string when the response contains no text block.
    """

    params = {
        "model": model,
        "max_tokens": max_tokens,
        "messages": messages
    }

    # Optional fields are left out of the request entirely when not provided.
    if system_prompt:
        params["system"] = system_prompt

    if stop_sequences:
        params["stop_sequences"] = stop_sequences

    response = client.messages.create(**params)

    # Indexing content[0] directly raises IndexError on an empty content list and
    # AttributeError when the first block has no `.text`. Scan for the first block
    # that carries text instead.
    # NOTE(review): an empty content list is presumably possible when the model
    # stops immediately (e.g. right after an assistant prefill) — confirm against
    # the Messages API documentation.
    for block in response.content:
        block_text = getattr(block, "text", None)
        if isinstance(block_text, str):
            return block_text
    return ""

In [6]:
def generate_dataset(num_tasks: int = 3):
    """Ask the model for an evaluation dataset of AWS-related tasks.

    Sends a single user prompt, prefills the assistant turn with "```json" and
    stops at the closing "```", then parses the text in between as JSON.

    Args:
        num_tasks (int): How many task objects to request. Defaults to 3.

    Returns:
        The value parsed by ``json.loads`` from the model's reply. The prompt asks
        for a list of ``{"task": ...}`` objects, but the shape is not validated here.

    Raises:
        ValueError: If ``num_tasks`` is less than 1.
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    if num_tasks < 1:
        raise ValueError("num_tasks must be at least 1")

    # f-string: literal braces in the JSON example are doubled. The example object
    # has no trailing comma, so the sample itself is valid JSON and does not nudge
    # the model toward output that json.loads would reject.
    prompt = f"""
    Generate an evaluation dataset for a prompt evaluation. The dataset will be used to evaluate prompts that generate Python, JSON, or Regex specifically for AWS-related tasks. Generate an array of JSON objects, each representing task that requires Python, JSON, or a Regex to complete.

    Example output:
    ```json
    [
    {{
        "task": "Description of task"
    }},
    ...additional
    ]
    ```

    * Focus on tasks that can be solved by writing a single Python function, a single JSON object, or a single regex
    * Focus on tasks that do not require writing much code

    Please generate {num_tasks} objects.
    """

    messages = []
    add_message("user", prompt, messages)
    # Prefill the assistant turn so the model continues right after "```json".
    add_message("assistant", "```json", messages)
    # The closing fence is the stop sequence, so the returned text is only the JSON body.
    text = get_response_text(messages, model="claude-haiku-4-5", stop_sequences=["```"])
    return json.loads(text)

In [7]:
# Build the dataset; generate_dataset() sends a request to the model via get_response_text.
dataset = generate_dataset()
# Show the parsed result for a quick visual check.
print(dataset)

[{'task': 'Write a Python function that takes an AWS S3 bucket name and returns True if it follows AWS naming conventions (lowercase, 3-63 characters, no consecutive hyphens), False otherwise.'}, {'task': "Create a JSON object that represents an AWS IAM policy allowing a user to read all objects from an S3 bucket named 'my-bucket'."}, {'task': 'Write a Regex pattern that matches valid AWS EC2 instance IDs (format: i-followed by 8 or 17 hexadecimal characters).'}]


In [None]:
# Persist the generated dataset as indented JSON in prompt_eval_dataset.json
# (the file is created or overwritten in the current working directory).

with open('prompt_eval_dataset.json', 'w') as out_file:
    out_file.write(json.dumps(dataset, indent=2))