In [1]:
# Load env variables and create client
from dotenv import load_dotenv
from anthropic import Anthropic

# Load environment variables (via python-dotenv) before the client is created.
load_dotenv()

# Shared Anthropic client used by chat() for every request.
# NOTE(review): presumably picks up its API key from the environment loaded above — confirm.
client = Anthropic()
# Model identifier that chat() sends with every request.
model = "claude-haiku-4-5"

In [2]:
# Helper functions
def add_user_message(messages, text):
    """Append a user turn to the conversation.

    Mutates `messages` in place by appending a dict with role "user" and
    `text` as its content. Returns None.
    """
    messages.append(dict(role="user", content=text))


def add_assistant_message(messages, text):
    """Append an assistant turn to the conversation.

    Mutates `messages` in place by appending a dict with role "assistant"
    and `text` as its content. Returns None.
    """
    messages.append(dict(role="assistant", content=text))


def chat(messages, system=None, temperature=1.0, stop_sequences=None):
    """Send a conversation to the model and return the text of its reply.

    Args:
        messages: List of message dicts (as built by add_user_message /
            add_assistant_message); passed to the client unchanged.
        system: Optional system prompt. Only included in the request when
            it is truthy.
        temperature: Sampling temperature forwarded to the API.
        stop_sequences: Optional iterable of strings at which generation
            should stop. Defaults to no stop sequences.

    Returns:
        The `text` of the first content block of the response.
    """
    params = {
        "model": model,
        "max_tokens": 1000,
        "messages": messages,
        "temperature": temperature,
        # Build a fresh list on every call: a `[]` default in the signature
        # would be one shared object across all calls, so any mutation of it
        # would silently leak into later requests.
        "stop_sequences": list(stop_sequences) if stop_sequences else [],
    }

    if system:
        params["system"] = system

    message = client.messages.create(**params)
    return message.content[0].text

In [23]:
import json


def generate_dataset(num_cases=3):
    """Ask the model for an evaluation dataset of AWS-related tasks and parse it.

    The request is a user prompt followed by a prefilled assistant turn that
    opens a json code fence; generation is stopped at the next code fence,
    so the reply is expected to be the bare JSON array.

    Args:
        num_cases: Number of task objects to request. Defaults to 3, the
            count that was previously hard-coded in the prompt.

    Returns:
        The parsed JSON reply. Per the prompt this is a list of dicts, each
        with a "task" description.

    Raises:
        ValueError: If `num_cases` is not a positive integer.
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    if isinstance(num_cases, bool) or not isinstance(num_cases, int) or num_cases < 1:
        raise ValueError(f"num_cases must be a positive integer, got {num_cases!r}")

    # Literal braces of the JSON example are doubled because this is an
    # f-string; the rendered text for num_cases=3 matches the original prompt.
    # NOTE(review): the example object ends with a trailing comma, which is not
    # valid JSON; confirm it does not nudge the model into unparseable output.
    prompt = f"""
Generate a evaluation dataset for a prompt evaluation. The dataset will be used to evaluate prompts
that generate Python, JSON, or Regex specifically for AWS-related tasks. Generate an array of JSON objects,
each representing task that requires Python, JSON, or a Regex to complete.

Example output:
```json
[
    {{
        "task": "Description of task",
    }},
    ...additional
]
```

* Focus on tasks that can be solved by writing a single Python function, a single JSON object, or a regular expression.
* Focus on tasks that do not require writing much code

Please generate {num_cases} objects.
"""

    messages = []

    add_user_message(messages, prompt)
    # Prefill the assistant turn so the reply starts directly inside the fence.
    add_assistant_message(messages, "```json")

    # Stop at the closing fence so only the JSON payload comes back.
    answer = chat(messages, stop_sequences=["```"])

    return json.loads(answer)

In [24]:
# Ask the model for a fresh set of test cases.
dataset = generate_dataset()

# Persist them so later cells can reload the same cases from disk.
with open("dataset.json", "w") as dataset_file:
    json.dump(dataset, dataset_file, indent=2)

# Trailing expression: display the generated cases as the cell output.
dataset

[{'task': 'Write a regular expression to validate AWS S3 bucket names. S3 bucket names must be between 3 and 63 characters long, contain only lowercase letters, numbers, hyphens, and periods, start and end with a letter or number, and cannot contain consecutive hyphens or periods.'},
 {'task': "Write a Python function that takes an AWS CloudWatch log event as input and extracts the timestamp and message fields, returning them as a dictionary with 'timestamp' and 'message' keys."},
 {'task': "Create a JSON object that represents an AWS IAM policy allowing a principal to perform s3:GetObject and s3:PutObject actions only on objects with the 'public' tag in any S3 bucket."}]

In [25]:
def run_prompt(test_case):
    """Wrap the test case's task in the solve-this-task prompt and return the model's reply.

    Reads `test_case["task"]`, sends it as a single user message via chat()
    with default settings, and returns whatever chat() returns.
    """
    task = test_case["task"]

    conversation = []
    add_user_message(
        conversation,
        f"\nPlease solve the following task:\n\n{task}\n",
    )
    return chat(conversation)

In [26]:
def run_test_case(test_case):
    """Run one test case through run_prompt and bundle the output with a score.

    Grading is not implemented yet: every case currently gets a fixed
    score of 10.

    Returns:
        A dict with keys "output" (the run_prompt result), "test_case"
        (the input, unchanged) and "score".
    """
    model_output = run_prompt(test_case)

    # TODO - Grading
    placeholder_score = 10

    return dict(output=model_output, test_case=test_case, score=placeholder_score)

In [27]:
def run_eval(dataset):
    """Run run_test_case on every test case in `dataset`.

    `dataset` is iterated once, in order; the return value is a list with
    one run_test_case result per case, in the same order.
    """
    return [run_test_case(case) for case in dataset]

In [28]:
# Reload the saved test cases from disk so the eval runs on the persisted dataset.
with open("dataset.json", "r") as dataset_file:
    dataset = json.load(dataset_file)

# One result dict per test case, in dataset order.
results = run_eval(dataset)

In [29]:
# Pretty-print the eval results as indented JSON for inspection.
print(json.dumps(results, indent=2))

[
  {
    "output": "# AWS S3 Bucket Name Validation Regex\n\nHere's a comprehensive solution with multiple approaches:\n\n## Solution 1: Single Regex (Most Practical)\n\n```regex\n^(?!.*(-{2}|\\.{2}))(?!.*(-\\.|.\\-))(?!-)(?!\\.)[a-z0-9][a-z0-9\\.\\-]{1,61}[a-z0-9]$|^[a-z0-9]{3}$\n```\n\n## Solution 2: More Readable with Comments\n\n```regex\n^\n  (?!.*(-{2}|\\.{2}))      # Negative lookahead: no consecutive hyphens or periods\n  (?!-)                    # Cannot start with hyphen\n  (?!\\.)                   # Cannot start with period\n  [a-z0-9]                 # Must start with letter or number\n  [a-z0-9\\.\\-]{1,61}       # Middle characters (1-61 chars for total 3-63)\n  [a-z0-9]                 # Must end with letter or number\n$\n```\n\n## Solution 3: Simplified Alternative\n\n```regex\n^(?!.*[_\\-\\.]{2})(?![\\-\\.])(?!.*[\\-\\.]$)[a-z0-9][a-z0-9\\.\\-]{1,61}[a-z0-9]$|^[a-z0-9]{3}$\n```\n\n---\n\n## Implementation Examples\n\n### JavaScript\n```javascript\nconst s3BucketRegex