# Prompt Evaluations

In [1]:
import boto3
import json

In [2]:
# Bedrock Runtime client (us-east-2 region); chat() sends its requests through it.
client = boto3.client("bedrock-runtime", region_name="us-east-2")
# Use Haiku for faster evals
model_id = "us.anthropic.claude-3-5-haiku-20241022-v1:0"


def add_user_message(messages, text):
    """Append a user turn holding ``text`` to ``messages`` (mutated in place)."""
    content_blocks = [{"text": text}]
    messages.append({"role": "user", "content": content_blocks})


def add_assistant_message(messages, text):
    """Append an assistant turn holding ``text`` to ``messages`` (mutated in place)."""
    content_blocks = [{"text": text}]
    messages.append({"role": "assistant", "content": content_blocks})


def chat(messages, system=None, temperature=1.0, stop_sequences=None):
    """Send a conversation to the model and return the text of its reply.

    Args:
        messages: List of message dicts in the shape built by
            add_user_message / add_assistant_message.
        system: Optional system prompt. Left out of the request when falsy.
        temperature: Sampling temperature placed in ``inferenceConfig``.
        stop_sequences: Optional list of strings placed in
            ``inferenceConfig.stopSequences``. ``None`` (the default) sends
            an empty list.

    Returns:
        The ``text`` value of the first content block of the response message.
    """
    # A None sentinel replaces the former ``[]`` default so that no single
    # list object is shared between calls.
    if stop_sequences is None:
        stop_sequences = []

    params = {
        "modelId": model_id,
        "messages": messages,
        "inferenceConfig": {
            "temperature": temperature,
            "stopSequences": stop_sequences,
        },
    }

    # Only attach a system prompt when one was actually provided.
    if system:
        params["system"] = [{"text": system}]

    response = client.converse(**params)

    return response["output"]["message"]["content"][0]["text"]

## Step 1: Define an initial prompt

In [None]:
# prompt_v1 = f"""
# Please provide a solution to the following task:

# {task}
# """

## Step 2: Generate the Evaluation Dataset

In [5]:
def generate_dataset():
    """Ask the model for a small list of evaluation tasks and parse it.

    The assistant turn is pre-filled with an opening JSON code fence and the
    closing fence is used as a stop sequence, so the reply text is expected
    to be bare JSON.

    Returns:
        The value parsed from the model's reply by ``json.loads``.
    """
    prompt = """
    Generate 3 AWS-related tasks that require Python, JSON, or Regex solutions.
    
    Focus on tasks that can be solved by writing a single Python function, 
    a single JSON object, or tasks that do not require writing much code.
    
    Example output:
    [
        {
            "task": "Description of task"
        },
        ...additional
    ]
    
    Please generate 3 objects.
    """

    conversation = []
    add_user_message(conversation, prompt)
    # Pre-fill the reply so the model continues inside a JSON code fence.
    add_assistant_message(conversation, "```json")

    # Generation stops at the closing fence, leaving only the JSON payload.
    raw_json = chat(conversation, stop_sequences=["```"])
    return json.loads(raw_json)

In [6]:
# Generate the evaluation tasks with the model.
eval_dataset = generate_dataset()

# Save the dataset to disk as indented JSON.
with open("../../evals/dataset.json", "w") as f:
    f.write(json.dumps(eval_dataset, indent=2))