Skip to content

Evaluating prompts at scale with Prompt Management and Prompt Flows for Amazon Bedrock

License

Notifications You must be signed in to change notification settings

jwalsh/amazon-bedrock-prompt-evaluation

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

11 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Evaluating prompts at scale with Prompt Management and Prompt Flows for Amazon Bedrock

Setup

pip3 install boto3 botocore matplotlib -qU

Workflow Diagrams

Prompt evaluation logic flow

flowchart LR
    A[Prompts] --> B((LLM invocation))
    B --> C[Outputs]
    A --> D{LLM-as-a-judge evaluation}
    C --> D
    D --> E[Evaluation results]
Loading

Simple prompt evaluation flow

flowchart TD
    A[Flow input] -->|document| B[Invoke]
    B -->|modelCompletion| C[Evaluate]
    A -->|document| C
    C -->|modelCompletion| D[Flow output]
Loading

Prompt evaluation flow at scale

flowchart TD
    A[Flow input] -->|document| B[S3 Retrieval]
    B -->|s3Content| C[Iterator]
    C -->|arrayItem| D[Invoker]
    D -->|modelCompletion| E[Evaluator]
    C -->|arrayItem| E
    E -->|modelCompletion| F[Collector]
    C -->|arraySize| F
    F -->|collectedArray| G[S3 Storage]
    A -->|document| G
    G -->|s3Uri| H[Flow output]
Loading

Configuration

import boto3
import json
import os

def create_default_config():
    """Write the default Bedrock settings to ``bedrock_config.json``.

    The file is created (or overwritten) in the current working directory.

    Returns:
        The dictionary of default settings that was written.
    """
    settings = dict(
        region="us-east-1",
        modelInvokeId="amazon.titan-text-premier-v1:0",
        modelEvalId="anthropic.claude-3-sonnet-20240229-v1:0",
    )
    serialized = json.dumps(settings, indent=2)
    with open('bedrock_config.json', 'w') as out:
        out.write(serialized)
    print("Created default bedrock_config.json")
    return settings

# Reuse bedrock_config.json when it is present; otherwise generate the defaults
if os.path.exists('bedrock_config.json'):
    with open('bedrock_config.json', 'r') as config_file:
        config = json.load(config_file)
else:
    config = create_default_config()

# Settings consumed by the rest of the script
region = config['region']
modelEvalId = config['modelEvalId']

# Bedrock Agent client bound to the configured region
bedrock_agent = boto3.client(region_name=region, service_name="bedrock-agent")

def get_existing_prompt(name):
    """Look up a Bedrock prompt by its exact name.

    Every page of ``list_prompts`` results is examined (following
    ``nextToken``), so a match is found even when the prompt is not on the
    first page of the listing.

    Args:
        name: Prompt name to search for.

    Returns:
        ``(prompt_id, prompt_arn)`` of the first match, or ``(None, None)``
        when no prompt has that name or the lookup raises.
    """
    try:
        request_args = {}
        while True:
            response = bedrock_agent.list_prompts(**request_args)
            for prompt in response.get('promptSummaries', []):
                if prompt['name'] == name:
                    return prompt['id'], prompt['arn']
            next_token = response.get('nextToken')
            if not next_token:
                break
            request_args = {'nextToken': next_token}
    except Exception as e:
        # Best-effort lookup: report the problem and fall through to "not found"
        print(f"Error checking for existing prompt: {str(e)}")
    return None, None

def create_prompt(name):
    """Register the evaluation template as a new Bedrock prompt.

    The template text is read from ``03_ai_prompt_answer_evaluator.tmpl`` and
    registered as a single TEXT variant named ``variantOne`` that uses the
    module-level ``modelEvalId``.

    Args:
        name: Name to give the new prompt.

    Returns:
        ``(prompt_id, prompt_arn)`` on success, ``(None, None)`` when the
        create call (or reading its response) raises.
    """
    with open('03_ai_prompt_answer_evaluator.tmpl', 'r') as template_file:
        template = template_file.read()

    try:
        judge_variant = {
            "inferenceConfiguration": {"text": {"maxTokens": 2000, "temperature": 0}},
            "modelId": modelEvalId,
            "name": "variantOne",
            "templateConfiguration": {
                "text": {
                    "inputVariables": [{"name": "input"}, {"name": "output"}],
                    "text": template,
                }
            },
            "templateType": "TEXT",
        }
        created = bedrock_agent.create_prompt(
            name=name,
            description="Prompt template for evaluating prompt responses with LLM-as-a-judge",
            variants=[judge_variant],
            defaultVariant="variantOne",
        )
        return created["id"], created["arn"]
    except Exception as e:
        print(f"Error creating prompt: {str(e)}")
        return None, None

def get_or_create_prompt():
    """Return the evaluator prompt, creating it when it does not exist yet.

    When a prompt named ``prompt-evaluator`` already exists, the user is asked
    whether to reuse it; any answer other than ``yes`` leads to a new prompt
    being created under a user-supplied name (``prompt-evaluator-new`` when
    the name is left blank).

    Returns:
        ``(prompt_id, prompt_arn)``; both are ``None`` if creation fails.
    """
    default_name = "prompt-evaluator"
    found_id, found_arn = get_existing_prompt(default_name)

    # Nothing registered yet: create it without asking
    if not found_id:
        print(f"Creating new prompt '{default_name}'...")
        return create_prompt(default_name)

    print(f"A prompt with the name '{default_name}' already exists.")
    answer = input("Do you want to use the existing prompt? (yes/no): ").lower()
    if answer == 'yes':
        return found_id, found_arn

    replacement = input("Enter a new name for the prompt: ")
    if not replacement:
        replacement = f"{default_name}-new"
    return create_prompt(replacement)

prompt_id, prompt_arn = get_or_create_prompt()

if not (prompt_id and prompt_arn):
    print("Failed to get or create prompt.")
else:
    print(f"Prompt ID: {prompt_id}")
    print(f"Prompt ARN: {prompt_arn}")
    # Persist the identifiers so later steps can load them from disk
    with open('prompt_details.json', 'w') as f:
        json.dump({"promptEvalId": prompt_id, "promptEvalArn": prompt_arn}, f)
    print("Prompt details saved to 'prompt_details.json'")

Evaluation Template

You're an evaluator for the prompts and answers provided by a generative AI model.
Consider the input prompt in the <input> tags, the output answer in the <output> tags, the prompt evaluation criteria in the <prompt_criteria> tags, and the answer evaluation criteria in the <answer_criteria> tags.

<input>
{{input}}
</input>

<output>
{{output}}
</output>

<prompt_criteria>
- The prompt should be clear, direct, and detailed.
- The question, task, or goal should be well explained and be grammatically correct.
- The prompt is better if it contains examples.
- The prompt is better if it specifies a role or sets a context.
- The prompt is better if it provides details about the format and tone of the expected answer.
</prompt_criteria>

<answer_criteria>
- The answers should be correct, well structured, and technically complete.
- The answers should not have any hallucinations, made up content, or toxic content.
- The answer should be grammatically correct.
- The answer should be fully aligned with the question or instruction in the prompt.
</answer_criteria>

Evaluate the answer the generative AI model provided in the <output> with a score from 0 to 100 according to the <answer_criteria> provided; any hallucinations, even if small, should dramatically impact the evaluation score.
Also evaluate the prompt passed to that generative AI model provided in the <input> with a score from 0 to 100 according to the <prompt_criteria> provided.
Respond only with a JSON having:
- An 'answer-score' key with the score number you evaluated the answer with.
- A 'prompt-score' key with the score number you evaluated the prompt with.
- A 'justification' key with a justification for the two evaluations you provided to the answer and the prompt; make sure to explicitly include any errors or hallucinations in this part.
- An 'input' key with the content of the <input> tags.
- An 'output' key with the content of the <output> tags.
- A 'prompt-recommendations' key with recommendations for improving the prompt based on the evaluations performed.
Skip any preamble or any other text apart from the JSON in your answer.

Create Evaluation Prompt

import boto3
import json
import os

# Load configuration
# bedrock_config.json must already exist here; open() raises if it is missing
with open('bedrock_config.json', 'r') as config_file:
    config = json.load(config_file)

# Region for the Bedrock client, and the model ID used by the evaluation prompt variant
region = config['region']
modelEvalId = config['modelEvalId']

# Bedrock Agent client used below for the list_prompts / create_prompt calls
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)

def get_existing_prompt(name):
    """Look up a Bedrock prompt by its exact name.

    Every page of ``list_prompts`` results is examined (following
    ``nextToken``), so a match is found even when the prompt is not on the
    first page of the listing.

    Args:
        name: Prompt name to search for.

    Returns:
        ``(prompt_id, prompt_arn)`` of the first match, or ``(None, None)``
        when no prompt has that name or the lookup raises.
    """
    try:
        request_args = {}
        while True:
            response = bedrock_agent.list_prompts(**request_args)
            for prompt in response.get('promptSummaries', []):
                if prompt['name'] == name:
                    return prompt['id'], prompt['arn']
            next_token = response.get('nextToken')
            if not next_token:
                break
            request_args = {'nextToken': next_token}
    except Exception as e:
        # Best-effort lookup: report the problem and fall through to "not found"
        print(f"Error checking for existing prompt: {str(e)}")
    return None, None

def create_prompt(name):
    """Register the evaluation template as a new Bedrock prompt.

    The template text is read from ``03_ai_prompt_answer_evaluator.tmpl`` and
    registered as a single TEXT variant named ``variantOne`` that uses the
    module-level ``modelEvalId``.

    Args:
        name: Name to give the new prompt.

    Returns:
        ``(prompt_id, prompt_arn)`` on success, ``(None, None)`` when the
        create call (or reading its response) raises.
    """
    with open('03_ai_prompt_answer_evaluator.tmpl', 'r') as template_file:
        template = template_file.read()

    try:
        judge_variant = {
            "inferenceConfiguration": {"text": {"maxTokens": 2000, "temperature": 0}},
            "modelId": modelEvalId,
            "name": "variantOne",
            "templateConfiguration": {
                "text": {
                    "inputVariables": [{"name": "input"}, {"name": "output"}],
                    "text": template,
                }
            },
            "templateType": "TEXT",
        }
        created = bedrock_agent.create_prompt(
            name=name,
            description="Prompt template for evaluating prompt responses with LLM-as-a-judge",
            variants=[judge_variant],
            defaultVariant="variantOne",
        )
        return created["id"], created["arn"]
    except Exception as e:
        print(f"Error creating prompt: {str(e)}")
        return None, None

def get_or_create_prompt():
    """Return the evaluator prompt, creating it when it does not exist yet.

    When a prompt named ``prompt-evaluator`` already exists, the user is asked
    whether to reuse it; any answer other than ``yes`` leads to a new prompt
    being created under a user-supplied name (``prompt-evaluator-new`` when
    the name is left blank).

    Returns:
        ``(prompt_id, prompt_arn)``; both are ``None`` if creation fails.
    """
    default_name = "prompt-evaluator"
    found_id, found_arn = get_existing_prompt(default_name)

    # Nothing registered yet: create it without asking
    if not found_id:
        print(f"Creating new prompt '{default_name}'...")
        return create_prompt(default_name)

    print(f"A prompt with the name '{default_name}' already exists.")
    answer = input("Do you want to use the existing prompt? (yes/no): ").lower()
    if answer == 'yes':
        return found_id, found_arn

    replacement = input("Enter a new name for the prompt: ")
    if not replacement:
        replacement = f"{default_name}-new"
    return create_prompt(replacement)

prompt_id, prompt_arn = get_or_create_prompt()

if not (prompt_id and prompt_arn):
    print("Failed to get or create prompt.")
else:
    print(f"Prompt ID: {prompt_id}")
    print(f"Prompt ARN: {prompt_arn}")
    # Persist the identifiers so later steps can load them from disk
    with open('prompt_details.json', 'w') as f:
        json.dump({"promptEvalId": prompt_id, "promptEvalArn": prompt_arn}, f)
    print("Prompt details saved to 'prompt_details.json'")

Evaluation Flow

Policy

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "bedrock.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

Create Role

import boto3
import json

# IAM client used for every role operation in this script
iam = boto3.client('iam')

# Name of the execution role this script manages
role_name = 'AmazonBedrockExecutionRoleForAgentFlowEval'

# Trust policy: lets the bedrock.amazonaws.com service principal assume the role
trust_relationship = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"Service": "bedrock.amazonaws.com"},
        "Action": "sts:AssumeRole",
    }],
}

# Permissions policy: allows bedrock:InvokeModel on any resource
role_policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Action": ["bedrock:InvokeModel"],
        "Resource": "*",
    }],
}

def get_or_create_role():
    """Return the ARN of the flow execution role, creating it when needed.

    If the role named by ``role_name`` already exists, the user is asked
    whether to reuse it. Declining deletes the role and recreates it with
    ``trust_relationship`` and the inline ``role_policy``.

    IAM refuses to delete a role that still has policies, and this script
    always attaches an inline policy, so the role's inline policies are
    removed and its managed policies detached before ``delete_role``.

    Returns:
        The role ARN, or ``None`` if creating the role or attaching its
        policy raises.
    """
    try:
        # Try to get the role
        existing_role = iam.get_role(RoleName=role_name)
        print(f"The role '{role_name}' already exists.")
        use_existing = input("Do you want to use the existing role? (yes/no): ").lower()
        if use_existing == 'yes':
            return existing_role['Role']['Arn']
        else:
            # Strip policies first; delete_role fails while any remain
            inline_policies = iam.list_role_policies(RoleName=role_name)
            for policy_name in inline_policies.get('PolicyNames', []):
                iam.delete_role_policy(RoleName=role_name, PolicyName=policy_name)
            attached_policies = iam.list_attached_role_policies(RoleName=role_name)
            for attached in attached_policies.get('AttachedPolicies', []):
                iam.detach_role_policy(RoleName=role_name, PolicyArn=attached['PolicyArn'])

            # Delete the existing role and recreate
            iam.delete_role(RoleName=role_name)
            print(f"Deleted existing role '{role_name}'.")
    except iam.exceptions.NoSuchEntityException:
        pass  # Role doesn't exist, we'll create it

    try:
        # Create the IAM role
        create_role_response = iam.create_role(
            RoleName=role_name,
            AssumeRolePolicyDocument=json.dumps(trust_relationship),
            Description='Execution role for Amazon Bedrock Agent Flow'
        )
        print("Role created successfully.")

        # Attach the inline policy to the role
        iam.put_role_policy(
            RoleName=role_name,
            PolicyName='BedrockInvokeModelPolicy',
            PolicyDocument=json.dumps(role_policy)
        )
        print("Policy attached successfully")

        return create_role_response['Role']['Arn']
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

# Resolve the execution role (existing or newly created)
role_arn = get_or_create_role()

if not role_arn:
    print("Failed to get or create the role.")
else:
    print(f"Role ARN: {role_arn}")
    # Persist the role identifiers so later steps can load them from disk
    with open('role_details.json', 'w') as f:
        json.dump({"roleName": role_name, "roleArn": role_arn}, f)
    print("Role details saved to 'role_details.json'")

Flow Definition

{
    "nodes": [
        {
            "name": "Start",
            "type": "Input",
            "configuration": {
                "input": {}
            },
            "outputs": [
                {
                    "name": "document",
                    "type": "String"
                }
            ]
        },
        {
            "name": "End",
            "type": "Output",
            "configuration": {
                "output": {}
            },
            "inputs": [
                {
                    "expression": "$.data",
                    "name": "document",
                    "type": "String"
                }
            ]
        },
        {
            "name": "Invoke",
            "type": "Prompt",
            "configuration": {
                "prompt": {
                    "sourceConfiguration": {
                        "inline": {
                            "inferenceConfiguration": {
                                "text": {
                                    "maxTokens": 2000,
                                    "temperature": 0
                                }
                            },
                            "modelId": "$MODEL_INVOKE_ID",
                            "templateConfiguration": {
                                "text": {
                                    "inputVariables": [
                                        {
                                            "name": "input"
                                        }
                                    ],
                                    "text": "{{input}}"
                                }
                            },
                            "templateType": "TEXT"
                        }
                    }
                }
            },
            "inputs": [
                {
                    "expression": "$.data",
                    "name": "input",
                    "type": "String"
                }
            ],
            "outputs": [
                {
                    "name": "modelCompletion",
                    "type": "String"
                }
            ]
        },
        {
            "name": "Evaluate",
            "type": "Prompt",
            "configuration": {
                "prompt": {
                    "sourceConfiguration": {
                        "resource": {
                            "promptArn": "$PROMPT_EVAL_ARN"
                        }
                    }
                }
            },
            "inputs": [
                {
                    "expression": "$.data",
                    "name": "input",
                    "type": "String"
                },
                {
                    "expression": "$.data",
                    "name": "output",
                    "type": "String"
                }
            ],
            "outputs": [
                {
                    "name": "modelCompletion",
                    "type": "String"
                }
            ]
        }
    ],
    "connections": [
        {
            "name": "StartToInvoke",
            "source": "Start",
            "target": "Invoke",
            "type": "Data",
            "configuration": {
                "data": {
                    "sourceOutput": "document",
                    "targetInput": "input"
                }
            }
        },
        {
            "name": "InvokeToEvaluate",
            "source": "Invoke",
            "target": "Evaluate",
            "type": "Data",
            "configuration": {
                "data": {
                    "sourceOutput": "modelCompletion",
                    "targetInput": "output"
                }
            }
        },
        {
            "name": "StartToEvaluate",
            "source": "Start",
            "target": "Evaluate",
            "type": "Data",
            "configuration": {
                "data": {
                    "sourceOutput": "document",
                    "targetInput": "input"
                }
            }
        },
        {
            "name": "EvaluateToEnd",
            "source": "Evaluate",
            "target": "End",
            "type": "Data",
            "configuration": {
                "data": {
                    "sourceOutput": "modelCompletion",
                    "targetInput": "document"
                }
            }
        }
    ]
}

Create Flow

import boto3
import json
import os
from botocore.exceptions import ClientError

# Load configuration
# bedrock_config.json must already exist here; open() raises if it is missing
with open('bedrock_config.json', 'r') as config_file:
    config = json.load(config_file)

# Region for the Bedrock client, and the model ID used by the flow's inline "Invoke" prompt node
region = config['region']
modelInvokeId = config['modelInvokeId']

# Bedrock Agent client used below for the list_flows / create_flow / update_flow calls
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)

# Load role and prompt details
# Both JSON files are read as-is; a missing file or key raises immediately
with open('role_details.json', 'r') as f:
    role_details = json.load(f)
role_arn = role_details['roleArn']

with open('prompt_details.json', 'r') as f:
    prompt_details = json.load(f)
promptEvalArn = prompt_details['promptEvalArn']

def get_existing_flow(name):
    """Look up a Bedrock flow by its exact name.

    Every page of ``list_flows`` results is examined (following
    ``nextToken``), so a match is found even when the flow is not on the
    first page of the listing.

    Args:
        name: Flow name to search for.

    Returns:
        ``(flow_id, flow_arn)`` of the first match, or ``(None, None)`` when
        no flow has that name or the lookup raises.
    """
    try:
        request_args = {}
        while True:
            response = bedrock_agent.list_flows(**request_args)
            for flow in response.get('flowSummaries', []):
                if flow['name'] == name:
                    return flow['id'], flow['arn']
            next_token = response.get('nextToken')
            if not next_token:
                break
            request_args = {'nextToken': next_token}
    except Exception as e:
        # Best-effort lookup: report the problem and fall through to "not found"
        print(f"Error checking for existing flow: {str(e)}")
    return None, None

def create_or_update_flow(name, description, role_arn, prompt_arn):
    """Create the prompt-evaluation flow, or update it if one with ``name`` exists.

    The flow has four nodes: ``Start`` (input), ``Invoke`` (inline prompt run
    on the module-level ``modelInvokeId``), ``Evaluate`` (the managed prompt
    identified by ``prompt_arn``) and ``End`` (output). ``Start`` feeds both
    ``Invoke`` and ``Evaluate``; ``Invoke`` feeds ``Evaluate``; ``Evaluate``
    feeds ``End``.

    Args:
        name: Flow name; also used to detect an existing flow.
        description: Human-readable flow description.
        role_arn: ARN of the execution role the flow runs with.
        prompt_arn: ARN of the evaluation prompt used by the ``Evaluate`` node.

    Returns:
        ``(flow_id, flow_arn)`` on success, ``(None, None)`` when the API
        call raises ``ClientError``.
    """
    flow_id, flow_arn = get_existing_flow(name)

    flow_definition = {
        "nodes": [
            {
                "name": "Start",
                "type": "Input",
                "configuration": {"input": {}},
                "outputs": [{"name": "document", "type": "String"}]
            },
            {
                "name": "End",
                "type": "Output",
                "configuration": {"output": {}},
                "inputs": [{"expression": "$.data", "name": "document", "type": "String"}]
            },
            {
                # Runs the prompt under test as-is on the invocation model
                "name": "Invoke",
                "type": "Prompt",
                "configuration": {
                    "prompt": {
                        "sourceConfiguration": {
                            "inline": {
                                "inferenceConfiguration": {"text": {"maxTokens": 2000, "temperature": 0}},
                                "modelId": modelInvokeId,
                                "templateConfiguration": {
                                    "text": {
                                        "inputVariables": [{"name": "input"}],
                                        "text": "{{input}}"
                                    }
                                },
                                "templateType": "TEXT"
                            }
                        }
                    }
                },
                "inputs": [{"expression": "$.data", "name": "input", "type": "String"}],
                "outputs": [{"name": "modelCompletion", "type": "String"}]
            },
            {
                # Scores the original prompt ("input") and the model answer ("output")
                "name": "Evaluate",
                "type": "Prompt",
                "configuration": {
                    "prompt": {
                        "sourceConfiguration": {
                            "resource": {"promptArn": prompt_arn}
                        }
                    }
                },
                "inputs": [
                    {"expression": "$.data", "name": "input", "type": "String"},
                    {"expression": "$.data", "name": "output", "type": "String"}
                ],
                "outputs": [{"name": "modelCompletion", "type": "String"}]
            }
        ],
        "connections": [
            {
                "name": "StartToInvoke",
                "source": "Start",
                "target": "Invoke",
                "type": "Data",
                "configuration": {"data": {"sourceOutput": "document", "targetInput": "input"}}
            },
            {
                "name": "InvokeToEvaluate",
                "source": "Invoke",
                "target": "Evaluate",
                "type": "Data",
                "configuration": {"data": {"sourceOutput": "modelCompletion", "targetInput": "output"}}
            },
            {
                "name": "StartToEvaluate",
                "source": "Start",
                "target": "Evaluate",
                "type": "Data",
                "configuration": {"data": {"sourceOutput": "document", "targetInput": "input"}}
            },
            {
                "name": "EvaluateToEnd",
                "source": "Evaluate",
                "target": "End",
                "type": "Data",
                "configuration": {"data": {"sourceOutput": "modelCompletion", "targetInput": "document"}}
            }
        ]
    }

    try:
        if flow_id:
            print(f"Updating existing flow '{name}'...")
            # update_flow requires the flow name in addition to its identifier;
            # omitting it fails client-side parameter validation.
            response = bedrock_agent.update_flow(
                flowIdentifier=flow_id,
                name=name,
                description=description,
                executionRoleArn=role_arn,
                definition=flow_definition
            )
        else:
            print(f"Creating new flow '{name}'...")
            response = bedrock_agent.create_flow(
                name=name,
                description=description,
                executionRoleArn=role_arn,
                definition=flow_definition
            )
        return response['id'], response['arn']
    except ClientError as e:
        print(f"Error creating/updating flow: {e}")
        return None, None

# Name and description of the evaluation flow managed by this script
flow_name = "prompt-eval-flow"
flow_description = "Prompt Flow for evaluating prompts with LLM-as-a-judge."

flow_id, flow_arn = create_or_update_flow(flow_name, flow_description, role_arn, promptEvalArn)

if not (flow_id and flow_arn):
    print("Failed to create or update flow.")
else:
    print(f"Flow ID: {flow_id}")
    print(f"Flow ARN: {flow_arn}")
    # Persist the identifiers so later steps can load them from disk
    with open('flow_details.json', 'w') as f:
        json.dump({"flowId": flow_id, "flowArn": flow_arn, "flowName": flow_name}, f)
    print("Flow details saved to 'flow_details.json'")

Prepare and Version the Flow

Prepare the Flow

import boto3
import json
import time

# Use the region recorded in bedrock_config.json so this step talks to the same
# region the flow was created in; fall back to us-east-1 when the file is absent.
try:
    with open('bedrock_config.json', 'r') as config_file:
        region = json.load(config_file).get('region', "us-east-1")
except FileNotFoundError:
    region = "us-east-1"
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)

# Read flow details
with open('flow_details.json', 'r') as f:
    flow_details = json.load(f)

flow_id = flow_details['flowId']

# Tracks whether the prepare request was accepted; polling is pointless otherwise
prepare_started = False
try:
    response = bedrock_agent.prepare_flow(
        flowIdentifier=flow_id
    )
    prepare_started = True
    print("Flow preparation started:")
    print(json.dumps(response, indent=2, default=str))
except Exception as e:
    print(f"An error occurred while preparing the flow: {str(e)}")

# Poll the flow status until it's prepared (at most max_attempts checks, 10 s apart)
max_attempts = 30
attempts = 0
while prepare_started and attempts < max_attempts:
    try:
        response = bedrock_agent.get_flow(
            flowIdentifier=flow_id
        )
        status = response['status']
        print(f"Current flow status: {status}")
        if status == 'Prepared':
            print("Flow is prepared and ready to use!")
            break
        elif status == 'Failed':
            print("Flow preparation failed. Check the AWS console for more details.")
            break
    except Exception as e:
        print(f"An error occurred while checking flow status: {str(e)}")
        break

    time.sleep(10)  # Wait for 10 seconds before checking again
    attempts += 1

# Only reached with attempts == max_attempts when every poll saw a non-terminal status
if attempts == max_attempts:
    print("Flow preparation timed out. Please check the AWS console for more information.")

Create Flow Version

import boto3
import json

# Use the region recorded in bedrock_config.json so this step talks to the same
# region the flow was created in; fall back to us-east-1 when the file is absent.
try:
    with open('bedrock_config.json', 'r') as config_file:
        region = json.load(config_file).get('region', "us-east-1")
except FileNotFoundError:
    region = "us-east-1"
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)

# Read flow details
with open('flow_details.json', 'r') as f:
    flow_details = json.load(f)

flow_id = flow_details['flowId']

try:
    response = bedrock_agent.create_flow_version(
        flowIdentifier=flow_id
    )
    print("Flow version created:")
    # default=str keeps non-JSON values in the response printable
    print(json.dumps(response, indent=2, default=str))

    # Update flow details with the new version
    flow_details['flowVersion'] = response['version']
    with open('flow_details.json', 'w') as f:
        json.dump(flow_details, f, indent=2)
    print("Flow details updated with new version.")
except Exception as e:
    print(f"An error occurred while creating the flow version: {str(e)}")

Create Flow Alias

import boto3
import json

# Use the region recorded in bedrock_config.json so this step talks to the same
# region the flow was created in; fall back to us-east-1 when the file is absent.
try:
    with open('bedrock_config.json', 'r') as config_file:
        region = json.load(config_file).get('region', "us-east-1")
except FileNotFoundError:
    region = "us-east-1"
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)

# Read flow details
with open('flow_details.json', 'r') as f:
    flow_details = json.load(f)

flow_id = flow_details['flowId']
flow_name = flow_details['flowName']
flow_version = flow_details.get('flowVersion', '1')  # Default to '1' if not set

try:
    # The alias reuses the flow's name and routes to the recorded version
    response = bedrock_agent.create_flow_alias(
        flowIdentifier=flow_id,
        name=flow_name,
        description=f"Alias for {flow_name}",
        routingConfiguration=[
            {
                "flowVersion": flow_version
            }
        ]
    )
    print("Flow alias created:")
    # default=str keeps non-JSON values in the response printable
    print(json.dumps(response, indent=2, default=str))

    # Update flow details with the alias information
    flow_details['flowAliasId'] = response['id']
    flow_details['flowAliasArn'] = response['arn']
    with open('flow_details.json', 'w') as f:
        json.dump(flow_details, f, indent=2)
    print("Flow details updated with alias information.")
except Exception as e:
    print(f"An error occurred while creating the flow alias: {str(e)}")

List of Prompts and Prompt Flows

  1. Prompts:
    • Evaluation Prompt: “prompt-evaluator” (Created in 04_create_evaluation_prompt.py)
  2. Prompt Flows:
    • Simple Prompt Evaluation Flow: “prompt-eval-flow” (Created in 08_create_flow.py)

Evaluation

Good Prompt

You are an expert data scientist with years of experience in machine learning and statistical analysis. A junior data analyst has approached you for help with a problem they're facing. They have a dataset of customer information and purchase history, and they want to predict future purchasing behavior.

Please provide a detailed, step-by-step guide on how to approach this problem. Include the following in your response:
1. Initial data exploration steps
2. Feature engineering suggestions
3. Potential machine learning models to consider
4. Model evaluation metrics
5. Tips for interpreting the results

Use clear, concise language and explain any technical terms you use. Your goal is to educate the junior analyst and give them a solid foundation to start their project.

Bad Prompt

Tell me about machine learning.

Prompts Evaluation

import boto3
import json
import time
from botocore.exceptions import ClientError

# Use the region recorded in bedrock_config.json so invocations go to the same
# region the flow was created in; fall back to us-east-1 when the file is absent.
try:
    with open('bedrock_config.json', 'r') as config_file:
        region = json.load(config_file).get('region', "us-east-1")
except FileNotFoundError:
    region = "us-east-1"
bedrock_agent_runtime = boto3.client(service_name='bedrock-agent-runtime', region_name=region)

# Read flow details
with open('flow_details.json', 'r') as f:
    flow_details = json.load(f)

flow_id = flow_details['flowId']
flow_alias_id = flow_details.get('flowAliasId')  # None when no alias has been recorded

def invoke_flow(input_text, timeout=300):  # 5 minutes timeout
    """Run the evaluation flow on ``input_text`` and return its parsed output.

    The text is sent to the flow's ``Start`` node, the ``document`` content
    of every ``flowOutputEvent`` in the response stream is concatenated, and
    the result is parsed as JSON.

    Args:
        input_text: Prompt text to feed into the flow.
        timeout: Seconds allowed for consuming the response stream; checked
            each time an event arrives.

    Returns:
        The decoded JSON value, or ``None`` on timeout or any error.
    """
    start_node_input = {
        "content": {"document": input_text},
        "nodeName": "Start",
        "nodeOutputName": "document",
    }
    try:
        response = bedrock_agent_runtime.invoke_flow(
            flowIdentifier=flow_id,
            flowAliasIdentifier=flow_alias_id,
            inputs=[start_node_input],
        )

        stream = response["responseStream"]
        collected = ""
        started_at = time.time()
        for event in stream:
            elapsed = time.time() - started_at
            if elapsed > timeout:
                print(f"Flow invocation timed out after {timeout} seconds.")
                return None
            if "flowOutputEvent" not in event:
                continue
            collected += event["flowOutputEvent"]["content"]["document"]

        return json.loads(collected)
    except ClientError as e:
        error_code = e.response['Error']['Code']
        error_message = e.response['Error']['Message']
        print(f"AWS Error: {error_code} - {error_message}")
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
    return None

def read_prompt(file_path):
    """Return the contents of a prompt file with surrounding whitespace removed.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.

    Args:
        file_path: Path of the prompt template file to read.

    Returns:
        The file's text, stripped of leading and trailing whitespace.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read().strip()

# Run the flow once per sample prompt and summarise what came back
good_prompt = read_prompt('prompts/evaluation_good_prompt.tmpl')
bad_prompt = read_prompt('prompts/evaluation_bad_prompt.tmpl')

for prompt_type, prompt in (("Good", good_prompt), ("Bad", bad_prompt)):
    print(f"\nTesting {prompt_type} Prompt:")
    print("-" * 40)
    print(f"Prompt: {prompt[:100]}...")  # Preview: first 100 characters only
    print("Invoking flow... This may take a few minutes.")
    result = invoke_flow(prompt)

    if not result:
        print("Failed to get a result from the flow.")
    else:
        print("\nFlow invocation result:")
        print(f"Prompt Score: {result.get('prompt-score', 'N/A')}")
        print(f"Answer Score: {result.get('answer-score', 'N/A')}")
        # Long text fields are cut to their first 200 characters
        print(f"\nJustification: {result.get('justification', 'N/A')[:200]}...")
        print(f"\nPrompt Recommendations: {result.get('prompt-recommendations', 'N/A')[:200]}...")

    print("\n" + "=" * 50 + "\n")

Resources

About

Evaluating prompts at scale with Prompt Management and Prompt Flows for Amazon Bedrock

Topics

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published