pip3 install boto3 botocore matplotlib -qU
flowchart LR
A[Prompts] --> B((LLM invocation))
B --> C[Outputs]
A --> D{LLM-as-a-judge evaluation}
C --> D
D --> E[Evaluation results]
flowchart TD
A[Flow input] -->|document| B[Invoke]
B -->|modelCompletion| C[Evaluate]
A -->|document| C
C -->|modelCompletion| D[Flow output]
flowchart TD
A[Flow input] -->|document| B[S3 Retrieval]
B -->|s3Content| C[Iterator]
C -->|arrayItem| D[Invoker]
D -->|modelCompletion| E[Evaluator]
C -->|arrayItem| E
E -->|modelCompletion| F[Collector]
C -->|arraySize| F
F -->|collectedArray| G[S3 Storage]
A -->|document| G
G -->|s3Uri| H[Flow output]
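The second diagram is the flow wired up step by step in the scripts below; the batch variant in the third diagram is not built in the snippets shown here. As a rough sketch only, its Iterator and Collector stages could be declared with nodes like the following (the Iterator and Collector node types exist in the flow definition schema, but the empty configurations and the input names below are assumptions to verify against the CreateFlow API reference; the output names match the diagram above).
# Hedged sketch: possible Iterator and Collector nodes for the batch evaluation flow.
# Configuration contents and input names are assumptions, not verified API fields.
batch_nodes_sketch = [
    {
        "name": "Iterator",
        "type": "Iterator",
        "configuration": {"iterator": {}},
        "inputs": [{"expression": "$.data", "name": "array", "type": "Array"}],  # assumed input name
        "outputs": [
            {"name": "arrayItem", "type": "String"},
            {"name": "arraySize", "type": "Number"}
        ]
    },
    {
        "name": "Collector",
        "type": "Collector",
        "configuration": {"collector": {}},
        "inputs": [
            {"expression": "$.data", "name": "arrayItem", "type": "String"},
            {"expression": "$.data", "name": "arraySize", "type": "Number"}
        ],
        "outputs": [{"name": "collectedArray", "type": "Array"}]
    }
]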
import boto3
import json
import os
def create_default_config():
default_config = {
"region": "us-east-1",
"modelInvokeId": "amazon.titan-text-premier-v1:0",
"modelEvalId": "anthropic.claude-3-sonnet-20240229-v1:0"
}
with open('bedrock_config.json', 'w') as config_file:
json.dump(default_config, config_file, indent=2)
print("Created default bedrock_config.json")
return default_config
# Check if bedrock_config.json exists; if not, create it
if not os.path.exists('bedrock_config.json'):
config = create_default_config()
else:
with open('bedrock_config.json', 'r') as config_file:
config = json.load(config_file)
region = config['region']
modelEvalId = config['modelEvalId']
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)
def get_existing_prompt(name):
try:
response = bedrock_agent.list_prompts()
for prompt in response.get('promptSummaries', []):
if prompt['name'] == name:
return prompt['id'], prompt['arn']
except Exception as e:
print(f"Error checking for existing prompt: {str(e)}")
return None, None
def create_prompt(name):
with open('03_ai_prompt_answer_evaluator.tmpl', 'r') as file:
template = file.read()
try:
response = bedrock_agent.create_prompt(
name=name,
description="Prompt template for evaluating prompt responses with LLM-as-a-judge",
variants=[
{
"inferenceConfiguration": {
"text": {
"maxTokens": 2000,
"temperature": 0,
}
},
"modelId": modelEvalId,
"name": "variantOne",
"templateConfiguration": {
"text": {
"inputVariables": [
{"name": "input"},
{"name": "output"}
],
"text": template
}
},
"templateType": "TEXT"
}
],
defaultVariant="variantOne"
)
return response["id"], response["arn"]
except Exception as e:
print(f"Error creating prompt: {str(e)}")
return None, None
def get_or_create_prompt():
prompt_name = "prompt-evaluator"
prompt_id, prompt_arn = get_existing_prompt(prompt_name)
if prompt_id:
print(f"A prompt with the name '{prompt_name}' already exists.")
use_existing = input("Do you want to use the existing prompt? (yes/no): ").lower()
if use_existing == 'yes':
return prompt_id, prompt_arn
else:
new_name = input("Enter a new name for the prompt: ")
prompt_name = new_name if new_name else f"{prompt_name}-new"
return create_prompt(prompt_name)
else:
print(f"Creating new prompt '{prompt_name}'...")
return create_prompt(prompt_name)
prompt_id, prompt_arn = get_or_create_prompt()
if prompt_id and prompt_arn:
print(f"Prompt ID: {prompt_id}")
print(f"Prompt ARN: {prompt_arn}")
# Save prompt details for later use
with open('prompt_details.json', 'w') as f:
json.dump({
"promptEvalId": prompt_id,
"promptEvalArn": prompt_arn
}, f)
print("Prompt details saved to 'prompt_details.json'")
else:
print("Failed to get or create prompt.")You're an evaluator for the prompts and answers provided by a generative AI model.
Consider the input prompt in the <input> tags, the output answer in the <output> tags, the prompt evaluation criteria in the <prompt_criteria> tags, and the answer evaluation criteria in the <answer_criteria> tags.
<input>
{{input}}
</input>
<output>
{{output}}
</output>
<prompt_criteria>
- The prompt should be clear, direct, and detailed.
- The question, task, or goal should be well explained and grammatically correct.
- The prompt is better if it contains examples.
- The prompt is better if it specifies a role or sets a context.
- The prompt is better if it provides details about the format and tone of the expected answer.
</prompt_criteria>
<answer_criteria>
- The answer should be correct, well structured, and technically complete.
- The answer should not contain any hallucinations, made-up content, or toxic content.
- The answer should be grammatically correct.
- The answer should be fully aligned with the question or instruction in the prompt.
</answer_criteria>
Evaluate the answer the generative AI model provided in the <output> tags with a score from 0 to 100 according to the <answer_criteria> provided; any hallucinations, even if small, should dramatically impact the evaluation score.
Also evaluate the prompt passed to that generative AI model, provided in the <input> tags, with a score from 0 to 100 according to the <prompt_criteria> provided.
Respond only with a JSON object having:
- An 'answer-score' key with the score number you evaluated the answer with.
- A 'prompt-score' key with the score number you evaluated the prompt with.
- A 'justification' key with a justification for the two evaluations you provided for the answer and the prompt; make sure to explicitly include any errors or hallucinations in this part.
- An 'input' key with the content of the <input> tags.
- An 'output' key with the content of the <output> tags.
- A 'prompt-recommendations' key with recommendations for improving the prompt based on the evaluations performed.
Skip any preamble or any other text apart from the JSON in your answer.
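For reference, a response that follows this template is a single JSON object; parsed with json.loads, it would look roughly like the illustrative example below. Every value shown is a made-up placeholder, not real model output.
# Illustrative only: the shape of the evaluator's JSON response after json.loads();
# the keys come from the template above, the values are placeholders.
example_evaluation = {
    "answer-score": 85,
    "prompt-score": 90,
    "justification": "The answer is well structured and factually sound; the prompt sets a clear role and output format.",
    "input": "<the original prompt text>",
    "output": "<the model's answer>",
    "prompt-recommendations": "Add a concrete example of the expected output format."
}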
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "bedrock.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
import boto3
import json
# Create IAM client
iam = boto3.client('iam')
# Define the role name
role_name = 'AmazonBedrockExecutionRoleForAgentFlowEval'
# Define the trust relationship policy
trust_relationship = {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "bedrock.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
# Define the role policy
# Note: if the flow references a prompt resource from Prompt management (as the
# Evaluate node does), the execution role may also need the bedrock:GetPrompt action.
role_policy = {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"bedrock:InvokeModel"
],
"Resource": "*"
}
]
}
def get_or_create_role():
try:
# Try to get the role
existing_role = iam.get_role(RoleName=role_name)
print(f"The role '{role_name}' already exists.")
use_existing = input("Do you want to use the existing role? (yes/no): ").lower()
if use_existing == 'yes':
return existing_role['Role']['Arn']
        else:
            # Remove the inline policies first; DeleteRole fails if any are still attached
            for policy_name in iam.list_role_policies(RoleName=role_name)['PolicyNames']:
                iam.delete_role_policy(RoleName=role_name, PolicyName=policy_name)
            # Delete the existing role so it can be recreated below
            iam.delete_role(RoleName=role_name)
            print(f"Deleted existing role '{role_name}'.")
except iam.exceptions.NoSuchEntityException:
pass # Role doesn't exist, we'll create it
try:
# Create the IAM role
create_role_response = iam.create_role(
RoleName=role_name,
AssumeRolePolicyDocument=json.dumps(trust_relationship),
Description='Execution role for Amazon Bedrock Agent Flow'
)
print("Role created successfully.")
# Attach the inline policy to the role
iam.put_role_policy(
RoleName=role_name,
PolicyName='BedrockInvokeModelPolicy',
PolicyDocument=json.dumps(role_policy)
)
print("Policy attached successfully")
return create_role_response['Role']['Arn']
except Exception as e:
print(f"An error occurred: {str(e)}")
return None
# Get or create the role
role_arn = get_or_create_role()
if role_arn:
print(f"Role ARN: {role_arn}")
# Save role details for later use
with open('role_details.json', 'w') as f:
json.dump({
"roleName": role_name,
"roleArn": role_arn
}, f)
print("Role details saved to 'role_details.json'")
else:
print("Failed to get or create the role."){
"nodes": [
{
"name": "Start",
"type": "Input",
"configuration": {
"input": {}
},
"outputs": [
{
"name": "document",
"type": "String"
}
]
},
{
"name": "End",
"type": "Output",
"configuration": {
"output": {}
},
"inputs": [
{
"expression": "$.data",
"name": "document",
"type": "String"
}
]
},
{
"name": "Invoke",
"type": "Prompt",
"configuration": {
"prompt": {
"sourceConfiguration": {
"inline": {
"inferenceConfiguration": {
"text": {
"maxTokens": 2000,
"temperature": 0
}
},
"modelId": "$MODEL_INVOKE_ID",
"templateConfiguration": {
"text": {
"inputVariables": [
{
"name": "input"
}
],
"text": "{{input}}"
}
},
"templateType": "TEXT"
}
}
}
},
"inputs": [
{
"expression": "$.data",
"name": "input",
"type": "String"
}
],
"outputs": [
{
"name": "modelCompletion",
"type": "String"
}
]
},
{
"name": "Evaluate",
"type": "Prompt",
"configuration": {
"prompt": {
"sourceConfiguration": {
"resource": {
"promptArn": "$PROMPT_EVAL_ARN"
}
}
}
},
"inputs": [
{
"expression": "$.data",
"name": "input",
"type": "String"
},
{
"expression": "$.data",
"name": "output",
"type": "String"
}
],
"outputs": [
{
"name": "modelCompletion",
"type": "String"
}
]
}
],
"connections": [
{
"name": "StartToInvoke",
"source": "Start",
"target": "Invoke",
"type": "Data",
"configuration": {
"data": {
"sourceOutput": "document",
"targetInput": "input"
}
}
},
{
"name": "InvokeToEvaluate",
"source": "Invoke",
"target": "Evaluate",
"type": "Data",
"configuration": {
"data": {
"sourceOutput": "modelCompletion",
"targetInput": "output"
}
}
},
{
"name": "StartToEvaluate",
"source": "Start",
"target": "Evaluate",
"type": "Data",
"configuration": {
"data": {
"sourceOutput": "document",
"targetInput": "input"
}
}
},
{
"name": "EvaluateToEnd",
"source": "Evaluate",
"target": "End",
"type": "Data",
"configuration": {
"data": {
"sourceOutput": "modelCompletion",
"targetInput": "document"
}
}
}
]
}
import boto3
import json
import os
from botocore.exceptions import ClientError
# Load configuration
with open('bedrock_config.json', 'r') as config_file:
config = json.load(config_file)
region = config['region']
modelInvokeId = config['modelInvokeId']
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)
# Load role and prompt details
with open('role_details.json', 'r') as f:
role_details = json.load(f)
role_arn = role_details['roleArn']
with open('prompt_details.json', 'r') as f:
prompt_details = json.load(f)
promptEvalArn = prompt_details['promptEvalArn']
def get_existing_flow(name):
try:
response = bedrock_agent.list_flows()
for flow in response.get('flowSummaries', []):
if flow['name'] == name:
return flow['id'], flow['arn']
except Exception as e:
print(f"Error checking for existing flow: {str(e)}")
return None, None
def create_or_update_flow(name, description, role_arn, prompt_arn):
flow_id, flow_arn = get_existing_flow(name)
flow_definition = {
"nodes": [
{
"name": "Start",
"type": "Input",
"configuration": {"input": {}},
"outputs": [{"name": "document", "type": "String"}]
},
{
"name": "End",
"type": "Output",
"configuration": {"output": {}},
"inputs": [{"expression": "$.data", "name": "document", "type": "String"}]
},
{
"name": "Invoke",
"type": "Prompt",
"configuration": {
"prompt": {
"sourceConfiguration": {
"inline": {
"inferenceConfiguration": {"text": {"maxTokens": 2000, "temperature": 0}},
"modelId": modelInvokeId,
"templateConfiguration": {
"text": {
"inputVariables": [{"name": "input"}],
"text": "{{input}}"
}
},
"templateType": "TEXT"
}
}
}
},
"inputs": [{"expression": "$.data", "name": "input", "type": "String"}],
"outputs": [{"name": "modelCompletion", "type": "String"}]
},
{
"name": "Evaluate",
"type": "Prompt",
"configuration": {
"prompt": {
"sourceConfiguration": {
"resource": {"promptArn": prompt_arn}
}
}
},
"inputs": [
{"expression": "$.data", "name": "input", "type": "String"},
{"expression": "$.data", "name": "output", "type": "String"}
],
"outputs": [{"name": "modelCompletion", "type": "String"}]
}
],
"connections": [
{
"name": "StartToInvoke",
"source": "Start",
"target": "Invoke",
"type": "Data",
"configuration": {"data": {"sourceOutput": "document", "targetInput": "input"}}
},
{
"name": "InvokeToEvaluate",
"source": "Invoke",
"target": "Evaluate",
"type": "Data",
"configuration": {"data": {"sourceOutput": "modelCompletion", "targetInput": "output"}}
},
{
"name": "StartToEvaluate",
"source": "Start",
"target": "Evaluate",
"type": "Data",
"configuration": {"data": {"sourceOutput": "document", "targetInput": "input"}}
},
{
"name": "EvaluateToEnd",
"source": "Evaluate",
"target": "End",
"type": "Data",
"configuration": {"data": {"sourceOutput": "modelCompletion", "targetInput": "document"}}
}
]
}
try:
if flow_id:
print(f"Updating existing flow '{name}'...")
            response = bedrock_agent.update_flow(
                flowIdentifier=flow_id,
                name=name,  # update_flow also requires the flow name
                description=description,
                executionRoleArn=role_arn,
                definition=flow_definition
            )
else:
print(f"Creating new flow '{name}'...")
response = bedrock_agent.create_flow(
name=name,
description=description,
executionRoleArn=role_arn,
definition=flow_definition
)
return response['id'], response['arn']
except ClientError as e:
print(f"Error creating/updating flow: {e}")
return None, None
# Create or update the flow
flow_name = "prompt-eval-flow"
flow_description = "Prompt Flow for evaluating prompts with LLM-as-a-judge."
flow_id, flow_arn = create_or_update_flow(flow_name, flow_description, role_arn, promptEvalArn)
if flow_id and flow_arn:
print(f"Flow ID: {flow_id}")
print(f"Flow ARN: {flow_arn}")
# Save flow details for later use
with open('flow_details.json', 'w') as f:
json.dump({
"flowId": flow_id,
"flowArn": flow_arn,
"flowName": flow_name
}, f)
print("Flow details saved to 'flow_details.json'")
else:
print("Failed to create or update flow.")import boto3
import json
import time
# Use the same region as in bedrock_config.json
with open('bedrock_config.json', 'r') as config_file:
    region = json.load(config_file)['region']
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)
# Read flow details
with open('flow_details.json', 'r') as f:
flow_details = json.load(f)
flow_id = flow_details['flowId']
try:
response = bedrock_agent.prepare_flow(
flowIdentifier=flow_id
)
print("Flow preparation started:")
print(json.dumps(response, indent=2, default=str))
except Exception as e:
print(f"An error occurred while preparing the flow: {str(e)}")
# Poll the flow status until it's prepared
max_attempts = 30
attempts = 0
while attempts < max_attempts:
try:
response = bedrock_agent.get_flow(
flowIdentifier=flow_id
)
status = response['status']
print(f"Current flow status: {status}")
if status == 'Prepared':
print("Flow is prepared and ready to use!")
break
elif status == 'Failed':
print("Flow preparation failed. Check the AWS console for more details.")
break
except Exception as e:
print(f"An error occurred while checking flow status: {str(e)}")
break
time.sleep(10) # Wait for 10 seconds before checking again
attempts += 1
if attempts == max_attempts:
print("Flow preparation timed out. Please check the AWS console for more information.")import boto3
import json
region = "us-east-1"
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)
# Read flow details
with open('flow_details.json', 'r') as f:
flow_details = json.load(f)
flow_id = flow_details['flowId']
try:
response = bedrock_agent.create_flow_version(
flowIdentifier=flow_id
)
print("Flow version created:")
print(json.dumps(response, indent=2, default=str))
# Update flow details with the new version
flow_details['flowVersion'] = response['version']
with open('flow_details.json', 'w') as f:
json.dump(flow_details, f, indent=2)
print("Flow details updated with new version.")
except Exception as e:
print(f"An error occurred while creating the flow version: {str(e)}")import boto3
import json
region = "us-east-1"
bedrock_agent = boto3.client(service_name="bedrock-agent", region_name=region)
# Read flow details
with open('flow_details.json', 'r') as f:
flow_details = json.load(f)
flow_id = flow_details['flowId']
flow_name = flow_details['flowName']
flow_version = flow_details.get('flowVersion', '1') # Default to '1' if not set
try:
response = bedrock_agent.create_flow_alias(
flowIdentifier=flow_id,
name=flow_name,
description=f"Alias for {flow_name}",
routingConfiguration=[
{
"flowVersion": flow_version
}
]
)
print("Flow alias created:")
print(json.dumps(response, indent=2, default=str))
# Update flow details with the alias information
flow_details['flowAliasId'] = response['id']
flow_details['flowAliasArn'] = response['arn']
with open('flow_details.json', 'w') as f:
json.dump(flow_details, f, indent=2)
print("Flow details updated with alias information.")
except Exception as e:
print(f"An error occurred while creating the flow alias: {str(e)}")- Prompts:
- Evaluation Prompt: “prompt-evaluator” (Created in 04_create_evaluation_prompt.py)
- Prompt Flows:
- Simple Prompt Evaluation Flow: “prompt-eval-flow” (Created in 08_create_flow.py)
You are an expert data scientist with years of experience in machine learning and statistical analysis. A junior data analyst has approached you for help with a problem they're facing. They have a dataset of customer information and purchase history, and they want to predict future purchasing behavior.
Please provide a detailed, step-by-step guide on how to approach this problem. Include the following in your response:
1. Initial data exploration steps
2. Feature engineering suggestions
3. Potential machine learning models to consider
4. Model evaluation metrics
5. Tips for interpreting the results
Use clear, concise language and explain any technical terms you use. Your goal is to educate the junior analyst and give them a solid foundation to start their project.
Tell me about machine learning.
import boto3
import json
import time
from botocore.exceptions import ClientError
# Use the same region as in bedrock_config.json
with open('bedrock_config.json', 'r') as config_file:
    region = json.load(config_file)['region']
bedrock_agent_runtime = boto3.client(service_name='bedrock-agent-runtime', region_name=region)
# Read flow details
with open('flow_details.json', 'r') as f:
flow_details = json.load(f)
flow_id = flow_details['flowId']
flow_alias_id = flow_details.get('flowAliasId') # Use the alias if available
def invoke_flow(input_text, timeout=300): # 5 minutes timeout
try:
response = bedrock_agent_runtime.invoke_flow(
flowIdentifier=flow_id,
flowAliasIdentifier=flow_alias_id,
inputs=[
{
"content": {
"document": input_text
},
"nodeName": "Start",
"nodeOutputName": "document"
}
]
)
event_stream = response["responseStream"]
result = ""
start_time = time.time()
for event in event_stream:
if time.time() - start_time > timeout:
print(f"Flow invocation timed out after {timeout} seconds.")
return None
if "flowOutputEvent" in event:
result += event["flowOutputEvent"]["content"]["document"]
return json.loads(result)
except ClientError as e:
error_code = e.response['Error']['Code']
error_message = e.response['Error']['Message']
print(f"AWS Error: {error_code} - {error_message}")
return None
except Exception as e:
print(f"An unexpected error occurred: {str(e)}")
return None
def read_prompt(file_path):
with open(file_path, 'r') as file:
return file.read().strip()
# Test the flow with good and bad prompts
good_prompt = read_prompt('prompts/evaluation_good_prompt.tmpl')
bad_prompt = read_prompt('prompts/evaluation_bad_prompt.tmpl')
for prompt_type, prompt in [("Good", good_prompt), ("Bad", bad_prompt)]:
print(f"\nTesting {prompt_type} Prompt:")
print("-" * 40)
print(f"Prompt: {prompt[:100]}...") # Print first 100 characters of the prompt
print("Invoking flow... This may take a few minutes.")
result = invoke_flow(prompt)
if result:
print("\nFlow invocation result:")
print(f"Prompt Score: {result.get('prompt-score', 'N/A')}")
print(f"Answer Score: {result.get('answer-score', 'N/A')}")
print(f"\nJustification: {result.get('justification', 'N/A')[:200]}...") # Print first 200 characters
print(f"\nPrompt Recommendations: {result.get('prompt-recommendations', 'N/A')[:200]}...") # Print first 200 characters
else:
print("Failed to get a result from the flow.")
print("\n" + "=" * 50 + "\n")