# Create Protein Design Agent with AWS HealthOmics Workflow Integration

This notebook demonstrates how to create a Bedrock agent that can trigger AWS HealthOmics workflows for protein design optimization.

# Pre-requisites

1. Go through the notebook environment setup in the agents_catalog/0-Notebook-environment/ folder

2. Deploy protein_design_stack.yaml to your AWS account to instantiate a ECR repository with a custom Docker image, a AWS HealthOmics (AHO) private workflow, and a lambda function that invokes the AHO workflow


### Steps for deploying the CloudFormation stack:
1. Create a S3 bucket for storing required files in the same region as your cf stack
2. Upload workflow definition files to S3
3. Package and upload container code to S3
4. Deploy the CloudFormation stack

In [33]:
import json
import os
import shutil
import boto3
import datetime


# Function to create S3 bucket in specified region
def create_s3_bucket(bucket_name, region):
    """
    Create an S3 bucket in the specified region if it doesn't exist
    
    Parameters:
    bucket_name (str): Name of the S3 bucket to create
    region (str): AWS region where the bucket should be created
    
    Returns:
    bool: True if bucket was created or already exists, False otherwise
    """
    s3_client = boto3.client('s3', region_name=region)
    
    try:
        # Check if bucket already exists
        response = s3_client.head_bucket(Bucket=bucket_name)
        print(f"Bucket {bucket_name} already exists")
        return True
    except Exception as e:
        if "404" in str(e):
            # Bucket doesn't exist, create it
            try:
                if region == 'us-east-1':
                    # Special case for us-east-1 which doesn't accept LocationConstraint
                    response = s3_client.create_bucket(
                        Bucket=bucket_name
                    )
                else:
                    response = s3_client.create_bucket(
                        Bucket=bucket_name,
                        CreateBucketConfiguration={
                            'LocationConstraint': region
                        }
                    )
                print(f"Successfully created bucket {bucket_name} in {region}")
                return True
            except Exception as create_error:
                print(f"Error creating bucket: {create_error}")
                return False
        else:
            print(f"Error checking bucket: {e}")
            return False

# Create zip file of container code
def create_container_zip():
    try:
        shutil.make_archive('code', 'zip', 'container')
        print("Successfully created code.zip from container directory")
    except Exception as e:
        print(f"Error creating zip file: {e}")

# Upload workflow files and container code to S3
def upload_to_s3(bucket_name):
    s3 = boto3.client('s3')
    
    # Upload workflow files
    workflow_files = ['main.nf', 'nextflow.config', 'config.yaml', 'parameter-template.json']
    for file in workflow_files:
        try:
            s3.upload_file(
                f'aho_workflow/{file}', 
                bucket_name, 
                f'workflow/{file}'
            )
            print(f"Uploaded {file} to s3://{bucket_name}/workflow/")
        except Exception as e:
            print(f"Error uploading {file}: {e}")
    
    # Upload container code zip
    try:
        s3.upload_file(
            'code.zip',
            bucket_name,
            'code.zip'
        )
        print(f"Uploaded code.zip to s3://{bucket_name}/")
    except Exception as e:
        print(f"Error uploading code.zip: {e}")

# Define the CloudFormation parameters
def write_cf_parameters(bucket_name):
    '''Write the param JSON file for creating the cf stack'''
    cf_parameters = [
        {
            "ParameterKey": "S3BucketName",
            "ParameterValue": bucket_name
        },
        {
            "ParameterKey": "StackPrefix",
            "ParameterValue": "protein-design"  # Default value from the template
        },
        {
            "ParameterKey": "ApplicationName",
            "ParameterValue": "HealthOmics-Workflow"  # Default value from the template
        },
        {
            "ParameterKey": "WorkflowPath",
            "ParameterValue": "workflow"  # Default value from the template
        },
        {
            "ParameterKey": "SecretName",
            "ParameterValue": "protein-design-secret"  # Default value from the template
        }
    ]

    # Write parameters to cf_parameter.json file
    with open('cf_parameters.json', 'w') as f:
        json.dump(cf_parameters, f, indent=2)

    print(f"CloudFormation parameters written to cf_parameters.json")
    print(f"File path: {os.path.abspath('cf_parameters.json')}")

# Main execution
REGION = "us-west-2"  # Change this to your desired region
s3_bucket_name = "hcls-bedrock-agents-byot-aho-20240416-west2"  # Base bucket name

# Create the S3 bucket if it doesn't exist
bucket_created = create_s3_bucket(s3_bucket_name, REGION)

if bucket_created:
    # Create and write CloudFormation parameters
    write_cf_parameters(s3_bucket_name)
    
    # Create zip and upload files
    create_container_zip()
    upload_to_s3(s3_bucket_name)
else:
    print("Failed to create or verify S3 bucket. CloudFormation parameters not written.")


Bucket hcls-bedrock-agents-byot-aho-20240416-west2 already exists
CloudFormation parameters written to cf_parameters.json
File path: /home/sagemaker-user/8-Protein-Design-Agent/cf_parameters.json
Successfully created code.zip from container directory
Uploaded main.nf to s3://hcls-bedrock-agents-byot-aho-20240416-west2/workflow/
Uploaded nextflow.config to s3://hcls-bedrock-agents-byot-aho-20240416-west2/workflow/
Uploaded config.yaml to s3://hcls-bedrock-agents-byot-aho-20240416-west2/workflow/
Uploaded parameter-template.json to s3://hcls-bedrock-agents-byot-aho-20240416-west2/workflow/
Uploaded code.zip to s3://hcls-bedrock-agents-byot-aho-20240416-west2/


### AWS CLI commands to deploy the CloudFormation stack:
````
# Deploy the CloudFormation stack
aws cloudformation create-stack \
    --stack-name hcls-bedrock-agents-byot-aho-stack-20240416-lg \
    --template-body file://protein_design_stack.yaml \
    --parameters file://cf_parameters.json \
    --capabilities CAPABILITY_IAM CAPABILITY_AUTO_EXPAND CAPABILITY_NAMED_IAM \
    --region us-west-2

# Monitor stack creation
aws cloudformation describe-stacks \
    --stack-name hcls-bedrock-agents-byot-aho-stack-20240416-lg \
    --query 'Stacks[0].StackStatus'

# Get stack outputs once complete
aws cloudformation describe-stacks \
    --stack-name hcls-bedrock-agents-byot-aho-stack-20240416-lg \
    --query 'Stacks[0].Outputs'
```

#### Load in environment variables to notebook

In [5]:
# Retrieve import path
%store -r IMPORTS_PATH

# Retrieve account info
%store -r account_id
%store -r region

# Retrieve model lists
%store -r agent_foundation_model

%run $IMPORTS_PATH



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Successfully imported necessary libraries into notebook


## Configure AWS clients and parameters

In [34]:
import boto3
import json
import time
import uuid
from botocore.exceptions import ClientError

# Configure AWS clients
session = boto3.Session()
account_id = boto3.client('sts').get_caller_identity()['Account']

bedrock = boto3.client('bedrock', REGION)
cfn = boto3.client('cloudformation', REGION)

## Get CloudFormation Outputs

In [35]:
import boto3
from botocore.exceptions import ClientError
import json

STACK_NAME = 'hcls-bedrock-agents-byot-aho-stack-20240416-lg'

# Initialize the CloudFormation client with the specific region
cloudformation = boto3.client('cloudformation', region_name=REGION)

def get_cloudformation_outputs(stack_name):
    try:
        response = cloudformation.describe_stacks(StackName=stack_name)
        outputs = {}
        for output in response['Stacks'][0]['Outputs']:
            outputs[output['OutputKey']] = output['OutputValue']
        return outputs
    except ClientError as e:
        print(f"Error getting CloudFormation outputs: {e}")
        raise

# Get the outputs from CloudFormation
cf_outputs = get_cloudformation_outputs(STACK_NAME)
print("CloudFormation Outputs:")
print(json.dumps(cf_outputs, indent=2))

CloudFormation Outputs:
{
  "TriggerFunctionArn": "arn:aws:lambda:us-west-2:048051882663:function:hcls-bedrock-agents-byot-a-WorkflowTriggerFunction-fz7o5htyRJFA",
  "WorkflowExecutionRoleArn": "arn:aws:iam::048051882663:role/hcls-bedrock-agents-byot-aho--WorkflowExecutionRole-WaOLNnPR9fIG",
  "ECRRepositoryUri": "048051882663.dkr.ecr.us-west-2.amazonaws.com/protein-design-evoprotgrad"
}


In [36]:
lambda_function_arn = cf_outputs["TriggerFunctionArn"]
lambda_function_name = "hcls-bedrock-agents-byot-a-WorkflowTriggerFunction"

## Create Bedrock Agent

In [37]:
# Define agent configuration
agent_name = 'ProteinDesignAgent'
agent_description = "Agent for protein design using HealthOmics workflow"
agent_instruction = """You are an expert in protein design and optimization using AWS HealthOmics workflows. 
Your primary task is to help users run protein design optimization workflows and provide relevant insights.

When providing your response:
a. Start with a brief summary of your understanding of the user's query.
b. Explain the steps you're taking to address the query. Ask for clarifications from the user if required.
c. Present the results of the workflow execution."""


In [38]:
agent_foundation_model

['anthropic.claude-3-5-sonnet-20241022-v2:0']

## Create Agent Instance

In [39]:
# Instantiate agent with the desired configuration
agents = AgentsForAmazonBedrock()

protein_design_agent = agents.create_agent(
    agent_name,
    agent_description,
    agent_instruction,
    agent_foundation_model,
    code_interpretation=False,
    verbose=False
)

# Extract useful agent information
protein_design_agent_id = protein_design_agent[0]
protein_design_agent_arn = f"arn:aws:bedrock:{REGION}:{account_id}:agent/{protein_design_agent_id}"

print(f"Agent created with ID: {protein_design_agent_id}")
print(f"Agent ARN: {protein_design_agent_arn}")

Agent created with ID: F3XFMZHY7H
Agent ARN: arn:aws:bedrock:us-west-2:048051882663:agent/F3XFMZHY7H


## Define Action Group Functions

In [40]:
function_defs = [
    {
        "name": "trigger_aho_workflow",
        "description": "Trigger the AWS HealthOmics workflow for protein design optimization",
        "parameters": {
            "workflowId": {
                "description": "The ID of the HealthOmics workflow to run",
                "required": True,
                "type": "string"
            },
            "runName": {
                "description": "Name for the workflow run",
                "required": True,
                "type": "string"
            },
            "container_image": {
                "description": "ECR image URI for the protein design container",
                "required": True,
                "type": "string"
            },
            "seed_sequence": {
                "description": "The input protein sequence to optimize",
                "required": True,
                "type": "string"
            },
            "outputUri": {
                "description": "S3 URI where the workflow outputs will be stored",
                "required": True,
                "type": "string"
            },
            "roleArn": {
                "description": "ARN of the IAM role for workflow execution",
                "required": True,
                "type": "string"
            }
        },
        "requireConfirmation": "DISABLED"
    }
]


## Add Action Group with Lambda Function

In [41]:
# Add action group with Lambda function
agents.add_action_group_with_lambda(
    agent_name=agent_name,
    lambda_function_name=lambda_function_name,
    source_code_file=lambda_function_arn,
    agent_action_group_name="ProteinDesignActions",
    agent_action_group_description="Actions for protein design using AWS HealthOmics workflows",
    agent_functions=function_defs,
    verbose=True
)

Creating action group: ProteinDesignActions...
Lambda ARN: arn:aws:lambda:us-west-2:048051882663:function:hcls-bedrock-agents-byot-a-WorkflowTriggerFunction-fz7o5htyRJFA
Agent functions: [{'name': 'trigger_aho_workflow', 'description': 'Trigger the AWS HealthOmics workflow for protein design optimization', 'parameters': {'workflowId': {'description': 'The ID of the HealthOmics workflow to run', 'required': True, 'type': 'string'}, 'runName': {'description': 'Name for the workflow run', 'required': True, 'type': 'string'}, 'container_image': {'description': 'ECR image URI for the protein design container', 'required': True, 'type': 'string'}, 'seed_sequence': {'description': 'The input protein sequence to optimize', 'required': True, 'type': 'string'}, 'outputUri': {'description': 'S3 URI where the workflow outputs will be stored', 'required': True, 'type': 'string'}, 'roleArn': {'description': 'ARN of the IAM role for workflow execution', 'required': True, 'type': 'string'}}, 'requireC

## Add Lambda Resource-Based Policy

In [42]:
lambda_client = boto3.client('lambda', REGION)

try:
    # Add the new statement to the existing policy
    response = lambda_client.add_permission(
        FunctionName=lambda_function_arn,
        StatementId="AllowBedrockAgentAccess",
        Action="lambda:InvokeFunction",
        Principal="bedrock.amazonaws.com",
        SourceArn=protein_design_agent_arn
    )
    
    print("Resource policy added successfully.")
    print("Response:", response)
except lambda_client.exceptions.ResourceConflictException:
    print("Permission already exists")
except Exception as e:
    print(f"Error adding permission: {e}")

Resource policy added successfully.
Response: {'ResponseMetadata': {'RequestId': 'c8919d3d-aa9c-4186-a9c8-9cf91a425275', 'HTTPStatusCode': 201, 'HTTPHeaders': {'date': 'Wed, 16 Apr 2025 23:30:13 GMT', 'content-type': 'application/json', 'content-length': '401', 'connection': 'keep-alive', 'x-amzn-requestid': 'c8919d3d-aa9c-4186-a9c8-9cf91a425275'}, 'RetryAttempts': 0}, 'Statement': '{"Sid":"AllowBedrockAgentAccess","Effect":"Allow","Principal":{"Service":"bedrock.amazonaws.com"},"Action":"lambda:InvokeFunction","Resource":"arn:aws:lambda:us-west-2:048051882663:function:hcls-bedrock-agents-byot-a-WorkflowTriggerFunction-fz7o5htyRJFA","Condition":{"ArnLike":{"AWS:SourceArn":"arn:aws:bedrock:us-west-2:048051882663:agent/F3XFMZHY7H"}}}'}


## Create Agent Alias

In [43]:
# Create agent alias
protein_design_agent_alias_id, protein_design_agent_alias_arn = agents.create_agent_alias(
    protein_design_agent[0], 'v1'
)

# Store the alias ARN for future use
%store protein_design_agent_alias_arn

print(f"Agent alias created with ID: {protein_design_agent_alias_id}")
print(f"Agent alias ARN: {protein_design_agent_alias_arn}")

Stored 'protein_design_agent_alias_arn' (str)
Agent alias created with ID: YU2CSDMOKK
Agent alias ARN: arn:aws:bedrock:us-west-2:048051882663:agent-alias/F3XFMZHY7H/YU2CSDMOKK


## Test the Agent

In [46]:
bedrock_agent_runtime_client = boto3.client("bedrock-agent-runtime", REGION)
session_id = str(uuid.uuid1())

test_query = "Please tell me what you can help me do to optimize a protein sequence"

response = bedrock_agent_runtime_client.invoke_agent(
    inputText=test_query,
    agentId=protein_design_agent_id,
    agentAliasId=protein_design_agent_alias_id,
    sessionId=session_id,
    enableTrace=True
)

print("Request sent to Agent:\n{}".format(response))
print("====================")
print("Agent processing query now")
print("====================")

# Initialize an empty string to store the answer
answer = ""

# Iterate through the event stream
for event in response['completion']:
    # Check if the event is a 'chunk' event
    if 'chunk' in event:
        chunk_obj = event['chunk']
        if 'bytes' in chunk_obj:
            # Decode the bytes and append to the answer
            chunk_data = chunk_obj['bytes'].decode('utf-8')
            answer += chunk_data

print("Agent Answer: {}".format(answer))
print("====================")

Request sent to Agent:
{'ResponseMetadata': {'RequestId': '78d7c99a-5797-4f45-8e2e-e49e034fcb69', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Wed, 16 Apr 2025 23:49:31 GMT', 'content-type': 'application/vnd.amazon.eventstream', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'x-amzn-requestid': '78d7c99a-5797-4f45-8e2e-e49e034fcb69', 'x-amz-bedrock-agent-session-id': '712547c0-1b1d-11f0-aee4-924e7e14b784', 'x-amzn-bedrock-agent-content-type': 'application/json'}, 'RetryAttempts': 0}, 'contentType': 'application/json', 'sessionId': '712547c0-1b1d-11f0-aee4-924e7e14b784', 'completion': <botocore.eventstream.EventStream object at 0x7f0d221c8a10>}
Agent processing query now
Agent Answer: I can help you optimize protein sequences using advanced computational methods through AWS HealthOmics workflows. To assist you, I'll need:

1. A seed protein sequence that you want to optimize
2. A name for your optimization run
3. The necessary AWS infrastructure details (like S3 output l

ipdb>  parsed_response


{'Error': {'Code': 'resourceNotFoundException', 'Message': "Failed to retrieve resource because it doesn't exist. Retry the request with a different resource identifier."}}


ipdb>  u


> [0;32m/opt/conda/lib/python3.11/site-packages/botocore/eventstream.py[0m(592)[0;36m__iter__[0;34m()[0m
[0;32m    590 [0;31m    [0;32mdef[0m [0m__iter__[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    591 [0;31m        [0;32mfor[0m [0mevent[0m [0;32min[0m [0mself[0m[0;34m.[0m[0m_event_generator[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 592 [0;31m            [0mparsed_event[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0m_parse_event[0m[0;34m([0m[0mevent[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    593 [0;31m            [0;32mif[0m [0mparsed_event[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    594 [0;31m                [0;32myield[0m [0mparsed_event[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  event


<botocore.eventstream.EventStreamMessage object at 0x7feaee99d050>


ipdb>  d


> [0;32m/opt/conda/lib/python3.11/site-packages/botocore/eventstream.py[0m(608)[0;36m_parse_event[0;34m()[0m
[0;32m    606 [0;31m            [0;32mreturn[0m [0mparsed_response[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    607 [0;31m        [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 608 [0;31m            [0;32mraise[0m [0mEventStreamError[0m[0;34m([0m[0mparsed_response[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0m_operation_name[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    609 [0;31m[0;34m[0m[0m
[0m[0;32m    610 [0;31m    [0;32mdef[0m [0mget_initial_response[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  parsed_response


{'Error': {'Code': 'resourceNotFoundException', 'Message': "Failed to retrieve resource because it doesn't exist. Retry the request with a different resource identifier."}}
