# Multi-Agent Collaboration - DSL query use case

In [1]:
import json
import logging
import os
import subprocess
import sys
import tempfile
import time
import zipfile
from textwrap import dedent

import boto3

### Configure Logging

In [2]:
# -----------------------------------------------------------------------------
# Configure Logging
#
# Root logging is set to INFO with a "timestamp - logger name - level - message"
# line format. `logger` is the module-level logger used by every helper and
# cell below.
# -----------------------------------------------------------------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

### Import Bedrock Agent utilities

In [3]:
from src.utils.bedrock_agent import Agent, SupervisorAgent, agents_helper, region, account_id

2025-02-09 16:11:33,261 - botocore.credentials - INFO - Found credentials in shared credentials file: ~/.aws/credentials


boto3 version: 1.36.9


### Define AWS clients

In [4]:
# STS is used only to look up the caller's account ID; the default boto3
# session supplies the region.
sts_client = boto3.client('sts')
session = boto3.session.Session()

# NOTE: `account_id` and `region` were also imported from
# src.utils.bedrock_agent in the previous cell; these assignments rebind them.
account_id = sts_client.get_caller_identity()["Account"]
region = session.region_name
# Despite the name, this is the FIRST three characters of the account ID.
account_id_suffix = account_id[:3]
# Appended to resource names (IAM role, Lambda functions) in the cells below.
agent_suffix = f"{region}-{account_id_suffix}"

# Module-level service clients shared by the helper functions below.
s3_client = boto3.client('s3', region_name=region)
bedrock_client = boto3.client('bedrock-runtime', region_name=region)
iam_client = boto3.client('iam', region_name=region)
lambda_client = boto3.client('lambda', region_name=region)

logger.info(f"Region: {region}")
logger.info(f"Account ID: {account_id}")
logger.info(f"Agent Suffix: {agent_suffix}")


2025-02-09 16:11:34,698 - __main__ - INFO - Region: us-west-2
2025-02-09 16:11:34,699 - __main__ - INFO - Account ID: 533267284022
2025-02-09 16:11:34,699 - __main__ - INFO - Agent Suffix: us-west-2-533


### Helper Functions

In [5]:
def create_iam_role(role_name: str) -> str:
    """
    Return the ARN of a Lambda execution role, creating the role if necessary.

    The role trusts ``lambda.amazonaws.com`` and is given:
      * the managed ``AWSLambdaBasicExecutionRole`` policy, and
      * two inline policies, both on resource ``*``: read-only ``es:`` actions
        (``<role_name>-OpenSearchPolicy``) and ``aoss:*``
        (``<role_name>-AOSSPolicy``).

    A failure to put either inline policy is logged and does not abort the
    function. The function sleeps 10 seconds before returning so the role has
    time to propagate.

    :param role_name: Name of the IAM Role to create or retrieve.
    :return: ARN of the created or retrieved IAM Role.
    """
    logger.info(f"Creating or retrieving IAM Role: {role_name}")

    # Trust policy: only the Lambda service may assume this role.
    trust_policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {
                    "Service": "lambda.amazonaws.com"
                },
                "Action": "sts:AssumeRole"
            }
        ]
    }

    try:
        role = iam_client.create_role(
            RoleName=role_name,
            AssumeRolePolicyDocument=json.dumps(trust_policy)
        )
    except iam_client.exceptions.EntityAlreadyExistsException:
        logger.info(f"IAM Role {role_name} already exists. Retrieving existing role.")
        role = iam_client.get_role(RoleName=role_name)
    else:
        logger.info(f"IAM Role {role_name} created.")

    # Managed policy that lets the function write its CloudWatch logs.
    iam_client.attach_role_policy(
        RoleName=role_name,
        PolicyArn="arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
    )
    logger.info(f"Attached AWSLambdaBasicExecutionRole to {role_name}.")

    # Inline policies as (label, allowed actions), applied in this order.
    inline_policies = (
        ("OpenSearch", ["es:Describe*", "es:List*", "es:Get*"]),
        ("AOSS", ["aoss:*"]),
    )
    for label, actions in inline_policies:
        document = {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Action": actions,
                    "Resource": "*"
                }
            ]
        }
        try:
            iam_client.put_role_policy(
                RoleName=role_name,
                PolicyName=f"{role_name}-{label}Policy",
                PolicyDocument=json.dumps(document)
            )
            logger.info(f"Attached {label} policy to IAM Role {role_name}.")
        except Exception as err:
            # Best effort: report the failure and carry on.
            logger.error(f"Failed to attach {label} policy to IAM Role {role_name}: {str(err)}")

    arn = role['Role']['Arn']

    # Give IAM time to propagate the role before callers use it.
    logger.info("Waiting 10 seconds for IAM role to propagate...")
    time.sleep(10)

    return arn


def create_lambda_package(source_file: str, zip_file_path: str, dependencies: list = None):
    """
    Packages a Lambda function and its dependencies into a single ZIP file.

    Dependencies are installed with pip into a private temporary directory,
    added to the archive at its root, and the handler source file is added
    under its base name. The temporary directory is always removed, even when
    packaging fails.

    Args:
        source_file (str): Path to the Lambda function source code
        zip_file_path (str): Path to the ZIP file that will be created
        dependencies (list, optional): List of Python packages required by the Lambda

    Raises:
        FileNotFoundError: If ``source_file`` does not exist. Checked before
            anything is installed or written, so no partial ZIP is left behind.
        subprocess.CalledProcessError: If ``pip install`` fails.
    """
    logger.info(f"Packaging Lambda function from {source_file}")

    # Fail fast: don't spend time on pip or leave a half-written archive when
    # the handler file is missing.
    if not os.path.exists(source_file):
        raise FileNotFoundError(f"Lambda source file not found: {source_file}")

    # A private temp directory avoids picking up (or deleting) an unrelated
    # ./package directory and is cleaned up automatically on any exit path.
    with tempfile.TemporaryDirectory(prefix="lambda-package-") as package_dir:
        # Install dependencies only if there are any
        if dependencies:
            logger.info(f"Installing dependencies: {dependencies}")
            # Argument list instead of a shell string: specifiers such as
            # "requests>=2" would otherwise be parsed as shell redirections.
            # sys.executable -m pip uses the pip of the running interpreter.
            subprocess.run(
                [sys.executable, "-m", "pip", "install", *dependencies, "-t", package_dir],
                check=True
            )

        # Create ZIP file with function (and dependencies if any)
        logger.info(f"Creating Lambda deployment package: {zip_file_path}")
        with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _, files in os.walk(package_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    # Store paths relative to the package root so modules are
                    # importable from the top level of the archive.
                    arcname = os.path.relpath(file_path, package_dir)
                    zipf.write(file_path, arcname)

            # Add the Lambda function code at the archive root
            zipf.write(source_file, os.path.basename(source_file))

        logger.info("Cleaning up temporary package directory...")

    logger.info("Lambda package created successfully.")


def _wait_for_lambda_update(lambda_client, function_name: str, stage: str,
                            max_attempts: int = 10, delay_seconds: int = 5) -> dict:
    """
    Poll ``get_function`` until the function is neither Pending nor mid-update.

    :param lambda_client: boto3 Lambda client used for polling.
    :param function_name: Name of the function to poll.
    :param stage: Short label used in the timeout message (e.g. "code update").
    :param max_attempts: Number of polls before giving up.
    :param delay_seconds: Sleep between polls.
    :return: The last ``Configuration`` dict returned by ``get_function``.
    :raises TimeoutError: If the function has not settled after all attempts.
    """
    for _ in range(max_attempts):
        try:
            config = lambda_client.get_function(FunctionName=function_name)['Configuration']
        except Exception as e:
            # Transient lookup errors are retried like any other "not ready" poll.
            logger.warning(f"Error checking function status: {str(e)}")
            time.sleep(delay_seconds)
            continue

        state = config.get('State', 'Active')
        last_update = config.get('LastUpdateStatus', 'Successful')
        # "Settled" means no operation is in flight. A previous *failed* update
        # also counts as settled, otherwise such a function could never be
        # updated again through this helper.
        if state != 'Pending' and last_update != 'InProgress':
            return config

        logger.info(f"Function state: {state}, LastUpdateStatus: {last_update}. Waiting...")
        time.sleep(delay_seconds)

    raise TimeoutError(f"Timeout waiting for function {stage} to complete")


def create_lambda_function(function_name: str,
                         role_arn: str,
                         handler: str,
                         runtime: str,
                         zip_file_path: str,
                         region_name: str = region,
                         layers: list = None,
                         memory_size: int = 128,
                         timeout: int = 15) -> dict:
    """
    Creates or updates an AWS Lambda function with retry logic for updates.

    If the function does not exist it is created (and published) from the ZIP
    at ``zip_file_path``. If it already exists, its code is replaced and its
    configuration (role, handler, runtime, timeout, memory size, and layers
    when given) is brought in line with the arguments, waiting for each update
    to finish before starting the next.

    :param function_name: Name of the Lambda function.
    :param role_arn: ARN of the execution role.
    :param handler: Handler in ``module.function`` form.
    :param runtime: Lambda runtime identifier, e.g. ``python3.12``.
    :param zip_file_path: Path to the deployment package.
    :param region_name: Region in which to create the Lambda client.
    :param layers: Optional list of layer version ARNs. When omitted on the
        update path, the function's existing layers are left untouched.
    :param memory_size: Memory size in MB.
    :param timeout: Timeout in seconds.
    :return: The ``create_function`` response, or the ``update_function_code``
        response when the function already existed.
    :raises TimeoutError: If an update does not settle in time.
    :raises RuntimeError: If Lambda reports that an update failed.
    """
    logger.info(f"Creating/updating Lambda function: {function_name}")
    lambda_client = boto3.client('lambda', region_name=region_name)

    with open(zip_file_path, 'rb') as f:
        zip_content = f.read()

    # Prepare create function parameters
    create_params = {
        'FunctionName': function_name,
        'Runtime': runtime,
        'Role': role_arn,
        'Handler': handler,
        'Code': {'ZipFile': zip_content},
        'Description': 'Lambda function to execute DSL queries',
        'Timeout': timeout,
        'MemorySize': memory_size,
        'Publish': True
    }

    # Add layers if specified
    if layers:
        create_params['Layers'] = layers

    try:
        # Try to create new function
        response = lambda_client.create_function(**create_params)
    except lambda_client.exceptions.ResourceConflictException:
        # Function exists, fall through to the update path below
        logger.info(f"Lambda function {function_name} already exists. Updating...")
    else:
        logger.info(f"Lambda function {function_name} created successfully.")
        return response

    def ensure_update_succeeded(stage: str) -> None:
        """Wait for the current update and raise if Lambda reports it failed."""
        config = _wait_for_lambda_update(lambda_client, function_name, stage)
        if config.get('LastUpdateStatus') == 'Failed':
            reason = config.get('LastUpdateStatusReason', 'no reason reported')
            raise RuntimeError(
                f"Lambda function {function_name} {stage} failed: {reason}"
            )

    # Wait for any ongoing updates to complete (a previously failed update is
    # fine here; we are about to replace it).
    _wait_for_lambda_update(lambda_client, function_name, "update")

    # Update function code
    response = lambda_client.update_function_code(
        FunctionName=function_name,
        ZipFile=zip_content
    )
    ensure_update_succeeded("code update")

    # Always sync the configuration so timeout/memory/handler/runtime/role
    # passed by the caller take effect on an existing function too, not only
    # when layers happen to be specified.
    config_update = {
        'FunctionName': function_name,
        'Role': role_arn,
        'Handler': handler,
        'Runtime': runtime,
        'Timeout': timeout,
        'MemorySize': memory_size
    }
    if layers:
        config_update['Layers'] = layers
    lambda_client.update_function_configuration(**config_update)
    ensure_update_succeeded("configuration update")

    logger.info(f"Lambda function {function_name} updated successfully.")

    return response


def add_resource_based_policy(function_name: str,
                              agent_ids: list,
                              region_name: str,
                              account_id: str):
    """
    Adds a resource-based policy to the specified Lambda function to allow invocation
    from one or more Bedrock agents.

    Any existing statement whose Sid starts with
    ``AllowExecutionFromBedrockAgent`` is removed first, then one
    ``lambda:InvokeFunction`` permission is added per agent ID, restricted to
    that agent's ARN. Failures are logged per statement and do not stop the
    remaining statements from being processed.

    :param function_name: Name of the Lambda function.
    :param agent_ids: List of agent IDs permitted to invoke this Lambda.
    :param region_name: AWS region.
    :param account_id: AWS account ID.
    """
    logger.info(f"Adding resource-based policy to Lambda function {function_name} for agents: {agent_ids}")
    statement_id_prefix = "AllowExecutionFromBedrockAgent"

    # Talk to Lambda in the region the caller asked for (the same region used
    # to build the ARNs below) instead of whatever the module-level client
    # happens to be bound to.
    lambda_client = boto3.client('lambda', region_name=region_name)

    # Retrieve existing policy and remove any existing statements with the same prefix
    try:
        existing_policy = lambda_client.get_policy(FunctionName=function_name)
        existing_statements = json.loads(existing_policy['Policy']).get('Statement', [])
    except lambda_client.exceptions.ResourceNotFoundException:
        logger.info(f"No existing policy found for Lambda function {function_name}.")
        existing_statements = []
    except Exception as e:
        logger.error(f"Error retrieving/removing existing policy for {function_name}: {str(e)}")
        existing_statements = []

    for stmt in existing_statements:
        # Sid is optional in a policy statement; ones without it are not ours.
        sid_to_remove = stmt.get('Sid', '')
        if not sid_to_remove.startswith(statement_id_prefix):
            continue
        logger.info(f"Removing existing statement: {sid_to_remove}")
        try:
            lambda_client.remove_permission(
                FunctionName=function_name,
                StatementId=sid_to_remove
            )
        except Exception as e:
            # Keep going so one failed removal does not leave the rest behind.
            logger.error(f"Error retrieving/removing existing policy for {function_name}: {str(e)}")

    # Add new permissions. dict.fromkeys drops duplicate IDs (which would
    # collide on the statement ID) while preserving the caller's order.
    for agent_id in dict.fromkeys(agent_ids):
        sid_val = f"{statement_id_prefix}_{agent_id}"
        try:
            lambda_client.add_permission(
                FunctionName=function_name,
                StatementId=sid_val,
                Action="lambda:InvokeFunction",
                Principal="bedrock.amazonaws.com",
                SourceArn=f"arn:aws:bedrock:{region_name}:{account_id}:agent/{agent_id}"
            )
            logger.info(f"Added permission for statement: {sid_val}")
        except Exception as e:
            logger.error(f"Failed to add resource-based policy for {function_name}, statement {sid_val}: {str(e)}")


def create_lambda_layer(layer_name: str, requirements_file: str):
    """
    Create a Lambda Layer containing the specified dependencies with improved error handling.

    The requirements are installed with pip into ``python/`` inside a private
    temporary directory, zipped, and published as a new layer version
    (compatible runtime ``python3.12``). Publishing is retried up to three
    times on ``ClientError``. All temporary files are removed on every exit
    path.

    Args:
        layer_name (str): Name of the Lambda Layer
        requirements_file (str): Path to requirements.txt file

    Returns:
        str: ARN of the published layer version.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` fails.
        botocore.exceptions.ClientError: If publishing still fails after the
            last retry.
    """
    # Build everything inside one temp directory: nothing stale from an earlier
    # run can leak into the layer, and cleanup cannot fail on a variable that
    # was never assigned (the old finally-block referenced the zip path even
    # when pip had failed before it was set).
    with tempfile.TemporaryDirectory(prefix="lambda-layer-") as work_dir:
        layer_dir = os.path.join(work_dir, "layer")
        target_dir = os.path.join(layer_dir, "python")
        os.makedirs(target_dir)

        # Install dependencies to the layer directory (argument list, no shell).
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-r", requirements_file, "-t", target_dir],
            check=True
        )

        # Create ZIP file; paths are stored relative to layer_dir so the
        # archive root contains the "python/" directory.
        layer_zip = os.path.join(work_dir, f"{layer_name}.zip")
        with zipfile.ZipFile(layer_zip, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _, files in os.walk(layer_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, layer_dir)
                    zipf.write(file_path, arcname)

        with open(layer_zip, 'rb') as zip_file:
            zip_bytes = zip_file.read()

    # Create Lambda Layer with retries
    max_retries = 3
    retry_delay = 5  # seconds

    for attempt in range(max_retries):
        try:
            response = lambda_client.publish_layer_version(
                LayerName=layer_name,
                Description='Dependencies for reasoning query function',
                Content={'ZipFile': zip_bytes},
                CompatibleRuntimes=['python3.12']
            )
        except lambda_client.exceptions.ClientError as e:
            if attempt == max_retries - 1:
                logger.error(f"Failed to create Lambda layer after {max_retries} attempts: {str(e)}")
                raise
            logger.warning(f"Attempt {attempt + 1} failed, retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
        except Exception as e:
            logger.error(f"Unexpected error creating Lambda layer: {str(e)}")
            raise
        else:
            logger.info(f"Successfully created Lambda layer {layer_name} on attempt {attempt + 1}")
            return response['LayerVersionArn']


def create_lambda_layer_via_s3(layer_name: str, requirements: list, bucket_name: str):
    """
    Create a Lambda Layer using S3 for large dependencies.

    The requirements are split into groups of at most three. Each group is
    installed into its own temporary ``python/`` directory, zipped, uploaded to
    ``s3://<bucket_name>/lambda-layers/<layer_name>-part-<n>.zip`` and
    published as the layer ``<layer_name>-part-<n>`` (compatible runtime
    ``python3.12``). Local temporary files are removed on every exit path.

    Args:
        layer_name (str): Name of the Lambda Layer
        requirements (list): List of requirements to install
        bucket_name (str): S3 bucket to use for layer upload

    Returns:
        list: Layer version ARNs, one per requirement group, in order. Empty
        when ``requirements`` is empty.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` fails for a requirement.
    """
    # Split requirements into smaller groups
    max_group_size = 3
    requirement_groups = [
        requirements[i:i + max_group_size]
        for i in range(0, len(requirements), max_group_size)
    ]

    layer_arns = []
    for part_number, group in enumerate(requirement_groups, start=1):
        group_layer_name = f"{layer_name}-part-{part_number}"
        zip_name = f"{group_layer_name}.zip"

        # A fresh temp directory per group keeps groups isolated from each
        # other and guarantees cleanup of both the install tree and the zip,
        # even if pip, the upload, or the publish call fails.
        with tempfile.TemporaryDirectory(prefix="lambda-layer-") as work_dir:
            layer_dir = os.path.join(work_dir, "layer")
            target_dir = os.path.join(layer_dir, "python")
            os.makedirs(target_dir)

            # Install dependencies one at a time. An argument list is used so
            # specifiers like "numpy>=1.26" are not interpreted by a shell.
            for req in group:
                subprocess.run(
                    [sys.executable, "-m", "pip", "install", req, "-t", target_dir],
                    check=True
                )

            # Create ZIP file with "python/" at the archive root
            layer_zip = os.path.join(work_dir, zip_name)
            with zipfile.ZipFile(layer_zip, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for root, _, files in os.walk(layer_dir):
                    for file in files:
                        file_path = os.path.join(root, file)
                        arcname = os.path.relpath(file_path, layer_dir)
                        zipf.write(file_path, arcname)

            # Upload to S3
            s3_key = f"lambda-layers/{zip_name}"
            s3_client.upload_file(layer_zip, bucket_name, s3_key)

        # Create layer from S3
        response = lambda_client.publish_layer_version(
            LayerName=group_layer_name,
            Description=f'Dependencies group {part_number} for reasoning query function',
            Content={
                'S3Bucket': bucket_name,
                'S3Key': s3_key
            },
            CompatibleRuntimes=['python3.12']
        )

        layer_arns.append(response['LayerVersionArn'])
        logger.info(f"Created layer part {part_number}: {group_layer_name}")

    return layer_arns



## Main Execution

In [6]:
# Load the e-commerce shipping schema and keep a pretty-printed JSON string
# copy of it alongside the parsed object.
with open('schemas/ecom_shipping_schema.json', 'r') as file:
    ecom_shipping_schema = json.load(file)
ecom_shipping_schema_string = json.dumps(ecom_shipping_schema, indent=2)

# Agent foundation model(s). Only the Nova Lite model ID is active; the Claude
# model ID is kept commented out as an alternative.
agent_foundation_model = [
    # "anthropic.claude-3-5-sonnet-20241022-v2:0"
    "us.amazon.nova-lite-v1:0"
]

# Set the force-re-create default for Agent objects to True.
# NOTE(review): presumably this makes Agent objects re-create existing agents
# by default -- confirm against src.utils.bedrock_agent.
Agent.set_force_recreate_default(True)

In [7]:
"""
Main execution flow:
    1. Create an IAM Role for Lambda.
    2. Create/Update two Lambda functions (execute-dsl-query, execute-modified-dsl-query).
    3. Create DSL Query Agent & Query Fixer Agent referencing those Lambda functions.
    4. Retrieve the newly created agent IDs.
    5. Add resource-based policies to each Lambda function for those agent IDs.
    6. Create the Supervisor Agent to orchestrate both DSL Query and Query Fixer agents.
    7. Invoke the Supervisor Agent with a sample query.
    8. Delete the agents (cleanup).
"""
# -------------------------------------------------------------------------
# 1. Create (or retrieve) IAM Role for Lambda
# -------------------------------------------------------------------------
IAM_ROLE_NAME = f"LambdaExecutionRole-{agent_suffix}"
role_arn = create_iam_role(IAM_ROLE_NAME)


# -------------------------------------------------------------------------
# 2. Create the first Lambda (execute-dsl-query)
# -------------------------------------------------------------------------
DSL_QUERY_LAMBDA_NAME = f"execute-dsl-query-{agent_suffix}"
DSL_QUERY_LAMBDA_PATH = "src/lambda/execute_dsl_query.py"
DSL_QUERY_ZIP_PATH = "dsl_query_function.zip"

# Packages bundled into both Lambda deployment packages (also used by the
# next cell).
DEPENDENCIES = ["opensearch-py", "requests", "urllib3"]

# Stop here if the handler source is missing: continuing would only run a
# pointless pip install before failing on the same missing file.
if not os.path.exists(DSL_QUERY_LAMBDA_PATH):
    logger.error(f"Error: {DSL_QUERY_LAMBDA_PATH} does not exist.")
    raise FileNotFoundError(f"Lambda source file not found: {DSL_QUERY_LAMBDA_PATH}")

# Package & create the Lambda
try:
    create_lambda_package(DSL_QUERY_LAMBDA_PATH, DSL_QUERY_ZIP_PATH, DEPENDENCIES)
    create_lambda_function(
        function_name=DSL_QUERY_LAMBDA_NAME,
        role_arn=role_arn,
        handler="execute_dsl_query.lambda_handler",
        runtime="python3.12",
        zip_file_path=DSL_QUERY_ZIP_PATH
    )
finally:
    # Remove the local deployment package whether or not deployment succeeded.
    if os.path.exists(DSL_QUERY_ZIP_PATH):
        os.remove(DSL_QUERY_ZIP_PATH)


2025-02-09 16:11:34,749 - __main__ - INFO - Creating or retrieving IAM Role: LambdaExecutionRole-us-west-2-533
2025-02-09 16:11:35,098 - __main__ - INFO - IAM Role LambdaExecutionRole-us-west-2-533 already exists. Retrieving existing role.
2025-02-09 16:11:35,341 - __main__ - INFO - Attached AWSLambdaBasicExecutionRole to LambdaExecutionRole-us-west-2-533.
2025-02-09 16:11:35,469 - __main__ - INFO - Attached OpenSearch policy to IAM Role LambdaExecutionRole-us-west-2-533.
2025-02-09 16:11:35,587 - __main__ - INFO - Attached AOSS policy to IAM Role LambdaExecutionRole-us-west-2-533.
2025-02-09 16:11:35,589 - __main__ - INFO - Waiting 10 seconds for IAM role to propagate...
2025-02-09 16:11:45,592 - __main__ - INFO - Packaging Lambda function from src/lambda/execute_dsl_query.py
2025-02-09 16:11:45,593 - __main__ - INFO - Installing dependencies: ['opensearch-py', 'requests', 'urllib3']


Collecting opensearch-py
  Using cached opensearch_py-2.8.0-py3-none-any.whl.metadata (6.9 kB)
Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting urllib3
  Using cached urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting python-dateutil (from opensearch-py)
  Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting certifi>=2024.07.04 (from opensearch-py)
  Using cached certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB)
Collecting Events (from opensearch-py)
  Using cached Events-0.5-py3-none-any.whl.metadata (3.9 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting six>=1.5 (from python-dateutil->opensearch-py)
  Using cached six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)
Using cached op

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.16.1 requires botocore<1.35.89,>=1.35.74, but you have botocore 1.36.9 which is incompatible.
datasets 2.21.0 requires dill<0.3.9,>=0.3.0, but you have dill 0.3.9 which is incompatible.
datasets 2.21.0 requires fsspec[http]<=2024.6.1,>=2023.1.0, but you have fsspec 2024.12.0 which is incompatible.
awscli 1.34.8 requires botocore==1.35.8, but you have botocore 1.36.9 which is incompatible.
awscli 1.34.8 requires s3transfer<0.11.0,>=0.10.0, but you have s3transfer 0.11.2 which is incompatible.[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
2025-02-09 16:11:46,666 - __main__ - INFO - C

Successfully installed Events-0.5 certifi-2025.1.31 charset-normalizer-3.4.1 idna-3.10 opensearch-py-2.8.0 python-dateutil-2.9.0.post0 requests-2.32.3 six-1.17.0 urllib3-2.3.0


2025-02-09 16:11:52,037 - __main__ - INFO - Lambda function execute-dsl-query-us-west-2-533 already exists. Updating...
2025-02-09 16:11:59,478 - __main__ - INFO - Function state: Active, LastUpdateStatus: InProgress. Waiting...
2025-02-09 16:12:04,577 - __main__ - INFO - Lambda function execute-dsl-query-us-west-2-533 updated successfully.


In [8]:
# -------------------------------------------------------------------------
# 2(b). Create the second Lambda (execute-modified-dsl-query)
# -------------------------------------------------------------------------
MODIFIED_QUERY_LAMBDA_NAME = f"execute-modified-dsl-query-{agent_suffix}"
MODIFIED_QUERY_LAMBDA_PATH = "src/lambda/execute_modified_dsl_query.py"
MODIFIED_QUERY_ZIP_PATH = "modified_query_function.zip"

# Stop here if the handler source is missing: continuing would only run a
# pointless pip install before failing on the same missing file.
if not os.path.exists(MODIFIED_QUERY_LAMBDA_PATH):
    logger.error(f"Error: {MODIFIED_QUERY_LAMBDA_PATH} does not exist.")
    raise FileNotFoundError(f"Lambda source file not found: {MODIFIED_QUERY_LAMBDA_PATH}")

# Package & create the Lambda, reusing the role and dependency list defined
# in the previous cell.
try:
    create_lambda_package(MODIFIED_QUERY_LAMBDA_PATH, MODIFIED_QUERY_ZIP_PATH, DEPENDENCIES)
    create_lambda_function(
        function_name=MODIFIED_QUERY_LAMBDA_NAME,
        role_arn=role_arn,
        handler="execute_modified_dsl_query.lambda_handler",
        runtime="python3.12",
        zip_file_path=MODIFIED_QUERY_ZIP_PATH
    )
finally:
    # Remove the local deployment package whether or not deployment succeeded.
    if os.path.exists(MODIFIED_QUERY_ZIP_PATH):
        os.remove(MODIFIED_QUERY_ZIP_PATH)

2025-02-09 16:12:04,595 - __main__ - INFO - Packaging Lambda function from src/lambda/execute_modified_dsl_query.py
2025-02-09 16:12:04,596 - __main__ - INFO - Installing dependencies: ['opensearch-py', 'requests', 'urllib3']


Collecting opensearch-py
  Using cached opensearch_py-2.8.0-py3-none-any.whl.metadata (6.9 kB)
Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting urllib3
  Using cached urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting python-dateutil (from opensearch-py)
  Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting certifi>=2024.07.04 (from opensearch-py)
  Using cached certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB)
Collecting Events (from opensearch-py)
  Using cached Events-0.5-py3-none-any.whl.metadata (3.9 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting six>=1.5 (from python-dateutil->opensearch-py)
  Using cached six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)
Using cached op

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.16.1 requires botocore<1.35.89,>=1.35.74, but you have botocore 1.36.9 which is incompatible.
datasets 2.21.0 requires dill<0.3.9,>=0.3.0, but you have dill 0.3.9 which is incompatible.
datasets 2.21.0 requires fsspec[http]<=2024.6.1,>=2023.1.0, but you have fsspec 2024.12.0 which is incompatible.
awscli 1.34.8 requires botocore==1.35.8, but you have botocore 1.36.9 which is incompatible.
awscli 1.34.8 requires s3transfer<0.11.0,>=0.10.0, but you have s3transfer 0.11.2 which is incompatible.[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
2025-02-09 16:12:05,714 - __main__ - INFO - C

Successfully installed Events-0.5 certifi-2025.1.31 charset-normalizer-3.4.1 idna-3.10 opensearch-py-2.8.0 python-dateutil-2.9.0.post0 requests-2.32.3 six-1.17.0 urllib3-2.3.0


2025-02-09 16:12:12,253 - __main__ - INFO - Lambda function execute-modified-dsl-query-us-west-2-533 already exists. Updating...
2025-02-09 16:12:18,531 - __main__ - INFO - Function state: Active, LastUpdateStatus: InProgress. Waiting...
2025-02-09 16:12:23,642 - __main__ - INFO - Lambda function execute-modified-dsl-query-us-west-2-533 updated successfully.


In [9]:
# # Clean up existing resources
# try:
#     # Delete the Lambda function
#     lambda_client.delete_function(FunctionName=REASONING_QUERY_LAMBDA_NAME)
#     logger.info(f"Deleted Lambda function: {REASONING_QUERY_LAMBDA_NAME}")
# except lambda_client.exceptions.ResourceNotFoundException:
#     pass

# # Delete the layers
# for group in requirements_groups:
#     layer_name = f"reasoning-deps-{group['name']}-{agent_suffix}"
#     try:
#         # Get all layer versions
#         versions = lambda_client.list_layer_versions(LayerName=layer_name)
#         # Delete each version
#         for version in versions['LayerVersions']:
#             lambda_client.delete_layer_version(
#                 LayerName=layer_name,
#                 VersionNumber=version['Version']
#             )
#         logger.info(f"Deleted layer: {layer_name}")
#     except lambda_client.exceptions.ResourceNotFoundException:
#         pass
    
    
# REASONING_QUERY_LAMBDA_NAME = f"execute-reasoning-query-{agent_suffix}"
# REASONING_QUERY_LAMBDA_PATH = "src/lambda/execute_reasoning_query.py"
# REASONING_QUERY_ZIP_PATH = "reasoning_query_function.zip"

# # Create minimal Lambda package with just the handler code
# create_lambda_package(
#     source_file=REASONING_QUERY_LAMBDA_PATH,
#     zip_file_path=REASONING_QUERY_ZIP_PATH,
#     dependencies=[]  # No dependencies needed in package since using layers
# )


# # Define S3 bucket for layer deployment (ensure agent_suffix is defined)
# LAYER_BUCKET = f"lambda-layers-{agent_suffix.lower()}"

# # Create the S3 bucket if it doesn't already exist
# try:
#     s3_client.create_bucket(
#         Bucket=LAYER_BUCKET,
#         CreateBucketConfiguration={'LocationConstraint': region}
#     )
# except s3_client.exceptions.BucketAlreadyExists:
#     pass
# except s3_client.exceptions.BucketAlreadyOwnedByYou:
#     pass

# # Define optimized requirements with only necessary components (torch removed)
# requirements_groups = [
#     # Group 1: Core dependencies for tokenization and model prompt formatting
#     {
#         'name': 'core',
#         'packages': [
#             'transformers',   # Use transformers without the [torch] extra
#             'tokenizers',
#             'numpy',
#             'typing-extensions'
#         ]
#     },
#     # Group 2: Utility dependencies
#     {
#         'name': 'utils',
#         'packages': [
#             'regex',
#             'requests',
#             'tqdm'
#         ]
#     }
# ]

# def create_optimized_layer(group, bucket_name):
#     """Create an optimized Lambda layer for a group of requirements."""
#     layer_name = f"reasoning-deps-{group['name']}-{agent_suffix}"
#     layer_dir = f"layer_{group['name']}"
#     zip_path = f"{layer_name}.zip"
    
#     try:
#         # Create the directory structure for the layer
#         os.makedirs(f"{layer_dir}/python", exist_ok=True)
        
#         # Special handling for numpy to prevent source directory issues
#         if 'numpy' in group['packages']:
#             group['packages'].remove('numpy')
#             logger.info("Installing numpy separately with special handling")
#             subprocess.run(
#                 "pip install numpy --no-cache-dir --platform manylinux2014_x86_64 "
#                 f"--target {layer_dir}/python --only-binary=:all:",
#                 shell=True,
#                 check=True
#             )
        
#         # Install remaining packages into the layer directory
#         for package in group['packages']:
#             logger.info(f"Installing package {package} into {layer_dir}/python")
#             subprocess.run(
#                 f"pip install {package} --no-cache-dir --target {layer_dir}/python",
#                 shell=True,
#                 check=True
#             )
        
#         # Remove unnecessary files to reduce the layer size
#         cleanup_patterns = [
#             # "*.dist-info",
#             "*.egg-info",
#             "__pycache__",
#             "*.pyc",
#             "*.pyo",
#             "tests",
#             # "*.so"  # Remove shared objects as they might cause compatibility issues
#         ]
#         for pattern in cleanup_patterns:
#             logger.info(f"Cleaning up files matching {pattern} in {layer_dir}/python")
#             subprocess.run(
#                 f"find {layer_dir}/python -name '{pattern}' -exec rm -rf {{}} +",
#                 shell=True
#             )
        
#         # Create a ZIP file from the layer directory
#         logger.info(f"Creating zip file {zip_path} from {layer_dir}")
#         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
#             for root, _, files in os.walk(layer_dir):
#                 for file in files:
#                     file_path = os.path.join(root, file)
#                     arcname = os.path.relpath(file_path, layer_dir)
#                     zipf.write(file_path, arcname)
        
#         # Upload the ZIP file to S3
#         s3_key = f"lambda-layers/{zip_path}"
#         logger.info(f"Uploading {zip_path} to S3 bucket {bucket_name} with key {s3_key}")
#         s3_client.upload_file(zip_path, bucket_name, s3_key)
        
#         # Publish the Lambda layer version
#         logger.info(f"Publishing Lambda layer version for {layer_name}")
#         response = lambda_client.publish_layer_version(
#             LayerName=layer_name,
#             Description=f'Optimized {group["name"]} dependencies',
#             Content={
#                 'S3Bucket': bucket_name,
#                 'S3Key': s3_key
#             },
#             CompatibleRuntimes=['python3.12']
#         )
        
#         return response['LayerVersionArn']
        
#     finally:
#         # Cleanup local files
#         if os.path.exists(layer_dir):
#             subprocess.run(f"rm -rf {layer_dir}", shell=True)
#         if os.path.exists(zip_path):
#             os.remove(zip_path)

# # Create layers and then the Lambda function with the published layers
# try:
#     layer_arns = []
#     for group in requirements_groups:
#         logger.info(f"Creating layer for group: {group['name']}...")
#         layer_arn = create_optimized_layer(group, LAYER_BUCKET)
#         layer_arns.append(layer_arn)
#         logger.info(f"Created layer {group['name']}: {layer_arn}")
    
#     # Create the Lambda function using the optimized layers
#     create_lambda_function(
#         function_name=REASONING_QUERY_LAMBDA_NAME,
#         role_arn=role_arn,
#         handler="execute_reasoning_query.lambda_handler",
#         runtime="python3.12",
#         zip_file_path=REASONING_QUERY_ZIP_PATH,
#         layers=layer_arns,
#         timeout=900,
#         memory_size=2048
#     )
    
#     logger.info(f"Lambda function '{REASONING_QUERY_LAMBDA_NAME}' created successfully")
#     logger.info(f"Attached layers: {json.dumps(layer_arns, indent=2)}")

# except Exception as e:
#     logger.error(f"Failed to create Lambda layers: {str(e)}")
#     raise
# finally:
#     # Cleanup the Lambda function zip file if it exists
#     if os.path.exists(REASONING_QUERY_ZIP_PATH):
#         os.remove(REASONING_QUERY_ZIP_PATH)


In [10]:
# Name of the Lambda function used as the reasoning agent's tool.
# NOTE(review): the cell that would build/deploy this function is commented out
# above, so it is presumably deployed outside this notebook -- confirm.
REASONING_QUERY_LAMBDA_NAME = "HuggingFaceLambdaFunction"

# Derive the ARN from the active region/account (the same pattern used for the
# DSL and modified-query Lambda ARNs) rather than hard-coding a single account
# and region, so the notebook keeps working when run elsewhere.
reasoning_query_lambda_arn = (
    f"arn:aws:lambda:{region}:{account_id}:function:{REASONING_QUERY_LAMBDA_NAME}"
)

In [11]:

# -------------------------------------------------------------------------
# 3. Create the DSL Query Agent & Query Fixer Agent
#
#    Each agent's "tool_code" must reference the ARN of its backing Lambda.
#    A Lambda ARN has the shape
#    "arn:aws:lambda:<REGION>:<ACCOUNT>:function:<FUNCTION_NAME>".
# -------------------------------------------------------------------------
# Everything up to the function name is shared by both ARNs.
lambda_arn_prefix = f"arn:aws:lambda:{region}:{account_id}:function:"
dsl_query_lambda_arn = f"{lambda_arn_prefix}{DSL_QUERY_LAMBDA_NAME}"
modified_query_lambda_arn = f"{lambda_arn_prefix}{MODIFIED_QUERY_LAMBDA_NAME}"
# reasoning_query_lambda_arn = f"arn:aws:lambda:{region}:{account_id}:function:{REASONING_QUERY_LAMBDA_NAME}"

logger.info("Creating DSL Query Agent...")

# Prompt given to the DSL Query agent. The e-commerce shipping schema held in
# `ecom_shipping_schema_string` is interpolated into the "## Schema:" section.
dsl_query_instructions = f"""
    You are an expert in generating Query DSL for Elasticsearch-style queries. Your task is to convert a 
    given natural language user question into a well-structured Query DSL.
    
    ## Instructions:
    - Use the provided e-commerce shipping schema to construct the query.
    - Encapsulate the output in <json>...</json> tags.
    - Follow the syntax of the Query DSL strictly; do not introduce elements outside the provided schema.
    
    ## Query Construction Rules:
    - **Keyword fields** (carrier, status, country): Use `term` for exact matches or `prefix`/`wildcard` for partial matches.
    - **Text fields** (description, address): Use `match` queries to account for analyzed terms.
    - **Nested fields** (tracking): Always use `nested` queries.
    - **Date fields**: Use `range` queries with date math for filtering by date ranges.
    - **Aggregations**: When counting occurrences, use a 'terms' aggregation on the relevant keyword field to capture the exact values present (e.g., 'delivery.carrier').
    - Break down complex queries into smaller parts for accuracy.
    - Think step-by-step before constructing the query.


    ## Schema:
    {ecom_shipping_schema_string}

    ## Output Format:
    - Return only the generated Query DSL within <json>...</json> tags.
    - Do not include explanations, comments, or additional text.
    """

# The one tool exposed to this agent: a function taking a single required
# string parameter that carries the DSL query to run.
dsl_query_parameter = {
    "description": "The DSL query to execute",
    "type": "string",
    "required": True,
}
dsl_query_tool_defs = [
    {
        "name": "execute_dsl_query",
        "description": "Executes a given DSL query and returns the results",
        "parameters": {"dsl_query": dsl_query_parameter},
    }
]

# The Lambda identified by `dsl_query_lambda_arn` is passed as the tool code.
dsl_query_agent = Agent.direct_create(
    name=f"dsl-query-agent-{agent_suffix}",
    role="DSL Query Creator",
    goal="Create DSL queries for a given user query",
    instructions=dsl_query_instructions,
    tool_code=dsl_query_lambda_arn,
    tool_defs=dsl_query_tool_defs,
)

logger.info("Creating Query Fixer Agent...")

# Prompt given to the Query Fixer agent. As with the DSL Query agent, the
# shipping schema in `ecom_shipping_schema_string` is interpolated into it.
query_fixer_instructions = f"""
    You are an expert query debugger and optimizer. Your tasks are:
    1. Analyze failed DSL queries from the query generator
    2. Diagnose errors using OpenSearch error messages
    3. Apply targeted fixes while maintaining original intent
    4. Optimize queries for better recall when results are empty
    5. Extract exact terms from 'terms' aggregations for accurate reporting.
    6. Identify alternative ways to answer queries when direct fields are missing.
    7. Recognize schema gaps and propose workarounds or schema modifications.

    ## Repair Strategies:
    - SYNTAX ERRORS: Fix formatting issues in nested queries/aggregations
    - FIELD ERRORS: Map invalid fields to valid schema equivalents
    - ZERO HITS: Apply query relaxation techniques:
        * Add wildcards to keyword searches
        * Expand date ranges
        * Reduce strictness of term matches
        * Add synonym expansion
    - Ensure queries include 'terms' aggregations to capture exact values

    ## Optimization Rules:
    - Maintain original query structure where possible
    - Prefer query-time fixes over rearchitecting
    - Document all modifications in revision notes
    - Limit query relaxation to 3 iterations
    - When results are found, check the 'terms' aggregation for the exact field values.
    - Report the exact terms from the data (e.g., use "DHL Express" if that's the stored value).

    ## Schema Gap Analysis & Alternative Solutions:
    - Creative Field Mapping: If direct fields are missing, use existing fields to infer answers.
      * Example: If "delivery duration" is not available, compute it using `delivered_time - out_for_delivery_time`.
    - Schema Enhancement: Identify missing fields required for full query support.
    - Derived Data Solutions:
      * If exact data isn’t available but can be computed, create scripted fields using painless scripting.
      * If no alternative exists, clearly state the required data and suggest schema modifications.



    ## Schema:
    {ecom_shipping_schema_string}

    ## Output Format:
    - Return modified query in <json> tags
    - Include revision notes and exact terms from aggregations in <notes> tags 
    """

# Parameters of the agent's single tool: the corrected query is marked
# required, while the revision notes carry no "required" flag.
retry_query_parameters = {
    "modified_dsl_query": {
        "description": "The corrected DSL query",
        "type": "string",
        "required": True,
    },
    "revision_notes": {
        "description": "Description of modifications made",
        "type": "string",
    },
}
query_fixer_tool_defs = [
    {
        "name": "retry_query",
        "description": "Retries a modified version of the failed query",
        "parameters": retry_query_parameters,
    }
]

# The Lambda identified by `modified_query_lambda_arn` is passed as the tool code.
query_fixer_agent = Agent.direct_create(
    name=f"query-fixer-agent-{agent_suffix}",
    role="Query Repair Specialist",
    goal="Fix and optimize failed DSL queries",
    instructions=query_fixer_instructions,
    tool_code=modified_query_lambda_arn,
    tool_defs=query_fixer_tool_defs,
)



# -------------------------------------------------------------------------
# Create the KB Response Agent (knowledge-base backed, no Lambda tool)
# -------------------------------------------------------------------------
# ID of the knowledge base attached to the agent. It can be overridden with the
# KNOWLEDGE_BASE_ID environment variable so the notebook is not tied to a
# single knowledge base; an unset or empty variable falls back to the
# original hard-coded ID.
KNOWLEDGE_BASE_ID = os.environ.get("KNOWLEDGE_BASE_ID") or "5GADU65GNF"

# Progress log, matching the ones emitted for the DSL Query and Query Fixer agents.
logger.info("Creating KB Response Agent...")
kb_rag_agent = Agent.direct_create(
    name=f"kb-response-agent-{agent_suffix}",
    role="Knowledge Base Content Analyzer",
    goal="Analyze retrieved document content and generate well-structured responses",
    instructions="""
    You analyze pre-retrieved document content and generate clear, accurate responses.

    ## Response Rules:
    - Synthesize information from provided passages
    - Include relevant quotes with proper citations
    - Use consistent citation format [doc_id:para_num]
    - Maintain factual accuracy
    - Flag any inconsistencies between sources

    ## Output Format:
    Response should be structured as:
    1. Direct answer
    2. Supporting evidence
    3. Source citations
    4. Confidence level (High/Medium/Low)

    ## Quality Guidelines:
    - Prefer direct quotes for key information
    - Summarize when appropriate
    - Note any information gaps
    - Maintain neutral tone
    """,
    kb_descr="Use knowledge base to extract relevant information, analyze content across multiple documents, and generate accurate responses with proper citations. Focus on maintaining context and factual accuracy.",
    kb_id=KNOWLEDGE_BASE_ID,
    verbose=True
)



# -------------------------------------------------------------------------
# Create the Reasoning Agent (tool backed by the reasoning-query Lambda)
# -------------------------------------------------------------------------
# Progress log, matching the ones emitted for the DSL Query and Query Fixer agents.
logger.info("Creating Reasoning Agent...")
reasoning_agent = Agent.direct_create(
    name=f"reasoning-agent-{agent_suffix}",
    role="Reasoning Specialist",
    goal="Analyze complex scenarios and provide well-reasoned answers",
    instructions="""
    You are a reasoning specialist tasked with analyzing complex scenarios and providing well-reasoned answers.

    ## Task Description:
    - Analyze the provided scenario and identify key elements
    - Break down the problem into smaller parts for easier analysis
    - Use logical reasoning to derive conclusions
    - Clearly explain your thought process and reasoning

    ## Output Format:
    - Explain your reasoning and thought process in a <reasoning>...</reasoning> block
    - Include any assumptions made in your analysis in a <assumptions>...</assumptions> block
    """,
    # The Lambda identified by `reasoning_query_lambda_arn` is passed as the tool code.
    tool_code=reasoning_query_lambda_arn,
    # Single tool: one required string parameter describing the scenario.
    tool_defs=[
        {
            "name": "execute_reasoning",
            "description": "Executes a reasoning model to analyze complex scenarios",
            "parameters": {
                "scenario": {
                    "description": "The scenario to analyze",
                    "type": "string",
                    "required": True
                }
            }
        }
    ]
)

2025-02-09 16:12:23,673 - __main__ - INFO - Creating DSL Query Agent...



Deleting existing agent and corresponding lambda for: dsl-query-agent-us-west-2-533...
Agent dsl-query-agent-us-west-2-533 not found
Creating agent dsl-query-agent-us-west-2-533...
Created agent, id: HAPHJFTMAB, alias id: TSTALIASID

Adding action group with Lambda: arn:aws:lambda:us-west-2:533267284022:function:execute-dsl-query-us-west-2-533...
Waiting for agent status to change. Current status CREATING
Agent id HAPHJFTMAB current status: NOT_PREPARED
Waiting for agent status to change. Current status VERSIONING
Agent id HAPHJFTMAB current status: PREPARED


2025-02-09 16:12:51,695 - __main__ - INFO - Creating Query Fixer Agent...


DONE: Agent: dsl-query-agent-us-west-2-533, id: HAPHJFTMAB, alias id: YB1XUC0ZRR


Deleting existing agent and corresponding lambda for: query-fixer-agent-us-west-2-533...
Agent query-fixer-agent-us-west-2-533 not found
Creating agent query-fixer-agent-us-west-2-533...
Created agent, id: Y4W1ICA55I, alias id: TSTALIASID

Adding action group with Lambda: arn:aws:lambda:us-west-2:533267284022:function:execute-modified-dsl-query-us-west-2-533...
Waiting for agent status to change. Current status CREATING
Agent id Y4W1ICA55I current status: NOT_PREPARED
Waiting for agent status to change. Current status VERSIONING
Agent id Y4W1ICA55I current status: PREPARED
DONE: Agent: query-fixer-agent-us-west-2-533, id: Y4W1ICA55I, alias id: XIQPAGMFD5


Deleting existing agent and corresponding lambda for: kb-response-agent-us-west-2-533...
Agent kb-response-agent-us-west-2-533 not found
Creating agent kb-response-agent-us-west-2-533...
Creating agent: kb-response-agent-us-west-2-533...
Created agent 

In [12]:
# -------------------------------------------------------------------------
# 4. Retrieve the newly created Agent IDs
# -------------------------------------------------------------------------
def _lookup_agent_id(label: str, agent):
    """Look up an agent's ID by its name, logging before and after.

    :param label: Human-readable agent label used in the log messages.
    :param agent: Agent object whose ``name`` attribute is used for the lookup.
    :return: Whatever ``agents_helper.get_agent_id_by_name`` returns for that name.
    """
    logger.info(f"Retrieving {label} Agent ID...")
    found_id = agents_helper.get_agent_id_by_name(agent.name)
    logger.info(f"{label} Agent ID: {found_id}")
    return found_id


# Same lookup order as the agents were created in.
dsl_query_agent_id = _lookup_agent_id("DSL Query", dsl_query_agent)
query_fixer_agent_id = _lookup_agent_id("Query Fixer", query_fixer_agent)
kb_rag_agent_id = _lookup_agent_id("KB Response", kb_rag_agent)
reasoning_agent_id = _lookup_agent_id("Reasoning", reasoning_agent)

2025-02-09 16:14:20,444 - __main__ - INFO - Retrieving DSL Query Agent ID...
2025-02-09 16:14:20,533 - __main__ - INFO - DSL Query Agent ID: HAPHJFTMAB
2025-02-09 16:14:20,533 - __main__ - INFO - Retrieving Query Fixer Agent ID...
2025-02-09 16:14:20,611 - __main__ - INFO - Query Fixer Agent ID: Y4W1ICA55I
2025-02-09 16:14:20,611 - __main__ - INFO - Retrieving KB Response Agent ID...
2025-02-09 16:14:20,687 - __main__ - INFO - KB Response Agent ID: NMYVAUXBYQ
2025-02-09 16:14:20,688 - __main__ - INFO - Retrieving Reasoning Agent ID...
2025-02-09 16:14:20,774 - __main__ - INFO - Reasoning Agent ID: QEZS20IZCK


In [13]:
# -------------------------------------------------------------------------
# 5. Add resource-based policy to each Lambda so the Agents can invoke them
# -------------------------------------------------------------------------
# Pair every Lambda function name with the ID of the one agent that uses it;
# the KB Response agent has no Lambda tool and therefore no entry.
lambda_agent_pairs = (
    (DSL_QUERY_LAMBDA_NAME, dsl_query_agent_id),
    (MODIFIED_QUERY_LAMBDA_NAME, query_fixer_agent_id),
    (REASONING_QUERY_LAMBDA_NAME, reasoning_agent_id),
)
for lambda_function_name, invoking_agent_id in lambda_agent_pairs:
    add_resource_based_policy(
        lambda_function_name, [invoking_agent_id], region, account_id
    )

2025-02-09 16:14:20,778 - __main__ - INFO - Adding resource-based policy to Lambda function execute-dsl-query-us-west-2-533 for agents: ['HAPHJFTMAB']
2025-02-09 16:14:20,963 - __main__ - INFO - Removing existing statement: AllowExecutionFromBedrockAgent_RCUWCRBCR4
2025-02-09 16:14:21,129 - __main__ - INFO - Added permission for statement: AllowExecutionFromBedrockAgent_HAPHJFTMAB
2025-02-09 16:14:21,131 - __main__ - INFO - Adding resource-based policy to Lambda function execute-modified-dsl-query-us-west-2-533 for agents: ['Y4W1ICA55I']
2025-02-09 16:14:21,175 - __main__ - INFO - Removing existing statement: AllowExecutionFromBedrockAgent_ZJBRMHWDBY
2025-02-09 16:14:21,338 - __main__ - INFO - Added permission for statement: AllowExecutionFromBedrockAgent_Y4W1ICA55I
2025-02-09 16:14:21,339 - __main__ - INFO - Adding resource-based policy to Lambda function HuggingFaceLambdaFunction for agents: ['QEZS20IZCK']
2025-02-09 16:14:21,396 - __main__ - INFO - Removing existing statement: Allow

In [14]:
# supervisor_agent = SupervisorAgent.direct_create(
#     name=f"supervisor-agent-{agent_suffix}",
#     role="Query Pipeline Orchestrator",
#     collaboration_type="SUPERVISOR",
#     collaborator_objects=[dsl_query_agent, query_fixer_agent, kb_rag_agent, reasoning_agent],
#     collaborator_agents=[
#         {
#             "agent": dsl_query_agent.name,
#             "instructions": dedent(f"""
#                 {dsl_query_agent.name} is responsible for generating the primary DSL query based on 
#                 the provided e-commerce shipping schema. Your task is to produce a precise Query DSL 
#                 encapsulated in <json>...</json> tags. Ensure the query strictly follows the schema 
#                 and DSL syntax without any additional commentary or explanations.
#             """).strip(),
#             "relay_conversation_history": "DISABLED"
#         },
#         {
#             "agent": query_fixer_agent.name,
#             "instructions": dedent(f"""
#                 Engage {query_fixer_agent.name} when any of the following conditions occur:
#                 1. The DSL query returns syntax or validation errors.
#                 2. The DSL query execution returns zero hits.
#                 3. The query requires optimization for improved recall.
#                 4. Alternative query solutions are needed due to schema limitations.
                
#                 Responsibilities:
#                 - Analyze error messages and the current query structure.
#                 - Apply targeted fixes that preserve the original query intent.
#                 - Implement query relaxation techniques (for example, adding wildcards, extending date ranges, or expanding term matches).
#                 - Identify and map alternative fields if direct schema fields are missing.
#                 - Suggest schema enhancements when appropriate.
#                 - Document all modifications with clear revision notes and output exact terms from aggregations.
                
#                 Return the corrected DSL query within <json>...</json> tags and include any revision notes within <notes>...</notes> tags.
#             """),
#             "relay_conversation_history": "DISABLED"
#         },
#         {
#             "agent": kb_rag_agent.name,
#             "instructions": dedent(f"""
#                 Engage {kb_rag_agent.name} to answer user questions that require analyzing the document content retrieved from executed queries.
#                 When search results are available, your task is to:
                
#                 1. Synthesize and validate the information from the provided passages.
#                 2. Generate a final response that includes a direct answer and supporting evidence with relevant quotes and citations.
                
#                 Your output must be clear, well-structured, and factually accurate to support decision-making.
#             """),
#             "relay_conversation_history": "DISABLED"
#         },
#         {
#             "agent": reasoning_agent.name,
#             "instructions": dedent(f"""
#                 Engage {reasoning_agent.name} when the scenario is complex and the supervisor agent cannot determine the appropriate call.
#                 This includes situations with multiple ambiguous factors or when further logical analysis is required.
                
#                 Responsibilities:
#                 1. Analyze the provided scenario and identify its key elements.
#                 2. Break down the problem into manageable parts.
#                 3. Use logical reasoning to derive well-reasoned conclusions.
#                 4. Explain your thought process in a <reasoning>...</reasoning> block and list any assumptions in an <assumptions>...</assumptions> block.
#             """),
#             "relay_conversation_history": "DISABLED"
#         }
#     ],
#     instructions=dedent(f"""
#         High-Level Overview:
#         Route user queries to the appropriate agent based on the type of answer required:
#           - Structured Data Retrieval: If the query requires retrieving structured information from the e-commerce shipping data, route it to {dsl_query_agent.name}. 
#             If the DSL query returns errors or zero hits, immediately route the query to {query_fixer_agent.name} for reattempts.
#           - Document Content Analysis: If the query requires synthesizing and analyzing document content from executed queries, route it to {kb_rag_agent.name}.
#           - Complex Scenarios: When the supervisor agent determines that the scenario is too complex or ambiguous to decide the next step, route it to {reasoning_agent.name}.
        
#         Detailed Instructions:
        
#         Route A: Structured Data Retrieval (DSL Query Agent + Query Fixer Agent)
#         1. Initial Query Analysis:
#            - Receive the user's natural language query.
#            - Determine if the query requires structured data retrieval from the e-commerce shipping data.
#            - Validate the query against the provided schema:
#              {ecom_shipping_schema_string}
#            - If the query qualifies, route it to {dsl_query_agent.name}.
        
#         2. DSL Query Execution:
#            - {dsl_query_agent.name} generates a Query DSL encapsulated in <json>...</json> tags that follows the provided schema.
        
#         3. Error Handling & Retry:
#            - Monitor the query execution results:
#              a. If the DSL query returns syntax or validation errors, or if the result is zero hits, capture the error context.
#              b. Immediately route the query, along with diagnostic details, to {query_fixer_agent.name}.
#              c. {query_fixer_agent.name} applies targeted fixes and query relaxation techniques, then returns a modified DSL query.
#              d. Validate the modified query; allow up to 3 retry attempts if necessary.
        
#         4. Evaluation & Final Approval (for structured data queries):
#            - Confirm that the final DSL query adheres to best practices (e.g., proper nested queries, correct field types and mappings).
#            - Maintain an audit trail of all query versions and modifications.
#            - Generate an execution summary including:
#              - Query versions attempted.
#              - Reasons for modifications.
#              - Performance metrics.
        
#         General Aggregation Guidance:
#            - If an aggregation returns an unexpectedly inflated count, verify whether it is counting nested or repeated values.
#            - To accurately count unique items, use a cardinality aggregation on a unique identifier rather than aggregating on fields that may contain duplicate entries.
        
#         Route B: Document Content Analysis (KB Response Agent)
#         1. Initial Query Analysis:
#            - Receive the user's natural language query.
#            - Determine if the query requires synthesizing and analyzing document content from executed queries.
#            - If so, route the query to {kb_rag_agent.name}.
        
#         2. KB Response Generation:
#            - {kb_rag_agent.name} synthesizes and validates the information from the provided passages.
#            - Generate a final response that includes:
#              a. A direct answer.
#              b. Supporting evidence with relevant quotes and citations.
#              c. A confidence level.
        
#         3. Final Response:
#            - Deliver the final, well-structured answer to the user by addressing the user by name.
        
#         Route C: Complex Scenarios (Reasoning Agent)
#         1. When a scenario is identified as complex—i.e., the supervisor agent cannot determine the appropriate call due to multiple ambiguous factors—route the query to {reasoning_agent.name}.
#         2. {reasoning_agent.name} will analyze the scenario, break it down into manageable parts, and provide a well-reasoned explanation.
#         3. The output must include your thought process in a <reasoning>...</reasoning> block and any assumptions in an <assumptions>...</assumptions> block.
#     """)
# )


In [15]:
supervisor_agent = SupervisorAgent.direct_create(
    name=f"supervisor-agent-{agent_suffix}",
    role="Query Pipeline Orchestrator",
    collaboration_type="SUPERVISOR",
    collaborator_objects=[dsl_query_agent, query_fixer_agent, kb_rag_agent, reasoning_agent],
    collaborator_agents=[
        {
            "agent": dsl_query_agent.name,
            "instructions": dedent(f"""
                {dsl_query_agent.name} is responsible for generating the primary DSL query based on 
                the provided e-commerce shipping schema. Your task is to produce a precise Query DSL 
                encapsulated in <json>...</json> tags. Ensure the query strictly follows the schema 
                and DSL syntax without any additional commentary or explanations.
                
                For testing purposes, if you encounter any uncertainty or complexity, always seek advice from the reasoning agent.
            """).strip(),
            "relay_conversation_history": "DISABLED"
        },
        {
            "agent": query_fixer_agent.name,
            "instructions": dedent(f"""
                Engage {query_fixer_agent.name} when any of the following conditions occur:
                1. The DSL query returns syntax or validation errors.
                2. The DSL query execution returns zero hits.
                3. The query requires optimization for improved recall.
                4. Alternative query solutions are needed due to schema limitations.
                
                Responsibilities:
                - Analyze error messages and the current query structure.
                - Apply targeted fixes that preserve the original query intent.
                - Implement query relaxation techniques (for example, adding wildcards, extending date ranges, or expanding term matches).
                - Identify and map alternative fields if direct schema fields are missing.
                - Suggest schema enhancements when appropriate.
                - Document all modifications with clear revision notes and output exact terms from aggregations.
                
                For testing purposes, if any ambiguity arises during error handling, always consult the reasoning agent for advice.
                
                Return the corrected DSL query within <json>...</json> tags and include any revision notes within <notes>...</notes> tags.
            """),
            "relay_conversation_history": "DISABLED"
        },
        {
            "agent": kb_rag_agent.name,
            "instructions": dedent(f"""
                Engage {kb_rag_agent.name} to answer user questions that require analyzing the document content retrieved from executed queries.
                When search results are available, your task is to:
                
                1. Synthesize and validate the information from the provided passages.
                2. Generate a final response that includes a direct answer and supporting evidence with relevant quotes and citations.
                
                Your output must be clear, well-structured, and factually accurate to support decision-making.
                
                For testing purposes, if you face any complex interpretation issues, always seek advice from the reasoning agent.
            """),
            "relay_conversation_history": "DISABLED"
        },
        {
            "agent": reasoning_agent.name,
            "instructions": dedent(f"""
                Engage {reasoning_agent.name} when the scenario is complex or ambiguous and the supervisor agent cannot determine the appropriate call.
                For testing purposes, always consult the reasoning agent in uncertain situations.
                
                Responsibilities:
                1. Analyze the provided scenario and identify its key elements.
                2. Break down the problem into manageable parts.
                3. Use logical reasoning to derive well-reasoned conclusions.
                4. Explain your thought process in a <reasoning>...</reasoning> block and list any assumptions in an <assumptions>...</assumptions> block.
            """),
            "relay_conversation_history": "DISABLED"
        }
    ],
    instructions=dedent(f"""
        High-Level Overview:
        Route user queries to the appropriate agent based on the type of answer required:
          - Structured Data Retrieval: If the query requires retrieving structured information from the e-commerce shipping data, route it to {dsl_query_agent.name}. 
            If the DSL query returns errors or zero hits, immediately route the query to {query_fixer_agent.name} for reattempts.
          - Document Content Analysis: If the query requires synthesizing and analyzing document content from executed queries, route it to {kb_rag_agent.name}.
          - Complex or Ambiguous Scenarios: For any scenario that is complex, ambiguous, or when the supervisor agent is uncertain, always consult {reasoning_agent.name} for advice.
        
        Detailed Instructions:
        
        Route A: Structured Data Retrieval (DSL Query Agent + Query Fixer Agent)
        1. Initial Query Analysis:
           - Receive the user's natural language query.
           - Determine if the query requires structured data retrieval from the e-commerce shipping data.
           - Validate the query against the provided schema:
             {ecom_shipping_schema_string}
           - If the query qualifies, route it to {dsl_query_agent.name}.
           - For testing purposes, if there is any uncertainty in the query's intent or complexity, consult {reasoning_agent.name} for advice.
        
        2. DSL Query Execution:
           - {dsl_query_agent.name} generates a Query DSL encapsulated in <json>...</json> tags that follows the provided schema.
           - For testing purposes, if the query appears complex, seek advice from {reasoning_agent.name}.
        
        3. Error Handling & Retry:
           - Monitor the query execution results:
             a. If the DSL query returns syntax or validation errors, or if the result is zero hits, capture the error context.
             b. Immediately route the query, along with diagnostic details, to {query_fixer_agent.name}.
             c. {query_fixer_agent.name} applies targeted fixes and query relaxation techniques, then returns a modified DSL query.
             d. Validate the modified query; allow up to 3 retry attempts if necessary.
             e. At any point, if the error context is complex, consult {reasoning_agent.name} for further advice.
        
        4. Evaluation & Final Approval (for structured data queries):
           - Confirm that the final DSL query adheres to best practices (e.g., proper nested queries, correct field types and mappings).
           - Maintain an audit trail of all query versions and modifications.
           - Generate an execution summary including:
             - Query versions attempted.
             - Reasons for modifications.
             - Performance metrics.
           - For testing purposes, if any ambiguity arises during evaluation, consult {reasoning_agent.name}.
        
        General Aggregation Guidance:
           - If an aggregation returns an unexpectedly inflated count, verify whether it is counting nested or repeated values.
           - To accurately count unique items, use a cardinality aggregation on a unique identifier rather than aggregating on fields that may contain duplicate entries.
           - For testing purposes, if aggregation results are ambiguous, always seek advice from {reasoning_agent.name}.
        
        Route B: Document Content Analysis (KB Response Agent)
        1. Initial Query Analysis:
           - Receive the user's natural language query.
           - Determine if the query requires synthesizing and analyzing document content from executed queries.
           - If so, route the query to {kb_rag_agent.name}.
           - For testing purposes, if the scenario appears complex, consult {reasoning_agent.name} for advice.
        
        2. KB Response Generation:
           - {kb_rag_agent.name} synthesizes and validates the information from the provided passages.
           - Generate a final response that includes:
             a. A direct answer.
             b. Supporting evidence with relevant quotes and citations.
             c. A confidence level.
           - For testing purposes, if the response strategy is unclear, consult {reasoning_agent.name}.
        
        3. Final Response:
           - Deliver the final, well-structured answer to the user by addressing the user by name.
           - For testing purposes, if any final ambiguity remains, seek additional advice from {reasoning_agent.name} before responding.
        
        Route C: Complex or Ambiguous Scenarios (Reasoning Agent)
        1. When a scenario is identified as complex or ambiguous—i.e., the supervisor agent cannot determine the appropriate call—route the query to {reasoning_agent.name}.
        2. {reasoning_agent.name} will analyze the scenario, break it down into manageable parts, and provide a well-reasoned explanation.
        3. The output must include the reasoning in a <reasoning>...</reasoning> block and any assumptions in an <assumptions>...</assumptions> block.
    """)
)


Agent supervisor-agent-us-west-2-533 not found

Created supervisor, id: MZAK8ELKGV, alias id: TSTALIASID

  associating sub-agents / collaborators to supervisor...
Waiting for agent status to change. Current status CREATING
Agent id MZAK8ELKGV current status: NOT_PREPARED
Waiting for agent status to change. Current status PREPARING
Agent id MZAK8ELKGV current status: PREPARED
Waiting for agent status to change. Current status PREPARING
Agent id MZAK8ELKGV current status: PREPARED
Waiting for agent status to change. Current status PREPARING
Agent id MZAK8ELKGV current status: PREPARED
Waiting for agent status to change. Current status PREPARING
Agent id MZAK8ELKGV current status: PREPARED
DONE: Agent: supervisor-agent-us-west-2-533, id: MZAK8ELKGV, alias id: WECCGNIT8G



In [16]:
# Inspect attributes of the SupervisorAgent object
# print(dir(supervisor_agent))


In [17]:
# Look up the supervisor's agent ID by name through the shared helper and
# log the result so it can be cross-checked against the creation output.
logger.info("Retrieving Supervisor Agent ID...")
_supervisor_name = supervisor_agent.name
supervisor_agent_id = agents_helper.get_agent_id_by_name(_supervisor_name)
logger.info(f"Supervisor Agent ID: {supervisor_agent_id}")

2025-02-09 16:15:05,739 - __main__ - INFO - Retrieving Supervisor Agent ID...
2025-02-09 16:15:05,821 - __main__ - INFO - Supervisor Agent ID: MZAK8ELKGV


In [18]:
# Let's get the agent based on the ID
# supervisor_agentV2 = agents_helper.get_agent_by_id("KGTOVCVLKI")

In [19]:
# Natural-language question sent to the supervisor agent.
# Alternative sample queries (uncomment one to try it instead):
# user_query = "What are the priority orders with shipping cost greater than $30?"
# user_query = "What is the average delivery time for DHL Express shipments?"
user_query = (
    "temperature-controlled packages delivered within last week, "
    "what is the average cost and the economic impact of late deliveries?"
)

In [22]:
import uuid

# Session state passed along with the request.
# NOTE(review): "currentTimestamp" is a fixed literal, not the wall-clock time;
# confirm it is the intended reference point for relative phrases in the query.
_invocation_session_state = {
    "sessionAttributes": {
        "currentTimestamp": "2022-01-01T12:00:00Z"
    },
    # presumably consumed by the supervisor prompt (e.g. to address the user
    # by name) -- verify against the agent instructions
    "promptSessionAttributes": {
        "userName": "Alice Mallory",
        "jobTitle": "Data Analyst",
    },
}

# A new random hex session ID is generated each time this cell runs.
_invocation_session_id = uuid.uuid4().hex

# Send the query to the supervisor agent with "core"-level tracing enabled.
response = supervisor_agent.invoke(
    input_text=user_query,
    session_id=_invocation_session_id,
    enable_trace=True,
    trace_level="core",
    session_state=_invocation_session_state,
)
logger.info(f"Supervisor agent response: {response}")

invokeAgent API request ID: ab212f98-44e7-4f80-b417-9f8591467f38
invokeAgent API session ID: 09b8bbd402b74a6b962550b63bc8a78f
  agent id: MZAK8ELKGV, agent alias id: WECCGNIT8G
[32m---- Step 1 ----[0m
[33mTook 8.8s, using 3703 tokens (in: 3401, out: 302) to complete prior action, observe, orchestrate.[0m
[34mThis query requires multiple aspects to be analyzed:
1. First, we need to get data about temperature-controlled packages from the last week
2. We need to calculate average shipping costs
3. We need to identify late deliveries and their economic impact

Let me break this down into steps and consult multiple agents simultaneously to handle this complex query efficiently.

1. I'll ask the DSL query agent to create a query for temperature-controlled packages from the last week
2. I'll also ask the reasoning agent about how to best approach the economic impact analysis
3. Once we have the data, we'll need the KB response agent to synthesize the findings[0m
[35mUsing sub-agent col

2025-02-09 16:36:56,562 - __main__ - INFO - Supervisor agent response: I apologize, but I'm unable to provide a complete analysis of temperature-controlled packages and their economic impact due to limited data availability. Here's what we found:

1. We identified 5 potential temperature-sensitive shipments from January 2024, but we need additional data to confirm their nature and status.

2. To provide you with accurate information about costs and economic impact, we would need:
   - Confirmation of which packages are actually temperature-controlled
   - Specific delivery status (on-time vs. delayed)
   - Actual shipping costs and package values
   - Temperature monitoring data if available

Would you like me to:
1. Modify the search criteria to look for specific temperature-control keywords?
2. Extend the time range to gather more historical data?
3. Focus on specific carriers or shipping methods?

Please let me know how you'd like to proceed with the analysis.


[32m---- Step 1.14 [using sub-agent name:kb-response-agent-us-west-2-533, id:NMYVAUXBYQ/BTMK53A5UM] ----[0m
[33mTook 8.3s, using 5542 tokens (in: 5249, out: 293) to complete prior action, observe, orchestrate.[0m
[34mI need to inform the user about the limitations in our data and provide a path forward.[0m
[36mFinal response:
I apologize, but I'm unable to provide a complete analysis of temperature-controlled packages and their economic impact due to limited data availability. Here's what we found:

1. We identified 5 potential temperature-sensitive shipments from January 2024, but we need additional data to confirm thei...[0m
[33mAgent made a total of 15 LLM calls, using 61407 tokens (in: 55640, out: 5767), and took 154.4 total seconds[0m


In [None]:
# # -------------------------------------------------------------------------
# # 7. Invoke the Supervisor Agent with a sample query
# # -------------------------------------------------------------------------
# response = supervisor_agent.invoke(
#     input_text="How many orders have been shipped by DHL?",
#     session_id="12345",
#     enable_trace=True,
#     trace_level="core"
# )
# logger.info(f"Supervisor agent response: {response}")

In [None]:
# response = supervisor_agent.invoke(
#     input_text="What are the effects of Covid-19 on e-commerce?",
#     session_id="1245",
#     enable_trace=True,
#     trace_level="core"
# )

In [None]:
# response = supervisor_agent.invoke(
#     input_text="How many orders have recipients in Spain and were last updated during customs clearance after January 16, 2024?",
#     session_id="1245",
#     enable_trace=True,
#     trace_level="core"
# )

In [None]:
# # -------------------------------------------------------------------------
# # 8. Cleanup: Delete the created agents
# # -------------------------------------------------------------------------
# logger.info("Deleting Supervisor Agent...")
# agents_helper.delete_agent(supervisor_agent.name, verbose=True)

# logger.info("Deleting DSL Query Agent...")
# agents_helper.delete_agent(dsl_query_agent.name, verbose=True)

# logger.info("Deleting Query Fixer Agent...")
# agents_helper.delete_agent(query_fixer_agent.name, verbose=True)

# logger.info("Deleting KB Response Agent...")
# agents_helper.delete_agent(kb_rag_agent.name, verbose=True)

# logger.info("Deleting Reasoning Agent...")
# agents_helper.delete_agent(reasoning_agent.name, verbose=True)