In [1]:
import os
import json
import time
import uuid
import boto3
import logging
import requests
from datetime import datetime


In [2]:
# Initialize AWS clients
session = boto3.session.Session()

# Default Region for every client created without an explicit region_name.
TARGET_REGION = "us-east-1"

# `session.region_name` is None when no Region is configured for the session.
# Later cells interpolate `region` into a COPY statement and a model ARN, so
# fall back to TARGET_REGION rather than letting the string "None" leak in.
region = session.region_name or TARGET_REGION
if region != TARGET_REGION:
    # The Redshift clients below use `region`, every other client uses
    # TARGET_REGION; make the split visible instead of failing later.
    print(
        f"Warning: session region '{region}' differs from TARGET_REGION "
        f"'{TARGET_REGION}'; Redshift clients and the remaining clients will "
        f"target different regions"
    )

boto3.setup_default_session(region_name=TARGET_REGION)

# Clients without region_name pick up the default session configured above.
s3_client = boto3.client('s3')
sts_client = boto3.client('sts')
redshift_client = boto3.client('redshift-serverless', region_name=region)
redshift_data_client = boto3.client('redshift-data', region_name=region)
iam_client = boto3.client('iam')
bedrock_agent_client = boto3.client('bedrock-agent')
bedrock_agent_runtime_client = boto3.client("bedrock-agent-runtime")

In [3]:
# Derive a short, time-based suffix so resource names differ between runs.
current_time = time.time()
local_struct = time.localtime(current_time)
# Keep only the trailing 7 digits of the YYYYmmddHHMMSS stamp.
timestamp_str = time.strftime("%Y%m%d%H%M%S", local_struct)[-7:]
suffix = str(timestamp_str)

print(f"Using suffix: {suffix}")

Using suffix: 1122011


## Step 1: Download Bedrock Knowledge Base Utilities

Let's download the structured knowledge base utility module, which helps with Knowledge Base configuration and creation.


In [4]:
# Fetch the helper module that is imported in the next cell.
# NOTE(review): this tracks the `main` branch, so the downloaded code can change
# between runs — consider pinning the URL to a specific commit.
url = "https://raw.githubusercontent.com/aws-samples/amazon-bedrock-samples/main/rag/knowledge-bases/features-examples/utils/structured_knowledge_base.py"
target_path = "utils/structured_knowledge_base.py"
os.makedirs(os.path.dirname(target_path), exist_ok=True)
# The timeout keeps the cell from hanging indefinitely; raise_for_status()
# stops an HTTP error page from being written to disk as a Python module.
response = requests.get(url, timeout=30)
response.raise_for_status()
with open(target_path, "w", encoding="utf-8") as f:
    f.write(response.text)
print(f"Downloaded structured KB utils to {target_path}")

Downloaded structured KB utils to utils/structured_knowledge_base.py


In [5]:
from utils.structured_knowledge_base import BedrockStructuredKnowledgeBase


## Step 2: Set up Redshift Serverless Infrastructure

Next we will create the necessary Redshift Serverless components: namespace and workgroup. This infrastructure will host our structured data that the Knowledge Base will query.

In [6]:
# Names of the Redshift Serverless and S3 resources this notebook creates.
# All but the database name carry the run suffix.
REDSHIFT_NAMESPACE = f'sds-ecommerce-{suffix}'
REDSHIFT_WORKGROUP = f'sds-ecommerce-wg-{suffix}'
REDSHIFT_DATABASE = 'sds-ecommerce'
S3_BUCKET = f'sds-ecommerce-redshift-{suffix}'

# Echo the resolved names so they can be found in the console later.
print(f"Redshift Namespace: {REDSHIFT_NAMESPACE}")
print(f"Redshift Workgroup: {REDSHIFT_WORKGROUP}")
print(f"Database: {REDSHIFT_DATABASE}")
print(f"S3 Bucket: {S3_BUCKET}")

Redshift Namespace: sds-ecommerce-1122011
Redshift Workgroup: sds-ecommerce-wg-1122011
Database: sds-ecommerce
S3 Bucket: sds-ecommerce-redshift-1122011


In [7]:
def create_iam_role_for_redshift():
    """Return the ARN of the IAM role Redshift uses to read from S3.

    The role is named ``RedshiftS3AccessRole-{suffix}``. If it does not exist
    it is created with a trust policy for ``redshift.amazonaws.com``. In both
    cases ``AmazonS3ReadOnlyAccess`` is attached, so a role left behind by an
    interrupted earlier run still ends up with the policy.

    Returns:
        str: The role ARN as reported by IAM.

    Raises:
        Exception: Any IAM error is printed and re-raised unchanged.
    """
    role_name = f'RedshiftS3AccessRole-{suffix}'
    try:
        try:
            # Use the ARN IAM returns instead of rebuilding it by hand.
            role_arn = iam_client.get_role(RoleName=role_name)['Role']['Arn']
            role_created = False
        except iam_client.exceptions.NoSuchEntityException:
            trust_policy = {
                "Version": "2012-10-17",
                "Statement": [
                    {
                        "Effect": "Allow",
                        "Principal": {
                            "Service": "redshift.amazonaws.com"
                        },
                        "Action": "sts:AssumeRole"
                    }
                ]
            }
            create_response = iam_client.create_role(
                RoleName=role_name,
                AssumeRolePolicyDocument=json.dumps(trust_policy)
            )
            role_arn = create_response['Role']['Arn']
            role_created = True

        # Attaching an already-attached managed policy is a no-op, so this is
        # safe to run for a pre-existing role as well.
        iam_client.attach_role_policy(
            RoleName=role_name,
            PolicyArn='arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess'
        )

        if role_created:
            print(f'Created role {role_name}')
        else:
            print(f'Role {role_name} already exists')
        return role_arn

    except Exception as e:
        print(f'Error creating IAM role: {str(e)}')
        raise



# Create (or reuse) the role. The ARN is used later when creating the
# namespace and in the COPY statements that load data from S3.
redshift_role_arn = create_iam_role_for_redshift()
print(f"Redshift IAM Role ARN: {redshift_role_arn}")


Created role RedshiftS3AccessRole-1122011
Redshift IAM Role ARN: arn:aws:iam::533267284022:role/RedshiftS3AccessRole-1122011


In [8]:
def create_redshift_namespace(max_attempts=30, poll_interval=10):
    """Create the Redshift Serverless namespace, or return it if it exists.

    The admin password is no longer hard-coded: it is read from the
    ``REDSHIFT_ADMIN_PASSWORD`` environment variable when set, otherwise a
    random one is generated. The rest of this notebook talks to Redshift only
    through the Data API with a workgroup name, so the password is not needed
    afterwards.

    Args:
        max_attempts: Number of status polls before giving up on waiting.
        poll_interval: Seconds to sleep between polls.

    Returns:
        dict: The ``namespace`` structure returned by the service. If the
        namespace is not ``AVAILABLE`` after ``max_attempts`` polls, the
        structure from the create call is returned and a notice is printed.

    Raises:
        Exception: Errors from the existence check or create call are printed
        and re-raised. Errors while polling are printed and retried.
    """
    import secrets  # local import: only needed to generate the admin password

    try:
        # Check if namespace already exists
        try:
            response = redshift_client.get_namespace(namespaceName=REDSHIFT_NAMESPACE)
            print(f'Namespace {REDSHIFT_NAMESPACE} already exists')
            return response['namespace']
        except redshift_client.exceptions.ResourceNotFoundException:
            print(f'Creating namespace {REDSHIFT_NAMESPACE}...')

        # The 'Aa1' prefix guarantees an upper-case letter, a lower-case letter
        # and a digit; token_hex supplies the randomness.
        admin_password = os.environ.get('REDSHIFT_ADMIN_PASSWORD') or ('Aa1' + secrets.token_hex(16))

        # Create the namespace
        response = redshift_client.create_namespace(
            namespaceName=REDSHIFT_NAMESPACE,
            adminUsername='admin',
            adminUserPassword=admin_password,
            dbName=REDSHIFT_DATABASE,
            defaultIamRoleArn=redshift_role_arn,
            iamRoles=[redshift_role_arn]
        )

        print(f'Created namespace {REDSHIFT_NAMESPACE}')

        # Wait for namespace to be available
        print('Waiting for namespace to be available...')
        for _ in range(max_attempts):
            try:
                namespace_response = redshift_client.get_namespace(namespaceName=REDSHIFT_NAMESPACE)
                status = namespace_response['namespace']['status']
                if status == 'AVAILABLE':
                    print(f'Namespace {REDSHIFT_NAMESPACE} is now available')
                    return namespace_response['namespace']
                print(f'Namespace status: {status}, waiting...')
            except Exception as e:
                # Polling is best-effort: report the error and try again.
                print(f'Error checking namespace status: {str(e)}, retrying...')
            time.sleep(poll_interval)

        # Best-effort fall-through kept from the original behaviour.
        print('Timeout waiting for namespace, but proceeding...')
        return response['namespace']

    except Exception as e:
        print(f'Error creating namespace: {str(e)}')
        raise

# Create the namespace (or reuse an existing one) and keep the returned
# description for inspection.
namespace = create_redshift_namespace()


Creating namespace sds-ecommerce-1122011...
Created namespace sds-ecommerce-1122011
Waiting for namespace to be available...
Namespace sds-ecommerce-1122011 is now available


In [9]:
def create_redshift_workgroup(publicly_accessible=False, max_attempts=30, poll_interval=10):
    """Create the Redshift Serverless workgroup, or return it if it exists.

    Args:
        publicly_accessible: Whether the workgroup is reachable from a public
            network. Defaults to ``False``: this notebook reaches Redshift
            through the Data API client only, so a public endpoint is not
            needed. Pass ``True`` to restore the previous behaviour.
        max_attempts: Number of status polls before giving up on waiting.
        poll_interval: Seconds to sleep between polls.

    Returns:
        dict: The ``workgroup`` structure returned by the service. If the
        workgroup is not ``AVAILABLE`` after ``max_attempts`` polls, the
        structure from the create call is returned and a notice is printed.

    Raises:
        Exception: Errors from the existence check or create call are printed
        and re-raised. Errors while polling are printed and retried.
    """
    try:
        # Check if workgroup already exists
        try:
            response = redshift_client.get_workgroup(workgroupName=REDSHIFT_WORKGROUP)
            print(f'Workgroup {REDSHIFT_WORKGROUP} already exists')
            return response['workgroup']
        except redshift_client.exceptions.ResourceNotFoundException:
            print(f'Creating workgroup {REDSHIFT_WORKGROUP}...')

        # Create the workgroup
        response = redshift_client.create_workgroup(
            workgroupName=REDSHIFT_WORKGROUP,
            namespaceName=REDSHIFT_NAMESPACE,
            baseCapacity=8,  # Minimum base capacity
            enhancedVpcRouting=False,
            publiclyAccessible=publicly_accessible,
            configParameters=[
                {
                    'parameterKey': 'enable_user_activity_logging',
                    'parameterValue': 'true'
                }
            ]
        )

        print(f'Created workgroup {REDSHIFT_WORKGROUP}')

        # Wait for workgroup to be available
        print('Waiting for workgroup to be available...')
        for _ in range(max_attempts):
            try:
                workgroup_response = redshift_client.get_workgroup(workgroupName=REDSHIFT_WORKGROUP)
                status = workgroup_response['workgroup']['status']
                if status == 'AVAILABLE':
                    print(f'Workgroup {REDSHIFT_WORKGROUP} is now available')
                    return workgroup_response['workgroup']
                print(f'Workgroup status: {status}, waiting...')
            except Exception as e:
                # Polling is best-effort: report the error and try again.
                print(f'Error checking workgroup status: {str(e)}, retrying...')
            time.sleep(poll_interval)

        # Best-effort fall-through kept from the original behaviour.
        print('Timeout waiting for workgroup, but proceeding...')
        return response['workgroup']

    except Exception as e:
        print(f'Error creating workgroup: {str(e)}')
        raise

# Create the workgroup (or reuse an existing one). Its ARN is needed later
# for the Knowledge Base query-engine configuration.
workgroup = create_redshift_workgroup()
workgroup_arn = workgroup['workgroupArn']
print(f"Workgroup ARN: {workgroup_arn}")


Creating workgroup sds-ecommerce-wg-1122011...
Created workgroup sds-ecommerce-wg-1122011
Waiting for workgroup to be available...
Workgroup status: CREATING, waiting...
Workgroup status: CREATING, waiting...
Workgroup status: CREATING, waiting...
Workgroup status: CREATING, waiting...
Workgroup status: CREATING, waiting...
Workgroup sds-ecommerce-wg-1122011 is now available
Workgroup ARN: arn:aws:redshift-serverless:us-east-1:533267284022:workgroup/229deaab-1493-4fae-be7f-e08d65b05aa3


## Step 3: Create S3 Bucket and Load Sample Data

We will create an S3 bucket to stage our sample e-commerce data before loading it into Redshift tables.

In [10]:
# Display the Region captured from the session in the client-setup cell.
region

'us-east-1'

In [11]:

def create_s3_bucket():
    """Ensure the staging bucket ``S3_BUCKET`` exists in the session's Region.

    A fresh boto3 session supplies the Region (``us-east-1`` when none is
    configured) and the S3 client is pinned to it. ``head_bucket`` probes for
    the bucket. A ``404`` / ``NoSuchBucket`` error leads to creation, and any
    other client error propagates.
    """
    bucket_region = boto3.session.Session().region_name or "us-east-1"
    s3 = boto3.client("s3", region_name=bucket_region)

    try:
        s3.head_bucket(Bucket=S3_BUCKET)
    except s3.exceptions.ClientError as err:
        error_code = err.response["Error"]["Code"]
        if error_code != "404" and error_code != "NoSuchBucket":
            raise
    else:
        print(f"Bucket {S3_BUCKET} already exists")
        return

    # us-east-1 must be created without a LocationConstraint. Every other
    # Region has to name itself explicitly.
    create_kwargs = {"Bucket": S3_BUCKET}
    if bucket_region != "us-east-1":
        create_kwargs["CreateBucketConfiguration"] = {"LocationConstraint": bucket_region}

    s3.create_bucket(**create_kwargs)
    print(f"Created bucket {S3_BUCKET} in {bucket_region}")

# Create the staging bucket (no-op if it already exists).
create_s3_bucket()



Created bucket sds-ecommerce-redshift-1122011 in us-east-1


In [12]:
def upload_sample_data():
    """Upload the sample CSV files from ``sample_data/`` to ``S3_BUCKET``.

    Each file is stored under its bare file name as the object key. Files
    missing locally are reported and skipped. A summary line states whether
    all of them were uploaded.
    """
    data_files = ['orders.csv', 'order_items.csv', 'payments.csv', 'reviews.csv']
    sds_directory = 'sample_data'

    print("Uploading sample data files to S3...")
    files_found = 0

    for file_name in data_files:
        local_path = os.path.join(sds_directory, file_name)

        # Guard clause: nothing to upload for a missing file.
        if not os.path.exists(local_path):
            print(f'Warning: {local_path} not found')
            continue

        # Size is reported for information only.
        file_size_mb = os.path.getsize(local_path) / (1024 * 1024)

        s3_client.upload_file(local_path, S3_BUCKET, file_name)
        print(f'Uploaded {file_name} ({file_size_mb:.1f} MB) to S3')
        files_found += 1

    if files_found != len(data_files):
        print(f"\nOnly {files_found} out of {len(data_files)} files were found and uploaded")
    else:
        print(f"\nSuccessfully uploaded all {files_found} data files to S3")

# Stage the sample CSV files in S3 so the COPY statements below can read them.
upload_sample_data()


Uploading sample data files to S3...
Uploaded orders.csv (1.8 MB) to S3
Uploaded order_items.csv (1.3 MB) to S3
Uploaded payments.csv (0.8 MB) to S3
Uploaded reviews.csv (0.5 MB) to S3

Successfully uploaded all 4 data files to S3


## Step 4: Create Redshift Tables and Load Data

Now we will create the database tables in Redshift and load our sample e-commerce data.

In [13]:
def wait_for_statement(statement_id, max_attempts=30, poll_interval=5):
    """Poll a Redshift Data API statement until it reaches a terminal state.

    Only failures of the ``describe_statement`` call itself are retried.
    Terminal statuses are evaluated outside the ``try`` block, so a transient
    error whose message happens to contain "cancelled" or "Statement failed"
    no longer aborts polling.

    Args:
        statement_id: Id returned by ``execute_statement``.
        max_attempts: Maximum number of polls.
        poll_interval: Seconds to sleep between polls.

    Returns:
        dict: The ``describe_statement`` response once status is ``FINISHED``.

    Raises:
        Exception: If the statement is ``FAILED``, ``ABORTED`` or
        ``CANCELLED``, or does not finish within ``max_attempts`` polls.
    """
    for _ in range(max_attempts):
        try:
            response = redshift_data_client.describe_statement(Id=statement_id)
        except Exception as e:
            print(f"Error checking statement status: {str(e)}, retrying...")
            time.sleep(poll_interval)
            continue

        status = response['Status']
        if status == 'FINISHED':
            return response
        if status == 'FAILED':
            raise Exception(f"Statement failed: {response.get('Error', 'Unknown error')}")
        if status == 'CANCELLED':
            raise Exception("Statement was cancelled")
        if status == 'ABORTED':
            # ABORTED is terminal too; without this branch the loop would poll
            # until the timeout.
            raise Exception("Statement was aborted")

        print(f"Statement status: {status}, waiting...")
        time.sleep(poll_interval)

    raise Exception("Timeout waiting for statement to complete")

def run_redshift_statement(sql_statement):
    """Submit ``sql_statement`` through the Redshift Data API and wait for it.

    The statement runs against ``REDSHIFT_WORKGROUP`` / ``REDSHIFT_DATABASE``.

    Returns:
        The value returned by ``wait_for_statement`` for the submitted id.

    Raises:
        Exception: Any error from submission or waiting is printed and
        re-raised.
    """
    request = {
        'Sql': sql_statement,
        'Database': REDSHIFT_DATABASE,
        'WorkgroupName': REDSHIFT_WORKGROUP,
    }
    try:
        submission = redshift_data_client.execute_statement(**request)
        statement_id = submission['Id']
        print(f"Executing statement: {statement_id}")
        outcome = wait_for_statement(statement_id)
        print(f"Statement completed successfully")
        return outcome
    except Exception as e:
        print(f"Error executing statement: {str(e)}")
        raise


In [14]:
# Create tables in Redshift
def create_tables():
    """Create the orders, order_items, payments and reviews tables.

    Each ``CREATE TABLE IF NOT EXISTS`` statement is sent through
    ``run_redshift_statement`` in the order listed below, with progress
    printed around every statement.
    """
    # (table name, DDL) pairs, executed in order. The SQL text is kept verbatim.
    table_definitions = [
        ('orders', """
    CREATE TABLE IF NOT EXISTS orders (
        order_id VARCHAR(255) PRIMARY KEY,
        customer_id VARCHAR(255),
        order_total DECIMAL(10,2),
        order_status VARCHAR(50),
        payment_method VARCHAR(50),
        shipping_address TEXT,
        created_at TIMESTAMP,
        updated_at TIMESTAMP
    );
    """),
        ('order_items', """
    CREATE TABLE IF NOT EXISTS order_items (
        order_item_id VARCHAR(255) PRIMARY KEY,
        order_id VARCHAR(255),
        product_id VARCHAR(255),
        quantity INTEGER,
        price DECIMAL(10,2)
    );
    """),
        ('payments', """
    CREATE TABLE IF NOT EXISTS payments (
        payment_id VARCHAR(255) PRIMARY KEY,
        order_id VARCHAR(255),
        customer_id VARCHAR(255),
        amount DECIMAL(10,2),
        payment_method VARCHAR(50),
        payment_status VARCHAR(50),
        created_at DATE
    );
    """),
        ('reviews', """
    CREATE TABLE IF NOT EXISTS reviews (
        review_id VARCHAR(255) PRIMARY KEY,
        product_id VARCHAR(255),
        customer_id VARCHAR(255),
        rating INTEGER,
        created_at DATE
    );
    """),
    ]

    for table_name, sql in table_definitions:
        print(f"Creating table: {table_name}")
        run_redshift_statement(sql)
        print(f"Created table: {table_name}")
        print("-------------")

# Create the four tables. The statements use IF NOT EXISTS, so re-running this
# cell is safe.
create_tables()


Creating table: orders
Executing statement: eb4da616-d8a7-4c3c-8c7d-2e66cc02eb79
Statement status: PICKED, waiting...
Statement completed successfully
Created table: orders
-------------
Creating table: order_items
Executing statement: e05be4a8-bc21-4e6d-b54b-4b217c11de5d
Statement status: STARTED, waiting...
Statement completed successfully
Created table: order_items
-------------
Creating table: payments
Executing statement: 841c158b-228f-4dec-a1b9-ac465fc02ff2
Statement status: PICKED, waiting...
Statement completed successfully
Created table: payments
-------------
Creating table: reviews
Executing statement: 26e7bc07-674b-4a7b-8ca4-9542a595af87
Statement status: PICKED, waiting...
Statement completed successfully
Created table: reviews
-------------


In [15]:
# Load data from S3 into Redshift tables
def load_data_from_s3():
    """COPY each staged CSV file from ``S3_BUCKET`` into its Redshift table.

    Every table is attempted even if an earlier one fails. Failures are
    printed as before, and are now also collected and returned so the caller
    can tell that a load did not succeed.

    Returns:
        list[str]: Names of the tables whose COPY failed (empty when all
        loads succeeded).
    """
    tables_and_files = {
        'orders': 'orders.csv',
        'order_items': 'order_items.csv',
        'payments': 'payments.csv',
        'reviews': 'reviews.csv'
    }

    # Only emit REGION when a region is actually known, so a missing value
    # never renders as the literal REGION 'None'.
    region_clause = f"REGION '{region}'" if region else ""

    failed_tables = []
    for table_name, file_name in tables_and_files.items():
        print(f"Loading data into {table_name} from {file_name}")

        copy_sql = f"""
        COPY {table_name}
        FROM 's3://{S3_BUCKET}/{file_name}'
        IAM_ROLE '{redshift_role_arn}'
        CSV
        IGNOREHEADER 1
        DELIMITER ','
        {region_clause};
        """

        try:
            run_redshift_statement(copy_sql)
            print(f"Loaded data into {table_name}")
        except Exception as e:
            print(f"Error loading data into {table_name}: {str(e)}")
            failed_tables.append(table_name)

    if failed_tables:
        print(f"\nFailed to load {len(failed_tables)} table(s): {', '.join(failed_tables)}")

    return failed_tables

# Load the staged CSV files into the tables created above.
load_data_from_s3()

Loading data into orders from orders.csv
Executing statement: c39d78c0-2eb8-41f3-81f1-f8a1e3ad4d1d
Statement status: PICKED, waiting...
Statement completed successfully
Loaded data into orders
Loading data into order_items from order_items.csv
Executing statement: 436ee9a3-3272-4d7e-8454-bec447a85734
Statement status: STARTED, waiting...
Statement completed successfully
Loaded data into order_items
Loading data into payments from payments.csv
Executing statement: 33098b36-5634-4d00-b3ba-243fc5d43caf
Statement status: PICKED, waiting...
Statement completed successfully
Loaded data into payments
Loading data into reviews from reviews.csv
Executing statement: 5637e1eb-2a53-4c2f-a2d2-94332ca109ce
Statement status: PICKED, waiting...
Statement completed successfully
Loaded data into reviews


## Step 5: Verify Data Load

Let's verify that our data has been loaded correctly by running some sample queries.

## Step 6: Create Bedrock Knowledge Base with Redshift Data Source

Now we'll create the Bedrock Knowledge Base configured to use our Redshift data as a structured data source.


In [16]:
# Configure Knowledge Base parameters
kb_name = f"redshift-structured-kb-{suffix}"
kb_description = "Structured Knowledge Base for e-commerce data queries using Redshift"
# Model id passed to the KB helper and used to build the model ARN for queries.
# NOTE(review): the captured RetrieveAndGenerate output in this notebook reports
# that this model id is not supported with on-demand throughput and asks for an
# inference profile — confirm the id/ARN to use in your account and Region.
generation_model = "anthropic.claude-3-5-haiku-20241022-v1:0"

print(f"Knowledge Base Name: {kb_name}")


Knowledge Base Name: redshift-structured-kb-1122011


Amazon Bedrock Knowledge Bases uses a service role to connect knowledge bases to structured data stores, retrieve data from these data stores, and generate SQL queries based on user queries and the structure of the data stores. Several access patterns are available, depending on whether you use Redshift Serverless or a Redshift provisioned cluster. This notebook uses the `IAM Role + Redshift Serverless WorkGroup` access pattern.

In [17]:
# Configure Knowledge Base parameters for Redshift Serverless with IAM authentication.
# The auth type is declared in place so the configuration is complete as soon
# as this cell has run.
kb_config_param = {
    "type": "SQL",
    "sqlKnowledgeBaseConfiguration": {
        "type": "REDSHIFT",
        "redshiftConfiguration": {
            # Where the data lives: the database created with the namespace.
            "storageConfigurations": [{
                "type": "REDSHIFT",
                "redshiftConfiguration": {
                    "databaseName": REDSHIFT_DATABASE
                }
            }],
            # How queries are run: the serverless workgroup, authenticated via IAM.
            "queryEngineConfiguration": {
                "type": "SERVERLESS",
                "serverlessConfiguration": {
                    "workgroupArn": workgroup_arn,
                    "authConfiguration": {"type": "IAM"}
                }
            }
        }
    }
}


In [18]:
# Select IAM authentication for the Redshift Serverless query engine.
kb_config_param['sqlKnowledgeBaseConfiguration']['redshiftConfiguration']['queryEngineConfiguration']['serverlessConfiguration']['authConfiguration']['type'] = "IAM"


In [19]:
# Display the final Knowledge Base configuration for inspection.
kb_config_param

{'type': 'SQL',
 'sqlKnowledgeBaseConfiguration': {'type': 'REDSHIFT',
  'redshiftConfiguration': {'storageConfigurations': [{'type': 'REDSHIFT',
     'redshiftConfiguration': {'databaseName': 'sds-ecommerce'}}],
   'queryEngineConfiguration': {'type': 'SERVERLESS',
    'serverlessConfiguration': {'workgroupArn': 'arn:aws:redshift-serverless:us-east-1:533267284022:workgroup/229deaab-1493-4fae-be7f-e08d65b05aa3',
     'authConfiguration': {'type': 'IAM'}}}}}}

In [20]:
# Create the structured Knowledge Base through the downloaded helper class.
# Judging by the captured output, the helper also creates the KB execution
# role and policies.
try:
    structured_kb = BedrockStructuredKnowledgeBase(
        kb_name=kb_name,
        kb_description=kb_description,
        workgroup_arn=workgroup_arn,
        kbConfigParam=kb_config_param,
        generation_model=generation_model,
        suffix=suffix
    )
    
    print("Knowledge Base created successfully!")
    # The id is used by the query and ingestion-inspection cells further down.
    kb_id = structured_kb.get_knowledge_base_id()
    print(f"Knowledge Base ID: {kb_id}")
    
except Exception as e:
    # Report and re-raise so the notebook stops instead of continuing without a KB.
    print(f"Error creating Knowledge Base: {str(e)}")
    raise


Step 1 - Creating Knowledge Base Execution Role (AmazonBedrockExecutionRoleForStructuredKnowledgeBase_1122011) and Policies
Step 2 - Creating Knowledge Base
{ 'createdAt': datetime.datetime(2025, 6, 21, 19, 22, 15, 496706, tzinfo=tzutc()),
  'description': 'Structured Knowledge Base for e-commerce data queries using '
                 'Redshift',
  'knowledgeBaseArn': 'arn:aws:bedrock:us-east-1:533267284022:knowledge-base/MERU0MPC8X',
  'knowledgeBaseConfiguration': { 'sqlKnowledgeBaseConfiguration': { 'redshiftConfiguration': { 'queryEngineConfiguration': { 'serverlessConfiguration': { 'authConfiguration': { 'type': 'IAM'},
                                                                                                                                                           'workgroupArn': 'arn:aws:redshift-serverless:us-east-1:533267284022:workgroup/229deaab-1493-4fae-be7f-e08d65b05aa3'},
                                                                                              

## Step 6 (continued): Database Access Configuration for IAM Role + Redshift Serverless WorkGroup


For the IAM Role + Redshift Serverless WorkGroup access pattern, you must configure database-level permissions for the IAM role used by Bedrock Knowledge Base.

1. **Create IAM-based database user**: Map the IAM role to a database user in Redshift
2. **Grant appropriate permissions**: Provide SELECT access to the relevant schemas and tables


In [21]:
# The Redshift database user is named after the KB service role, so pull the
# role name (the last '/'-separated segment) out of the role ARN.
kb_details = structured_kb.knowledge_base

bedrock_role_arn = kb_details['roleArn']
bedrock_role_name = bedrock_role_arn.rpartition('/')[2]
print(f"   Extracted Role Name: {bedrock_role_name}")

   Extracted Role Name: AmazonBedrockExecutionRoleForStructuredKnowledgeBase_1122011


In [22]:

# Map the Knowledge Base service role to a Redshift database user
# ("IAMR:<role name>") that has no password.
create_user_sql = f'CREATE USER "IAMR:{bedrock_role_name}" WITH PASSWORD DISABLE;'

try:
    print(f"Creating user: IAMR:{bedrock_role_name}")
    run_redshift_statement(create_user_sql)
    print("IAM user created successfully!")
except Exception as e:
    # A pre-existing user is fine (e.g. on re-run); anything else is fatal.
    if "already exists" not in str(e).lower():
        print(f"Error creating user: {str(e)}")
        raise
    print("User already exists, continuing...")

Creating user: IAMR:AmazonBedrockExecutionRoleForStructuredKnowledgeBase_1122011
Executing statement: 5a7182d8-ece4-4b25-8f5b-025b9c86a530
Statement status: STARTED, waiting...
Statement completed successfully
IAM user created successfully!


In [23]:
# Give the Knowledge Base database user read access to every table in the
# public schema.
grant_select_sql = f'GRANT SELECT ON ALL TABLES IN SCHEMA public TO "IAMR:{bedrock_role_name}";'

try:
    print(f"Granting SELECT permissions to: IAMR:{bedrock_role_name}")
    run_redshift_statement(grant_select_sql)
except Exception as e:
    print(f"Error granting permissions: {str(e)}")
    raise
else:
    print("SELECT permissions granted successfully!")

Granting SELECT permissions to: IAMR:AmazonBedrockExecutionRoleForStructuredKnowledgeBase_1122011
Executing statement: a9003ac8-6fb3-432c-b02e-daa123ef237d
Statement status: PICKED, waiting...
Statement completed successfully
SELECT permissions granted successfully!


## Step 7: Start Ingestion Job

Now that the database permissions are properly configured, let's start the ingestion job to sync the data from the Redshift database.

In [24]:
# Wait a bit for the Knowledge Base to be fully ready
# NOTE(review): the 60-second pause is a fixed guess, not a readiness check,
# and the captured output below shows the ingestion job ending in FAILED —
# inspect the job's failure reasons before relying on the query cells.
time.sleep(60)
structured_kb.start_ingestion_job()

job  started successfully

{ 'dataSourceId': 'QA6E9AZQBO',
  'ingestionJobId': 'HXSAVT8YXC',
  'knowledgeBaseId': 'MERU0MPC8X',
  'startedAt': datetime.datetime(2025, 6, 21, 19, 23, 27, 971321, tzinfo=tzutc()),
  'status': 'FAILED',
  'updatedAt': datetime.datetime(2025, 6, 21, 19, 23, 31, 156217, tzinfo=tzutc())}
.....

In [25]:
# Helper functions for querying the Knowledge Base

def query_with_retrieve_and_generate(kb_id, query, model_arn=None):
    """Query the Knowledge Base with RetrieveAndGenerate and return the answer text.

    Args:
        kb_id: Knowledge Base id.
        query: Natural-language question.
        model_arn: Optional model or inference-profile ARN used for generation.
            When omitted, the foundation-model ARN is built from the notebook's
            ``region`` and ``generation_model`` as before. Pass an
            inference-profile ARN for models that cannot be invoked with
            on-demand throughput.

    Returns:
        str: The generated answer, or ``"Error: <message>"`` if the call
        fails (errors are reported as text rather than raised).
    """
    if model_arn is None:
        model_arn = f"arn:aws:bedrock:{region}::foundation-model/{generation_model}"

    try:
        response = bedrock_agent_runtime_client.retrieve_and_generate(
            input={"text": query},
            retrieveAndGenerateConfiguration={
                "type": "KNOWLEDGE_BASE",
                "knowledgeBaseConfiguration": {
                    'knowledgeBaseId': kb_id,
                    "modelArn": model_arn,
                    "retrievalConfiguration": {
                        "vectorSearchConfiguration": {
                            "numberOfResults": 5
                        }
                    }
                }
            }
        )
        return response['output']['text']
    except Exception as e:
        return f"Error: {str(e)}"

def generate_sql_query(kb_arn, query):
    """Translate a natural-language question into SQL with GenerateQuery.

    Args:
        kb_arn: ARN of the Knowledge Base to generate the query against. This
            argument is now used; previously it was ignored in favour of the
            global ``kb_details``.
        query: Natural-language question.

    Returns:
        str: The first generated SQL statement, ``"No SQL generated"`` when
        the response has no queries, or ``"Error: <message>"`` if the call
        fails.
    """
    try:
        response = bedrock_agent_runtime_client.generate_query(
            queryGenerationInput={
                "text": query,
                "type": "TEXT"
            },
            transformationConfiguration={
                "mode": "TEXT_TO_SQL",
                "textToSqlConfiguration": {
                    "type": "KNOWLEDGE_BASE",
                    "knowledgeBaseConfiguration": {
                        "knowledgeBaseArn": kb_arn
                    }
                }
            }
        )

        queries = response.get('queries') or []
        if queries:
            return queries[0]['sql']
        return "No SQL generated"
    except Exception as e:
        return f"Error: {str(e)}"

# Confirmation that the cell ran; the helpers are used in the next cell.
print("✅ Helper functions defined")

✅ Helper functions defined


In [26]:
# Sample questions sent to the Knowledge Base. Each one is answered twice:
# as natural language (RetrieveAndGenerate) and as generated SQL (GenerateQuery).
test_queries = [
    "How many orders are in the database?",
    "What is the average order total?"
]

print("🧪 Testing Knowledge Base with sample queries")
print("=" * 60)

for i, query in enumerate(test_queries, 1):
    print(f"\n📝 Query {i}: {query}")
    print("-" * 50)
    
    # Get natural language response
    # (the helper returns "Error: ..." text instead of raising, so the loop keeps going)
    print("🤖 Natural Language Response:")
    nl_response = query_with_retrieve_and_generate(kb_id, query)
    print(f"   {nl_response}")
    
    # Get generated SQL
    print("\n🔧 Generated SQL:")
    sql_query = generate_sql_query(kb_details['knowledgeBaseArn'], query)
    print(f"   {sql_query}")
    
    print("\n" + "="*50)

🧪 Testing Knowledge Base with sample queries

📝 Query 1: How many orders are in the database?
--------------------------------------------------
🤖 Natural Language Response:
   Error: An error occurred (ValidationException) when calling the RetrieveAndGenerate operation: No metadata found. Please trigger an ingestion and try again. (Service: BedrockAgentRuntime, Status Code: 400, Request ID: 603fd599-45ce-4e19-abd1-b18220015471) (SDK Attempt Count: 1)

🔧 Generated SQL:
   SELECT COUNT("order_id") AS "total_orders" FROM public.orders;


📝 Query 2: What is the average order total?
--------------------------------------------------
🤖 Natural Language Response:
   Error: An error occurred (ValidationException) when calling the RetrieveAndGenerate operation: Invocation of model ID anthropic.claude-3-5-haiku-20241022-v1:0 with on-demand throughput isn’t supported. Retry your request with the ID or ARN of an inference profile that contains this model. (Service: BedrockRuntime, Status Code: 40

In [27]:
import boto3
from pprint import pprint

# ------------------------------------------------------------------
#  Knowledge Base id to inspect: reuses the id obtained when the KB
#  was created earlier in this notebook.
# ------------------------------------------------------------------
KB_ID = kb_id

# Client used below to list data sources and ingestion jobs.
bedrock = boto3.client("bedrock-agent")

def _iter_pages(list_call, result_key, **params):
    """Yield every item of a ``nextToken``-paginated list call, page by page."""
    while True:
        page = list_call(**params)
        yield from page[result_key]
        token = page.get("nextToken")
        if not token:
            return
        params["nextToken"] = token


def get_ingestion_jobs(kb_id: str):
    """
    Prints the detail of every ingestion job that was ever started
    for every data-source attached to the given knowledge-base.

    Both list calls are followed across pages via ``nextToken``, so results
    beyond the first 100 data sources or jobs are included as well.

    Args:
        kb_id: Knowledge Base id.

    Returns:
        None. Job ids, statuses, failure reasons and statistics are printed.
    """

    # 1) find all data-sources for this KB
    sources = list(_iter_pages(
        bedrock.list_data_sources,
        "dataSourceSummaries",
        knowledgeBaseId=kb_id,
        maxResults=100
    ))

    if not sources:
        print("No data-sources found for KB:", kb_id)
        return

    for src in sources:
        ds_id = src["dataSourceId"]
        print(f"\n=== Data-source {ds_id} ({src['name']}) ===")

        # 2) list all ingestion jobs (newest first)
        jobs = list(_iter_pages(
            bedrock.list_ingestion_jobs,
            "ingestionJobSummaries",
            knowledgeBaseId=kb_id,
            dataSourceId=ds_id,
            sortBy={"attribute": "STARTED_AT", "order": "DESCENDING"},
            maxResults=100
        ))

        if not jobs:
            print("  (no ingestion jobs yet)")
            continue

        # 3) fetch & print the full record for every job found
        for j in jobs:
            job_id = j["ingestionJobId"]
            detail = bedrock.get_ingestion_job(
                knowledgeBaseId=kb_id,
                dataSourceId=ds_id,
                ingestionJobId=job_id
            )["ingestionJob"]

            print(f"\n  Job {job_id}  |  status: {detail['status']}")
            if detail.get("failureReasons"):
                print("     failureReasons:", detail["failureReasons"])
            if detail.get("statistics"):
                print("     statistics:")
                pprint(detail["statistics"], indent=10)

# ------------------------------------------------------------------
#  Print the ingestion-job history for the Knowledge Base created above
# ------------------------------------------------------------------
get_ingestion_jobs(KB_ID)


=== Data-source QA6E9AZQBO (redshift-structured-kb-1122011-ds) ===

  Job HXSAVT8YXC  |  status: FAILED


## Clean Up
Run the cells below (uncommenting any commented-out calls) to delete all the resources created by this notebook.

In [30]:
# Delete the Knowledge Base; the flag asks the helper to also remove the IAM
# roles and policies associated with it.
structured_kb.delete_kb( delete_iam_roles_and_policies=True)



In [31]:
def cleanup_redshift_environment():
    """
    Delete the Redshift workgroup, Redshift namespace, S3 bucket, and IAM role used by this notebook.

    Uses the existing notebook variables ``REDSHIFT_WORKGROUP``, ``REDSHIFT_NAMESPACE``,
    ``S3_BUCKET`` and ``redshift_role_arn``, plus the notebook's top-level ``boto3`` and
    ``time`` imports.

    The cleanup is best-effort: an error in one step is printed and the remaining steps
    still run. The final summary lists the outcome recorded for each resource, so a
    failed or timed-out deletion is not reported as deleted.

    Returns:
        None. Progress and results are printed.
    """
    # Outcome labels used in the final summary.
    deleted = "deleted"
    not_found = "not found / already deleted"
    timed_out = "deletion still in progress (timed out waiting)"
    failed = "FAILED"

    # S3 DeleteObjects accepts at most 1000 keys per request.
    s3_delete_batch_size = 1000

    # Initialize clients
    session = boto3.session.Session()
    region = session.region_name
    redshift_client = boto3.client('redshift-serverless', region_name=region)
    iam_client = boto3.client('iam')
    s3 = boto3.resource('s3')
    s3_client = boto3.client('s3')

    # Outcome of each step, keyed by resource; filled in as the steps run.
    outcomes = {}

    def wait_for_workgroup_deleted(name, poll_interval=10, max_attempts=60):
        """Poll until the workgroup is gone. Return True when deleted, False on timeout."""
        print(f"  Waiting for workgroup {name} to be deleted...")
        for _ in range(max_attempts):
            try:
                wg = redshift_client.get_workgroup(workgroupName=name)["workgroup"]
            except redshift_client.exceptions.ResourceNotFoundException:
                print("    Workgroup deleted successfully")
                return True
            status = wg["status"]
            print(f"    Workgroup status: {status}")
            if status == "DELETED":
                return True
            time.sleep(poll_interval)

        print(f"    Warning: Timeout waiting for workgroup deletion after {max_attempts * poll_interval} seconds")
        return False

    def wait_for_namespace_deleted(name, poll_interval=10, max_attempts=60):
        """Poll until the namespace is gone. Return True when deleted, False on timeout."""
        print(f"  Waiting for namespace {name} to be deleted...")
        for _ in range(max_attempts):
            try:
                redshift_client.get_namespace(namespaceName=name)
            except redshift_client.exceptions.ResourceNotFoundException:
                print("    Namespace deleted successfully")
                return True
            print("    Namespace still exists, waiting...")
            time.sleep(poll_interval)

        print(f"    Warning: Timeout waiting for namespace deletion after {max_attempts * poll_interval} seconds")
        return False

    print("Starting Redshift environment cleanup...")
    print("=" * 60)

    # 1. Delete Redshift workgroup first (the namespace is deleted only after it)
    print(f"Step 1: Deleting Redshift workgroup {REDSHIFT_WORKGROUP}")
    try:
        redshift_client.delete_workgroup(workgroupName=REDSHIFT_WORKGROUP)
        print("  Workgroup deletion initiated")
        if wait_for_workgroup_deleted(REDSHIFT_WORKGROUP):
            outcomes['workgroup'] = deleted
        else:
            outcomes['workgroup'] = timed_out
    except redshift_client.exceptions.ResourceNotFoundException:
        print("  Workgroup already deleted or does not exist")
        outcomes['workgroup'] = not_found
    except Exception as e:
        print(f"  Error deleting workgroup: {str(e)}")
        outcomes['workgroup'] = failed

    # 2. Delete Redshift namespace
    print(f"\nStep 2: Deleting Redshift namespace {REDSHIFT_NAMESPACE}")
    try:
        redshift_client.delete_namespace(namespaceName=REDSHIFT_NAMESPACE)
        print("  Namespace deletion initiated")
        if wait_for_namespace_deleted(REDSHIFT_NAMESPACE):
            outcomes['namespace'] = deleted
        else:
            outcomes['namespace'] = timed_out
    except redshift_client.exceptions.ResourceNotFoundException:
        print("  Namespace already deleted or does not exist")
        outcomes['namespace'] = not_found
    except Exception as e:
        print(f"  Error deleting namespace: {str(e)}")
        outcomes['namespace'] = failed

    # 3. Empty and delete S3 bucket
    print(f"\nStep 3: Deleting S3 bucket {S3_BUCKET}")
    try:
        # Check if bucket exists. A missing bucket surfaces here as a ClientError
        # with error code "404" rather than as NoSuchBucket.
        s3_client.head_bucket(Bucket=S3_BUCKET)
        bucket = s3.Bucket(S3_BUCKET)

        # Delete all objects in the bucket, one request per batch of keys
        print("  Emptying bucket contents...")
        objects_to_delete = [{'Key': obj.key} for obj in bucket.objects.all()]

        if objects_to_delete:
            for start in range(0, len(objects_to_delete), s3_delete_batch_size):
                batch = objects_to_delete[start:start + s3_delete_batch_size]
                bucket.delete_objects(Delete={'Objects': batch})
            print(f"    Deleted {len(objects_to_delete)} objects")
        else:
            print("    Bucket was already empty")

        # Delete the bucket
        print("  Deleting bucket...")
        bucket.delete()
        print("  S3 bucket deleted successfully")
        outcomes['bucket'] = deleted

    except s3_client.exceptions.ClientError as e:
        # NoSuchBucket is a ClientError subclass, so both "missing bucket" shapes land here.
        error_code = e.response.get('Error', {}).get('Code', '')
        if error_code in ('404', 'NoSuchBucket'):
            print("  S3 bucket already deleted or does not exist")
            outcomes['bucket'] = not_found
        else:
            print(f"  Error deleting S3 bucket: {str(e)}")
            outcomes['bucket'] = failed
    except Exception as e:
        print(f"  Error deleting S3 bucket: {str(e)}")
        outcomes['bucket'] = failed

    # 4. Delete IAM role and policies
    role_name = redshift_role_arn.split('/')[-1]
    print(f"\nStep 4: Deleting IAM role {role_name}")
    try:
        # Check if role exists
        iam_client.get_role(RoleName=role_name)

        # Detach managed policies
        print("  Detaching managed policies...")
        attached_policies = iam_client.list_attached_role_policies(RoleName=role_name)['AttachedPolicies']
        for policy in attached_policies:
            policy_arn = policy['PolicyArn']
            iam_client.detach_role_policy(RoleName=role_name, PolicyArn=policy_arn)
            print(f"    Detached policy: {policy['PolicyName']}")

            # Delete custom policies (not AWS managed); failure here does not stop the role deletion
            if not policy_arn.startswith('arn:aws:iam::aws:policy/'):
                try:
                    iam_client.delete_policy(PolicyArn=policy_arn)
                    print(f"    Deleted custom policy: {policy['PolicyName']}")
                except Exception as e:
                    print(f"    Could not delete policy {policy['PolicyName']}: {str(e)}")

        # Delete inline policies
        print("  Deleting inline policies...")
        inline_policies = iam_client.list_role_policies(RoleName=role_name)['PolicyNames']
        for policy_name in inline_policies:
            iam_client.delete_role_policy(RoleName=role_name, PolicyName=policy_name)
            print(f"    Deleted inline policy: {policy_name}")

        # Delete the role
        iam_client.delete_role(RoleName=role_name)
        print("  IAM role deleted successfully")
        outcomes['role'] = deleted

    except iam_client.exceptions.NoSuchEntityException:
        print("  IAM role already deleted or does not exist")
        outcomes['role'] = not_found
    except Exception as e:
        print(f"  Error deleting IAM role: {str(e)}")
        outcomes['role'] = failed

    print("\n" + "=" * 60)
    print("Redshift environment cleanup completed")
    print("\nSummary of cleanup results:")
    print(f"  - Redshift Workgroup: {REDSHIFT_WORKGROUP} ({outcomes['workgroup']})")
    print(f"  - Redshift Namespace: {REDSHIFT_NAMESPACE} ({outcomes['namespace']})")
    print(f"  - S3 Bucket: {S3_BUCKET} ({outcomes['bucket']})")
    print(f"  - IAM Role: {role_name} ({outcomes['role']})")

# Run the cleanup: deletes the Redshift workgroup and namespace, the S3 bucket,
# and the IAM role referenced by the notebook variables.
cleanup_redshift_environment()

Starting Redshift environment cleanup...
Step 1: Deleting Redshift workgroup sds-ecommerce-wg-1122011
  Workgroup deletion initiated
  Waiting for workgroup sds-ecommerce-wg-1122011 to be deleted...
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup status: DELETING
    Workgroup deleted successfully

Step 2: Deleting Redshift namespace sds-ecommerce-1122011
  Namespace deletion initiated
  Waiting for namespace sds-ecommerce-1122011 to be deleted...
    Namespace still exists, waiting...
    Namespace still exists, waiting...
    Namespace still exists, waiting...
    Namespace still exists, wai