In [40]:
# NOTE(review): star import kept first so the explicit imports below take
# precedence over any same-named re-exports; names such as `pinecone_api_key`
# presumably come from here - confirm and prefer explicit imports.
from common import *

import json
import os
import sys
import time
from datetime import datetime
from datetime import datetime as now

import boto3
from botocore.exceptions import ClientError
from pinecone import ServerlessSpec
from pinecone.grpc import PineconeGRPC as Pinecone

In [30]:
# Pinecone client and index settings.
#
# `now` is imported as an alias of the datetime *class*, so reading
# `now.year` on it yields an attribute descriptor rather than a number.
# Rebind it to an actual timestamp before building the date-stamped name.
now = datetime.now()

pc = Pinecone(api_key=pinecone_api_key)
INDEX_NAME = f"movie-time-hk-{now.year}-{now.month}-{now.day}"  # @param {type:"string"}
DISTANCE_METRIC = "cosine"
EMBEDDING_DIMENSION_SIZE = 1024  # @param {"type":"number","placeholder":"768"}
CLOUD_PROVIDER = "aws"  # @param ["gcp", "aws", "azure"] {allow-input: true}
CLOUD_REGION = "us-east-1"  # @param {type:"string"}

In [None]:
# Create the serverless index only when it is missing. The index name is
# date-based, so re-running this cell on the same day would otherwise ask
# Pinecone to create an index that already exists.
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=EMBEDDING_DIMENSION_SIZE,
        metric=DISTANCE_METRIC,
        spec=ServerlessSpec(cloud=CLOUD_PROVIDER, region=CLOUD_REGION),
        deletion_protection="disabled",
    )
else:
    print(f"Index {INDEX_NAME} already exists; skipping creation.")

In [32]:
# Replace boto3's module-level default session with a newly constructed one.
# NOTE(review): presumably done so the boto3.client(...) calls in later cells
# pick up freshly loaded credentials/config - confirm this is still needed.
boto3.DEFAULT_SESSION = boto3.Session()

In [None]:
# Local file that caches the secret's ARN between runs; it is passed as the
# `filename` argument of get_or_create_secret.
pinecone_secret_filename = "pinecone-secret.txt"
def get_or_create_secret(filename, secret_name, secret_value):
    """Return a secret ARN, using `filename` as a local cache.

    If `filename` exists, its stripped contents are returned as the ARN and
    nothing is created. Otherwise `create_secret` is called; a truthy ARN is
    written to `filename` before being returned.

    Args:
        filename: Path of the file that caches the ARN.
        secret_name: Name forwarded to `create_secret`.
        secret_value: Value forwarded to `create_secret`.

    Returns:
        The cached or newly created ARN, or whatever falsy value
        `create_secret` returned when creation did not yield an ARN.
    """
    if not os.path.exists(filename):
        # No cache yet: create the secret and remember its ARN on disk.
        print(f'File {filename} does not exist. Creating secret.')
        new_arn = create_secret(secret_name, secret_value)
        if new_arn:
            with open(filename, 'w') as arn_file:
                arn_file.write(new_arn)
                print(f'Secret ARN saved to {filename}.')
        return new_arn

    # Cache hit: trust the ARN stored in the file.
    print(f'File {filename} already exists. Reading secret ARN from the file.')
    with open(filename, 'r') as arn_file:
        cached_arn = arn_file.read().strip()
    print(f'Secret ARN: {cached_arn}')
    return cached_arn

def create_secret(secret_name, secret_value):
    """Create a secret in AWS Secrets Manager (region ap-southeast-2).

    Args:
        secret_name: Name of the secret to create.
        secret_value: JSON-serialisable object; stored as the secret string
            after `json.dumps`.

    Returns:
        The ARN of the newly created secret, or ``None`` when the service
        rejected the request (including when the name is already taken).

    Raises:
        Any exception that is not a ``ClientError`` is propagated unchanged.
    """
    client = boto3.client('secretsmanager', region_name='ap-southeast-2')

    try:
        response = client.create_secret(
            Name=secret_name,
            SecretString=json.dumps(secret_value)
        )
    except ClientError as e:
        # Only ClientError carries `.response`; catching a broader type here
        # would turn unrelated failures into an AttributeError on this lookup.
        if e.response['Error']['Code'] == 'ResourceExistsException':
            # NOTE(review): the existing secret's ARN is not looked up, so the
            # caller gets None in this case - confirm that is intended.
            print(f'Secret {secret_name} already exists.')
        else:
            print(f'Error creating secret: {e}')
        return None

    print(f'Secret created: {response["ARN"]}')
    return response["ARN"]

# Name and payload of the Secrets Manager entry holding the Pinecone API key
secret_name = 'pineconeApiKey'
secret_value = dict(apiKey=pinecone_api_key)

# Reuse the cached ARN or create the secret. Left as the cell's final
# expression so the notebook displays the returned ARN.
# NOTE(review): the result is not assigned to any variable; the
# `pinecone_aws_arn` used when creating the knowledge base is not set in this
# cell - confirm where it comes from.
get_or_create_secret(
    filename=pinecone_secret_filename,
    secret_name=secret_name,
    secret_value=secret_value,
)

In [15]:
def create_bucket(bucket_name):
    """Create an S3 bucket in ap-southeast-2.

    Args:
        bucket_name: Name of the bucket to create.

    Returns:
        The ``Location`` value from the create response, or ``None`` when the
        call raised a ``ClientError`` (the error is printed, not re-raised).
    """
    s3_client = boto3.client('s3', region_name='ap-southeast-2')
    location_config = {'LocationConstraint': 'ap-southeast-2'}

    try:
        result = s3_client.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration=location_config,
        )
    except ClientError as err:
        # An already-owned bucket gets its own message; anything else is
        # reported generically.
        already_owned = err.response['Error']['Code'] == 'BucketAlreadyOwnedByYou'
        if already_owned:
            print(f'Bucket {bucket_name} already exists and is owned by you.')
        else:
            print(f'Error creating bucket: {err}')
        return None

    location = result["Location"]
    print(f'Bucket created: {location}')
    return location

def upload_file_to_bucket(bucket_name, file_name):
    """Upload a local file to an S3 bucket, using the file name as object key.

    Args:
        bucket_name: Destination bucket.
        file_name: Local path of the file; also used as the S3 object key.

    Returns:
        None. Upload failures reported by boto3 are printed, not re-raised.
    """
    s3 = boto3.client('s3', region_name='ap-southeast-2')
    try:
        s3.upload_file(file_name, bucket_name, file_name)
        print(f'File {file_name} uploaded to bucket {bucket_name}.')
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # upload_file reports a failed transfer as S3UploadFailedError rather
        # than ClientError, so both must be caught for this handler to fire.
        print(f'Error uploading file: {e}')

# Date-stamped names: one bucket and one CSV per calendar day.
# `now` is rebound here to the current timestamp; later cells read its
# year/month/day fields as well.
now = datetime.now()
file_name = f'movies_today_{now.year}_{now.month}_{now.day}.csv'
bucket_name = f'movie-time-data-{now.year}-{now.month}-{now.day}'

# Provision the bucket, then push today's CSV into it
# (the object key equals the local file name).
create_bucket(bucket_name)
upload_file_to_bucket(bucket_name, file_name)

Bucket created: http://movie-time-data-2025-2-23.s3.amazonaws.com/
File movies_today_2025_2_23.csv uploaded to bucket movie-time-data-2025-2-23.


In [None]:
# Bedrock Agent client used to manage the knowledge base and its data source
bedrock = boto3.client('bedrock-agent', region_name='ap-southeast-2')

# Embedding side of the knowledge base (Titan Text Embeddings v2).
knowledge_base_config = {
    "type": "VECTOR",
    "vectorKnowledgeBaseConfiguration": {
        "embeddingModelArn": "arn:aws:bedrock:ap-southeast-2::foundation-model/amazon.titan-embed-text-v2:0",
        'embeddingModelConfiguration': {
            'bedrockEmbeddingModelConfiguration': {
                # Reuse the constant the Pinecone index was sized with instead
                # of repeating the literal, so the two cannot drift apart.
                'dimensions': EMBEDDING_DIMENSION_SIZE,
                # 'embeddingDataType': 'BINARY'
            }
        },

    },
}

# Vector store side: the Pinecone index plus the secret holding its API key.
# NOTE(review): `pinecone_index_url` and `pinecone_aws_arn` are not assigned in
# any visible cell - presumably provided by `common`; confirm.
storageConfiguration = {
    "type": "PINECONE",
    "pineconeConfiguration" : {
        "connectionString": pinecone_index_url,
        "credentialsSecretArn": pinecone_aws_arn,
        "fieldMapping": {
            "metadataField": "metadata",
            "textField": "text",
        }
    }
}

# NOTE(review): the role ARN hardcodes an AWS account id and a generated role
# name; consider moving it to configuration.
response = bedrock.create_knowledge_base(
    name=f"PineconKnowledgeBase{now.year}{now.month}{now.day}",
    description="Knowledge base using S3 and Pinecone",
    knowledgeBaseConfiguration=knowledge_base_config,
    roleArn="arn:aws:iam::116981794561:role/service-role/AmazonBedrockExecutionRoleForKnowledgeBase_xi4h1",
    storageConfiguration=storageConfiguration,
)
print(response)
# Kept for later cells, which read knowledgeBase['knowledgeBaseId']
knowledgeBase = response["knowledgeBase"]

In [None]:
# Register the S3 bucket as a data source of the knowledge base.
# The bucket ARN is built from `bucket_name` (the bucket that was actually
# created and uploaded to) rather than re-deriving the name from `now`, so the
# data source cannot point at a different bucket if `now` has changed.
create_ds_response = bedrock.create_data_source(
    name = f'movie-time-data-{now.year}-{now.month}-{now.day}',
    description = "movie time",
    knowledgeBaseId = knowledgeBase['knowledgeBaseId'],
    dataSourceConfiguration = {
        "type": "S3",
        "s3Configuration": {
            "bucketArn": f"arn:aws:s3:::{bucket_name}"
        }
    },
)
# Kept for later cells, which read ds["dataSourceId"]
ds = create_ds_response["dataSource"]

In [None]:
# Start ingesting the data source and wait for the job to finish, so that
# cells querying the knowledge base do not run against a store that has not
# been ingested yet.
INGESTION_POLL_SECONDS = 10
INGESTION_TIMEOUT_SECONDS = 900
INGESTION_TERMINAL_STATES = ("COMPLETE", "FAILED", "STOPPED")

kb_id = knowledgeBase['knowledgeBaseId']
ds_id = ds["dataSourceId"]

# Result is discarded; kept from the original as a lookup of the data source
# before the job is started.
bedrock.get_data_source(knowledgeBaseId = kb_id, dataSourceId = ds_id)
start_job_response = bedrock.start_ingestion_job(knowledgeBaseId = kb_id, dataSourceId = ds_id)

ingestion_job = start_job_response["ingestionJob"]
deadline = time.monotonic() + INGESTION_TIMEOUT_SECONDS
while ingestion_job["status"] not in INGESTION_TERMINAL_STATES:
    if time.monotonic() > deadline:
        raise TimeoutError(
            f"Ingestion job {ingestion_job['ingestionJobId']} still "
            f"{ingestion_job['status']} after {INGESTION_TIMEOUT_SECONDS}s"
        )
    time.sleep(INGESTION_POLL_SECONDS)
    ingestion_job = bedrock.get_ingestion_job(
        knowledgeBaseId = kb_id,
        dataSourceId = ds_id,
        ingestionJobId = ingestion_job["ingestionJobId"],
    )["ingestionJob"]

print(f"Ingestion job {ingestion_job['ingestionJobId']} finished with status {ingestion_job['status']}")
if ingestion_job["status"] != "COMPLETE":
    raise RuntimeError(f"Ingestion did not complete: {ingestion_job['status']}")

In [37]:
# Runtime client used to query the knowledge base
bedrock_agent_runtime = boto3.client(
    'bedrock-agent-runtime',
    region_name='ap-southeast-2',
)

# Display label for the model, and the foundation-model ARN sent with queries
model = "Mistral Large (24.02)"
model_arn = 'arn:aws:bedrock:ap-southeast-2::foundation-model/mistral.mistral-large-2402-v1:0'

In [38]:
def ask_bedrock_llm_with_knowledge_base(query: str, model_arn: str, kb_id: str) -> dict:
    """Run a retrieve-and-generate request against a Bedrock knowledge base.

    Uses the module-level `bedrock_agent_runtime` client.

    Args:
        query: The question text sent as the request input.
        model_arn: ARN of the model used for generation.
        kb_id: Identifier of the knowledge base to retrieve from.

    Returns:
        The full response of `retrieve_and_generate`, unmodified. Callers in
        this notebook read `response['output']['text']` and
        `response['citations']` from it, so this is a dict, not a string.
    """
    response = bedrock_agent_runtime.retrieve_and_generate(
        input={
            'text': query
        },
        retrieveAndGenerateConfiguration={
            'type': 'KNOWLEDGE_BASE',
            'knowledgeBaseConfiguration': {
                'knowledgeBaseId': kb_id,
                'modelArn': model_arn
            }
        },
    )

    return response

In [None]:
query = "can you recommend some movie for today?"

# Ask the model, grounded on the knowledge base
response = ask_bedrock_llm_with_knowledge_base(query, model_arn, knowledgeBase['knowledgeBaseId'])
generated_text = response['output']['text']
citations = response["citations"]

# Flatten the text of every retrieved reference across all citations, in order
contexts = [
    reference["content"]["text"]
    for citation in citations
    for reference in citation["retrievedReferences"]
]

# Show the answer first, then the passages it was grounded on
print(f"---------- Generated using {model}:")
print(generated_text)
print(f'---------- The citations for the response generated by {model}:')
print(contexts)
print()