In [None]:
import IPython

app = IPython.Application.instance()

app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

In [1]:
from pinecone import PodSpec, ServerlessSpec
from pinecone.grpc import PineconeGRPC as Pinecone

# Set API Key
PINECONE_API_KEY = ""  # @param {type:"string"}

pc = Pinecone(api_key=PINECONE_API_KEY)

In [None]:
# Index Configs
INDEX_NAME = "movie-time-hk"  # @param {type:"string"}

# Choose a distance metric
DISTANCE_METRIC = (
    "cosine"  # @param ["cosine", "euclidean", "dotproduct"] {allow-input: true}
)

EMBEDDING_DIMENSION_SIZE = 1024  # @param {"type":"number","placeholder":"768"}

CLOUD_PROVIDER = "aws"  # @param ["gcp", "aws", "azure"] {allow-input: true}

CLOUD_REGION = "us-east-1"  # @param {type:"string"}


# Create the index
pc.create_index(
    name=INDEX_NAME,
    dimension=EMBEDDING_DIMENSION_SIZE,
    metric=DISTANCE_METRIC,
    spec=ServerlessSpec(cloud=CLOUD_PROVIDER, region=CLOUD_REGION),
    deletion_protection="disabled",
)

In [1]:
!pip3 install -qU boto3


[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import boto3
import sys
print('Running boto3 version:', boto3.__version__)
boto3.DEFAULT_SESSION = boto3.Session()

Running boto3 version: 1.36.21


In [None]:
import json
import os

pinecone_secret_filename = ""
def get_or_create_secret(filename, secret_name, secret_value):
    if os.path.exists(filename):
        print(f'File {filename} already exists. Reading secret ARN from the file.')
        with open(filename, 'r') as file:
            secret_arn = file.read().strip()
            print(f'Secret ARN: {secret_arn}')
            return secret_arn  # Return the existing secret ARN

    # If the file does not exist, create the secret
    print(f'File {filename} does not exist. Creating secret.')
    secret_arn = create_secret(secret_name, secret_value)

    if secret_arn:
        # Write the secret ARN to the file
        with open(filename, 'w') as file:
            file.write(secret_arn)
            print(f'Secret ARN saved to {filename}.')

    return secret_arn

def create_secret(secret_name, secret_value):
    # Create a Secrets Manager client
    client = boto3.client('secretsmanager', region_name='ap-southeast-2')

    try:
        # Create the secret
        response = client.create_secret(
            Name=secret_name,
            SecretString=json.dumps(secret_value)
        )
        print(f'Secret created: {response["ARN"]}')
        return response["ARN"]
    except Exception as e:
        if e.response['Error']['Code'] == 'ResourceExistsException':
            print(f'Secret {secret_name} already exists.')
        else:
            print(f'Error creating secret: {e}')

# Define the secret name and value
secret_name = ''
secret_value = {
    'apiKey':   # Replace with your actual API key
}

# Create and save the secret
get_or_create_secret(pinecone_secret_filename, secret_name, secret_value)

In [None]:
from botocore.exceptions import ClientError

def create_bucket(bucket_name):
    s3 = boto3.client('s3', region_name='ap-southeast-2')
    try:
        # Create the S3 bucket
        response = s3.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={
                'LocationConstraint': 'ap-southeast-2'
            }
        )
        print(f'Bucket created: {response["Location"]}')
        return response["Location"]
    except ClientError as e:
        if e.response['Error']['Code'] == 'BucketAlreadyOwnedByYou':
            print(f'Bucket {bucket_name} already exists and is owned by you.')
        else:
            print(f'Error creating bucket: {e}')

def upload_file_to_bucket(bucket_name, file_name):
    s3 = boto3.client('s3', region_name='ap-southeast-2')
    try:
        # Upload the file to the bucket
        s3.upload_file(file_name, bucket_name, file_name)
        print(f'File {file_name} uploaded to bucket {bucket_name}.')
    except ClientError as e:
        print(f'Error uploading file: {e}')

# Define the bucket name and the file name
bucket_name = 'movie-time-data' 
file_name = 'movies_today_2025_2_16.csv'

# Create the bucket
create_bucket(bucket_name)

# Upload the file to the bucket
upload_file_to_bucket(bucket_name, file_name)

In [None]:
bedrock = boto3.client('bedrock-agent', region_name='ap-southeast-2')

knowledge_base_config = {
    "type": "VECTOR",
    "vectorKnowledgeBaseConfiguration": {
        "embeddingModelArn": "",
        'embeddingModelConfiguration': {
            'bedrockEmbeddingModelConfiguration': {
                'dimensions': 1024,
            }
        },

    },
}

storageConfiguration = {
    "type": "PINECONE",
    "pineconeConfiguration" : {
        "connectionString": "",
        "credentialsSecretArn": "",
        "fieldMapping": {
            "metadataField": "metadata",
            "textField": "text",
        }
    }
}
response = bedrock.create_knowledge_base(
    name="PineconeKnowledgeBase",
    description="Knowledge base using S3 and Pinecone",
    knowledgeBaseConfiguration=knowledge_base_config,
    roleArn="",
    storageConfiguration=storageConfiguration,
)
print(response)
knowledgeBase = response["knowledgeBase"]

In [None]:
create_ds_response = bedrock.create_data_source(
    name = "pineconeMoviedataSource",
    description = "data for movie time",
    knowledgeBaseId = knowledgeBase['knowledgeBaseId'],
    dataSourceConfiguration = {
        "type": "S3",
        "s3Configuration": {
            "bucketArn": ""
        }
    },
)
ds = create_ds_response["dataSource"]

In [None]:
bedrock.get_data_source(knowledgeBaseId = knowledgeBase['knowledgeBaseId'], dataSourceId = ds["dataSourceId"])

In [35]:
start_job_response = bedrock.start_ingestion_job(knowledgeBaseId = knowledgeBase['knowledgeBaseId'], dataSourceId = ds["dataSourceId"])

In [36]:
job = start_job_response["ingestionJob"]
print(job)

{'dataSourceId': 'QFF9EIV7MG', 'ingestionJobId': 'XIAPK6AHFT', 'knowledgeBaseId': 'SXXNMXKPPL', 'startedAt': datetime.datetime(2025, 2, 16, 9, 12, 0, 537551, tzinfo=tzutc()), 'statistics': {'numberOfDocumentsDeleted': 0, 'numberOfDocumentsFailed': 0, 'numberOfDocumentsScanned': 0, 'numberOfMetadataDocumentsModified': 0, 'numberOfMetadataDocumentsScanned': 0, 'numberOfModifiedDocumentsIndexed': 0, 'numberOfNewDocumentsIndexed': 0}, 'status': 'STARTING', 'updatedAt': datetime.datetime(2025, 2, 16, 9, 12, 0, 537551, tzinfo=tzutc())}


In [64]:
response = boto3.client('bedrock', region_name='ap-southeast-2').list_foundation_models()
print(response)

{'ResponseMetadata': {'RequestId': 'cbf8b501-7ca2-4279-bd0e-b300f7483387', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sun, 16 Feb 2025 09:44:15 GMT', 'content-type': 'application/json', 'content-length': '11011', 'connection': 'keep-alive', 'x-amzn-requestid': 'cbf8b501-7ca2-4279-bd0e-b300f7483387'}, 'RetryAttempts': 0}, 'modelSummaries': [{'modelArn': 'arn:aws:bedrock:ap-southeast-2::foundation-model/amazon.titan-text-lite-v1:0:4k', 'modelId': 'amazon.titan-text-lite-v1:0:4k', 'modelName': 'Titan Text G1 - Lite', 'providerName': 'Amazon', 'inputModalities': ['TEXT'], 'outputModalities': ['TEXT'], 'responseStreamingSupported': True, 'customizationsSupported': [], 'inferenceTypesSupported': ['PROVISIONED'], 'modelLifecycle': {'status': 'ACTIVE'}}, {'modelArn': 'arn:aws:bedrock:ap-southeast-2::foundation-model/amazon.titan-text-lite-v1', 'modelId': 'amazon.titan-text-lite-v1', 'modelName': 'Titan Text G1 - Lite', 'providerName': 'Amazon', 'inputModalities': ['TEXT'], 'outputModaliti

In [65]:
# Initialize the Bedrock Agent Runtime client
bedrock_agent_runtime = boto3.client('bedrock-agent-runtime', region_name='ap-southeast-2')
model = "Mistral Large (24.02)"
model_arn = f'arn:aws:bedrock:ap-southeast-2::foundation-model/mistral.mistral-large-2402-v1:0'

In [57]:
def ask_bedrock_llm_with_knowledge_base(query: str, model_arn: str, kb_id: str) -> str:
    response = bedrock_agent_runtime.retrieve_and_generate(
        input={
            'text': query
        },
        retrieveAndGenerateConfiguration={
            'type': 'KNOWLEDGE_BASE',
            'knowledgeBaseConfiguration': {
                'knowledgeBaseId': kb_id,
                'modelArn': model_arn
            }
        },
    )

    return response

In [67]:
query = "can you recommend movies showing today?"


response = ask_bedrock_llm_with_knowledge_base(query, model_arn, knowledgeBase['knowledgeBaseId'])
generated_text = response['output']['text']
citations = response["citations"]
contexts = []
for citation in citations:
    retrievedReferences = citation["retrievedReferences"]
    for reference in retrievedReferences:
        contexts.append(reference["content"]["text"])
print(f"---------- Generated using {model}:")
print(generated_text )
print(f'---------- The citations for the response generated by {model}:')
print(contexts)
print()

---------- Generated using Mistral Large (24.02):
Here are some movies showing today: "Farewell To My Concubine (4K Restored Version)", "Bridget Jones: Mad About the Boy", "The Motive and the Cue (NT Live 2025)", "THERE'S STILL TOMORROW", "K-Family Affairs", "Moana 2", and "Maria".
---------- The citations for the response generated by Mistral Large (24.02):
['0, 761017), \'threatre\': \'百老匯電影中心\'}]"\r Farewell To My Concubine (4K Restored Version),/movie/e7dfd3d3-4c84-4348-8e41-2e94d26a9105/Farewell_To_My_Concubine_(4K_Restored_Version),"[{\'date\': datetime.datetime(2025, 2, 16, 14, 0, 0, 761017), \'threatre\': \'K11 Art House\'}]"\r Bridget Jones: Mad About the Boy,/movie/0b5d270b-748c-4f18-aa13-6aef90c06c74/Bridget_Jones:_Mad_About_the_Boy,"[{\'date\': datetime.datetime(2025, 2, 16, 0, 20, 0, 761017), \'threatre\': \'巴黎倫敦紐約米蘭戲院\'}]"\r The Motive and the Cue (NT Live 2025),/movie/cd41412e-7cb7-416f-93b3-0dfa83af1d74/The_Motive_and_the_Cue_(NT_Live_2025),"[{\'date\': datetime.datetim