# Amazon Bedrock

導入庫

In [None]:
# 導入庫
import boto3
# Resolve the region configured for a fresh boto3 session.
session = boto3.session.Session()
region = session.region_name

# Create the Bedrock Agent Runtime client used by the helper functions below.
bedrock_client = boto3.client('bedrock-agent-runtime')

調用 API 的函數 `retrieve_and_generate`

In [None]:
def retrieveAndGenerate(
    input_text,
    sourceType,
    model_id,
    region,
    document_s3_uri=None,
    data=None,
    identifier=None
):
    """Ask a question about a single external document.

    Builds one ``EXTERNAL_SOURCES`` request and sends it through the
    module-level ``bedrock_client``.

    Args:
        input_text: The question text sent as the request input.
        sourceType: ``"S3"`` selects the S3 location form; any other value
            selects the inline byte-content form.
        model_id: Foundation model identifier used to build the model ARN.
        region: AWS region name used to build the model ARN.
        document_s3_uri: URI of the document; only used when
            ``sourceType == "S3"``.
        data: Inline document content; only used for non-S3 sources.
        identifier: Name attached to the inline document; only used for
            non-S3 sources.

    Returns:
        The response of ``bedrock_client.retrieve_and_generate``.
    """
    # ARN of the foundation model that generates the answer.
    foundation_model_arn = f'arn:aws:bedrock:{region}::foundation-model/{model_id}'

    # Only the location part of the source entry differs between the two forms.
    if sourceType == "S3":
        location = {
            "s3Location": {
                "uri": document_s3_uri
            }
        }
    else:
        location = {
            "byteContent": {
                "identifier": identifier,
                "contentType": "text/plain",
                "data": data
            }
        }

    source_entry = {"sourceType": sourceType, **location}

    return bedrock_client.retrieve_and_generate(
        input={'text': input_text},
        retrieveAndGenerateConfiguration={
            'type': 'EXTERNAL_SOURCES',
            'externalSourcesConfiguration': {
                'modelArn': foundation_model_arn,
                'sources': [source_entry]
            }
        }
    )

跟文件對話

In [None]:
# Model ID
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
# Replace with your own S3 URI, e.g.:
# document_uri = "s3://data-dump-2024/FAQ_Zappos.com.pdf"
# NOTE(review): my_chatbot passes this value as document_s3_uri with
# sourceType="S3", but it is a bare file name rather than an s3:// URI —
# confirm the intended object location before running.
document_uri = "論文01.pdf"

In [None]:
def my_chatbot(question):
    """Send ``question`` about the configured S3 document and return the raw response.

    Uses the module-level ``model_id``, ``region`` and ``document_uri``.
    """
    return retrieveAndGenerate(
        question,
        "S3",
        model_id,
        region,
        document_s3_uri=document_uri,
    )

提問

In [None]:
# Alternative sample question: "What is your refund policy?"
# Ask who the authors and examiners of the thesis are.
my_question = "論文的作者及考官有哪幾位？"
response = my_chatbot(question=my_question)
# Print only the generated answer text from the response.
print(response['output']['text'])

提問

In [None]:
# Ask how long it takes to receive a refund.
my_question =  "我需要多長時間才能收到退款？"
response = my_chatbot(question=my_question)
# Print only the generated answer text from the response.
print(response['output']['text'])

查詢版本

In [None]:
# Echo the installed boto3 version as the cell output.
boto3.__version__

# 改為本地文件

In [None]:
%pip install PyPDF2

In [3]:
import boto3
from PyPDF2 import PdfReader
import base64
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv before any AWS object is created.
load_dotenv()

# Resolve the region configured for a fresh boto3 session.
session = boto3.session.Session()
region = session.region_name

# Create the Bedrock Agent Runtime client used by retrieveAndGenerate.
bedrock_client = boto3.client('bedrock-agent-runtime')

def _decode_document_bytes(document_content):
    """Return the raw bytes of an inline document.

    A ``str`` is treated as base64 text and decoded back to the bytes it
    encodes; any other value (``bytes``, ``None``, ...) is returned unchanged.
    """
    if isinstance(document_content, str):
        # validate=True makes a non-base64 string fail loudly instead of
        # silently decoding to garbage.
        return base64.b64decode(document_content, validate=True)
    return document_content


def _detect_content_type(raw_bytes):
    """Return ``application/pdf`` for PDF bytes, otherwise ``text/plain``."""
    # Real PDF files start with the "%PDF-" magic number.
    if isinstance(raw_bytes, (bytes, bytearray)) and raw_bytes[:5] == b"%PDF-":
        return "application/pdf"
    return "text/plain"


def retrieveAndGenerate(
    input_text,
    sourceType,
    model_id,
    region,
    document_content=None,
    identifier=None,
    content_type=None
):
    """Ask a question about a single external document.

    Builds one ``EXTERNAL_SOURCES`` request and sends it through the
    module-level ``bedrock_client``.

    Args:
        input_text: The question text sent as the request input.
        sourceType: ``"S3"`` selects the S3 location form; any other value
            selects the inline byte-content form.
        model_id: Foundation model identifier used to build the model ARN.
        region: AWS region name used to build the model ARN.
        document_content: For ``"S3"`` the document URI. Otherwise the inline
            document, either as raw ``bytes`` or as a base64-encoded ``str``.
        identifier: Name attached to the inline document; only used for
            non-S3 sources.
        content_type: Optional MIME type of the inline document. When omitted
            it is detected from the payload (PDF magic number, else
            ``text/plain``).

    Returns:
        The response of ``bedrock_client.retrieve_and_generate``.

    Raises:
        ValueError: If a ``str`` inline document is not valid base64
            (``binascii.Error`` is a ``ValueError`` subclass).
    """
    model_arn = f'arn:aws:bedrock:{region}::foundation-model/{model_id}'

    if sourceType == "S3":
        source = {
            "sourceType": sourceType,
            "s3Location": {
                "uri": document_content
            }
        }
    else:
        # The SDK base64-encodes blob parameters itself, so it must receive the
        # raw bytes; forwarding base64 text would double-encode the document.
        raw_bytes = _decode_document_bytes(document_content)
        if content_type is None:
            # The previous hard-coded "application/pdf" was wrong for payloads
            # that are extracted text rather than an actual PDF file.
            content_type = _detect_content_type(raw_bytes)
        source = {
            "sourceType": sourceType,
            "byteContent": {
                "identifier": identifier,
                "contentType": content_type,
                "data": raw_bytes
            }
        }

    return bedrock_client.retrieve_and_generate(
        input={'text': input_text},
        retrieveAndGenerateConfiguration={
            'type': 'EXTERNAL_SOURCES',
            'externalSourcesConfiguration': {
                'modelArn': model_arn,
                'sources': [source]
            }
        }
    )

def read_pdf_as_base64(file_path, max_pages=5):
    """Extract text from the leading pages of a PDF and return it base64-encoded.

    Args:
        file_path: Path of the PDF file to open.
        max_pages: Upper bound on the number of pages read from the start.

    Returns:
        The concatenated page text, UTF-8 encoded and then base64-encoded,
        as a ``str``.
    """
    with open(file_path, "rb") as handle:
        reader = PdfReader(handle)
        page_count = min(max_pages, len(reader.pages))
        extracted = "".join(
            reader.pages[index].extract_text() for index in range(page_count)
        )
    encoded_bytes = base64.b64encode(extracted.encode('utf-8'))
    return encoded_bytes.decode('utf-8')

# Model used for generation and the local PDF file to chat with.
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
document_path = "論文01.pdf"
# Extract and base64-encode the document text once (default page limit of
# read_pdf_as_base64), then print it for inspection.
pdf_content_base64 = read_pdf_as_base64(document_path)
print(pdf_content_base64)

def my_chatbot(question):
    """Send ``question`` about the local document and return the raw response.

    Uses the module-level ``model_id``, ``region``, ``pdf_content_base64``
    and ``document_path``; the document is sent inline as ``BYTE_CONTENT``.
    """
    return retrieveAndGenerate(
        question,
        "BYTE_CONTENT",
        model_id,
        region,
        document_content=pdf_content_base64,
        identifier=document_path,
    )

# Question 1: who is the main author?
my_question = "主要的作者是誰？"
response = my_chatbot(question=my_question)
# Print the generated answer followed by extra blank lines as a separator.
print(response['output']['text'], '\n\n')

# Question 2: who are the examiners?
my_question =  "考試官有哪幾位？"
response = my_chatbot(question=my_question)
print(response['output']['text'])


IArlnIvnq4voh7rngaPluKvnr4TlpKflrbjmlZnogrLlrbjpmaLlnJbmm7jos4foqIrlrbjnoJTnqbbmiYAgIArlnJbmm7jos4foqIrlrbjmlbjkvY3lrbjnv5Lnoqnlo6vlnKjogbflsIjnj60gIArnoqnlo6voq5bmlocgIApHcmFkdWF0ZSBJbnN0aXR1dGUgb2YgTGlicmFyeSBhbmQgSW5mb3JtYXRpb24gU3R1ZGllcyAgCk9ubGluZSBDb250aW51aW5nIEVkdWNhdGlvbiBNYXN0ZXIncyBQcm9ncmFtIG9mIExpYnJhcnkgYW5kIEluZm9ybWF0aW9uIFN0dWRpZXMgIApDb2xsZWdlIG9mIEVkdWNhdGlvbiAgCk5hdGlvbmFsIFRhaXdhbiBOb3JtYWwgVW5pdmVyc2l0eSAgCk1hc3RlcuKAmXMgVGhlc2lzICAKICAK5Lul57WQ5qeL5pa556iL5qihIOWei+aOoueptuWci+Wwj+aVmeW4q+W3peS9nOWjk+WKm+WSjOWFtue3muS4iuaVmeWtuOihjOeCugrnnIvms5XnmoTlt67nlbDmgKcgIApQcm9iaW5nIGluIC1zZXJ2aWNlIGVsZW1lbnRhcnkgc2Nob29sIHRlYWNoZXJz4oCZIHBlcmNlcHRpb25zIG9mIAp3b3JrIHN0cmVzcyBhbmQgdGhlaXIgb25saW5lIHRlYWNoaW5nOiAgQSBzdHVkeSBvZiB0aGVpciBzdHJ1Y3R1cmFsIAptb2RlbHMgYW5kIHRlYWNoZXJz4oCZIGRlbW9ncmFwaGljcyAgCiAK5rKI5ZCb5YSSICAKU2hlbiwgQ2h1biAtSnUgCiAK5oyH5bCO5pWZ5o6IIO+8muaigeiHs+S4rSAg5Y2a5aOrIApBZHZpc29y77yaTGlhbmcsIEp5aCAtQ2hvbmcsIFBoLkQuICAKIArkuK3oj6/msJHlnIsgIDExMiDlubQgNyDm