In [1]:
%pip install unstructured[all-docs] transformers opensearch-py boto3


Collecting transformers
  Downloading transformers-4.41.2-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting unstructured[all-docs]
  Downloading unstructured-0.14.6-py3-none-any.whl.metadata (28 kB)
Collecting chardet (from unstructured[all-docs])
  Downloading chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)
Collecting filetype (from unstructured[all-docs])
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting python-magic (from unstructured[all-docs])
  Downloading python_magic-0.4.27-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting lxml (from unstructured[all-docs])
  Downloading lxml-5.2.2-cp312-cp312-manylinux_2_28_aarch64.whl.metadata (3.4 kB)
Collecting nltk (from unstructured[all-docs])
  Downloading nltk-3.8.1-py3-none-any.whl.metadata (2.8 kB)
Collecting tabulate (from unstructured[all-docs])
  Downloading tabulate-0.9.0-py3-none-any.whl

In [2]:
import boto3
import json

bedrock_runtime_client = boto3.client('bedrock-runtime',region_name='us-west-2')

summary_prompt = """You are an assistant tasked with summarizing tables and text. \
Give a concise summary of the table or text. Table or text chunk: {element} """

In [3]:
def invoke_model(input):
    response = bedrock_runtime_client.invoke_model(
        body=json.dumps({
            'inputText': input
        }),
        modelId="amazon.titan-embed-text-v1",
        accept="application/json",
        contentType="application/json",
    )
    response_body = json.loads(response.get("body").read())
    return response_body.get("embedding")


In [5]:
input=invoke_model(summary_prompt)

In [6]:
body=json.dumps({
            "prompt": "\n\nHuman: {input}\n\nAssistant:".format(input=input),
            "max_tokens_to_sample": 300,
            "temperature": 0.5,
            "top_k": 250,
            "top_p": 1,
            "stop_sequences": [
                "\n\nHuman:"
            ],
            # "anthropic_version": "bedrock-2023-05-31"
        })

In [7]:
print(body)

{"prompt": "\n\nHuman: [0.671875, -0.31054688, -0.011230469, 0.092285156, -0.18261719, -0.041992188, -0.08935547, 3.412366e-06, 0.35351562, -0.5234375, 0.092285156, 0.88671875, 0.16699219, 0.07714844, 0.4140625, -0.110839844, 0.09472656, 0.58984375, -0.87109375, 0.34960938, -0.34765625, -0.045410156, -0.123535156, -0.18554688, 0.5234375, -0.10107422, -0.015625, -0.59765625, -0.18066406, -0.15332031, 0.43359375, 0.18847656, 0.16210938, -0.69921875, 0.10888672, -0.20800781, 0.018066406, 0.40039062, 0.27539062, -0.59375, -0.024047852, 0.2734375, -0.111328125, -0.088378906, -0.26367188, 0.010009766, 0.019042969, -0.13671875, -0.5234375, 0.54296875, -0.14453125, 1.015625, 0.0012435913, 0.2265625, 0.46289062, 0.59765625, 0.3671875, -0.36132812, 0.22363281, 0.11035156, -0.0026550293, -0.265625, 0.49804688, -0.15332031, 0.20507812, 0.7265625, -0.049316406, -0.14160156, 0.17675781, 1.3671875, -0.328125, -0.25195312, 0.671875, -0.0055236816, 0.47265625, -0.071777344, -0.059814453, 0.24609375, 0.

In [8]:
from enum import Enum
class Models(Enum):
    # Enum for storing model identifiers for different AI models.
    # Each member represents a specific model hosted by various platforms like Anthropic and Meta.
    
    Sonnet = "anthropic.claude-3-sonnet-20240229-v1:0"  # Identifier for the 'Sonnet' model by Anthropic.
    Haiku = "anthropic.claude-3-haiku-20240307-v1:0"   # Identifier for the 'Haiku' model by Anthropic.
    Llama = "meta.llama3-8b-instruct-v1:0"            # Identifier for the 'Llama' model by Meta.
    Cohere = "cohere.command-r-plus-v1:0"      # Identifier for the 'Cohere' model.

In [9]:
#Function for caling the Bedrock Converse API...
def converse_with_tools(model: Models, messages, system=''):
    m=model.value
    response = bedrock_runtime_client.converse(
        modelId=m,
        messages=messages,
        inferenceConfig={
                "maxTokens": 2000,  # Maximum number of tokens that the model can generate.
                "temperature": 0.0  # Temperature controls the randomness of the response; 0.0 makes it deterministic.
            },
    )
    return response

In [12]:
# Initialize a list to keep track of the messages sent and received.
messages = []
input = "hello world"
# Add the initial user prompt to the messages list.
messages.append({
        "role": "user",
        "content": [{"text": input}]
    })

In [13]:
# Invoke the model the first time using the initial set of messages.
output = converse_with_tools(Models.Sonnet, messages, "")
print(output)

{'ResponseMetadata': {'RequestId': '6ebbdb57-8af6-40bf-9300-37075b753155', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 20 Jun 2024 04:09:19 GMT', 'content-type': 'application/json', 'content-length': '255', 'connection': 'keep-alive', 'x-amzn-requestid': '6ebbdb57-8af6-40bf-9300-37075b753155'}, 'RetryAttempts': 0}, 'output': {'message': {'role': 'assistant', 'content': [{'text': "Hello! I'm an AI assistant created by Anthropic. How can I help you today?"}]}}, 'stopReason': 'end_turn', 'usage': {'inputTokens': 9, 'outputTokens': 23, 'totalTokens': 32}, 'metrics': {'latencyMs': 1279}}


In [18]:
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
host = 'erz3fznnf3antezuu2bc.us-west-2.aoss.amazonaws.com'
region = 'us-west-2'
service = 'aoss'
index = 'bedrock-knowledge-base-default-index'
credentials = boto3.Session().get_credentials()
auth = AWSV4SignerAuth(credentials, region, service)

ospy_client = OpenSearch(
    hosts = [{'host': host, 'port': 443}],
    http_auth = auth,
    use_ssl = True,
    verify_certs = True,
    connection_class = RequestsHttpConnection,
    pool_maxsize = 20
)

In [75]:
question = 'What can are the characteristics of bull dog breed? Summarize in one line'
embedding = invoke_model(question)
k = 4 # number of neighbours, size and k are the same to return k results in total. If size is not specified, k results will be returned per shard.
query = {
  "size": 7,
  "query": {
    "knn": {
      "bedrock-knowledge-base-default-vector": {
        "vector": embedding,
        "k": 2
      }
    }
  }
}

print(json.dumps(query))

response = ospy_client.search(
    body = query,
    index = index
)

hits = response['hits']['hits']
context = []
for hit in hits:
    #print(json.dumps(hit["_source"]["AMAZON_BEDROCK_TEXT_CHUNK"], indent=4))
    context.append(hit["_source"]["AMAZON_BEDROCK_TEXT_CHUNK"])
    context.append(hit["_source"]["AMAZON_BEDROCK_METADATA"])

print(context)

{"size": 7, "query": {"knn": {"bedrock-knowledge-base-default-vector": {"vector": [0.27734375, -0.6640625, -0.74609375, -0.09765625, -0.012084961, 0.15722656, 0.41601562, -0.0004005432, -0.22070312, 0.028198242, -0.01574707, -0.041992188, -0.28320312, -0.05493164, -0.20019531, -0.37109375, 0.19433594, -0.24414062, -0.49023438, 0.3125, 0.034179688, -0.07128906, 0.49023438, 0.2109375, 0.44921875, -0.47070312, 0.39453125, 0.053710938, -0.05859375, -0.060791016, 0.35351562, 0.33398438, 0.083984375, -0.84765625, 0.421875, -1.21875, -0.06347656, -0.19726562, 0.41210938, -1.015625, 0.28320312, 0.59375, 0.21582031, 0.20996094, 0.50390625, -0.48046875, 0.1875, 0.20410156, -0.21386719, -0.2578125, -0.15429688, -0.19042969, -0.30273438, -0.23535156, -0.18652344, -0.49414062, 0.013671875, -0.24902344, -0.66796875, -0.42773438, -0.07470703, -0.21386719, 0.55859375, -0.020507812, 0.4140625, -0.5625, 0.70703125, -0.20019531, 0.107421875, 0.17773438, -0.50390625, -1.375, 0.80078125, 0.033935547, -0.08

In [76]:
#Function for caling the Bedrock Converse API...
def converse_with_tools(model: Models, messages):
    m=model.value
    response = bedrock_runtime_client.converse(
        modelId=m,
        messages=messages,
        inferenceConfig={
                "maxTokens": 2000,  # Maximum number of tokens that the model can generate.
                "temperature": 0.0  # Temperature controls the randomness of the response; 0.0 makes it deterministic.
            }
    )
    return response

In [77]:
prompt_template = """
    The following is a friendly conversation between a human and an AI. 
    The AI is talkative and provides lots of specific details from its context.
    If the AI does not know the answer to a question, it truthfully says it 
    does not know.
    {context}
    Instruction: Based on the above documents, provide a detailed answer including document source for, {question} Answer "don't know" 
    if not present in the document. 
    Solution:"""


llm_prompt = prompt_template.format(context='\n'.join(context),question=question)

    # Initialize a list to keep track of the messages sent and received.
messages = []

    # Add the initial user prompt to the messages list.
messages.append({
        "role": "user",
        "content": [{"text": llm_prompt}]
    })

#print (messages)
output = converse_with_tools(Models.Sonnet,messages)
#print(output)
print(output['output']['message']['content'][0]['text'])

According to the document "Dog-breed-book-low-resolution.pdf", the English Bulldog breed has the following characteristics:

"The English Bulldog is a friendly and patient dog known for getting along well with children, other dogs, and other types of pets. These dogs are muscular and heavy and some find their appearance intimidating, but they're normally absolute sweethearts and amongst the gentlest of all dog breeds. Bulldogs are calm dogs that like to relax in a comfortable chair and watch TV with their humans. They need a walk every day, but they're far from the explosive bundles of energy characteristic for many other dog breeds."

In one line: The English Bulldog is a friendly, patient, calm breed that gets along well with children and other pets despite its muscular, intimidating appearance.
