# Amazon Bedrock - vision - Latency Benchmark Tool
This notebook contains a set of tools to benchmark inference latency for Foundation Models available in Amazon Bedrock. 

In [1]:
!pip install -qU pip
!pip install -qU --upgrade boto3 awscli matplotlib numpy pandas nbdime anthropic openai

### Claude 3 sonnet vs GPT-4-Vision  

In [2]:
!wget https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg -O 2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg

--2024-03-14 01:44:26--  https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg
Resolving upload.wikimedia.org (upload.wikimedia.org)... 208.80.154.240, 2620:0:861:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|208.80.154.240|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1063892 (1.0M) [image/jpeg]
Saving to: ‘2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg’


2024-03-14 01:44:26 (7.66 MB/s) - ‘2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg’ saved [1063892/1063892]



In [3]:
from PIL import Image 
 
# Opens a image in RGB mode 
im = Image.open(r"2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg") 
 
# Size of the image in pixels (size of original image) 
# (This is not mandatory) 
width, height = im.size 
 
# # Setting the points for cropped image 
# left = 6
# top = height / 4
# right = 174
# bottom = 3 * height / 4
 
# # Cropped image of above dimension 
# # (It will not change original image) 
# im1 = im.crop((left, top, right, bottom))
resize_list = [(512, 512),(1024, 1024),(2048, 4096),(4096, 8000),(8000, 8000)]
for newsize in resize_list:
    #print(newsize[1])
    im1 = im.resize(newsize)
    im1.save(f"{newsize[0]}x{newsize[1]}.jpg")
    #im1.show() 

In [4]:
import base64
import requests
import json
import boto3
import time
from utils.key import OPENAI_API_KEY #请添加/修改/utils/key文件中OPENAI_API_KEY为您的可用的有效的key

stream = False

# Function to encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

In [5]:
# "model": "gpt-4-vision-preview",
def gpt_4_vision_preview(image_path,max_tokens):

    # Path to your image
    #image_path = image_path

    # Getting the base64 string
    base64_image = encode_image(image_path)

    headers = {
      "Content-Type": "application/json",
      "Authorization": f"Bearer {OPENAI_API_KEY}"
    }

    payload = {
      "model": "gpt-4-vision-preview",
      "messages": [
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": f"What’s in this image? output {max_tokens} tokens"
            },
            {
              "type": "image_url",
              "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}"
              }
            }
          ]
        }
      ],
      "max_tokens": max_tokens
    }
    start = time.time()
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    end = time.time()
    #print(response.json())
    
    return round(end-start,2),response.json()["usage"]["prompt_tokens"],response.json()["usage"]["completion_tokens"]


In [6]:
#"modelId": "anthropic.claude-3-sonnet-20240229-v1:0"
# Create a BedrockRuntime client
def anthropic_claude_3(modelId,image_path,max_tokens):
    bedrock_runtime = boto3.client('bedrock-runtime')
    base64_image = encode_image(image_path)
    payload = {
        "modelId": modelId,
        "contentType": "application/json",
        "accept": "application/json",
        "body": {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": base64_image
                            }
                        },
                        {
                            "type": "text",
                            #"text": "Write me a detailed description of these two photos, and then a poem talking about it."
                            "text": f"What’s in this image? output {max_tokens} tokens"
                        }
                    ]
                }
            ]
        }
    }

    # Convert the payload to bytes
    body_bytes = json.dumps(payload['body']).encode('utf-8')

    # Invoke the model
    response = bedrock_runtime.invoke_model(
        body=body_bytes,
        contentType=payload['contentType'],
        accept=payload['accept'],
        modelId=payload['modelId']
    )

    # Process the response
    response_body = json.loads(response['body'].read().decode('utf-8'))
    #print(response_body)
    #return round(end-start,2),response_body["usage"]["input_tokens"],response_body["usage"]["output_tokens"]
    return round(float(response['ResponseMetadata']['HTTPHeaders']['x-amzn-bedrock-invocation-latency'])/1000,2),int(response['ResponseMetadata']['HTTPHeaders']['x-amzn-bedrock-input-token-count']),int(response['ResponseMetadata']['HTTPHeaders']['x-amzn-bedrock-output-token-count'])


In [7]:
#"modelId": "anthropic.claude-3-sonnet-20240229-v1:0"
# Create a BedrockRuntime client
def anthropic_claude_3_stream(modelId,image_path,max_tokens):
    bedrock_runtime = boto3.client('bedrock-runtime')
    base64_image = encode_image(image_path)
    payload = {
        "modelId": modelId,
        "contentType": "application/json",
        "accept": "application/json",
        "body": {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": base64_image
                            }
                        },
                        {
                            "type": "text",
                            #"text": "Write me a detailed description of these two photos, and then a poem talking about it."
                            "text": f"What’s in this image? output {max_tokens} tokens"
                        }
                    ]
                }
            ]
        }
    }

    # Convert the payload to bytes
    body_bytes = json.dumps(payload['body']).encode('utf-8')

    # Invoke the model
    response = bedrock_runtime.invoke_model_with_response_stream(
        body=body_bytes, modelId=payload['modelId'], accept=payload['accept'], contentType=payload['contentType']
    )
    stream = response.get('body')
    chunk_obj = {}
   
    if stream:
        for event in stream:
            chunk = event.get('chunk')
            if chunk:
                chunk_obj = json.loads(chunk.get('bytes').decode())
                #print(chunk_obj)

    # Process the response
    #response_body = json.loads(response['body'].read().decode('utf-8'))
    #print(response_body)
    {'type': 'message_stop', 'amazon-bedrock-invocationMetrics': {'inputTokenCount': 92, 'outputTokenCount': 277, 'invocationLatency': 3679, 'firstByteLatency': 677}}
    
    return round(float(chunk_obj['amazon-bedrock-invocationMetrics']['firstByteLatency'])/1000,2),round(float(chunk_obj['amazon-bedrock-invocationMetrics']['invocationLatency'])/1000,2),chunk_obj['amazon-bedrock-invocationMetrics']['inputTokenCount'],chunk_obj['amazon-bedrock-invocationMetrics']['outputTokenCount']

In [8]:
max_tokens = 200
modelId = "anthropic.claude-3-haiku-20240307-v1:0"
for newsize in resize_list:
    image_path = f"{newsize[0]}x{newsize[1]}.jpg"
    print(f"-----------{image_path}---------")
    print(gpt_4_vision_preview(image_path,max_tokens))
    
    print(anthropic_claude_3(modelId,image_path,max_tokens))
    print(anthropic_claude_3_stream(modelId,image_path,max_tokens))
    
    modelId = "anthropic.claude-3-sonnet-20240229-v1:0"
    print(anthropic_claude_3(modelId,image_path,max_tokens))
    print(anthropic_claude_3_stream(modelId,image_path,max_tokens))
    #print(f"-----------end {image_path}---------")

-----------512x512.jpg---------
(11.2, 272, 163)
(1.59, 458, 119)
(0.61, 1.6, 458, 103)
(4.85, 412, 200)
(0.51, 5.29, 412, 200)
-----------1024x1024.jpg---------
(14.52, 782, 175)
(7.21, 1420, 200)
(1.67, 7.0, 1420, 200)
(8.03, 1420, 200)
(1.24, 6.83, 1420, 200)
-----------2048x4096.jpg---------
(10.14, 1122, 149)
(10.86, 1619, 192)
(2.01, 8.73, 1619, 200)
(10.26, 1619, 200)
(2.5, 9.21, 1619, 200)
-----------4096x8000.jpg---------
(16.68, 1122, 200)
(10.12, 1591, 200)
(3.27, 10.52, 1591, 179)
(10.03, 1591, 200)
(3.09, 9.61, 1591, 200)
-----------8000x8000.jpg---------
(19.16, 782, 200)


ValidationException: An error occurred (ValidationException) when calling the InvokeModel operation: messages.0.content.0.image.source.base64: image exceeds 5 MB maximum: 6634428 bytes > 5242880 bytes

# Done