# Amazon Bedrock Service Tier Performance Comparison

This notebook compares the performance of Amazon Bedrock's Priority, Standard (default), and Flex tiers for a given model.

In [None]:
import boto3
import json
import time
import pandas as pd
from datetime import datetime
from typing import Dict, List
import numpy as np

In [None]:
# Model that writes the test prompts (see generate_prompts).
NOVA_MODEL = 'us.amazon.nova-pro-v1:0'
# Model whose service tiers are being benchmarked (see test_tier).
MODEL_ID = 'moonshot.kimi-k2-thinking'
# Service tier names, passed one at a time as serviceTier={"type": ...}.
TIERS = ['priority', 'default', 'flex']

# Shared Bedrock runtime client used by every cell below.
bedrock = boto3.client(
    service_name='bedrock-runtime',
    region_name='us-west-2',
)

## Generate Test Prompts

This section uses Amazon Nova Pro to generate the test prompts: a mix of short, medium, and long prompts covering a variety of topics.

In [None]:
def generate_prompts(count: int = 60) -> List[str]:
    """Ask the prompt-generation model for a JSON array of test prompts.

    Args:
        count: Total number of prompts to request. The total is split as
            evenly as possible across short, medium, and long buckets
            (the default of 60 yields 20/20/20).

    Returns:
        The list of prompt strings parsed from the model's reply.

    Raises:
        ValueError: If ``count`` is not positive, or the reply does not
            contain a JSON array of strings (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    if count < 1:
        raise ValueError(f"count must be a positive integer, got {count}")

    # Split evenly; any remainder goes to the earlier buckets so the three
    # numbers always add up to `count`.
    base, extra = divmod(count, 3)
    short_n, medium_n, long_n = (
        base + (1 if bucket < extra else 0) for bucket in range(3)
    )

    prompt = f"""Generate {count} diverse prompts of varying lengths for testing an AI model. 
    Include:
    - {short_n} short prompts (10-30 words)
    - {medium_n} medium prompts (50-150 words)
    - {long_n} long prompts (200-400 words)
    
    Topics should vary: technical questions, creative writing, analysis, coding, explanations.
    Return ONLY a JSON array of strings, no other text."""

    response = bedrock.converse(
        modelId=NOVA_MODEL,
        messages=[{"role": "user", "content": [{"text": prompt}]}]
    )

    text = response['output']['message']['content'][0]['text']

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Despite the instruction, models sometimes wrap the array in
        # markdown fences or add commentary; fall back to the outermost
        # [...] span before giving up.
        first, last = text.find('['), text.rfind(']')
        if first == -1 or last <= first:
            raise ValueError(
                "Model response did not contain a JSON array"
            ) from None
        parsed = json.loads(text[first:last + 1])

    # Downstream cells call .split() on every prompt, so fail here with a
    # clear message instead of later with an AttributeError.
    if not isinstance(parsed, list) or not all(isinstance(p, str) for p in parsed):
        raise ValueError("Model response was not a JSON array of strings")

    return parsed

# Generate the prompt set once; the cells below iterate over `prompts`.
prompts = generate_prompts()
print(f"Generated {len(prompts)} prompts")
# Word counts (whitespace-split) of the first five prompts, as a quick sanity check.
print(f"Sample prompt lengths: {[len(p.split()) for p in prompts[:5]]} words")

## Test Function

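`test_tier` sends a single prompt to the model under test on one service tier using the streaming Converse API, and records token usage, time to first token, time to last token, and throughput.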

In [None]:
def test_tier(prompt: str, tier: str) -> Dict:
    """Stream one prompt through MODEL_ID on the given service tier and time it.

    Args:
        prompt: User message text to send.
        tier: Service tier name, passed as ``serviceTier={"type": tier}``.

    Returns:
        A dict with keys:
            ``tier``: the tier that was requested.
            ``input_tokens`` / ``output_tokens``: usage reported in the
                stream's ``metadata`` event (0 if no such event arrived).
            ``time_to_first_token``: seconds from just before the request
                until the first ``contentBlockDelta`` event, or ``None`` if
                no delta was received.
            ``time_to_last_token``: seconds from just before the request
                until the stream was fully consumed.
            ``throughput``: ``output_tokens`` divided by the total elapsed
                seconds (0 if no time elapsed).
    """
    # perf_counter is monotonic, so measured durations cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start = time.perf_counter()

    response = bedrock.converse_stream(
        modelId=MODEL_ID,
        messages=[{"role": "user", "content": [{"text": prompt}]}],
        serviceTier={"type": tier}
    )

    first_token_time = None
    # Defaults so a stream without a metadata event still yields a result
    # instead of raising UnboundLocalError at the return statement.
    input_tokens = 0
    output_tokens = 0

    for event in response['stream']:
        if 'contentBlockDelta' in event:
            if first_token_time is None:
                first_token_time = time.perf_counter()
        elif 'metadata' in event:
            usage = event['metadata'].get('usage', {})
            input_tokens = usage.get('inputTokens', 0)
            output_tokens = usage.get('outputTokens', 0)

    elapsed = time.perf_counter() - start

    return {
        'tier': tier,
        'input_tokens': input_tokens,
        'output_tokens': output_tokens,
        'time_to_first_token': (
            first_token_time - start if first_token_time is not None else None
        ),
        'time_to_last_token': elapsed,
        'throughput': output_tokens / elapsed if elapsed > 0 else 0
    }

## Run Tests

Testing all prompts across all tiers.

In [None]:
# One row per (prompt, tier) pair that completed without error.
results = []

for i, prompt in enumerate(prompts):
    print(f"Testing prompt {i+1}/{len(prompts)}...")
    for tier in TIERS:
        try:
            # Merge the timing metrics with identifiers for this prompt;
            # prompt_length is the whitespace-split word count.
            results.append({
                **test_tier(prompt, tier),
                'prompt_id': i,
                'prompt_length': len(prompt.split()),
            })
            # Brief pause between successful calls.
            time.sleep(0.5)
        except Exception as e:
            # A failed call is reported and skipped so the run continues.
            print(f"Error with tier {tier}, prompt {i}: {e}")

df = pd.DataFrame(results)
print(f"\nCompleted {len(results)} tests")

## Performance Analysis

In [None]:
# Per-tier descriptive statistics for latency, throughput, and token counts,
# rounded to four decimal places.
summary = (
    df.groupby('tier')
    .agg({
        'time_to_first_token': ['mean', 'median', 'std', 'min', 'max'],
        'time_to_last_token': ['mean', 'median', 'std', 'min', 'max'],
        'throughput': ['mean', 'median', 'std'],
        'input_tokens': 'mean',
        'output_tokens': 'mean'
    })
    .round(4)
)

# Flatten the (metric, statistic) column pairs into "metric_statistic" names.
summary.columns = summary.columns.map(lambda parts: '_'.join(parts).strip())

# Swap the flattened names for short display labels with units.
summary = summary.rename(columns={
    'time_to_first_token_mean': 'TTFT_mean (s)',
    'time_to_first_token_median': 'TTFT_median (s)',
    'time_to_first_token_std': 'TTFT_std (s)',
    'time_to_first_token_min': 'TTFT_min (s)',
    'time_to_first_token_max': 'TTFT_max (s)',
    'time_to_last_token_mean': 'TTLT_mean (s)',
    'time_to_last_token_median': 'TTLT_median (s)',
    'time_to_last_token_std': 'TTLT_std (s)',
    'time_to_last_token_min': 'TTLT_min (s)',
    'time_to_last_token_max': 'TTLT_max (s)',
    'throughput_mean': 'Throughput_mean (tok/s)',
    'throughput_median': 'Throughput_median (tok/s)',
    'throughput_std': 'Throughput_std (tok/s)',
    'input_tokens_mean': 'Avg_input_tokens',
    'output_tokens_mean': 'Avg_output_tokens'
})

print("\n=== PERFORMANCE COMPARISON BY TIER ===")
print(summary.to_string())

In [None]:
print("\n=== KEY METRICS COMPARISON ===")
# Per-tier means of the three headline metrics, relabelled for display.
key_metrics = (
    df.groupby('tier')[['time_to_first_token', 'time_to_last_token', 'throughput']]
    .mean()
    .round(4)
    .rename(columns={
        'time_to_first_token': 'Avg TTFT (s)',
        'time_to_last_token': 'Avg TTLT (s)',
        'throughput': 'Avg Throughput (tok/s)',
    })
)
print(key_metrics.to_string())

## Performance by Input Size

In [None]:
# Bucket prompts by word count: (0, 50] Short, (50, 150] Medium, above 150 Long.
# The top edge is open-ended so a prompt longer than 500 words is still
# counted as Long instead of getting a NaN category and dropping out of the
# groupby below.
df['size_category'] = pd.cut(
    df['prompt_length'],
    bins=[0, 50, 150, float('inf')],
    labels=['Short', 'Medium', 'Long'],
)

# observed=False is stated explicitly: it keeps every tier/size combination
# in the table (empty ones show NaN) and avoids the deprecation warning that
# newer pandas versions emit when grouping on a categorical without it.
size_analysis = df.groupby(['tier', 'size_category'], observed=False).agg({
    'time_to_first_token': 'mean',
    'throughput': 'mean'
}).round(4)

print("\n=== PERFORMANCE BY INPUT SIZE ===")
print(size_analysis.to_string())

## Run this if you want to save the results in a CSV

In [None]:
# Write every collected row to disk, without the DataFrame index column.
df.to_csv(
    path_or_buf='bedrock_tier_results.csv',
    index=False,
)
print("\nResults saved to bedrock_tier_results.csv")