In [None]:
import boto3
import math
import json
import re

# Constants per job
RAM_PER_JOB_GIB = 40
VCPU_PER_JOB = 1  # NOTE(review): not referenced in the visible code (analyze_instance divides vCPUs by a literal 4) — confirm which is intended
S3_DATA_PER_JOB_GIB = 30
COMPUTE_TIME_SEC = 90  # 1.5 minutes
BANDWIDTH_EFFICIENCY = 0.5  # Assume 50% of max bandwidth is usable for S3
TOTAL_JOBS = 400
REGION = "eu-central-2"

# Initialize Boto3 clients
# The pricing client is pinned to us-east-1 instead of REGION
# (presumably because the Price List API is not served from every region — TODO confirm).
pricing = boto3.client("pricing", region_name='us-east-1')
# EC2 client for the target region; used below to enumerate instance types.
ec2 = boto3.client("ec2", region_name=REGION)

def get_price_per_hour(instance_type, client=None):
    """Look up the on-demand hourly USD price of an instance type in Europe (Zurich).

    The query is restricted to Linux, shared tenancy, no pre-installed
    software, and the ``EUC2-BoxUsage:<instance_type>`` usage type, so exactly
    one product is expected in the response.

    Args:
        instance_type: EC2 instance type name, e.g. ``"c5.12xlarge"``.
        client: Optional object exposing ``get_products(**kwargs)``. Defaults
            to the module-level ``pricing`` client.

    Returns:
        The hourly price as a ``float``, or ``None`` when no price could be
        determined (no matching product, an ambiguous result, or the lookup /
        response parsing failed). Failures are reported with ``print``; an
        exception object is never returned in place of a price.
    """
    if client is None:
        client = pricing
    try:
        response = client.get_products(
            ServiceCode="AmazonEC2",
            Filters=[
                {"Type": "TERM_MATCH", "Field": "instanceType", "Value": instance_type},
                {"Type": "TERM_MATCH", "Field": "location", "Value": "Europe (Zurich)"},
                {"Type": "TERM_MATCH", "Field": "operatingSystem", "Value": "Linux"},
                {"Type": "TERM_MATCH", "Field": "tenancy", "Value": "Shared"},
                {"Type": "TERM_MATCH", "Field": "preInstalledSw", "Value": "NA"},
                {"Type": "TERM_MATCH", "Field": "usagetype", "Value": f"EUC2-BoxUsage:{instance_type}"}
            ],
            MaxResults=100
        )
        price_list = response['PriceList']
        if not price_list:
            return None
        # An explicit check instead of `assert`: asserts vanish under `python -O`
        # and an ambiguous match must not silently pick an arbitrary price.
        if len(price_list) != 1:
            print(f"Unexpected number of price lists for {instance_type}: {len(price_list)}")
            return None
        product = json.loads(price_list[0])
        terms = product['terms']['OnDemand']
        # Take the first (expected: only) on-demand term and its first price dimension.
        first_term = next(iter(terms.values()))
        price_dimensions = next(iter(first_term['priceDimensions'].values()))
        return float(price_dimensions['pricePerUnit']['USD'])
    except Exception as e:
        # Callers treat None as "no price"; returning the exception itself
        # would later be multiplied as if it were a number.
        print(f"Price lookup failed for {instance_type}: {e}")
        return None

def analyze_instance(instance):
    """Estimate the cost of running TOTAL_JOBS jobs on one EC2 instance type.

    Args:
        instance: One instance-type description dict. It must contain
            ``InstanceType``, ``VCpuInfo.DefaultVCpus`` and
            ``MemoryInfo.SizeInMiB``; ``NetworkInfo.NetworkPerformance`` is
            optional (a missing value causes the type to be skipped).

    Returns:
        A dict with the keys ``instance_type``, ``vcpus``, ``ram``,
        ``net_gbps``, ``concurrent_jobs``, ``job_time_sec``,
        ``price_per_hour``, ``total_cost`` and ``instances_needed``, or
        ``None`` when the type is skipped (unparseable or non-positive
        network performance, too little vCPU/RAM for a single job, or no
        usable price). The reason for skipping is printed.
    """
    instance_type = instance['InstanceType']
    vcpus = instance['VCpuInfo']['DefaultVCpus']
    ram = instance['MemoryInfo']['SizeInMiB'] / 1024  # Convert to GiB
    net_gbps = instance.get('NetworkInfo', {}).get('NetworkPerformance', '')

    # Convert network performance to number
    net_gbps_val = parse_network_performance(net_gbps)
    # A zero/negative figure would make the per-job bandwidth division blow up.
    if net_gbps_val is None or net_gbps_val <= 0:
        print(f"Skipping {instance_type} due to invalid network performance: {net_gbps}")
        return None

    # Usable bandwidth in gigabytes per second (Gbit/s scaled by efficiency, / 8 bits).
    # NOTE(review): this is decimal GB/s while the S3 volume is named in GiB — confirm the ~7% unit gap is acceptable.
    net = net_gbps_val * BANDWIDTH_EFFICIENCY / 8
    # NOTE(review): the literal 4 vCPUs per job disagrees with the VCPU_PER_JOB constant — confirm which is intended.
    concurrent_jobs = min(math.floor(vcpus / 4), math.floor(ram / RAM_PER_JOB_GIB))
    if concurrent_jobs == 0:
        print(f"Skipping {instance_type} due to insufficient resources, vcpus: {vcpus}, ram: {ram}")
        return None

    # Jobs running side by side share the instance's bandwidth equally.
    net_per_job = net / concurrent_jobs
    s3_time = S3_DATA_PER_JOB_GIB / net_per_job
    job_time = s3_time + COMPUTE_TIME_SEC

    hourly_price = get_price_per_hour(instance_type)
    if hourly_price is None:
        print(f"Skipping {instance_type} due to missing price information")
        return None
    # The price lookup may hand back a non-numeric value (e.g. an exception
    # object) on failure; multiplying that below would raise a TypeError and
    # abort the whole scan, so skip this instance type instead.
    if not isinstance(hourly_price, (int, float)):
        print(f"Skipping {instance_type} due to price lookup error: {hourly_price!r}")
        return None
    if hourly_price == 0:
        print(f"Suspicious price for {instance_type}: {hourly_price}")

    # Each instance runs a single wave of `concurrent_jobs` jobs and is billed
    # for exactly one job duration.
    instances_needed = math.ceil(TOTAL_JOBS / concurrent_jobs)
    total_cost = hourly_price * (job_time / 3600) * instances_needed

    return {
        "instance_type": instance_type,
        "vcpus": vcpus,
        "ram": round(ram, 1),
        "net_gbps": net_gbps_val,
        "concurrent_jobs": concurrent_jobs,
        "job_time_sec": round(job_time, 2),
        "price_per_hour": round(hourly_price, 4),
        "total_cost": round(total_cost, 4),
        "instances_needed": instances_needed
    }

def parse_network_performance(perf_str):
    """Extract the Gigabit figure from a network-performance description.

    Recognises text such as "10 Gigabit" or "Up to 12.5 Gigabit"
    (case-insensitive) and returns the first number that is directly followed
    by "Gigabit".

    Args:
        perf_str: The description text to parse.

    Returns:
        The number as a float, or None when the input is empty, contains no
        "<number> Gigabit" pattern, or cannot be searched at all (a message
        is printed for the empty and the error case).
    """
    if perf_str:
        try:
            # Matches "X Gigabit" as well as "Up to X Gigabit"
            hit = re.search(r"(\d+\.?\d*)\s*Gigabit", perf_str, re.IGNORECASE)
            return float(hit.group(1)) if hit else None
        except Exception as e:
            print(f"Error parsing network performance: {e}")
            return None

    print(f"Invalid network performance string: {perf_str}")
    return None

In [14]:
# Spot-check the price lookup on a single instance type before scanning all of them.
get_price_per_hour('c5.12xlarge')

2.561

In [15]:
# Walk every page of describe_instance_types and keep only the instance types
# for which analyze_instance produced a result (it returns None for skipped ones).
print("Fetching instance types available in region...")
paginator = ec2.get_paginator('describe_instance_types')
page_iterator = paginator.paginate()

results = []
for page in page_iterator:
    analyses = map(analyze_instance, page['InstanceTypes'])
    results.extend(row for row in analyses if row)

Fetching instance types available in region...
Skipping r6gd.medium due to insufficient resources, vcpus: 1, ram: 8.0
Skipping r6i.xlarge due to insufficient resources, vcpus: 4, ram: 32.0
Skipping c7g.xlarge due to insufficient resources, vcpus: 4, ram: 8.0
Skipping m6id.2xlarge due to insufficient resources, vcpus: 8, ram: 32.0
Skipping r6g.medium due to insufficient resources, vcpus: 1, ram: 8.0
Skipping t3.large due to insufficient resources, vcpus: 2, ram: 8.0
Skipping t3.medium due to insufficient resources, vcpus: 2, ram: 4.0
Skipping c6g.large due to insufficient resources, vcpus: 2, ram: 4.0
Skipping m5d.large due to insufficient resources, vcpus: 2, ram: 8.0
Skipping m5d.xlarge due to insufficient resources, vcpus: 4, ram: 16.0
Skipping r6gd.xlarge due to insufficient resources, vcpus: 4, ram: 32.0
Skipping m6idn.large due to insufficient resources, vcpus: 2, ram: 8.0
Skipping r6gd.large due to insufficient resources, vcpus: 2, ram: 16.0
Skipping c7g.4xlarge due to insufficie

In [16]:

def print_r(r):
    """Print a one-line summary of a single analysis result.

    Args:
        r: Result dict providing the keys ``instance_type``, ``total_cost``,
            ``instances_needed``, ``price_per_hour``, ``job_time_sec``,
            ``ram`` and ``net_gbps``.
    """
    cost_part = f"{r['instance_type']}: ${r['total_cost']} (x{r['instances_needed']} @ ${r['price_per_hour']}/hr)"
    perf_part = f"job_time: {r['job_time_sec']}s, RAM: {r['ram']} GiB, Net: {r['net_gbps']} Gbps"
    print(f"{cost_part}, {perf_part}")

# Rank the analysed instance types from cheapest to most expensive (sorts in place).
results.sort(key=lambda x: x["total_cost"])

# Build the headline from TOTAL_JOBS so it cannot drift from the configured job count.
print(f"\nTop instance types sorted by total cost to run {TOTAL_JOBS} jobs:\n")
for r in results[:20]:
    print_r(r)



Top instance types sorted by total cost to run 400 jobs:

r6g.2xlarge: $7.2522 (x400 @ $0.535/hr), job_time: 122.0s, RAM: 64.0 GiB, Net: 10.0 Gbps
r6g.4xlarge: $7.4087 (x134 @ $1.0701/hr), job_time: 186.0s, RAM: 128.0 GiB, Net: 10.0 Gbps
r6gd.2xlarge: $8.2309 (x400 @ $0.6072/hr), job_time: 122.0s, RAM: 64.0 GiB, Net: 10.0 Gbps
r6i.4xlarge: $8.3047 (x134 @ $1.3376/hr), job_time: 166.8s, RAM: 128.0 GiB, Net: 12.5 Gbps
r6gd.4xlarge: $8.4077 (x134 @ $1.2144/hr), job_time: 186.0s, RAM: 128.0 GiB, Net: 10.0 Gbps
r6i.2xlarge: $8.5904 (x400 @ $0.6688/hr), job_time: 115.6s, RAM: 64.0 GiB, Net: 12.5 Gbps
i4i.4xlarge: $8.6028 (x134 @ $1.8/hr), job_time: 128.4s, RAM: 128.0 GiB, Net: 25.0 Gbps
r5.2xlarge: $9.0687 (x400 @ $0.669/hr), job_time: 122.0s, RAM: 64.0 GiB, Net: 10.0 Gbps
r5.4xlarge: $9.2634 (x134 @ $1.338/hr), job_time: 186.0s, RAM: 128.0 GiB, Net: 10.0 Gbps
r6g.12xlarge: $9.3898 (x45 @ $3.2102/hr), job_time: 234.0s, RAM: 384.0 GiB, Net: 20.0 Gbps
r6g.16xlarge: $9.8475 (x34 @ $4.2803/hr),

In [17]:
# Show the full analysis row(s) for one specific instance type.
[row for row in results if row['instance_type'] == 'm6in.8xlarge']

[{'instance_type': 'm6in.8xlarge',
  'vcpus': 32,
  'ram': 128.0,
  'net_gbps': 50.0,
  'concurrent_jobs': 3,
  'job_time_sec': 109.2,
  'price_per_hour': 2.9354,
  'total_cost': 11.9313,
  'instances_needed': 134}]