In [131]:
from functools import cache
import boto3
import math
import json
import re

# AWS region whose instance types are enumerated through the `ec2` client below.
REGION = "eu-west-1"

# NOTE(review): the Pricing client is pinned to us-east-1 instead of REGION —
# presumably because the Pricing API is only served from a few endpoints; confirm
# before changing it.
pricing = boto3.client("pricing", region_name='us-east-1')
# EC2 client bound to REGION.
ec2 = boto3.client("ec2", region_name=REGION)

@cache
def get_price_per_hour(instance_type):
    """Return the on-demand hourly USD price of an instance type, or None.

    Queries the module-level ``pricing`` client for a Linux, shared-tenancy
    product with no pre-installed software, located in "EU (Ireland)" and
    billed under the ``EU-BoxUsage:<instance_type>`` usage type.

    Args:
        instance_type: EC2 instance type name, e.g. ``"r5n.xlarge"``.

    Returns:
        The price as a float, or None when no single unambiguous price could
        be obtained (no matching product, several matching products, an API
        error, or an unexpected payload shape). The reason is printed.

    Note:
        The result is memoised by ``functools.cache``, so a None caused by a
        failed lookup is also remembered; call
        ``get_price_per_hour.cache_clear()`` to retry.
    """
    try:
        response = pricing.get_products(
            ServiceCode="AmazonEC2",
            Filters=[
                {"Type": "TERM_MATCH", "Field": "instanceType", "Value": instance_type},
                {"Type": "TERM_MATCH", "Field": "location", "Value": "EU (Ireland)"},
                {"Type": "TERM_MATCH", "Field": "operatingSystem", "Value": "Linux"},
                {"Type": "TERM_MATCH", "Field": "tenancy", "Value": "Shared"},
                {"Type": "TERM_MATCH", "Field": "preInstalledSw", "Value": "NA"},
                {"Type": "TERM_MATCH", "Field": "usagetype", "Value": f"EU-BoxUsage:{instance_type}"}
            ],
            MaxResults=100
        )
        price_list = response['PriceList']
        if not price_list:
            return None
        if len(price_list) != 1:
            # Explicit check instead of `assert`: asserts vanish under -O and
            # an ambiguous match must never be turned into a price.
            print(f"Unexpected number of price lists for {instance_type}: {len(price_list)}")
            return None

        # Each PriceList entry is a JSON document; take the first on-demand
        # term and its first price dimension.
        product = json.loads(price_list[0])
        terms = product['terms']['OnDemand']
        first_term = next(iter(terms.values()))
        price_dimensions = next(iter(first_term['priceDimensions'].values()))
        return float(price_dimensions['pricePerUnit']['USD'])
    except Exception as e:
        # Best-effort lookup: report the problem and signal "no price" with
        # None. Returning the exception object itself would be mistaken for a
        # number by callers that only test `is None`.
        print(f"Failed to fetch price for {instance_type}: {e!r}")
        return None


In [176]:


# Constants per job
RAM_PER_JOB_GIB = 32
VCPU_PER_JOB = 4
S3_DATA_PER_JOB_GIB = 32
# Compute time lookup, indexed by VCPU_PER_JOB.bit_length():
# 1 vCPU -> 60, 2 -> 35, 4 -> 25, 8 -> 18, 16 -> 15, 32 -> 13.
# Index 0 is only reachable for VCPU_PER_JOB == 0; values of 64 or more
# fall off the end of the list and raise IndexError.
COMPUTE_TIMES = [0, 60, 35, 25, 18, 15, 13]
COMPUTE_TIME_SEC = COMPUTE_TIMES[(VCPU_PER_JOB).bit_length()]
BANDWIDTH_EFFICIENCY = 0.4  # Assume 40% of max bandwidth is usable for S3 (i.e. 60% is lost)
TOTAL_JOBS = 400

# Per-instance-type cost model
def analyze_instance(instance):
    """Estimate the cost of running TOTAL_JOBS jobs on one instance type.

    Model: as many jobs as fit by vCPU and RAM run concurrently on one
    instance and share its usable network bandwidth equally. A job takes its
    S3 transfer time plus COMPUTE_TIME_SEC. Enough instances are started to
    run all jobs in a single wave, and each is billed for one job duration.

    Args:
        instance: One entry of ``InstanceTypes`` from the EC2
            ``describe_instance_types`` response. Reads ``InstanceType``,
            ``VCpuInfo.DefaultVCpus``, ``MemoryInfo.SizeInMiB`` and,
            optionally, ``NetworkInfo.NetworkPerformance``.

    Returns:
        A dict with the keys ``instance_type``, ``vcpus``, ``ram``,
        ``net_gbps``, ``concurrent_jobs``, ``job_time_sec``,
        ``price_per_hour``, ``total_cost`` and ``instances_needed``; or None
        when the instance type is skipped (no usable bandwidth figure, too
        small for a single job, or no usable price). The reason is printed.
    """
    instance_type = instance['InstanceType']
    vcpus = instance['VCpuInfo']['DefaultVCpus']
    ram = instance['MemoryInfo']['SizeInMiB'] / 1024  # Convert to GiB
    net_gbps = instance.get('NetworkInfo', {}).get('NetworkPerformance', '')
    if instance_type.startswith("hpc7"):
        net_gbps = "25 Gigabit"  # Assume 25 Gbps for HPC7 instances

    # Convert network performance to number
    net_gbps_val = parse_network_performance(net_gbps)
    if net_gbps_val is None or net_gbps_val <= 0:
        # A zero bandwidth would otherwise cause a division by zero below.
        print(f"Skipping {instance_type} due to invalid network performance: {net_gbps}")
        return None

    # Gbit/s -> GByte/s, scaled by the assumed usable fraction.
    # NOTE(review): this is decimal GB/s while the data size is in GiB; the
    # ~7% difference is ignored by the model.
    net = net_gbps_val * BANDWIDTH_EFFICIENCY / 8
    concurrent_jobs = min(math.floor(vcpus / VCPU_PER_JOB), math.floor(ram / RAM_PER_JOB_GIB))
    if concurrent_jobs == 0:
        print(f"Skipping {instance_type} due to insufficient resources, vcpus: {vcpus}, ram: {ram}")
        return None

    net_per_job = net / concurrent_jobs
    s3_time = S3_DATA_PER_JOB_GIB / net_per_job
    job_time = s3_time + COMPUTE_TIME_SEC

    hourly_price = get_price_per_hour(instance_type)
    if hourly_price is None:
        print(f"Skipping {instance_type} due to missing price information")
        return None
    if isinstance(hourly_price, bool) or not isinstance(hourly_price, (int, float)):
        # The price lookup may hand back something that is not a number (for
        # example an exception object); multiplying it below would crash the
        # whole scan, so skip just this instance type instead.
        print(f"Skipping {instance_type} due to unusable price information: {hourly_price!r}")
        return None
    if hourly_price == 0:
        print(f"Suspicious price for {instance_type}: {hourly_price}")

    instances_needed = math.ceil(TOTAL_JOBS / concurrent_jobs)
    total_cost = hourly_price * (job_time / 3600) * instances_needed

    return {
        "instance_type": instance_type,
        "vcpus": vcpus,
        "ram": round(ram, 1),
        "net_gbps": net_gbps_val,
        "concurrent_jobs": concurrent_jobs,
        "job_time_sec": round(job_time, 2),
        "price_per_hour": round(hourly_price, 4),
        "total_cost": round(total_cost, 4),
        "instances_needed": instances_needed
    }

def parse_network_performance(perf_str):
    """Extract the bandwidth in Gbit/s from a network performance label.

    Recognised forms (case-insensitive): "X Gigabit", "Up to X Gigabit" and
    the multi-card form "Nx X Gigabit", which yields N * X. For "Up to X"
    the burst figure X is returned as-is.

    Args:
        perf_str: Label such as "Up to 25 Gigabit", "12.5 Gigabit" or
            "4x 100 Gigabit".

    Returns:
        The bandwidth as a float, or None when the label is empty, is not a
        string, or carries no Gigabit figure (e.g. "Moderate").
    """
    if not perf_str:
        print(f"Invalid network performance string: {perf_str}")
        return None
    try:
        # Optional "Nx" multiplier, then the numeric Gigabit figure.
        match = re.search(
            r"(?:(\d+)\s*x\s*)?(\d+\.?\d*)\s*Gigabit", perf_str, re.IGNORECASE
        )
    except TypeError as e:
        # re.search only fails here when perf_str is not a string.
        print(f"Error parsing network performance: {e}")
        return None
    if match is None:
        return None
    multiplier = int(match.group(1)) if match.group(1) else 1
    return multiplier * float(match.group(2))

In [None]:
# Walk every page of describe_instance_types and keep only the instance types
# that analyze_instance produced a result for (it returns None when it skips).
print("Fetching instance types available in region...")
paginator = ec2.get_paginator('describe_instance_types')
page_iterator = paginator.paginate()

results = []
for page in page_iterator:
    analyzed = map(analyze_instance, page['InstanceTypes'])
    results.extend(outcome for outcome in analyzed if outcome)

Fetching instance types available in region...
Skipping r6g.medium due to insufficient resources, vcpus: 1, ram: 8.0
Skipping m6idn.large due to insufficient resources, vcpus: 2, ram: 8.0
Skipping r6gd.medium due to insufficient resources, vcpus: 1, ram: 8.0
Skipping c5n.2xlarge due to insufficient resources, vcpus: 8, ram: 21.0
Skipping is4gen.xlarge due to insufficient resources, vcpus: 4, ram: 24.0
Skipping t1.micro due to invalid network performance: Very Low
Skipping m6id.xlarge due to insufficient resources, vcpus: 4, ram: 16.0
Skipping m5d.xlarge due to insufficient resources, vcpus: 4, ram: 16.0
Skipping t4g.nano due to insufficient resources, vcpus: 2, ram: 0.5
Skipping t2.2xlarge due to invalid network performance: Moderate
Skipping c3.large due to invalid network performance: Moderate
Skipping r3.large due to invalid network performance: Moderate
Skipping c7a.xlarge due to insufficient resources, vcpus: 4, ram: 8.0
Skipping c7gn.xlarge due to insufficient resources, vcpus: 4

In [178]:

def print_r(r):
    """Print a one-line cost summary for one analysis result.

    Args:
        r: Mapping with the keys ``instance_type``, ``total_cost``,
            ``instances_needed``, ``price_per_hour``, ``job_time_sec``,
            ``ram`` and ``net_gbps``.
    """
    summary = (
        f"{r['instance_type']}: ${r['total_cost']} "
        f"(x{r['instances_needed']} @ ${r['price_per_hour']}/hr), "
        f"job_time: {r['job_time_sec']}s, "
        f"RAM: {r['ram']} GiB, "
        f"Net: {r['net_gbps']} Gbps"
    )
    print(summary)

# Cheapest option first; this sorts `results` in place.
results.sort(key=lambda x: x["total_cost"])

# The job count in the header comes from TOTAL_JOBS so it cannot drift from
# the value the cost model actually used.
print(f"\nTop instance types sorted by total cost to run {TOTAL_JOBS} jobs:\n")
for r in results:
    print_r(r)



Top instance types sorted by total cost to run 400 jobs:

r5n.xlarge: $1.8778 (x400 @ $0.334/hr), job_time: 50.6s, RAM: 32.0 GiB, Net: 25.0 Gbps
r6in.xlarge: $2.0118 (x400 @ $0.3908/hr), job_time: 46.33s, RAM: 32.0 GiB, Net: 30.0 Gbps
r7g.xlarge: $2.0295 (x400 @ $0.2397/hr), job_time: 76.2s, RAM: 32.0 GiB, Net: 12.5 Gbps
r5dn.xlarge: $2.0915 (x400 @ $0.372/hr), job_time: 50.6s, RAM: 32.0 GiB, Net: 25.0 Gbps
r6a.xlarge: $2.1488 (x400 @ $0.2538/hr), job_time: 76.2s, RAM: 32.0 GiB, Net: 12.5 Gbps
r6g.xlarge: $2.2309 (x400 @ $0.2256/hr), job_time: 89.0s, RAM: 32.0 GiB, Net: 10.0 Gbps
r8g.xlarge: $2.2325 (x400 @ $0.2637/hr), job_time: 76.2s, RAM: 32.0 GiB, Net: 12.5 Gbps
r6idn.xlarge: $2.2407 (x400 @ $0.4352/hr), job_time: 46.33s, RAM: 32.0 GiB, Net: 30.0 Gbps
r6i.xlarge: $2.3876 (x400 @ $0.282/hr), job_time: 76.2s, RAM: 32.0 GiB, Net: 12.5 Gbps
r6in.2xlarge: $2.4749 (x200 @ $0.7816/hr), job_time: 57.0s, RAM: 64.0 GiB, Net: 40.0 Gbps
r7i.xlarge: $2.507 (x400 @ $0.2961/hr), job_time: 76.2s,