AWS GET CLI Details

In [10]:
import os
import boto3
import pandas as pd
import json
from dotenv import load_dotenv
load_dotenv()


os.environ['AWS_DEFAULT_REGION'] = os.getenv('AWS_DEFAULT_REGION')
os.environ['AWS_ACCESS_KEY_ID'] = os.getenv('AWS_ACCESS_KEY_ID')
os.environ['AWS_SECRET_ACCESS_KEY'] = os.getenv('AWS_SECRET_ACCESS_KEY')

print("REGION:", os.environ['AWS_DEFAULT_REGION'])
print("KEY ID:", os.environ['AWS_ACCESS_KEY_ID'][:4] + '…')

# Use boto3 to interact with AWS
ec2 = boto3.client('ec2')
print("Default region:", ec2.meta.region_name)
print("Regions list:", [r['RegionName'] for r in ec2.describe_regions()['Regions']])

REGION: ap-south-1
KEY ID: AKIA…
Default region: ap-south-1
Regions list: ['ap-south-1', 'eu-north-1', 'eu-west-3', 'eu-west-2', 'eu-west-1', 'ap-northeast-3', 'ap-northeast-2', 'ap-northeast-1', 'ca-central-1', 'sa-east-1', 'ap-southeast-1', 'ap-southeast-2', 'eu-central-1', 'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2']


In [11]:
def get_category_definitions():
    """
    Return a dict mapping each category name to the set of
    instance‐family prefixes that belong in it.
    Edit this one place whenever AWS adds a new family.
    """
    return {
        'Accelerated Computing': {
            'P5','P4','G6E','G6','G5G','G5','G4DN','G4AD',
            'TRN2','TRN1','INF2','INF1','DL1','DL2Q','F2','VT1'
        },
        'Compute Optimized': {
            'C8G','C7G','C7GN','C7I','C7I-FLEX','C7A',
            'C6G','C6GN','C6I','C6IN','C6A','C5','C5N','C5A','C4'
        },
        'General Purpose': {
            'M8G','M7G','M7I','M7I-FLEX','M7A','MAC',
            'M6G','M6I','M6IN','M6A','M5','M5N','M5ZN','M5A','M4',
            'T4G','T3','T3A','T2'
        },
        'HPC Optimized': {
            'HPC7G','HPC7A','HPC6ID','HPC6A'
        },
        'Memory Optimized': {
            'R8G','R7G','R7I','R7IZ','R7A',
            'R6G','R6I','R6IN','R6A',
            'R5','R5N','R5B','R5A','R4',
            'U7I','U-1',
            'X8G','X2GD','X2IDN','X2IEDN','X2IEZN',
            'X1','X1E','Z1D'
        },
        'Storage Optimized': {
            'I8G','I7I','I7IE','I4G','IM4GN','IS4GEN',
            'I4I','I3','I3EN',
            'D3','D3EN','D2','H1'
        }
    }

In [14]:
def build_category_map(category_defs):
    """
    Invert get_category_definitions() into a { family_prefix -> category } lookup.
    """
    lookup = {}
    for category, families in category_defs.items():
        for fam in families:
            lookup[fam.upper()] = category
    return lookup

In [18]:
def collect_all_instance_types():
    """
    Pull every EC2 instance type in every region, extract vCPU, RAM,
    network, storage, GPUs/FPGAs—and also Family + Category.
    """
    # build our family→category map once
    category_defs = get_category_definitions()
    category_map  = build_category_map(category_defs)

    rows = []
    ec2_default = boto3.client('ec2')
    regions = [r['RegionName'] for r in ec2_default.describe_regions()['Regions']]

    for region in regions:
        client = boto3.client('ec2', region_name=region)
        paginator = client.get_paginator('describe_instance_types')

        for page in paginator.paginate():
            for it in page['InstanceTypes']:
                itype = it['InstanceType']
                # 1) split off family
                family = itype.split('.', 1)[0].upper()
                # 2) look up category, default to "Other"
                category = category_map.get(family, 'Other')

                mem_mib = it['MemoryInfo']['SizeInMiB']
                ram_gib = round(mem_mib / 1024, 2)
                net_perf = it.get('NetworkInfo', {}).get('NetworkPerformance')

                rows.append({
                    'Region':             region,
                    'InstanceType':       itype,
                    'InstanceFamily':     category,
                    'vCPUs':              it['VCpuInfo']['DefaultVCpus'],
                    'MemoryMiB':          mem_mib,
                    'RAM (GiB)':          ram_gib,
                    'NetworkPerformance': net_perf,
                    'StorageInfo':        it.get('InstanceStorageInfo'),
                    'Accelerators':       it.get('GpuInfo') or it.get('FpgaInfo'),
                })

    return pd.DataFrame(rows)

In [19]:
df_types = collect_all_instance_types()
print(df_types.columns)
df_types.head()


Index(['Region', 'InstanceType', 'InstanceFamily', 'vCPUs', 'MemoryMiB',
       'RAM (GiB)', 'NetworkPerformance', 'StorageInfo', 'Accelerators'],
      dtype='object')


Unnamed: 0,Region,InstanceType,InstanceFamily,vCPUs,MemoryMiB,RAM (GiB),NetworkPerformance,StorageInfo,Accelerators
0,ap-south-1,r6gd.xlarge,Other,4,32768,32.0,Up to 10 Gigabit,"{'TotalSizeInGB': 237, 'Disks': [{'SizeInGB': ...",
1,ap-south-1,x2iedn.2xlarge,Memory Optimized,8,262144,256.0,Up to 25 Gigabit,"{'TotalSizeInGB': 237, 'Disks': [{'SizeInGB': ...",
2,ap-south-1,m7g.4xlarge,General Purpose,16,65536,64.0,Up to 15 Gigabit,,
3,ap-south-1,i3en.12xlarge,Storage Optimized,48,393216,384.0,50 Gigabit,"{'TotalSizeInGB': 30000, 'Disks': [{'SizeInGB'...",
4,ap-south-1,m5ad.12xlarge,Other,48,196608,192.0,10 Gigabit,"{'TotalSizeInGB': 1800, 'Disks': [{'SizeInGB':...",


In [20]:
print(df_types.to_string())

               Region         InstanceType         InstanceFamily  vCPUs  MemoryMiB  RAM (GiB)   NetworkPerformance                                                                                                                                            StorageInfo                                                                                                                                                   Accelerators
0          ap-south-1          r6gd.xlarge                  Other      4      32768      32.00     Up to 10 Gigabit            {'TotalSizeInGB': 237, 'Disks': [{'SizeInGB': 237, 'Count': 1, 'Type': 'ssd'}], 'NvmeSupport': 'required', 'EncryptionSupport': 'required'}                                                                                                                                                           None
1          ap-south-1       x2iedn.2xlarge       Memory Optimized      8     262144     256.00     Up to 25 Gigabit            {'TotalSizeInGB': 237

In [19]:
print("Number of rows:", df_types.shape[0])  # Total rows
print("Number of columns:", df_types.shape[1])  # Total columns
print(df_types.count())

Number of rows: 10972
Number of columns: 10
Region                10972
InstanceType          10972
Family                10972
Category              10972
vCPUs                 10972
MemoryMiB             10972
RAM (GiB)             10972
NetworkPerformance    10972
StorageInfo            4571
Accelerators            587
dtype: int64


In [21]:
output_path = "new_aws_intance_sizes_data.csv"
df_types.to_csv(output_path, index=False)

print(f"Exported {len(df_types)} rows to {output_path}")

Exported 10972 rows to new_aws_intance_sizes_data.csv


# Cost Per Hours

In [12]:
def build_category_map(category_defs: dict[str, set[str]]) -> dict[str, str]:
    """
    Invert category_defs so that each family prefix maps to its category.
    """
    cmap: dict[str, str] = {}
    for category, prefixes in category_defs.items():
        for prefix in prefixes:
            cmap[prefix.upper()] = category
    return cmap

In [13]:
import botocore.session
def build_region_to_location_map():
    """
    Map every region code (e.g. 'eu-central-1') to its human name,
    and normalize any "Europe"→"EU" so it matches Pricing API.
    """
    session    = botocore.session.get_session()
    parts      = session.get_data('partitions')['partitions']
    region_map: dict[str, str] = {}
    for part in parts:
        for code, info in part.get('regions', {}).items():
            desc = info.get('description', '')
            # Pricing API uses "EU (...)" not "Europe (...)"
            desc = desc.replace('Europe', 'EU')
            region_map[code] = desc
    return region_map

In [14]:
def get_all_on_demand_prices(
    operating_system: str = "Linux",
    tenancy: str         = "Shared",
    pre_installed_sw: str= "NA",
    capacitystatus: str  = "Used",
) -> dict[tuple[str,str], float]:
    """
    Paginate over the Pricing API to fetch every On-Demand price
    for EC2 (USD/hr), filtered by OS/tenancy/etc.
    Returns a dict: { (instanceType, locationKey) -> price_float }.
    We key by both the human name (e.g. "EU (Frankfurt)") and the code ("eu-central-1").
    """
    pricing   = boto3.client("pricing", region_name="us-east-1")
    paginator = pricing.get_paginator("get_products")

    filters = [
        {"Type": "TERM_MATCH", "Field": "operatingSystem", "Value": operating_system},
        {"Type": "TERM_MATCH", "Field": "preInstalledSw",   "Value": pre_installed_sw},
        {"Type": "TERM_MATCH", "Field": "tenancy",          "Value": tenancy},
        {"Type": "TERM_MATCH", "Field": "capacitystatus",   "Value": capacitystatus},
    ]

    # build mapping code↔desc so we can duplicate keys
    region_map   = build_region_to_location_map()
    desc_to_code = {desc: code for code, desc in region_map.items()}

    price_map: dict[tuple[str,str], float] = {}
    for page in paginator.paginate(
        ServiceCode="AmazonEC2",
        Filters=filters,
        FormatVersion="aws_v1",
    ):
        for price_str in page["PriceList"]:
            prod = json.loads(price_str)
            attrs = prod["product"]["attributes"]
            itype = attrs["instanceType"]
            loc   = attrs["location"]           # e.g. "EU (Frankfurt)"
            od    = next(iter(prod["terms"]["OnDemand"].values()))
            pdim  = next(iter(od["priceDimensions"].values()))

            # extract USD/hour
            per_unit = pdim["pricePerUnit"]
            if isinstance(per_unit, dict):
                # e.g. {"USD": "0.0960000000"}
                currency_key = next(iter(per_unit))
                usd = float(per_unit[currency_key])
            else:
                usd = float(per_unit)

            # store under both human name and region code (if available)
            price_map[(itype, loc)] = usd
            if loc in desc_to_code:
                price_map[(itype, desc_to_code[loc])] = usd

    return price_map

In [15]:
def collect_all_instance_types(
    operating_system: str = "Linux",
    tenancy: str         = "Shared",
    pre_installed_sw: str= "NA",
    capacitystatus: str  = "Used",
):
    """
    Describe every instance type in every region, merge in the On-Demand
    cost (USD/hr) by trying both the code (eu-…) and human name ("EU (…)").
    """
    ec2     = boto3.client('ec2')
    regions = [r['RegionName'] for r in ec2.describe_regions()['Regions']]

    region_map   = build_region_to_location_map()
    category_map = build_category_map(get_category_definitions())
    price_map    = get_all_on_demand_prices(
        operating_system, tenancy, pre_installed_sw, capacitystatus
    )

    rows: list[dict] = []
    for region in regions:
        client        = boto3.client("ec2", region_name=region)
        paginator     = client.get_paginator("describe_instance_types")
        location_name = region_map.get(region, region)

        for page in paginator.paginate():
            for it in page["InstanceTypes"]:
                itype      = it["InstanceType"]
                family     = itype.split(".", 1)[0].upper()
                category   = category_map.get(family, "Other")
                mem_mib    = it["MemoryInfo"]["SizeInMiB"]
                ram_gib    = round(mem_mib / 1024, 2)
                net_perf   = it.get("NetworkInfo", {}).get("NetworkPerformance")
                # try code first, then human name
                cost_hr    = price_map.get((itype, region),
                             price_map.get((itype, location_name)))

                rows.append({
                    "Region":             region,
                    "InstanceType":       itype,
                    "InstanceFamily":     category,
                    "CostPerHour":        cost_hr,
                    "vCPUs":              it["VCpuInfo"]["DefaultVCpus"],
                    "MemoryMiB":          mem_mib,
                    "RAM (GiB)":          ram_gib,
                    "NetworkPerformance": net_perf,
                    "StorageInfo":        it.get("InstanceStorageInfo"),
                    "Accelerators":       it.get("GpuInfo") or it.get("FpgaInfo"),
                })

    return pd.DataFrame(rows)

In [16]:
df_types = collect_all_instance_types(
        operating_system="Linux",
        tenancy="Shared",
        pre_installed_sw="NA",
        capacitystatus="Used"
    )

In [17]:
print(df_types.columns)
df_types.head()
print(df_types.to_string())

Index(['Region', 'InstanceType', 'InstanceFamily', 'CostPerHour', 'vCPUs',
       'MemoryMiB', 'RAM (GiB)', 'NetworkPerformance', 'StorageInfo',
       'Accelerators'],
      dtype='object')
               Region         InstanceType         InstanceFamily  CostPerHour  vCPUs  MemoryMiB  RAM (GiB)   NetworkPerformance                                                                                                                                            StorageInfo                                                                                                                                                   Accelerators
0          ap-south-1        c6in.24xlarge      Compute Optimized      5.44320     96     196608     192.00          150 Gigabit                                                                                                                                                   None                                                                                           

In [18]:
print("Number of rows:", df_types.shape[0])  # Total rows
print("Number of columns:", df_types.shape[1])  # Total columns
print(df_types.count())

Number of rows: 10972
Number of columns: 10
Region                10972
InstanceType          10972
InstanceFamily        10972
CostPerHour           10936
vCPUs                 10972
MemoryMiB             10972
RAM (GiB)             10972
NetworkPerformance    10972
StorageInfo            4571
Accelerators            587
dtype: int64


In [19]:
output_path = "new_aws_intance_sizes_data.csv"
df_types.to_csv(output_path, index=False)

print(f"Exported {len(df_types)} rows to {output_path}")

Exported 10972 rows to new_aws_intance_sizes_data.csv
