Azure GET CLI Details

In [1]:
import os
import pandas as pd
import requests
from azure.identity import DefaultAzureCredential
from azure.mgmt.compute import ComputeManagementClient
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Read the target subscription from the environment (load_dotenv() has already
# run, so a value from a local .env file is picked up as well).
subscription_id = os.environ.get('AZURE_SUBSCRIPTION_ID')

# DefaultAzureCredential is handed to the management client, which is the
# object every later cell uses to query Azure.
credential = DefaultAzureCredential()
compute_client = ComputeManagementClient(
    credential=credential,
    subscription_id=subscription_id,
)

In [4]:
# VM family name mapping (SKU prefix -> series list and workload category)

In [3]:

# Each entry is (family prefix, human-readable series list, workload category).
mapping = [
    # General Purpose
    ("A",   "Av2-series; previous-gen A-family series",                         "General Purpose"),
    ("B",   "Bsv2-series; Basv2-series; Bpsv2-series; previous-gen B-family",   "General Purpose"),
    ("D",   "Dpsv6/Dplsv6; Dpdsv6/Dpldsv6; Dasv6/Dadvsv6; Dalsv6/Daldsv6; "
            "Dpsv5/Dpds5; Dpldsv5/Dpldsv5; Dlsv5/Dlds5; Dv5/Ddsv5; Dasv5/Dads5; "
            "previous-gen D-family",                                              "General Purpose"),
    ("DC",  "DCasv5/DCadsv5; DCas_cc_v5/DCads_cc_v5; DCesv6/DCedsv6; "
            "DCsv3/DCdsv3; previous-gen DC-family",                                "General Purpose"),

    # Compute Optimized
    ("F",   "Fasy6-series; Falsy6-series; Famsv6-series; Fsv2-series; previous-gen F-family",
            "Compute Optimized"),
    ("FX",  "FX-series",                                                          "Compute Optimized"),

    # Accelerated (both rows are labelled "GPU Accelerated").
    # NOTE(review): this section was originally headed "FPGA Accelerated";
    # confirm whether NP should carry its own category label.
    ("NP",  "NP-series",     "GPU Accelerated"),
    ("NV",  "NV-series; NVv3; NVv4; NVadsA10_v5; previous-gen NV-family",         "GPU Accelerated"),

    # High performance compute
    ("HB",  "HB-series; HBv2-series; HBv3-series; HBv4-series",                   "High performance compute"),
    ("HC",  "HC-series",                                                         "High performance compute"),
    ("HX",  "HX-series",                                                         "High performance compute"),

    # Memory Optimized
    ("E",   "Epsv6/EpdsV6; Easv6/Eadsv6; Ev5/Esv5; Edy5/Eds5; Easv5/Eadsv5; "
            "Epsv5/EpdsV5; previous-gen E-family",                                  "Memory Optimized"),
    ("Eb",  "Ebdsv5/Ebsv5",                                                      "Memory Optimized"),
    ("EC",  "ECasv5/ECadsv5; ECas_cc_v5/ECads_cc_v5; ECesv5/ECedsv5",             "Memory Optimized"),
    ("M",   "MbsV3/MbdsV3; Msv3/Mdsv3; Mv2; Msv2/Mdsv2",                          "Memory Optimized"),

    # Storage Optimized
    ("L",   "Lsv3-series; Lasv3-series; previous-gen L-family",                  "Storage Optimized"),
]

# Prefix -> {"series": ..., "category": ...} lookup built from the table above.
mapping_dict   = {fam: {"series": s, "category": c} for fam, s, c in mapping}
# Longest prefixes first, so a two-letter family such as "DC" is tried before "D".
sorted_prefixes = sorted(mapping_dict, key=lambda x: -len(x))

# Tier tokens that can precede the size name in a SKU (e.g. "Basic_A0").
_TIER_PREFIXES = ("STANDARD_", "BASIC_")


def get_vm_mapping(sku_name: str):
    """
    Classify a VM SKU name by its family prefix.

    Given e.g. "Standard_D4s_v3", returns (prefix, series-str, category-str),
    where ``prefix`` is the key as written in ``mapping`` (e.g. "D" or "Eb").

    Matching is case-insensitive on both sides, and a leading "Standard_" or
    "Basic_" tier token is removed first so that the tier name itself is never
    mistaken for a family (e.g. "Basic_A0" is an A-family size, not B-family).

    Returns (None, None, "Other") when the name is empty/None or no known
    prefix matches.
    """
    if not sku_name:
        return None, None, "Other"

    name = sku_name.upper()
    for tier in _TIER_PREFIXES:
        if name.startswith(tier):
            name = name[len(tier):]
            break

    for prefix in sorted_prefixes:
        # The name was upper-cased above, so compare against the upper-cased
        # prefix; otherwise mixed-case keys such as "Eb" could never match.
        if name.startswith(prefix.upper()):
            md = mapping_dict[prefix]
            return prefix, md["series"], md["category"]
    return None, None, "Other"


In [9]:
def collect_all_instance_types_azure(compute_client: ComputeManagementClient) -> pd.DataFrame:
    """
    Pull every Azure VM SKU and return a DataFrame matching the AWS instance schema.

    Iterates ``compute_client.resource_skus.list()``, keeps entries whose
    resource type is "virtualMachines" (case-insensitive) and emits one row per
    entry in the SKU's ``location_info``.

    Columns: "Region", "Instance Type", "Instance Family" (category from
    ``get_vm_mapping``), "vCPUs", "RAM (GiB)", "Memory MiB", "Accelerators",
    "Storage Info" (dict with "OS (GB)" and "Temp (GB)") and
    "Network Performance". A capability missing from the SKU is reported as 0.

    SKUs with no capabilities or no location info are tolerated: the former
    yield zeros, the latter yield no rows.
    """
    rows = []
    for sku in compute_client.resource_skus.list():
        # The listing is filtered here to VM sizes only.
        if (sku.resource_type or "").lower() != "virtualmachines":
            continue

        # Skip before parsing anything when there is no location to report.
        locations = sku.location_info or []
        if not locations:
            continue

        caps = {c.name: c.value for c in (sku.capabilities or [])}
        _, _, category = get_vm_mapping(sku.name)

        # These values depend only on the SKU, not on the location, so they
        # are parsed once rather than once per location.
        ram_gib = float(caps.get("MemoryGB", 0))
        os_gb   = float(caps.get("OSVhdSizeMB", 0)) / 1024
        tmp_gb  = float(caps.get("ResourceDiskSizeMB", 0)) / 1024
        vcpus   = int(caps.get("vCPUs", 0))
        gpus    = int(caps.get("GPUs", 0))
        network = float(caps.get("NetworkBandwidthInGbps", 0))
        mem_mib = int(round(ram_gib * 1024))

        for loc_info in locations:
            rows.append({
                "Region":               loc_info.location,
                "Instance Type":        sku.name,
                "Instance Family":      category,
                "vCPUs":                vcpus,
                "RAM (GiB)":            ram_gib,
                "Memory MiB":           mem_mib,
                "Accelerators":         gpus,
                # A fresh dict per row so rows never share a mutable object.
                "Storage Info":         {"OS (GB)": os_gb, "Temp (GB)": tmp_gb},
                "Network Performance":  network,
            })

    return pd.DataFrame(rows)

In [10]:
# Build the VM SKU table (one row per SKU/location) using the compute client.
df_azure = collect_all_instance_types_azure(compute_client)

In [11]:
# Preview the first rows of the SKU table.
print(df_azure.head())
# Each row is one (SKU, region) pair, so the row count is not the SKU count;
# report both figures with accurate labels.
print("Total rows (SKU x region):", len(df_azure))
print("Distinct SKUs:", df_azure["Instance Type"].nunique())

               Region Instance Type  Instance Family  vCPUs  RAM (GiB)  \
0       australiaeast      Basic_A0  General Purpose      1       0.75   
1  australiasoutheast      Basic_A0  General Purpose      1       0.75   
2         brazilsouth      Basic_A0  General Purpose      1       0.75   
3       CanadaCentral      Basic_A0  General Purpose      1       0.75   
4          CanadaEast      Basic_A0  General Purpose      1       0.75   

   Memory MiB  Accelerators                           Storage Info  \
0         768             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   
1         768             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   
2         768             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   
3         768             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   
4         768             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   

   Network Performance  
0                  0.0  
1                  0.0  
2                  0.0  
3                  0.0  
4        

In [12]:
# Report the table dimensions and the non-null count of every column.
n_rows, n_cols = df_azure.shape
print("Number of rows:", n_rows)  # Total rows
print("Number of columns:", n_cols)  # Total columns
print(df_azure.count())
# Show a bounded preview instead of rendering every row: the full table has
# tens of thousands of rows and would bloat the notebook output.
print(df_azure.head(20).to_string())

Number of rows: 44232
Number of columns: 9
Region                 44232
Instance Type          44232
Instance Family        44232
vCPUs                  44232
RAM (GiB)              44232
Memory MiB             44232
Accelerators           44232
Storage Info           44232
Network Performance    44232
dtype: int64
                   Region               Instance Type           Instance Family  vCPUs  RAM (GiB)  Memory MiB  Accelerators                           Storage Info  Network Performance
0           australiaeast                    Basic_A0           General Purpose      1       0.75         768             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}                  0.0
1      australiasoutheast                    Basic_A0           General Purpose      1       0.75         768             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}                  0.0
2             brazilsouth                    Basic_A0           General Purpose      1       0.75         768             0  {'OS (

In [13]:
# Write the SKU table to disk without the index column, then confirm how many
# rows were exported and where.
output_path = "azure_test.csv"
df_azure.to_csv(path_or_buf=output_path, index=False)
print(
    f"Exported {len(df_azure)} rows to {output_path}"
)

Exported 44232 rows to azure_test.csv


# Cost per Hour

In [15]:

# Join the exported SKU table with hourly prices from the Azure Retail Prices API.
df = pd.read_csv('azure_test.csv')
base_url = "https://prices.azure.com/api/retail/prices"
params = {
    '$filter': "serviceName eq 'Virtual Machines'",
    'currencyCode': 'USD'
}

# Download every page of results. The first request lets `requests` URL-encode
# the query parameters; later requests follow NextPageLink, which is already a
# complete URL, so no parameters are re-sent.
items = []
url = base_url
query = params
with requests.Session() as session:
    while url:
        response = session.get(url, params=query, timeout=60)
        response.raise_for_status()  # fail loudly instead of parsing an error page
        resp = response.json()
        items.extend(resp.get('Items', []))
        url = resp.get('NextPageLink')
        query = None
prices = pd.DataFrame(items)
if prices.empty:
    raise RuntimeError("Azure Retail Prices API returned no items for the Virtual Machines filter")

# Lower-cased join keys: region and VM size names differ in letter case
# between the SKU table and the price feed.
df['_Region_key']  = df['Region'].str.lower()
df['_VMSize_key'] = df['Instance Type'].str.lower()
prices['_Region_key']  = prices['armRegionName'].str.lower()
prices['_VMSize_key'] = prices['armSkuName'].str.lower()

merged = df.merge(
    prices[['unitOfMeasure','unitPrice','_Region_key','_VMSize_key','location']],
    on=['_Region_key','_VMSize_key'],
    how='left'
)

# Keep hourly meters only. Rows with no price match have a missing
# unitOfMeasure and are dropped by this filter as well.
# NOTE(review): the recorded output shows several rows for the same
# region/instance type, so the price feed appears to carry more than one hourly
# meter per SKU - confirm whether further filtering is wanted.
merged = (
    merged[merged['unitOfMeasure'] == '1 Hour']
    .rename(columns={'unitPrice': 'Cost Per Hour', 'location': 'Location'})
    .drop(columns=['unitOfMeasure', '_Region_key', '_VMSize_key'])
)
merged.to_csv('azure_test_with_cost_1.csv', index=False)

print(merged.head())


               Region   Instance Type  Instance Family  vCPUs  RAM (GiB)  \
203  AustraliaCentral  Standard_A1_v2  General Purpose      1        2.0   
204  AustraliaCentral  Standard_A1_v2  General Purpose      1        2.0   
205  AustraliaCentral  Standard_A1_v2  General Purpose      1        2.0   
206  AustraliaCentral  Standard_A1_v2  General Purpose      1        2.0   
207  AustraliaCentral  Standard_A1_v2  General Purpose      1        2.0   

     Memory MiB  Accelerators                           Storage Info  \
203        2048             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   
204        2048             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   
205        2048             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   
206        2048             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   
207        2048             0  {'OS (GB)': 1023.0, 'Temp (GB)': 0.0}   

     Network Performance  Cost Per Hour    Location  
203                  0.0       0.010000  AU Central  
20

In [None]:
# Swap the positions of the "Instance Type" and "Location" columns and save
# the result as the final dataset.
df = pd.read_csv("azure_test_with_cost_1.csv")
cols = df.columns.tolist()
# Look the columns up by name rather than by hard-coded position, so that a
# change in the upstream column layout raises an error instead of silently
# swapping the wrong columns.
type_pos, location_pos = cols.index("Instance Type"), cols.index("Location")
cols[type_pos], cols[location_pos] = cols[location_pos], cols[type_pos]
df = df[cols]
df.to_csv("Final_Azure_Test_Data_1.csv", index=False)
print("Columns reordered (headers moved with their data).")

Columns reordered (headers moved with their data).
