# Runtime-Optimized VM Allocation for Benchmarks

This notebook implements bin-packing-style heuristics that allocate benchmarks to VMs based on measured HiGHS v1.10 runtime data, aiming to minimize the variance of the total runtime across VMs.

In [None]:
import heapq
from collections import defaultdict
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
import yaml

# CONFIGURATION
# Cap, in seconds, on the summed benchmark runtime assigned to a single VM
# (e.g. 3600 for 1 hour). Set to None for no limit. The value is handed to
# balanced_partition() as `max_runtime_per_vm`.
MAX_RUNTIME_PER_VM_SECONDS = 3600

## Load and Process Runtime Data

In [221]:
# Read the raw results table (may include HiGHS-hipo runs if available).
# The file is parsed with header=None, so the column names are supplied here.
result_columns = [
    'Benchmark', 'Size', 'Solver', 'Solver Version', 'Solver Release Year',
    'Status', 'Termination Condition', 'Runtime (s)', 'Memory Usage (MB)',
    'Objective Value', 'Max Integrality Violation', 'Duality Gap',
    'Reported Runtime (s)', 'Timeout', 'Hostname', 'Run ID', 'Timestamp'
]
highs_data = pd.read_csv('main_results.csv', header=None, names=result_columns)

# Runtime window of interest (both bounds inclusive)
MIN_RUNTIME_SECONDS = 900   # 15 minutes
MAX_RUNTIME_SECONDS = 3600  # 1 hour

# Step 1: successful runs of HiGHS 1.10.0 or of the HiGHS-hipo solver
is_target_solver = (
    (highs_data['Solver Version'] == '1.10.0')
    | (highs_data['Solver'] == 'highs-hipo')
)
is_successful = highs_data['Status'] == 'ok'
highs_v110 = highs_data[is_target_solver & is_successful]

# Step 2: keep only runs inside the runtime window. The cast to float is done
# on the already-filtered rows, exactly once.
runtime_seconds = highs_v110['Runtime (s)'].astype(float)
highs_v110 = highs_v110[runtime_seconds.between(MIN_RUNTIME_SECONDS, MAX_RUNTIME_SECONDS)]

print(f"Found {len(highs_v110)} successful HiGHS v1.10/hipo benchmark runs")
print(f"Runtime range: {MIN_RUNTIME_SECONDS}s ({MIN_RUNTIME_SECONDS/60:.0f} min) to {MAX_RUNTIME_SECONDS}s ({MAX_RUNTIME_SECONDS/3600:.0f} hour)")
print(f"Solvers included: {highs_v110['Solver'].unique()}")
highs_v110.head()

Found 13 successful HiGHS v1.10/hipo benchmark runs
Runtime range: 900s (15 min) to 3600s (1 hour)
Solvers included: ['highs']


Unnamed: 0,Benchmark,Size,Solver,Solver Version,Solver Release Year,Status,Termination Condition,Runtime (s),Memory Usage (MB),Objective Value,Max Integrality Violation,Duality Gap,Reported Runtime (s),Timeout,Hostname,Run ID,Timestamp
86,DCOPF-Carolinas_uc_2M,1-997,highs,1.10.0,2025,ok,optimal,2080.905538397994,2878.12,4463695.700557045,3.502851030676985e-11,9.478984509261144e-05,2079.601967334748,3600.0,benchmark-instance-z2-m41,20250503_040156_benchmark-instance-z2-m41,2025-05-03 21:11:07.318952
196,pypsa-eur-elec-op-ucconv,10-24h,highs,1.10.0,2025,ok,optimal,2060.496564678004,2751.096,10527862989.467287,3.219646771412954e-15,9.998511252765709e-05,2059.3151636123657,3600.0,benchmark-instance-z2-m38,20250501_144136_benchmark-instance-z2-m38,2025-05-02 00:26:36.498948
261,Sienna_modified_RTS_GMLC_DA_sys_NetTransport_H...,1-1h,highs,1.10.0,2025,ok,optimal,1198.634356795999,838.736,796622.7803292888,3.998330308359003e-15,9.978093355103796e-05,1198.434877872467,3600.0,benchmark-instance-z-m35,20250429_090601_benchmark-instance-z-m35,2025-04-30 05:59:11.473448
317,pypsa-eur-sec,5-12h,highs,1.10.0,2025,ok,optimal,1472.7624695099948,1671.124,15716534864.16614,,,1468.8594851493835,3600.0,benchmark-instance-z-m24,20250429_090644_benchmark-instance-z-m24,2025-04-30 09:42:31.254238
365,Sienna_modified_RTS_GMLC_DA_sys_NetCopperPlate...,1-1h,highs,1.10.0,2025,ok,optimal,1272.9416091339954,965.796,796885.0277238319,1.5987211554602254e-14,9.849357480446288e-05,1272.7875311374664,3600.0,benchmark-instance-z-m23,20250429_090640_benchmark-instance-z-m23,2025-04-30 09:36:56.834400


In [222]:
# Build a "<Benchmark>-<Size>" -> runtime (seconds) lookup from the filtered runs.
# If several rows share the same key, the last one wins (plain dict assignment).
benchmark_runtimes = {}
for _, row in highs_v110.iterrows():
    benchmark_key = f"{row['Benchmark']}-{row['Size']}"
    try:
        benchmark_runtimes[benchmark_key] = float(row['Runtime (s)'])
    except (TypeError, ValueError) as e:
        # Only conversion failures are reported and skipped; any other error is
        # unexpected and should surface instead of being swallowed.
        print(f"Error processing row: {row}")
        print(f"Error: {e}")

# Sum once and reuse for both the seconds and the hours figure
total_benchmark_runtime_s = sum(benchmark_runtimes.values())
print(f"Runtime data available for {len(benchmark_runtimes)} benchmarks")
print(f"Total runtime: {total_benchmark_runtime_s} seconds ({total_benchmark_runtime_s/3600:.1f} hours)")

Runtime data available for 13 benchmarks
Total runtime: 25216.991877514996 seconds (7.0 hours)


## Load Benchmark Metadata

In [223]:
# Load benchmark metadata to get size categories and URLs.
# A context manager guarantees the file handle is closed after parsing.
with open('results/metadata.yaml') as metadata_file:
    meta = yaml.safe_load(metadata_file)

# Create a lookup "<benchmark name>-<size name>" -> size entry.
# Each entry under meta['benchmarks'][name]['Sizes'] is read below for its
# 'Name', 'Size', 'URL' and (optionally) 'Num. variables' / 'Num. constraints'.
metadata_lookup = {}
for name, benchmark in meta['benchmarks'].items():
    for size_info in benchmark['Sizes']:
        instance_key = f"{name}-{size_info['Name']}"
        metadata_lookup[instance_key] = size_info

# Categorize benchmarks by size and create instances list
# Use ONLY the filtered highs_v110 dataset
benchmarks_by_size = {'S': [], 'M': [], 'L': []}
all_benchmark_instances = []

for _, row in highs_v110.iterrows():
    instance_key = f"{row['Benchmark']}-{row['Size']}"

    # Runs without a metadata entry are reported and left out of the allocation
    size_info = metadata_lookup.get(instance_key)
    if size_info is None:
        print(f"Warning: No metadata found for {instance_key}")
        continue

    instance = {
        'name': row['Benchmark'],
        'size_name': row['Size'],
        'size_category': size_info['Size'],
        'instance_key': instance_key,
        'runtime': float(row['Runtime (s)']),
        'num_variables': size_info.get('Num. variables', 0),
        'num_constraints': size_info.get('Num. constraints', 0),
        'url': size_info['URL']
    }

    # Raises KeyError for a size category other than S/M/L, which makes an
    # unexpected metadata value visible immediately.
    benchmarks_by_size[size_info['Size']].append(instance)
    all_benchmark_instances.append(instance)

print(f"Total benchmark instances (from filtered dataset): {len(all_benchmark_instances)}")
for size, instances in benchmarks_by_size.items():
    print(f"  {size}: {len(instances)}")
print("All instances have runtime data from highs_v110")

Total benchmark instances (from filtered dataset): 13
  S: 0
  M: 13
  L: 0
All instances have runtime data from highs_v110


## Bin Packing Algorithms

In [224]:
class VMAllocation:
    """Running tally of the benchmarks assigned to a single VM.

    Instances order themselves by accumulated runtime (see ``__lt__``), which
    lets them be stored directly in a ``heapq`` min-heap.
    """

    def __init__(self, vm_id: int):
        self.vm_id = vm_id          # identifier given by the caller
        self.benchmarks = []        # benchmark dicts, in assignment order
        self.total_runtime = 0.0    # sum of the 'runtime' of every assigned benchmark

    def add_benchmark(self, benchmark: dict):
        """Assign ``benchmark`` to this VM and add its runtime to the total.

        Raises:
            ValueError: if ``benchmark['runtime']`` is None.
        """
        runtime = benchmark['runtime']
        if runtime is None:
            raise ValueError(f"Benchmark {benchmark['instance_key']} has no runtime data!")

        self.total_runtime += runtime
        self.benchmarks.append(benchmark)

    def get_total_runtime(self):
        """Return the accumulated runtime of all assigned benchmarks."""
        return self.total_runtime

    def __lt__(self, other):
        # Heap ordering: the VM with the smaller accumulated runtime sorts first
        return other.total_runtime > self.total_runtime

In [225]:
def first_fit_decreasing(benchmarks: List[dict], num_vms: int) -> List[VMAllocation]:
    """
    Greedy "decreasing" allocation over a fixed number of VMs.

    Benchmarks are processed from longest to shortest runtime and each one is
    handed to the VM whose accumulated runtime is currently the smallest
    (the first such VM on ties). Benchmarks whose 'runtime' is None are ignored.

    Args:
        benchmarks: Benchmark dictionaries carrying a 'runtime' entry.
        num_vms: Number of VMs to distribute the benchmarks over.

    Returns:
        The list of VMAllocation objects, ordered by VM id.
    """
    # Only benchmarks with real runtime data take part
    with_runtime = [bench for bench in benchmarks if bench['runtime'] is not None]
    print(f"Using {len(with_runtime)} benchmarks with real runtime data (filtered from {len(benchmarks)} total)")

    vms = [VMAllocation(vm_id) for vm_id in range(num_vms)]

    # Longest benchmarks first
    ordered = list(with_runtime)
    ordered.sort(key=lambda bench: bench['runtime'], reverse=True)

    for bench in ordered:
        least_loaded = min(vms, key=VMAllocation.get_total_runtime)
        least_loaded.add_benchmark(bench)

    return vms

In [226]:
def longest_processing_time_first(benchmarks: List[dict], num_vms: int) -> List[VMAllocation]:
    """
    Longest-Processing-Time-first allocation backed by a min-heap of VMs.

    Benchmarks are visited from longest to shortest runtime. Each is assigned
    to the VM popped from the heap (the one with the smallest accumulated
    runtime), which is then pushed back with its updated load. Benchmarks whose
    'runtime' is None are ignored.

    Args:
        benchmarks: Benchmark dictionaries carrying a 'runtime' entry.
        num_vms: Number of VMs to distribute the benchmarks over.

    Returns:
        The list of VMAllocation objects, ordered by VM id.
    """
    # Only benchmarks with real runtime data take part
    with_runtime = [bench for bench in benchmarks if bench['runtime'] is not None]
    print(f"Using {len(with_runtime)} benchmarks with real runtime data (filtered from {len(benchmarks)} total)")

    vms = [VMAllocation(vm_id) for vm_id in range(num_vms)]

    # Separate list for the heap so the returned list keeps its VM-id order;
    # VMAllocation.__lt__ makes the heap order by accumulated runtime.
    load_heap = vms[:]
    heapq.heapify(load_heap)

    # Longest benchmarks first
    ordered = list(with_runtime)
    ordered.sort(key=lambda bench: bench['runtime'], reverse=True)

    for bench in ordered:
        least_loaded = heapq.heappop(load_heap)
        least_loaded.add_benchmark(bench)
        heapq.heappush(load_heap, least_loaded)

    return vms

In [227]:
def balanced_partition(benchmarks: List[dict], num_vms: int, max_runtime_per_vm: float = None,
                       overflow_tolerance: float = 0.05) -> List[VMAllocation]:
    """
    Balanced partition algorithm that tries to achieve equal total runtime per VM.
    Uses ONLY benchmarks with real runtime data.

    Benchmarks are placed from longest to shortest. Each one goes to the VM
    whose load after the placement is closest to the per-VM target
    (total runtime / number of VMs); VMs still below the target get a 20%
    score bonus. On ties the first VM wins.

    The runtime cap is soft: a VM is only ruled out when the placement would
    push it above ``max_runtime_per_vm * (1 + overflow_tolerance)``. When no
    existing VM qualifies, a new VM is appended for the benchmark, even if
    that single benchmark is longer than the cap.

    Args:
        benchmarks: List of benchmark dictionaries with runtime data
        num_vms: Initial number of VMs to create (must be >= 1)
        max_runtime_per_vm: Maximum runtime allowed per VM (in seconds). If None, no limit.
        overflow_tolerance: Fraction by which a VM may exceed ``max_runtime_per_vm``
            (default 0.05, i.e. 5%). Ignored when no cap is set.

    Returns:
        List of VMAllocation objects. It can be longer than ``num_vms`` when the
        cap forces additional VMs, and can contain VMs without benchmarks.

    Raises:
        ValueError: if ``num_vms`` is smaller than 1.
    """
    if num_vms < 1:
        raise ValueError(f"num_vms must be at least 1, got {num_vms}")

    # Filter to only benchmarks with real runtime data
    runtime_benchmarks = [b for b in benchmarks if b['runtime'] is not None]
    print(f"Using {len(runtime_benchmarks)} benchmarks with real runtime data (filtered from {len(benchmarks)} total)")

    # Calculate total runtime and target per VM
    total_runtime = sum(b['runtime'] for b in runtime_benchmarks)
    target_runtime_per_vm = total_runtime / num_vms

    # If max_runtime_per_vm is set and target exceeds it, increase num_vms
    if max_runtime_per_vm is not None and target_runtime_per_vm > max_runtime_per_vm:
        original_num_vms = num_vms
        # Report the target that actually violates the cap BEFORE it is recomputed
        print(f"⚠️  Target runtime {target_runtime_per_vm/3600:.1f}h exceeds max {max_runtime_per_vm/3600:.1f}h")
        num_vms = int(np.ceil(total_runtime / max_runtime_per_vm))
        target_runtime_per_vm = total_runtime / num_vms
        print(f"   Increasing VMs from {original_num_vms} to {num_vms} to respect runtime cap")

    print(f"Target runtime per VM: {target_runtime_per_vm:.1f} seconds ({target_runtime_per_vm/3600:.1f} hours)")
    if max_runtime_per_vm is not None:
        print(f"Max runtime per VM: {max_runtime_per_vm:.1f} seconds ({max_runtime_per_vm/3600:.1f} hours)")

    # Load above which a VM is no longer considered for a placement
    overflow_limit = None
    if max_runtime_per_vm is not None:
        overflow_limit = max_runtime_per_vm * (1 + overflow_tolerance)

    # Score multiplier that favours VMs still below the target
    underloaded_bonus = 0.8

    # Create VMs
    vms = [VMAllocation(i) for i in range(num_vms)]

    # Sort benchmarks by runtime (descending)
    sorted_benchmarks = sorted(runtime_benchmarks, key=lambda x: x['runtime'], reverse=True)

    # Assign benchmarks with balance consideration
    for benchmark in sorted_benchmarks:
        benchmark_runtime = benchmark['runtime']

        # Find VM that would be closest to target after adding this benchmark
        best_vm = None
        best_score = float('inf')

        for vm in vms:
            current_runtime = vm.total_runtime
            after_runtime = current_runtime + benchmark_runtime

            # Rule out VMs that would end up beyond the cap plus tolerance
            if overflow_limit is not None and after_runtime > overflow_limit:
                continue

            # Score based on deviation from target
            score = abs(after_runtime - target_runtime_per_vm)

            # Prefer VMs that are under-loaded
            if current_runtime < target_runtime_per_vm:
                score *= underloaded_bonus

            if score < best_score:
                best_score = score
                best_vm = vm

        # Add benchmark to best VM (or create new VM if needed)
        if best_vm is None and max_runtime_per_vm is not None:
            # No existing VM can take the benchmark within the cap, create a new one
            print(f"⚠️  All VMs at capacity, creating additional VM for benchmark {benchmark['instance_key']}")
            best_vm = VMAllocation(len(vms))
            vms.append(best_vm)

        if best_vm is not None:
            best_vm.add_benchmark(benchmark)
        else:
            # Only reachable without a cap when no finite score was produced
            # (e.g. a NaN runtime); the benchmark is reported and left out.
            print(f"❌ Could not allocate benchmark {benchmark['instance_key']} (runtime: {benchmark_runtime:.1f}s)")

    return vms

## Algorithm Comparison

In [228]:
def analyze_allocation(vms: List[VMAllocation], algorithm_name: str):
    """
    Print load statistics for a VM allocation and return them as a dict.

    Statistics are computed over "active" VMs only, i.e. VMs whose total
    runtime is greater than zero. The standard deviation is ``np.std`` with its
    default settings, and efficiency is ``1 - std / mean``.

    Args:
        vms: The allocation to analyze.
        algorithm_name: Label used in the printed header and the returned dict.

    Returns:
        Dict with keys 'algorithm', 'total_runtime', 'std_runtime',
        'max_runtime', 'min_runtime', 'efficiency', 'runtimes' (all VMs,
        including empty ones), 'active_vms' and 'num_vms'.
    """
    runtimes = [vm.total_runtime for vm in vms]

    # Empty VMs are left out of the statistics
    active_runtimes = [load for load in runtimes if load > 0]
    num_active = len(active_runtimes)

    print(f"\n=== {algorithm_name} ===")
    print(f"Total VMs created: {len(vms)}")
    print(f"Active VMs (with benchmarks): {num_active}")
    print(f"Empty VMs: {len(vms) - num_active}")

    if num_active > 0:
        # Compute each statistic once and reuse it for printing and returning
        total = sum(active_runtimes)
        mean = np.mean(active_runtimes)
        std = np.std(active_runtimes)
        shortest = min(active_runtimes)
        longest = max(active_runtimes)

        print(f"Total runtime: {total:.1f} seconds ({total/3600:.1f} hours)")
        print(f"Average runtime per active VM: {mean:.1f} seconds ({mean/3600:.1f} hours)")
        print(f"Runtime standard deviation: {std:.1f} seconds ({std/3600:.1f} hours)")
        print(f"Min runtime: {shortest:.1f} seconds ({shortest/3600:.1f} hours)")
        print(f"Max runtime: {longest:.1f} seconds ({longest/3600:.1f} hours)")
        print(f"Runtime ratio (max/min): {longest/shortest:.2f}")

        # Efficiency (how balanced the allocation is)
        efficiency = 1 - (std / mean)
        print(f"Load balance efficiency: {efficiency:.3f} (1.0 = perfect balance)")
    else:
        print("No active VMs found!")
        total = std = longest = shortest = 0
        efficiency = 0

    return {
        'algorithm': algorithm_name,
        'total_runtime': total,
        'std_runtime': std,
        'max_runtime': longest,
        'min_runtime': shortest,
        'efficiency': efficiency,
        'runtimes': runtimes,
        'active_vms': num_active,
        'num_vms': len(vms)
    }

In [229]:
# Use ONLY benchmarks that have real runtime data - no estimation!
benchmarks_with_runtime = [inst for inst in all_benchmark_instances if inst['runtime'] is not None]
num_excluded = len(all_benchmark_instances) - len(benchmarks_with_runtime)
included_hours = sum(inst['runtime'] for inst in benchmarks_with_runtime) / 3600

print(f"Using {len(benchmarks_with_runtime)} benchmarks with real HiGHS runtime data")
print(f"Excluded {num_excluded} benchmarks without runtime data")
print(f"Total runtime of included benchmarks: {included_hours:.1f} hours")

if MAX_RUNTIME_PER_VM_SECONDS is None:
    print("\n⚙️  No runtime cap configured (unlimited)")
else:
    print(f"\n⚙️  Runtime cap enabled: {MAX_RUNTIME_PER_VM_SECONDS} seconds ({MAX_RUNTIME_PER_VM_SECONDS/3600:.1f} hours) per VM")

# Split by size category: L-size goes to highmem machines, everything else to standard ones
l_size_benchmarks = []
non_l_benchmarks = []
for inst in benchmarks_with_runtime:
    if inst['size_category'] == 'L':
        l_size_benchmarks.append(inst)
    else:
        non_l_benchmarks.append(inst)

l_hours = sum(inst['runtime'] for inst in l_size_benchmarks) / 3600
non_l_hours = sum(inst['runtime'] for inst in non_l_benchmarks) / 3600
print("\nBenchmark separation by size category:")
print(f"  L-size (highmem): {len(l_size_benchmarks)} benchmarks, {l_hours:.1f} hours")
print(f"  S/M-size (standard): {len(non_l_benchmarks)} benchmarks, {non_l_hours:.1f} hours")

# One result dict per (size category, VM count) combination that was tried
results = []

# Candidate VM counts: fewer for L-size (highmem), more for S/M-size (standard)
l_vm_options = [2, 3, 4, 5] if len(l_size_benchmarks) > 0 else [1]
sm_vm_options = [8, 10, 12, 15]

# (size_category, label, machine kind, benchmarks, VM counts to try)
sweeps = (
    ('L', 'L-size', 'highmem', l_size_benchmarks, l_vm_options),
    ('S/M', 'S/M-size', 'standard', non_l_benchmarks, sm_vm_options),
)
banner = '=' * 50

for size_category, label, machine, category_benchmarks, vm_options in sweeps:
    print(f"\n{banner}")
    print(f"TESTING {label.upper()} BENCHMARKS ({machine.upper()} MACHINES)")
    print(banner)

    for num_vms in vm_options:
        if len(category_benchmarks) == 0:
            print(f"No {label} benchmarks with runtime data")
            break

        print(f"\nTesting {num_vms} {machine} VMs for {label} benchmarks:")

        bp_vms = balanced_partition(category_benchmarks, num_vms, MAX_RUNTIME_PER_VM_SECONDS)
        bp_result = analyze_allocation(bp_vms, f"{label} Balanced Partition ({num_vms} VMs)")
        # Record the REQUESTED VM count; this replaces the len(vms) value that
        # analyze_allocation stored under the same key.
        bp_result['num_vms'] = num_vms
        bp_result['size_category'] = size_category
        results.append(bp_result)

Using 13 benchmarks with real HiGHS runtime data
Excluded 0 benchmarks without runtime data
Total runtime of included benchmarks: 7.0 hours

⚙️  Runtime cap enabled: 3600 seconds (1.0 hours) per VM

Benchmark separation by size category:
  L-size (highmem): 0 benchmarks, 0.0 hours
  S/M-size (standard): 13 benchmarks, 7.0 hours

TESTING L-SIZE BENCHMARKS (HIGHMEM MACHINES)
No L-size benchmarks with runtime data

TESTING S/M-SIZE BENCHMARKS (STANDARD MACHINES)

Testing 8 standard VMs for S/M-size benchmarks:
Using 13 benchmarks with real runtime data (filtered from 13 total)
Target runtime per VM: 3152.1 seconds (0.9 hours)
Max runtime per VM: 3600.0 seconds (1.0 hours)

=== S/M-size Balanced Partition (8 VMs) ===
Total VMs created: 8
Active VMs (with benchmarks): 8
Empty VMs: 0
Total runtime: 25217.0 seconds (7.0 hours)
Average runtime per active VM: 3152.1 seconds (0.9 hours)
Runtime standard deviation: 276.7 seconds (0.1 hours)
Min runtime: 2706.9 seconds (0.8 hours)
Max runtime: 357

## Results Summary

In [230]:
# Print summary comparison table.
# Newlines are written as "\n"; an escaped "\\n" would be printed literally.
print("\n" + "="*80)
print("ALGORITHM COMPARISON SUMMARY")
print("="*80)

df_results = pd.DataFrame(results)

# Separate results by size category (the column is absent when `results` is empty)
has_size_category = 'size_category' in df_results.columns
l_results = df_results[df_results['size_category'] == 'L'] if has_size_category else pd.DataFrame()
sm_results = df_results[df_results['size_category'] == 'S/M'] if has_size_category else df_results

# Column widths are shared by header and rows so the table stays aligned;
# the algorithm column is wide enough for "S/M-size Balanced Partition".
print(f"\n{'Size':<6} {'VM Count':<9} {'Algorithm':<28} {'Efficiency':<12} {'Max Runtime (h)':<16} {'Std Dev (h)':<12}")
print("-" * 88)

for _, row in df_results.iterrows():
    size_cat = row.get('size_category', 'Mixed')
    # Drop the "(N VMs)" suffix; the VM count has its own column
    alg_name = row['algorithm'].split('(')[0].strip()
    print(f"{size_cat:<6} {row['num_vms']:<9} {alg_name:<28} "
          f"{row['efficiency']:<12.3f} {row['max_runtime']/3600:<16.1f} "
          f"{row['std_runtime']/3600:<12.1f}")

# Find best configurations for each size category (highest efficiency wins)
print(f"\n{'='*80}")
print("BEST CONFIGURATIONS:")
print(f"{'='*80}")

if len(l_results) > 0:
    best_l = l_results.loc[l_results['efficiency'].idxmax()]
    print(f"Best L-size (highmem): {best_l['num_vms']} VMs (efficiency: {best_l['efficiency']:.3f})")

if len(sm_results) > 0:
    best_sm = sm_results.loc[sm_results['efficiency'].idxmax()]
    print(f"Best S/M-size (standard): {best_sm['num_vms']} VMs (efficiency: {best_sm['efficiency']:.3f})")

# Calculate total deployment
if len(l_results) > 0 and len(sm_results) > 0:
    total_vms = best_l['num_vms'] + best_sm['num_vms']
    total_efficiency = (best_l['efficiency'] + best_sm['efficiency']) / 2
    print(f"\nTotal deployment: {total_vms} VMs ({best_l['num_vms']} highmem + {best_sm['num_vms']} standard)")
    print(f"Average efficiency: {total_efficiency:.3f}")
elif len(sm_results) > 0:
    print(f"\nTotal deployment: {best_sm['num_vms']} standard VMs only")
    print(f"Efficiency: {best_sm['efficiency']:.3f}")
elif len(l_results) > 0:
    # Only L-size results exist
    print(f"\nTotal deployment: {best_l['num_vms']} highmem VMs only")
    print(f"Efficiency: {best_l['efficiency']:.3f}")

ALGORITHM COMPARISON SUMMARY
\nSize   VM Count  Algorithm                 Efficiency   Max Runtime (h) Std Dev (h) 
-------------------------------------------------------------------------------------
S/M    8         S/M-size Balanced Partition 0.912         1.0             0.1
S/M    10        S/M-size Balanced Partition 0.858         0.9             0.1
S/M    12        S/M-size Balanced Partition 0.846         0.8             0.1
S/M    15        S/M-size Balanced Partition 0.760         0.8             0.1
BEST CONFIGURATIONS:
Best S/M-size (standard): 8 VMs (efficiency: 0.912)
\nTotal deployment: 8 standard VMs only
Efficiency: 0.912


## Generate Optimal Allocation

In [231]:
# Build the final allocation for each size category, using the VM count that
# scored the highest efficiency in the comparison above.
print("\n\nGenerating optimal allocations with size-based machine separation...")

if MAX_RUNTIME_PER_VM_SECONDS is not None:
    cap_hours = MAX_RUNTIME_PER_VM_SECONDS / 3600
    print(f"Runtime cap: {MAX_RUNTIME_PER_VM_SECONDS}s ({cap_hours:.1f}h) per VM")

# Defaults for categories that have no results
optimal_l_vms, optimal_sm_vms = [], []
best_l_result = best_sm_result = None

# L-size allocation (highmem machines)
if len(l_results) > 0:
    best_l_idx = l_results['efficiency'].idxmax()
    best_l_result = l_results.loc[best_l_idx]
    optimal_l_num_vms = best_l_result['num_vms']

    print(f"\nL-size benchmarks: {optimal_l_num_vms} highmem VMs")
    print(f"Efficiency: {best_l_result['efficiency']:.3f}")
    print(f"Max VM runtime: {best_l_result['max_runtime']/3600:.1f} hours")

    optimal_l_vms = balanced_partition(l_size_benchmarks, optimal_l_num_vms, MAX_RUNTIME_PER_VM_SECONDS)
    l_final_result = analyze_allocation(optimal_l_vms, "Final L-size Allocation - Highmem")

# S/M-size allocation (standard machines)
if len(sm_results) > 0:
    best_sm_idx = sm_results['efficiency'].idxmax()
    best_sm_result = sm_results.loc[best_sm_idx]
    optimal_sm_num_vms = best_sm_result['num_vms']

    print(f"\nS/M-size benchmarks: {optimal_sm_num_vms} standard VMs")
    print(f"Efficiency: {best_sm_result['efficiency']:.3f}")
    print(f"Max VM runtime: {best_sm_result['max_runtime']/3600:.1f} hours")

    optimal_sm_vms = balanced_partition(non_l_benchmarks, optimal_sm_num_vms, MAX_RUNTIME_PER_VM_SECONDS)
    sm_final_result = analyze_allocation(optimal_sm_vms, "Final S/M-size Allocation - Standard")

# Combined summary over both machine classes
all_optimal_vms = optimal_l_vms + optimal_sm_vms
total_vms = len(all_optimal_vms)
total_runtime = sum(vm.total_runtime for vm in all_optimal_vms)

separator = '=' * 60
print(f"\n{separator}")
print("FINAL ALLOCATION SUMMARY")
print(separator)
print(f"Total VMs: {total_vms}")
print(f"  - Highmem VMs (L-size): {len(optimal_l_vms)}")
print(f"  - Standard VMs (S/M-size): {len(optimal_sm_vms)}")
print(f"Total allocated runtime: {total_runtime/3600:.1f} hours")
print("Machine separation ensures optimal resource utilization")



Generating optimal allocations with size-based machine separation...
Runtime cap: 3600s (1.0h) per VM

S/M-size benchmarks: 8 standard VMs
Efficiency: 0.912
Max VM runtime: 1.0 hours
Using 13 benchmarks with real runtime data (filtered from 13 total)
Target runtime per VM: 3152.1 seconds (0.9 hours)
Max runtime per VM: 3600.0 seconds (1.0 hours)

=== Final S/M-size Allocation - Standard ===
Total VMs created: 8
Active VMs (with benchmarks): 8
Empty VMs: 0
Total runtime: 25217.0 seconds (7.0 hours)
Average runtime per active VM: 3152.1 seconds (0.9 hours)
Runtime standard deviation: 276.7 seconds (0.1 hours)
Min runtime: 2706.9 seconds (0.8 hours)
Max runtime: 3570.5 seconds (1.0 hours)
Runtime ratio (max/min): 1.32
Load balance efficiency: 0.912 (1.0 = perfect balance)

FINAL ALLOCATION SUMMARY
Total VMs: 8
  - Highmem VMs (L-size): 0
  - Standard VMs (S/M-size): 8
Total allocated runtime: 7.0 hours
Machine separation ensures optimal resource utilization


## Export Configuration

In [232]:
# Export the allocation to YAML files for infrastructure
# NOTE: This exports ONLY benchmarks with real runtime data, separated by size category
# ONLY exports VMs that have benchmarks assigned (skips empty VMs)


def _export_vm_configs(vms, output_dir, machine_type, years, filename_prefix, size_label):
    """Write one YAML config file per VM and return the number of benchmarks written.

    Args:
        vms: VM allocations to export; each must expose `benchmarks` and `total_runtime`.
        output_dir: Directory (Path) that receives the files.
        machine_type: Value stored under the 'machine-type' key of every file.
        years: List stored under the 'years' key of every file.
        filename_prefix: Files are named '<filename_prefix>_<two-digit index>.yaml'.
        size_label: Size description used only in the progress message.

    Returns:
        Total number of benchmark instances written across all files.
    """
    exported_benchmarks = 0
    for vm_idx, vm in enumerate(vms):
        # Group the VM's instances by benchmark name, keeping assignment order
        benchmarks_dict = {}
        for benchmark in vm.benchmarks:
            sizes = benchmarks_dict.setdefault(benchmark['name'], {'Sizes': []})['Sizes']
            sizes.append({
                'Name': benchmark['size_name'],
                'Size': benchmark['size_category'],
                'URL': benchmark['url'],
                '_runtime_s': round(benchmark['runtime'], 2)  # Add runtime for cross-checking
            })

        # Keys starting with '_' carry runtime metadata for cross-checking
        yaml_content = {
            'machine-type': machine_type,
            'years': list(years),
            '_total_runtime_s': round(vm.total_runtime, 2),  # Total runtime for this VM
            '_total_runtime_h': round(vm.total_runtime / 3600, 2),  # In hours for readability
            '_num_benchmarks': len(vm.benchmarks),
            'benchmarks': benchmarks_dict
        }

        filename = f'{filename_prefix}_{vm_idx:02d}.yaml'
        with open(output_dir / filename, 'w') as f:
            # sort_keys=False keeps the key order defined above
            yaml.safe_dump(yaml_content, f, default_flow_style=False, sort_keys=False)

        print(f"Exported {filename}: {len(vm.benchmarks)} {size_label} benchmarks, "
              f"{vm.total_runtime/3600:.1f}h runtime")

        exported_benchmarks += len(vm.benchmarks)
    return exported_benchmarks


output_dir = Path('infrastructure/benchmarks/runtime_optimized')
output_dir.mkdir(exist_ok=True, parents=True)

# Clear existing files so configs from a previous run do not linger
for stale_file in output_dir.glob('*.yaml'):
    stale_file.unlink()

# Filter to only VMs with benchmarks
active_l_vms = [vm for vm in optimal_l_vms if vm.benchmarks]
active_sm_vms = [vm for vm in optimal_sm_vms if vm.benchmarks]

print(f"Exporting {len(active_l_vms)} highmem VMs and {len(active_sm_vms)} standard VMs (skipping empty VMs)\n")

# Both machine classes currently use the same 'years' list.
# NOTE(review): presumably solver release years - confirm against the
# infrastructure config schema before extending the list.
export_years = [2025]

total_benchmarks_exported = 0

# L-size benchmarks always get highmem machines
total_benchmarks_exported += _export_vm_configs(
    active_l_vms, output_dir,
    machine_type='c4-highmem-8',
    years=export_years,
    filename_prefix='highmem_vm',
    size_label='L-size',
)

# S/M-size benchmarks get standard machines
total_benchmarks_exported += _export_vm_configs(
    active_sm_vms, output_dir,
    machine_type='c4-standard-2',
    years=export_years,
    filename_prefix='highs_prelim_test',
    size_label='S/M-size',
)

total_exported_vms = len(active_l_vms) + len(active_sm_vms)
skipped_empty_vms = len(optimal_l_vms) + len(optimal_sm_vms) - total_exported_vms

print(f"\n{'='*70}")
print(f"Configuration files written to {output_dir}/")
print(f"Total VMs exported: {total_exported_vms} (skipped {skipped_empty_vms} empty VMs)")
print(f"  - Highmem VMs: {len(active_l_vms)}")
print(f"  - Standard VMs: {len(active_sm_vms)}")
print(f"Total benchmarks exported: {total_benchmarks_exported}")
print(f"Total runtime allocated: {sum(vm.total_runtime for vm in active_l_vms + active_sm_vms)/3600:.1f} hours")
print("\nMACHINE SEPARATION POLICY:")
print("  - L-size benchmarks → c4-highmem-8 (high memory for large problems)")
print("  - S/M-size benchmarks → c4-standard-2 (cost-effective for smaller problems)")
print("\nNOTE: Only benchmarks with real HiGHS runtime data were included.")
print("      Runtime metadata added with '_runtime_s' and '_total_runtime_s' keys for cross-checking.")

Exporting 0 highmem VMs and 8 standard VMs (skipping empty VMs)

Exported highs_prelim_test_00.yaml: 1 S/M-size benchmarks, 0.8h runtime
Exported highs_prelim_test_01.yaml: 1 S/M-size benchmarks, 0.8h runtime
Exported highs_prelim_test_02.yaml: 1 S/M-size benchmarks, 0.8h runtime
Exported highs_prelim_test_03.yaml: 2 S/M-size benchmarks, 0.9h runtime
Exported highs_prelim_test_04.yaml: 2 S/M-size benchmarks, 0.9h runtime
Exported highs_prelim_test_05.yaml: 2 S/M-size benchmarks, 0.9h runtime
Exported highs_prelim_test_06.yaml: 2 S/M-size benchmarks, 1.0h runtime
Exported highs_prelim_test_07.yaml: 2 S/M-size benchmarks, 1.0h runtime

Configuration files written to infrastructure/benchmarks/runtime_optimized/
Total VMs exported: 8 (skipped 0 empty VMs)
  - Highmem VMs: 0
  - Standard VMs: 8
Total benchmarks exported: 13
Total runtime allocated: 7.0 hours

MACHINE SEPARATION POLICY:
  - L-size benchmarks → c4-highmem-8 (high memory for large problems)
  - S/M-size benchmarks → c4-standar