In [1]:
import time
import json
import requests

BASE_URL = "http://127.0.0.1:8000"
MEASURE_URL = f"{BASE_URL}/api/v1/measure"
TASK_URL = f"{BASE_URL}/api/v1/tasks"


In [2]:
# Simple function to square a number
candidate_code = """\
def solve(x):
    return x * x
"""

def make_tests(x: int):
    # Two trivial tests per job
    return [
        {"test_id": f"t1-{x}", "inputs": [x], "expected_output": x * x},
        {"test_id": f"t2-{x}", "inputs": [x], "expected_output": x * x},
    ]


In [3]:
def queue_measure(x: int, trials: int = 3, warmup: int = 1, timeout: int = 5):
    payload = {
        "candidate_code": candidate_code,
        "function_name": "solve",
        "test_cases": make_tests(x),
        "timeout_seconds": timeout,
        "energy_measurement_trials": trials,
        "warmup_trials": warmup,
    }
    r = requests.post(MEASURE_URL, json=payload, timeout=30)
    r.raise_for_status()
    return r.json()  # contains task_id, poll_url, etc.


In [4]:
inputs = [2, 7, 11, 13, 17, 19, 23, 29]  # adjust as desired
queued = [queue_measure(x) for x in inputs]
task_map = {item["task_id"]: x for item, x in zip(queued, inputs)}

print(f"Queued {len(task_map)} tasks")
for task_id, x in task_map.items():
    print(task_id, "=>", x)


Queued 8 tasks
f1a42358-3652-4deb-930b-b97a3f7e2fca => 2
2720c8ea-e042-4f38-9cea-56fbcc841d90 => 7
791e81ee-8954-4ae4-aa4f-00311695a2c1 => 11
5099fb65-6868-4e1c-a202-cc656af3e53e => 13
c6183394-04ba-41fb-a3a0-6e43ab32a447 => 17
597becf6-6351-4d6f-ae96-2ea901bd75bf => 19
5989aa83-9422-4cbd-84a2-be6b3d00eaa1 => 23
8f2f8119-c17e-4f53-a412-30e2434883ae => 29


In [5]:
def get_task(task_id: str):
    r = requests.get(f"{TASK_URL}/{task_id}", timeout=30)
    r.raise_for_status()
    return r.json()

def is_done(resp: dict) -> bool:
    # API returns status: queued | running | completed | failed
    return resp.get("status") in {"completed", "failed"}


In [6]:
results = {}
pending = set(task_map.keys())
start = time.time()
poll_interval = 1.0
max_wait_seconds = 300  # 5 minutes cap

while pending and (time.time() - start) < max_wait_seconds:
    done_now = []
    for task_id in list(pending):
        try:
            resp = get_task(task_id)
            status = resp.get("status")
            print(f"{task_id[:8]}… status={status}")
            if is_done(resp):
                results[task_id] = resp
                done_now.append(task_id)
        except Exception as e:
            print(f"{task_id[:8]}… poll error: {e}")
    for t in done_now:
        pending.remove(t)
    if pending:
        time.sleep(poll_interval)

print(f"\nCompleted {len(results)}/{len(task_map)}")
if pending:
    print("Still pending:", list(pending))


c6183394… status=completed
8f2f8119… status=completed
5099fb65… status=completed
5989aa83… status=completed
597becf6… status=completed
2720c8ea… status=failed
791e81ee… status=completed
f1a42358… status=failed

Completed 8/8


In [7]:
def pretty_energy(resp: dict):
    em = resp.get("energy_metrics") or {}
    return {
        "median_pkg_J": em.get("median_package_energy_joules"),
        "median_ram_J": em.get("median_ram_energy_joules"),
        "median_total_J": em.get("median_total_energy_joules"),
        "median_time_s": em.get("median_execution_time_seconds"),
        "power_W": em.get("power_consumption_watts"),
    }

rows = []
for task_id, x in task_map.items():
    resp = results.get(task_id)
    if not resp:
        rows.append({"task_id": task_id, "x": x, "status": "missing"})
        continue

    status = resp.get("status")
    if status == "completed":
        rows.append({
            "task_id": task_id,
            "x": x,
            "status": status,
            **pretty_energy(resp),
        })
    else:
        rows.append({
            "task_id": task_id,
            "x": x,
            "status": status,
            "error_type": resp.get("error_type"),
            "error_message": resp.get("error_message"),
        })

from pprint import pprint
pprint(rows)


[{'error_message': 'Energy measurement failed',
  'error_type': 'incorrect_solution',
  'status': 'failed',
  'task_id': 'f1a42358-3652-4deb-930b-b97a3f7e2fca',
  'x': 2},
 {'error_message': 'Energy measurement failed',
  'error_type': 'incorrect_solution',
  'status': 'failed',
  'task_id': '2720c8ea-e042-4f38-9cea-56fbcc841d90',
  'x': 7},
 {'median_pkg_J': 1.4099999999999999e-09,
  'median_ram_J': 1.8e-10,
  'median_time_s': 2.2040003386791795e-06,
  'median_total_J': 1.5899999999999999e-09,
  'power_W': 0.0007214154971286711,
  'status': 'completed',
  'task_id': '791e81ee-8954-4ae4-aa4f-00311695a2c1',
  'x': 11},
 {'median_pkg_J': 1.52e-09,
  'median_ram_J': 1.9e-10,
  'median_time_s': 2.3070006136549637e-06,
  'median_total_J': 1.7099999999999999e-09,
  'power_W': 0.0007412221695471766,
  'status': 'completed',
  'task_id': '5099fb65-6868-4e1c-a202-cc656af3e53e',
  'x': 13},
 {'median_pkg_J': 1.6e-09,
  'median_ram_J': 1.8e-10,
  'median_time_s': 2.1670002752216533e-06,
  'median

In [8]:
import statistics as stats

completed = [r for r in rows if r.get("status") == "completed"]
if not completed:
    print("No completed tasks to summarize.")
else:
    total_J = [r["median_total_J"] for r in completed if r.get("median_total_J") is not None]
    time_s = [r["median_time_s"] for r in completed if r.get("median_time_s") is not None]
    power_W = [r["power_W"] for r in completed if r.get("power_W") is not None]

    summary = {
        "tasks_completed": len(completed),
        "median_total_J": (stats.median(total_J) if total_J else None),
        "median_time_s": (stats.median(time_s) if time_s else None),
        "median_power_W": (stats.median(power_W) if power_W else None),
    }
    pprint(summary)


{'median_power_W': 0.0007420001792236132,
 'median_time_s': 2.2549993445863947e-06,
 'median_total_J': 1.665e-09,
 'tasks_completed': 6}


In [43]:
# Helpers + candidate implementations
import requests, time
from pprint import pprint

try:
    BASE_URL
except NameError:
    BASE_URL = "http://127.0.0.1:8000"
MEASURE_URL = f"{BASE_URL}/api/v1/measure"
TASK_URL = f"{BASE_URL}/api/v1/tasks"

def fib_ref(n: int) -> int:
    a, b = 0, 1
    for _ in range(n):
        a, b = b, a + b
    return a

candidate_code_fast = """\
def solve(n):
    a, b = 0, 1
    for _ in range(n):
        a, b = b, a + b
    return a
"""

candidate_code_slow = """\
def solve(n):
    if n < 2:
        return n
    return solve(n-1) + solve(n-2)
"""


In [44]:
def make_tests_fib(ns):
    return [
        {"test_id": f"fib-{n}", "inputs": [n], "expected_output": fib_ref(n)}
        for n in ns
    ]

# Choose n values (keep slow ones modest)
# fast_ns = [28, 32, 36]
# slow_ns = [22, 24]  # naive recursion; keep small to avoid timeouts

fast_ns = [100, 54 , 60]
slow_ns = [100, 54 , 60]  # naive recursion; keep small to avoid timeouts


In [45]:
def queue_measure_fib(candidate_code: str, ns, trials: int, warmup: int, timeout: int):
    payload = {
        "candidate_code": candidate_code,
        "function_name": "solve",
        "test_cases": make_tests_fib(ns),
        "timeout_seconds": timeout,
        "energy_measurement_trials": trials,
        "warmup_trials": warmup,
    }
    r = requests.post(MEASURE_URL, json=payload, timeout=30)
    r.raise_for_status()
    return r.json()  # includes task_id, poll_url


In [46]:
jobs = {}
jobs["fast"] = {
    "ns": fast_ns,
    "enqueue": queue_measure_fib(candidate_code_fast, fast_ns, trials=3, warmup=1, timeout=5),
}
jobs["slow"] = {
    "ns": slow_ns,
    "enqueue": queue_measure_fib(candidate_code_slow, slow_ns, trials=1, warmup=0, timeout=8),
}

for name, info in jobs.items():
    print(name, "=> task_id:", info["enqueue"]["task_id"], "ns:", info["ns"])


fast => task_id: 99788410-c626-4632-a260-da678b4b73bc ns: [100, 54, 60]
slow => task_id: 9e91b09a-76cc-45ae-a9d1-25020d2b7476 ns: [100, 54, 60]


In [41]:
def get_task(task_id: str):
    r = requests.get(f"{TASK_URL}/{task_id}", timeout=30)
    r.raise_for_status()
    return r.json()

def is_done(resp: dict) -> bool:
    return resp.get("status") in {"completed", "failed"}

def pretty_energy(resp: dict):
    em = resp.get("energy_metrics") or {}
    return {
        "median_pkg_J": em.get("median_package_energy_joules"),
        "median_ram_J": em.get("median_ram_energy_joules"),
        "median_total_J": em.get("median_total_energy_joules"),
        "median_time_s": em.get("median_execution_time_seconds"),
        "power_W": em.get("power_consumption_watts"),
    }


In [48]:
results = {}
start = time.time()
while set(results.keys()) != set(jobs.keys()):
    for name, info in jobs.items():
        if name in results:
            continue
        task_id = info["enqueue"]["task_id"]
        resp = get_task(task_id)
        print(f"{name}: {resp.get('status')}")
        if is_done(resp):
            results[name] = resp
    if set(results.keys()) != set(jobs.keys()):
        time.sleep(1.0)

print("\nDone:")
pprint({k: v.get("status") for k, v in results.items()})


fast: completed
slow: failed

Done:
{'fast': 'completed', 'slow': 'failed'}


In [49]:
rows = {}
for name, resp in results.items():
    if resp.get("status") == "completed":
        rows[name] = {
            "status": "completed",
            **pretty_energy(resp),
            "ns": jobs[name]["ns"],
        }
    else:
        rows[name] = {
            "status": resp.get("status"),
            "error_type": resp.get("error_type"),
            "error_message": resp.get("error_message"),
            "ns": jobs[name]["ns"],
        }

pprint(rows)


{'fast': {'median_pkg_J': 1.31e-09,
          'median_ram_J': 1.7e-10,
          'median_time_s': 1.2283999240025878e-05,
          'median_total_J': 1.48e-09,
          'ns': [100, 54, 60],
          'power_W': 0.00012048193516469822,
          'status': 'completed'},
 'slow': {'error_message': 'Energy measurement failed',
          'error_type': 'incorrect_solution',
          'ns': [100, 54, 60],
          'status': 'failed'}}


In [50]:
# Median

In [51]:
import time, requests
from pprint import pprint

BASE_URL = "http://127.0.0.1:8000"
MEASURE_URL = f"{BASE_URL}/api/v1/measure"
TASK_URL = f"{BASE_URL}/api/v1/tasks"

def get_task(task_id: str):
    r = requests.get(f"{TASK_URL}/{task_id}", timeout=30)
    r.raise_for_status()
    return r.json()

def is_done(resp: dict) -> bool:
    return resp.get("status") in {"completed", "failed"}


In [62]:
candidate_slow_class = """\
from typing import List

class Solution:
    def findMedianSortedArrays(self, nums1: List[int], nums2: List[int]) -> float:
        merged = []
        i = j = 0
        while i < len(nums1) and j < len(nums2):
            if nums1[i] < nums2[j]:
                merged.append(nums1[i]); i += 1
            else:
                merged.append(nums2[j]); j += 1
        while i < len(nums1):
            merged.append(nums1[i]); i += 1
        while j < len(nums2):
            merged.append(nums2[j]); j += 1
        n = len(merged)
        mid = n // 2
        if n % 2 == 0:
            return (merged[mid-1] + merged[mid]) / 2.0
        else:
            return float(merged[mid])

def findMedianSortedArrays(nums1, nums2):
    # top-level wrapper so the service can call this function name
    return Solution().findMedianSortedArrays(nums1, nums2)
"""



In [63]:
candidate_optimal = """\
def findMedianSortedArrays(nums1, nums2):
    A, B = nums1, nums2
    m, n = len(A), len(B)
    if m > n:
        A, B, m, n = B, A, n, m
    total = m + n
    half = total // 2

    lo, hi = 0, m
    while lo <= hi:
        i = (lo + hi) // 2
        j = half - i

        Aleft = A[i-1] if i > 0 else float('-inf')
        Aright = A[i] if i < m else float('inf')
        Bleft = B[j-1] if j > 0 else float('-inf')
        Bright = B[j] if j < n else float('inf')

        if Aleft <= Bright and Bleft <= Aright:
            if total % 2:
                return float(min(Aright, Bright))
            return (max(Aleft, Bleft) + min(Aright, Bright)) / 2.0
        elif Aleft > Bright:
            hi = i - 1
        else:
            lo = i + 1

    raise ValueError("Invalid input")
"""


In [65]:
cases = [
    ([20, 67], [37, 85], 52.0),
    ([1, 2, 13, 22, 34, 46, 63, 86], [59, 80], 40.0),
    ([8, 57, 82, 87], [8, 18, 20, 23, 40, 41, 54, 63, 72, 93], 47.5),
    ([3, 36, 78], [13, 20, 28, 45, 59, 89], 36.0),
    ([17, 34, 43, 48, 51, 53, 83, 88], [43, 48, 54, 78, 88], 51.0),
    ([21, 28, 53, 63, 94], [6, 9, 25, 27, 31, 55, 60, 67, 78], 42.0),
    ([17, 50, 54], [1, 20, 46, 52, 53, 62, 63, 66, 70], 52.5),
    ([11, 13, 15, 27, 29, 55, 74, 89, 96], [9, 39, 72], 34.0),
    ([21, 24, 26, 33, 50, 53, 68, 87, 100], [1, 6, 12, 14, 34, 64, 81, 92, 93], 42.0),
    ([12, 29, 31, 38, 45, 47, 73, 76, 89, 92], [2, 22, 57, 85], 46.0),
]

def make_tests(cases):
    return [
        {
            "test_id": f"m-{i}",
            "inputs": [a, b],           # two positional args
            "expected_output": exp
        }
        for i, (a, b, exp) in enumerate(cases, 1)
    ]

tests = make_tests(cases)


In [66]:
import requests, time
from pprint import pprint

BASE_URL = "http://127.0.0.1:8000"
MEASURE_URL = f"{BASE_URL}/api/v1/measure"
TASK_URL = f"{BASE_URL}/api/v1/tasks"

def get_task(task_id: str):
    r = requests.get(f"{TASK_URL}/{task_id}", timeout=30)
    r.raise_for_status()
    return r.json()

def is_done(resp: dict) -> bool:
    return resp.get("status") in {"completed", "failed"}

def queue_job(code: str, tests: list, timeout=15, trials=1, warmup=0, mem_mb=1024):
    payload = {
        "candidate_code": code,
        "function_name": "findMedianSortedArrays",
        "test_cases": tests,
        "timeout_seconds": timeout,
        "memory_limit_mb": mem_mb,
        "energy_measurement_trials": trials,
        "warmup_trials": warmup,
    }
    r = requests.post(MEASURE_URL, json=payload, timeout=60)
    r.raise_for_status()
    return r.json()


In [69]:
jobs = {
    "slow_class": queue_job(candidate_slow_class, tests_moderate, timeout=15, trials=3, warmup=1),
    "optimal":    queue_job(candidate_optimal,    tests_moderate, timeout=15, trials=3, warmup=1),
}
pprint({k: v["task_id"] for k, v in jobs.items()})

results = {}
while set(results) != set(jobs):
    for name, q in jobs.items():
        if name in results: 
            continue
        resp = get_task(q["task_id"])
        print(f"{name}: {resp.get('status')}")
        if is_done(resp):
            results[name] = resp
    if set(results) != set(jobs):
        time.sleep(1.0)

print("\nDone:")
pprint({k: v.get("status") for k, v in results.items()})


{'optimal': '761a98e2-5af1-43b1-8718-48e67dc1eeac',
 'slow_class': 'fbcdca93-12c5-4ccc-ad40-cf59139d04a7'}
slow_class: failed
optimal: running
optimal: completed

Done:
{'optimal': 'completed', 'slow_class': 'failed'}


In [71]:
def energy_row(resp: dict):
    em = resp.get("energy_metrics") or {}
    return {
        "median_pkg_J": em.get("median_package_energy_joules"),
        "median_ram_J": em.get("median_ram_energy_joules"),
        "median_total_J": em.get("median_total_energy_joules"),
        "median_time_s": em.get("median_execution_time_seconds"),
        "power_W": em.get("power_consumption_watts"),
    }

rows = {}
for name, resp in results.items():
    if resp.get("status") == "completed":
        rows[name] = {"status": "completed", **energy_row(resp)}
    else:
        rows[name] = {"status": resp.get("status"),
                      "error_type": resp.get("error_type"),
                      "error_message": resp.get("error_message")}
pprint(rows)

# Simple winner by energy (lower is better), then time
def pick_winner(rows):
    a, b = rows.get("brute"), rows.get("optimal")
    if not a or not b or a.get("status") != "completed" or b.get("status") != "completed":
        return "inconclusive"
    Ea, Eb = a["median_total_J"], b["median_total_J"]
    Ta, Tb = a["median_time_s"], b["median_time_s"]
    if Ea is None or Eb is None:
        return "inconclusive"
    if abs(Ea - Eb) > 0:  # prefer lower energy
        return "brute" if Ea < Eb else "optimal"
    if Ta is not None and Tb is not None:
        return "brute" if Ta < Tb else "optimal"
    return "inconclusive"

print("Winner:", pick_winner(rows))


{'optimal': {'median_pkg_J': 8.5e-10,
             'median_ram_J': 9e-11,
             'median_time_s': 1.8553999325376935e-05,
             'median_total_J': 9.4e-10,
             'power_W': 5.066293166855569e-05,
             'status': 'completed'},
 'slow_class': {'error_message': 'Energy measurement failed',
                'error_type': 'incorrect_solution',
                'status': 'failed'}}
Winner: inconclusive


In [72]:
import random

def make_sorted_array(n, low=0, high=10**6, seed=None):
    rng = random.Random(seed)
    arr = [rng.randint(low, high) for _ in range(n)]
    arr.sort()
    return arr

def median_two_sorted(a, b):
    # O(m+n) single pass to the median position
    m, n = len(a), len(b)
    total = m + n
    i = j = 0
    prev = curr = None
    target = total // 2
    for k in range(target + 1):
        prev = curr
        if i < m and (j >= n or a[i] <= b[j]):
            curr = a[i]; i += 1
        else:
            curr = b[j]; j += 1
    if total % 2:
        return float(curr)
    return (prev + curr) / 2.0


In [73]:
# Choose larger sizes; keep it to 1–2 cases for runtime
sizes = [(50_000, 50_000), (100_000, 50_000)]  # (len(nums1), len(nums2))

heavy_cases = []
for idx, (n1, n2) in enumerate(sizes, 1):
    a = make_sorted_array(n1, seed=10_000 + idx)
    b = make_sorted_array(n2, seed=20_000 + idx)
    exp = median_two_sorted(a, b)
    heavy_cases.append((a, b, exp))

def make_tests_from_cases(cases):
    return [
        {"test_id": f"heavy-{i}", "inputs": [a, b], "expected_output": exp}
        for i, (a, b, exp) in enumerate(cases, 1)
    ]

heavy_tests = make_tests_from_cases(heavy_cases)
len(heavy_tests), [t["test_id"] for t in heavy_tests]


(2, ['heavy-1', 'heavy-2'])

In [74]:
def queue_median_job(code: str, tests: list, trials=1, warmup=0, timeout=120, mem_mb=2048):
    payload = {
        "candidate_code": code,
        "function_name": "findMedianSortedArrays",
        "test_cases": tests,
        "timeout_seconds": timeout,        # per-test validation budget
        "memory_limit_mb": mem_mb,         # per-test memory budget
        "energy_measurement_trials": trials,
        "warmup_trials": warmup,
    }
    r = requests.post(MEASURE_URL, json=payload, timeout=60)
    r.raise_for_status()
    return r.json()

jobs_heavy = {
    "brute_heavy": queue_median_job(candidate_bruteforce, heavy_tests, trials=1, warmup=0, timeout=180, mem_mb=4096),
    "optimal_heavy": queue_median_job(candidate_optimal, heavy_tests, trials=1, warmup=0, timeout=180, mem_mb=4096),
}
{ k: v["task_id"] for k, v in jobs_heavy.items() }


{'brute_heavy': 'eb4f96b6-6873-40af-9c4e-924459cc24c4',
 'optimal_heavy': 'bf43823a-1b8b-43f4-9bb8-b984c36229a0'}

In [75]:
results_heavy = {}
start = time.time()
while set(results_heavy) != set(jobs_heavy):
    for name, q in jobs_heavy.items():
        if name in results_heavy:
            continue
        resp = get_task(q["task_id"])
        print(f"{name}: {resp.get('status')}")
        if is_done(resp):
            results_heavy[name] = resp
    if set(results_heavy) != set(jobs_heavy):
        time.sleep(2.0)

print("\nDone:")
pprint({k: v.get("status") for k, v in results_heavy.items()})


brute_heavy: completed
optimal_heavy: completed

Done:
{'brute_heavy': 'completed', 'optimal_heavy': 'completed'}


In [76]:
def energy_row(resp: dict):
    em = resp.get("energy_metrics") or {}
    return {
        "median_pkg_J": em.get("median_package_energy_joules"),
        "median_ram_J": em.get("median_ram_energy_joules"),
        "median_total_J": em.get("median_total_energy_joules"),
        "median_time_s": em.get("median_execution_time_seconds"),
        "power_W": em.get("power_consumption_watts"),
    }

rows_heavy = {}
for name, resp in results_heavy.items():
    if resp.get("status") == "completed":
        rows_heavy[name] = {"status": "completed", **energy_row(resp)}
    else:
        rows_heavy[name] = {"status": resp.get("status"),
                            "error_type": resp.get("error_type"),
                            "error_message": resp.get("error_message")}
pprint(rows_heavy)

def pick_winner(rows):
    a, b = rows.get("brute_heavy"), rows.get("optimal_heavy")
    if not a or not b or a.get("status") != "completed" or b.get("status") != "completed":
        return "inconclusive"
    Ea, Eb = a["median_total_J"], b["median_total_J"]
    Ta, Tb = a["median_time_s"], b["median_time_s"]
    if Ea is None or Eb is None: return "inconclusive"
    if abs(Ea - Eb) > 0: return "brute_heavy" if Ea < Eb else "optimal_heavy"
    if Ta is not None and Tb is not None: return "brute_heavy" if Ta < Tb else "optimal_heavy"
    return "inconclusive"

print("Winner (heavy):", pick_winner(rows_heavy))


{'brute_heavy': {'median_pkg_J': 2.327e-08,
                 'median_ram_J': 1.8200000000000001e-09,
                 'median_time_s': 0.00959544699981052,
                 'median_total_J': 2.509e-08,
                 'power_W': 2.6147817814527504e-06,
                 'status': 'completed'},
 'optimal_heavy': {'median_pkg_J': 2.225e-08,
                   'median_ram_J': 1.72e-09,
                   'median_time_s': 3.046400161110796e-05,
                   'median_total_J': 2.3970000000000002e-08,
                   'power_W': 0.000786830315530837,
                   'status': 'completed'}}
Winner (heavy): optimal_heavy


# Stress

In [85]:
import time
import random
import statistics as stats
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from pprint import pprint

BASE_URL = "http://127.0.0.1:8000"
MEASURE_URL = f"{BASE_URL}/api/v1/measure"
TASK_URL = f"{BASE_URL}/api/v1/tasks"

# Stress parameters
NUM_TASKS = 1000            # total tasks to queue
QUEUE_CONCURRENCY = 30     # parallel POSTs to API
POLL_CONCURRENCY = 30      # parallel polls
VALIDATION_TIMEOUT = 20    # per-test validation seconds
MEMORY_LIMIT_MB = 1024
TRIALS = 1                 # keep low for speed
WARMUP = 0


In [86]:
# A tiny CPU-bound workload with deterministic output
candidate_code = """\
def solve(n):
    s = 0
    for i in range(n):
        s = (s * 1664525 + 1013904223) & 0xFFFFFFFF
        s ^= i
    return int(s % 1000)
"""

def local_expected(n: int) -> int:
    s = 0
    for i in range(n):
        s = (s * 1664525 + 1013904223) & 0xFFFFFFFF
        s ^= i
    return int(s % 1000)


In [87]:
# Each task uses two tests with different n so there’s a little work
def make_task_tests(n1: int, n2: int):
    return [
        {"test_id": f"n1-{n1}", "inputs": [n1], "expected_output": local_expected(n1)},
        {"test_id": f"n2-{n2}", "inputs": [n2], "expected_output": local_expected(n2)},
    ]

# Generate random ns sized to finish well under VALIDATION_TIMEOUT
rng = random.Random(42)
pairs = [(rng.randint(50_000, 120_000), rng.randint(50_000, 120_000)) for _ in range(NUM_TASKS)]
work = [make_task_tests(a, b) for (a, b) in pairs]
len(work)


1000

In [88]:
def queue_one(tests):
    payload = {
        "candidate_code": candidate_code,
        "function_name": "solve",
        "test_cases": tests,
        "timeout_seconds": VALIDATION_TIMEOUT,
        "memory_limit_mb": MEMORY_LIMIT_MB,
        "energy_measurement_trials": TRIALS,
        "warmup_trials": WARMUP,
    }
    t0 = time.time()
    r = requests.post(MEASURE_URL, json=payload, timeout=60)
    r.raise_for_status()
    task_id = r.json()["task_id"]
    return {"task_id": task_id, "enqueued_at": t0}

def poll_one(task_id):
    while True:
        r = requests.get(f"{TASK_URL}/{task_id}", timeout=60)
        if r.status_code != 200:
            time.sleep(0.5); continue
        resp = r.json()
        status = resp.get("status")
        if status in {"completed", "failed"}:
            return {"task_id": task_id, "status": status, "resp": resp, "done_at": time.time()}
        time.sleep(0.5)


In [89]:
t_start = time.time()
queued = []

with ThreadPoolExecutor(max_workers=QUEUE_CONCURRENCY) as ex:
    futures = [ex.submit(queue_one, tests) for tests in work]
    for fut in as_completed(futures):
        try:
            queued.append(fut.result())
        except Exception as e:
            print("queue error:", e)

print(f"Queued {len(queued)}/{NUM_TASKS} tasks in {time.time()-t_start:.2f}s")


Queued 1000/1000 tasks in 4.03s


In [90]:
poll_start = time.time()
results = []

with ThreadPoolExecutor(max_workers=POLL_CONCURRENCY) as ex:
    futures = [ex.submit(poll_one, q["task_id"]) for q in queued]
    for fut in as_completed(futures):
        try:
            results.append(fut.result())
        except Exception as e:
            print("poll error:", e)

total_time = time.time() - t_start
print(f"Polled {len(results)}/{len(queued)} to completion in {time.time()-poll_start:.2f}s (total {total_time:.2f}s)")


Polled 1000/1000 to completion in 36.97s (total 46.55s)


In [91]:
# Join enqueued_at to compute end-to-end latency
enq_map = {q["task_id"]: q["enqueued_at"] for q in queued}
for r in results:
    r["latency_s"] = r["done_at"] - enq_map.get(r["task_id"], r["done_at"])

completed = [r for r in results if r["status"] == "completed"]
failed = [r for r in results if r["status"] == "failed"]

lat_all = [r["latency_s"] for r in results]
summary = {
    "total_tasks": len(queued),
    "completed": len(completed),
    "failed": len(failed),
    "success_rate_%": 100.0 * len(completed)/len(queued) if queued else 0.0,
    "lat_p50_s": (stats.median(lat_all) if lat_all else None),
    "lat_p95_s": (sorted(lat_all)[int(0.95*len(lat_all))-1] if lat_all else None),
    "throughput_tasks_per_s": (len(results)/ (max(total_time, 1e-6))),
}
pprint(summary)


{'completed': 1000,
 'failed': 0,
 'lat_p50_s': 20.387210607528687,
 'lat_p95_s': 38.97353911399841,
 'success_rate_%': 100.0,
 'throughput_tasks_per_s': 21.48347870944626,
 'total_tasks': 1000}


In [92]:
def take_energy(resp):
    em = (resp or {}).get("energy_metrics") or {}
    return (
        em.get("median_total_energy_joules"),
        em.get("median_execution_time_seconds"),
        em.get("power_consumption_watts"),
    )

Es, Ts, Ps = [], [], []
for r in completed:
    E, T, P = take_energy(r["resp"])
    if E is not None: Es.append(E)
    if T is not None: Ts.append(T)
    if P is not None: Ps.append(P)

energy_summary = {
    "E_total_median_J": (stats.median(Es) if Es else None),
    "T_median_s": (stats.median(Ts) if Ts else None),
    "P_median_W": (stats.median(Ps) if Ps else None),
    "samples": len(Es),
}
pprint(energy_summary)


{'E_total_median_J': 3.899999999999999e-09,
 'P_median_W': 1.4927716063548747e-07,
 'T_median_s': 0.02613814599953912,
 'samples': 1000}
