In [40]:
!pip -q install pyarrow tqdm
import pyarrow.dataset as ds, pandas as pd, numpy as np, pathlib, math
from tqdm.auto import tqdm
from cost_model import CostModel
DATA   = pathlib.Path("../data")
RESULT = pathlib.Path("../results"); RESULT.mkdir(exist_ok=True)

cm = CostModel("../provider_configs/qpu_demo.yml")

In [41]:
# Read the whole block inventory into a pandas DataFrame.
blocks = ds.dataset(DATA / "blocks.parquet").to_table().to_pandas()

# Baseline A: hand out Atom, Photon, Spin in turn, following row order, so the
# three types end up with (nearly) the same number of blocks.
type_cycle = [("Atom", "Photon", "Spin")[i % 3] for i in range(len(blocks))]
blocks["type_initial"] = type_cycle

# Lookup from a block's qpu_units value to its baseline-A type.
size2type_base = dict(zip(blocks["qpu_units"], blocks["type_initial"]))
print(
    "Blocks:", len(blocks),
    "| baseline distribution:",
    pd.Series(size2type_base).value_counts().to_dict(),
)

Blocks: 981230 | baseline distribution: {'Atom': 327077, 'Photon': 327077, 'Spin': 327076}


In [42]:
# Lifetime workload count for each qpu_units value, summed over all daily rows.
tot_jobs = (
    ds.dataset(DATA / "workloads_daily.parquet")
      .to_table(columns=["qpu_units", "n_workloads"])
      .to_pandas()
      .groupby("qpu_units")["n_workloads"]
      .sum()
)

# Days each block is active inside the 180-day horizon: 180 - lease_day.
# NOTE(review): a lease_day of 180 would give 0 active days and a division by
# zero below — presumably lease_day is always < 180; confirm against the data.
active_days = blocks.set_index("qpu_units")["lease_day"].rsub(180)

# Average jobs per active day, as a plain dict keyed by qpu_units.
avg_jobs = tot_jobs.div(active_days).to_dict()

def cheapest(avg, atom_min=900, photon_min=176):
    """Return the block type to use for a given average daily job count.

    Parameters
    ----------
    avg : float
        Average number of jobs per day.
    atom_min : float, default 900
        Break-even threshold: at or above this average the result is "Atom".
    photon_min : float, default 176
        Break-even threshold: at or above this average (and below
        ``atom_min``) the result is "Photon".

    Returns
    -------
    str
        "Atom", "Photon" or "Spin".  A NaN average compares false against
        both thresholds and therefore yields "Spin".
    """
    if avg >= atom_min:
        return "Atom"
    if avg >= photon_min:
        return "Photon"
    return "Spin"

# Tag every block by its lifetime average; a block missing from avg_jobs is
# treated as having 0 jobs per day.
size2type_opt = dict(
    (sz, cheapest(avg_jobs.get(sz, 0))) for sz in blocks["qpu_units"]
)

In [48]:
# ------------------------------------------------------------
# Cell 3-bis  –  Baseline B: tag each block once with its cheapest type
# ------------------------------------------------------------
# Built with the same rule and inputs as size2type_opt: the lifetime average
# from avg_jobs (0 when the block has no entry) fed through cheapest().
size2type_baseB = {
    block_id: cheapest(avg_jobs.get(block_id, 0))
    for block_id in blocks["qpu_units"]
}

In [43]:
# Dataset handle over the daily workload file; workloads_by_day() filters it per day.
wl_ds = ds.dataset(DATA.joinpath("workloads_daily.parquet"))

def workloads_by_day(n_days=180):
    """Yield ``(day, DataFrame)`` pairs of workload rows, one day at a time.

    Each day is fetched with its own filtered read of ``wl_ds``, so only a
    single day's rows are converted to pandas at once.

    Parameters
    ----------
    n_days : int, default 180
        Length of the horizon; days ``0 .. n_days - 1`` are scanned.

    Yields
    ------
    tuple[int, pandas.DataFrame]
        The day number and the rows whose ``day`` column equals it.  Days
        with no rows are skipped entirely, so a caller that accrues per-day
        costs never sees those days.
    """
    for d in range(n_days):
        tbl = wl_ds.filter(ds.field("day") == d).to_table()
        if tbl.num_rows:          # skip days that have no workload rows
            yield d, tbl.to_pandas()

def exec_trigger_cost(day_df, type_map):
    """Total execution-plus-trigger cost for one day's workload rows.

    Parameters
    ----------
    day_df : pandas.DataFrame
        One row per workload record, with ``qpu_units`` and ``n_workloads``
        columns.
    type_map : mapping
        Maps a row's ``qpu_units`` value to the block type used for pricing.
        A value missing from the mapping raises ``KeyError``.

    Returns
    -------
    float
        Sum over rows of ``cm.exec(type, jobs) + cm.trigger(jobs)``;
        ``0.0`` when ``day_df`` has no rows.
    """
    total = 0.0
    for row in day_df.itertuples(index=False):
        block_type = type_map[row.qpu_units]
        n_jobs = row.n_workloads
        total += cm.exec(block_type, n_jobs) + cm.trigger(n_jobs)
    return total

In [49]:
def exec_trigger_cost_B(day_df):
    """Execution-plus-trigger cost of one day's rows under the Baseline B tags.

    Thin wrapper around ``exec_trigger_cost`` with the type map fixed to
    ``size2type_baseB``, so the per-row pricing logic lives in one place.

    Parameters
    ----------
    day_df : pandas.DataFrame
        One row per workload record, with ``qpu_units`` and ``n_workloads``
        columns.

    Returns
    -------
    float
        Whatever ``exec_trigger_cost(day_df, size2type_baseB)`` returns.
    """
    return exec_trigger_cost(day_df, size2type_baseB)

In [52]:
from collections import defaultdict, deque

# Trailing-window bookkeeping used by the retagging loop, keyed by qpu_units.
ROLL_DAYS = 7                                              # maximum window length
rolling_sum: dict[int, int] = defaultdict(int)             # sum of the counts currently in the window
rolling_q: dict[int, deque] = defaultdict(deque)           # the counts themselves, oldest on the left

# Working copy of the Baseline B tags; the retagging loop overwrites entries
# here, leaving size2type_baseB itself untouched.
size2type_dyn = dict(size2type_baseB)

def tag_for(avg7: float) -> str:
    """Return the cheapest tag for a rolling-window average job count.

    Delegates to ``cheapest`` so the rolling retag rule and the one-shot
    Baseline B rule always share the same break-even thresholds instead of
    keeping two copies of the numbers.

    Parameters
    ----------
    avg7 : float
        Average jobs per observation over the rolling window.

    Returns
    -------
    str
        "Atom", "Photon" or "Spin", as decided by ``cheapest(avg7)``.
    """
    return cheapest(avg7)

In [53]:
# Start every run of this cell from a clean slate: the rolling-window state and
# the dynamic tag map are mutated below, so without this reset a second run of
# the cell would build on the leftovers of the first one.
rolling_sum.clear()
rolling_q.clear()
size2type_dyn = size2type_baseB.copy()

# The Baseline A / B tag maps never change inside the loop, so convert them to
# Series once instead of rebuilding them on every simulated day.
tags_A = pd.Series(size2type_base)
tags_B = pd.Series(size2type_baseB)


def _lease_total(tag_counts):
    """Add up ``cm.lease`` over a Series of block counts indexed by tag."""
    return sum(cm.lease(t, n_blocks=tag_counts.get(t, 0)) for t in tag_counts.index)


records = []

# NOTE: workloads_by_day() skips days that have no workload rows, so such days
# contribute no acquisition or lease cost to any of the three strategies.
for day, df in tqdm(workloads_by_day(), total=180):
    # --- acquisition on this day: priced on the number of blocks leased today ---
    acq_today = cm.acquisition((blocks.lease_day == day).sum())

    # --- update rolling window & maybe retag each size seen today ---
    # NOTE(review): today's jobs enter the average *before* today's exec cost
    # is priced with the (possibly new) tag, i.e. the greedy strategy sees the
    # current day's demand — confirm that this look-ahead is intended.
    # NOTE(review): the window holds the last ROLL_DAYS *rows* seen for a size;
    # that equals 7 calendar days only if every block has exactly one row per
    # day — verify against workloads_daily.
    trans_cost_today = 0.0
    for r in df.itertuples(index=False):
        sz, jobs = r.qpu_units, r.n_workloads

        # push today's count and evict the oldest once the window is over-full
        rolling_q[sz].append(jobs)
        rolling_sum[sz] += jobs
        if len(rolling_q[sz]) > ROLL_DAYS:
            rolling_sum[sz] -= rolling_q[sz].popleft()

        avg7 = rolling_sum[sz] / len(rolling_q[sz])          # average over 1–7 entries
        new_tag = tag_for(avg7)
        cur_tag = size2type_dyn[sz]

        if new_tag != cur_tag:                               # retag & pay fee
            trans_cost_today += cm.transfer(new_tag, 1)
            size2type_dyn[sz] = new_tag

    # --- blocks whose lease has started on or before today ---
    active_sizes = blocks.qpu_units[blocks.lease_day <= day]

    # ----- lease fees for each strategy: count active blocks per tag -----
    nA = tags_A.reindex(active_sizes).value_counts()
    nB = tags_B.reindex(active_sizes).value_counts()
    nG = pd.Series(size2type_dyn).reindex(active_sizes).value_counts()  # rebuilt: tags change daily

    leaseA = _lease_total(nA)
    leaseB = _lease_total(nB)
    leaseG = _lease_total(nG)

    # ----- exec + trigger -----
    execA = exec_trigger_cost(df, size2type_base)
    execB = exec_trigger_cost_B(df)
    execG = exec_trigger_cost(df, size2type_dyn)

    records.append({
        "day": day,
        "baselineA_cost": acq_today + leaseA + execA,
        "baselineB_cost": acq_today + leaseB + execB,
        "greedy_cost"   : acq_today + leaseG + execG + trans_cost_today  # v3
    })

# One row per simulated day; persisted for the reporting cell.
metrics = pd.DataFrame(records)
metrics.to_parquet(RESULT/"daily_metrics.parquet", index=False, compression="snappy")

  0%|          | 0/180 [00:00<?, ?it/s]

In [54]:
# ------------------------------------------------------------
# Cell X – Headline totals and the saving of greedy over Baseline B
# ------------------------------------------------------------
import pandas as pd, pathlib

# Read the daily metrics back from disk, so the figures below describe the
# persisted file rather than whatever `metrics` currently holds in memory.
METRICS_PATH = pathlib.Path("../results/daily_metrics.parquet")
metrics = pd.read_parquet(METRICS_PATH)

# Cost of each strategy summed over all recorded days.
tot_A = metrics["baselineA_cost"].sum()
tot_B = metrics["baselineB_cost"].sum()
tot_G = metrics["greedy_cost"].sum()

print(f"Baseline A (equal thirds) : ${tot_A:,.2f}")
print(f"Baseline B (one-shot tag) : ${tot_B:,.2f}")
print(f"Greedy v3 (7-day retag)   : ${tot_G:,.2f}")

# Relative saving of the greedy strategy, as a percentage of Baseline B.
print(f"\nSavings vs Baseline B     : {(tot_B - tot_G) / tot_B * 100:,.1f} %")

Baseline A (equal thirds) : $3,937,122,560.85
Baseline B (one-shot tag) : $1,878,361,787.71
Greedy v3 (7-day retag)   : $1,815,573,126.70

Savings vs Baseline B     : 3.3 %


In [47]:
import pandas as pd, pyarrow.dataset as ds
ds_blocks = ds.dataset("../data/blocks.parquet")
ds_wl     = ds.dataset("../data/workloads_daily.parquet")

# Lifetime job count for each qpu_units value.
jobs = (
    ds_wl.to_table(columns=["qpu_units","n_workloads"])
         .to_pandas()
         .groupby("qpu_units")["n_workloads"]
         .sum()
)

# Put the Baseline A tag next to each job count and keep the ten busiest blocks.
tags = pd.Series(size2type_base, name="baseline_tag")
hot  = pd.concat([jobs, tags], axis=1).nlargest(10, "n_workloads")

# Show the per-tag execution fee alongside; the fee table is hard-coded in this
# cell rather than read from the cost model.
display(hot.assign(
    exec_fee=hot["baseline_tag"].map({"Atom":0.01,"Photon":0.05,"Spin":0.20})
))

Unnamed: 0,n_workloads,baseline_tag,exec_fee
33823285,39590.0,Photon,0.05
77946800,39566.0,Spin,0.2
96319763,39543.0,Atom,0.01
63482173,39523.0,Spin,0.2
17119592,39507.0,Atom,0.01
85200970,39490.0,Spin,0.2
9892445,39480.0,Photon,0.05
98464681,39438.0,Atom,0.01
9697788,39433.0,Photon,0.05
19160176,39424.0,Spin,0.2
