In [1]:
# Cell 1 – imports
import pyarrow.dataset as ds, pandas as pd, numpy as np, math, pathlib
from collections import defaultdict, deque
from tqdm.auto import tqdm
from cost_model import CostModel

# Input / output locations, relative to the notebook's working directory.
DATA = pathlib.Path("../data")
RESULT = pathlib.Path("../results")
RESULT.mkdir(exist_ok=True)   # no error when the directory already exists

# Cost model built from the provider configuration file.
cm = CostModel("../provider_configs/qpu_demo.yml")

  from pandas.core import (


In [2]:
# Cell 2 – load blocks and set Baseline A tags (equal thirds)
blocks = ds.dataset(DATA / "blocks.parquet").to_table().to_pandas()

# Baseline A ignores usage: the three tags are dealt out round-robin in row
# order (row 0 -> Atom, row 1 -> Photon, row 2 -> Spin, row 3 -> Atom, ...).
blocks["tag_A"] = [("Atom", "Photon", "Spin")[pos % 3]
                   for pos in range(len(blocks))]

# Lookup qpu_units -> Baseline A tag. If a qpu_units value occurs on several
# rows, the last row's tag is the one that is kept.
size2tag_A = {units: tag
              for units, tag in zip(blocks["qpu_units"], blocks["tag_A"])}

In [3]:
# Cell 3 – pre-compute lifetime-avg jobs & Baseline B tags
# Total n_workloads per qpu_units over the whole workload file; only the two
# columns that are needed are read.
tot_jobs = (
    ds.dataset(DATA / "workloads_daily.parquet")
      .to_table(columns=["qpu_units", "n_workloads"])
      .to_pandas()
      .groupby("qpu_units")
      .n_workloads
      .sum()
)

# 180 minus each block's lease_day, indexed by qpu_units.
# NOTE(review): 180 looks like the simulation horizon in days (the daily
# iterator also uses range(180)) – confirm. A lease_day of 180 would give a
# zero divisor below, and fillna(0) does not replace the resulting inf.
active_days = blocks.set_index("qpu_units")["lease_day"].rsub(180)

# Index-aligned division; sizes missing on either side become NaN and are then
# set to 0.
avg_jobs_life = tot_jobs.div(active_days).fillna(0)

def cheapest(j, atom_min=900, photon_min=176):
    """Return the tag to use for a job volume ``j``.

    Parameters
    ----------
    j : number
        Job count (callers pass lifetime, EWMA or rolling averages).
    atom_min : number, default 900
        Break-even threshold: ``j >= atom_min`` selects ``"Atom"``.
    photon_min : number, default 176
        Break-even threshold: ``photon_min <= j < atom_min`` selects
        ``"Photon"``.

    Returns
    -------
    str
        ``"Atom"``, ``"Photon"`` or ``"Spin"``. Anything that fails both
        comparisons (including NaN) falls through to ``"Spin"``.
    """
    if j >= atom_min:
        return "Atom"
    if j >= photon_min:
        return "Photon"
    return "Spin"

# Baseline B: one fixed tag per block, chosen once from the lifetime-average
# job count of the block's qpu_units.
blocks["tag_B"] = [*map(lambda units: cheapest(avg_jobs_life[units]),
                        blocks["qpu_units"])]

# Lookup qpu_units -> Baseline B tag (last row wins on repeated qpu_units).
size2tag_B = {units: tag
              for units, tag in zip(blocks["qpu_units"], blocks["tag_B"])}

In [4]:
# Cell 4 – helpers for strategies S (static per-day) & D (rolling)
ROLL_DAYS = 7   # max number of daily observations kept per size for strategy D
decay = 0.8     # EWMA factor for strategy S (weight kept on the previous value)

# Per-size running state, keyed by qpu_units; missing keys start at an empty /
# zero value thanks to defaultdict.
ewma = defaultdict(float)        # strategy S: smoothed job count
rolling_q = defaultdict(deque)   # strategy D: recent daily job counts
rolling_s = defaultdict(int)     # strategy D: running sum of rolling_q[size]

# Both adaptive strategies start from the lifetime optimum (Baseline B) and
# get their own copy so that retagging never touches size2tag_B.
size2tag_S = dict(size2tag_B)
size2tag_D = dict(size2tag_B)

In [5]:
# Cell 5 – workload iterator + cost helper
wl_ds = ds.dataset(DATA/"workloads_daily.parquet")
def workloads_by_day(n_days=180, skip_empty=False):
    """Yield ``(day, DataFrame)`` for day 0 .. ``n_days - 1``.

    Each frame holds the rows of ``wl_ds`` whose ``day`` column equals that
    day. Days without any rows are yielded too, as an empty frame with the
    dataset's columns: the daily loop charges acquisition and lease cost per
    yielded day, so dropping a quiet day would silently drop those charges
    (and leave a ``tqdm(total=n_days)`` bar short).

    Parameters
    ----------
    n_days : int, default 180
        Number of consecutive days to iterate, starting at day 0.
    skip_empty : bool, default False
        When True, days with no workload rows are not yielded.
    """
    for d in range(n_days):
        tbl = wl_ds.filter(ds.field("day")==d).to_table()
        if skip_empty and not tbl.num_rows:
            continue
        yield d, tbl.to_pandas()

def exec_trigger(df, mapping):
    """Sum execution and trigger cost over the rows of ``df``.

    For every row, the tag is looked up in ``mapping`` by the row's
    ``qpu_units``; the row contributes ``cm.exec(tag, n_workloads)`` plus
    ``cm.trigger(n_workloads)``. An empty frame gives 0. A ``qpu_units`` value
    that is missing from ``mapping`` raises ``KeyError``.
    """
    total = 0
    for row in df.itertuples(index=False):
        tag = mapping[row.qpu_units]
        total += cm.exec(tag, row.n_workloads) + cm.trigger(row.n_workloads)
    return total

In [6]:
# Cell 6 – daily loop
# Start every run of this cell from the same initial state. The EWMA / rolling
# accumulators and the S/D tag maps are mutated in place by the loop, so
# without this reset a second execution of the cell would continue from where
# the previous run stopped and produce different costs.
ewma.clear()
rolling_q.clear()
rolling_s.clear()
size2tag_S = size2tag_B.copy()
size2tag_D = size2tag_B.copy()


def lease_sum(map_):
    """Lease fee for the currently active blocks under tag mapping ``map_``.

    Reads the module-level ``active`` (qpu_units of the blocks whose lease_day
    is not after the day being processed), counts how many of them carry each
    tag and sums ``cm.lease(tag, n_blocks=count)`` over the tags. Sizes with no
    entry in ``map_`` are not counted.
    """
    vc = pd.Series(map_).reindex(active).value_counts()
    return sum(cm.lease(tag, n_blocks=vc[tag]) for tag in vc.index)


records = []
for day, df in tqdm(workloads_by_day(), total=180):
    # Acquisition is charged for the blocks whose lease starts today.
    acq_today = cm.acquisition((blocks.lease_day == day).sum())
    active = blocks.qpu_units[blocks.lease_day <= day]

    # ---------- Strategy S & D retagging ----------
    # NOTE(review): tags are updated from today's job counts *before* today's
    # exec cost is computed further down, so S and D price each day knowing
    # that day's volume – confirm this look-ahead is intended.
    trans_S = trans_D = 0.0
    for r in df.itertuples(index=False):
        sz, jobs = r.qpu_units, r.n_workloads

        # ----- EWMA for Static S -----
        # NOTE(review): ewma[sz] starts at 0.0, so the first few updates are
        # pulled toward zero and can trigger an early retag – confirm.
        ewma[sz] = decay * ewma[sz] + (1 - decay) * jobs
        new_S = cheapest(ewma[sz])
        if new_S != size2tag_S[sz]:
            trans_S += cm.transfer(new_S, 1)
            size2tag_S[sz] = new_S

        # ----- rolling window for Dynamic D -----
        # Keep at most ROLL_DAYS observations and their running sum, so the
        # window mean is available without re-adding the deque.
        rolling_q[sz].append(jobs)
        rolling_s[sz] += jobs
        if len(rolling_q[sz]) > ROLL_DAYS:
            rolling_s[sz] -= rolling_q[sz].popleft()
        avg7 = rolling_s[sz] / len(rolling_q[sz])
        new_D = cheapest(avg7)
        if new_D != size2tag_D[sz]:
            trans_D += cm.transfer(new_D, 1)
            size2tag_D[sz] = new_D

    # ---------- lease fees ----------
    lease_A = lease_sum(size2tag_A)
    lease_B = lease_sum(size2tag_B)
    lease_S = lease_sum(size2tag_S)
    lease_D = lease_sum(size2tag_D)

    # ---------- exec + trigger ----------
    cA = exec_trigger(df, size2tag_A)
    cB = exec_trigger(df, size2tag_B)
    cS = exec_trigger(df, size2tag_S)
    cD = exec_trigger(df, size2tag_D)

    # Only the adaptive strategies (S, D) ever retag, so only they pay
    # transfer cost.
    records.append({
        "day": day,
        "cost_A": acq_today + lease_A + cA,
        "cost_B": acq_today + lease_B + cB,
        "cost_S": acq_today + lease_S + cS + trans_S,
        "cost_D": acq_today + lease_D + cD + trans_D,
    })

# One row per processed day; built in a single call from the list of records.
metrics = pd.DataFrame(records)
metrics.to_parquet(RESULT/"daily_metrics.parquet", compression="snappy")

  0%|          | 0/180 [00:00<?, ?it/s]

In [10]:
# ------- Cost-comparison summary for all 4 strategies ------------
!pip -q install pyarrow tqdm

import pandas as pd, numpy as np, pathlib, itertools, importlib.util, sys, subprocess, json, math, collections, datetime, re, os, types
from IPython.display import display

# 1) load the 6-month daily metrics you wrote in Notebook 03
metrics = pd.read_parquet("../results/daily_metrics.parquet")

# 2) total cost for each strategy
totals = {
    "A Equal Thirds" : metrics["cost_A"].sum(),
    "B One-Shot"     : metrics["cost_B"].sum(),
    "S EWMA Daily"   : metrics["cost_S"].sum(),
    "D Rolling 7-Day": metrics["cost_D"].sum(),
}

tot_df = (pd.Series(totals, name="Total $")
            .sort_values()
            .to_frame())
tot_df["Rank"] = range(1, len(tot_df)+1)

# 3) pair-wise % gain/loss
strategies = tot_df.index.tolist()
comp = pd.DataFrame(index=strategies, columns=strategies, dtype=float)

for i, j in itertools.product(strategies, strategies):
    if i == j:
        comp.loc[i, j] = 0.0
    else:
        comp.loc[i, j] = (totals[j] - totals[i]) / totals[i] * 100

comp = comp.round(2)

# 4) display
print("🧾 6-Month Cost per Strategy (lower $ is better)")
display(tot_df.style.format({"Total $": "${:,.2f}"}))

print("↕️  % Gain (+) or Loss (-) relative to each other")
display(comp.style.format("{:+.2f}%"))

best = tot_df.index[0]
print(f"\n🏆  Cheapest strategy overall: **{best}** → "
      f"{tot_df.loc[best,'Total $']:,.2f} USD")

🧾 6-Month Cost per Strategy (lower $ is better)


Unnamed: 0,Total $,Rank
D Rolling 7-Day,"$1,815,573,126.70",1
S EWMA Daily,"$1,822,512,061.93",2
B One-Shot,"$1,878,361,787.71",3
A Equal Thirds,"$3,937,122,560.85",4


↕️  % Gain (+) or Loss (-) relative to each other


Unnamed: 0,D Rolling 7-Day,S EWMA Daily,B One-Shot,A Equal Thirds
D Rolling 7-Day,+0.00%,+0.38%,+3.46%,+116.85%
S EWMA Daily,-0.38%,+0.00%,+3.06%,+116.03%
B One-Shot,-3.34%,-2.97%,+0.00%,+109.60%
A Equal Thirds,-53.89%,-53.71%,-52.29%,+0.00%



🏆  Cheapest strategy overall: **D Rolling 7-Day** → 1,815,573,126.70 USD
