In [1]:
!pip -q install "stable-baselines3==2.2.1" gymnasium==0.29 pyarrow tqdm
import numpy as np, pandas as pd, pyarrow.dataset as ds, pathlib, collections, math
from stable_baselines3 import DQN
from cost_model import CostModel

  from pandas.core import (


In [2]:
DATA   = pathlib.Path("../data")
blocks = ds.dataset(DATA/"blocks.parquet").to_table().to_pandas()
wl_ds  = ds.dataset(DATA/"workloads_daily.parquet")
cm     = CostModel("../provider_configs/qpu_demo.yml")

EXEC      = cm.exec_fee
TRANSFER  = cm.transfer_fee
LEASE     = {t: cm.lease_fee[t]*24 for t in cm.lease_fee}   # daily $

In [3]:
jobs_tbl = (wl_ds
            .to_table(columns=["qpu_units","day","n_workloads"])
            .to_pandas()
            .pivot(index="qpu_units", columns="day", values="n_workloads")
            .fillna(0)
            .astype(np.int32))

lease_day = blocks.set_index("qpu_units").lease_day.to_dict()

# lifetime-avg jobs → one-shot cheapest tag
tot_jobs   = jobs_tbl.sum(axis=1).values
active_days= 180 - np.array([lease_day[s] for s in jobs_tbl.index])
avg_jobs   = tot_jobs / active_days
bins   = np.array([0, 176, 900, np.inf])
labels = np.array(["Spin","Photon","Atom"])
tag_idx_B = labels[np.digitize(avg_jobs, bins)-1]            # array size N
tag_map_B = dict(zip(jobs_tbl.index, tag_idx_B))

In [4]:
MODEL_PATH = pathlib.Path("models/qpu_dqn.zip")
model = DQN.load(MODEL_PATH, env=None)
print("✔ loaded agent", MODEL_PATH)

  th_object = th.load(file_content, map_location=device)


✔ loaded agent models/qpu_dqn.zip


In [5]:
def fast_simulate(policy):
    sizes  = jobs_tbl.index.values
    jobs   = jobs_tbl.values                 # shape (N, 180)
    lease  = np.array([lease_day[s] for s in sizes])

    tag_arr = np.vectorize({"Atom":0,"Photon":1,"Spin":2}.get)(tag_idx_B.copy())
    total_B = total_DQN = 0.0
    rolling_sum = np.zeros_like(jobs[:,0], dtype=float)

    for d in range(180):
        active = lease <= d
        vc = np.bincount(tag_arr[active], minlength=3)
        total_B   += LEASE["Atom"]*vc[0] + LEASE["Photon"]*vc[1] + LEASE["Spin"]*vc[2]
        total_DQN += total_B - total_B   # placeholder; will add below

        j_today = jobs[:, d]
        idx     = np.where((j_today>0) & active)[0]
        if idx.size:
            rolling_sum[idx] += j_today[idx]
            over = (d - lease[idx] >= 7)
            if over.any():
                rolling_sum[idx[over]] -= jobs[idx[over], d-7]
            avg7 = rolling_sum[idx] / np.minimum(7, d - lease[idx] + 1)

            obs = np.stack([np.full(idx.size, 179-d),
                            avg7/1e6,
                            tag_arr[idx],
                            j_today[idx]/1e6], axis=1).astype(np.float32)

            new_tags = policy.predict(obs, deterministic=True)[0]
            changed  = new_tags != tag_arr[idx]
            if changed.any():
                changed_to = new_tags[changed]
                total_DQN += np.sum([TRANSFER[{0:"Atom",1:"Photon",2:"Spin"}[t]] for t in changed_to])
                tag_arr[idx[changed]] = changed_to

            typ_arr = np.array(["Atom","Photon","Spin"])[tag_arr[idx]]
            total_B   += np.sum([EXEC[t]*j + cm.trigger_fee*j for t,j in zip(typ_arr, j_today[idx])])
            total_DQN += np.sum([EXEC[t]*j + cm.trigger_fee*j for t,j in zip(typ_arr, j_today[idx])])
    return total_B, total_DQN

In [6]:
cost_B, cost_DQN = fast_simulate(model)
print(f"Baseline B  : ${cost_B:,.2f}")
print(f"DQN agent   : ${cost_DQN:,.2f}")
print(f"Savings vs B: {(cost_B-cost_DQN)/cost_B*100:.1f} %")

Baseline B  : $1,880,178,685.56
DQN agent   : $1,032,365,809.11
Savings vs B: 45.1 %


In [7]:
import json, pathlib
RESULTS = pathlib.Path("../results")
RESULTS.mkdir(exist_ok=True, parents=True)
json.dump({"rl_total": cost_DQN}, open(RESULTS/"rl_total.json", "w"))
print("✔ saved RL total →", RESULTS/"rl_total.json")

✔ saved RL total → ../results/rl_total.json
