In [None]:
# ───────────────────── manual_inference.py ─────────────────────
import os
import torch
import numpy as np
from easydict import EasyDict as edict

# 1)  Your DI-engine policy
from ding.policy.pdqn import PDQNPolicy          # or pdqn_command if that’s your install
# 2)  Your custom environment
from exch_gym_env import ExchangeCNOTEnvDI

# ─── configuration ---------------------------------------------------
# Checkpoint to evaluate; adjust the path if the experiment folder lives elsewhere.
CKPT = "pdqn_exchange_cnot_250601_023553/ckpt/iteration_174.pth.tar"
if not os.path.isfile(CKPT):
    # Fail early with a readable message instead of a late torch.load error.
    raise FileNotFoundError(f"Checkpoint not found: {CKPT}")

# Policy configuration for the "pdqn" policy type, written as one nested
# mapping; EasyDict turns every nested dict into an attribute-accessible
# EasyDict as well.
cfg = edict({
    "type": "pdqn",
    "cuda": torch.cuda.is_available(),   # use the GPU only when one is present
    "on_policy": False,
    "model": {
        "obs_shape": 163,
        # NOTE(review): `encoder_hidden_size_list` is nested inside
        # `action_shape` here rather than sitting directly under `model`.
        # Confirm this matches the config used for training before moving
        # it, otherwise the checkpoint shapes may no longer line up.
        "action_shape": {
            "action_type_shape": 5,
            "action_args_shape": 1,
            "encoder_hidden_size_list": [256, 256, 256],
        },
    },
    "collect": {"n_sample": 0},                                # no data collection
    "other": {"replay_buffer": {"replay_buffer_size": 1}},     # dummy stub
    "model_load_mode": "ckpt",
    "load_path": CKPT,
})

# ─── build policy and load weights -----------------------------------
def _load_weights(module, state_dict, label):
    """Load ``state_dict`` into ``module`` and report any key mismatch.

    Loading stays non-strict (``strict=False``) so a partially matching
    checkpoint is still accepted, but the missing / unexpected keys that
    ``load_state_dict`` returns are printed instead of being thrown away.
    Without this, a checkpoint whose keys do not match the network would
    leave the parameters at their initial values without any indication.

    Args:
        module: object exposing ``load_state_dict`` (the network to fill).
        state_dict: parameter mapping taken from the checkpoint.
        label: short name used to prefix the warning messages.
    """
    result = module.load_state_dict(state_dict, strict=False)
    # getattr keeps this safe should a wrapper not forward the return value.
    missing = list(getattr(result, "missing_keys", ()))
    unexpected = list(getattr(result, "unexpected_keys", ()))
    if missing:
        print(
            f"WARNING [{label}]: {len(missing)} parameter(s) not found in the "
            f"checkpoint and left at their initial values: {missing}"
        )
    if unexpected:
        print(
            f"WARNING [{label}]: {len(unexpected)} checkpoint entr(y/ies) not "
            f"used by the network: {unexpected}"
        )


# Only the "eval" field is enabled: no learner or collector is set up.
policy = PDQNPolicy(cfg, enable_field=["eval"])
# map_location="cpu" lets the checkpoint be read on machines without a GPU.
ckpt = torch.load(CKPT, map_location="cpu")

# main network
_load_weights(policy._model, ckpt["model"], "model")
# target network (if the algorithm created one)
if hasattr(policy, "_target_model"):
    _load_weights(policy._target_model, ckpt["model"], "target_model")

# evaluation proxy (always inference, no ε-greedy, no noise)
eval_pol = policy.eval_mode        # ←  IMPORTANT: attribute access, no parentheses

# ─── evaluation run --------------------------------------------------
# Roll the policy out for a single episode, recording every chosen action.
env     = ExchangeCNOTEnvDI(use_act_scale=True)     # same flag as training
obs     = env.reset()                               # numpy-array, expected shape (163,)
seq     = []                                        # list of (pair_idx, p) tuples
total   = 0.0                                       # accumulated reward
done    = False
info    = {}                                        # stays defined even if no step is taken
device  = next(policy._model.parameters()).device   # cpu or cuda

# Stop when the environment signals termination or the depth limit is hit.
while not done and len(seq) < env.max_depth:
    obs_t = torch.as_tensor(obs, dtype=torch.float32, device=device)
    # PDQN expects a dict mapping env-id → obs tensor
    act_dict = eval_pol.forward({0: obs_t})
    act      = act_dict[0]["action"]
    # Hybrid action: a discrete choice plus its continuous argument.
    pair_idx = int(act["action_type"])
    p_value  = float(act["action_args"])

    seq.append((pair_idx, p_value))
    obs, reward, done, info = env.step((pair_idx, p_value))
    total += reward

# ─── print results ----------------------------------------------------
# Header names the checkpoint file the sequence was produced from.
ckpt_name = os.path.basename(CKPT)
print(f"\n=== BEST-PATH SEQUENCE  ({ckpt_name}) ===\n")

# One line per executed step, numbered from 1.
for step_no, (pair, param) in enumerate(seq, start=1):
    print(f"Step {step_no:2d}: pair = {pair},  p = {param:+.4f}")

# Metrics come from the last `info` dict; a missing key is shown as nan.
print("\nFinal metrics:")
fid64 = info.get("fid64", np.nan)
print(f" 64×64 fidelity    : {fid64:.6f}")
fid9 = info.get("fid9", np.nan)
print(f"  9×9 block fidelity: {fid9:.6f}")
print(f" Total return       : {total:.6f}")
# ────────────────────────────────────────────────────────────────────


=== BEST-PATH SEQUENCE  (iteration_174.pth.tar) ===

Step  1: pair = 2,  p = +0.9791
Step  2: pair = 2,  p = +0.9542
Step  3: pair = 2,  p = +0.9816
Step  4: pair = 2,  p = +0.9590
Step  5: pair = 2,  p = +0.9684
Step  6: pair = 2,  p = +0.9791
Step  7: pair = 2,  p = +0.9843
Step  8: pair = 2,  p = +0.9920
Step  9: pair = 2,  p = +0.9974
Step 10: pair = 2,  p = +0.9967
Step 11: pair = 2,  p = +0.9988
Step 12: pair = 2,  p = +0.9982
Step 13: pair = 2,  p = +0.9981
Step 14: pair = 2,  p = +0.9990
Step 15: pair = 2,  p = +0.9991
Step 16: pair = 2,  p = +0.9996
Step 17: pair = 2,  p = +0.9999
Step 18: pair = 2,  p = +0.9999

Final metrics:
 64×64 fidelity    : 0.071728
  9×9 block fidelity: 0.157529
 Total return       : 61.971058


In [45]:
import pickle

# Pickled result file of the training run; adjust path if it sits elsewhere.
file_path = "pdqn_exchange_cnot_250601_023553/result.pkl"

# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open(file_path, mode="rb") as result_file:
    obj = pickle.load(result_file)

In [48]:
# Show the container type, then each entry's type with a preview of its value.
print(type(obj))
for key, value in obj.items():
    preview = str(value)[:800]          # cap long values at 800 characters
    print(f"{key}: {type(value)}  →  sample: {preview} …")


<class 'dict'>
stop: <class 'numpy.bool_'>  →  sample: False …
env_step: <class 'int'>  →  sample: 25056 …
train_iter: <class 'int'>  →  sample: 174 …
eval_value: <class 'numpy.float64'>  →  sample: -6.028942584991455 …
eval_value_raw: <class 'list'>  →  sample: [-6.028942584991455, -6.028942584991455, -6.028942584991455, -6.028942584991455] …
finish_time: <class 'str'>  →  sample: Sun Jun  1 02:56:14 2025 …


In [49]:
import json, pandas as pd

def _to_jsonable(value):
    """Fallback encoder for objects ``json`` cannot serialise natively.

    Objects exposing ``tolist()`` (NumPy scalars and arrays, for instance)
    are converted to the plain Python value that method returns, so a NumPy
    boolean is written as a JSON boolean rather than the string "False".
    Anything else falls back to its ``str()`` form.
    """
    to_list = getattr(value, "tolist", None)
    if callable(to_list):
        return to_list()
    return str(value)


# dict → JSON
with open("results.json", "w") as f:
    json.dump(obj, f, indent=2, default=_to_jsonable)

# DataFrame → CSV (only runs when `obj` is a pandas DataFrame)
if isinstance(obj, pd.DataFrame):
    obj.to_csv("results.csv", index=False)
