To launch on SuperCloud from a compute node:

LLsub -i full #for an exclusive node

LLsub -i -s 40 #for node with 40 CPUs

LLsub -i -s 40 -g volta:1 #for node with 40 CPUs and 1 Volta GPU

salloc  --job-name=interactive --qos=high --time=02:00:00 --partition=debug-gpu --gres=gpu:volta:1 --cpus-per-task=40 srun    --pty bash -i

salloc  --job-name=interactive --qos=high --time=02:00:00 --partition=debug-cpu --cpus-per-task=40 srun  --pty bash -i

LLsub -i full

module load anaconda/2023a-pytorch

jupyter lab --no-browser --ip=0.0.0.0 --port=8890

In [1]:
# --- CONFIG: EDIT THESE ---
# Checkpoint handed to MaskablePPO.load() further down in this cell.
MODEL_PATH   = "/home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/manuscript/trained_agents/GAT_MLP_with_scalars_200_logic_functions/trained_model.zip"
# Text file listing the graphs to evaluate; only lines that start with "Selected" are read.
SELECTED_GRAPHS_FILE = "/home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/manuscript/trained_agents/GAT_MLP_with_scalars_200_logic_functions/selected_graphs.txt"
# Destination of the per-graph results table written at the end of this cell.
OUTPUT_CSV   = "per_graph_eval.csv"

# Number of episodes evaluate_policy() runs for each graph.
N_EVAL_EPISODES = 100     # use 10–20 for faster runs
# Forwarded as `deterministic=` to evaluate_policy().
DETERMINISTIC = False
# Forwarded as `max_steps=` and `max_nodes=` to DRL3env.
MAX_STEPS = 10
MAX_NODES = 100
# Requested device; the cell falls back to "cpu" when CUDA is not available.
DEVICE = "cuda"  # or "cpu"
# ----------------

import os, pickle, re, warnings
from pathlib import Path
import numpy as np
import pandas as pd
import networkx as nx
import torch as th

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from sb3_contrib import MaskablePPO
from sb3_contrib.common.maskable.evaluation import evaluate_policy

from dgd.environments.drl3env_loader6 import DRL3env, _compute_truth_key
from dgd.utils.utils5 import energy_score, check_implicit_OR_existence_v3

# --- Read the graph paths listed in SELECTED_GRAPHS_FILE ---
# Only lines that begin with the literal marker "Selected" are used; whatever
# follows the marker (whitespace-stripped) is taken as the path of one pickled
# graph. Every other line in the file is ignored.
_SELECTED_MARKER = "Selected"
with open(SELECTED_GRAPHS_FILE, "r") as f:
    lines = [line.strip() for line in f if line.strip().startswith(_SELECTED_MARKER)]
# Remove only the leading marker. Splitting on every occurrence of the marker
# and keeping the last piece would truncate a path that itself contains the
# word "Selected" (for example in a directory name).
graph_paths = [line[len(_SELECTED_MARKER):].strip() for line in lines]
print(f"Loaded {len(graph_paths)} graph paths from {SELECTED_GRAPHS_FILE}")

# --- Helpers ---
def load_graph_pickle(filename: str) -> nx.DiGraph:
    """Rebuild a directed graph from a pickled ``(num_nodes, edges, node_attrs)`` triple.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        A ``networkx.DiGraph`` holding every node listed in ``node_attrs`` and
        every edge in ``edges``. A node whose attribute value is not ``None``
        stores that value under the ``type`` node attribute; a node whose value
        is ``None`` is added without attributes.
    """
    with open(filename, "rb") as handle:
        # The first element (node count) is unpacked but not needed to rebuild the graph.
        _num_nodes, edge_list, attrs_by_node = pickle.load(handle)

    graph = nx.DiGraph()
    for node_id, node_type in attrs_by_node.items():
        if node_type is None:
            graph.add_node(node_id)
        else:
            graph.add_node(node_id, type=node_type)
    graph.add_edges_from(edge_list)
    return graph

def make_single_env(G: nx.DiGraph):
    """Wrap one graph in a single-environment ``DummyVecEnv`` for evaluation.

    Args:
        G: The graph handed to ``DRL3env`` as its only entry in ``graphs``.

    Returns:
        The vectorised environment produced by ``make_vec_env`` with
        ``n_envs=1`` and ``seed=0``.
    """
    # The truth key of G is computed once and passed to the env as `existing_keys`.
    truth_keys = {_compute_truth_key(G)}

    def _build_env():
        env_kwargs = dict(
            max_nodes=MAX_NODES,
            graphs=[G],
            enable_full_graph_replacement=True,
            show_plots=False,
            log_info=False,
            strict_iso_check=False,
            max_steps=MAX_STEPS,
            registry_read_only=True,
            existing_keys=truth_keys,
        )
        return DRL3env(**env_kwargs)

    return make_vec_env(_build_env, n_envs=1, vec_env_cls=DummyVecEnv, seed=0)

# --- Load trained model ---
# CUDA is used only when it was requested via DEVICE *and* is actually available;
# in every other case the model is placed on the CPU.
use_cuda = th.cuda.is_available() if DEVICE == "cuda" else False
device = "cpu" if not use_cuda else "cuda"
model = MaskablePPO.load(MODEL_PATH, device=device)
print(f"Model loaded on {device}")

# --- Evaluate per graph ---
# For every path in `graph_paths`: load the graph, wrap it in a one-env VecEnv,
# record its baseline energy and the mean/std reward of the loaded model, and
# collect one result row. A graph that fails at any stage is kept in the table
# as a NaN row (with a warning) so the output index stays aligned with the input.

# Progress bar instead of printing the bare loop counter; falls back to a
# pass-through when tqdm is not installed.
try:
    from tqdm.auto import tqdm
except ImportError:
    def tqdm(x, **kwargs):
        return x

# Hex identifier embedded in the file name, e.g. "<0x...>_NIG...".
_HEX_ID_PATTERN = re.compile(r"(0x[0-9A-Fa-f]+)_NIG")

rows = []
n_graphs = len(graph_paths)
for i, fp in enumerate(tqdm(graph_paths, desc="Evaluating graphs"), 1):
    env = None
    try:
        G = load_graph_pickle(fp)
        env = make_single_env(G)

        base_energy = energy_score(G, check_implicit_OR_existence_v3)[0]
        mean_r, std_r = evaluate_policy(model, env, n_eval_episodes=N_EVAL_EPISODES, deterministic=DETERMINISTIC)

        # A file name without the expected pattern must not discard a successful
        # evaluation: record hex_id=None instead of raising AttributeError.
        hex_match = _HEX_ID_PATTERN.search(os.path.basename(fp))

        rows.append({
            "index": i,
            "path": fp,
            "hex_id": hex_match.group(1) if hex_match else None,
            "baseline_energy": base_energy,
            "mean_reward": mean_r,
            "std_reward": std_r,
        })

        if i % 50 == 0 or i == n_graphs:
            print(f"[{i}/{n_graphs}] {os.path.basename(fp)} → mean_reward={mean_r:.3f}")

    except Exception as e:
        warnings.warn(f"Skipping {fp}: {e}")
        rows.append({"index": i, "path": fp, "hex_id": None, "baseline_energy": np.nan, "mean_reward": np.nan, "std_reward": np.nan})

    finally:
        # Close the environment on success, failure and interruption alike.
        if env is not None:
            env.close()

# ------
df = pd.DataFrame(rows)
df.to_csv(OUTPUT_CSV, index=False)
print(f"Saved results to {OUTPUT_CSV}")
display(df.head())

Done loading action motifs. There are 15928 unique motifs.
Loaded 200 graph paths from /home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/manuscript/trained_agents/GAT_MLP_with_scalars_200_logic_functions/selected_graphs.txt
Model loaded on cuda
1


  logger.warn(


2
3


KeyboardInterrupt: 

Load the shared registry and evaluate the trained model on initial states sampled from it

In [None]:
# --- CONFIG: EDIT THESE ---
# Checkpoint handed to MaskablePPO.load() below.
MODEL_PATH    = "/home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/manuscript/trained_agents/GAT_MLP_with_scalars_4000_logic_functions/trained_model.zip"
# Pickled registry; read below as a dict mapping key -> list of (canon_nl, orig_nl, e) items.
REGISTRY_PATH = "/home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/manuscript/trained_agents/GAT_MLP_with_scalars_4000_logic_functions/final_shared_registry.pkl"
# Destination of the per-episode reward table written at the end of this cell.
OUTPUT_CSV    = "eval_from_registry_sampling.csv"

# Evaluation settings forwarded to evaluate_policy() / DRL3env.
N_EVAL_EPISODES = 100
DETERMINISTIC   = False
MAX_STEPS       = 10
MAX_NODES       = 100
N_WORKERS       = 40          # upper bound on rebuild worker processes; set ~ to your CPU cores
DEVICE          = "cuda"      # or "cpu"; falls back to "cpu" when CUDA is unavailable

# --- Subsampling config ---
SUBSAMPLE_MODE       = "count"   # "fraction", "count", or "per_bucket"
SUBSAMPLE_FRACTION   = 0.01         # share of all registry entries kept (at least 1) if MODE == "fraction"
SUBSAMPLE_COUNT      = 10000         # number of entries kept (capped at registry size) if MODE == "count"
SUBSAMPLE_PER_BUCKET = 3            # max entries drawn from each bucket if MODE == "per_bucket"
SUBSAMPLE_SEED       = 42           # seed for np.random.default_rng (reproducibility)
# --------------------------

import pickle, warnings, os, multiprocessing as mp
from pathlib import Path
import numpy as np
import pandas as pd
import torch as th
import networkx as nx
from concurrent.futures import ProcessPoolExecutor, as_completed

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from sb3_contrib import MaskablePPO
from sb3_contrib.common.maskable.evaluation import evaluate_policy

from dgd.environments.drl3env_loader6 import DRL3env, _compute_truth_key

# Progress bars via tqdm (notebook- and terminal-aware); optional dependency.
try:
    from tqdm.auto import tqdm
except ImportError:
    def tqdm(x, **_ignored_options):
        """Stand-in used when tqdm is missing: hand back the iterable untouched."""
        return x

def _rebuild_orig_graph(bucket_item):
    """Worker: turn one registry item into ``(original_graph, energy)``.

    ``bucket_item`` is a ``(canon_nl, orig_nl, e)`` triple. Only ``orig_nl`` is
    rebuilt (with ``nx.node_link_graph``); the canonical form is ignored and
    ``e`` is returned as a ``float``.
    """
    _canon_nl, orig_node_link, energy = bucket_item
    return nx.node_link_graph(orig_node_link), float(energy)

print("Loading model...")
# CUDA only when requested through DEVICE and actually available; CPU otherwise.
use_cuda = th.cuda.is_available() if DEVICE == "cuda" else False
device = "cpu" if not use_cuda else "cuda"
model = MaskablePPO.load(MODEL_PATH, device=device)
print(f"Model on {device}")

print(f"Loading registry: {REGISTRY_PATH}")
# NOTE: unpickling runs code embedded in the file; only load registries you trust.
with open(REGISTRY_PATH, "rb") as registry_file:
    # dict: key -> list[(canon_nl, orig_nl, e), ...]
    raw_reg = pickle.load(registry_file)

# Sum of bucket lengths = number of individual registry items.
total_entries = sum(map(len, raw_reg.values()))
print(f"Registry buckets: {len(raw_reg)} | total entries: {total_entries}")

# -----------------
# Subsample registry
# -----------------
# "per_bucket": draw up to SUBSAMPLE_PER_BUCKET items from every non-empty bucket.
# "fraction" / "count": flatten all buckets and draw k items without replacement
# (everything is kept when k covers the whole registry).
rng = np.random.default_rng(SUBSAMPLE_SEED)

if SUBSAMPLE_MODE == "per_bucket":
    sampled_items = []
    for bucket in raw_reg.values():
        if bucket:
            k = min(SUBSAMPLE_PER_BUCKET, len(bucket))
            picked = rng.choice(len(bucket), size=k, replace=False)
            sampled_items += [bucket[j] for j in picked]
else:
    all_items = []
    for bucket in raw_reg.values():
        all_items.extend(bucket)

    if SUBSAMPLE_MODE == "count":
        k = min(SUBSAMPLE_COUNT, len(all_items))
    elif SUBSAMPLE_MODE == "fraction":
        k = max(1, int(len(all_items) * SUBSAMPLE_FRACTION))
    else:
        raise ValueError(f"Unknown SUBSAMPLE_MODE: {SUBSAMPLE_MODE}")

    if k >= len(all_items):
        # Nothing to drop: keep the flattened registry as is.
        sampled_items = all_items
    else:
        picked = rng.choice(len(all_items), size=k, replace=False)
        sampled_items = [all_items[j] for j in picked]

if not sampled_items:
    raise RuntimeError("Subsampling resulted in 0 items. Adjust SUBSAMPLE_* settings.")

print(f"Subsampled entries: {len(sampled_items)} (mode='{SUBSAMPLE_MODE}')")

# ----------------------------------------------
# Parallel rebuild of ONLY original graphs (fast)
# ----------------------------------------------
# Each sampled (canon_nl, orig_nl, e) item is rebuilt in a worker process.
# `rebuilt` collects (graph, energy) pairs and `best_energy` tracks the lowest
# energy seen; items whose rebuild raises are skipped with a warning.
rebuilt = []
best_energy = np.inf
effective_workers = min(N_WORKERS, max(1, len(sampled_items)))
with ProcessPoolExecutor(max_workers=effective_workers) as ex:
    futures = [ex.submit(_rebuild_orig_graph, it) for it in sampled_items]
    # Collect results in submission order rather than completion order, so that
    # `rebuilt` always mirrors `sampled_items`. Completion order varies between
    # runs, which would make everything derived from `rebuilt` order-dependent
    # on process scheduling. The work still runs in parallel; only the order in
    # which finished results are picked up is fixed.
    for fut in tqdm(futures, total=len(futures), desc="Rebuilding graphs", smoothing=0.1):
        try:
            orig_g, e = fut.result()
        except Exception as exc:
            warnings.warn(f"Skipping one registry item due to error: {exc}")
            continue
        rebuilt.append((orig_g, e))
        if e < best_energy:
            best_energy = e

if len(rebuilt) == 0:
    raise RuntimeError("No graphs were rebuilt successfully from the subsample.")

print(f"Rebuilt {len(rebuilt)} original graphs. Best energy in subsample: {best_energy:.4f}")

# ---------------------------------------------------------
# Build lightweight read-only "registry" using (orig as canon)
# ---------------------------------------------------------
# Graphs are grouped by their truth key; each bucket entry is a
# (canon, orig, e) triple in which the original graph is used for both slots.
light_registry = {}
existing_keys = set()
n_skipped = 0

for orig_g, e in tqdm(rebuilt, desc="Building keys/light registry", smoothing=0.1):
    try:
        key = _compute_truth_key(orig_g)
        bucket = light_registry.setdefault(key, [])
        bucket.append((orig_g, orig_g, e))  # (canon, orig, e) with canon=orig
        existing_keys.add(key)
    except Exception as exc:
        # Still best-effort, but no longer silent: a graph that cannot be keyed
        # is left out of the registry and reported.
        n_skipped += 1
        warnings.warn(f"Skipping one graph while building the light registry: {exc}")

if n_skipped:
    print(f"Skipped {n_skipped} of {len(rebuilt)} graphs while building the light registry")

if len(light_registry) == 0:
    raise RuntimeError("Light registry is empty after rebuild. Nothing to sample from.")

print(f"Light registry buckets: {len(light_registry)} (read-only)")

# ---------------------------------------------------------
# Objects DRL3env is given alongside shared_registry:
#   - registry_lock               -> REGISTRY_LOCK
#   - best_energy_across_workers  -> GLOBAL_BEST (shared double)
# ---------------------------------------------------------
REGISTRY_LOCK = mp.Lock()
# Start from the best energy in the subsample; use 1e12 if that value is not finite.
GLOBAL_BEST = mp.Value(
    'd',
    1e12 if not np.isfinite(best_energy) else best_energy,
)

def make_registry_sampling_env():
    """Build a ``DRL3env`` that is given no graphs and the light registry instead.

    The environment receives ``graphs=[]`` together with ``light_registry``,
    ``REGISTRY_LOCK``, ``GLOBAL_BEST`` and ``existing_keys`` from the enclosing
    cell, with ``sampling_from_shared_registry=True`` and
    ``registry_read_only=True``.
    """
    # Options that wire the env to the read-only registry built above.
    registry_options = dict(
        shared_registry=light_registry,
        registry_lock=REGISTRY_LOCK,
        best_energy_across_workers=GLOBAL_BEST,
        store_every_new_graph=False,
        sampling_from_shared_registry=True,
        initial_state_sampling_factor=0,
        strict_iso_check=False,
        registry_read_only=True,
        existing_keys=existing_keys,
    )
    return DRL3env(
        max_nodes=MAX_NODES,
        graphs=[],
        enable_full_graph_replacement=True,
        show_plots=False,
        log_info=False,
        max_steps=MAX_STEPS,
        **registry_options,
    )

# Single-env evaluation with sampling from the (subsampled) shared registry
env = make_vec_env(make_registry_sampling_env, n_envs=1, vec_env_cls=DummyVecEnv, seed=0)

try:
    ep_rewards, ep_lengths = evaluate_policy(
        model, env, n_eval_episodes=N_EVAL_EPISODES,
        deterministic=DETERMINISTIC, return_episode_rewards=True
    )
finally:
    # The env is not used after evaluation; release it even if evaluation fails.
    env.close()

mean_r, std_r = float(np.mean(ep_rewards)), float(np.std(ep_rewards))
print(f"\n[Registry sampling FAST — subsampled] episodes={N_EVAL_EPISODES} det={DETERMINISTIC}")
print(f"Mean reward = {mean_r:.3f} | Std = {std_r:.3f}")

# One row per evaluation episode.
df_trained = pd.DataFrame({
    "episode": np.arange(1, len(ep_rewards)+1),
    "reward": ep_rewards,
    "length": ep_lengths,
    "deterministic": DETERMINISTIC,
})
df_trained.to_csv(OUTPUT_CSV, index=False)
print(f"Saved per-episode rewards to {Path(OUTPUT_CSV).resolve()}")

# `display` only exists inside IPython/Jupyter; skip the preview elsewhere
# without hiding unrelated errors.
try:
    display(df_trained.head())
except NameError:
    pass


Loading model...
Model on cuda
Loading registry: /home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/manuscript/trained_agents/GAT_MLP_with_scalars_4000_logic_functions/final_shared_registry.pkl
Registry buckets: 391728 | total entries: 391728
Subsampled entries: 10000 (mode='count')


Rebuilding graphs:   0%|          | 0/10000 [00:00<?, ?it/s]

Rebuilt 10000 original graphs. Best energy in subsample: 2.0000


Building keys/light registry:   0%|          | 0/10000 [00:00<?, ?it/s]

Light registry buckets: 1470 (read-only)


  logger.warn(



[Registry sampling FAST — subsampled] episodes=100 det=False
Mean reward = 9.225 | Std = 4.106
Saved per-episode rewards to /home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/eval_from_registry_sampling.csv


Unnamed: 0,episode,reward,length,deterministic
0,1,11.111111,10,False
1,2,2.222222,10,False
2,3,4.347826,10,False
3,4,10.0,10,False
4,5,3.703704,10,False


In [None]:
# Persist the subsample so another run can be evaluated on exactly the same items.
# This cell defines neither `sampled_items` nor `pickle`: both must already be
# in the kernel from an earlier cell before it is executed.
with open("trained_run_sampled_items.pkl", "wb") as f:
    pickle.dump(sampled_items, f)
print("Saved exact subsample to trained_run_sampled_items.pkl")

Saved exact subsample to trained_run_sampled_items.pkl


In [None]:
# --- CONFIG: EDIT THESE ---
# Checkpoint loaded below only to copy its policy class and hyperparameters (not its weights).
MODEL_PATH    = "/home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/manuscript/trained_agents/GAT_MLP_with_scalars_4000_logic_functions/trained_model.zip"
SAMPLED_ITEMS_IN = "trained_run_sampled_items.pkl"   # <-- produced by the trained run
# Destination of the per-episode reward table for the untrained policy.
OUTPUT_CSV    = "eval_from_registry_sampling_untrained.csv"

# Evaluation settings forwarded to evaluate_policy() / DRL3env.
N_EVAL_EPISODES = 100
DETERMINISTIC   = False
MAX_STEPS       = 10
MAX_NODES       = 100
# Upper bound on the number of rebuild worker processes.
N_WORKERS       = 40
DEVICE          = "cuda"      # or "cpu"; falls back to "cpu" when CUDA is unavailable

import pickle, warnings, os, multiprocessing as mp
from pathlib import Path
import numpy as np
import pandas as pd
import torch as th
import networkx as nx
from concurrent.futures import ProcessPoolExecutor, as_completed

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from sb3_contrib import MaskablePPO
from sb3_contrib.common.maskable.evaluation import evaluate_policy

from dgd.environments.drl3env_loader6 import DRL3env, _compute_truth_key

# Optional progress bars: use tqdm when installed, otherwise iterate silently.
try:
    from tqdm.auto import tqdm
except ImportError:
    def tqdm(x, **_ignored_options):
        """Stand-in used when tqdm is missing: hand back the iterable untouched."""
        return x

def _rebuild_orig_graph(bucket_item):
    """Worker: turn one ``(canon_nl, orig_nl, e)`` item into ``(original_graph, float(e))``.

    Only ``orig_nl`` is rebuilt (with ``nx.node_link_graph``); the canonical
    form is ignored.
    """
    _canon_nl, orig_node_link, energy = bucket_item
    return nx.node_link_graph(orig_node_link), float(energy)

# --------------------------------------------------------
# Step 0) Load the exact same subsample picked by trained run
# --------------------------------------------------------
# Fail early with a clear message when the file from the trained run is absent.
if not Path(SAMPLED_ITEMS_IN).exists():
    raise FileNotFoundError(f"Missing {SAMPLED_ITEMS_IN}. Run the trained script first.")

with open(SAMPLED_ITEMS_IN, "rb") as items_file:
    sampled_items = pickle.load(items_file)

# The payload must be a non-empty list of registry items.
if not (isinstance(sampled_items, list) and sampled_items):
    raise RuntimeError("Loaded SAMPLED_ITEMS_IN is empty or invalid.")

print(f"Loaded preselected sampled items: {len(sampled_items)}")

# ----------------------------------------------
# Rebuild ONLY original graphs (fast)
# ----------------------------------------------
# Each saved (canon_nl, orig_nl, e) item is rebuilt in a worker process.
# `rebuilt` collects (graph, energy) pairs and `best_energy` tracks the lowest
# energy seen; items whose rebuild raises are skipped with a warning.
rebuilt = []
best_energy = np.inf
effective_workers = min(N_WORKERS, max(1, len(sampled_items)))
with ProcessPoolExecutor(max_workers=effective_workers) as ex:
    futures = [ex.submit(_rebuild_orig_graph, it) for it in sampled_items]
    # Collect results in submission order rather than completion order, so that
    # `rebuilt` always mirrors the saved `sampled_items`. Completion order varies
    # between runs, which would give the registry built from `rebuilt` a
    # different internal order each time even though the subsample is identical.
    # The work still runs in parallel; only the pick-up order is fixed.
    for fut in tqdm(futures, total=len(futures), desc="Rebuilding graphs", smoothing=0.1):
        try:
            orig_g, e = fut.result()
        except Exception as exc:
            warnings.warn(f"Skipping one registry item due to error: {exc}")
            continue
        rebuilt.append((orig_g, e))
        if e < best_energy:
            best_energy = e

if len(rebuilt) == 0:
    raise RuntimeError("No graphs were rebuilt successfully from the saved subsample.")

print(f"Rebuilt {len(rebuilt)} original graphs. Best energy in subsample: {best_energy:.4f}")

# ---------------------------------------------------------
# Build lightweight read-only registry (orig as canon)
# ---------------------------------------------------------
# Graphs are grouped by their truth key; each bucket entry is a
# (canon, orig, e) triple in which the original graph is used for both slots.
light_registry = {}
existing_keys = set()
n_skipped = 0
for orig_g, e in tqdm(rebuilt, desc="Building keys/light registry", smoothing=0.1):
    try:
        key = _compute_truth_key(orig_g)
        bucket = light_registry.setdefault(key, [])
        bucket.append((orig_g, orig_g, e))  # (canon, orig, e)
        existing_keys.add(key)
    except Exception as exc:
        # Still best-effort, but no longer silent: a graph that cannot be keyed
        # is left out of the registry and reported.
        n_skipped += 1
        warnings.warn(f"Skipping one graph while building the light registry: {exc}")

if n_skipped:
    print(f"Skipped {n_skipped} of {len(rebuilt)} graphs while building the light registry")

if len(light_registry) == 0:
    raise RuntimeError("Light registry is empty after rebuild.")

print(f"Light registry buckets: {len(light_registry)} (read-only)")

# ---------------------------------------------------------
# Shared objects handed to DRL3env: a lock and a shared double
# ---------------------------------------------------------
REGISTRY_LOCK = mp.Lock()
# Start from the best energy in the subsample; use 1e12 if that value is not finite.
GLOBAL_BEST = mp.Value(
    'd',
    1e12 if not np.isfinite(best_energy) else best_energy,
)

def make_registry_sampling_env():
    """Build a ``DRL3env`` that is given no graphs and the light registry instead.

    The environment receives ``graphs=[]`` together with ``light_registry``,
    ``REGISTRY_LOCK``, ``GLOBAL_BEST`` and ``existing_keys`` from the enclosing
    cell, with ``sampling_from_shared_registry=True`` and
    ``registry_read_only=True``.
    """
    # Options that wire the env to the read-only registry built above.
    registry_options = dict(
        shared_registry=light_registry,
        registry_lock=REGISTRY_LOCK,
        best_energy_across_workers=GLOBAL_BEST,
        store_every_new_graph=False,
        sampling_from_shared_registry=True,
        initial_state_sampling_factor=0,
        strict_iso_check=False,
        registry_read_only=True,
        existing_keys=existing_keys,
    )
    return DRL3env(
        max_nodes=MAX_NODES,
        graphs=[],
        enable_full_graph_replacement=True,
        show_plots=False,
        log_info=False,
        max_steps=MAX_STEPS,
        **registry_options,
    )

env = make_vec_env(make_registry_sampling_env, n_envs=1, vec_env_cls=DummyVecEnv, seed=0)

# --------------------------------------------------------
# Instantiate a brand-new (UNTRAINED) model with same policy class/kwargs
# --------------------------------------------------------
print("Inspecting saved model to copy architecture (not weights)...")
use_cuda = (DEVICE == "cuda") and th.cuda.is_available()
device = "cuda" if use_cuda else "cpu"

# The checkpoint is opened only to read its policy class and hyperparameters,
# so this throwaway copy is kept on the CPU instead of occupying GPU memory.
_tmp_loaded = MaskablePPO.load(MODEL_PATH, device="cpu")
policy_class = type(_tmp_loaded.policy)
policy_kwargs = getattr(_tmp_loaded, "policy_kwargs", {}) or {}

# Mirror common algo hyperparams when present
algo_kwargs = {}
for k in ["n_steps", "batch_size", "n_epochs", "gamma", "gae_lambda",
          "clip_range", "ent_coef", "vf_coef", "max_grad_norm"]:
    if hasattr(_tmp_loaded, k):
        algo_kwargs[k] = getattr(_tmp_loaded, k)
if hasattr(_tmp_loaded, "learning_rate"):
    algo_kwargs["learning_rate"] = _tmp_loaded.learning_rate
elif hasattr(_tmp_loaded, "lr_schedule"):
    # Fallback: evaluate the schedule at progress 1.0 to obtain a plain float.
    try:
        algo_kwargs["learning_rate"] = float(_tmp_loaded.lr_schedule(1.0))
    except Exception:
        pass
del _tmp_loaded

# An untrained policy is defined entirely by its random initialisation. Passing
# a fixed seed (the Stable-Baselines3 `seed` constructor argument) makes this
# baseline repeatable from run to run instead of depending on RNG state left
# over in the kernel.
UNTRAINED_MODEL_SEED = 0

print(f"Policy class: {policy_class.__name__} | device: {device}")
print("Instantiating a fresh, UNTRAINED MaskablePPO model...")
model = MaskablePPO(
    policy_class,
    env,
    policy_kwargs=policy_kwargs,
    device=device,
    seed=UNTRAINED_MODEL_SEED,
    **algo_kwargs,
)

# Evaluate (no training)
try:
    ep_rewards, ep_lengths = evaluate_policy(
        model, env, n_eval_episodes=N_EVAL_EPISODES,
        deterministic=DETERMINISTIC, return_episode_rewards=True
    )
finally:
    # The env is not used after evaluation; release it even if evaluation fails.
    env.close()

mean_r, std_r = float(np.mean(ep_rewards)), float(np.std(ep_rewards))
print(f"\n[Same subsample — UNTRAINED policy] episodes={N_EVAL_EPISODES} det={DETERMINISTIC}")
print(f"Mean reward = {mean_r:.3f} | Std = {std_r:.3f}")

# One row per evaluation episode.
df_untrained = pd.DataFrame({
    "episode": np.arange(1, len(ep_rewards)+1),
    "reward": ep_rewards,
    "length": ep_lengths,
    "deterministic": DETERMINISTIC,
})
df_untrained.to_csv(OUTPUT_CSV, index=False)
print(f"Saved per-episode rewards to {Path(OUTPUT_CSV).resolve()}")

# `display` only exists inside IPython/Jupyter; skip the preview elsewhere
# without hiding unrelated errors.
try:
    display(df_untrained.head())
except NameError:
    pass


Loaded preselected sampled items: 10000


Rebuilding graphs:   0%|          | 0/10000 [00:00<?, ?it/s]

Rebuilt 10000 original graphs. Best energy in subsample: 2.0000


Building keys/light registry:   0%|          | 0/10000 [00:00<?, ?it/s]

Light registry buckets: 1470 (read-only)
Inspecting saved model to copy architecture (not weights)...
Policy class: MaskableMultiInputActorCriticPolicy | device: cuda
Instantiating a fresh, UNTRAINED MaskablePPO model...


  logger.warn(



[Same subsample — UNTRAINED policy] episodes=100 det=False
Mean reward = 4.789 | Std = 3.265
Saved per-episode rewards to /home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/eval_from_registry_sampling_untrained.csv


Unnamed: 0,episode,reward,length,deterministic
0,1,5.882353,10,False
1,2,7.142857,10,False
2,3,12.5,10,False
3,4,2.12766,10,False
4,5,7.692308,10,False
