In [1]:
# List saved block files from data folders (mode-selectable)
import os, glob, json
import numpy as np
import pandas as pd

# Which game mode to analyse: 'pve' (default) or 'pvp'
mode_select = 'pve'  # change to 'pvp' if needed

# The notebook is expected to run from the repo root, with data in ./data/<mode>
repo_root = os.getcwd()
data_dir = os.path.join(repo_root, 'data')
mode_dir = os.path.join(data_dir, mode_select)

# Collect block-json files (saved by play_game.py) for selected mode only
def list_block_files_for_mode(mode_dir, mode_select):
    """Return ``(mode_select, path)`` pairs for the block files in ``mode_dir``.

    Only files matching ``<mode_select>-blocks-*.json`` directly inside
    ``mode_dir`` are listed, in sorted path order. If ``mode_dir`` is not an
    existing directory the result is an empty list.
    """
    if not os.path.isdir(mode_dir):
        return []
    pattern = os.path.join(mode_dir, f"{mode_select}-blocks-*.json")
    return [(mode_select, block_path) for block_path in sorted(glob.glob(pattern))]

files = list_block_files_for_mode(mode_dir, mode_select)
print(f"Mode: {mode_select}. Found {len(files)} block files:")
for i, (mode, path) in enumerate(files):
    print(f"[{i}] {mode} :: {os.path.basename(path)}")

# Choose which file and player to fit
file_idx = 0   # set target file index here
player_select = 2  # 1 for player1, 2 for player2 (default 2)

if not files:
    raise RuntimeError(f"No block files found under data/{mode_select}.")
# Fail with an actionable message instead of a bare "list index out of range".
# Negative indices stay valid (-1 selects the last file in the sorted listing).
if not -len(files) <= file_idx < len(files):
    raise IndexError(
        f"file_idx={file_idx} is out of range; {len(files)} block file(s) available."
    )
# Anything other than 1/2 used to fall through silently to player1 while the
# env_player_id fallback below used player_select - 1, giving inconsistent ids.
if player_select not in (1, 2):
    raise ValueError(f"player_select must be 1 or 2, got {player_select!r}.")
mode, selected_path = files[file_idx]
print(f"\nSelected: [{file_idx}] {mode} :: {os.path.basename(selected_path)}")

# Load selected block JSON (explicit encoding: the platform default is not
# UTF-8 everywhere, while JSON files are)
with open(selected_path, 'r', encoding='utf-8') as f:
    block_json = json.load(f)

player_key = 'player2' if player_select == 2 else 'player1'
if player_key not in block_json:
    raise KeyError(
        f"'{player_key}' not found in {os.path.basename(selected_path)}; "
        f"available keys: {list(block_json)}"
    )
blk = block_json[player_key]

env_pid = blk.get('env_player_id', [])
trials = blk.get('trial', [])
if not env_pid or len(env_pid) != len(trials):
    env_pid = [player_select - 1] * len(trials)  # fallback: use selected player id (0/1)

# Build subdata DataFrame for the chosen player.
# pandas raises ValueError here if the per-trial lists differ in length.
subdata = pd.DataFrame({
    'trial': trials,
    'board_idx': blk.get('board_idx', []),
    'action_idx': blk.get('action_idx', []),
    'time_elapsed': blk.get('time_elapsed', []),
    'rt': blk.get('rt', []),
    'env_player_id': env_pid
})
print(subdata.head())

# Extract design & response sequences for fitting.
# Use an explicit 64-bit dtype: board_idx values are far larger than 2**31
# (e.g. 228900428203347), and plain `int` is 32-bit on platforms where NumPy's
# default integer is a C long (Windows with NumPy < 2.0), which would silently
# wrap them.
board_arr = np.array(subdata['board_idx'], dtype=np.int64)
resp_arr = np.array(subdata['action_idx'], dtype=np.int64)
player_arr = np.array(subdata['env_player_id'], dtype=np.int64)
# One row per trial: column 0 = board_idx, column 1 = env_player_id
design = np.stack([board_arr, player_arr], axis=1)
response = resp_arr
print(f"design shape: {design.shape}, response length: {len(response)}")

Mode: pve. Found 1 block files:
[0] pve :: pve-blocks-2025-12-24_18-05-27.json

Selected: [0] pve :: pve-blocks-2025-12-24_18-05-27.json
   trial        board_idx  action_idx  time_elapsed        rt  env_player_id
0      1      31381059609          23      1.155936  0.452100              1
1      3      31386905460          15      3.464790  1.096701              1
2      5     132503653089          31      7.277315  1.056190              1
3      7   23009296108212          30      9.058379  0.441688              1
4      9  228900428203347          29     15.765532  0.377894              1
design shape: (6, 2), response length: 6


In [3]:
import sys, time
from pybads import BADS
from env.agent import *
from env.chess import *
from utils.nnl import IBS
import numpy as np
import pandas as pd

# Seed NumPy's global RNG first so the sampling-based objective and the random
# start point drawn later are repeatable; keep this ahead of everything below.
np.random.seed(42)

# Agent under test, built on a fresh four-in-a-row environment
model = BFSAgent(four_in_a_row())

# Default parameter vector, printed as a reference point for the fit
true_params = default_params().to_list()
print("True (default_params):", np.round(true_params, 5).tolist())

# IBS objective over the response/design arrays prepared by the loading cell
ibs = IBS(
    model.response_generator,
    response,
    design,
    vectorized=True,
    acceleration=1.5,
    num_samples_per_call=0,
    max_iter=10000,
    max_time=np.inf,
)

# Bookkeeping shared with the logging wrapper around the objective
progress_log = []  # one dict per evaluation: {eval, neg_logl, params, elapsed}
state_best = dict(f=np.inf, x=None)
state_ctr = dict(n=0)

# Prefer the model's own parameter names; otherwise fall back to p0, p1, ...
param_names = getattr(
    model,
    'p_names',
    [f"p{i}" for i in range(len(true_params))],
)

def target_logged(para: np.ndarray, num_reps: int = 20):
    """Evaluate the IBS objective at ``para`` and record/print progress.

    Side effects: increments ``state_ctr["n"]``, appends one record to
    ``progress_log`` and updates ``state_best`` when the new value is lower
    than every value seen so far.

    Args:
        para: Parameter vector to evaluate.
        num_reps: Forwarded to ``ibs`` as ``num_reps``. Defaults to 20, the
            value previously hard-coded, so one-argument callers are unaffected.

    Returns:
        Whatever ``ibs(para, num_reps=num_reps)`` returns, unchanged
        (presumably the negative log-likelihood estimate; confirm against
        ``utils.nnl.IBS``).
    """
    # perf_counter is monotonic, so the elapsed time cannot be distorted by
    # system clock adjustments the way time.time() differences can.
    t0 = time.perf_counter()
    f = ibs(para, num_reps=num_reps)
    dt = time.perf_counter() - t0

    nll = float(f)
    para_arr = np.array(para, dtype=float)  # private copy for the log

    # update counters and best-so-far
    state_ctr["n"] += 1
    k = state_ctr["n"]
    progress_log.append({
        "eval": k,
        "neg_logl": nll,
        "elapsed": float(dt),
        "params": para_arr.tolist()
    })
    if nll < state_best["f"]:
        state_best["f"] = nll
        state_best["x"] = para_arr.copy()
        flag = "*"  # mark improvement
    else:
        flag = " "

    # brief live print of progress
    print(f"[{k:04d}]{flag} NLL={nll:10.3f} | time={dt:6.2f}s | params[:3]={np.round(para[:3],4)} ...")
    return f

# Fit model using BADS.
# Hard (lb/ub) and plausible (plb/pub) bounds come from the model; the start
# point is drawn uniformly inside the plausible box using the seeded global RNG.
lb = np.array([b[0] for b in model.p_bnds])
ub = np.array([b[1] for b in model.p_bnds])
plb = np.array([b[0] for b in model.p_pbnds])
pub = np.array([b[1] for b in model.p_pbnds])
para0 = plb + np.random.rand(model.n_params) * (pub - plb)

bads_opt = {
    'uncertainty_handling': True,
    'noise_final_samples': 0,
    # keep BADS quiet; we print our own progress
    'display': 'off',
}

bads_ibs = BADS(
    target_logged,
    para0,
    lb,
    ub,
    plb,
    pub,
    options=bads_opt
)

# Run the optimization (prints happen during objective calls).
# NOTE(review): per the PyBADS docs, optimize() returns a result object with
# the solution under 'x', not a bare parameter vector -- confirm before using
# `theta_ibs` as theta.
theta_ibs = bads_ibs.optimize()

# Summaries after finish.
# NOTE(review): state_best tracks the lowest single objective value seen; if
# the IBS estimate is noisy (uncertainty_handling is on), that minimum is
# optimistically biased.
print("\n=== Optimization Summary ===")
print(f"Evaluations: {state_ctr['n']}")
print("Best NLL:", round(state_best["f"], 5))
if state_best["x"] is not None:
    best_arr = state_best["x"].astype(float)
    diff = best_arr - np.array(true_params, dtype=float)
    print("Best params:")
    for i, (name, v, t, d) in enumerate(zip(param_names, best_arr, true_params, diff)):
        # "Δ" replaces the mis-encoded "Î”" that was printed before
        print(f"  {i:02d} {name:>8}: est={v: .5f} | true={t: .5f} | Δ={d: .5f}")

# Make a compact DataFrame for inspection (without huge param arrays expanded)
prog_df = pd.DataFrame([
    {"eval": r["eval"], "neg_logl": r["neg_logl"], "elapsed": r["elapsed"]}
    for r in progress_log
])
print("\nProgress (last 10):")
print(prog_df.tail(10))


True (default_params): [0.02, 0.02, 2.0, 0.005, 0.92498, 0.60913, 0.90444, 0.45076, 3.4272, 20.1728]


KeyboardInterrupt: 

In [None]:
# Inspect and visualize parameter evolution (optional to run after fitting)
import numpy as np
import pandas as pd

# Nothing to show unless the fitting cell has logged at least one evaluation.
if 'progress_log' not in globals() or len(progress_log) == 0:
    print("progress_log is empty; run the fitting cell first.")
else:
    df = pd.DataFrame(progress_log)

    # One column per parameter, named from param_names when the count matches
    P = pd.DataFrame(df['params'].tolist())
    P.columns = (
        param_names
        if 'param_names' in globals() and len(param_names) == P.shape[1]
        else [f"p{i}" for i in range(P.shape[1])]
    )

    # Swap the list-valued 'params' column for the expanded columns and add
    # the running minimum of the objective
    df = pd.concat([df.drop(columns=['params']), P], axis=1)
    df['cum_best'] = df['neg_logl'].cummin()

    print("Rows:", len(df))
    print(df[['eval', 'neg_logl', 'cum_best', *P.columns]].tail(10))

    # Uncomment to plot evolution (can be noisy/slow)
    # import matplotlib.pyplot as plt
    # fig, axes = plt.subplots(1, 2, figsize=(12,4))
    # axes[0].plot(df['eval'], df['neg_logl'], label='neg_logl')
    # axes[0].plot(df['eval'], df['cum_best'], label='cum_best')
    # axes[0].set_xlabel('eval')
    # axes[0].set_ylabel('neg_logl')
    # axes[0].legend()
    # for name in P.columns:
    #     axes[1].plot(df['eval'], df[name], label=name, alpha=0.7)
    # if 'true_params' in globals():
    #     for j, name in enumerate(P.columns):
    #         axes[1].axhline(true_params[j], color='k', lw=0.5, alpha=0.3)
    # axes[1].set_xlabel('eval')
    # axes[1].set_ylabel('param value')
    # axes[1].legend(ncol=2, fontsize=8)
    # plt.tight_layout()
