In [50]:
import sys, os
sys.path.append(os.path.abspath(".."))

In [27]:
import numpy as np
from platform_attention.kode import SimParams, AgentParams, TrainParams

# --- Simulation parameters (environment) ---
sim = SimParams(
    n=2,                   # number of firms
    K=1,                   # memory length
    price_grid=(1.0, 1.25, 1.5, 1.75, 2.0),  # 5 prices as you wanted
    kappa=np.inf,           # initial attention intensity (will be replaced by grid values)
    rho=0.0,                # i.i.d. demand shocks
    sigma=0.0,             # shock volatility
    b=3.0,                  # demand intercept (optional, default already 3.0)
    c=1.0,                  # marginal cost
    phi=1.0,                # demand sensitivity
    observed_shocks=True,   # public shocks
    seed=42
)

# --- Agent parameters (Q-learning) ---
ap = AgentParams(
    n_actions=len(sim.price_grid),
    alpha=0.05,   # learning rate
    delta=0.995,  # discount factor
    tau0=1.0,     # initial temperature
    tau_min=0.01, # minimum temperature
    gamma=0.99997 # cooling rate
)

# --- Training control parameters ---
tr = TrainParams(
    T_max=50000,      # training horizon per run
    replications=4,   # number of random seeds
    base_seed=42,
    n_jobs=-1         # use all cores for parallel processing
)

# --- Attention intensity grid to scan ---
kap_grid = [np.inf, 100, 50, 25, 10, 5, 1, 0]

from kode.threshold import estimate_kappa_star

df, kstar = estimate_kappa_star(
    out_dir="results",
    sim_base=sim,
    ap=ap,
    kap_grid=kap_grid,
    tr=tr,
    delta=0.995,
    mu=0.3,
    tag_base="n2_K1"
)

print(df)
print("Estimated κ* =", kstar)


   kappa  markup_mean  markup_sd  n_seeds
0    inf     0.265625   0.027063        4
1  100.0     0.012500   0.021651        4
2   50.0     0.012500   0.021651        4
3   25.0     0.062500   0.081968        4
4   10.0     0.075000   0.082916        4
5    5.0     0.200000   0.000000        4
6    1.0     0.114583   0.080012        4
7    0.0    -0.023438   0.025911        4
Estimated κ* = 0.0


In [12]:
import os, sys, numpy as np

# If this notebook is inside platform_attention/, add parent to sys.path
sys.path.append(os.path.abspath(".."))

import platform_attention as pa

print("Using platform_attention from:", pa.__file__)

# Build env
sim = pa.SimParams(
    n=2, K=1,
    price_grid=(1.0, 1.25, 1.5, 1.75, 2.0),
    kappa=0.0,   # no steering
    mu=1.5,      # own-price logit slope
)
env = pa.AttentionEnv(sim, seed=0)

# ---- Single-vector profits ----
p = np.array([1.4, 1.6])
pi_single = env.static_profits(p, theta=0.0)
print("Single-vector profits:", np.round(pi_single, 6))

# ---- Batch profits (shape: (B, n)) ----
P = np.stack([
    np.array([1.4, 1.6]),
    np.array([1.6, 1.4]),
    np.array([1.5, 1.5]),
], axis=0)
pi_batch = env.static_profits_batch(P, theta=0.0)
print("Batch profits shape:", pi_batch.shape)
print("Batch profits:\n", np.round(pi_batch, 6))


Using platform_attention from: /Users/asifkhan/Downloads/platform_attention/__init__.py
Single-vector profits: [0.344666 0.383002]
Batch profits shape: (3, 2)
Batch profits:
 [[0.344666 0.383002]
 [0.383002 0.344666]
 [0.375    0.375   ]]


In [None]:
# === Driver: Baseline (kappa=0) -> Threshold -> Plots (n=2, K=2) ===

import os, sys, numpy as np
import sys, os
sys.path.append(os.path.abspath(".."))
from platform_attention.kode.threshold import estimate_kappa_star
from platform_attention.kode.eval import load_greedy_agents
from platform_attention.kode.figures import fig_impulse_response, plot_kappa_threshold
from platform_attention import SimParams, AgentParams, TrainParams, Trainer, AttentionEnv

# --------------------------
# Helpers
# --------------------------
import numpy as np, os

def kappa_to_tag(kappa: float) -> str:
    """Match the κ tag used during training in estimate_kappa_star: kap{inf|int|with p}."""
    if np.isinf(kappa):
        return "inf"
    kf = float(kappa)
    # Use integer form when possible (e.g., 0.0 -> "0", 10.0 -> "10")
    if kf.is_integer():
        return str(int(kf))
    return str(kf).replace(".", "p")

def find_any_seed_for_tag(out_dir: str, base_tag: str, kappa: float) -> int:
    """
    Look for a checkpoint directory for this κ. Tries both 'kap' and (fallback) 'kappa' variants.
    Returns the first seed it finds.
    """
    tag_core = kappa_to_tag(kappa)
    candidates = [
        os.path.join(out_dir, "checkpoints", f"{base_tag}_kap{tag_core}"),
        os.path.join(out_dir, "checkpoints", f"{base_tag}_kappa{tag_core}"),  # fallback to baseline naming style
    ]
    for ckpt_dir in candidates:
        if os.path.isdir(ckpt_dir):
            for fname in os.listdir(ckpt_dir):
                if "seed" in fname and fname.endswith(".json"):
                    try:
                        return int(fname.split("seed")[1].split(".json")[0])
                    except Exception:
                        pass
    raise FileNotFoundError(f"No checkpoint directory found for any of {candidates}")

import os

def find_seed_exact(out_dir: str, full_tag: str) -> int:
    """
    Find any seed for a checkpoint directory named exactly:
      {out_dir}/checkpoints/{full_tag}/
    Returns the first seed integer found in filenames like Q_agent0_seed{SEED}.json
    """
    ckpt_dir = os.path.join(out_dir, "checkpoints", full_tag)
    if not os.path.isdir(ckpt_dir):
        raise FileNotFoundError(f"No checkpoint directory for tag: {ckpt_dir}")

    for fname in os.listdir(ckpt_dir):
        if fname.endswith(".json") and "seed" in fname:
            try:
                return int(fname.split("seed")[1].split(".json")[0])
            except Exception:
                continue
    raise FileNotFoundError(f"No seed files found in: {ckpt_dir}")


# --------------------------
# 0) Global config (n=2, K=2)
# --------------------------
OUT_DIR = "results"  # keep artifacts inside the package
os.makedirs(OUT_DIR, exist_ok=True)

# Calvano-like environment (no steering), K=2, patient agents.
sim = SimParams(
    n=2, K=2,
    price_grid=(1.0, 1.2, 1.4, 1.6, 1.8),
    kappa=0.0,          # NO steering baseline
    mu=1.5,             # own-price logit slope
    rho=0.0, sigma=0.08,
    b=3.0, c=1.0, phi=0.8,
    seed=42
)
ap = AgentParams(
    n_actions=len(sim.price_grid),
    alpha=0.05, delta=0.997, tau0=1.0, tau_min=0.01, gamma=0.99998
)

# --------------------------
# 1) Train baseline at kappa=0 (Calvano replication)
# --------------------------
tr_base = TrainParams(T_max=300_000, replications=8, base_seed=42, n_jobs=-1)
tag_base0 = "calvano_n2_K2_kappa0"

print(">>> Training baseline (kappa=0)...")
trainer = Trainer(sim, ap, out_dir=OUT_DIR)
seeds_baseline = trainer.train(tr_base, tag_base0)
print("Baseline trained seeds:", seeds_baseline)

# Impulse-response at baseline (Calvano Fig-4 analog)
print(">>> Generating impulse-response at kappa=0...")
env0, g_agents0, s0 = load_greedy_agents(OUT_DIR, tag_base0, seeds_baseline[0], sim)
fig0, series0 = fig_impulse_response(
    env0, g_agents0, s0, out_dir=OUT_DIR, tag=tag_base0,
    horizon_after=15, reps=24, warmup_greedy=1000,
    deviator_index=0, base_seed=777, n_jobs=-1
)
print("Baseline impulse figure:", fig0)

# --------------------------
# 2) Sweep kappa and estimate kappa*
# --------------------------
kap_grid = [np.inf, 100, 50, 25, 10, 5, 1, 0]  # descending handled inside estimator
tr_sweep = TrainParams(T_max=150_000, replications=8, base_seed=99, n_jobs=-1)

print(">>> Estimating kappa* on grid:", kap_grid)
df, kstar = estimate_kappa_star(
    out_dir=OUT_DIR,
    sim_base=sim,
    ap=ap,
    kap_grid=kap_grid,
    tr=tr_sweep,
    delta=ap.delta, mu=sim.mu,
    tag_base="calvano_n2_K2",
    tol_markup=0.05,  # 5% above Bertrand rule
    n_jobs=-1
)
print(df)
print("Estimated kappa* =", kstar)

# Plot threshold curve
print(">>> Plotting kappa-threshold curve...")
pdf, png = plot_kappa_threshold(df, kstar, out_dir=OUT_DIR, tag="calvano_n2_K2", tol_markup=0.05)
print("Saved threshold plot:", pdf)

# --------------------------
# 3) Impulse at kappa near kappa* (pick the first competitive kappa)
# --------------------------
# Choose the smallest kappa with mean markup <= tol (robust visual choice)
tol = 0.05
eligible = df[df["markup_mean"] <= tol]
if not eligible.empty:
    kappa_for_impulse = float(eligible.sort_values("kappa").iloc[0]["kappa"])
else:
    # fallback: use kstar if finite, else choose a mid-grid competitive kappa (e.g., 25)
    kappa_for_impulse = float(kstar) if np.isfinite(kstar) else 25.0

print(f">>> Impulse at kappa ~ kappa*: selected κ = {kappa_for_impulse}")

# build the exact tag produced by estimate_kappa_star:
base_tag = "calvano_n2_K2"                       # <- the tag_base you used in estimate_kappa_star
kap_tag  = kappa_to_tag(kappa_for_impulse)       # e.g., "0", "10", "inf"
tag_kappa = f"{base_tag}_kap{kap_tag}"           # e.g., "calvano_n2_K2_kap0"

# find a seed for that exact tag
seed_k = find_seed_exact(OUT_DIR, tag_kappa)

# load and run impulse
sim_k = SimParams(**{**sim.__dict__, "kappa": float(kappa_for_impulse)})
envK, g_agentsK, sK = load_greedy_agents(OUT_DIR, tag_kappa, seed_k, sim_k)
figK, seriesK = fig_impulse_response(
    envK, g_agentsK, sK, out_dir=OUT_DIR, tag=tag_kappa,
    horizon_after=15, reps=24, warmup_greedy=1000,
    deviator_index=0, base_seed=1234, n_jobs=-1
)
print("Impulse figure near kappa*:", figK)

print("\n✅ Done. Artifacts saved under:", os.path.abspath(OUT_DIR))
print("   - Baseline impulse:", fig0)
print("   - Threshold plot:  ", pdf)
print("   - Impulse @ κ*:   ", figK)


>>> Training baseline (kappa=0)...
Baseline trained seeds: [42, 59, 76, 93, 110, 127, 144, 161]
>>> Generating impulse-response at kappa=0...
Baseline impulse figure: results/figures/calvano_n2_K2_kappa0/impulse_response.pdf
>>> Estimating kappa* on grid: [inf, 100, 50, 25, 10, 5, 1, 0]
   kappa  markup_mean  markup_sd  n_seeds
0    inf     0.367708   0.036902        8
1  100.0     0.139757   0.030752        8
2   50.0     0.142361   0.028421        8
3   25.0     0.152778   0.018373        8
4   10.0     0.161458   0.013780        8
5    5.0     0.187500   0.029463        8
6    1.0     0.011719   0.040127        8
7    0.0    -0.003472   0.009187        8
Estimated kappa* = 0.0
>>> Plotting kappa-threshold curve...
Saved threshold plot: results/figures/calvano_n2_K2/kappa_threshold.pdf
>>> Impulse at kappa ~ kappa*: selected κ = 0.0
Impulse figure near kappa*: results/figures/calvano_n2_K2_kap0/impulse_response.pdf

✅ Done. Artifacts saved under: /Users/asifkhan/Downloads/platform_at

#### Impulse Response Test

In [17]:
import sys, os
sys.path.append(os.path.abspath(".."))
from platform_attention import SimParams, AgentParams, TrainParams
from platform_attention.kode.train import Trainer
from platform_attention.kode.figures import fig_impulse_response
from platform_attention.kode.eval import load_greedy_agents, static_nash_price, static_monopoly_price


In [27]:
# Calvano style i.e. kappa = 0
import numpy as np

sim = SimParams(
    n=3, K=0,
    price_grid=(1.0, 1.2, 1.4, 1.6, 1.8),
    kappa=0.0,     # no steering
    mu=1.5,        # ignored when κ=0 (uniform shares)
    b=3.0, c=1.0, phi=0.8,
    rho=0.0, sigma=0.08,
    seed=42
)

ap = AgentParams(
    n_actions=len(sim.price_grid),
    alpha=0.05, delta=0.997,
    tau0=1.0, tau_min=0.01, gamma=0.99998
)

tr = TrainParams(T_max=T_MAX, replications=REPS_TRAIN, base_seed=BASE_SEED, n_jobs=-1)

OUT_DIR = "results"                # because the notebook is inside platform_attention/
TAG     = "calvano_n2_K2_kappa0"   # choose your tag

trainer = Trainer(sim, ap, out_dir=OUT_DIR)
seeds = trainer.train(tr, TAG)
print("Trained seeds:", seeds)
print("Checkpoints saved under:", f"{OUT_DIR}/checkpoints/{TAG}")


Trained seeds: [42, 59, 76, 93]
Checkpoints saved under: results/checkpoints/calvano_n2_K2_kappa0


In [28]:
# Load trained run and generate impulse figures
# Pick a seed that exists in your checkpoints folder
seed = 42
env, greedy_agents, start_state = load_greedy_agents(
    out_dir="results",
    tag="calvano_n2_K2_kappa0",
    seed=seed,
    sim=sim
)

# Run impulse-response experiment
fig_path, series = fig_impulse_response(
    env, greedy_agents, start_state,
    out_dir="results",
    tag="calvano_n2_K2_kappa0",
    horizon_after=15,    # how many periods to plot after the impulse
    reps=24,             # averaging replications
    warmup_greedy=1000,  # steps to reach the collusive state
    deviator_index=0,    # which agent deviates
    base_seed=777,
    n_jobs=-1
)
print("Impulse figure saved to:", fig_path)


Impulse figure saved to: results/figures/calvano_n2_K2_kappa0/impulse_response.pdf


In [12]:
# === Impulse responses for (n,K) in {(2,1),(2,2),(3,1),(3,2)} — save JSON + PNG ===
import os, sys, json
import numpy as np
import matplotlib.pyplot as plt

# ensure we can import the package from the parent
sys.path.append(os.path.abspath(".."))

from platform_attention import SimParams, AgentParams, TrainParams
from platform_attention.kode.train import Trainer
from platform_attention.kode.eval import load_greedy_agents, static_nash_price, static_monopoly_price
from platform_attention.kode.figures import impulse_response_avg_once

# ----- Configs you might tweak -----
OUT_DIR = "results"           # notebook is inside platform_attention/
IR_DIR  = os.path.join(OUT_DIR, "irplots")
os.makedirs(IR_DIR, exist_ok=True)

# === Calibrated params: feasible/collusive prices in ~[1.0, 2.0] ===
PRICE_GRID = (1.0, 1.2, 1.4, 1.6, 1.8)   # include 2.0 so peak can sit near 1.9–2.0
DELTA      = 0.997
ALPHA      = 0.05
TAU0, TAU_MIN, GAMMA = 1.0, 0.01, 0.99998
SIGMA, RHO = 0.08, 0.0

# demand / attention primitives
A_INTERCEPT = 2.8   # a ≡ b in κ=0 Calvano branch (total Q = a + θ − min p)
B_INTERCEPT = 2.8
C_COST      = 1.0
PHI         = 1.0   # only matters when κ>0
MU          = 1.5

# training control
T_MAX      = 300_000
REPS_TRAIN = 4
BASE_SEED  = 42

# impulse settings
HORIZON_AFTER  = 15
WARMUP_GREEDY  = 1000
DEVIATOR_INDEX = 0
REPS_IMPULSE   = 24
BASE_SEED_IMP  = 777
N_JOBS         = -1

def ensure_trained_and_run_ir(n: int, K: int):
    """
    Train (if needed) a baseline κ=0 model for given (n,K),
    run impulse response, save JSON and PNG with required filenames.
    """
    tag = f"calvano_n{n}_K{K}_kappa0"  # consistent with your prior naming

    # --- Build params ---
    # baseline for Calvano
    sim = SimParams(
        n=n, K=K,
        price_grid=(1.0, 1.2, 1.4, 1.6, 1.8),
        kappa=np.inf,          # <<<<<<<<<<  WTA baseline (Calvano)
        a=2.8, b=2.8, c=1.0,
        phi=1.0, mu=1.5,       # mu/phi irrelevant in κ=∞ branch
        sigma=0.08, rho=0.0,
        observed_shocks=True,
        seed=BASE_SEED
    )

    ap = AgentParams(
        n_actions=len(sim.price_grid),
        alpha=ALPHA, delta=DELTA,
        tau0=TAU0, tau_min=TAU_MIN, gamma=GAMMA
    )
    tr = TrainParams(T_max=T_MAX, replications=REPS_TRAIN, base_seed=BASE_SEED, n_jobs=N_JOBS)

    # --- Always train fresh (overwrite checkpoints) ---
    ckpt_dir = os.path.join(OUT_DIR, "checkpoints", tag)
    if os.path.isdir(ckpt_dir):
        # remove any existing checkpoints to avoid confusion
        import shutil
        print(f"[train] Removing existing checkpoint directory: {ckpt_dir}")
        shutil.rmtree(ckpt_dir)

    os.makedirs(ckpt_dir, exist_ok=True)
    print(f"[train] Training {tag} (fresh overwrite)...")

    trainer = Trainer(sim, ap, out_dir=OUT_DIR)
    seeds = trainer.train(tr, tag)

    print(f"[train] Done. New checkpoints written to {ckpt_dir}")
    print(f"[train] Seeds: {seeds}")


    # --- Load greedy agents from one seed (pick the first available) ---
    seed_pick = None
    for f in sorted(os.listdir(ckpt_dir)):
        if f.endswith(".json") and "seed" in f:
            try:
                seed_pick = int(f.split("seed")[1].split(".json")[0])
                break
            except Exception:
                pass
    if seed_pick is None:
        raise FileNotFoundError(f"No saved Q files found in {ckpt_dir}")

    env, g_agents, s0 = load_greedy_agents(OUT_DIR, tag, seed_pick, sim)

    # --- Run impulse-response (averaged) ---
    json_path = os.path.join(IR_DIR, f"ir_n{n}_K{K}.json")
    p_dev, p_riv, theta_bar = impulse_response_avg_once(
        env, g_agents, s0,
        horizon_after=HORIZON_AFTER,
        reps=REPS_IMPULSE,
        warmup_greedy=WARMUP_GREEDY,
        deviator_index=DEVIATOR_INDEX,
        base_seed=BASE_SEED_IMP,
        n_jobs=N_JOBS,
        out_path=json_path
    )
    print(f"[save] JSON -> {json_path}")

    # --- Compute benchmarks at θ̄ ---
    pB  = static_nash_price(env, theta_bar)
    pM  = static_monopoly_price(env, theta_bar)
    pLR = float((p_dev[-1] + p_riv[-1]) / 2.0)

    # --- Plot and save PNG (Calvano-style figure) ---
    t = np.arange(len(p_dev))
    plt.figure(figsize=(6.6, 3.3), dpi=150)
    plt.plot(t, p_dev, marker='o', linewidth=2, label="Deviating agent")
    plt.plot(t, p_riv, marker='^', linewidth=2, label="Nondeviating agent")
    plt.hlines(pB, xmin=t[0], xmax=t[-1], linestyles="dotted",  label="Nash price")
    plt.hlines(pM, xmin=t[0], xmax=t[-1], linestyles="dashdot", label="Monopoly price")
    plt.hlines(pLR, xmin=t[0], xmax=t[-1], colors="gray", linewidth=1.0, label="Long-run price")
    plt.xlabel("Time"); plt.ylabel("Price")
    plt.title(f"Impulse response (n={n}, K={K}, κ=0)")
    plt.legend(frameon=False, ncol=2)
    plt.tight_layout()

    png_path = os.path.join(IR_DIR, f"ir_n{n}_K{K}.png")
    plt.savefig(png_path, dpi=150)
    plt.show()
    print(f"[save] PNG  -> {png_path}")

    return {"json": json_path, "png": png_path, "theta_bar": theta_bar, "pB": pB, "pM": pM, "pLR": pLR}




In [None]:
# ---- Run all four combos ----
results = {}
for n in (2, 3):
    for K in (1, 2):
        print(f"\n=== Running impulse for (n={n}, K={K}) ===")
        results[(n, K)] = ensure_trained_and_run_ir(n, K)

print("\nDone. Outputs:")
for (n, K), r in results.items():
    print(f"(n={n},K={K}) -> JSON: {r['json']} | PNG: {r['png']} | θ̄={r['theta_bar']:.3f} | pB={r['pB']:.3f} | pM={r['pM']:.3f} | pLR={r['pLR']:.3f}")

In [38]:
# IR batch with auto-train fallback using Trainer/TrainParams
# Output: results/irplots/ir_n{n}_K{K}.json / .png for (n,K) in {(2,1),(2,2),(3,1),(3,2)}

import os, re, json, sys
import numpy as np
import matplotlib.pyplot as plt

# Make sure we can import the package when running from .../platform_attention
if (os.path.isdir("./platform_attention") and "./" not in sys.path):
    sys.path.insert(0, "./")

from platform_attention import (
    SimParams,
    Trainer, TrainParams,
    AgentParams,           # <-- we'll construct this for Trainer
)
from platform_attention.kode.eval import (
    impulse_response_avg_once,
    static_nash_price,
    static_monopoly_price,
    load_greedy_agents,
)
from platform_attention.kode.io import ensure_dir, load_json

# ---------- helpers ----------
def _first_tag_for_nK(checkpoints_dir: str, n: int, K: int):
    """Return first tag matching 'n{n}_K{K}_*' or None."""
    if not os.path.isdir(checkpoints_dir):
        return None
    prefix = f"n{n}_K{K}_"
    cands = sorted(d for d in os.listdir(checkpoints_dir) if d.startswith(prefix))
    return cands[0] if cands else None

_seed_re = re.compile(r"_seed(\d+)\.json$")
def _one_seed_for_tag(tag_dir: str, n_agents: int) -> int:
    """Pick a seed that has Q files for all agent indices 0..n_agents-1."""
    files = [f for f in os.listdir(tag_dir) if f.endswith(".json")]
    seeds = {}
    for fn in files:
        m = _seed_re.search(fn)
        if not m: 
            continue
        sd = int(m.group(1))
        m2 = re.search(r"Q_agent(\d+)_seed", fn)
        if not m2:
            continue
        ai = int(m2.group(1))
        seeds.setdefault(sd, set()).add(ai)
    for sd, have in sorted(seeds.items()):
        if have == set(range(n_agents)):
            return sd
    raise RuntimeError(f"No seed in {tag_dir} has all {n_agents} agent Q-files.")

def _simparams_from_meta(meta: dict) -> SimParams:
    """Rebuild SimParams from saved Q meta (only recognized fields are used)."""
    fields = {"n","c","a","a0","mu","price_grid","K","seed","m","xi","b","phi","sigma","rho","observed_shocks","kappa"}
    kwargs = {k: meta[k] for k in meta.keys() if k in fields}
    if "price_grid" in kwargs and isinstance(kwargs["price_grid"], list):
        kwargs["price_grid"] = tuple(kwargs["price_grid"])
    return SimParams(**kwargs)

# ---------- config ----------
pairs = [(2,1), (2,2), (3,1), (3,2)]
root_results = "results"
ckpt_root = os.path.join(root_results, "checkpoints")
out_dir = ensure_dir(os.path.join(root_results, "irplots"))

HORIZON_AFTER = 15
REPS = 24
WARMUP = 1000
DEVIATOR_INDEX = 0
BASE_SEED = 777
N_JOBS = -1

# Train defaults (adjust if you want)
train_cfg = TrainParams(
    T_max=120_000,
    replications=16,
    base_seed=42,
    n_jobs=-1
)

made = []
for (n, K) in pairs:
    print(f"\n=== (n={n}, K={K}) ===")
    tag = _first_tag_for_nK(ckpt_root, n, K)

    if tag is None:
        # --------- auto-train fallback ----------
        print(f"No checkpoint found for (n={n}, K={K}). Training now...")
        sim_for_train = SimParams(n=n, K=K)  # uses your env defaults for the rest

        # Build AgentParams for Trainer. Trainer internally sets n_actions from env,
        # so we only need placeholders for the learning hyperparams; defaults are fine.
        # AgentParams requires n_actions; use the grid length as a safe placeholder.
        n_actions_placeholder = len(sim_for_train.price_grid)
        ap_for_train = AgentParams(n_actions=n_actions_placeholder)

        # Tag must start with n{n}_K{K}_ so downstream lookup works
        tag = f"n{n}_K{K}_auto"

        # Instantiate Trainer with sim + ap and results dir
        trainer = Trainer(sim_for_train, ap_for_train, out_dir="results")

        # Run training (saves Q tables under results/checkpoints/{tag}/)
        _ = trainer.train(train_cfg, tag=tag)
        print(f"Training complete -> tag={tag}")

    tag_dir = os.path.join(ckpt_root, tag)
    seed = _one_seed_for_tag(tag_dir, n_agents=n)

    # read meta to rebuild the exact SimParams that were trained
    sample = [f for f in os.listdir(tag_dir) if f.startswith("Q_agent0_") and f"_seed{seed}." in f]
    if not sample:
        sample = [f for f in os.listdir(tag_dir) if f"_seed{seed}." in f]
    if not sample:
        raise FileNotFoundError(f"No Q-table found for (n={n},K={K}) seed={seed}")
    meta = load_json(os.path.join(tag_dir, sample[0])).get("meta", {})
    sim = _simparams_from_meta(meta)

    # load greedy agents + steady state
    env, greedy_agents, start_state = load_greedy_agents(out_dir=root_results, tag=tag, seed=seed, sim=sim)

    # impulse response
    p_dev, p_riv, theta_bar = impulse_response_avg_once(
        env, greedy_agents, start_state,
        horizon_after=HORIZON_AFTER, reps=REPS, warmup_greedy=WARMUP,
        deviator_index=DEVIATOR_INDEX, base_seed=BASE_SEED, n_jobs=N_JOBS
    )

    # static benchmarks (only defined for n=2 in your helpers)
    has_two_firm_bench = (sim.n == 2)
    if has_two_firm_bench:
        p_ne  = static_nash_price(env, theta_bar)
        p_mon = static_monopoly_price(env, theta_bar)
    else:
        p_ne, p_mon = np.nan, np.nan
    p_lr  = float((p_dev[-1] + p_riv[-1]) / 2.0)

    # save JSON
    base = os.path.join(out_dir, f"ir_n{n}_K{K}")
    with open(base + ".json", "w") as f:
        json.dump({
            "meta": sim.__dict__,
            "tag": tag,
            "seed": seed,
            "theta_bar": float(theta_bar),
            "t": list(range(len(p_dev))),
            "p_dev": [float(x) for x in p_dev],
            "p_riv": [float(x) for x in p_riv],
            "p_ne":  None if not has_two_firm_bench else float(p_ne),
            "p_mon": None if not has_two_firm_bench else float(p_mon),
            "p_long_run": float(p_lr)
        }, f, indent=2)

    # plot
    t = np.arange(len(p_dev))
    plt.figure(figsize=(6.8,3.4), dpi=150)
    plt.plot(t, p_dev, marker="o", linewidth=2, label="Deviator")
    plt.plot(t, p_riv, marker="^", linewidth=2, label="Rival")
    if has_two_firm_bench:
        plt.hlines(p_ne,  xmin=t[0], xmax=t[-1], linestyles="dotted",  label="Nash price")
        plt.hlines(p_mon, xmin=t[0], xmax=t[-1], linestyles="dashdot", label="Monopoly price")
    plt.hlines(p_lr,  xmin=t[0], xmax=t[-1], linewidth=1.0, label="Long-run price")
    plt.title(f"Impulse response (n={n}, K={K}) | tag={tag}")
    plt.xlabel("Time"); plt.ylabel("Price")
    plt.legend(frameon=False, ncol=2)
    plt.tight_layout()
    plt.savefig(base + ".png", dpi=200)
    plt.close()

    made.append((base + ".png", base + ".json", tag, seed))

print("\n=== Completed ===")
for png, jsn, tag, sd in made:
    print(f"- {png}\n  {jsn}\n  (tag={tag}, seed={sd})")



=== (n=2, K=1) ===

=== (n=2, K=2) ===
No checkpoint found for (n=2, K=2). Training now...
Training complete -> tag=n2_K2_auto

=== (n=3, K=1) ===
No checkpoint found for (n=3, K=1). Training now...
Training complete -> tag=n3_K1_auto

=== (n=3, K=2) ===
No checkpoint found for (n=3, K=2). Training now...
Training complete -> tag=n3_K2_auto

=== Completed ===
- results/irplots/ir_n2_K1.png
  results/irplots/ir_n2_K1.json
  (tag=n2_K1_kap0, seed=42)
- results/irplots/ir_n2_K2.png
  results/irplots/ir_n2_K2.json
  (tag=n2_K2_auto, seed=42)
- results/irplots/ir_n3_K1.png
  results/irplots/ir_n3_K1.json
  (tag=n3_K1_auto, seed=42)
- results/irplots/ir_n3_K2.png
  results/irplots/ir_n3_K2.json
  (tag=n3_K2_auto, seed=42)


In [40]:
# Lagged-IR + smoothing (no repo edits). Produces 4 JSONs/PNGs for (n,K)=(2,1),(2,2),(3,1),(3,2).
# Files: results/irplots/ir_n{n}_K{K}.json / .png

import os, re, json, sys
import numpy as np
import matplotlib.pyplot as plt

# Ensure local import
if (os.path.isdir("./platform_attention") and "./" not in sys.path):
    sys.path.insert(0, "./")

from platform_attention import SimParams, Trainer, TrainParams, AgentParams
from platform_attention.kode.eval import load_greedy_agents, static_nash_price, static_monopoly_price
from platform_attention.kode.io import ensure_dir, load_json

# ---------------- helpers ----------------
def _first_tag_for_nK(checkpoints_dir: str, n: int, K: int):
    if not os.path.isdir(checkpoints_dir):
        return None
    prefix = f"n{n}_K{K}_"
    cands = sorted(d for d in os.listdir(checkpoints_dir) if d.startswith(prefix))
    return cands[0] if cands else None

_seed_re = re.compile(r"_seed(\d+)\.json$")
def _one_seed_for_tag(tag_dir: str, n_agents: int) -> int:
    files = [f for f in os.listdir(tag_dir) if f.endswith(".json")]
    seeds = {}
    for fn in files:
        m = _seed_re.search(fn)
        if not m: 
            continue
        sd = int(m.group(1))
        m2 = re.search(r"Q_agent(\d+)_seed", fn)
        if not m2:
            continue
        ai = int(m2.group(1))
        seeds.setdefault(sd, set()).add(ai)
    for sd, have in sorted(seeds.items()):
        if have == set(range(n_agents)):
            return sd
    raise RuntimeError(f"No seed in {tag_dir} has all {n_agents} agent Q-files.")

def _simparams_from_meta(meta: dict) -> SimParams:
    fields = {"n","c","a","a0","mu","price_grid","K","seed","m","xi","b","phi","sigma","rho","observed_shocks","kappa"}
    kwargs = {k: meta[k] for k in meta.keys() if k in fields}
    if "price_grid" in kwargs and isinstance(kwargs["price_grid"], list):
        kwargs["price_grid"] = tuple(kwargs["price_grid"])
    return SimParams(**kwargs)

def _moving_average(x, w: int):
    if w is None or w <= 1:
        return np.array(x, dtype=float)
    w = int(w)
    pad = w - 1
    x = np.asarray(x, dtype=float)
    # simple centered moving average (pad on both ends)
    xpad = np.pad(x, (pad//2, pad - pad//2), mode="edge")
    ma = np.convolve(xpad, np.ones(w)/w, mode="valid")
    return ma

# --------- Lagged impulse response runner (per the user's expectation) ----------
def impulse_response_avg_once_lagged(env, greedy_agents, start_state, *,
                                     horizon_after=55, reps=24, warmup_greedy=1000,
                                     deviator_index=0, base_seed=777, n_jobs=-1):
    """
    Same as your default IR, except at τ=1 rivals keep their τ=0 prices (one-period lagged response).
    From τ≥2, everyone plays greedy again.
    """
    from joblib import Parallel, delayed
    from platform_attention.kode.env import AttentionEnv

    # Warm to steady state using greedy
    s = start_state
    def act_g(ag, st): return ag.act_greedy(st)
    for _ in range(warmup_greedy):
        acts = [act_g(a, s) for a in greedy_agents]
        s, _, _ = env.step(acts)

    steady_hist = list(env.hist); steady_theta = float(env.theta)
    A = env.A
    n = env.n

    def run_once(rep_seed):
        env_r = AttentionEnv(env.prm, seed=rep_seed)
        env_r.hist = list(steady_hist); env_r.theta = steady_theta
        s0 = (tuple(steady_hist), float(steady_theta)) if env_r.prm.observed_shocks else tuple(steady_hist)

        # τ=0: everybody greedy
        acts0 = [act_g(a, s0) for a in greedy_agents]
        s1, _, info0 = env_r.step(acts0)
        p0 = info0["p_vec"].astype(float); th0 = float(info0["theta"])

        dev, riv = [p0[deviator_index]], [p0[1 - deviator_index]]

        # τ=1 (DEVIATOR undercuts; RIVALS HOLD their τ=0 prices = one-period lag)
        # deviator undercuts rival-by-one-tick, capped by deviator's static BR vs rival's τ=0 price
        rival0 = p0[1 - deviator_index]
        r0_idx = int(np.abs(A - rival0).argmin())
        undercut_idx = max(0, r0_idx - 1)
        # deviator's static BR to rival0 at current theta
        def _static_best_response(envX, rival_price, i, theta):
            A_ = envX.A; best_p, best_v = None, -1e300
            for p in A_:
                pv = np.full(envX.n, rival_price); pv[i] = p
                v = envX.static_profits(pv, theta)[i]
                if (v > best_v) or (np.isclose(v, best_v) and (best_p is not None) and (p < best_p)):
                    best_p, best_v = float(p), v
            return best_p
        p_dev_br = _static_best_response(env_r, rival0, i=deviator_index, theta=th0)
        br_idx = int(np.abs(A - p_dev_br).argmin())
        dev_idx = min(undercut_idx, br_idx)

        # Build τ=1 actions: rivals keep τ=0 price indices; deviator deviates
        acts1 = []
        for j in range(n):
            if j == deviator_index:
                acts1.append(dev_idx)
            else:
                pj0 = p0[j]
                pj0_idx = int(np.abs(A - pj0).argmin())
                acts1.append(pj0_idx)

        s2, _, info1 = env_r.step(acts1)
        p1 = info1["p_vec"].astype(float)
        dev.append(p1[deviator_index]); riv.append(p1[1 - deviator_index])

        # τ≥2: everyone greedy
        s = s2
        for _ in range(horizon_after - 1):
            acts = [act_g(a, s) for a in greedy_agents]
            s, _, info = env_r.step(acts); p = info["p_vec"].astype(float)
            dev.append(p[deviator_index]); riv.append(p[1 - deviator_index])

        return np.array(dev), np.array(riv), th0

    seeds = [base_seed + 13*r for r in range(reps)]
    results = Parallel(n_jobs=n_jobs, prefer="processes")(delayed(run_once)(sd) for sd in seeds)
    dev_paths, riv_paths, thetas = zip(*results)
    dev_mean = np.mean(np.stack(dev_paths), axis=0)
    riv_mean = np.mean(np.stack(riv_paths), axis=0)
    theta_bar = float(np.mean(thetas))
    return dev_mean, riv_mean, theta_bar

# ---------------- main batch ----------------
pairs = [(2,1), (2,2), (3,1), (3,2)]
root_results = "results"
ckpt_root = os.path.join(root_results, "checkpoints")
out_dir = ensure_dir(os.path.join(root_results, "irplots"))

# IR controls
HORIZON_AFTER = 15
REPS = 64          # ↑ reps to average more
WARMUP = 2000      # ↑ warmup to settle cycle
DEVIATOR_INDEX = 0
BASE_SEED = 777
N_JOBS = -1
SMOOTH_WIN = 3     # moving-average window (set to 1/None to disable)

train_cfg = TrainParams(T_max=120_000, replications=16, base_seed=42, n_jobs=-1)

made = []
for (n, K) in pairs:
    print(f"\n=== (n={n}, K={K}) ===")
    tag = _first_tag_for_nK(ckpt_root, n, K)

    if tag is None:
        print(f"No checkpoint for (n={n},K={K}). Training...")
        sim_for_train = SimParams(n=n, K=K)
        ap_for_train  = AgentParams(n_actions=len(sim_for_train.price_grid))
        tag = f"n{n}_K{K}_auto"
        trainer = Trainer(sim_for_train, ap_for_train, out_dir="results")
        _ = trainer.train(train_cfg, tag=tag)
        print(f"Training complete -> tag={tag}")

    tag_dir = os.path.join(ckpt_root, tag)
    seed = _one_seed_for_tag(tag_dir, n_agents=n)

    # rebuild SimParams from saved meta
    sample = [f for f in os.listdir(tag_dir) if f.startswith("Q_agent0_") and f"_seed{seed}." in f] or \
             [f for f in os.listdir(tag_dir) if f"_seed{seed}." in f]
    if not sample:
        raise FileNotFoundError(f"No Q-table found for (n={n},K={K}) seed={seed}")
    meta = load_json(os.path.join(tag_dir, sample[0])).get("meta", {})
    sim = _simparams_from_meta(meta)

    # load greedy agents + start state
    env, greedy_agents, start_state = load_greedy_agents(out_dir=root_results, tag=tag, seed=seed, sim=sim)

    # run lagged IR (rival reacts from τ=2 onward)
    p_dev, p_riv, theta_bar = impulse_response_avg_once_lagged(
        env, greedy_agents, start_state,
        horizon_after=HORIZON_AFTER, reps=REPS, warmup_greedy=WARMUP,
        deviator_index=DEVIATOR_INDEX, base_seed=BASE_SEED, n_jobs=N_JOBS
    )

    # optional smoothing (after averaging across reps)
    p_dev_s = _moving_average(p_dev, SMOOTH_WIN)
    p_riv_s = _moving_average(p_riv, SMOOTH_WIN)

    # reference lines (only coded for n=2 helpers)
    has_two_firm_bench = (sim.n == 2)
    if has_two_firm_bench:
        p_ne  = float(static_nash_price(env, theta_bar))
        p_mon = float(static_monopoly_price(env, theta_bar))
    p_lr  = float((p_dev[-1] + p_riv[-1]) / 2.0)

    # save JSON (store both raw and smoothed)
    base = os.path.join(out_dir, f"ir_n{n}_K{K}")
    with open(base + ".json", "w") as f:
        json.dump({
            "meta": sim.__dict__,
            "tag": tag,
            "seed": seed,
            "theta_bar": float(theta_bar),
            "t": list(range(len(p_dev))),
            "p_dev": [float(x) for x in p_dev],
            "p_riv": [float(x) for x in p_riv],
            "p_dev_smooth": [float(x) for x in p_dev_s],
            "p_riv_smooth": [float(x) for x in p_riv_s],
            "p_ne":  None if not has_two_firm_bench else float(p_ne),
            "p_mon": None if not has_two_firm_bench else float(p_mon),
            "p_long_run": float(p_lr)
        }, f, indent=2)

    # plot (smoothed series)
    t = np.arange(len(p_dev_s))
    plt.figure(figsize=(6.8,3.4), dpi=150)
    plt.plot(t, p_dev_s, marker="o", linewidth=2, label="Deviator (smooth)")
    plt.plot(t, p_riv_s, marker="^", linewidth=2, label="Rival (smooth)")
    if has_two_firm_bench:
        plt.hlines(p_ne,  xmin=t[0], xmax=t[-1], linestyles="dotted",  label="Nash price")
        plt.hlines(p_mon, xmin=t[0], xmax=t[-1], linestyles="dashdot", label="Monopoly price")
    plt.hlines(p_lr,  xmin=t[0], xmax=t[-1], linewidth=1.0, label="Long-run price")
    plt.title(f"Impulse response (lagged rival) (n={n}, K={K}) | tag={tag}")
    plt.xlabel("Time"); plt.ylabel("Price")
    plt.legend(frameon=False, ncol=2)
    plt.tight_layout()
    plt.savefig(base + ".png", dpi=200)
    plt.close()

    made.append((base + ".png", base + ".json", tag, seed))

print("\n=== Completed ===")
for png, jsn, tag, sd in made:
    print(f"- {png}\n  {jsn}\n  (tag={tag}, seed={sd})")



=== (n=2, K=1) ===

=== (n=2, K=2) ===

=== (n=3, K=1) ===

=== (n=3, K=2) ===

=== Completed ===
- results/irplots/ir_n2_K1.png
  results/irplots/ir_n2_K1.json
  (tag=n2_K1_kap0, seed=42)
- results/irplots/ir_n2_K2.png
  results/irplots/ir_n2_K2.json
  (tag=n2_K2_auto, seed=42)
- results/irplots/ir_n3_K1.png
  results/irplots/ir_n3_K1.json
  (tag=n3_K1_auto, seed=42)
- results/irplots/ir_n3_K2.png
  results/irplots/ir_n3_K2.json
  (tag=n3_K2_auto, seed=42)


In [54]:
# --- CELL 1: Long-run markup & mean price by number of firms (Table 2) ---

import os, numpy as np, pandas as pd
from joblib import Parallel, delayed

from platform_attention import SimParams, AgentParams, TrainParams, Trainer
from platform_attention import AttentionEnv
from platform_attention.kode.eval import load_greedy_agents
from platform_attention.kode.io import ensure_dir

# ---------- config ----------
out_dir = "results"
Ns = [2,3,4,5]
K = 1
kappa = 0.0          # neutral attention for this table
delta = 0.95
mu = 0.25
price_grid = (1.424,1.464,1.505,1.545,1.586,1.626,1.667,1.707,1.747,1.788,1.828,1.869,1.909,1.950,1.990)

ap = AgentParams(n_actions=len(price_grid), alpha=0.05, delta=delta, tau0=1.0, tau_min=0.01, gamma=0.99997)
tr = TrainParams(T_max=120_000, replications=8, base_seed=42, n_jobs=-1)  # shrink reps if you need quick test

def tag_from(n, K, grid, kappa, delta, mu):
    # mirror your io.tag_from_params, inlined to keep the cell self-contained
    import numpy as _np
    kappa_tag = "inf" if _np.isinf(kappa) else str(kappa).replace(".","p")
    grid_tag = "g" + "-".join(str(x) for x in grid)
    return f"n{n}_K{K}_{grid_tag}_kap{kappa_tag}_del{str(delta).replace('.','p')}_mu{str(mu).replace('.','p')}"

def simulate_lr_metrics(env, greedy_agents, start_state, burn=2000, T=6000):
    s = start_state
    prices = []
    lerners = []
    # burn to steady cycle
    g = lambda ag, st: ag.act_greedy(st)
    for _ in range(burn):
        acts = [g(a, s) for a in greedy_agents]
        s, _, _ = env.step(acts)
    # record T periods
    for _ in range(T):
        acts = [g(a, s) for a in greedy_agents]
        s, _, info = env.step(acts)
        p = info["p_vec"].astype(float)
        prices.append(p.mean())
        lerners.append(((p - env.prm.c) / p).mean())  # Lerner per-firm then average
    return float(np.mean(lerners)), float(np.mean(prices))

rows = []
for n in Ns:
    sim = SimParams(n=n, K=K, kappa=kappa, mu=mu, price_grid=price_grid, c=1.0, a=2.0, a0=0.0, seed=42)
    tag = tag_from(n, K, price_grid, kappa, delta, mu)

    # Train or reuse checkpoints
    Trainer(sim, ap, out_dir=out_dir).train(tr, tag)

    # Evaluate across seeds used in training
    seeds = [tr.base_seed + 17*i for i in range(tr.replications)]
    metrics = []
    for sd in seeds:
        env, greedy_agents, s0 = load_greedy_agents(out_dir, tag, sd, sim)
        Lbar, pbar = simulate_lr_metrics(env, greedy_agents, s0, burn=2000, T=6000)
        metrics.append((Lbar, pbar))
    L_mean = float(np.mean([m[0] for m in metrics]))
    p_mean = float(np.mean([m[1] for m in metrics]))
    rows.append({"Number of Firms": n, "Mean Lerner Index": L_mean, "Mean Price": p_mean})

df_tab2 = pd.DataFrame(rows).sort_values("Number of Firms").reset_index(drop=True)

# Save
ensure_dir(os.path.join(out_dir, "tables"))
csv_path = os.path.join(out_dir, "tables", "table2_longrun_markup_meanprice.csv")
tex_path = os.path.join(out_dir, "tables", "table2_longrun_markup_meanprice.tex")
df_tab2.to_csv(csv_path, index=False)
with open(tex_path, "w") as f:
    f.write(df_tab2.to_latex(index=False, float_format="%.6f", caption="Long-run Markup and Implied Mean Price by Number of Firms", label="tab:longrun_markup"))

df_tab2


Unnamed: 0,Number of Firms,Mean Lerner Index,Mean Price
0,2,0.369175,1.588551
1,3,0.371402,1.600691
2,4,0.376328,1.617096
3,5,0.308349,1.451544


In [55]:
# --- CELL 2: Static Bertrand (pB) and Joint–Monopoly (pM) by n (Table 3) ---

import os, numpy as np, pandas as pd
from platform_attention import SimParams, AttentionEnv
from platform_attention.kode.eval import static_nash_price, static_monopoly_price
from platform_attention.kode.io import ensure_dir

out_dir = "results"
Ns = [2,3,4,5]
price_grid = (1.424,1.464,1.505,1.545,1.586,1.626,1.667,1.707,1.747,1.788,1.828,1.869,1.909,1.950,1.990)

rows = []
for n in Ns:
    sim = SimParams(n=n, K=2, kappa=10.0, mu=0.25, price_grid=price_grid, c=1.0, a=2.0, a0=0.0)
    env = AttentionEnv(sim, seed=123)
    theta_eval = 0.0  # or replace with theta_bar from a learned run
    pB = static_nash_price(env, theta_eval)
    pM = static_monopoly_price(env, theta_eval)
    rows.append({"Number of Firms": int(n), "pB": float(pB), "pM": float(pM)})

df_tab3 = pd.DataFrame(rows).sort_values("Number of Firms").reset_index(drop=True)

# Save
ensure_dir(os.path.join(out_dir, "tables"))
csv_path = os.path.join(out_dir, "tables", "table3_pB_pM_by_n.csv")
tex_path = os.path.join(out_dir, "tables", "table3_pB_pM_by_n.tex")
df_tab3.to_csv(csv_path, index=False)
with open(tex_path, "w") as f:
    f.write(df_tab3.to_latex(index=False, float_format="%.3f", caption="Bertrand and Joint–Monopoly Prices by Number of Firms", label="tab:pB_pM_n"))

df_tab3


Unnamed: 0,Number of Firms,pB,pM
0,2,1.424,1.909
1,3,1.424,1.909
2,4,1.424,1.909
3,5,1.424,1.909


In [51]:
# --- CELL 3: Static Bertrand (pB) and Joint–Monopoly (pM) by K (Table 3(b)) ---

import os, numpy as np, pandas as pd
from platform_attention import SimParams, AttentionEnv
from platform_attention.kode.eval import static_nash_price, static_monopoly_price
from platform_attention.kode.io import ensure_dir

out_dir = "results"
Ks = [1,2,3,4]
n=2
price_grid = (1.424,1.464,1.505,1.545,1.586,1.626,1.667,1.707,1.747,1.788,1.828,1.869,1.909,1.950,1.990)

rows = []
for K in Ks:
    sim = SimParams(n=n, K=K, kappa=0.0, mu=0.25, price_grid=price_grid, c=1.0, a=2.0, a0=0.0)
    env = AttentionEnv(sim, seed=123)
    theta_eval = 0.0  # or replace with theta_bar from a learned run
    pB = static_nash_price(env, theta_eval)
    pM = static_monopoly_price(env, theta_eval)
    rows.append({"Memory(K)": int(K), "pB": float(pB), "pM": float(pM)})

df_tab3 = pd.DataFrame(rows).sort_values("Memory(K)").reset_index(drop=True)

# Save
ensure_dir(os.path.join(out_dir, "tables"))
csv_path = os.path.join(out_dir, "tables", "table3_pB_pM_by_K.csv")
tex_path = os.path.join(out_dir, "tables", "table3_pB_pM_by_K.tex")
df_tab3.to_csv(csv_path, index=False)
with open(tex_path, "w") as f:
    f.write(df_tab3.to_latex(index=False, float_format="%.3f", caption="Bertrand and Joint–Monopoly Prices by K", label="tab:pB_pM_K"))

df_tab3


Unnamed: 0,Memory(K),pB,pM
0,1,1.464,1.909
1,2,1.464,1.909
2,3,1.464,1.909
3,4,1.464,1.909


In [53]:
# --- CELL 3: Profit-gain volatility with & without shocks ---

import numpy as np, pandas as pd, os
from platform_attention import SimParams, AgentParams, TrainParams, Trainer
from platform_attention.kode.eval import load_greedy_agents, static_nash_price
from platform_attention.kode.io import ensure_dir

out_dir = "results"
price_grid = (1.424,1.464,1.505,1.545,1.586,1.626,1.667,1.707,1.747,1.788,1.828,1.869,1.909,1.950,1.990)

# --- train once at sigma=0 (no shocks) ---
sim_train = SimParams(n=2, K=1, kappa=0.0, mu=0.25, price_grid=price_grid, c=1.0, a=2.0, a0=0.0, sigma=0.0, rho=0.0)
ap = AgentParams(n_actions=len(price_grid), alpha=0.05, delta=0.95, tau0=1.0, tau_min=0.01, gamma=0.99997)
tr = TrainParams(T_max=120_000, replications=4, base_seed=2025, n_jobs=-1)
tag = f"voltest_n{sim_train.n}_K{sim_train.K}_sigma0"

Trainer(sim_train, ap, out_dir=out_dir).train(tr, tag)
seeds = [tr.base_seed + 17*i for i in range(tr.replications)]

def eval_profit_gain_variance(sim_eval, tag, seeds, T=6000, burn=2000):
    # evaluate loaded greedy policies on a given env (possibly shocked)
    gains = []
    for sd in seeds:
        env, greedy_agents, s = load_greedy_agents(out_dir, tag, sd, sim_eval)
        # burn to steady state
        g = lambda ag, st: ag.act_greedy(st)
        for _ in range(burn):
            acts = [g(a, s) for a in greedy_agents]
            s, _, _ = env.step(acts)
        # collect profits & Bertrand baseline (at theta_bar)
        prices = []; thetas = []; profits = []
        for _ in range(T):
            acts = [g(a, s) for a in greedy_agents]
            s, _, info = env.step(acts)
            p_vec = info["p_vec"].astype(float)
            theta = float(info["theta"])
            pr = env.static_profits(p_vec, theta=theta)  # per-firm profits at time t
            prices.append(p_vec.mean()); thetas.append(theta); profits.append(pr.mean())
        theta_bar = float(np.mean(thetas))
        pB = static_nash_price(env, theta_bar)  # Bertrand price at mean shock
        # Bertrand per-firm profit at pB (symmetric)
        piB = env.static_profits(np.full(env.n, pB), theta=theta_bar).mean()
        # Profit gain path and its variance
        gains_path = (np.array(profits) - piB) / max(piB, 1e-12)
        gains.append(np.var(gains_path))
    return float(np.mean(gains)), float(np.std(gains))

# baseline (no shocks)
sim_eval_base = SimParams(**{**sim_train.__dict__})
var_base_mean, var_base_sd = eval_profit_gain_variance(sim_eval_base, tag, seeds)

# shocked evaluation (same greedy policies, now AR(1) θ path)
sim_eval_shock = SimParams(**{**sim_train.__dict__})
var_shock_mean, var_shock_sd = eval_profit_gain_variance(sim_eval_shock, tag, seeds)

df_vol = pd.DataFrame({
    "scenario": ["no_shock (σ=0, ρ=0)", "shock (σ=0.15, ρ=0.85)"],
    "mean_var_profit_gain": [var_base_mean, var_shock_mean],
    "sd_over_seeds": [var_base_sd, var_shock_sd],
})
ensure_dir(os.path.join(out_dir, "tables"))
csv_path = os.path.join(out_dir, "tables", "profit_gain_volatility.csv")
tex_path = os.path.join(out_dir, "tables", "profit_gain_volatility.tex")
df_vol.to_csv(csv_path, index=False)
with open(tex_path, "w") as f:
    f.write(df_vol.to_latex(index=False, float_format="%.5f", caption="Profit-gain variance without and with shocks", label="tab:volatility"))

df_vol


Unnamed: 0,scenario,mean_var_profit_gain,sd_over_seeds
0,"no_shock (σ=0, ρ=0)",0.004124,0.00262
1,"shock (σ=0.15, ρ=0.85)",0.004124,0.00262
