In [1]:
# ==== DEMO: Evaluate Baseline vs FGSM vs OIA (paper-aligned, CBF-aware) ====
import os, json, numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

# If attacks live in attacks.py (or converted notebook), import them:
try:
    from attacks import FGSMAttack, OIAttack
except Exception as e:
    raise ImportError("Import FGSMAttack/OIAttack failed. Make sure attacks.py exists or run attacks.ipynb first.") from e

# ============ Config ============
LOGDIR   = "runs/ppo_baseline"
VEC_PATH = os.path.join(LOGDIR, "vecnormalize.pkl")
MDL_PATH = os.path.join(LOGDIR, "ppo_acc.zip")

EPISODES = 100          # paper uses 100
EPSILON  = 0.02         # normalized epsilon; sweep around [0.005, 0.02]
SEED     = 2025

# ============ Env helpers ============
def make_env(seed=123, with_braking=True, normalize_obs=False):
    """
    ACCEnv factory. Keep normalize_obs=False and let VecNormalize handle scaling (SB3 best practice).
    We attach the paper's braking window via an attribute 'lead_profile' that ACCEnv.step() uses.
    """
    def _thunk():
        env = ACCEnv(normalize_obs=normalize_obs, seed=seed)
        if with_braking:
            # Paper-consistent "critical event": a_l = -2.0 m/s^2 for 3 s (Δt=0.1 -> 30 steps)
            def lead_profile(step):
                return -2.0 if 120 <= step < 150 else 0.0
            env.lead_profile = lead_profile
        return env
    return _thunk

def _get_inner_env(vec_env):
    """
    Robustly fetch the underlying Gym env (first subenv) from (possibly wrapped) VecEnv.
    Works for VecNormalize(DummyVecEnv(...)).
    """
    v = getattr(vec_env, "venv", None) or getattr(vec_env, "env", None) or vec_env
    while hasattr(v, "venv"):
        v = v.venv
    if hasattr(v, "envs") and len(v.envs) > 0:
        return v.envs[0]
    return None

def reset_unwrap(vec_env, **kwargs):
    out = vec_env.reset(**kwargs)
    if isinstance(out, np.ndarray):  # (n_envs, obs_dim)
        return out[0]
    if isinstance(out, tuple) and len(out) == 2:
        obs, _ = out
        return obs[0] if isinstance(obs, np.ndarray) and obs.ndim == 2 else obs
    return out

def step_unwrap(vec_env, action):
    out = vec_env.step(action)
    if len(out) == 4:
        obs, reward, done, info = out
        return obs[0], float(np.asarray(reward).ravel()[0]), bool(np.asarray(done).ravel()[0]), info[0]
    elif len(out) == 5:
        obs, reward, terminated, truncated, info = out
        done = bool(np.asarray(terminated).ravel()[0] or np.asarray(truncated).ravel()[0])
        return obs[0], float(np.asarray(reward).ravel()[0]), done, info[0]
    raise RuntimeError(f"Unexpected env.step() return length: {len(out)}")

def _is_collision(info, inner_env):
    # Prefer explicit flag; fall back to dx<=0; final fallback to env.collision
    if isinstance(info, dict) and "collision" in info:
        return bool(info["collision"])
    if isinstance(info, dict) and "dx" in info:
        try:
            return float(info["dx"]) <= 0.0
        except Exception:
            pass
    if inner_env is not None and hasattr(inner_env, "collision"):
        return bool(inner_env.collision)
    return False

# ============ Evaluation ============
def eval_suite(model, vec_norm_env, attack=None, eps=0.0, episodes=100, seed=0,
               collect_one_trace=False):
    """
    Evaluate a model on a VecNormalize-wrapped env.
      - attack: None, FGSMAttack(model), or OIAttack(model)
      - eps: epsilon in the *normalized* obs space
      - collect_one_trace: if True, return per-step traces for 1 rep episode for plotting RMSE/time series
    Returns dict with collision_rate, return_mean/std, and optional 'trace' for baseline/fgsm/oia.
    """
    rng = np.random.default_rng(seed)
    vec_norm_env.training = False
    vec_norm_env.norm_reward = False
    inner = _get_inner_env(vec_norm_env)

    total_coll = 0
    returns = []
    trace = None

    for ep in range(episodes):
        obs = reset_unwrap(vec_norm_env)
        ep_ret = 0.0
        collided = False
        done = False

        if collect_one_trace and trace is None:
            trace = {"t": [], "obs_used": [], "dx_true": [], "ego_v_true": [], "lead_v_true": [],
                     "action_raw": [], "action_applied": []}
            tstep = 0

        while not done:
            obs_for_policy = obs
            # ==== Attack in normalized space ====
            if attack is not None and eps > 0:
                adv = attack(obs, eps)  # consumes normalized obs, returns normalized obs
                # Let the CBF compute clamp using the attacked view of the state:
                if inner is not None and hasattr(inner, "set_safety_obs_for_filter"):
                    # If env expects raw units for CBF, de-normalize if helper exists
                    if hasattr(inner, "_denormalize") and np.max(np.abs(adv)) <= 1.1:
                        inner.set_safety_obs_for_filter(inner._denormalize(adv))
                    else:
                        inner.set_safety_obs_for_filter(adv)
                obs_for_policy = adv
            else:
                if inner is not None and hasattr(inner, "clear_safety_obs_for_filter"):
                    inner.clear_safety_obs_for_filter()

            action, _ = model.predict(obs_for_policy, deterministic=True)
            next_obs, r, done, info = step_unwrap(vec_norm_env, action)

            # Collision is judged on TRUE env state (info or inner state), not perturbed obs
            collided = collided or _is_collision(info, inner)

            # Optional trace (first episode only)
            if collect_one_trace and trace is not None:
                trace["t"].append(tstep * 0.1)  # Δt = 0.1 s
                trace["obs_used"].append(obs_for_policy.copy())
                trace["dx_true"].append(float(info.get("dx", np.nan)))
                trace["ego_v_true"].append(float(info.get("ego_v", np.nan)))
                trace["lead_v_true"].append(float(info.get("lead_v", np.nan)))
                trace["action_raw"].append(float(action[0] if hasattr(action, "__len__") else action))
                trace["action_applied"].append(float(info.get("applied_action", np.nan)))
                tstep += 1

            ep_ret += r
            obs = next_obs

        total_coll += int(collided)
        returns.append(ep_ret)

    out = {
        "episodes": episodes,
        "collision_rate": total_coll / max(1, episodes),
        "return_mean": float(np.mean(returns)),
        "return_std": float(np.std(returns)),
    }
    if collect_one_trace and trace is not None:
        # Stack to arrays for easier plotting
        for k in trace:
            trace[k] = np.asarray(trace[k])
        out["trace"] = trace
    return out

# ============ Load model + env ============
if not (os.path.exists(VEC_PATH) and os.path.exists(MDL_PATH)):
    raise FileNotFoundError(
        f"Missing saved artifacts:\n - {VEC_PATH}\n - {MDL_PATH}\nTrain first (longer run for paper-like numbers)."
    )

base_eval = DummyVecEnv([make_env(seed=SEED, with_braking=True, normalize_obs=False)])
env = VecNormalize.load(VEC_PATH, base_eval)
env.training = False
env.norm_reward = False
model = PPO.load(MDL_PATH, env=env)

# Attack callables (operate in normalized space)
fgsm = FGSMAttack(model)
oia  = OIAttack(model)

# ============ Run evaluations ============
results = {}
results["baseline"] = eval_suite(model, env, attack=None, eps=0.0,   episodes=EPISODES, seed=SEED,   collect_one_trace=True)
results["fgsm"]     = eval_suite(model, env, attack=fgsm, eps=EPSILON, episodes=EPISODES, seed=SEED+1)
results["oia"]      = eval_suite(model, env, attack=oia,  eps=EPSILON, episodes=EPISODES, seed=SEED+2)

print(json.dumps({k: {m: round(v[m], 3) for m in v if isinstance(v[m], (int, float))}
                  for k, v in results.items()}, indent=2))

# ============ Save metrics ============
os.makedirs("demo_outputs", exist_ok=True)
with open("demo_outputs/metrics.json", "w") as f:
    json.dump({k: {kk: (float(v[kk]) if isinstance(v[kk], (np.floating, float)) else v[kk])
                   for kk in v if kk != "trace"}
               for k, v in results.items()}, f, indent=2)

# ============ Plots ============
labels = ["Baseline","FGSM","OIA"]
x = np.arange(len(labels))

# Collision (NOTE: attacks should be non-zero; baseline may remain ~0 if agent is good)
coll = [results[k]["collision_rate"] for k in ["baseline","fgsm","oia"]]
plt.figure(figsize=(6,4))
plt.bar(x, coll)
plt.xticks(x, labels)
plt.ylim(0, 1)
for i, r in enumerate(coll):
    plt.text(i, min(0.98, r+0.03), f"{r:.2f}", ha="center")
plt.title(f"Collision Rate (ε={EPSILON})")
plt.ylabel("collided episodes / total")
plt.tight_layout()
plt.savefig("demo_outputs/collision_rate.png", dpi=200)
plt.show()

# Returns
rets = [results[k]["return_mean"] for k in ["baseline","fgsm","oia"]]
plt.figure(figsize=(6,4))
plt.bar(x, rets)
plt.xticks(x, labels)
for i, r in enumerate(rets):
    plt.text(i, r + 0.05*abs(r), f"{r:.2f}", ha="center")
plt.title(f"Episode Return (ε={EPSILON})")
plt.ylabel("mean return")
plt.tight_layout()
plt.savefig("demo_outputs/returns.png", dpi=200)
plt.show()

# ε-sweep (half episodes to keep it quick)
eps_grid = np.array([0.0, 0.005, 0.01, 0.015, 0.02], dtype=np.float32)
fgsm_curve, oia_curve = [], []
for eps in eps_grid:
    fgsm_curve.append(eval_suite(model, env, attack=fgsm, eps=float(eps), episodes=max(10, EPISODES//2), seed=SEED+3)["collision_rate"])
    oia_curve.append( eval_suite(model, env, attack=oia,  eps=float(eps), episodes=max(10, EPISODES//2), seed=SEED+4)["collision_rate"])

plt.figure(figsize=(6.5,4))
plt.plot(eps_grid, fgsm_curve, marker="o", label="FGSM")
plt.plot(eps_grid, oia_curve,  marker="o", label="OIA")
plt.xlabel("ε (normalized)")
plt.ylabel("collision rate")
plt.title("Collision Rate vs Attack Budget ε")
plt.legend()
plt.tight_layout()
plt.savefig("demo_outputs/epsilon_sweep.png", dpi=200)
plt.show()

# Time-series + Stealth (RMSE) from the single traced baseline episode vs attacks run for ONE episode each
def rmse(a, b): 
    a = np.asarray(a); b = np.asarray(b); 
    return float(np.sqrt(np.mean((a-b)**2)))

# Re-run ONE representative episode per condition to collect traces in normalized space
one_base = results["baseline"]["trace"]
one_fgsm = eval_suite(model, env, attack=fgsm, eps=EPSILON, episodes=1, seed=SEED+10, collect_one_trace=True)["trace"]
one_oia  = eval_suite(model, env, attack=oia,  eps=EPSILON, episodes=1, seed=SEED+11, collect_one_trace=True)["trace"]

# Plot distance & ego speed (true env signals from info)
plt.figure(figsize=(9,3))
plt.plot(one_base["t"], one_base["ego_v_true"], label="baseline")
plt.plot(one_fgsm["t"], one_fgsm["ego_v_true"], label="FGSM")
plt.plot(one_oia["t"],  one_oia["ego_v_true"],  label="OIA")
plt.xlabel("time (s)"); plt.ylabel("ego speed (m/s)")
plt.title("Time-series: Ego Speed")
plt.legend(); plt.tight_layout()
plt.savefig("demo_outputs/timeseries_ego_speed.png", dpi=200); plt.show()

plt.figure(figsize=(9,3))
plt.plot(one_base["t"], one_base["dx_true"], label="baseline")
plt.plot(one_fgsm["t"], one_fgsm["dx_true"], label="FGSM")
plt.plot(one_oia["t"],  one_oia["dx_true"],  label="OIA")
plt.axhline(0.0, linestyle="--")
plt.xlabel("time (s)"); plt.ylabel("headway distance (m)")
plt.title("Time-series: Inter-vehicle Distance")
plt.legend(); plt.tight_layout()
plt.savefig("demo_outputs/timeseries_distance.png", dpi=200); plt.show()

# Stealth RMSE between the normalized obs the policy actually consumed
# Align lengths defensively (episodes may terminate at different steps due to collision)
L = min(len(one_base["obs_used"]), len(one_fgsm["obs_used"]), len(one_oia["obs_used"]))
rmse_fgsm = rmse(one_base["obs_used"][:L], one_fgsm["obs_used"][:L])
rmse_oia  = rmse(one_base["obs_used"][:L], one_oia["obs_used"][:L])

plt.figure(figsize=(6,4))
plt.bar([0,1], [rmse_fgsm, rmse_oia], tick_label=["FGSM","OIA"])
plt.ylabel("RMSE vs baseline (normalized obs)")
plt.title("Stealth (lower RMSE is stealthier)")
for i, v in enumerate([rmse_fgsm, rmse_oia]):
    plt.text(i, v + 0.02*max(1e-6, v), f"{v:.3f}", ha="center")
plt.tight_layout()
plt.savefig("demo_outputs/rmse_stealth.png", dpi=200)
plt.show()

print("Saved figures to demo_outputs/:",
      "collision_rate.png, returns.png, epsilon_sweep.png, timeseries_ego_speed.png, timeseries_distance.png, rmse_stealth.png")


2025-09-25 23:38:42.727473: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Imported ACCEnv from acc_env.py
Train env created. obs_space: Box([  0. -30.   0.], [200.  30.  40.], (3,), float32) act_space: Box(-3.5, 2.0, (1,), float32)
Using cpu device
Logging to runs/ppo_baseline
Starting training for 200000 timesteps ...
-----------------------------
| time/              |      |
|    fps             | 1942 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 1024 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1441        |
|    iterations           | 2           |
|    time_elapsed         | 1           |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.009113206 |
|    clip_fraction        | 0.0938      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.42       |
|    explained_variance   | -0.0173     |
|    learning_rate        | 0.0003     

NameError: name 'ACCEnv' is not defined