In [6]:
import os, sys, subprocess

def ensure_py(stem: str):
    ipynb, py = f"{stem}.ipynb", f"{stem}.py"
    if not os.path.exists(py) and os.path.exists(ipynb):
        subprocess.run(["jupyter", "nbconvert", "--to", "python", ipynb], check=True)
        with open(py, "r", encoding="utf-8") as f:
            lines = f.readlines()
        fut = [l for l in lines if l.strip().startswith("from __future__")]
        oth = [l for l in lines if l not in fut]
        with open(py, "w", encoding="utf-8") as f:
            f.writelines(fut + oth)

for m in ["acc_env", "attacks"]:
    ensure_py(m)

if os.getcwd() not in sys.path:
    sys.path.insert(0, os.getcwd())

from acc_env import ACCEnv           # env with CBF clamp inside step()
from attacks import FGSMAttack, OIAttack
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
import numpy as np, math
import matplotlib.pyplot as plt


In [7]:
# Demo evaluation cell (drop-in)
import os, numpy as np, matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

# tune these
FGSM_EPS = 0.01
OIA_EPS  = 0.03
N_EPISODES = 20
BRake_START = 30     # time step when lead starts braking (make hazard appear)
BRake_DUR   = 40     # how many steps the lead brakes
LEAD_DECEL  = -3.0   # lead deceleration (m/s^2) to create hazardous scenario

# Utility: if env is a DummyVecEnv, get underlying envs list; otherwise wrap single env.
def _get_unwrapped_envs(env):
    # If Vec env
    if hasattr(env, "envs"):
        return env.envs
    # If vectorized wrapper like VecNormalize around DummyVecEnv
    if hasattr(env, "venv") and hasattr(env.venv, "envs"):
        return env.venv.envs
    # Single env
    return [env.unwrapped if hasattr(env, "unwrapped") else env]

# Hook to set override on all underlying envs (consumed next step)
def set_safety_override_on_all(env, obs_adv):
    envs = _get_unwrapped_envs(env)
    for e in envs:
        try:
            e.set_safety_obs_for_filter(obs_adv)
        except Exception:
            # If method isn't present, ignore
            pass

# make evaluation env factory (normalized obs)
def make_eval_env(normalize_obs=True, seed=0):
    def _thunk():
        return ACCEnv(normalize_obs=normalize_obs, seed=seed)
    return DummyVecEnv([_thunk])

# load model + env (tries to load VecNormalize wrapper if present)
def load_model_and_env(logdir="runs/ppo_baseline"):
    # If saved VecNormalize exists, create base DummyVecEnv and then load wrapper
    base = DummyVecEnv([make_eval_env(normalize_obs=False, seed=0)])  # base returns raw obs
    vec_path = os.path.join(logdir, "vecnormalize.pkl")
    if os.path.exists(vec_path):
        env = VecNormalize.load(vec_path, base)
        env.training = False
        env.norm_reward = False
    else:
        # If no saved VecNormalize, use env that returns normalized obs (ACCEnv(normalize_obs=True))
        env = make_eval_env(normalize_obs=True, seed=0)
    model_path = os.path.join(logdir, "ppo_acc")
    if os.path.exists(model_path) or os.path.exists(model_path + ".zip"):
        model = PPO.load(model_path)
    else:
        # no model file; train a quick tiny model to allow demo (keeps demo runnable)
        print("Saved model not found; training quick model in-memory for demo (very short).")
        tmp_env = make_eval_env(normalize_obs=True, seed=1)
        model = PPO("MlpPolicy", tmp_env, verbose=0)
        model.learn(1000)
    return model, env


# Robust step/unpack helpers for Gymnasium vs Gym legacy
def _unpack_step(step_out):
    # expected gymnasium: obs, reward, terminated, truncated, info
    if len(step_out) == 5:
        obs, reward, terminated, truncated, info = step_out
        done = bool(terminated or truncated)
    else:
        # legacy gym: obs, reward, done, info
        obs, reward, done, info = step_out
        terminated = done
        truncated = False
    return obs, reward, terminated, truncated, info

def _unpack_reset(reset_out):
    if isinstance(reset_out, tuple) and len(reset_out) == 2:
        return reset_out[0], reset_out[1]
    return reset_out, {}

# The core run_episode: uses attacker to produce obs_adv, sets safety override, calls model.predict on obs_adv,
# then steps env with returned action. Tracks return, collision, RMSE stealth, trajectory.
def run_episode(model, env, attacker=None, max_steps=300, brake_start=BRake_START, brake_dur=BRake_DUR, lead_decel=LEAD_DECEL):
    obs_out = env.reset()
    obs, _ = _unpack_reset(obs_out)
    total_r = 0.0
    collided = 0
    traj = {"dx": [], "v": [], "a": [], "r": []}
    rmse_terms = []

    # Get underlying env to set lead_acc
    unwrapped_envs = _get_unwrapped_envs(env)
    base_env = unwrapped_envs[0]

    for t in range(max_steps):
        # schedule lead braking to create hazard
        if (t >= brake_start) and (t < brake_start + brake_dur):
            base_env.lead_acc = lead_decel
        else:
            base_env.lead_acc = None

        # attacker produces perturbed observation (attacker expects normalized obs if env outputs normalized)
        if attacker is None:
            obs_adv = obs
        else:
            # attacker.perturb should accept a single observation (1d array) and return same-shaped array
            obs_adv = attacker.perturb(np.asarray(obs))

        # set attack perception for safety filter to use when computing clamp
        set_safety_override_on_all(env, obs_adv)

        # feed the attacked observation to the policy (policy expects same normalization as obs_adv)
        # model.predict for SB3 accepts array shaped as observation; keep shape consistent.
        action, _ = model.predict(obs_adv, deterministic=True)

        # step environment with chosen action
        step_out = env.step(action)
        obs_next, reward, terminated, truncated, info = _unpack_step(step_out if isinstance(step_out, tuple) else (step_out,))
        obs = obs_next

        # accumulate metrics
        total_r += float(reward) if not isinstance(reward, (list, tuple, np.ndarray)) else float(reward[0])
        # info may be dict or list-of-dicts if vec env; handle both
        info_dict = info[0] if isinstance(info, (list, tuple)) else info
        traj["dx"].append(info_dict.get("dx", np.nan))
        traj["v"].append(info_dict.get("ego_v", np.nan))
        traj["a"].append(info_dict.get("applied_action", np.nan))
        traj["r"].append(float(reward) if not isinstance(reward, (list, tuple, np.ndarray)) else float(reward[0]))

        # stealth metric: RMSE between obs_adv and the actual obs returned by env (both normalized or raw depending on env)
        try:
            d = np.asarray(obs_adv) - np.asarray(obs)
            rmse_terms.append(float((d**2).mean()))
        except Exception:
            pass

        # collision detection
        if info_dict.get("collision", False):
            collided = 1
            break

    rmse = float(np.sqrt(np.mean(rmse_terms))) if len(rmse_terms) > 0 else 0.0
    return {"return": total_r, "collision": collided, "rmse": rmse, "traj": traj}

# evaluation wrapper to run many episodes and average metrics
def evaluate_attack(model, env_factory, attacker_cls=None, eps=0.0, episodes=N_EPISODES):
    rets, cols, rmses = [], [], []
    for i in range(episodes):
        env = env_factory(seed=100+i) if callable(env_factory) else env_factory
        attacker = None if attacker_cls is None else attacker_cls(model, epsilon=eps, device="cpu")
        res = run_episode(model, env, attacker=attacker)
        rets.append(res["return"]); cols.append(res["collision"]); rmses.append(res["rmse"])
    return np.array(rets), np.array(cols), np.array(rmses)

# plotting helper
def plot_results(baseline, fgsm, oia):
    rets_b, cols_b, rms_b = baseline
    rets_f, cols_f, rms_f = fgsm
    rets_o, cols_o, rms_o = oia

    print("Collision rates  -> Baseline:", cols_b.mean(), " FGSM:", cols_f.mean(), " OIA:", cols_o.mean())
    print("Avg returns      -> Baseline:", rets_b.mean(), " FGSM:", rets_f.mean(), " OIA:", rets_o.mean())
    print("Stealth RMSE     -> FGSM:", rms_f.mean(), " OIA:", rms_o.mean())

    plt.figure(figsize=(6,4))
    plt.bar(["Baseline","FGSM","OIA"], [cols_b.mean(), cols_f.mean(), cols_o.mean()])
    plt.ylim(0,1)
    plt.title("Collision rate")
    plt.show()

# --- RUN demo (tweak logdir to your saved model path) ---
LOGDIR = "runs/ppo_baseline"   # adjust to where your model+vecnormalize.pkl are saved
model, env = load_model_and_env(LOGDIR)

# evaluate baseline, FGSM, OIA
baseline = evaluate_attack(model, lambda seed=None: make_eval_env(normalize_obs=True)(seed) if False else make_eval_env(normalize_obs=True), attacker_cls=None, eps=0.0)
fgsm     = evaluate_attack(model, lambda seed=None: make_eval_env(normalize_obs=True)(seed) if False else make_eval_env(normalize_obs=True), attacker_cls=FGSMAttack, eps=FGSM_EPS)
oia      = evaluate_attack(model, lambda seed=None: make_eval_env(normalize_obs=True)(seed) if False else make_eval_env(normalize_obs=True), attacker_cls=OIAttack,  eps=OIA_EPS)

plot_results(baseline, fgsm, oia)


TypeError: 'DummyVecEnv' object is not callable

In [8]:
LOGDIR  = "runs/ppo_baseline"
VECPATH = os.path.join(LOGDIR, "vecnormalize.pkl")
MDLPATH = os.path.join(LOGDIR, "ppo_acc.zip")
os.makedirs(LOGDIR, exist_ok=True)

def reset_unwrap(env, **kw):
    out = env.reset(**kw)
    if isinstance(out, tuple) and len(out)==2: return out[0]
    return out

def step_unwrap(env, action):
    out = env.step(action)
    if len(out)==5:
        obs, r, term, trunc, info = out
        return obs, r, bool(term), bool(trunc), info
    elif len(out)==4:
        obs, r, done, info = out
        return obs, r, bool(done), False, info
    raise RuntimeError(f"Unexpected step() length: {len(out)}")

def info_dict(info):
    if isinstance(info, dict): return info
    if hasattr(info, "__len__") and len(info)>0: return info[0]
    return {}

def make_env_train(seed=42):
    def _thunk():
        return ACCEnv(brake_profile=False, normalize_obs=False, seed=seed)
    return _thunk

def make_env_eval(seed=123):
    def _thunk():
        return ACCEnv(brake_profile=True, normalize_obs=False, seed=seed,
                      brake_start_s=3.0, brake_dur_s=4.0, lead_decel=-3.0)
    return _thunk

if os.path.exists(VECPATH) and os.path.exists(MDLPATH):
    base_eval = DummyVecEnv([make_env_eval(123)])
    env = VecNormalize.load(VECPATH, base_eval)
    env.training = False
    env.norm_reward = False
    model = PPO.load(MDLPATH, env=env)
    print(f"Loaded PPO+VecNormalize from {LOGDIR}")
else:
    print(f"Saved files not found in {LOGDIR}. Quick-training (~8k steps).")
    base_train = DummyVecEnv([make_env_train(42)])
    train_env  = VecNormalize(base_train, norm_obs=True, norm_reward=True, clip_obs=1.0)
    model = PPO("MlpPolicy", train_env, verbose=0, seed=42,
                n_steps=512, batch_size=128, learning_rate=3e-4,
                gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.0)
    model.learn(total_timesteps=8_000)
    base_eval = DummyVecEnv([make_env_eval(123)])
    env = VecNormalize(base_eval, norm_obs=True, norm_reward=False, clip_obs=1.0)
    env.obs_rms = train_env.obs_rms
    env.ret_rms = train_env.ret_rms
    env.training = False
    print("Quick-train complete.")


TypeError: ACCEnv.__init__() got an unexpected keyword argument 'brake_profile'

In [9]:
def run_episode(mdl, venv, attacker=None, eps=0.01, max_steps=None):
    obs = reset_unwrap(venv)
    total_r, collided, steps = 0.0, 0, 0
    traj = {"Δx": [], "v": [], "a": [], "r": []}
    rmse_terms = []

    while True:
        orig_obs = obs
        obs_adv = attacker.perturb(orig_obs) if attacker else orig_obs
        d = np.asarray(obs_adv) - np.asarray(orig_obs)     # stealth measured pre-step
        rmse_terms.append(float((d**2).mean()))

        action, _ = mdl.predict(obs_adv, deterministic=True)
        obs, r, term, trunc, info = step_unwrap(venv, action)
        r_scalar = float(r[0]) if hasattr(r, "__len__") else float(r)
        total_r += r_scalar

        dct = info_dict(info)
        traj["Δx"].append(dct.get("Δx", math.nan))
        traj["v"].append(dct.get("v",  math.nan))
        traj["a"].append(dct.get("a",  math.nan))
        traj["r"].append(r_scalar)

        if dct.get("collision", False) or (not np.isnan(traj["Δx"][-1]) and traj["Δx"][-1] <= 0.0):
            collided = 1

        steps += 1
        if term or trunc or (max_steps is not None and steps >= max_steps):
            break

    jerk = float(np.mean(np.abs(np.diff(traj["a"])))) if len(traj["a"])>1 else 0.0
    rmse = float(np.sqrt(np.mean(rmse_terms))) if rmse_terms else 0.0
    return {"return": total_r, "collision": collided, "jerk": jerk, "rmse": rmse, "traj": traj}

def evaluate_condition(attacker_cls=None, eps=0.01, episodes=20, base_seed=100):
    rets, cols, jerks, rmses = [], [], [], []
    sample = None
    for i in range(episodes):
        seed = base_seed + i
        base = DummyVecEnv([make_env_eval(seed)])
        venv = VecNormalize.load(VECPATH, base) if os.path.exists(VECPATH) else base
        if isinstance(venv, VecNormalize):
            venv.training=False; venv.norm_reward=False
        mdl = PPO.load(MDLPATH, env=venv) if os.path.exists(MDLPATH) else model
        attacker = attacker_cls(mdl, epsilon=eps, device="cpu") if attacker_cls else None
        res = run_episode(mdl, venv, attacker=attacker, eps=eps)
        rets.append(res["return"]); cols.append(res["collision"]); jerks.append(res["jerk"]); rmses.append(res["rmse"])
        if sample is None: sample = res
    return {
        "returns": np.array(rets, dtype=float),
        "collisions": np.array(cols, dtype=float),
        "jerks": np.array(jerks, dtype=float),
        "rmses": np.array(rmses, dtype=float),
        "sample": sample,
    }


In [10]:
EPISODES = 20
EPS_FGSM = 0.010
EPS_OIA  = 0.020   # slightly larger to reflect OIA’s stronger impact

baseline = evaluate_condition(None,        eps=0.0,      episodes=EPISODES)
fgsm     = evaluate_condition(FGSMAttack,  eps=EPS_FGSM, episodes=EPISODES)
oia      = evaluate_condition(OIAttack,    eps=EPS_OIA,  episodes=EPISODES)

print("collisions (Baseline):", baseline["collisions"].tolist())
print("collisions (FGSM)    :", fgsm["collisions"].tolist())
print("collisions (OIA)     :", oia["collisions"].tolist())

def safe_mean(x, default=np.nan):
    x = np.asarray(x, dtype=float)
    return float(np.nanmean(x)) if x.size else float(default)

avg_return     = [safe_mean(baseline["returns"]),    safe_mean(fgsm["returns"]),    safe_mean(oia["returns"])]
collision_rate = [safe_mean(baseline["collisions"], 0.0), safe_mean(fgsm["collisions"], 0.0), safe_mean(oia["collisions"], 0.0)]
avg_jerk       = [safe_mean(baseline["jerks"]),      safe_mean(fgsm["jerks"]),      safe_mean(oia["jerks"])]
stealth_rmse   = [np.nan, safe_mean(fgsm["rmses"]),  safe_mean(oia["rmses"])]

labels = ["Baseline","FGSM","OIA"]

plt.figure(); plt.bar(labels, np.nan_to_num(avg_return,     nan=0.0)); plt.title("Average Return");   plt.ylabel("return"); plt.show()
plt.figure(); plt.bar(labels, np.nan_to_num(collision_rate, nan=0.0)); plt.title("Collision Rate");  plt.ylim(0,1);       plt.show()
plt.figure(); plt.bar(labels, np.nan_to_num(avg_jerk,       nan=0.0)); plt.title("Average Jerk");    plt.ylabel("|Δa|");  plt.show()

plt.figure(); plt.bar(["FGSM","OIA"], np.nan_to_num(stealth_rmse[1:], nan=0.0))
plt.title("Stealth (RMSE) — lower is stealthier"); plt.ylabel("RMSE of per-step perturbation"); plt.show()

def plot_traj(traj, title):
    t = np.arange(len(traj["Δx"]))
    plt.figure(); plt.plot(t, traj["Δx"]); plt.xlabel("step"); plt.ylabel("m");   plt.title(title+" — headway (Δx)"); plt.show()
    plt.figure(); plt.plot(t, traj["v"]);   plt.xlabel("step"); plt.ylabel("m/s"); plt.title(title+" — speed (v)");   plt.show()
    plt.figure(); plt.plot(t, traj["a"]);   plt.xlabel("step"); plt.ylabel("m/s²");plt.title(title+" — accel (a)");   plt.show()

plot_traj(baseline["sample"]["traj"], "Baseline")
plot_traj(fgsm["sample"]["traj"],     "FGSM")
plot_traj(oia["sample"]["traj"],      "OIA")

print("\nAverages across", EPISODES, "episodes")
print("Baseline :", {"avg_return": avg_return[0], "collision_rate": collision_rate[0], "avg_jerk": avg_jerk[0]})
print("FGSM     :", {"avg_return": avg_return[1], "collision_rate": collision_rate[1], "avg_jerk": avg_jerk[1], "avg_rmse": stealth_rmse[1]})
print("OIA      :", {"avg_return": avg_return[2], "collision_rate": collision_rate[2], "avg_jerk": avg_jerk[2], "avg_rmse": stealth_rmse[2]})


TypeError: ACCEnv.__init__() got an unexpected keyword argument 'brake_profile'