In [34]:
import os, sys, subprocess

def ensure_py(stem: str):
    ipynb, py = f"{stem}.ipynb", f"{stem}.py"
    if not os.path.exists(py) and os.path.exists(ipynb):
        subprocess.run(["jupyter", "nbconvert", "--to", "python", ipynb], check=True)
        with open(py, "r", encoding="utf-8") as f:
            lines = f.readlines()
        fut = [l for l in lines if l.strip().startswith("from __future__")]
        oth = [l for l in lines if l not in fut]
        with open(py, "w", encoding="utf-8") as f:
            f.writelines(fut + oth)

for m in ["acc_env", "attacks"]:
    ensure_py(m)

if os.getcwd() not in sys.path:
    sys.path.insert(0, os.getcwd())

from acc_env import ACCEnv           # env with CBF clamp inside step()
from attacks import FGSMAttack, OIAttack
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
import numpy as np, math
import matplotlib.pyplot as plt


In [35]:
# ===================== demo_autodetect.py =====================
import os, sys, inspect, importlib, importlib.util
import numpy as np
import matplotlib.pyplot as plt
import torch
from pathlib import Path


import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.vec_env.base_vec_env import VecEnv

from typing import Optional, Tuple


def _find_latest_model(logdir: str) -> Optional[str]:
    """Search recursively for .zip models under logdir and return the newest by mtime."""
    p = Path(logdir)
    if not p.exists():
        return None
    zips = sorted(p.rglob("*.zip"), key=lambda f: f.stat().st_mtime, reverse=True)
    return str(zips[0]) if zips else None

def _find_vecnormalize(logdir: str) -> Optional[str]:
    """Look for vecnormalize.pkl under logdir (top-level or recursively)."""
    p = Path(logdir)
    if not p.exists():
        return None
    # common names/locations
    candidates = list(p.rglob("vecnormalize.pkl")) + list(p.glob("vecnormalize.pkl"))
    return str(candidates[0]) if candidates else None

# -------------------------- USER CONFIG --------------------------
LOGDIR = "runs/ppo_baseline"         # contains best_model.zip and vecnormalize.pkl
MODEL_PATH = os.path.join(LOGDIR, "best_model.zip")
VEC_PATH   = os.path.join(LOGDIR, "vecnormalize.pkl")

N_EVAL_EPISODES = 100
MAX_EPISODE_LEN = 1000
EPSILONS = [0.0, 0.005, 0.01, 0.02]
SEED = 0
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# -----------------------------------------------------------------

# ---------- Robustly import acc_env.py, even if no make_acc_env ----------
def _import_acc_env_module():
    # Try normal import first
    try:
        import acc_env as mod
        return mod
    except Exception:
        pass

    # Try by absolute path next to this script
    here = os.path.abspath(os.path.dirname(__file__)) if "__file__" in globals() else os.getcwd()
    cand = os.path.join(here, "acc_env.py")
    if not os.path.exists(cand):
        raise ImportError("Cannot find acc_env.py; place demo_autodetect.py in the same folder as acc_env.py.")
    spec = importlib.util.spec_from_file_location("acc_env_autoload", cand)
    mod = importlib.util.module_from_spec(spec)
    sys.modules["acc_env_autoload"] = mod
    spec.loader.exec_module(mod)  # type: ignore
    return mod

acc_env_mod = _import_acc_env_module()
print("[demo] Loaded acc_env module from:", getattr(acc_env_mod, "__file__", "<memory>"))

# Build a make_acc_env wrapper no matter what shapes acc_env provides
def _resolve_env_builder(mod):
    # 1) If module already has make_acc_env, use it
    if hasattr(mod, "make_acc_env") and inspect.isfunction(getattr(mod, "make_acc_env")):
        print("[demo] Using acc_env.make_acc_env(...)")
        return getattr(mod, "make_acc_env")

    # 2) If module exposes ACCEnv class, build a factory around it
    if hasattr(mod, "ACCEnv") and inspect.isclass(getattr(mod, "ACCEnv")):
        ACCEnv = getattr(mod, "ACCEnv")
        print("[demo] Constructing env from acc_env.ACCEnv class")
        def make_acc_env(normalize_obs=False, seed=None):
            # Try several common constructor signatures
            tried = []
            for kwargs in [
                {},  # no-arg
                {"normalize_obs": normalize_obs},
                {"seed": seed},
                {"normalize_obs": normalize_obs, "seed": seed},
            ]:
                try:
                    env = ACCEnv(**{k:v for k,v in kwargs.items() if v is not None})
                    return env
                except Exception as e:
                    tried.append((kwargs, str(e)))
            raise RuntimeError(f"Could not construct ACCEnv with common kwargs. Tried: {tried}")
        return make_acc_env

    # 3) Common builder names: make_env / create_env / build_env
    for name in ("make_env", "create_env", "build_env"):
        if hasattr(mod, name) and inspect.isfunction(getattr(mod, name)):
            fn = getattr(mod, name)
            print(f"[demo] Using acc_env.{name}(...) as builder")
            def make_acc_env(normalize_obs=False, seed=None):
                # Try with/without kwargs
                tried = []
                for kwargs in [{}, {"normalize_obs": normalize_obs}, {"seed": seed},
                               {"normalize_obs": normalize_obs, "seed": seed}]:
                    try:
                        env = fn(**{k:v for k,v in kwargs.items() if v is not None})
                        return env
                    except Exception as e:
                        tried.append((kwargs, str(e)))
                raise RuntimeError(f"Could not construct env via {name} with common kwargs. Tried: {tried}")
            return make_acc_env

    raise ImportError(
        "acc_env.py does not expose make_acc_env, ACCEnv, or a builder named make_env/create_env/build_env.\n"
        "Please either:\n"
        "  (A) add in acc_env.py:\n"
        "      def make_acc_env(normalize_obs=False, seed=None):\n"
        "          return ACCEnv(...)\n"
        "  or (B) rename your existing creator to one of the above."
    )

make_acc_env = _resolve_env_builder(acc_env_mod)

# ---------- Optionally import your attack.py (fgsm_attack/oia_attack) ----------
attack_module = None
try:
    import attack as attack_module
    print("[demo] Imported attack.py")
except Exception:
    print("[demo] No attack.py detected — will use built-in FGSM/OIA.")

# ---------- Env factory: MUST return RAW gym.Env (unwrap if VecEnv sneaks in) ----------
def make_eval_env_factory(normalize_obs=False, seed=0):
    def _init():
        env = make_acc_env(normalize_obs=normalize_obs, seed=seed)

        # If someone returned a VecEnv, unwrap to the underlying raw env
        if isinstance(env, VecEnv):
            print("[demo] WARN: builder returned a VecEnv; unwrapping to raw env.")
            raw = env.envs[0].unwrapped
            try:
                env.close()
            except Exception:
                pass
            env = raw

        assert isinstance(env, gym.Env), (
            "make_eval_env_factory must return a RAW Gymnasium env. "
            "Edit acc_env to return ACCEnv(...), not DummyVecEnv([...])."
        )

        # Seed
        try:
            env.reset(seed=seed)
        except TypeError:
            try: env.seed(seed)
            except Exception: pass
        return env
    return _init

# Guard
_test = make_eval_env_factory(normalize_obs=False, seed=SEED)
_tmp = _test()
assert isinstance(_tmp, gym.Env) and not isinstance(_tmp, VecEnv), "Factory returned a VecEnv — fix acc_env."
try: _tmp.close()
except Exception: pass

# ---------- Load model + VecNormalize ----------
def load_model_and_env(logdir: str):
    model_path = os.path.join(logdir, "best_model.zip")
    vec_path   = os.path.join(logdir, "vecnormalize.pkl")

    base_raw = DummyVecEnv([make_eval_env_factory(normalize_obs=False, seed=SEED)])

    if os.path.exists(vec_path):
        print(f"[demo] Loading VecNormalize from: {vec_path}")
        vec_env = VecNormalize.load(vec_path, base_raw)
        vec_env.training = False
        vec_env.norm_reward = False
    else:
        print("[demo] No VecNormalize found; creating fresh stats (ensure consistent with training!)")
        base_norm = DummyVecEnv([make_eval_env_factory(normalize_obs=True, seed=SEED)])
        vec_env = VecNormalize(base_norm, norm_obs=True, norm_reward=False, clip_obs=10.0)
        vec_env.training = False

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Missing model at {model_path}")
    model = PPO.load(model_path, device=DEVICE)
    model.policy.to(DEVICE); model.policy.eval()
    return model, vec_env

# ---------- SB3 policy/value access ----------
def _policy_mean_actions(model: PPO, obs_t: torch.Tensor) -> torch.Tensor:
    p = model.policy
    try:
        feats = p.extract_features(obs_t)
        return p.action_net(feats)
    except Exception:
        lat_pi, _ = p.mlp_extractor(obs_t)
        return p.action_net(lat_pi)

def _value_of(model: PPO, obs_t: torch.Tensor) -> torch.Tensor:
    p = model.policy
    try:
        _, lat_vf = p.mlp_extractor(obs_t)
        return p.value_net(lat_vf)
    except Exception:
        lat_pi, lat_vf = p._get_latent(obs_t)
        return p.value_net(lat_vf)

def _as_t(obs_np):
    t = torch.as_tensor(obs_np, device=DEVICE, dtype=torch.float32)
    if t.dim() == 1: t = t.unsqueeze(0)
    t.requires_grad_(True)
    return t

# ---------- Built-in attacks if attack.py missing ----------
def _fgsm_on_policy_normalized(obs_np, epsilon, model: PPO):
    obs_t = _as_t(obs_np)
    obj = _policy_mean_actions(model, obs_t).mean()
    model.policy.zero_grad()
    if obs_t.grad is not None:
        obs_t.grad.detach_(); obs_t.grad.zero_()
    obj.backward()
    grad = obs_t.grad.detach().cpu().numpy()
    return (obs_np.reshape(grad.shape) + epsilon * np.sign(grad)).squeeze()

def _oia_on_critic_normalized(obs_np, epsilon, model: PPO):
    obs_t = _as_t(obs_np)
    obj = _value_of(model, obs_t).mean()
    model.policy.zero_grad()
    if obs_t.grad is not None:
        obs_t.grad.detach_(); obs_t.grad.zero_()
    obj.backward()
    grad = obs_t.grad.detach().cpu().numpy()
    return (obs_np.reshape(grad.shape) + epsilon * np.sign(grad)).squeeze()

def get_attack_fn(name: str):
    if attack_module is not None:
        if name == "fgsm" and hasattr(attack_module, "fgsm_attack"):
            return attack_module.fgsm_attack
        if name == "oia" and hasattr(attack_module, "oia_attack"):
            return attack_module.oia_attack
    return _fgsm_on_policy_normalized if name == "fgsm" else (_oia_on_critic_normalized if name == "oia" else None)

# ---------- CBF clamp (match your training params here if different) ----------
def cbf_safety_clamp(action, raw_obs):
    # raw_obs = [delta_x, delta_v, v]
    delta_x = float(raw_obs[0]); delta_v = float(raw_obs[1]); v = float(raw_obs[2])
    T_h = 1.5; dt = 0.1
    a_min, a_max = -3.5, 2.0
    a_max_safe = (delta_x - T_h * v + delta_v * dt) / (T_h * dt)
    safe_upper = max(min(a_max, a_max_safe), a_min)
    return float(np.clip(action, a_min, safe_upper))

# ---------- Evaluation ----------
def evaluate(model: PPO, vec_env: VecNormalize, attack_fn, epsilon: float,
             n_episodes: int = 10, max_steps: int = 1000, seed: int = 0):
    collisions = 0
    returns, lengths, trajectories = [], [], []

    for ep in range(n_episodes):
        obs = vec_env.reset()
        ep_ret, ep_len = 0.0, 0
        traj = {"delta_x": [], "actions": [], "attacks": [], "raw_obs": []}

        for t in range(max_steps):
            obs_agent = obs[0] if (isinstance(obs, np.ndarray) and obs.ndim == 2 and obs.shape[0] == 1) else obs

            # Attack on normalized obs
            if attack_fn is not None and epsilon > 0.0:
                attacked = attack_fn(obs_agent, epsilon, model)
            else:
                attacked = obs_agent

            action, _ = model.predict(attacked, deterministic=True)
            action = float(np.array(action).squeeze())

            # Unnormalize for CBF
            if isinstance(vec_env, VecNormalize):
                mean = vec_env.obs_rms.mean; std = np.sqrt(vec_env.obs_rms.var + 1e-8)
                raw_obs = obs_agent * std + mean
            else:
                raw_obs = obs_agent

            a_safe = cbf_safety_clamp(action, raw_obs)
            obs_next, reward, done, info = vec_env.step(np.array([[a_safe]], dtype=np.float32))
            next_agent_obs = obs_next[0] if (isinstance(obs_next, np.ndarray) and obs_next.ndim == 2 and obs_next.shape[0] == 1) else obs_next

            # Unnormalize next for logging
            if isinstance(vec_env, VecNormalize):
                raw_next = next_agent_obs * std + mean
            else:
                raw_next = next_agent_obs

            if raw_next[0] <= 0.0:
                collisions += 1

            traj["delta_x"].append(float(raw_next[0]))
            traj["actions"].append(float(a_safe))
            traj["raw_obs"].append(np.array(raw_next, dtype=float))
            traj["attacks"].append(float(np.linalg.norm((next_agent_obs - obs_agent).ravel())))

            ep_ret += float(reward[0] if isinstance(reward, np.ndarray) else reward)
            ep_len += 1
            obs = obs_next

            if (isinstance(done, (list, np.ndarray)) and done[0]) or (isinstance(done, (bool, np.bool_)) and done):
                break

        returns.append(ep_ret); lengths.append(ep_len); trajectories.append(traj)

    return {
        "mean_return": float(np.mean(returns)) if returns else 0.0,
        "std_return":  float(np.std(returns))  if returns else 0.0,
        "mean_length": float(np.mean(lengths)) if lengths else 0.0,
        "collision_rate": collisions / max(1, n_episodes),
        "trajectories": trajectories
    }

# ---------- Run + plots ----------
def main():
    model, vec_env = load_model_and_env(LOGDIR)
    attack_fns = {"baseline": None, "fgsm": get_attack_fn("fgsm"), "oia": get_attack_fn("oia")}
    results = {k: {} for k in attack_fns}

    for atype, fn in attack_fns.items():
        print(f"[demo] Running: {atype}")
        for eps in EPSILONS:
            if atype == "baseline" and eps != 0.0: continue
            print(f"  eps={eps} ...", end="", flush=True)
            res = evaluate(model, vec_env, fn, eps, n_episodes=N_EVAL_EPISODES, max_steps=MAX_EPISODE_LEN, seed=SEED)
            results[atype][eps] = res
            print(f" done (ret={res['mean_return']:.3f}, coll={res['collision_rate']:.3f})")

    # 1) Headway trajectories
    plt.figure(figsize=(8,5))
    for atype in ["baseline","fgsm","oia"]:
        eps = 0.0 if atype=="baseline" else (0.01 if 0.01 in results[atype] else sorted(results[atype].keys())[0])
        traj = results[atype][eps]["trajectories"][0]
        plt.plot(traj["delta_x"], label=f"{atype} (eps={eps})")
    plt.xlabel("Time step"); plt.ylabel("Headway Δx (raw)")
    plt.title("Example headway trajectories")
    plt.grid(True); plt.legend(); plt.tight_layout(); plt.show()

    # 2) Stealth proxy vs epsilon
    plt.figure(figsize=(6,4))
    for atype in ["fgsm","oia"]:
        xs, ys = [], []
        for eps in EPSILONS:
            if eps == 0.0 or eps not in results[atype]: continue
            trajs = results[atype][eps]["trajectories"]
            per_ep = [np.array(tr["attacks"]).mean() if tr["attacks"] else 0.0 for tr in trajs]
            xs.append(eps); ys.append(float(np.mean(per_ep)))
        plt.plot(xs, ys, marker='o', label=atype)
    plt.xlabel("epsilon (normalized)"); plt.ylabel("Mean per-step perturbation norm")
    plt.title("Stealth vs epsilon"); plt.grid(True); plt.legend(); plt.tight_layout(); plt.show()

    # 3) Collision rate bars
    labels, vals = [], []
    for atype in ["baseline","fgsm","oia"]:
        eps = 0.0 if atype=="baseline" else (0.01 if 0.01 in results[atype] else sorted(results[atype].keys())[0])
        labels.append(f"{atype}\n(eps={eps})"); vals.append(results[atype][eps]["collision_rate"])
    plt.figure(figsize=(6,4))
    plt.bar(labels, vals)
    plt.ylabel("Collision rate"); plt.title("Collision rate by condition")
    plt.grid(axis='y'); plt.tight_layout(); plt.show()

    # 4) Mean return bars
    labels, vals = [], []
    for atype in ["baseline","fgsm","oia"]:
        eps = 0.0 if atype=="baseline" else (0.01 if 0.01 in results[atype] else sorted(results[atype].keys())[0])
        labels.append(f"{atype}\n(eps={eps})"); vals.append(results[atype][eps]["mean_return"])
    plt.figure(figsize=(6,4))
    plt.bar(labels, vals)
    plt.ylabel("Mean return"); plt.title("Mean return by condition")
    plt.grid(axis='y'); plt.tight_layout(); plt.show()

    print("\n[demo] Summary:")
    for atype in results:
        for eps, res in results[atype].items():
            print(f"  {atype:8s} eps={eps:<5}: ret={res['mean_return']:.3f} ± {res['std_return']:.3f}, "
                  f"len={res['mean_length']:.1f}, coll={res['collision_rate']:.3f}")
    try: vec_env.close()
    except Exception: pass

if __name__ == "__main__":
    main()
# =================== end demo_autodetect.py ===================


[demo] Loaded acc_env module from: /Users/lucylikesphotography/Documents/GitHub/PPO-and-Adversarial-Attack/PPO and Adversarial Attack/acc_env.py
[demo] Constructing env from acc_env.ACCEnv class
[demo] No attack.py detected — will use built-in FGSM/OIA.
[demo] Loading VecNormalize from: runs/ppo_baseline/vecnormalize.pkl


FileNotFoundError: Missing model at runs/ppo_baseline/best_model.zip