In [1]:
# === Make model/env ready for the demo ===
import os, sys, subprocess, shlex
from IPython import get_ipython

# 1) Ensure ACCEnv is defined. Resolution order:
#      a) import it from acc_env.py next to this notebook;
#      b) otherwise convert acc_env.ipynb -> acc_env.py with nbconvert and import that;
#      c) if conversion or the import of the converted file fails, %run the notebook so
#         ACCEnv lands in this kernel's globals.
#    If neither acc_env.py nor acc_env.ipynb exists, fail immediately with a clear error
#    instead of deferring the failure to the first environment construction.
try:
    from acc_env import ACCEnv  # if you already have acc_env.py alongside this notebook
    print("Imported ACCEnv from acc_env.py")
except ModuleNotFoundError as exc:
    # Only fall back when acc_env itself is missing. If acc_env.py exists but one of
    # *its* imports is missing, re-raise so the real problem is visible and the
    # existing acc_env.py is not overwritten by a fresh conversion.
    if exc.name != "acc_env":
        raise
    if not os.path.exists("acc_env.ipynb"):
        raise FileNotFoundError(
            f"Neither acc_env.py nor acc_env.ipynb was found in {os.getcwd()}; "
            "cannot define ACCEnv."
        ) from exc
    try:
        import importlib

        print("Converting acc_env.ipynb -> acc_env.py ...")
        subprocess.run(shlex.split("jupyter nbconvert --to python acc_env.ipynb"), check=True)
        if os.getcwd() not in sys.path:
            sys.path.append(os.getcwd())
        # acc_env.py was created after the interpreter started; drop the import
        # system's cached directory listings so the new file is found.
        importlib.invalidate_caches()
        from acc_env import ACCEnv
        print("Imported ACCEnv from converted acc_env.py")
    except (OSError, subprocess.CalledProcessError, ImportError) as convert_err:
        # OSError: the `jupyter` executable is unavailable; CalledProcessError:
        # nbconvert exited non-zero; ImportError: the converted module could not be
        # imported or does not define ACCEnv.
        print(f"Conversion/import failed ({convert_err!r}).")
        print("Running acc_env.ipynb directly...")
        get_ipython().run_line_magic("run", "./acc_env.ipynb")
        # ACCEnv should now be in globals; verify rather than assume.
        if "ACCEnv" not in globals():
            raise ImportError(
                "acc_env.ipynb was executed but did not define ACCEnv."
            ) from convert_err

# 2) Try to load saved PPO + VecNormalize; otherwise quick-train a small model
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

# Directory that holds the saved PPO artifacts; change if you saved elsewhere.
LOGDIR = "runs/ppo_baseline"
# Expected artifact paths inside LOGDIR: the VecNormalize statistics file and the
# PPO checkpoint, in that order.
vec_path, mdl_path = (
    os.path.join(LOGDIR, artifact) for artifact in ("vecnormalize.pkl", "ppo_acc.zip")
)

def make_env(seed=123, brake_profile=True, normalize_obs=True):
    """Return a zero-argument factory that builds a configured ``ACCEnv``.

    The environment is not constructed here; the returned callable creates a new
    ``ACCEnv`` each time it is invoked, which is the form the ``DummyVecEnv([...])``
    calls in this notebook are given.

    Args:
        seed: Forwarded to ``ACCEnv`` as ``seed``.
        brake_profile: Forwarded to ``ACCEnv`` as ``brake_profile``.
        normalize_obs: Forwarded to ``ACCEnv`` as ``normalize_obs``.

    Returns:
        A callable taking no arguments that returns ``ACCEnv(...)``.
    """
    env_kwargs = {
        "brake_profile": brake_profile,
        "normalize_obs": normalize_obs,
        "seed": seed,
    }
    return lambda: ACCEnv(**env_kwargs)

# Kernel-level handles used by the later cells. After this block runs, `model` is a
# PPO instance and `env` is a VecNormalize-wrapped evaluation env with training
# (statistics updates) switched off.
model = None
env = None

if os.path.exists(vec_path) and os.path.exists(mdl_path):
    # Both artifacts are present: rebuild the env, then attach the saved
    # normalization statistics and the saved policy to it.
    print(f"Loading saved model/env from {LOGDIR} ...")
    base_env = DummyVecEnv([make_env(seed=123, brake_profile=True, normalize_obs=True)])
    # NOTE(review): vecnormalize.pkl is a pickle file — only load one you trust.
    env = VecNormalize.load(vec_path, base_env)
    # Evaluation settings: freeze the running statistics and report raw rewards.
    env.training = False
    env.norm_reward = False
    model = PPO.load(mdl_path, env=env)
    print("Loaded saved PPO and VecNormalize.")
else:
    print("Saved files not found; doing a quick in-memory train so the demo can run...")
    # quick train on a stationary-lead scenario (no braking) so it learns *something*
    train_env = DummyVecEnv([make_env(seed=42, brake_profile=False, normalize_obs=True)])
    # clip_obs=1.0 keeps normalized observations within [-1, 1].
    train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=1.0)
    model = PPO(
        "MlpPolicy", train_env, seed=42, verbose=0,
        n_steps=512, batch_size=128, learning_rate=3e-4,
        gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.0
    )
    # Nothing is written to LOGDIR here, so this branch is taken again (and the
    # model retrained) on every fresh kernel until saved artifacts exist.
    model.learn(total_timesteps=8_000)  # small, fast
    # build an eval env (with braking enabled) sharing the same VecNormalize statistics
    eval_base = DummyVecEnv([make_env(seed=123, brake_profile=True, normalize_obs=True)])
    env = VecNormalize(eval_base, norm_obs=True, norm_reward=False, clip_obs=1.0)
    # Reuse the training env's normalization stats so obs scales match what the policy
    # expects. These assignments alias the same objects (no copy is made), so `env`
    # and `train_env` share one set of statistics.
    env.obs_rms = train_env.obs_rms
    env.ret_rms = train_env.ret_rms
    env.training = False
    print("Quick train done; model/env are ready.")

print("\n✅ model and env are ready in this kernel.")


Imported ACCEnv from acc_env.py


2025-09-21 23:03:21.618009: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Saved files not found; doing a quick in-memory train so the demo can run...
Quick train done; model/env are ready.

✅ model and env are ready in this kernel.


In [2]:
from __future__ import annotations
import gymnasium as gym
import torch
import numpy as np
from typing import Any

In [3]:
def _to_tensor(x: np.ndarray) -> torch.Tensor:
    return torch.as_tensor(x, dtype=torch.float32)

In [4]:
class AttackWrapper:
    """Base class for observation-space attacks applied before the agent acts.

    Subclasses override :meth:`perturb`; this base implementation is the identity,
    so wrapping a model with it leaves the agent's behavior unchanged.
    """

    def __init__(self, model: Any, epsilon: float = 0.01, device: str = "cpu") -> None:
        """Store the wrapped model and attack settings.

        Args:
            model: Object exposing ``predict(obs, deterministic=...)``.
            epsilon: Perturbation budget; coerced to ``float`` and stored as ``eps``.
            device: Device identifier kept for subclasses to place tensors on.
        """
        self.model = model
        self.eps = float(epsilon)
        self.device = device

    def perturb(self, obs: np.ndarray) -> np.ndarray:
        """Return the observation unchanged (no-op attack)."""
        return obs

    def act(self, obs: np.ndarray):
        """Perturb ``obs`` and let the wrapped model act on the perturbed version.

        Returns:
            A ``(action, obs_adv)`` pair: the deterministic action predicted from
            the perturbed observation, and that perturbed observation.
        """
        # perturb() may need gradients, so it runs outside the no_grad scope;
        # only the final prediction is gradient-free.
        perturbed_obs = self.perturb(obs)
        with torch.no_grad():
            action, _state = self.model.predict(perturbed_obs, deterministic=True)
        return action, perturbed_obs

In [5]:
class FGSMAttack(AttackWrapper):
    """FGSM with respect to policy mean action (pre-squash).

    Takes one signed-gradient step of size ``eps`` on the observation in the
    direction that increases the squared output of the policy's action head.
    """

    def perturb(self, obs: np.ndarray) -> np.ndarray:
        """Return an adversarially perturbed copy of ``obs``.

        Args:
            obs: A single observation (1-D) or a batch of observations (2-D+,
                first axis is the batch).

        Returns:
            A ``float32`` NumPy array with the same leading shape as ``obs``,
            equal to ``clamp(obs + eps * sign(grad), -1, 1)``.

        Notes:
            * The policy is evaluated in eval mode and its previous train/eval mode
              is restored afterwards, so calling this does not leave the policy in
              training mode.
            * Only the gradient with respect to the observation is computed; the
              ``.grad`` fields of the policy parameters are neither cleared nor
              written.
        """
        policy = self.model.policy
        was_training = policy.training
        # Gradients w.r.t. the input do not require training mode; use eval mode so
        # the attacked function does not include train-only layer behavior.
        policy.set_training_mode(False)
        try:
            obs_t = _to_tensor(obs)
            single = obs_t.ndim == 1
            if single:
                obs_t = obs_t.unsqueeze(0)
            obs_t = obs_t.to(self.device).requires_grad_(True)

            # forward through policy internals to get mean action
            # NOTE(review): assumes extract_features returns a single tensor (shared
            # features extractor) — confirm for custom policies.
            features = policy.extract_features(obs_t)
            latent_pi, _ = policy.mlp_extractor(features)
            mean_actions = policy.action_net(latent_pi)  # [B, act_dim]

            # simple scalar objective: increase squared mean action
            obj = (mean_actions ** 2).sum()
            # Differentiate w.r.t. the observation only, leaving parameter grads alone.
            (obs_grad,) = torch.autograd.grad(obj, obs_t)
        finally:
            policy.set_training_mode(was_training)

        # The [-1, 1] clamp matches the clip_obs=1.0 used for VecNormalize in this notebook.
        adv = torch.clamp(obs_t.detach() + self.eps * torch.sign(obs_grad), -1.0, 1.0)
        adv_np = adv.cpu().numpy()
        return adv_np[0] if single else adv_np

In [6]:
class OIAttack(AttackWrapper):
    """Optimism Induction Attack: increase the critic value V(s).

    Takes one signed-gradient step of size ``eps`` on the observation in the
    direction that increases the summed output of the policy's value head.
    """

    def perturb(self, obs: np.ndarray) -> np.ndarray:
        """Return a copy of ``obs`` perturbed to raise the critic's value estimate.

        Args:
            obs: A single observation (1-D) or a batch of observations (2-D+,
                first axis is the batch).

        Returns:
            A ``float32`` NumPy array with the same leading shape as ``obs``,
            equal to ``clamp(obs + eps * sign(grad), -1, 1)``.

        Notes:
            * The policy is evaluated in eval mode and its previous train/eval mode
              is restored afterwards, so calling this does not leave the policy in
              training mode.
            * Only the gradient with respect to the observation is computed; the
              ``.grad`` fields of the policy parameters are neither cleared nor
              written.
        """
        policy = self.model.policy
        was_training = policy.training
        # Gradients w.r.t. the input do not require training mode; use eval mode so
        # the attacked function does not include train-only layer behavior.
        policy.set_training_mode(False)
        try:
            obs_t = _to_tensor(obs)
            single = obs_t.ndim == 1
            if single:
                obs_t = obs_t.unsqueeze(0)
            obs_t = obs_t.to(self.device).requires_grad_(True)

            # NOTE(review): assumes extract_features returns a single tensor (shared
            # features extractor) — confirm for custom policies.
            features = policy.extract_features(obs_t)
            _, latent_vf = policy.mlp_extractor(features)
            values = policy.value_net(latent_vf)  # [B,1]

            obj = values.sum()
            # Differentiate w.r.t. the observation only, leaving parameter grads alone.
            (obs_grad,) = torch.autograd.grad(obj, obs_t)
        finally:
            policy.set_training_mode(was_training)

        # The [-1, 1] clamp matches the clip_obs=1.0 used for VecNormalize in this notebook.
        adv = torch.clamp(obs_t.detach() + self.eps * torch.sign(obs_grad), -1.0, 1.0)
        adv_np = adv.cpu().numpy()
        return adv_np[0] if single else adv_np


In [7]:
def print_attack_sanity(model, env, eps=0.01):
    """Print a before/after comparison for one FGSM and one OIA perturbation.

    A single observation is taken from ``env.reset()[0]`` and perturbed by each
    attack in turn. For each attack the original observation, the adversarial
    observation and the largest absolute element-wise change are printed.

    Args:
        model: Model handed to both attack wrappers.
        env: Environment whose ``reset()`` result is indexed with ``[0]``.
        eps: Perturbation budget passed to both attacks as ``epsilon``.
    """
    def _report(title, perturbed):
        # Same three-line report for every attack, preceded by its title.
        max_delta = np.abs(np.asarray(perturbed) - np.asarray(obs)).max()
        print(title)
        print(" original obs:", obs)
        print(" adv obs     :", perturbed)
        print(" max |Δ|     :", float(max_delta))

    fgsm = FGSMAttack(model, epsilon=eps, device="cpu")
    obs = env.reset()[0]
    _report("FGSM sanity:", fgsm.perturb(obs))

    oia = OIAttack(model, epsilon=eps, device="cpu")
    _report("\nOIA sanity:", oia.perturb(obs))

In [8]:
# One-step smoke test of both attacks against the `model` / `env` already living in
# this kernel. A missing name is reported with a hint rather than a traceback.
# The names atk/obs/adv/atk2/adv2 are left in the kernel namespace afterwards.
try:
    print("Running a one-step demo with current model/env (if available)...\n")

    # FGSM: perturb the first observation returned by reset().
    atk = FGSMAttack(model, epsilon=0.01, device="cpu")
    obs = env.reset()[0]
    adv = atk.perturb(obs)
    print("FGSM max |Δ|:", float(np.abs(np.asarray(adv) - np.asarray(obs)).max()))

    # OIA: same observation, value-head objective.
    atk2 = OIAttack(model, epsilon=0.01, device="cpu")
    adv2 = atk2.perturb(obs)
    print("OIA  max |Δ|:", float(np.abs(np.asarray(adv2) - np.asarray(obs)).max()))
except NameError:
    print("Define `model` and `env` (load your PPO and VecNormalize env) before running the demo cell.")

Running a one-step demo with current model/env (if available)...

FGSM max |Δ|: 0.009999990463256836
OIA  max |Δ|: 0.009999990463256836
