In [1]:
# Instalación de librerías
%pip install "stable-baselines3[extra]" gymnasium

Looking in indexes: https://pypi.org/simple, https://packagecloud.io/github/git-lfs/pypi/simple
Collecting gymnasium
  Using cached gymnasium-1.2.2-py3-none-any.whl.metadata (10 kB)
Collecting stable-baselines3[extra]
  Using cached stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Collecting numpy<3.0,>=1.20 (from stable-baselines3[extra])
  Using cached numpy-2.3.5-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting torch<3.0,>=2.3 (from stable-baselines3[extra])
  Using cached torch-2.9.1-cp313-cp313-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting cloudpickle (from stable-baselines3[extra])
  Using cached cloudpickle-3.1.2-py3-none-any.whl.metadata (7.1 kB)
Collecting pandas (from stable-baselines3[extra])
  Using cached pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting matplotlib (from stable-baselines3[extra])
  Using cached matplotlib-3.10.7-cp313-cp313-manylinux2014_x86_64.manyl

In [15]:
# Importación de dependencias necesarias
import numpy as np
import joblib
from envs.attack_env import AttackEnv
from stable_baselines3 import PPO

In [None]:
# Load the dataset. np.load on an .npz returns a lazy NpzFile that keeps the
# archive open, so the arrays are read inside a context manager that closes
# the file handle as soon as they have been extracted.
with np.load("../data/synthetic_2d.npz") as data:
    X_train, X_test = data["X_train"], data["X_test"]
    y_train, y_test = data["y_train"], data["y_test"]

# The environment only works with attack samples: keep the rows labelled 1.
attack_mask = y_train == 1  # boolean array, True for attack samples
attack_samples = X_train[attack_mask]

# Fail early with a clear message instead of building an environment that has
# nothing to sample from.
assert len(attack_samples) > 0, "no attack samples (label 1) found in y_train"

# Print the shape of the attack samples
print("Attack samples:", attack_samples.shape)

# Load the classifier model.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load artifacts that come from a trusted source.
clf = joblib.load("../classifiers/logreg_synthetic_2d.joblib")


Attack samples: (800, 2)


In [None]:
# Build the attack environment from an explicit keyword configuration
attack_env_kwargs = dict(
    attack_samples=attack_samples,  # all the attack samples
    clf=clf,  # classifier model
    threshold=0.5,  # probability from which a sample is considered an attack
    epsilon=0.5,  # maximum distance the agent may move the original point
    penalty=0.05,  # penalty for moving too far
)
env = AttackEnv(**attack_env_kwargs)

In [16]:
# Build and train the PPO agent (RL)
# Fixed seed so that training can be repeated; PPO uses it to seed the
# pseudo-random generators. NOTE(review): the recorded run used a CUDA device,
# where results may still not be bit-for-bit identical between runs.
SEED = 42

# Network architecture: two hidden layers of 64 units each
policy_kwargs = dict(net_arch=[64, 64])

model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    policy_kwargs=policy_kwargs,
    n_steps=2048,
    batch_size=64,
    learning_rate=3e-4,
    seed=SEED,
)

model.learn(total_timesteps=100_000)
model.save("../agents/ppo_attacker_synthetic_2d")


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 0.00113  |
| time/              |          |
|    fps             | 983      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1           |
|    ep_rew_mean          | -0.00887    |
| time/                   |             |
|    fps                  | 783         |
|    iterations           | 2           |
|    time_elapsed         | 5           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.012797458 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.8        |
|    explained_variance   | 0.11        |
|    learning_rate        | 0.



In [17]:
# Evaluate the attacker
def evaluate_attacker(model, env, n_episodes=1000, seed=None):
    """Roll out a trained policy and measure how often it evades the classifier.

    Each of the ``n_episodes`` episodes starts with ``env.reset()`` and is
    played until the environment reports ``terminated`` or ``truncated``. The
    outcome of an episode is read from the ``info`` dict of its last step, so
    the metrics stay correct when an episode takes more than one step (for
    single-step episodes this is the same as counting steps).

    Args:
        model: Object exposing ``predict(obs, deterministic=True)`` that
            returns an ``(action, state)`` pair.
        env: Environment following the Gymnasium ``reset`` / ``step`` API whose
            step ``info`` contains the keys ``"success"``, ``"x_adv"`` and
            ``"x_orig"``.
        n_episodes: Number of complete episodes to run; must be >= 1.
        seed: Optional seed passed to the first ``env.reset`` only. With
            ``None`` the environment is reset without a seed.

    Returns:
        Tuple ``(success_rate, mean_delta)``: the fraction of episodes whose
        final step reported ``info["success"] == 1`` and the mean norm of
        ``x_adv - x_orig`` over the final steps.

    Raises:
        ValueError: If ``n_episodes`` is smaller than 1.
    """
    if n_episodes < 1:
        raise ValueError("n_episodes must be a positive integer")

    successes = 0
    deltas_norm = []

    for episode in range(n_episodes):
        # Seed only the very first reset; later resets continue the same
        # random stream instead of replaying the same episode again.
        if episode == 0 and seed is not None:
            obs, info = env.reset(seed=seed)
        else:
            obs, info = env.reset()

        done = False
        while not done:
            action, _state = model.predict(obs, deterministic=True)
            obs, _reward, terminated, truncated, info = env.step(action)
            done = bool(terminated or truncated)

        # `info` now belongs to the last step of the episode.
        if info["success"] == 1:
            successes += 1

        delta = info["x_adv"] - info["x_orig"]
        deltas_norm.append(np.linalg.norm(delta))

    success_rate = successes / n_episodes
    mean_delta = float(np.mean(deltas_norm))
    return success_rate, mean_delta

# Evaluate the trained PPO attacker and report both metrics
success_rate, mean_delta = evaluate_attacker(model, env, n_episodes=2000)
print(
    f"Tasa de evasión: {success_rate:.3f}",
    f"Norma media de la perturbación: {mean_delta:.3f}",
    sep="\n",
)


Tasa de evasión: 0.071
Norma media de la perturbación: 0.386


In [11]:
# Compare against a random attacker
def evaluate_random_attacker(env, n_episodes=1000, seed=None):
    """Measure the evasion rate of a baseline that samples actions at random.

    Each of the ``n_episodes`` episodes starts with ``env.reset()`` and is
    played with actions drawn from ``env.action_space.sample()`` until the
    environment reports ``terminated`` or ``truncated``. The outcome of an
    episode is read from the ``info`` dict of its last step, so the metrics
    stay correct when an episode takes more than one step (for single-step
    episodes this is the same as counting steps).

    Args:
        env: Environment following the Gymnasium ``reset`` / ``step`` API with
            an ``action_space`` attribute, whose step ``info`` contains the
            keys ``"success"``, ``"x_adv"`` and ``"x_orig"``.
        n_episodes: Number of complete episodes to run; must be >= 1.
        seed: Optional seed. When given it seeds ``env.action_space`` and is
            passed to the first ``env.reset``. With ``None`` nothing is seeded.

    Returns:
        Tuple ``(success_rate, mean_delta)``: the fraction of episodes whose
        final step reported ``info["success"] == 1`` and the mean norm of
        ``x_adv - x_orig`` over the final steps.

    Raises:
        ValueError: If ``n_episodes`` is smaller than 1.
    """
    if n_episodes < 1:
        raise ValueError("n_episodes must be a positive integer")

    # The action space samples from its own generator, separate from the one
    # used by reset, so it has to be seeded explicitly.
    if seed is not None:
        env.action_space.seed(seed)

    successes = 0
    deltas_norm = []

    for episode in range(n_episodes):
        # Seed only the very first reset; later resets continue the same
        # random stream instead of replaying the same episode again.
        if episode == 0 and seed is not None:
            obs, info = env.reset(seed=seed)
        else:
            obs, info = env.reset()

        done = False
        while not done:
            action = env.action_space.sample()
            obs, _reward, terminated, truncated, info = env.step(action)
            done = bool(terminated or truncated)

        # `info` now belongs to the last step of the episode.
        if info["success"] == 1:
            successes += 1

        delta = info["x_adv"] - info["x_orig"]
        deltas_norm.append(np.linalg.norm(delta))

    success_rate = successes / n_episodes
    mean_delta = float(np.mean(deltas_norm))
    return success_rate, mean_delta

# Evaluate the random baseline on the same environment and report both metrics
rand_success, rand_delta = evaluate_random_attacker(env, n_episodes=2000)
print(
    f"[Random] Tasa de evasión: {rand_success:.3f}",
    f"[Random] Norma media de la perturbación: {rand_delta:.3f}",
    sep="\n",
)

[Random] Tasa de evasión: 0.013
[Random] Norma media de la perturbación: 0.375
