<h2> Deep Q-Network (DQN) </h2>
<h4>Deep Q-Network (DQN) is a reinforcement learning algorithm that combines Q-learning with deep neural networks. 
    
DQN replaces the Q-table with a neural network (the Q-network) that approximates the Q-function:
Q(state, action) ≈ predicted reward</h4>

<h4>Setting up the environment and training for DQN</h4>

In [None]:
pip install -e src/gym-idsgame

In [None]:
!pip uninstall gymnasium
!pip install gym==0.21.0
!pip install scikit-learn

!pip install gym-idsgame==1.0.12

In [2]:
import gymnasium
from gymnasium.core import Env

def patched_reset(self):
    return self.reset()

Env.reset = patched_reset


In [3]:
import os
import gym
import gym_idsgame
import numpy as np
import torch
# src/environment/idsgame_wrapper.py
# from src.agents.dqn_agent import DQNAgent
from src.environment.compatibility_wrapper import GymCompatibilityWrapper
from src.utils.utils import print_summary
from src.utils.plotting import plot_results
# from src.utils import create_artefact_dirs

print('DONE IMPORTING')


DONE IMPORTING


In [None]:
!cp ~/Desktop/AI-Agent-for-Cyber-Security/missing_files_for_gym/*.py \
/usr/local/lib/python3.10/dist-packages/gym/utils/

In [4]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from src.environment.explorer import IDSGameExplorer
explorer = IDSGameExplorer()
# explorer.run_comprehensive_exploration()


<h4>Trainging and running the algorithm for "idsgame-random_attack-v8" environment</h4>

In [5]:
import src.agents.dqn_agent
print(src.agents.dqn_agent.__file__)


/Users/krishnaasrinivas/Desktop/AI-Agent-for-Cyber-Security/src/agents/dqn_agent.py


In [6]:
import os
import gymnasium as gym
import warnings
from datetime import datetime

# from gym_idsgame.agents.training_agents.q_learning.q_agent_config import QAgentConfig
# from gym_idsgame.agents.training_agents.q_learning.dqn.dqn_config import DQNConfig
from experiments.util import util
from experiments.util.plotting_util import read_and_plot_results
# from src.agents.ddqn_agent import DDQNAgent
from src.utils.utils import get_output_dir, print_summary
from src.environment.compatibility_wrapper import GymCompatibilityWrapper
from src.utils.plotting import plot_results

warnings.filterwarnings('ignore')
print('done')

done


In [7]:
env_name = "idsgame-random_defense-v0"
output_dir = os.getcwd()
random_seed = 33
env = gym.make(env_name, save_dir=output_dir + "results/data/" + str(random_seed))
# env = GymCompatibilityWrapper(env)

env = GymCompatibilityWrapper(env)
env = env.unwrapped

print("\nEnvironment Information:")
print(f"Observation Space: {env.observation_space}")
print(f"Action Space: {env.action_space}")



Environment Information:
Observation Space: Box(0, 9, (3, 11), int32)
Action Space: Discrete(30)


In [8]:
import os
import csv
import matplotlib.pyplot as plt

def save_rewards_to_csv(reward_history, filename="rewards.csv"):
    with open(filename, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["Episode", "Reward"])
        for i, reward in enumerate(reward_history):
            writer.writerow([i, reward])

def plot_rewards(reward_history, title="Reward Over Episodes", save_path="reward_plot.png"):
    plt.figure(figsize=(10, 5))
    plt.plot(reward_history, label="Reward")
    plt.xlabel("Episode")
    plt.ylabel("Total Reward")
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(save_path)
    plt.close()

def print_summary(result, title="Summary"):
    """
    Print formatted training or evaluation summary.

    Args:
        result (dict): A dictionary containing metrics such as rewards, episode lengths, etc.
        title (str): Title of the summary.
    """
    print(f"{title} Summary:")
    print("-" * 50)
    print("Final Defense Performance:")
    print(f"- Average Reward: {result['average_reward']:.2f} ± {result['reward_std']:.2f}")
    print(f"- Max-Min Reward: {result['max_reward']:.2f} - {result['min_reward']:.2f}")
    print(f"- Average Episode Length: {result['average_episode_length']:.2f} ± {result['episode_length_std']:.2f}")
    print(f"- Max-Min Episode Length: {result['max_episode_length']:.2f} - {result['min_episode_length']:.2f}")
    print(f"- Average Hack Probability: {result['average_hack_probability']:.2f}% ± {result['hack_probability_std']:.2f}%")
    print(f"- Max-Min Hack Probability: {result['max_hack_probability']:.2f} - {result['min_hack_probability']:.2f}")
    print(f"- Final Cumulative Reward: {int(result['cumulative_reward'])}")



In [24]:
from src.agents.dqn_agent import DQNAgent
import numpy as np
import gym
import gym_idsgame


# === Get dimensions ===
sample_obs = env.reset()[0]
state_dim = np.array(sample_obs).flatten().shape[0]
action_dim = env.attacker_action_space.n

# === Agent ===
# agent = DQNAgent(state_dim=state_dim, action_dim=action_dim)



agent = DQNAgent(
    state_dim=state_dim,
    action_dim=action_dim,
    gamma=0.99,
    lr=1e-3,
    batch_size=64,
    buffer_capacity=10000,
    epsilon_start=1.0,
    epsilon_end=0.01,
    epsilon_decay=0.995,
    device="cpu"
)

# === Training Loop ===
reward_history = []
num_episodes = 20000
max_steps = 100

for ep in range(num_episodes):
    state = extract_attacker_obs(env.reset())
    total_reward = 0

    for t in range(max_steps):
        action = agent.select_action(state)
        defense_action = env.defender_action_space.sample()
        full_action = (action, defense_action)

        next_obs, reward, terminated, truncated, _ = env.step(full_action)
        done = terminated or truncated
        next_state = extract_attacker_obs(next_obs)

        agent.store(state, action, reward[0], next_state, done)
        agent.update()

        state = next_state
        total_reward += reward[0]

        if done:
            break

    reward_history.append(total_reward)

    if ep % 1000 == 0:
        avg_last_1000 = np.mean(reward_history[-10:])
        print(f"[Ep {ep}] AvgReward (last 10): {avg_last_1000:.2f} | Total: {total_reward:.2f} | Epsilon: {agent.epsilon:.3f}")

# === Evaluation Summary ===
result = {
    "average_reward": np.mean(reward_history),
    "reward_std": np.std(reward_history),
    "max_reward": np.max(reward_history),
    "min_reward": np.min(reward_history),
    "average_episode_length": max_steps,
    "episode_length_std": 0.0,
    "max_episode_length": max_steps,
    "min_episode_length": max_steps,
    "average_hack_probability": 0.0,
    "hack_probability_std": 0.0,
    "max_hack_probability": 0.0,
    "min_hack_probability": 0.0,
    "cumulative_reward": int(np.sum(reward_history)),
}

# === Print Results ===
print("\n📊 Final DQN Training Performance:")
print('Results: ',result)
print(f"- Average Reward: {result['average_reward']:.2f} ± {result['reward_std']:.2f}")
print(f"- Max-Min Reward: {result['max_reward']:.2f} - {result['min_reward']:.2f}")
print(f"- Average Episode Length: {result['average_episode_length']:.2f}")
print(f"- Cumulative Reward: {result['cumulative_reward']}")


[Ep 0] AvgReward (last 10): -5.00 | Total: -5.00 | Epsilon: 1.000
[Ep 1000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 2000] AvgReward (last 10): 0.40 | Total: 1.00 | Epsilon: 0.010
[Ep 3000] AvgReward (last 10): 0.80 | Total: 1.00 | Epsilon: 0.010
[Ep 4000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 5000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 6000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 7000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 8000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 9000] AvgReward (last 10): -4.90 | Total: -1.00 | Epsilon: 0.010
[Ep 10000] AvgReward (last 10): -8.80 | Total: -28.00 | Epsilon: 0.010
[Ep 11000] AvgReward (last 10): -0.30 | Total: -1.00 | Epsilon: 0.010
[Ep 12000] AvgReward (last 10): -1.00 | Total: -1.00 | Epsilon: 0.010
[Ep 13000] AvgReward (last 10): -41.00 | Total: -26.00 | Epsilon: 0.010
[Ep 14000] AvgReward (last 10): -1.00 | Total:

In [23]:
from src.agents.dqn_agent import DQNAgent
import numpy as np
import gym
import gym_idsgame


# === Get dimensions ===
sample_obs = env.reset()[0]
state_dim = np.array(sample_obs).flatten().shape[0]
action_dim = env.attacker_action_space.n

# === Agent ===
# agent = DQNAgent(state_dim=state_dim, action_dim=action_dim)



agent = DQNAgent(
    state_dim=state_dim,
    action_dim=action_dim,
    gamma=0.99,
    lr=1e-3,
    batch_size=64,
    buffer_capacity=10000,
    epsilon_start=1.0,
    epsilon_end=0.01,
    epsilon_decay=0.995,
    device="cpu"
)

# === Training Loop ===
reward_history = []
num_episodes = 10000
max_steps = 100

for ep in range(num_episodes):
    state = extract_attacker_obs(env.reset())
    total_reward = 0

    for t in range(max_steps):
        action = agent.select_action(state)
        defense_action = env.defender_action_space.sample()
        full_action = (action, defense_action)

        next_obs, reward, terminated, truncated, _ = env.step(full_action)
        done = terminated or truncated
        next_state = extract_attacker_obs(next_obs)

        agent.store(state, action, reward[0], next_state, done)
        agent.update()

        state = next_state
        total_reward += reward[0]

        if done:
            break

    reward_history.append(total_reward)

    if ep % 1000 == 0:
        avg_last_1000 = np.mean(reward_history[-10:])
        print(f"[Ep {ep}] AvgReward (last 10): {avg_last_1000:.2f} | Total: {total_reward:.2f} | Epsilon: {agent.epsilon:.3f}")

# === Evaluation Summary ===
result = {
    "average_reward": np.mean(reward_history),
    "reward_std": np.std(reward_history),
    "max_reward": np.max(reward_history),
    "min_reward": np.min(reward_history),
    "average_episode_length": max_steps,
    "episode_length_std": 0.0,
    "max_episode_length": max_steps,
    "min_episode_length": max_steps,
    "average_hack_probability": 0.0,
    "hack_probability_std": 0.0,
    "max_hack_probability": 0.0,
    "min_hack_probability": 0.0,
    "cumulative_reward": int(np.sum(reward_history)),
}

# === Print Results ===
print("\n📊 Final DQN Training Performance:")
print('Results: ',result)
print(f"- Average Reward: {result['average_reward']:.2f} ± {result['reward_std']:.2f}")
print(f"- Max-Min Reward: {result['max_reward']:.2f} - {result['min_reward']:.2f}")
print(f"- Average Episode Length: {result['average_episode_length']:.2f}")
print(f"- Cumulative Reward: {result['cumulative_reward']}")

# # === Training ===
# reward_history = []
# num_episodes = 10000
# max_steps = 100

# def extract_attacker_obs(obs):
#     obs = obs[0] if isinstance(obs, tuple) else obs
#     return np.array(obs).flatten()

# for ep in range(num_episodes):
#     state = extract_attacker_obs(env.reset())
#     total_reward = 0

#     for t in range(max_steps):
#         action = agent.select_action(state)
#         def_action = env.defender_action_space.sample()
#         full_action = (action, def_action)

#         next_obs, reward, terminated, truncated, _ = env.step(full_action)
#         done = terminated or truncated
#         next_state = extract_attacker_obs(next_obs)

#         agent.store(state, action, reward[0], next_state, done)
#         agent.update()

#         state = next_state
#         total_reward += reward[0]

#         if done:
#             break

#     reward_history.append(total_reward)
#     print(f'$$$$$$$$$$$$ ep: {ep}, reward : {total_reward}')

#     # Logging
#     if ep % 10 == 0:
#         avg_last_10 = np.mean(reward_history[-10:])
#         print(f"[Ep {ep}] AvgReward (last 10): {avg_last_10:.2f} | Total: {total_reward:.2f} | Epsilon: {agent.epsilon:.3f}")


[Ep 0] AvgReward (last 10): -20.00 | Total: -20.00 | Epsilon: 1.000
[Ep 1000] AvgReward (last 10): -0.40 | Total: -1.00 | Epsilon: 0.010
[Ep 2000] AvgReward (last 10): 0.80 | Total: 1.00 | Epsilon: 0.010
[Ep 3000] AvgReward (last 10): -3.90 | Total: -1.00 | Epsilon: 0.010
[Ep 4000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 5000] AvgReward (last 10): 0.90 | Total: 1.00 | Epsilon: 0.010
[Ep 6000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 7000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010
[Ep 8000] AvgReward (last 10): 0.80 | Total: 1.00 | Epsilon: 0.010
[Ep 9000] AvgReward (last 10): 1.00 | Total: 1.00 | Epsilon: 0.010

📊 Final DQN Training Performance:
Results:  {'average_reward': 0.4551, 'reward_std': 3.1199974342938175, 'max_reward': 1, 'min_reward': -96, 'average_episode_length': 100, 'episode_length_std': 0.0, 'max_episode_length': 100, 'min_episode_length': 100, 'average_hack_probability': 0.0, 'hack_probability_std': 0.0, 'max_hack

In [12]:
save_rewards_to_csv(reward_history, filename="rewards.csv")
plot_rewards(reward_history, title="DQN Training Rewards")
