In [1]:
!pip install nasim
!pip install neo4j


Collecting nasim
  Downloading nasim-0.12.0-py3-none-any.whl.metadata (8.6 kB)
Downloading nasim-0.12.0-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.2/78.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nasim
Successfully installed nasim-0.12.0
Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neo4j
Successfully installed neo4j-5.28.1


In [14]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
from collections import deque
from neo4j import GraphDatabase

# ====================================
#  1️⃣ NEO4J CONFIGURATION & DATA FETCHING
# ====================================

NEO4J_URI = "bolt://0.tcp.in.ngrok.io:13454"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

class Neo4jConnector:
    """Handles interaction with Neo4j database for retrieving threat details."""

    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def get_random_cve(self):
        """Retrieve a random CVE from the Neo4j database."""
        query = """
        MATCH (cve:CVE)
        RETURN cve.ID AS CVE_ID, cve.Name AS CVE_Name, cve.Description AS CVE_Description
        ORDER BY rand()
        LIMIT 1
        """
        with self.driver.session() as session:
            result = session.run(query)
            return result.single()

# Initialize Neo4j Connection
neo4j_db = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# ====================================
#  2️⃣ DQN MODEL
# ====================================

class DQN(nn.Module):
    """Deep Q-Network (DQN) for learning attack strategies and detecting threats."""

    def __init__(self, state_dim, action_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        return q_values

# ====================================
#  3️⃣ EXPERIENCE REPLAY BUFFER
# ====================================

class ReplayBuffer:
    """Experience Replay Buffer for storing past transitions."""

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# ====================================
#  4️⃣ ACTION SELECTION (EPSILON-GREEDY)
# ====================================

def select_action(state, epsilon):
    """Epsilon-Greedy Policy for selecting integer actions."""
    if random.random() < epsilon:  # Explore
        return int(env.action_space.sample())  # Ensure integer return
    else:  # Exploit
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values = policy_net(state_tensor)
        return int(torch.argmax(q_values).item())  # Ensure integer return

# ====================================
#  5️⃣ TRAINING FUNCTION
# ====================================

def train_dqn():
    """Train the DQN using Experience Replay."""
    if memory.size() < BATCH_SIZE:
        return

    batch = memory.sample(BATCH_SIZE)
    states, actions, rewards, next_states, dones = zip(*batch)

    states = torch.FloatTensor(states)
    actions = torch.LongTensor(actions).unsqueeze(1)
    rewards = torch.FloatTensor(rewards)
    next_states = torch.FloatTensor(next_states)
    dones = torch.FloatTensor(dones)

    q_values = policy_net(states).gather(1, actions).squeeze(1)

    with torch.no_grad():
        max_next_q_values = target_net(next_states).max(1)[0]

    target_q_values = rewards + (GAMMA * max_next_q_values * (1 - dones))

    loss = nn.MSELoss()(q_values, target_q_values)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# ====================================
#  6️⃣ TRAINING LOOP
# ====================================

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 100

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Initialize Networks
policy_net = DQN(state_dim, action_dim)
target_net = DQN(state_dim, action_dim)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
memory = ReplayBuffer(MEMORY_SIZE)

# ✅ Training Loop
epsilon = EPSILON
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    done = False
    total_reward = 0

    # 🔺 Get a random CVE attack type for this episode
    random_threat = neo4j_db.get_random_cve()
    threat_name = random_threat["CVE_Name"] if random_threat else "Unknown Threat"
    threat_description = random_threat["CVE_Description"] if random_threat else "No Description Available"

    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, _, _ = env.step(action)

        memory.push(state, action, reward, next_state, done)
        state = next_state
        train_dqn()
        total_reward += reward

    # 🔹 Print attack details at the end of the episode
    print(f" Episode {episode}: Reward = {total_reward}, Epsilon = {epsilon:.4f}")
    print(f" Detected Threat: {threat_name}")
    print(f" Description: {threat_description}\n")

    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)

# ✅ Save Model & Close DB
torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
neo4j_db.close()
print("✅ Simulation Completed.")


🎯 Episode 0: Reward = 140.0, Epsilon = 1.0000
🔺 Detected Threat: CVE-2021-27571
📝 Description: ['An issue was discovered in Emote Remote Mouse through 4.0.0.0. Attackers can retrieve recently used and running applications, their icons, and their file paths. This information is sent in cleartext and is not protected by any authentication logic.']

🎯 Episode 1: Reward = -50.0, Epsilon = 0.9970
🔺 Detected Threat: CVE-2021-36770
📝 Description: ['Encode.pm, as distributed in Perl through 5.34.0, allows local users to gain privileges via a Trojan horse Encode::ConfigLocal library (in the current working directory) that preempts dynamic module loading. Exploitation requires an unusual configuration, and certain 2021 versions of Encode.pm (3.05 through 3.11). This issue occurs because the || operator evaluates @INC in a scalar context, and thus @INC has only an integer value.']

🎯 Episode 2: Reward = 56.0, Epsilon = 0.9940
🔺 Detected Threat: CVE-2021-29068
📝 Description: ['Certain NETGEAR devi

In [12]:
# 8:56
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
from collections import deque
from neo4j import GraphDatabase

# ====================================
#  1️⃣ NEO4J CONFIGURATION & DATA FETCHING
# ====================================

NEO4J_URI = "bolt://0.tcp.in.ngrok.io:13454"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

class Neo4jConnector:
    """Handles interaction with Neo4j database for retrieving threat details."""

    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def fetch_attack_data(self):
        """Retrieve attack threats from the Neo4j database."""
        query = """
        MATCH (capec:CAPEC)
        RETURN capec.ID AS CAPEC_ID, capec.Name AS CAPEC_Name, capec.Description AS CAPEC_Description
        """
        with self.driver.session() as session:
            result = session.run(query)
            return {record["CAPEC_ID"]: record.data() for record in result}

    def get_threat_by_id(self, threat_id):
        """Retrieve attack details for a given threat ID."""
        query = """
        MATCH (capec:CAPEC {ID: $threat_id})
        RETURN capec.Name AS CAPEC_Name, capec.Description AS CAPEC_Description
        """
        with self.driver.session() as session:
            result = session.run(query, threat_id=threat_id)
            return result.single()

# Initialize Neo4j Connection
neo4j_db = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
attack_data = neo4j_db.fetch_attack_data()

# ✅ Filter Allowed Threats
allowed_attack_ids = ["CVE-2021-0109", "CVE-2021-0259", "CVE-2021-0102"]
valid_threats = [tid for tid in allowed_attack_ids if tid in attack_data]
num_threats = max(1, len(valid_threats))

print(f"✅ Valid Threats: {valid_threats}")

# ====================================
#  2️⃣ DQN MODEL
# ====================================

class DQN(nn.Module):
    """Deep Q-Network (DQN) for learning attack strategies and detecting threats."""

    def __init__(self, state_dim, action_dim, num_threats):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)
        self.threat_layer = nn.Linear(128, num_threats)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        threat_classification = torch.softmax(self.threat_layer(x), dim=1)
        return q_values, threat_classification

# ====================================
#  3️⃣ EXPERIENCE REPLAY BUFFER
# ====================================

class ReplayBuffer:
    """Experience Replay Buffer for storing past transitions."""

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, threat_type):
        self.buffer.append((state, action, reward, next_state, done, threat_type))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# ====================================
#  4️⃣ ACTION SELECTION (EPSILON-GREEDY)
# ====================================

def select_action(state, epsilon):
    """Epsilon-Greedy Policy for selecting integer actions."""
    if random.random() < epsilon:  # Explore
        return int(env.action_space.sample())  # Ensure integer return
    else:  # Exploit
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values, _ = policy_net(state_tensor)
        return int(torch.argmax(q_values).item())  # Ensure integer return

def get_attack_type(action):
    """Map NASim action index to attack type."""
    if isinstance(action, int) and action < len(env.action_space.actions):
        return env.action_space.actions[action].__str__()  # Convert action to string
    return "Unknown Action"

# ====================================
#  5️⃣ TRAINING FUNCTION
# ====================================

def train_dqn():
    """Train the DQN using Experience Replay."""
    if memory.size() < BATCH_SIZE:
        return

    batch = memory.sample(BATCH_SIZE)
    states, actions, rewards, next_states, dones, _ = zip(*batch)

    states = torch.FloatTensor(states)
    actions = torch.LongTensor(actions).unsqueeze(1)
    rewards = torch.FloatTensor(rewards)
    next_states = torch.FloatTensor(next_states)
    dones = torch.FloatTensor(dones)

    q_values, _ = policy_net(states)
    q_values = q_values.gather(1, actions).squeeze(1)

    with torch.no_grad():
        next_q_values, _ = target_net(next_states)
        max_next_q_values = next_q_values.max(1)[0]

    target_q_values = rewards + (GAMMA * max_next_q_values * (1 - dones))

    loss = nn.MSELoss()(q_values, target_q_values)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# ====================================
#  6️⃣ TRAINING LOOP
# ====================================

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 100

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Initialize Networks
policy_net = DQN(state_dim, action_dim, num_threats)
target_net = DQN(state_dim, action_dim, num_threats)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
memory = ReplayBuffer(MEMORY_SIZE)

# ✅ Training Loop
epsilon = EPSILON
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    done = False
    total_reward = 0

    while not done:
        action = select_action(state, epsilon)

        # ✅ Get NASim attack type
        attack_type = get_attack_type(action)

        next_state, reward, done, _, _ = env.step(action)

        # Predict Threat Type
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        _, threat_prediction = policy_net(state_tensor)

        threat_type = torch.argmax(threat_prediction, dim=1).item()
        threat_details = neo4j_db.get_threat_by_id(threat_type)
        threat_description = threat_details["CAPEC_Name"] if threat_details else "Unknown Threat"

        memory.push(state, action, reward, next_state, done, threat_type)
        state = next_state
        train_dqn()
        total_reward += reward

        # ✅ Print Attack Type & Threat
        print(f"Episode {episode} | Action: {attack_type} | Reward: {reward} | Threat: {threat_description}")

    reward_history.append(total_reward)

    if episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)
    print(f"🎯 Episode {episode} completed. Total Reward = {total_reward}, Epsilon = {epsilon:.4f}")

# ✅ Save Model & Close DB
torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
neo4j_db.close()
print("✅ Simulation Completed.")


✅ Valid Threats: []
Episode 0 | Action: SubnetScan: target=(3, 0), cost=1.00, prob=1.00, req_access=USER | Reward: -1.0 | Threat: Unknown Threat
Episode 0 | Action: PrivilegeEscalation: target=(3, 0), cost=1.00, prob=1.00, req_access=USER, os=linux, process=tomcat, access=2 | Reward: -1.0 | Threat: Unknown Threat
Episode 0 | Action: ServiceScan: target=(3, 0), cost=1.00, prob=1.00, req_access=USER | Reward: -1.0 | Threat: Unknown Threat
Episode 0 | Action: OSScan: target=(1, 0), cost=1.00, prob=1.00, req_access=USER | Reward: -1 | Threat: Unknown Threat
Episode 0 | Action: Exploit: target=(2, 0), cost=1.00, prob=0.80, req_access=USER, os=linux, service=ssh, access=1 | Reward: -1.0 | Threat: Unknown Threat
Episode 0 | Action: SubnetScan: target=(3, 0), cost=1.00, prob=1.00, req_access=USER | Reward: -1.0 | Threat: Unknown Threat
Episode 0 | Action: PrivilegeEscalation: target=(1, 0), cost=1.00, prob=1.00, req_access=USER, os=linux, process=tomcat, access=2 | Reward: -1.0 | Threat: Unkno

KeyboardInterrupt: 

In [11]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
from collections import deque
from neo4j import GraphDatabase

# ====================================
#  1️⃣ NEO4J CONFIGURATION & DATA FETCHING
# ====================================

NEO4J_URI = "bolt://0.tcp.in.ngrok.io:13454"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

class Neo4jConnector:
    """Handles interaction with Neo4j database for retrieving threat details."""

    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def fetch_attack_data(self):
        """Retrieve attack threats from the Neo4j database."""
        query = """
        MATCH (capec:CAPEC)
        RETURN capec.ID AS CAPEC_ID, capec.Name AS CAPEC_Name, capec.Description AS CAPEC_Description
        """
        with self.driver.session() as session:
            result = session.run(query)
            return {record["CAPEC_ID"]: record.data() for record in result}

    def get_threat_by_id(self, threat_id):
        """Retrieve attack details for a given threat ID."""
        query = """
        MATCH (capec:CAPEC {ID: $threat_id})
        RETURN capec.Name AS CAPEC_Name, capec.Description AS CAPEC_Description
        """
        with self.driver.session() as session:
            result = session.run(query, threat_id=threat_id)
            return result.single()

# Initialize Neo4j Connection
neo4j_db = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
attack_data = neo4j_db.fetch_attack_data()

# ✅ Filter Allowed Threats
allowed_attack_ids = ["CVE-2021-0109", "CVE-2021-0259", "CVE-2021-0102"]
valid_threats = [tid for tid in allowed_attack_ids if tid in attack_data]
num_threats = max(1, len(valid_threats))

print(f"✅ Valid Threats: {valid_threats}")

# ====================================
#  2️⃣ DQN MODEL
# ====================================

class DQN(nn.Module):
    """Deep Q-Network (DQN) for learning attack strategies and detecting threats."""

    def __init__(self, state_dim, action_dim, num_threats):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)
        self.threat_layer = nn.Linear(128, num_threats)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        threat_classification = torch.softmax(self.threat_layer(x), dim=1)
        return q_values, threat_classification

# ====================================
#  3️⃣ EXPERIENCE REPLAY BUFFER
# ====================================

class ReplayBuffer:
    """Experience Replay Buffer for storing past transitions."""

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, threat_type):
        self.buffer.append((state, action, reward, next_state, done, threat_type))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# ====================================
#  4️⃣ ACTION SELECTION (EPSILON-GREEDY)
# ====================================

def select_action(state, epsilon):
    """Epsilon-Greedy Policy for selecting integer actions."""
    if random.random() < epsilon:  # Explore
        return int(env.action_space.sample())  # Ensure integer return
    else:  # Exploit
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values, _ = policy_net(state_tensor)
        return int(torch.argmax(q_values).item())  # Ensure integer return


# ====================================
#  5️⃣ TRAINING FUNCTION
# ====================================

def train_dqn():
    """Train the DQN using Experience Replay."""
    if memory.size() < BATCH_SIZE:
        return

    batch = memory.sample(BATCH_SIZE)
    states, actions, rewards, next_states, dones, _ = zip(*batch)

    states = torch.FloatTensor(states)
    actions = torch.LongTensor(actions).unsqueeze(1)
    rewards = torch.FloatTensor(rewards)
    next_states = torch.FloatTensor(next_states)
    dones = torch.FloatTensor(dones)

    q_values, _ = policy_net(states)
    q_values = q_values.gather(1, actions).squeeze(1)

    with torch.no_grad():
        next_q_values, _ = target_net(next_states)
        max_next_q_values = next_q_values.max(1)[0]

    target_q_values = rewards + (GAMMA * max_next_q_values * (1 - dones))

    loss = nn.MSELoss()(q_values, target_q_values)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# ====================================
#  6️⃣ TRAINING LOOP
# ====================================

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 100

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Initialize Networks
policy_net = DQN(state_dim, action_dim, num_threats)
target_net = DQN(state_dim, action_dim, num_threats)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
memory = ReplayBuffer(MEMORY_SIZE)

# ✅ Training Loop
epsilon = EPSILON
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    done = False
    total_reward = 0

    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, _, _ = env.step(action)

        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        _, threat_prediction = policy_net(state_tensor)
        threat_type = torch.argmax(threat_prediction, dim=1).item()

        memory.push(state, action, reward, next_state, done, threat_type)
        state = next_state
        train_dqn()
        total_reward += reward

    reward_history.append(total_reward)

    if episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)
    print(f"🎯 Episode {episode}: Reward = {total_reward}, Epsilon = {epsilon:.4f}")

# ✅ Save Model & Close DB
torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
neo4j_db.close()
print("✅ Simulation Completed.")


✅ Valid Threats: []
🎯 Episode 0: Reward = 116.0, Epsilon = 0.9970
🎯 Episode 1: Reward = 101.0, Epsilon = 0.9940
🎯 Episode 2: Reward = 159.0, Epsilon = 0.9910
🎯 Episode 3: Reward = 72.0, Epsilon = 0.9881
🎯 Episode 4: Reward = 95.0, Epsilon = 0.9851
🎯 Episode 5: Reward = -33.0, Epsilon = 0.9821
🎯 Episode 6: Reward = 108.0, Epsilon = 0.9792
🎯 Episode 7: Reward = 121.0, Epsilon = 0.9763
🎯 Episode 8: Reward = 151.0, Epsilon = 0.9733
🎯 Episode 9: Reward = 114.0, Epsilon = 0.9704
🎯 Episode 10: Reward = 106.0, Epsilon = 0.9675
🎯 Episode 11: Reward = 48.0, Epsilon = 0.9646
🎯 Episode 12: Reward = 122.0, Epsilon = 0.9617
🎯 Episode 13: Reward = 117.0, Epsilon = 0.9588
🎯 Episode 14: Reward = 58.0, Epsilon = 0.9559
🎯 Episode 15: Reward = 96.0, Epsilon = 0.9531
🎯 Episode 16: Reward = 84.0, Epsilon = 0.9502
🎯 Episode 17: Reward = 80.0, Epsilon = 0.9474
🎯 Episode 18: Reward = 136.0, Epsilon = 0.9445
🎯 Episode 19: Reward = 138.0, Epsilon = 0.9417
🎯 Episode 20: Reward = 148.0, Epsilon = 0.9389
🎯 Episode 

In [7]:
# 4 :45
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
from collections import deque
from neo4j import GraphDatabase

# Neo4j Configuration
NEO4J_URI = "bolt://0.tcp.in.ngrok.io:13454"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# Connect to Neo4j
class Neo4jConnector:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def fetch_attack_data(self):
        """Fetch attack details dynamically from Neo4j"""
        query = """
        MATCH (capec:CAPEC)
        RETURN capec.ID AS CAPEC_ID, capec.Name AS CAPEC_Name, capec.Description AS CAPEC_Description
        """
        with self.driver.session() as session:
            result = session.run(query)
            return {record["CAPEC_ID"]: record.data() for record in result}  # Return a dictionary {ID: Details}

    def get_threat_by_id(self, threat_id):
        """Fetch attack details for a given CAPEC_ID"""
        query = """
        MATCH (capec:CAPEC {ID: $threat_id})
        RETURN capec.Name AS CAPEC_Name, capec.Description AS CAPEC_Description
        """
        with self.driver.session() as session:
            result = session.run(query, threat_id=threat_id)
            return result.single()  # Return single record

# Initialize Neo4j Connection
neo4j_db = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
attack_data = neo4j_db.fetch_attack_data()

# ✅ Provide a list of valid attack IDs from Neo4j (Example: Only allow certain threats)
allowed_attack_ids = ["CVE-2021-0109", "CVE-2021-0259", "CVE-2021-0102"]
filtered_attack_data = {key: attack_data[key] for key in allowed_attack_ids if key in attack_data}

# Extract Known Threats
valid_threats = list(filtered_attack_data.keys())  # Only keep valid threat IDs
num_threats = max(1, len(valid_threats))

print(f"Valid Threat IDs: {valid_threats}")
print(f"Number of Known Threat Categories: {num_threats}")

# Threat Mapping
def map_threat_id(index):
    """Maps predicted index to allowed CAPEC_ID"""
    if 0 <= index < len(valid_threats):
        return valid_threats[index]
    return None  # If invalid, return None

# ✅ NASim Attack Mapping
def get_attack_type(action_index):
    """Returns a human-readable attack type for a given NASim action index"""
    action = env.action_space.get_action(action_index)
  # Retrieve action details

    if action.is_scan():
        return "Scan Attack"
    elif action.is_exploit():
        return f"Exploit Attack (Vulnerability: {action.name})"
    elif action.is_privilege_escalation():
        return "Privilege Escalation Attack"
    else:
        return "Unknown Attack"

# Define DQN Model
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim, num_threats):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)
        self.threat_layer = nn.Linear(128, num_threats)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        threat_classification = torch.softmax(self.threat_layer(x), dim=1)
        return q_values, threat_classification

# Experience Replay Buffer
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, threat_type):
        self.buffer.append((state, action, reward, next_state, done, threat_type))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 100

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Initialize Networks
policy_net = DQN(state_dim, action_dim, num_threats)
target_net = DQN(state_dim, action_dim, num_threats)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
memory = ReplayBuffer(MEMORY_SIZE)

# Training Loop
epsilon = EPSILON
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    state = np.zeros(state_dim) if state is None or len(state) == 0 else state
    done = False
    total_reward = 0

    while not done:
        action = select_action(state, epsilon)
        attack_type = get_attack_type(action)  # ✅ Get NASim attack name
        next_state, reward, done, _, info = env.step(action)
        next_state = np.zeros(state_dim) if next_state is None or len(next_state) == 0 else next_state

        # Predict Threat Type
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        _, threat_prediction = policy_net(state_tensor)

        # Ensure detected threats match only the allowed threat IDs
        threat_type = torch.argmax(threat_prediction, dim=1).item()
        threat_id = map_threat_id(threat_type)

        # Query Neo4j for threat details
        if threat_id:
            threat_details = neo4j_db.get_threat_by_id(threat_id)
            threat_description = threat_details["CAPEC_Name"] if threat_details else "Unknown Threat"
        else:
            threat_description = "No Threat Detected"

        # Store experience
        memory.push(state, action, reward, next_state, done, threat_type)
        state = next_state
        train_dqn()
        total_reward += reward

        # ✅ Print Attack Type + Detected Threat
        print(f"Episode {episode} | Reward: {reward}")

    reward_history.append(total_reward)

    # Update Target Network
    if episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)
    print(f"Episode {episode} completed. Total Reward = {total_reward}, Epsilon = {epsilon:.4f} Action: {attack_type} ")

# Save Model
torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
neo4j_db.close()
print("Simulation complete.")


Valid Threat IDs: []
Number of Known Threat Categories: 1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1.0
Episode 0 | Reward: -1
Episode 0 | Reward: -1.0
Episode 0 | Rewa

KeyboardInterrupt: 

In [None]:
!pip install nasim
!pip install neo4j

Collecting nasim
  Downloading nasim-0.12.0-py3-none-any.whl.metadata (8.6 kB)
Downloading nasim-0.12.0-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.2/78.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nasim
Successfully installed nasim-0.12.0
Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neo4j
Successfully installed neo4j-5.28.1


In [None]:
# 4:30pm
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
from collections import deque
from neo4j import GraphDatabase

# Neo4j Configuration
NEO4J_URI = "bolt://0.tcp.in.ngrok.io:11755"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# Connect to Neo4j
class Neo4jConnector:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def fetch_attack_data(self):
        """Fetch attack details dynamically from Neo4j"""
        query = """
        MATCH (capec:CAPEC)
        RETURN capec.ID AS CAPEC_ID, capec.Name AS CAPEC_Name, capec.Description AS CAPEC_Description
        """
        with self.driver.session() as session:
            result = session.run(query)
            return {record["CAPEC_ID"]: record.data() for record in result}  # Return a dictionary {ID: Details}

    def get_threat_by_id(self, threat_id):
        """Fetch attack details for a given CAPEC_ID"""
        query = """
        MATCH (capec:CAPEC {ID: $threat_id})
        RETURN capec.Name AS CAPEC_Name, capec.Description AS CAPEC_Description
        """
        with self.driver.session() as session:
            result = session.run(query, threat_id=threat_id)
            return result.single()  # Return single record

# Initialize Neo4j Connection
neo4j_db = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
attack_data = neo4j_db.fetch_attack_data()

# ✅ Provide a list of valid attack IDs from Neo4j (Example: Only allow certain threats)
allowed_attack_ids = ["CAPEC-1", "CAPEC-42", "CAPEC-99"]
filtered_attack_data = {key: attack_data[key] for key in allowed_attack_ids if key in attack_data}

# Extract Known Threats
valid_threats = list(filtered_attack_data.keys())  # Only keep valid threat IDs
num_threats = max(1, len(valid_threats))

print(f"Valid Threat IDs: {valid_threats}")
print(f"Number of Known Threat Categories: {num_threats}")

# Threat Mapping
def map_threat_id(index):
    """Maps predicted index to allowed CAPEC_ID"""
    if 0 <= index < len(valid_threats):
        return valid_threats[index]
    return None  # If invalid, return None

# Define DQN Model
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim, num_threats):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)
        self.threat_layer = nn.Linear(128, num_threats)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        threat_classification = torch.softmax(self.threat_layer(x), dim=1)
        return q_values, threat_classification

# Experience Replay Buffer
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, threat_type):
        self.buffer.append((state, action, reward, next_state, done, threat_type))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 100

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Initialize Networks
policy_net = DQN(state_dim, action_dim, num_threats)
target_net = DQN(state_dim, action_dim, num_threats)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
memory = ReplayBuffer(MEMORY_SIZE)

# Action Selection
def select_action(state, epsilon):
    if random.random() < epsilon:
        return random.randint(0, env.action_space.n - 1)
    state_tensor = torch.FloatTensor(state).unsqueeze(0)
    with torch.no_grad():
        return policy_net(state_tensor)[0].argmax().item()

# Train DQN Model
def train_dqn():
    if memory.size() < BATCH_SIZE:
        return
    batch = memory.sample(BATCH_SIZE)
    states, actions, rewards, next_states, dones, threat_types = zip(*batch)

    states = torch.FloatTensor(states)
    actions = torch.LongTensor(actions).unsqueeze(1)
    rewards = torch.FloatTensor(rewards).unsqueeze(1)
    next_states = torch.FloatTensor(next_states)
    dones = torch.FloatTensor([float(d) for d in dones]).unsqueeze(1)

    threat_types = torch.clamp(torch.LongTensor(threat_types), 0, num_threats - 1)

    q_values, threat_preds = policy_net(states)
    q_values = q_values.gather(1, actions)
    next_q_values, _ = target_net(next_states)
    target_q_values = rewards + GAMMA * next_q_values.max(1, keepdim=True)[0] * (1 - dones)

    q_loss = nn.MSELoss()(q_values, target_q_values.detach())
    threat_loss = nn.CrossEntropyLoss()(threat_preds, threat_types)
    loss = q_loss + threat_loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Training Loop
epsilon = EPSILON
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    state = np.zeros(state_dim) if state is None or len(state) == 0 else state
    done = False
    total_reward = 0

    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, truncated, info = env.step(action)
        next_state = np.zeros(state_dim) if next_state is None or len(next_state) == 0 else next_state

        # Predict Threat Type
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        _, threat_prediction = policy_net(state_tensor)

        # Ensure detected threats match only the allowed threat IDs
        threat_type = torch.argmax(threat_prediction, dim=1).item()
        threat_id = map_threat_id(threat_type)

        # Query Neo4j for threat details
        if threat_id:
            threat_details = neo4j_db.get_threat_by_id(threat_id)
            threat_description = threat_details["CAPEC_Name"] if threat_details else "Unknown Threat"
        else:
            threat_description = "No Threat Detected"

        # Store experience
        memory.push(state, action, reward, next_state, done, threat_type)
        state = next_state
        train_dqn()
        total_reward += reward

    reward_history.append(total_reward)

    # Update Target Network
    if episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)
    print(f"Episode {episode}: Total Reward = {total_reward}, Epsilon = {epsilon:.4f}, Threat Detected: {threat_description}")

# Save Model
torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
neo4j_db.close()
print("Training complete.")


Valid Threat IDs: []
Number of Known Threat Categories: 1
Episode 0: Total Reward = 100.0, Epsilon = 0.9970, Threat Detected: No Threat Detected
Episode 1: Total Reward = 99.0, Epsilon = 0.9940, Threat Detected: No Threat Detected
Episode 2: Total Reward = 132.0, Epsilon = 0.9910, Threat Detected: No Threat Detected
Episode 3: Total Reward = 137.0, Epsilon = 0.9881, Threat Detected: No Threat Detected
Episode 4: Total Reward = 42.0, Epsilon = 0.9851, Threat Detected: No Threat Detected
Episode 5: Total Reward = 96.0, Epsilon = 0.9821, Threat Detected: No Threat Detected
Episode 6: Total Reward = 54.0, Epsilon = 0.9792, Threat Detected: No Threat Detected
Episode 7: Total Reward = 118.0, Epsilon = 0.9763, Threat Detected: No Threat Detected
Episode 8: Total Reward = 103.0, Epsilon = 0.9733, Threat Detected: No Threat Detected
Episode 9: Total Reward = 97.0, Epsilon = 0.9704, Threat Detected: No Threat Detected
Episode 10: Total Reward = 99.0, Epsilon = 0.9675, Threat Detected: No Threat

KeyboardInterrupt: 

In [None]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
from collections import deque
from neo4j import GraphDatabase

# Neo4j Configuration
NEO4J_URI = "bolt://0.tcp.in.ngrok.io:11755"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# Connect to Neo4j
class Neo4jConnector:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def fetch_attack_data(self):
        """Fetch attack details dynamically from Neo4j"""
        query = """
        MATCH (cve:CVE)-[:RelatedAttackPattern]->(capec:CAPEC)
        OPTIONAL MATCH (cve)-[:hasConsequence]->(con:Consequence)
        OPTIONAL MATCH (cve)-[:hasMitigation]->(mit:Mitigation)
        RETURN
            cve.Name AS CVE_ID,
            cve.Description AS Description,
            capec.Name AS CAPEC_ID,
            capec.Description AS CAPEC_Description,
            con.Description AS Consequence,
            mit.Description AS Mitigation
        """
        with self.driver.session() as session:
            result = session.run(query)
            return [record.data() for record in result]

# Initialize Neo4j Connection
neo4j_db = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
attack_data = neo4j_db.fetch_attack_data()
num_threats = max(1, len(attack_data))

print(f"Number of Threat Categories: {num_threats}")

# Threat Mapping
def map_threat_type(index):
    if index < len(attack_data):
        return attack_data[index]["CAPEC_Description"]
    return "Unknown Threat"

# Define DQN Model
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim, num_threats):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)
        self.threat_layer = nn.Linear(128, num_threats)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        threat_classification = torch.softmax(self.threat_layer(x), dim=1)
        return q_values, threat_classification

# Experience Replay Buffer
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, threat_type):
        self.buffer.append((state, action, reward, next_state, done, threat_type))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 100

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Initialize Networks
policy_net = DQN(state_dim, action_dim, num_threats)
target_net = DQN(state_dim, action_dim, num_threats)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
memory = ReplayBuffer(MEMORY_SIZE)

# Action Selection
def select_action(state, epsilon):
    if random.random() < epsilon:
        return random.randint(0, env.action_space.n - 1)
    state_tensor = torch.FloatTensor(state).unsqueeze(0)
    with torch.no_grad():
        return policy_net(state_tensor)[0].argmax().item()

# Train DQN Model
def train_dqn():
    if memory.size() < BATCH_SIZE:
        return
    batch = memory.sample(BATCH_SIZE)
    states, actions, rewards, next_states, dones, threat_types = zip(*batch)

    states = torch.FloatTensor(states)
    actions = torch.LongTensor(actions).unsqueeze(1)
    rewards = torch.FloatTensor(rewards).unsqueeze(1)
    next_states = torch.FloatTensor(next_states)
    dones = torch.FloatTensor([float(d) for d in dones]).unsqueeze(1)

    threat_types = torch.clamp(torch.LongTensor(threat_types), 0, num_threats - 1)

    q_values, threat_preds = policy_net(states)
    q_values = q_values.gather(1, actions)
    next_q_values, _ = target_net(next_states)
    target_q_values = rewards + GAMMA * next_q_values.max(1, keepdim=True)[0] * (1 - dones)

    q_loss = nn.MSELoss()(q_values, target_q_values.detach())
    threat_loss = nn.CrossEntropyLoss()(threat_preds, threat_types)
    loss = q_loss + threat_loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Training Loop
epsilon = EPSILON
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    state = np.zeros(state_dim) if state is None or len(state) == 0 else state
    done = False
    total_reward = 0

    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, truncated, info = env.step(action)
        next_state = np.zeros(state_dim) if next_state is None or len(next_state) == 0 else next_state

        # Predict Threat Type
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        _, threat_prediction = policy_net(state_tensor)

        if threat_prediction.numel() == 0 or threat_prediction.shape[1] != num_threats:
            print("Invalid Threat Prediction! Defaulting to 0")
            threat_type = 0
        else:
            threat_type = torch.argmax(threat_prediction, dim=1).item()

        threat_type = max(0, min(threat_type, num_threats - 1))
        memory.push(state, action, reward, next_state, done, threat_type)
        state = next_state
        train_dqn()
        total_reward += reward

    reward_history.append(total_reward)

    # Update Target Network
    if episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)
    print(f"Episode {episode}: Total Reward = {total_reward}, Epsilon = {epsilon:.4f}, Threat Detected: {map_threat_type(threat_type)}")

# Save Model
torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
neo4j_db.close()
print("Training complete.")


Number of Threat Categories: 1
Episode 0: Total Reward = 112.0, Epsilon = 0.9970, Threat Detected: Unknown Threat
Episode 1: Total Reward = 147.0, Epsilon = 0.9940, Threat Detected: Unknown Threat
Episode 2: Total Reward = 77.0, Epsilon = 0.9910, Threat Detected: Unknown Threat
Episode 3: Total Reward = 86.0, Epsilon = 0.9881, Threat Detected: Unknown Threat
Episode 4: Total Reward = 61.0, Epsilon = 0.9851, Threat Detected: Unknown Threat
Episode 5: Total Reward = 47.0, Epsilon = 0.9821, Threat Detected: Unknown Threat
Episode 6: Total Reward = 104.0, Epsilon = 0.9792, Threat Detected: Unknown Threat
Episode 7: Total Reward = -18.0, Epsilon = 0.9763, Threat Detected: Unknown Threat
Episode 8: Total Reward = 96.0, Epsilon = 0.9733, Threat Detected: Unknown Threat
Episode 9: Total Reward = 97.0, Epsilon = 0.9704, Threat Detected: Unknown Threat
Episode 10: Total Reward = 26.0, Epsilon = 0.9675, Threat Detected: Unknown Threat
Episode 11: Total Reward = 113.0, Epsilon = 0.9646, Threat Det

In [None]:
attack_data

[]

In [None]:
# THIS IS THE FIXED DQN + NEO4J + NASIM MODEL

import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
import json
from collections import deque
from neo4j import GraphDatabase

# Neo4j Configuration
NEO4J_URI = "bolt://0.tcp.in.ngrok.io:11755"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# Connect to Neo4j
class Neo4jConnector:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def fetch_attack_data(self):
        """Fetch attack details dynamically from Neo4j"""
        query = """
        MATCH (cve:CVE)-[:RelatedAttackPattern]->(capec:CAPEC)
        OPTIONAL MATCH (cve)-[:hasConsequence]->(con:Consequence)
        OPTIONAL MATCH (cve)-[:hasMitigation]->(mit:Mitigation)
        RETURN cve.Name AS CVE_ID, cve.Description AS Description, capec.Name AS CAPEC_ID, capec.Description AS CAPEC_Description,
               con.Description AS Consequence, mit.Description AS Mitigation
        """
        with self.driver.session() as session:
            result = session.run(query)
            return [record.data() for record in result]

# Initialize Neo4j Connection
neo4j_db = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# Fetch attack data
attack_data = neo4j_db.fetch_attack_data()
num_threats = max(1, len(attack_data))  # Ensure at least 1 threat class

print(f"Number of Threat Categories: {num_threats}")

# Define DQN Model
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim, num_threats):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)
        self.threat_layer = nn.Linear(128, num_threats)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        threat_classification = torch.softmax(self.threat_layer(x), dim=1)
        return q_values, threat_classification

# Experience Replay Buffer
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, threat_type):
        self.buffer.append((state, action, reward, next_state, done, threat_type))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 100

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Initialize Networks
policy_net = DQN(state_dim, action_dim, num_threats)
target_net = DQN(state_dim, action_dim, num_threats)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
memory = ReplayBuffer(MEMORY_SIZE)

# Action Selection
def select_action(state, epsilon):
    if random.random() < epsilon:
        return random.randint(0, env.action_space.n - 1)
    state_tensor = torch.FloatTensor(state).unsqueeze(0)
    with torch.no_grad():
        return policy_net(state_tensor)[0].argmax().item()

# Train DQN Model
def train_dqn():
    if memory.size() < BATCH_SIZE:
        return
    batch = memory.sample(BATCH_SIZE)
    states, actions, rewards, next_states, dones, threat_types = zip(*batch)

    states = torch.FloatTensor(states)
    actions = torch.LongTensor(actions).unsqueeze(1)
    rewards = torch.FloatTensor(rewards).unsqueeze(1)
    next_states = torch.FloatTensor(next_states)
    dones = torch.FloatTensor([float(d) for d in dones]).unsqueeze(1)

    # Fix: Ensure threat_types are within range
    threat_types = torch.clamp(torch.LongTensor(threat_types), 0, num_threats - 1)

    q_values, threat_preds = policy_net(states)
    q_values = q_values.gather(1, actions)
    next_q_values, _ = target_net(next_states)
    target_q_values = rewards + GAMMA * next_q_values.max(1, keepdim=True)[0] * (1 - dones)

    q_loss = nn.MSELoss()(q_values, target_q_values.detach())
    threat_loss = nn.CrossEntropyLoss()(threat_preds, threat_types)
    loss = q_loss + threat_loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Training Loop
epsilon = EPSILON
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    state = np.zeros(state_dim) if state is None or len(state) == 0 else state
    done = False
    total_reward = 0

    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, truncated, info = env.step(action)
        next_state = np.zeros(state_dim) if next_state is None or len(next_state) == 0 else next_state

        # Predict Threat Type
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        _, threat_prediction = policy_net(state_tensor)

        if threat_prediction.numel() == 0 or threat_prediction.shape[1] != num_threats:
            print("Invalid Threat Prediction! Defaulting to 0")
            threat_type = 0
        else:
            threat_type = torch.argmax(threat_prediction, dim=1).item()

        # Ensure threat_type is within valid range
        threat_type = max(0, min(threat_type, num_threats - 1))

        # Store experience
        memory.push(state, action, reward, next_state, done, threat_type)
        state = next_state
        train_dqn()
        total_reward += reward

    reward_history.append(total_reward)

    # Update Target Network
    if episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)
    print(f"Episode {episode}: Total Reward = {total_reward}, Epsilon = {epsilon:.4f}")

# Save Model
torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
neo4j_db.close()
print("Training complete.")




Number of Threat Categories: 1


  states = torch.FloatTensor(states)


Episode 0: Total Reward = 30.0, Epsilon = 0.9970
Episode 1: Total Reward = 96.0, Epsilon = 0.9940
Episode 2: Total Reward = 2.0, Epsilon = 0.9910
Episode 3: Total Reward = 61.0, Epsilon = 0.9881
Episode 4: Total Reward = 121.0, Epsilon = 0.9851
Episode 5: Total Reward = 131.0, Epsilon = 0.9821
Episode 6: Total Reward = 122.0, Epsilon = 0.9792
Episode 7: Total Reward = 101.0, Epsilon = 0.9763
Episode 8: Total Reward = 64.0, Epsilon = 0.9733
Episode 9: Total Reward = 35.0, Epsilon = 0.9704
Episode 10: Total Reward = 141.0, Epsilon = 0.9675
Episode 11: Total Reward = 138.0, Epsilon = 0.9646
Episode 12: Total Reward = 65.0, Epsilon = 0.9617
Episode 13: Total Reward = 79.0, Epsilon = 0.9588
Episode 14: Total Reward = 155.0, Epsilon = 0.9559
Episode 15: Total Reward = 137.0, Epsilon = 0.9531
Episode 16: Total Reward = 126.0, Epsilon = 0.9502
Episode 17: Total Reward = 101.0, Epsilon = 0.9474
Episode 18: Total Reward = 154.0, Epsilon = 0.9445
Episode 19: Total Reward = 122.0, Epsilon = 0.9417

In [None]:
from neo4j import GraphDatabase

uri = "bolt://0.tcp.in.ngrok.io:11755"  # Change to your Neo4j instance
user = "neo4j"
password = "12345678"

try:
    driver = GraphDatabase.driver(uri, auth=(user, password))
    with driver.session() as session:
        result = session.run("MATCH (n) RETURN n LIMIT 5")  # Fetch some data
        for record in result:
            print(record)
    print("Neo4j connection successful!")
except Exception as e:
    print("Neo4j connection failed:", e)



attack_type = "SQL Injection"  # Replace with detected attack type from RL model
query = f"MATCH (a:Attack {{type: '{attack_type}'}}) RETURN a.description"
with driver.session() as session:
    result = session.run(query)
    for record in result:
        print("Attack Description:", record["a.description"])


<Record n=<Node element_id='0' labels=frozenset({'GeneralInfo_CVE'}) properties={'Data_Format': 'MITRE', 'Data_Type': 'CVE', 'No_CVEs': '10861', 'Data_Version': '4.0', 'Timestamp': '2021-09-15T07:00Z'}>>
<Record n=<Node element_id='1' labels=frozenset({'CVE'}) properties={'Assigner': 'secure@intel.com', 'Description': ['Observable timing discrepancy in Intel(R) IPP before version 2020 update 1 may allow authorized user to potentially enable information disclosure via local access.'], 'Published_Date': '2021-06-09T20:15Z', 'Last_Modified_Date': '2021-06-28T18:03Z', 'Name': 'CVE-2021-0001'}>>
<Record n=<Node element_id='2' labels=frozenset({'CVE'}) properties={'Assigner': 'secure@intel.com', 'Description': ['Improper conditions check in some Intel(R) Ethernet Controllers 800 series Linux drivers before version 1.4.11 may allow an authenticated user to potentially enable information disclosure or denial of service via local access.'], 'Published_Date': '2021-08-11T13:15Z', 'Last_Modified_

In [None]:
# THIS IS THE REFINED DQN MODEL CONNECTING NEO4J + NASIM

import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
import json
from collections import deque
from neo4j import GraphDatabase

# Neo4j Configuration (Modify with actual credentials)
NEO4J_URI = "bolt://0.tcp.in.ngrok.io:11755"  # Replace with your ngrok forwarding address
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# Connect to Neo4j
class Neo4jConnector:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def fetch_attack_data(self, attack_name=None):
        """Fetch attack details dynamically from Neo4j"""
        query = """
        MATCH (cve:CVE)-[:RelatedAttackPattern]->(capec:CAPEC)
        OPTIONAL MATCH (cve)-[:hasConsequence]->(con:Consequence)
        OPTIONAL MATCH (cve)-[:hasMitigation]->(mit:Mitigation)
        RETURN cve.Name AS CVE_ID, cve.Description AS Description,
               capec.Name AS CAPEC_ID, capec.Description AS CAPEC_Description,
               con.Description AS Consequence, mit.Description AS Mitigation
        """
        with self.driver.session() as session:
            result = session.run(query)
            return [record.data() for record in result]

# Initialize Neo4j Connection
neo4j_db = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# Fetch attack data dynamically
attack_data = neo4j_db.fetch_attack_data()

# Define DQN Model with Threat Classification
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim, num_threats):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)  # Q-values for actions
        self.threat_layer = nn.Linear(128, num_threats)  # Threat classification

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        threat_classification = torch.softmax(self.threat_layer(x), dim=1)
        return q_values, threat_classification

# Experience Replay Buffer
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, threat_type):
        self.buffer.append((state, action, reward, next_state, done, threat_type))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 100

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
num_threats = len(attack_data)  # Dynamically set based on attack data

# Initialize Networks
policy_net = DQN(state_dim, action_dim, num_threats)
target_net = DQN(state_dim, action_dim, num_threats)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
memory = ReplayBuffer(MEMORY_SIZE)

# Action Selection with Epsilon-Greedy Strategy
def select_action(state, epsilon):
    if random.random() < epsilon:
        return random.randint(0, env.action_space.n - 1)
    else:
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            return policy_net(state_tensor)[0].argmax().item()

# Train DQN Model
def train_dqn():
    if memory.size() < BATCH_SIZE:
        return
    batch = memory.sample(BATCH_SIZE)
    states, actions, rewards, next_states, dones, threat_types = zip(*batch)

    states = torch.FloatTensor(states)
    actions = torch.LongTensor(actions).unsqueeze(1)
    rewards = torch.FloatTensor(rewards).unsqueeze(1)
    next_states = torch.FloatTensor(next_states)
    dones = torch.FloatTensor(dones).unsqueeze(1)
    threat_types = torch.LongTensor(threat_types)

    q_values, threat_preds = policy_net(states)
    q_values = q_values.gather(1, actions)
    next_q_values, _ = target_net(next_states)
    target_q_values = rewards + GAMMA * next_q_values.max(1, keepdim=True)[0] * (1 - dones)

    q_loss = nn.MSELoss()(q_values, target_q_values.detach())
    threat_loss = nn.CrossEntropyLoss()(threat_preds, threat_types)
    loss = q_loss + threat_loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Training Loop with Neo4j Integration
epsilon = EPSILON
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    state = np.zeros(state_dim) if state is None or len(state) == 0 else state

    done = False
    total_reward = 0

    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, truncated, info = env.step(action)
        next_state = np.zeros(state_dim) if next_state is None or len(next_state) == 0 else next_state

        # Predict Threat Type
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        _, threat_prediction = policy_net(state_tensor)
        threat_type = torch.argmax(threat_prediction, dim=1).item()

        # Fetch CVE Details from Neo4j
        if 0 <= threat_type < len(attack_data):
            detected_threat = attack_data[threat_type]
            print(f"Detected Threat: {detected_threat}")
        else:
            print(f"Warning: Threat type {threat_type} out of range.")

        # Adjust Reward Based on CVSS
        reward += 10 if "successful_attack" in info else 0
        reward -= 5 if "false_positive" in info else 0
        reward -= 15 if "missed_attack" in info else 0

        # Store experience
        memory.push(state, action, reward, next_state, done, threat_type)
        state = next_state
        train_dqn()
        total_reward += reward

    reward_history.append(total_reward)

    # Update Target Network
    if episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

    # Decay epsilon
    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)

    print(f"Episode {episode}: Total Reward = {total_reward}, Epsilon = {epsilon:.4f}")

    # Early Stopping
    if episode > 50 and np.std(reward_history[-50:]) < 1.0:
        print(f"Stopping early at Episode {episode} as rewards have stabilized.")
        break

# Save Model
torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
neo4j_db.close()
print("Training complete. Model and logs saved.")


IndexError: argmax(): Expected reduction dim 1 to have non-zero size.

In [None]:
from neo4j import GraphDatabase

# Neo4j Connection Details
NEO4J_URI = "bolt://0.tcp.in.ngrok.io:11755"  # Update this if ngrok changes the port
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "12345678"

# Connect to Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# Function to Get CVE Details and Format Output
def get_cve_details(cve_id):
    query = f"MATCH (c:CVE {{Name: '{cve_id}'}}) RETURN c"
    with driver.session() as session:
        result = session.run(query)
        cve_list = []
        for record in result:
            node = record["c"]
            cve_info = {
                "CVE_ID": node["Name"],
                "Assigner": node["Assigner"],
                "Description": node["Description"][0],  # Extracting first description
                "Published Date": node["Published_Date"],
                "Last Modified Date": node["Last_Modified_Date"]
            }
            cve_list.append(cve_info)
        return cve_list if cve_list else "No CVE found."

# Example Usage:
cve_data = get_cve_details("CVE-2021-0009")
print(cve_data)  # Now returns formatted JSON

# Close the driver when done
driver.close()


[{'CVE_ID': 'CVE-2021-0009', 'Assigner': 'secure@intel.com', 'Description': 'Out-of-bounds read in the firmware for Intel(R) Ethernet Adapters 800 Series Controllers and associated adapters before version 1.5.3.0 may allow an unauthenticated user to potentially enable denial of service via adjacent access.', 'Published Date': '2021-08-11T13:15Z', 'Last Modified Date': '2021-09-14T18:34Z'}]


Collecting nasim
  Downloading nasim-0.12.0-py3-none-any.whl.metadata (8.6 kB)
Collecting gymnasium>=0.26 (from nasim)
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium>=0.26->nasim)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading nasim-0.12.0-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.2/78.2 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m50.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium, nasim
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0 nasim-0.12.0


In [None]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import nasim
from collections import deque
import json

# Define DQN Model with Threat Classification
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim, num_threats):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.q_value_layer = nn.Linear(128, action_dim)  # Q-values for actions
        self.threat_layer = nn.Linear(128, num_threats)  # Threat classification

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        q_values = self.q_value_layer(x)
        threat_classification = torch.softmax(self.threat_layer(x), dim=1)
        return q_values, threat_classification

# Experience Replay Buffer
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, threat_type):
        self.buffer.append((state, action, reward, next_state, done, threat_type))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def size(self):
        return len(self.buffer)

# Hyperparameters
GAMMA = 0.99
ALPHA = 0.001
EPSILON = 1.0
EPSILON_DECAY = 0.997  # Faster decay
MIN_EPSILON = 0.01
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE = 10
MAX_EPISODES = 1000  # Increased episodes

# Initialize NASim Environment
env = nasim.make_benchmark('tiny', flat_actions=True, flat_obs=True)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
num_threats = 5  # Number of attack types to classify

# Initialize Networks
policy_net = DQN(state_dim, action_dim, num_threats)
target_net = DQN(state_dim, action_dim, num_threats)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=ALPHA)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)  # Reduce LR every 50 episodes
memory = ReplayBuffer(MEMORY_SIZE)

# Action Selection with Epsilon-Greedy Strategy
def select_action(state, epsilon):
    if random.random() < epsilon:
        action = random.randint(0, env.action_space.n - 1)  # Ensure valid integer action
    else:
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            action = policy_net(state_tensor)[0].argmax().item()  # Get Q-values and select best action
    return int(action)  # Ensure integer action for NASim

# Train DQN Model
def train_dqn():
    if memory.size() < BATCH_SIZE:
        return
    batch = memory.sample(BATCH_SIZE)
    states, actions, rewards, next_states, dones, threat_types = zip(*batch)

    states = torch.FloatTensor(states)
    actions = torch.LongTensor(actions).unsqueeze(1)
    rewards = torch.FloatTensor(rewards).unsqueeze(1)
    next_states = torch.FloatTensor(next_states)
    dones = torch.FloatTensor([float(d) for d in dones]).unsqueeze(1)  # Convert to float
    threat_types = torch.LongTensor(threat_types)

    q_values, threat_preds = policy_net(states)
    q_values = q_values.gather(1, actions)
    next_q_values, _ = target_net(next_states)
    target_q_values = rewards + GAMMA * next_q_values.max(1, keepdim=True)[0] * (1 - dones)

    # Compute loss
    q_loss = nn.MSELoss()(q_values, target_q_values.detach())
    threat_loss = nn.CrossEntropyLoss()(threat_preds, threat_types)
    loss = q_loss + threat_loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Training Loop with Reward Shaping
epsilon = EPSILON
log_data = []
reward_history = []

for episode in range(MAX_EPISODES):
    state, _ = env.reset()
    if state is None or len(state) == 0:
        state = np.zeros(state_dim)  # Ensure valid state

    done = False
    previous_action = None  # Track previous action for repeated action penalty
    episode_log = []
    total_reward = 0

    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, truncated, info = env.step(action)

        if next_state is None or len(next_state) == 0:
            next_state = np.zeros(state_dim)  # Ensure valid next state

        # Predict the threat type correctly
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        _, threat_prediction = policy_net(state_tensor)
        threat_type = torch.argmax(threat_prediction, dim=1).item()

        # Reward Shaping Implementation
        if "successful_attack" in info:
            reward += 10  # Reward for detecting threats correctly
        if "false_positive" in info:
            reward -= 5  # Penalize false positives
        if "missed_attack" in info:
            reward -= 15  # Penalize missing an actual attack
        if action == previous_action:
            reward -= 2  # Penalize repeated actions

        previous_action = action  # Update previous action

        # Store experience
        memory.push(state, action, reward, next_state, done, threat_type)
        state = next_state
        train_dqn()
        total_reward += reward

        # Log details
        episode_log.append({
            "state": state.tolist(),
            "action": action,
            "next_state": next_state.tolist(),
            "reward": reward,
            "threat_detected": threat_type
        })

    log_data.append(episode_log)
    reward_history.append(total_reward)

    # Update Target Network
    if episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

    # Decay epsilon
    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)

    # Adjust Learning Rate
    scheduler.step()

    print(f"Episode {episode}: Total Reward = {total_reward}, Epsilon = {epsilon:.4f}, LR = {scheduler.get_last_lr()[0]:.6f}")

    # **Early Stopping Check**: If rewards stabilize over 50 episodes, stop training
    if episode > 50 and np.std(reward_history[-50:]) < 1.0:  # Check variance of last 50 episodes
        print(f"Stopping early at Episode {episode} as reward has stabilized.")
        break

# Save Logs and Model
with open("attack_logs.json", "w") as f:
    json.dump(log_data, f)

torch.save(policy_net.state_dict(), "dqn_nasim_model.pth")
print("Training complete. Model and logs saved.")


  states = torch.FloatTensor(states)


Episode 0: Total Reward = 66.0, Epsilon = 0.9970, LR = 0.001000
Episode 1: Total Reward = 156.0, Epsilon = 0.9940, LR = 0.001000
Episode 2: Total Reward = 10.0, Epsilon = 0.9910, LR = 0.001000
Episode 3: Total Reward = 160.0, Epsilon = 0.9881, LR = 0.001000
Episode 4: Total Reward = 61.0, Epsilon = 0.9851, LR = 0.001000
Episode 5: Total Reward = 119.0, Epsilon = 0.9821, LR = 0.001000
Episode 6: Total Reward = 140.0, Epsilon = 0.9792, LR = 0.001000
Episode 7: Total Reward = 68.0, Epsilon = 0.9763, LR = 0.001000
Episode 8: Total Reward = 35.0, Epsilon = 0.9733, LR = 0.001000
Episode 9: Total Reward = 97.0, Epsilon = 0.9704, LR = 0.001000
Episode 10: Total Reward = 119.0, Epsilon = 0.9675, LR = 0.001000
Episode 11: Total Reward = 82.0, Epsilon = 0.9646, LR = 0.001000


KeyboardInterrupt: 