In [10]:
import os
import sys
import pandas as pd
import torch
import numpy as np
import networkx as nx
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from torch_geometric.data import Data
from torch_geometric.nn import NNConv
import torch.nn as nn
import torch.nn.functional as F

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cpu


In [12]:
CSV_PATH = '../datasets/data/NF-ToN-IoT.csv'
df = pd.read_csv(CSV_PATH)
# df = df.sample(frac=0.30, random_state=42)

In [13]:
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["Label"])

In [14]:
df["src_node"] = df["IPV4_SRC_ADDR"].astype(str) + ":" + df["L4_SRC_PORT"].astype(str)
df["dst_node"] = df["IPV4_DST_ADDR"].astype(str) + ":" + df["L4_DST_PORT"].astype(str)

In [15]:
# Randomize IPs
unique_src_ips = df["IPV4_SRC_ADDR"].unique()
new_ips = np.random.permutation(unique_src_ips)
ip_map = dict(zip(unique_src_ips, new_ips))
df["IPV4_SRC_ADDR"] = df["IPV4_SRC_ADDR"].map(ip_map)
df["IPV4_DST_ADDR"] = df["IPV4_DST_ADDR"].map(lambda x: ip_map.get(x, x))
df["src_node"] = df["IPV4_SRC_ADDR"].astype(str) + ":" + df["L4_SRC_PORT"].astype(str)
df["dst_node"] = df["IPV4_DST_ADDR"].astype(str) + ":" + df["L4_DST_PORT"].astype(str)

In [16]:
all_nodes = pd.Index(df["src_node"].tolist() + df["dst_node"].tolist()).unique()
ip_to_idx = {node: idx for idx, node in enumerate(all_nodes)}
num_nodes = len(all_nodes)

In [17]:
pkt_pair, byte_pair, flow_pair = {}, {}, {}
node_total_flows = {node: 0 for node in all_nodes}
node_attack_flows = {node: 0 for node in all_nodes}
edge_list, edge_attr_list = [], []

In [18]:
edge_list = []
edge_attr_list = []
pkt_pair = {}
byte_pair = {}
flow_pair = {}
node_total_flows = {node: 0 for node in all_nodes}
node_attack_flows = {node: 0 for node in all_nodes}

In [19]:
for row in df.itertuples(index=False):
    src = row.src_node
    dst = row.dst_node
    src_idx = ip_to_idx[src]
    dst_idx = ip_to_idx[dst]
    in_pkts = row.IN_PKTS
    out_pkts = row.OUT_PKTS
    in_bytes = row.IN_BYTES
    out_bytes = row.OUT_BYTES
    duration = row.FLOW_DURATION_MILLISECONDS
    flags = row.TCP_FLAGS
    l7 = row.L7_PROTO
    protocol = row.PROTOCOL
    total_pkts = in_pkts + out_pkts
    total_bytes = in_bytes + out_bytes

    key = (src_idx, dst_idx)
    pkt_pair[key] = pkt_pair.get(key, 0) + total_pkts
    byte_pair[key] = byte_pair.get(key, 0) + total_bytes
    flow_pair[key] = flow_pair.get(key, 0) + 1

    node_total_flows[src] += 1
    node_total_flows[dst] += 1
    if row.label != 0:
        node_attack_flows[src] += 1
        node_attack_flows[dst] += 1

    edge_list.append((src_idx, dst_idx))
    edge_attr_list.append([in_bytes, out_bytes, in_pkts, out_pkts, duration, flags, l7, protocol])

In [20]:
filtered_edges, filtered_attrs = [], []
for i, (src_idx, dst_idx) in enumerate(edge_list):
    if pkt_pair.get((src_idx, dst_idx), 0) >= 50:
        filtered_edges.append([src_idx, dst_idx])
        filtered_attrs.append(edge_attr_list[i])

if not filtered_edges:
    raise ValueError("No edges meet the packet threshold.")

In [21]:
edge_index = torch.tensor(filtered_edges, dtype=torch.long).t().contiguous().to(device)

In [22]:
scaler = StandardScaler()
edge_attr_np = scaler.fit_transform(filtered_attrs)
edge_attr = torch.tensor(edge_attr_np, dtype=torch.float).to(device)

In [23]:
G = nx.Graph()
G.add_edges_from(edge_index.t().cpu().tolist())

In [24]:
degree_dict = dict(G.degree())
clustering_dict = nx.clustering(G)
degree_arr = np.array([degree_dict.get(i, 0) for i in range(num_nodes)], dtype=float)
clustering_arr = np.array([clustering_dict.get(i, 0.0) for i in range(num_nodes)], dtype=float)

In [25]:
total_pkts_arr = np.zeros(num_nodes)
total_bytes_arr = np.zeros(num_nodes)
flow_count_arr = np.zeros(num_nodes)
attack_fraction_arr = np.zeros(num_nodes)

In [26]:
for (u, v), val in pkt_pair.items():
    total_pkts_arr[u] += val
    total_pkts_arr[v] += val
for (u, v), val in byte_pair.items():
    total_bytes_arr[u] += val
    total_bytes_arr[v] += val

for i, node in enumerate(all_nodes):
    total = node_total_flows[node]
    attacks = node_attack_flows[node]
    flow_count_arr[i] = total
    attack_fraction_arr[i] = attacks / total if total > 0 else 0.0

In [27]:
node_stats = np.vstack([
    degree_arr,
    total_pkts_arr,
    total_bytes_arr,
    flow_count_arr,
    attack_fraction_arr,
    clustering_arr
]).T

In [28]:
x = torch.tensor(StandardScaler().fit_transform(node_stats), dtype=torch.float).to(device)

In [29]:
node_labels = np.array([
    1 if node_total_flows[node] > 0 and (node_attack_flows[node] / node_total_flows[node]) >= 0.3 else 0
    for node in all_nodes
], dtype=int)

In [30]:
y = torch.tensor(node_labels, dtype=torch.long).to(device)
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

In [None]:
idx = np.arange(data.num_nodes)
train_idx, test_idx = train_test_split(idx, test_size=0.3, stratify=data.y.cpu())
train_idx, val_idx = train_test_split(train_idx, test_size=0.15/0.7, stratify=data.y[train_idx].cpu())

In [None]:
train_mask = torch.zeros(data.num_nodes, dtype=torch.bool).to(device)
val_mask = torch.zeros(data.num_nodes, dtype=torch.bool).to(device)
test_mask = torch.zeros(data.num_nodes, dtype=torch.bool).to(device)
train_mask[train_idx] = True
val_mask[val_idx] = True
test_mask[test_idx] = True

In [None]:
data.train_mask = train_mask
data.val_mask = val_mask
data.test_mask = test_mask

In [None]:
class EdgeEnhancedGCN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, edge_dim):
        super().__init__()
        self.edge_mlp1 = nn.Sequential(
            nn.Linear(edge_dim, hidden_channels * in_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels * in_channels, in_channels * hidden_channels)
        )
        self.conv1 = NNConv(in_channels, hidden_channels, self.edge_mlp1, aggr='mean')

        self.edge_mlp2 = nn.Sequential(
            nn.Linear(edge_dim, hidden_channels * hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels * hidden_channels, hidden_channels * out_channels)
        )
        self.conv2 = NNConv(hidden_channels, out_channels, self.edge_mlp2, aggr='mean')

        self.dropout = nn.Dropout(0.3)

    def forward(self, data):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.relu(self.conv1(x, edge_index, edge_attr))
        x = self.dropout(x)
        x = self.conv2(x, edge_index, edge_attr)
        x = torch.clamp(x, -1e6, 1e6)
        return x

In [None]:
in_dim = data.x.shape[1]
hidden_dim = 32
out_dim = 2
edge_dim = data.edge_attr.shape[1]

In [None]:
model = EdgeEnhancedGCN(in_dim, hidden_dim, out_dim, edge_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

class_weights = torch.tensor([
    1.0,
    (train_mask.sum().item() / max(1, (node_labels[train_mask.cpu()] == 1).sum()))
], dtype=torch.float).to(device)

loss_fn = nn.CrossEntropyLoss(weight=class_weights)

In [None]:
best_val_f1 = 0.0
epochs_without_improve = 0
patience = 15

In [None]:
for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = loss_fn(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        logits = model(data)
        val_preds = logits[data.val_mask].argmax(dim=1).cpu().numpy()
        val_true = data.y[data.val_mask].cpu().numpy()
        val_f1 = f1_score(val_true, val_preds, average="weighted")

    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pt")
        epochs_without_improve = 0
    else:
        epochs_without_improve += 1
        if epochs_without_improve >= patience:
            break

    print(f"Epoch {epoch}, Loss: {loss.item():.4f}, Val F1: {val_f1:.4f}")

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score

model.load_state_dict(torch.load("best_model.pt"))
model.eval()
with torch.no_grad():
    logits = model(data)
    test_preds = logits[data.test_mask].argmax(dim=1).cpu().numpy()
    test_true = data.y[data.test_mask].cpu().numpy()

    test_f1 = f1_score(test_true, test_preds, average="weighted")
    test_precision = precision_score(test_true, test_preds, average="weighted", zero_division=0)
    test_recall = recall_score(test_true, test_preds, average="weighted", zero_division=0)
    acc = (test_preds == test_true).sum() / len(test_true)

print(f"Test Accuracy: {acc:.4f}")
print(f"Test Weighted F1: {test_f1:.4f}")
print(f"Test Weighted Precision: {test_precision:.4f}")
print(f"Test Weighted Recall: {test_recall:.4f}")

```Output base model: 
Using device: cuda
Epoch 0, Loss: 0.7441, Val F1: 0.5357
Epoch 1, Loss: 0.6976, Val F1: 0.9971
Epoch 2, Loss: 0.6508, Val F1: 0.9874
Epoch 3, Loss: 0.7190, Val F1: 0.9871
Epoch 4, Loss: 0.6067, Val F1: 0.9868
Epoch 5, Loss: 0.5903, Val F1: 0.9868
Epoch 6, Loss: 0.5644, Val F1: 0.9863
Epoch 7, Loss: 0.5383, Val F1: 0.9862
Epoch 8, Loss: 0.5324, Val F1: 0.9860
Epoch 9, Loss: 0.5119, Val F1: 0.9860
Epoch 10, Loss: 0.4807, Val F1: 0.9860
Epoch 11, Loss: 0.4679, Val F1: 0.9863
Epoch 12, Loss: 0.4515, Val F1: 0.9863
Epoch 13, Loss: 0.4357, Val F1: 0.9863
Epoch 14, Loss: 0.4178, Val F1: 0.9863
Epoch 15, Loss: 0.3886, Val F1: 0.9863
Test Accuracy: 0.9976, Test Weighted F1: 0.9976