In [348]:
import random
from typing import Literal, NamedTuple

from TabM import Model, make_parameter_groups
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from datetime import datetime
import time
import cpuinfo
import torch
import psutil
import random
import torch.nn as nn
import torch.optim as optim
import platform
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, average_precision_score, accuracy_score, roc_auc_score, precision_recall_curve, auc, f1_score, recall_score, precision_score, classification_report

In [349]:
SEED = 1234
random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)

def get_memory_usage():
    process = psutil.Process(os.getpid())
    mem = process.memory_info().rss / (1024 ** 2)
    return mem

In [350]:
def test(model, test_loader):
    start_time = time.time()
    all_predictions = []
    all_labels = []
    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch).cpu().numpy()
            preds = (outputs >= 0.5).astype(int)
            all_predictions.extend(preds if preds.ndim == 1 else preds.tolist())
            all_labels.extend(y_batch.cpu().numpy().tolist())

    all_predictions = np.array(all_predictions)
    all_labels = np.array(all_labels)
    elapsed_time = time.time() - start_time

    M = "OK-"
    if len(set(all_predictions)) == 1:
        M = "ER-"
        
    cm = confusion_matrix(all_labels, all_predictions)
    print("Confusion Matrix:")
    print(cm)
    
    tn, fp, fn, tp = cm.ravel()
    false_positive_rate = fp / (fp + tn) if (fp + tn) > 0 else 0
    false_negative_rate = fn / (fn + tp) if (fn + tp) > 0 else 0
    
    sensitivity = tp / (tp + fn) if (tp + fn) != 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    accuracy = accuracy_score(all_labels, all_predictions)
    precision = precision_score(all_labels, all_predictions)
    recall = recall_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    roc_auc = roc_auc_score(all_labels, all_predictions)
    aupr = average_precision_score(all_labels, all_predictions)
    Far = fp / (fp + tn) if (fp + tn) != 0 else 0
    
    print(classification_report(all_labels, all_predictions, digits=5))
    errors = [(i, p, l) for i, (p, l) in enumerate(zip(all_predictions, all_labels)) if p != l]
    print(f"Total Errors: {len(errors)}")
    for i, pred, label in errors[:5]:
        print(f"Index: {i}, Predicted: {pred}, Actual: {label}")
    memory_usage = get_memory_usage()
    return f"{M} Accuracy: {accuracy:.5f}, Precision: {precision:.5f}, Recall: {recall:.5f}, F1: {f1:.5f}, ROC AUC: {roc_auc:.5f}, AUPR (PR-AUC): {aupr:.5f}, Sensitivity: {sensitivity:.5f}, Specificity: {specificity:.5f}, Far: {Far}, False Positive Rate (FPR): {false_positive_rate:.5f}, False Negative Rate (FNR): {false_negative_rate:.5f}, Runtime: {elapsed_time:.3f} sec , Memory Usage: {memory_usage:.2f} MB"


In [363]:
def train(model, criterion, optimizer, scheduler, epochs, train_loader, val_loader, test_loader):
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        model.train()
        for batch_idx, (X_train_batch, y_train_batch) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs_train = model(X_train_batch)
            loss_train = criterion(outputs_train, y_train_batch.float())
            loss_train.backward()
            optimizer.step()
            scheduler.step()
            
            if (batch_idx + 1) % 10 == 0:
                model.eval()
                with torch.no_grad():
                    val_loss_batch = 0
                    for X_val_batch, y_val_batch in val_loader:
                        outputs_val = model(X_val_batch)
                        loss_val = criterion(outputs_val, y_val_batch.float())
                        val_loss_batch += loss_val.item()
                    train_loss_batch = 0
                    for X_train_batch, y_train_batch in train_loader:
                        outputs_train = model(X_train_batch)
                        loss_train = criterion(outputs_train, y_train_batch.float())
                        train_loss_batch += loss_train.item()
                        
                    val_loss_avg = val_loss_batch / len(val_loader)
                    train_loss_avg = train_loss_batch / len(train_loader)
                
                for param_group in optimizer.param_groups:
                    lrnum = param_group['lr']
                train_losses.append(train_loss_avg)
                val_losses.append(val_loss_avg)
                print(f'Epoch [{epoch+1}/{epochs}], Batch [{batch_idx+1}/{len(train_loader)}], Train Loss: {loss_train.item():.4f}, Val Loss: {val_loss_avg:.4f}, LR: {lrnum:.10f}')
                model.train()
        
        if test_loader and epoch % 10 == 0:
            RES = test(model, test_loader)
            print(f"Epoch {epoch+1}: {RES}")

    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Val Loss')
    plt.legend()
    plt.savefig('loss.pdf')
    plt.close()

In [376]:
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', warmup_epochs=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.warmup_epochs = warmup_epochs

    def __call__(self, val_loss, model, epoch):
        if epoch < self.warmup_epochs:
            if self.verbose:
                print(f"Warmup epoch [{epoch+1}/{self.warmup_epochs}]. Skipping EarlyStopping check.")
            return
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

def train(model, criterion, optimizer, scheduler, epochs, train_loader, val_loader, device, early_stopping):
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        model.train()
        running_train_loss = 0.0
        for X_train_batch, y_train_batch in train_loader:
            X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
            optimizer.zero_grad()
            outputs_train = model(X_train_batch)
            loss_train = criterion(outputs_train, y_train_batch.float())
            loss_train.backward()
            optimizer.step()
            if scheduler:
                scheduler.step()
            running_train_loss += loss_train.item()

        epoch_train_loss = running_train_loss / len(train_loader)
        train_losses.append(epoch_train_loss)
        model.eval()
        running_val_loss = 0.0
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
                outputs_val = model(X_val_batch)
                loss_val = criterion(outputs_val, y_val_batch.float())
                running_val_loss += loss_val.item()
        epoch_val_loss = running_val_loss / len(val_loader)
        val_losses.append(epoch_val_loss)
        print(f'Epoch [{epoch+1}/{epochs}] | Train Loss: {epoch_train_loss:.4f} | Val Loss: {epoch_val_loss:.4f}')
        early_stopping(epoch_val_loss, model, epoch)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    print(f"Loading best model from '{early_stopping.path}' with validation loss: {early_stopping.val_loss_min:.4f}")
    model.load_state_dict(torch.load(early_stopping.path, weights_only=True))

    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Val Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss Per Epoch')
    plt.legend()
    plt.grid(True)
    plt.savefig('loss_per_epoch.pdf')
    plt.close()
    
    return model

In [371]:
def CONVERT(df):
    X = df.drop(' Label', axis=1)
    y = df[' Label']
    X.columns = X.columns.str.strip()
    important_features = ['Bwd Packet Length Std', 'Average Packet Size', 'Flow Duration', 'Flow IAT Std']
    important_df = X[important_features] * 2.0
    remaining_df = X.drop(columns=important_features)
    X = pd.concat([remaining_df, important_df], axis=1)
    return X, y

train_loader, val_loader, test_loader = None, None, None
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'=========== D: {device} ===========\n')
current_dir = "Data/"
df = pd.read_csv(os.path.join(current_dir, 'DDos.csv'))
encoder = LabelEncoder()
df[' Label'] = encoder.fit_transform(df[' Label'])
df = df.replace([np.inf, -np.inf], np.nan)
df = df.dropna()
df = df.sample(n=10000, random_state=28, replace=False)
X, y = CONVERT(df)
print(y.value_counts())

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

X_train = np.array(X_train)
y_train = np.array(y_train)
X_val = np.array(X_val)
y_val = np.array(y_val)
X_test = np.array(X_test)
y_test = np.array(y_test)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


 Label
1    5675
0    4325
Name: count, dtype: int64


In [372]:
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [387]:
class MODEL(nn.Module):
    def __init__(self, tabm):
        super(MODEL, self).__init__()
        self.tabm = tabm
        self.C1 = nn.Conv1d(10, 16, 4)
        self.C2 = nn.Conv1d(16, 16, 4)
        self.C3 = nn.Conv1d(16, 16, 4)
        self.pool = nn.MaxPool1d(3)
        self.F1 = nn.Linear(16, 32)
        self.F2 = nn.Linear(32, 1)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(0.1)
        self.attn_weights = nn.Parameter(torch.randn(78))

    def forward(self, x=None, x_cat=None):
        t = self.tabm(x, x_cat)
        attention = torch.matmul(x, self.attn_weights)
        attention = torch.sigmoid(attention).unsqueeze(1)
        y = (x * attention).unsqueeze(1)
        
        x = x.unsqueeze(1)
        x = torch.cat((x, y, t), dim=1)
        x = self.tanh(self.C1(x))
        x = self.pool(x)
        x = self.dropout(x)
        x = self.tanh(self.C2(x))
        x = self.pool(x)
        x = self.dropout(x)
        x = self.tanh(self.C3(x))
        x = self.pool(x)
        x = self.dropout(x).squeeze(2)
        x = self.tanh(self.F1(x))
        x = self.dropout(x)
        x = self.sigmoid(self.F2(x)).squeeze(1)
        return x

backbone_config = {
    'type': 'MLP',
    'n_blocks': 2,
    'd_block': 32,
    'dropout': 0.1,
}
tabm = Model(
    n_num_features=78,
    cat_cardinalities=[],
    n_classes=78,
    backbone=backbone_config,
    num_embeddings=None,
    arch_type='tabm',
    bins=None,
    k=8
)

model = MODEL(tabm=tabm)

In [388]:
epochs = 200
SL = len(train_loader) * epochs
model.to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=SL, eta_min=1e-5)
early_stopper = EarlyStopping(patience=30, verbose=True, path='best_model.pt', warmup_epochs=100)

total_params = sum(p.numel() for p in model.parameters())
print(f"=========== TP: {total_params:,} ===========")



In [389]:
best_model = train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=epochs,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    early_stopping=early_stopper
)
RES = test(best_model, test_loader)
print(RES)

Epoch [1/200] | Train Loss: 0.4059 | Val Loss: 0.0602
Warmup epoch [1/100]. Skipping EarlyStopping check.
Epoch [2/200] | Train Loss: 0.0498 | Val Loss: 0.0322
Warmup epoch [2/100]. Skipping EarlyStopping check.
Epoch [3/200] | Train Loss: 0.0410 | Val Loss: 0.0188
Warmup epoch [3/100]. Skipping EarlyStopping check.
Epoch [4/200] | Train Loss: 0.0331 | Val Loss: 0.0197
Warmup epoch [4/100]. Skipping EarlyStopping check.
Epoch [5/200] | Train Loss: 0.0353 | Val Loss: 0.0160
Warmup epoch [5/100]. Skipping EarlyStopping check.
Epoch [6/200] | Train Loss: 0.0314 | Val Loss: 0.0330
Warmup epoch [6/100]. Skipping EarlyStopping check.
Epoch [7/200] | Train Loss: 0.0268 | Val Loss: 0.0132
Warmup epoch [7/100]. Skipping EarlyStopping check.
Epoch [8/200] | Train Loss: 0.0267 | Val Loss: 0.0143
Warmup epoch [8/100]. Skipping EarlyStopping check.
Epoch [9/200] | Train Loss: 0.0227 | Val Loss: 0.0110
Warmup epoch [9/100]. Skipping EarlyStopping check.
Epoch [10/200] | Train Loss: 0.0229 | Val Loss

In [391]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEEDlist = [28, 7, 1234, 2025]
for i, SEED in enumerate(SEEDlist):
    train_loader, val_loader, test_loader = None, None, None
    print(f"=========== SEED: {SEED} , FOLD: {i+1}/{len(SEEDlist)}, D: {device} ===========")
    random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    np.random.seed(SEED)
    current_dir = "Data/"
    df = pd.read_csv(os.path.join(current_dir, 'DDos.csv'))
    encoder = LabelEncoder()
    df[' Label'] = encoder.fit_transform(df[' Label'])
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna()
    df = df.astype(int)
    df = df.sample(n=10000, random_state=SEED, replace=False)
    X = df.drop(' Label', axis=1)
    y = df[' Label']
    X.columns = X.columns.str.strip()
    important_features = ['Bwd Packet Length Std', 'Average Packet Size', 'Flow Duration', 'Flow IAT Std']
    important_df = X[important_features] * 2.0
    remaining_df = X.drop(columns=important_features)
    X = pd.concat([remaining_df, important_df], axis=1)
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    print(y.value_counts())
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_val = np.array(X_val)
    y_val = np.array(y_val)
    X_test = np.array(X_test)
    y_test = np.array(y_test)
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)
    batch_size = 64
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    epochs = 150
    SL = len(train_loader) * epochs
    backbone_config = {
        'type': 'MLP',
        'n_blocks': 2,
        'd_block': 32,
        'dropout': 0.1,
    }
    tabm = Model(
        n_num_features=78,
        cat_cardinalities=[],
        n_classes=78,
        backbone=backbone_config,
        num_embeddings=None,
        arch_type='tabm',
        bins=None,
        k=8
    )
    model = MODEL(tabm=tabm).to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=SL, eta_min=1e-5)
    early_stopper = EarlyStopping(patience=15, verbose=True, path='best_model.pt', warmup_epochs=100)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"=========== TP: {total_params:,} ===========")
    best_model = train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        epochs=epochs,
        train_loader=train_loader,
        val_loader=val_loader,
        device=device,
        early_stopping=early_stopper
    )
    RES = test(best_model, test_loader)
    print(RES)

 Label
1    5675
0    4325
Name: count, dtype: int64
Epoch [1/150] | Train Loss: 0.4152 | Val Loss: 0.0460
Warmup epoch [1/100]. Skipping EarlyStopping check.
Epoch [2/150] | Train Loss: 0.0500 | Val Loss: 0.0369
Warmup epoch [2/100]. Skipping EarlyStopping check.
Epoch [3/150] | Train Loss: 0.0425 | Val Loss: 0.0260
Warmup epoch [3/100]. Skipping EarlyStopping check.
Epoch [4/150] | Train Loss: 0.0332 | Val Loss: 0.0240
Warmup epoch [4/100]. Skipping EarlyStopping check.
Epoch [5/150] | Train Loss: 0.0288 | Val Loss: 0.0172
Warmup epoch [5/100]. Skipping EarlyStopping check.
Epoch [6/150] | Train Loss: 0.0259 | Val Loss: 0.0121
Warmup epoch [6/100]. Skipping EarlyStopping check.
Epoch [7/150] | Train Loss: 0.0197 | Val Loss: 0.0140
Warmup epoch [7/100]. Skipping EarlyStopping check.
Epoch [8/150] | Train Loss: 0.0198 | Val Loss: 0.0119
Warmup epoch [8/100]. Skipping EarlyStopping check.
Epoch [9/150] | Train Loss: 0.0153 | Val Loss: 0.0067
Warmup epoch [9/100]. Skipping EarlyStopping 