In [1]:
# Implementation of Model 2 (Three-Class Classification: Benign, DDoS, non-DDoS)

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, Flatten, BatchNormalization, LSTM, InputLayer
from tensorflow.keras.optimizers import Adam

# --- Step 1: Load Dataset ---
df = pd.read_csv("/Users/user/Desktop/0model/0final_cleaned_dataset.csv")

# --- Step 2: Map to 3-class Labels: BENIGN, DDoS, non-DDoS ---
df['Three_Class_Label'] = df['Label'].apply(lambda x: 'BENIGN' if x == 'BenignTraffic' else ('DDoS' if 'DDoS' in x else 'non-DDoS'))
label_encoder = LabelEncoder()
df['Label_Encoded'] = label_encoder.fit_transform(df['Three_Class_Label'])
print("Label Mapping:", dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))

# --- Step 3: Drop Unnecessary Columns and Handle Numeric Features ---
drop_columns = ['Label', 'BroadLabel', 'Three_Class_Label']
features = df.drop(columns=drop_columns + ['Label_Encoded'], errors='ignore')
features = features.select_dtypes(include=['number']).copy()

# --- Step 4: Log Normalization & Clean NaNs ---
features.replace([np.inf, -np.inf], np.nan, inplace=True)
features.dropna(inplace=True)
df = df.loc[features.index]  # Align label with features
labels = df['Label_Encoded']

features = np.log1p(features)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# --- Step 5: Train-Test Split (60% Train, 20% Val, 20% Test) ---
X_temp, X_test, y_temp, y_test = train_test_split(X_scaled, labels, test_size=0.2, stratify=labels, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=42)

X_train_seq = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val_seq = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test_seq = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# --- Step 6: Define Model 2 (LSTM-based for 3-class classification) ---
def build_lstm_model(input_shape, output_dim):
    model = Sequential([
        InputLayer(input_shape=input_shape),
        Conv1D(128, kernel_size=3, activation='relu'),
        LSTM(256, return_sequences=True),
        LSTM(512),
        BatchNormalization(),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.3),
        Dense(512, activation='relu'),
        Dense(output_dim, activation='softmax')
    ])
    return model

# --- Step 7: Compile and Train ---
model = build_lstm_model((X_train_seq.shape[1], 1), output_dim=3)
model.compile(optimizer=Adam(learning_rate=2e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train_seq, y_train, epochs=200, batch_size=256, validation_data=(X_val_seq, y_val), verbose=1)

# --- Step 8: Evaluate ---
y_pred_probs = model.predict(X_test_seq)
y_pred = np.argmax(y_pred_probs, axis=1)

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred), display_labels=label_encoder.classes_).plot()
plt.title("Confusion Matrix: Model 2 (LSTM - 3 Class)")
plt.show()

# --- Step 9: Save Model ---
model.save("model2_lstm_3class.h5")


Label Mapping: {'non-DDoS': 0}


2025-05-18 19:29:49.723262: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/200
 843/5445 [===>..........................] - ETA: 4:08:35 - loss: 0.0053 - accuracy: 0.9990 


KeyboardInterrupt

