In [57]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping


In [45]:
# Load dataset
# Read DDoS_dataset.csv (resolved relative to the working directory) into `df`.
DATASET_PATH = 'DDoS_dataset.csv'
df = pd.read_csv(DATASET_PATH)

In [46]:
# Encode categorical features
# Each column gets its own LabelEncoder. A single shared encoder is refit on
# every fit_transform call, so only the mapping learned for the last column
# would survive and the earlier columns could not be decoded or re-encoded
# consistently afterwards.
encoders = {}
for column in ('Highest Layer', 'Transport Layer', 'Dest IP'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Backward compatibility: `le` previously ended up fitted on 'Dest IP'.
le = encoders['Dest IP']


In [48]:
# Select features and label
# `features` lists the model input columns in the order they are fed to the
# scaler; `label` names the target column.
label = 'Label'
features = [
    'Highest Layer',
    'Transport Layer',
    'Source IP',
    'Dest IP',
    'Source Port',
    'Dest Port',
    'Packet Length',
    'Packets/Time',
]
X, y = df[features], df[label]

In [51]:
# Normalize numerical features
# Fit a StandardScaler on the selected feature columns, then replace X with
# the standardized values (X becomes a NumPy array from here on).
# NOTE(review): the scaler is fitted on all rows, i.e. before the later
# train/test split -- confirm this is intended.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [53]:
# Convert data to time-series format
def create_sequences(data, labels, time_steps=5):
    """Slice row-wise data into overlapping fixed-length windows.

    Window ``i`` covers rows ``i .. i + time_steps - 1`` of ``data`` and is
    paired with the label of the row right after it (``i + time_steps``).

    Args:
        data: Array-like of shape ``(n_rows, ...)``.
        labels: Array-like with one label per row of ``data``. A pandas
            Series is accepted with any index; labels are always taken by
            position, never by index label.
        time_steps: Number of consecutive rows per window.

    Returns:
        Tuple ``(sequences, labels_out)`` of NumPy arrays with shapes
        ``(n_rows - time_steps, time_steps, ...)`` and
        ``(n_rows - time_steps,)``. When there are not more rows than
        ``time_steps`` both arrays are empty but keep those trailing dims.
    """
    # Convert up front so indexing below is positional. Indexing a pandas
    # Series with an int is label-based and breaks on a non-default index.
    data = np.asarray(data)
    labels = np.asarray(labels)

    n_windows = len(data) - time_steps
    if n_windows <= 0:
        empty_sequences = np.empty((0, time_steps) + data.shape[1:], dtype=data.dtype)
        return empty_sequences, np.empty((0,), dtype=labels.dtype)

    sequences = np.stack([data[i:i + time_steps] for i in range(n_windows)])
    # The label for window i sits at position i + time_steps.
    labels_out = labels[time_steps:time_steps + n_windows].copy()
    return sequences, labels_out

# Window length (rows per sequence); tune based on analysis.
TIME_STEPS = 10
X_seq, y_seq = create_sequences(data=X, labels=y, time_steps=TIME_STEPS)


In [54]:
# Train-test split
# Hold out 20% of the windows for testing; the fixed seed keeps the split
# reproducible.
# NOTE(review): the windows built above overlap (consecutive windows share
# all but one row), so a shuffled split can place near-identical windows on
# both sides -- confirm whether a chronological split is more appropriate.
X_train, X_test, y_train, y_test = train_test_split(
    X_seq,
    y_seq,
    test_size=0.2,
    random_state=42,
)

In [31]:
# Build LSTM Model
# Two stacked bidirectional LSTM layers followed by a small dense head with
# a single sigmoid output. The per-step feature count is read from the
# training tensor's last axis.
n_features = X_train.shape[2]
layer_stack = [
    Input(shape=(TIME_STEPS, n_features)),            # explicit input layer
    Bidirectional(LSTM(64, return_sequences=True)),   # keeps the full sequence for the next LSTM
    Dropout(0.3),
    Bidirectional(LSTM(32, return_sequences=False)),  # returns only the final output
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
]
model = Sequential(layer_stack)

In [33]:

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [59]:
# Stop training once val_loss has not improved for 3 consecutive epochs and
# roll the model back to the best-scoring weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train Model
# NOTE(review): the test split doubles as validation data here, so it also
# drives early stopping -- confirm a separate hold-out is not needed.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
    callbacks=[early_stopping],
)

Epoch 1/10
[1m21315/21315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 16ms/step - accuracy: 0.9995 - loss: 0.0022 - val_accuracy: 0.9994 - val_loss: 0.0020
Epoch 2/10
[1m21315/21315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m374s[0m 18ms/step - accuracy: 0.9995 - loss: 0.0023 - val_accuracy: 0.9995 - val_loss: 0.0019
Epoch 3/10
[1m21315/21315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m431s[0m 20ms/step - accuracy: 0.9995 - loss: 0.0024 - val_accuracy: 0.9995 - val_loss: 0.0020
Epoch 4/10
[1m21315/21315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m567s[0m 27ms/step - accuracy: 0.9995 - loss: 0.0021 - val_accuracy: 0.9996 - val_loss: 0.0020
Epoch 5/10
[1m21315/21315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 11ms/step - accuracy: 0.9996 - loss: 0.0021 - val_accuracy: 0.9995 - val_loss: 0.0019
Epoch 6/10
[1m21315/21315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 12ms/step - accuracy: 0.9995 - loss: 0.0022 - val_accuracy: 0.9996 - val

<keras.src.callbacks.history.History at 0x211afcce780>

In [61]:
# Save Model
# Persist the trained network to a .keras file in the working directory and
# report completion.
MODEL_PATH = 'ddos_lstm_model.keras'
model.save(MODEL_PATH)
print('✅ Model training complete & saved!')


✅ Model training complete & saved!


In [9]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load dataset
df = pd.read_csv('DDoS_dataset.csv')

# Encode categorical features.
# One encoder per column: reusing `le_transport` for 'Dest IP' would overwrite
# the Transport Layer mapping it had just learned.
le_highest = LabelEncoder()
le_transport = LabelEncoder()
le_dest_ip = LabelEncoder()

df['Highest Layer'] = le_highest.fit_transform(df['Highest Layer'])
df['Transport Layer'] = le_transport.fit_transform(df['Transport Layer'])
df['Dest IP'] = le_dest_ip.fit_transform(df['Dest IP'])

# The encoders are deliberately NOT refit on a hard-coded protocol list here.
# Refitting would discard the mappings learned from the dataset, so the saved
# encoders would no longer match the integer codes the scaler below is fitted
# on. The former lists also contained "UDP"*5 / "ARP"*5, which are the literal
# strings 'UDPUDPUDPUDPUDP' / 'ARPARPARPARPARP'.

# Select features
features = ['Highest Layer', 'Transport Layer', 'Source IP', 'Dest IP', 'Source Port', 'Dest Port', 'Packet Length', 'Packets/Time']
X = df[features]

# Normalize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Save encoders and scaler
joblib.dump(scaler, "ddos_scaler.pkl")
joblib.dump(le_highest, "highest_layer_encoder.pkl")
joblib.dump(le_transport, "transport_layer_encoder.pkl")
# Also persist the Dest IP mapping so that column can be encoded the same way
# as the data the scaler was fitted on.
joblib.dump(le_dest_ip, "dest_ip_encoder.pkl")

print("✅ DDoS scalers and encoders saved successfully!")

✅ DDoS scalers and encoders saved successfully!


In [9]:
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder

# Load dataset and cast both IP columns to strings so that any numeric
# values are treated as text.
df = pd.read_csv('DDoS_dataset.csv').astype({'Source IP': str, 'Dest IP': str})

# Fit one encoder on every distinct address seen as either source or
# destination.
# NOTE(review): this mapping (string-cast union of both columns) differs from
# the per-column 'Dest IP' encoding applied in the training cells -- confirm
# which one inference is expected to use.
ips = pd.concat([df['Source IP'], df['Dest IP']]).unique()
le_ip = LabelEncoder().fit(ips)

# Save the encoder
joblib.dump(le_ip, "ip_encoder.pkl")

print("✅ IP encoder saved successfully!")

✅ IP encoder saved successfully!
