In [3]:
# !which python
# !pip list | grep tensorflow
# !pip list | grep keras

# import sys
# print("Python executable:", sys.executable)
# print("sys.executable:", sys.executable)
# print("sys.path:", sys.path)

# import tensorflow as tf
# from tensorflow.keras.layers import LSTM, Dense, Dropout
# print("TensorFlow version:", tf.__version__)
import tensorflow as tf

# List physical and logical devices for 'MPS'
physical_mps = tf.config.list_physical_devices('MPS')
logical_mps = tf.config.list_logical_devices('MPS')
print("Physical MPS devices:", physical_mps)
print("Logical MPS devices:", logical_mps)

import os
# Ensure the environment variable is set BEFORE importing TensorFlow
os.environ['TF_ENABLE_MPS_FALLBACK'] = '1'


tf.debugging.set_log_device_placement(True)

# Run a test operation
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[1.0, 1.0], [0.0, 1.0]])
c = tf.matmul(a, b)
print("Result:", c.numpy())

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# For data splitting, scaling, and class weighting
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

# For oversampling
from imblearn.over_sampling import SMOTE

# For deep learning model (using TensorFlow/Keras)
import tensorflow as tf
# Verify GPU Availability
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("GPUs available:", tf.config.list_physical_devices('GPU'))

# Configure TensorFlow to Use the GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential

# Step 2: Load the dataset (update file path as needed)
file_path = "C:\\Users\\mrroo\\Downloads\\archive\\02-15-2018.csv"
df = pd.read_csv(file_path)
# Preview the dataset
print(df.head())
print(df.info())
print(df['Label'].value_counts())

# Step 3: Data Cleaning (drop duplicates and missing values)
df = df.drop_duplicates()
df = df.dropna()

# (Optional) If needed, parse the Timestamp column (not used here)
# df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='%d/%m/%Y %H:%M:%S')
# If the data should be sorted by time:
# df = df.sort_values('Timestamp')

Num GPUs Available:  0
GPUs available: []
   Dst Port  Protocol            Timestamp  Flow Duration  Tot Fwd Pkts  \
0         0         0  15/02/2018 08:25:18      112641158             3   
1        22         6  15/02/2018 08:29:05       37366762            14   
2     47514         6  15/02/2018 08:29:42            543             2   
3         0         0  15/02/2018 08:28:07      112640703             3   
4         0         0  15/02/2018 08:30:56      112640874             3   

   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \
0             0                0                0                0   
1            12             2168             2993              712   
2             0               64                0               64   
3             0                0                0                0   
4             0                0                0                0   

   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Active Mean     Active Std  \
0            

In [None]:
# Step 4: Select features and label
features = ['Flow Duration', 'Fwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Pkt Size Avg', 'Flow IAT Mean']
X = df[features]
y = df['Label']   # Expected values: "Benign", "DDoS-GoldenEye", "DDoS-Slowloris"

# Step 5: Encode labels to numeric values
le = LabelEncoder()
y_encoded = le.fit_transform(y)  
# Now, for example, 0: Benign, 1: DDoS-GoldenEye, 2: DDoS-Slowloris

# Step 6: Scale features using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Step 7: Split the data (time-based split to preserve temporal order)
# Here we assume that the data is already ordered by time.
split_index = int(0.8 * len(X_scaled))
X_train = X_scaled[:split_index]
X_test = X_scaled[split_index:]
y_train = y_encoded[:split_index]
y_test = y_encoded[split_index:]

# Step 8: Time-Series Windowing
def create_sequences(X, y, window_size):
    Xs, ys = [], []
    for i in range(len(X) - window_size + 1):
        Xs.append(X[i:i+window_size])
        ys.append(y[i+window_size-1])  # label of the last time step in the window
    return np.array(Xs), np.array(ys)

window_size = 10  # Adjust as needed
X_train_seq, y_train_seq = create_sequences(X_train, y_train, window_size)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, window_size)

print("Windowed training data shape:", X_train_seq.shape)  # (num_train_samples, window_size, num_features)
print("Windowed test data shape:", X_test_seq.shape)

Windowed training data shape: (832977, 10, 5)
Windowed test data shape: (208238, 10, 5)


In [None]:
# Step 9: Apply SMOTE on the training set
# SMOTE requires 2D data, so reshape the windowed training data:
num_train_samples, win_size, n_features = X_train_seq.shape
X_train_seq_2d = X_train_seq.reshape(num_train_samples, win_size * n_features)

# Apply SMOTE (only on training data)
sm = SMOTE(random_state=42)
X_train_res_2d, y_train_res = sm.fit_resample(X_train_seq_2d, y_train_seq)

# Reshape back to 3D for LSTM input:
X_train_res = X_train_res_2d.reshape(-1, win_size, n_features)
print("After SMOTE, training data shape:", X_train_res.shape)

After SMOTE, training data shape: (2343711, 10, 5)


In [None]:
# Step 10: Compute class weights using the SMOTE-adjusted training labels
classes = np.unique(y_train_res)
class_weights = compute_class_weight('balanced', classes=classes, y=y_train_res)
class_weight_dict = dict(zip(classes, class_weights))
print("Class weights:", class_weight_dict)

# Step 11: Convert labels to categorical (one-hot encoding) for training and testing
num_classes = len(classes)  # Should be 3
print("Number of classes:", num_classes)
y_train_cat = to_categorical(y_train_res, num_classes=num_classes)
y_test_cat = to_categorical(y_test_seq, num_classes=num_classes)


Class weights: {np.int64(0): np.float64(1.0), np.int64(1): np.float64(1.0), np.int64(2): np.float64(1.0)}
Number of classes: 3


In [None]:
# Step 12: Build the LSTM Model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(window_size, n_features)),
    Dropout(0.2),
    LSTM(32),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(**kwargs)


In [None]:
# Step 13: Train the Model with Class Weights
history = model.fit(
    X_train_res, 
    y_train_cat, 
    epochs=50, 
    batch_size=128, 
    validation_split=0.2, 
    class_weight=class_weight_dict
)

Epoch 1/50
[1m14649/14649[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 5ms/step - accuracy: 0.9712 - loss: 0.0713 - val_accuracy: 0.9990 - val_loss: 0.0068
Epoch 2/50
[1m 2901/14649[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m56s[0m 5ms/step - accuracy: 0.9977 - loss: 0.0076

KeyboardInterrupt: 

In [None]:
# Step 14: Plot Training and Validation Accuracy and Loss
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Accuracy vs. Epochs')

plt.subplot(1,2,2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Loss vs. Epochs')
plt.show()

In [None]:
# Step 15: Evaluate the Model on the Test Set
test_loss, test_acc = model.evaluate(X_test_seq, to_categorical(y_test_seq, num_classes=num_classes))
print("Test Loss: {:.4f} - Test Accuracy: {:.4f}".format(test_loss, test_acc))

# Step 16: Save the Trained Model
model.save("lstm_ddos_model.h5")
print("Model saved as lstm_ddos_model.h5")