In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import joblib

In [5]:
# Read the training data into a DataFrame.
# The file name is relative, so it is resolved against the kernel's working directory.
df = pd.read_csv(filepath_or_buffer="Data_Set_Training.csv")

In [7]:
# Names of the input columns, in the order they are fed to the model.
# This cell only declares the list; it does not check that the columns are
# actually present in `df`.
FEATURES = [
    # text columns (converted to integer ids by the LabelEncoder cell)
    "method",
    "path",
    "body",
    # remaining columns, used as provided in the CSV
    "single_q",
    "double_q",
    "dashes",
    "braces",
    "spaces",
    "url_length",
    "body_length",
    "base64_count",
    "path_entropy",
    "body_entropy",
    "header_count",
    "sql_injection_count",
    "xss_attack_count",
    "command_injection_count",
    "directory_traversal_count",
    "csrf_count",
]

In [9]:
# Name of the target column; its values are 0 (normal) or 1 (attack).
LABEL: str = "label"

In [11]:

# Encode the three text columns as integer class ids.
# Each column gets its own LabelEncoder so that the fitted mapping can be
# saved and reused to encode that same column later.
le_method = LabelEncoder()
le_path = LabelEncoder()
le_body = LabelEncoder()
df["method"] = le_method.fit_transform(df["method"])
df["path"] = le_path.fit_transform(df["path"])
# "body" has to be fitted with le_body: fitting it with le_path would replace
# the path mapping with body values and leave le_body unfitted.
df["body"] = le_body.fit_transform(df["body"])
# NOTE(review): this cell overwrites the columns in place, so running it a
# second time re-fits the encoders on the already-encoded integers; reload
# `df` before re-running.

In [13]:
# Separate the model inputs from the target.
X = df.loc[:, FEATURES]  # DataFrame with the FEATURES columns, in list order
y = df.loc[:, LABEL]     # Series with the LABEL column

In [15]:
# Standardise every feature column with statistics learned from X.
# `scaler` is kept as a separate fitted object because it is saved later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
# NOTE(review): the scaler is fitted on all rows, including the ones that
# later end up in the validation split — confirm this is intended.

In [17]:
def create_sequences(data, labels, time_steps=10):
    """Slice row-ordered data into overlapping fixed-length windows.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``data`` and is
    paired with the label of the row right after it (``i + time_steps``), so
    the last ``time_steps`` rows never start a window.

    Args:
        data: Array-like whose first axis indexes rows, e.g.
            ``(n_rows, n_features)``.
        labels: Array-like or pandas Series with one label per row of
            ``data``. It is read by position, so a Series with a non-default
            index is handled correctly.
        time_steps: Number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(sequences, sequence_labels)`` of NumPy arrays with shapes
        ``(n_rows - time_steps, time_steps, ...)`` and
        ``(n_rows - time_steps,)``. When there are too few rows for a single
        window both arrays have length 0 and ``sequences`` still keeps its
        ``time_steps`` and trailing dimensions.

    Raises:
        ValueError: If ``time_steps`` is smaller than 1, or ``labels`` has
            fewer entries than the windows require.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be a positive integer")

    rows = np.asarray(data)
    # Take a positional view of the labels: indexing a pandas Series with an
    # integer looks the value up by index label, which fails (or picks the
    # wrong row) when the index is not the default 0..n-1 range.
    targets = np.asarray(labels)

    n_windows = max(len(rows) - time_steps, 0)
    if n_windows and len(targets) < n_windows + time_steps:
        raise ValueError("labels must contain at least one entry per row of data")

    # window_index[i, j] == i + j, so one fancy-indexing call gathers every
    # window and yields a correctly shaped (possibly empty) array.
    window_index = np.arange(n_windows)[:, None] + np.arange(time_steps)[None, :]
    sequences = rows[window_index]
    # Copy so the result does not alias the caller's label buffer.
    sequence_labels = targets[time_steps:time_steps + n_windows].copy()
    return sequences, sequence_labels

# Cut the scaled matrix into 10-row windows, each labelled with the row that follows it.
X_seq, y_seq = create_sequences(data=X_scaled, labels=y, time_steps=10)

In [19]:
# Hold out the last 20% of the windows for validation. shuffle=False keeps the
# windows in file order, so the split is a plain head/tail cut.
X_train, X_val, y_train, y_val = train_test_split(
    X_seq,
    y_seq,
    test_size=0.2,
    shuffle=False,
    random_state=42,  # has no effect while shuffle=False
)

In [21]:
# Two stacked LSTM layers followed by a small dense head with a sigmoid output.
# The input shape's first dimension (10) has to equal the `time_steps` used
# when the windows were built.
model = Sequential(
    [
        Input(shape=(10, len(FEATURES))),  # (rows per window, columns per row)
        LSTM(units=64, return_sequences=True),  # emits every step for the next LSTM
        Dropout(rate=0.2),
        LSTM(units=32),  # return_sequences defaults to False: only the last step is passed on
        Dense(units=16, activation="relu"),
        Dropout(rate=0.2),
        Dense(units=1, activation="sigmoid"),  # one probability per window
    ]
)

In [23]:
# Configure training: binary cross-entropy on the sigmoid output, Adam with a
# learning rate of 1e-3, and accuracy reported alongside the loss.
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [25]:
# Train for 200 epochs in mini-batches of 32, evaluating on the held-out
# windows after each epoch. The call stays the cell's last expression, so the
# returned History object is what the cell displays.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=200,
    validation_data=(X_val, y_val),
)

Epoch 1/200
235/235 ━━━━━━━━━━━━━━━━━━━━ 6s 10ms/step - accuracy: 0.9227 - loss: 0.2503 - val_accuracy: 1.0000 - val_loss: 3.2598e-04
Epoch 2/200
235/235 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - accuracy: 0.9631 - loss: 0.1096 - val_accuracy: 1.0000 - val_loss: 8.1292e-05
Epoch 3/200
235/235 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - accuracy: 0.9659 - loss: 0.0918 - val_accuracy: 1.0000 - val_loss: 4.3700e-05
Epoch 4/200
235/235 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - accuracy: 0.9630 - loss: 0.0952 - val_accuracy: 1.0000 - val_loss: 8.5623e-06
Epoch 5/200
235/235 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - accuracy: 0.9605 - loss: 0.0939 - val_accuracy: 1.0000 - val_loss: 2.8125e-05
Epoch 6/200
235/235 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - accuracy: 0.9676 - loss: 0.0912 - val_accuracy: 1.0000 - val_loss: 7.7863e-

<keras.src.callbacks.history.History at 0x1a435abe6c0>

In [27]:
# Persist the trained model together with the preprocessing objects.
# The last joblib.dump call stays the final expression, so its return value
# is what the cell displays.
# NOTE(review): joblib will pickle an encoder whether or not it has been
# fitted — make sure each le_* object was fitted on its own column first.
model.save(filepath="lstm_model_common_attack.keras")
joblib.dump(value=scaler, filename="ml_scaler.pkl")
joblib.dump(value=le_method, filename="ML_method_encoder.pkl")
joblib.dump(value=le_path, filename="ML_path_encoder.pkl")
joblib.dump(value=le_body, filename="ML_body_encoder.pkl")

['ML_body_encoder.pkl']