In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras
import optuna
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline

# Define model function for hyperparameter tuning
def create_model(trial):
    model = Sequential()
    model.add(Dense(trial.suggest_int("units1", 64, 256), activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dropout(trial.suggest_float("dropout1", 0.1, 0.5)))
    model.add(Dense(trial.suggest_int("units2", 32, 128), activation='relu'))
    model.add(Dropout(trial.suggest_float("dropout2", 0.1, 0.5)))
    model.add(Dense(len(np.unique(y)), activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [2]:
# Load datasets
df_boa = pd.read_csv("/kaggle/input/kube-ids0/boa_dataset/processed/boa_dataset_ml_ready_frontend_microservice.csv")
df_dvwa = pd.read_csv("/kaggle/input/kube-ids0/dvwa_dataset/processed/dvwa_dataset_ml_ready.csv")

In [3]:
# Combine datasets
df = pd.concat([df_boa, df_dvwa], ignore_index=True)

In [4]:
# Handle missing values
df.fillna(0, inplace=True)

In [5]:
# Encode labels
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

In [6]:
# Select features and target
X = df.drop(columns=['label'])
y = df['label']

In [7]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Define objective function for Optuna tuning
def objective(trial):
    model = create_model(trial)
    model.fit(X_train, y_train, epochs=10, batch_size=trial.suggest_int("batch_size", 16, 64), verbose=0, validation_split=0.2)
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

In [10]:
# Optimize hyperparameters
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

[I 2025-04-15 15:54:52,466] A new study created in memory with name: no-name-cfef7be9-4adf-4259-b22b-29c22c5cbc96
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
[I 2025-04-15 15:55:32,900] Trial 0 finished with value: 0.9991353750228882 and parameters: {'units1': 248, 'dropout1': 0.12546153311699434, 'units2': 114, 'dropout2': 0.37720604213073583, 'batch_size': 25}. Best is trial 0 with value: 0.9991353750228882.
[I 2025-04-15 15:56:14,500] Trial 1 finished with value: 0.9955328106880188 and parameters: {'units1': 148, 'dropout1': 0.39965540865705507, 'units2': 95, 'dropout2': 0.44581010724104464, 'batch_size': 19}. Best is trial 0 with value: 0.9991353750228882.
[I 2025-04-15 15:56:55,268] Trial 2 finished with value: 0.9992794990539551 and parameters: {'units1': 186, 'dropout1': 0.15605252236650427, 'units2': 124, 'dropout2': 0.29691424871148053, 'batch_size': 21}. Best is trial 2 with value: 0.9992794990539551.
[I 2025-04-15 15:57:24,431] Trial 3 finished wi

In [11]:
# Train final model with best parameters
best_params = study.best_params
final_model = create_model(optuna.trial.FixedTrial(best_params))
final_model.fit(X_train, y_train, epochs=50, batch_size=best_params['batch_size'], verbose=1, validation_split=0.2)

Epoch 1/50
[1m1033/1033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9572 - loss: 0.1235 - val_accuracy: 0.9957 - val_loss: 0.0138
Epoch 2/50
[1m1033/1033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9925 - loss: 0.0242 - val_accuracy: 0.9937 - val_loss: 0.0131
Epoch 3/50
[1m1033/1033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9961 - loss: 0.0150 - val_accuracy: 0.9981 - val_loss: 0.0136
Epoch 4/50
[1m1033/1033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9974 - loss: 0.0100 - val_accuracy: 0.9977 - val_loss: 0.0248
Epoch 5/50
[1m1033/1033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9975 - loss: 0.0112 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 6/50
[1m1033/1033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9977 - loss: 0.0082 - val_accuracy: 0.9994 - val_loss: 0.0063
Epoch 7/50
[1m1

<keras.src.callbacks.history.History at 0x7bb708b278e0>

In [12]:
# Evaluate final model
loss, accuracy = final_model.evaluate(X_test, y_test)
print(f"Final Model Accuracy: {accuracy:.2f}")

[1m434/434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9997 - loss: 0.0039
Final Model Accuracy: 1.00


In [13]:
# Save model
final_model.save("models/neural_network_model.h5")