In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, Input
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib
import xgboost as xgb

In [2]:
# Seed Python's `random`, NumPy and TensorFlow in one call.
# `tf.random.set_seed` alone leaves the Python and NumPy generators unseeded,
# so any step drawing from them differs from run to run.
tf.keras.utils.set_random_seed(42)

# DATA IMPORT

In [3]:
# Read the feature table and the label table from the working directory.
X = pd.read_csv("ProcessedFeatures.csv")
y = pd.read_csv("ProcessedLabels.csv")

In [4]:
# Attack labels that are routed into the simulation test set in the next cell.
zero_day_labels = [
    'Web Attack - XSS',
    'Heartbleed',
    'Web Attack - SQL Injection',
]

# New frame holding the features plus a "Label" column; X itself is not modified.
df = X.assign(Label=y["Label"])

In [5]:
# Hold out every row of the designated zero-day attack classes ...
test_attacks_df = df[df["Label"].isin(zero_day_labels)].copy()

# ... plus a reproducible 10% sample of the benign traffic.
benign_df = df[df["Label"] == "BENIGN"]
test_benign_df = benign_df.sample(frac=0.1, random_state=42)  # 10% of benign data

# Collect the ORIGINAL index labels of the held-out rows before concatenating.
# `pd.concat(..., ignore_index=True)` renumbers the result 0..N-1, so dropping
# by `test_df.index` would remove the first N rows of `df` instead of the
# held-out rows and leak the zero-day attacks into the training set.
held_out_index = test_attacks_df.index.append(test_benign_df.index)
train_df = df.drop(index=held_out_index)

test_df = pd.concat([test_attacks_df, test_benign_df], ignore_index=True)

# Guard the split: the two parts must partition `df`, and no zero-day row may
# remain in the training data.
assert len(train_df) + len(test_df) == len(df), "train/test split does not partition df"
assert not train_df["Label"].isin(zero_day_labels).any(), "zero-day rows leaked into train_df"

test_df.to_csv("test_data_for_simulation.csv", index=False)

In [6]:
# Separate the label column from the feature columns.
y_train_raw = train_df["Label"]
X_train = train_df.drop(columns="Label")

# Binary target: 0 for "BENIGN", 1 for every other label.
y_train = (y_train_raw != "BENIGN").astype("int64")

In [7]:
# Fit the standardiser on all training rows, then apply it to the same rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Persist the fitted scaler to disk.
joblib.dump(scaler, "scaler.pkl")

['scaler.pkl']

In [8]:
# Keep only the scaled rows whose binary target is 0 (benign); the mask is
# applied positionally to the NumPy array returned by the scaler.
X_benign_scaled = X_train_scaled[y_train.eq(0).to_numpy()]

# AUTOENCODER

In [9]:
# Layer widths: input/output width follows the feature count, bottleneck is 16.
input_dim = len(X_train.columns)
encoding_dim = 16

input_layer = Input(shape=(input_dim,))

# Encoder: input -> 64 -> encoding_dim, ReLU on both layers.
encoder_hidden = layers.Dense(64, activation="relu")(input_layer)
encoded = layers.Dense(encoding_dim, activation="relu")(encoder_hidden)

# Decoder: encoding_dim -> 64 (ReLU) -> input_dim (linear output).
decoder_hidden = layers.Dense(64, activation="relu")(encoded)
decoded = layers.Dense(input_dim, activation="linear")(decoder_hidden)

# Full reconstruction model, and an encoder-only model sharing the same layers.
autoencoder = Model(inputs=input_layer, outputs=decoded, name="autoencoder")
encoder = Model(inputs=input_layer, outputs=encoded, name="encoder")

In [10]:
# Train the autoencoder to reconstruct its own input, using benign rows only.
autoencoder.compile(loss="mse", optimizer="adam")
autoencoder.fit(
    x=X_benign_scaled,
    y=X_benign_scaled,
    batch_size=64,
    epochs=50,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x1b92fcaa8e0>

In [11]:
# Persist the full autoencoder and the encoder-only model as separate files.
autoencoder.save(filepath="autoencoder_model.keras")
encoder.save(filepath="ae_encoder.keras")

# XGBoost Downstream Classifier

In [12]:
# Project every scaled training row (benign and attack) through the encoder.
z_train = encoder.predict(x=X_train_scaled)

# Classifier that will be fitted on the encoded features in the next cell.
# NOTE(review): `use_label_encoder` appears to be deprecated in recent XGBoost
# releases — confirm against the installed version before removing it.
xgb_model = xgb.XGBClassifier(
    eval_metric="logloss",
    use_label_encoder=False,
)



In [13]:
# Fit the classifier on the encoder outputs against the binary benign/attack target.
xgb_model.fit(X=z_train, y=y_train)

In [14]:
# Write the fitted classifier to a JSON model file.
xgb_model.save_model(fname="xgb_model.json")