In [1]:
import pandas as pd
import time
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

In [2]:
def create_deep_learning_model(input_shape):
    """Build and compile the feed-forward binary classifier.

    The network takes ``input_shape`` features per sample, passes them
    through two 50-unit ReLU layers and ends in a single sigmoid unit.
    It is compiled with the Nadam optimizer, binary cross-entropy loss and
    an AUC metric reported under the name ``auc_score``.

    Args:
        input_shape: Number of input features (used as the only input dimension).

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Assemble the layer stack first: input, two identical hidden layers, output.
    stack = [keras.Input(shape=(input_shape,))]
    stack += [layers.Dense(50, activation="relu") for _ in range(2)]
    stack.append(layers.Dense(1, activation="sigmoid"))

    network = keras.Sequential(stack)
    auc_metric = keras.metrics.AUC(name="auc_score")
    network.compile(
        optimizer="Nadam",
        loss="binary_crossentropy",
        metrics=[auc_metric],
    )
    return network

def load_dataset(file_paths, separator=";", label_column="label", random_state=42):
    """Load CSV files into one shuffled, all-numeric ``(X, y)`` pair.

    Every file is read with ``pd.read_csv``, the frames are concatenated,
    the rows are shuffled, and every column is converted with
    ``pd.to_numeric(errors="coerce")`` followed by ``fillna(0)``.

    Caveat: the numeric coercion is applied to *all* columns, including the
    label column, so any value that cannot be parsed as a number (or is
    missing) becomes 0 -- for the label this means such rows are silently
    treated as class 0.

    Args:
        file_paths: Iterable of CSV paths. A single path (``str`` or
            ``os.PathLike``) is also accepted and treated as a one-item list.
        separator: Field delimiter passed to ``pd.read_csv``.
        label_column: Name of the target column split off as ``y``.
        random_state: Seed for the row shuffle.

    Returns:
        ``(X, y)`` where ``X`` is the DataFrame without ``label_column`` and
        ``y`` is the ``label_column`` Series; both share a fresh 0..n-1 index.

    Raises:
        ValueError: If ``file_paths`` is empty.
        KeyError: If ``label_column`` is absent from the combined data.
    """
    import os  # local import: only needed for the single-path check below

    # A bare string would otherwise be iterated character by character.
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]
    paths = list(file_paths)
    if not paths:
        raise ValueError("file_paths must contain at least one CSV path")

    frames = [pd.read_csv(path, sep=separator) for path in paths]
    combined_df = pd.concat(frames, ignore_index=True)

    # Fail before the shuffle/coercion work, with the available columns listed.
    if label_column not in combined_df.columns:
        raise KeyError(
            f"label column {label_column!r} not found; "
            f"available columns: {list(combined_df.columns)}"
        )

    combined_df = combined_df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    combined_df = combined_df.apply(pd.to_numeric, errors="coerce").fillna(0)

    X = combined_df.drop(columns=[label_column])
    y = combined_df[label_column]
    return X, y

def calculate_all_metrics(y_true, y_pred):
    """
    Score predictions against the ground truth.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.

    Returns:
        A 5-tuple ``(accuracy, precision, recall, f1, confusion_matrix)``,
        each computed by the scikit-learn function of the same name with its
        default settings.
    """
    matrix = confusion_matrix(y_true, y_pred)
    # The four scalar scorers share one call signature, so evaluate them in
    # the order the caller expects them back.
    accuracy, prec, rec, f_score = (
        scorer(y_true, y_pred)
        for scorer in (accuracy_score, precision_score, recall_score, f1_score)
    )
    return accuracy, prec, rec, f_score, matrix

def print_results(model_name, ar, duration, precision, recall, f1):
    """Write a short text report for one model to standard output.

    The report is a header naming the model, the four scores formatted to
    four decimal places, the training duration printed as given (labelled
    in ms), and a 30-dash rule.
    """
    scored = (
        ("Accuracy Rate", ar),
        ("Precision", precision),
        ("Recall", recall),
        ("F1-Score", f1),
    )

    print(f"--- {model_name} Results ---")
    for label, value in scored:
        print(f"{label}: {value:.4f}")
    print(f"Training Duration: {duration} ms")
    print("-" * 30)

def get_f1(model, X, y, is_dl=False, threshold=0.45):
    """Return the F1 score of ``model`` on ``(X, y)``, rounded to 4 decimals.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        X: Feature matrix to score.
        y: True labels for ``X``.
        is_dl: When True, ``model.predict`` is treated as returning
            probabilities that must be thresholded into 0/1 labels;
            otherwise its output is used as the predicted labels directly.
        threshold: Probability cut-off used when ``is_dl`` is True; a sample
            is labelled 1 when its predicted probability is strictly greater.
            Defaults to 0.45, the value this notebook has always used.

    Returns:
        The F1 score from ``calculate_all_metrics``, rounded to 4 decimals.
    """
    if is_dl:
        # The network in this notebook ends in a single sigmoid unit, so its
        # predictions come back as one column; flatten them so the metrics
        # receive a plain 1-D label vector (assumes a NumPy array result).
        y_pred = (model.predict(X) > threshold).astype(int).ravel()
    else:
        y_pred = model.predict(X)

    # calculate_all_metrics returns (accuracy, precision, recall, f1, cm).
    _, _, _, f1, _ = calculate_all_metrics(y, y_pred)
    return round(f1, 4)

In [9]:
# CSV files loaded as the pre-training dataset.
pretrain_files = [
    "../datasets/HelloFloodAttack.csv",
    "../datasets/DecreasedRankAttack.csv",
]
# CSV file loaded as the fine-tuning dataset.
finetune_files = ["../datasets/VersionNumberAttack.csv"]

In [10]:
# --- Experiment configuration ---
TEST_FRACTION = 0.3    # share of each dataset held out for evaluation
SPLIT_SEED = 13        # seed for both stratified train/test splits
MODEL_SEED = 3         # seed shared by every model, including the Keras one
PRETRAIN_EPOCHS = 50   # Keras epochs on the pre-training data
FINETUNE_EPOCHS = 20   # Keras epochs on the fine-tuning data

X_pre_full, y_pre_full = load_dataset(pretrain_files)
X_ft_full, y_ft_full = load_dataset(finetune_files)

# The fine-tuning data is normalized and scored using column names taken from
# the pre-training data, so both sets must expose the same features. Fail
# early with a readable message, then align the column order.
missing_in_ft = [col for col in X_pre_full.columns if col not in X_ft_full.columns]
if missing_in_ft:
    raise ValueError(f"Fine-tuning data lacks pre-training feature columns: {missing_in_ft}")
X_ft_full = X_ft_full[X_pre_full.columns]

# Splitting Data (stratified so both halves keep the class balance)
X_pre_train, X_pre_test, y_pre_train, y_pre_test = train_test_split(
    X_pre_full, y_pre_full, test_size=TEST_FRACTION, random_state=SPLIT_SEED, stratify=y_pre_full
)
X_ft_train, X_ft_test, y_ft_train, y_ft_test = train_test_split(
    X_ft_full, y_ft_full, test_size=TEST_FRACTION, random_state=SPLIT_SEED, stratify=y_ft_full
)

# Normalization
# NOTE: sklearn's Normalizer rescales each *row* to unit norm and learns
# nothing in fit(), so there is no train-to-test leakage here. The first two
# columns are deliberately left out of the normalization.
scaler = Normalizer()
cols_to_norm = X_pre_train.columns[2:].tolist()
X_pre_train[cols_to_norm] = scaler.fit_transform(X_pre_train[cols_to_norm])
for split in (X_pre_test, X_ft_train, X_ft_test):
    split[cols_to_norm] = scaler.transform(split[cols_to_norm])

# Seed Python, NumPy and the Keras backend so the network's weight
# initialization and batch shuffling are repeatable, like the seeded
# boosters (bit-exact results can still vary on some hardware).
keras.utils.set_random_seed(MODEL_SEED)

# Model Definitions
models = {
    "XGBoost": xgb.XGBClassifier(max_depth=3, n_estimators=10, random_state=MODEL_SEED),
    "LightGBM": lgb.LGBMClassifier(max_depth=3, n_estimators=8, random_state=MODEL_SEED, verbosity=-1),
    "CatBoost": CatBoostClassifier(max_depth=3, n_estimators=8, random_state=MODEL_SEED, verbose=0),
    "Deep Learning": create_deep_learning_model(X_pre_train.shape[1])
}

detailed_results = []

for name, model in models.items():
    # Dispatch on the model type rather than on its display name, so the
    # labels in `models` can be renamed without breaking the branches below.
    is_dl = isinstance(model, keras.Model)

    # --- PRE-TRAINING ---
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() can be coarse and can jump.
    start_pre = time.perf_counter()
    if is_dl:
        model.fit(X_pre_train, y_pre_train, epochs=PRETRAIN_EPOCHS, verbose=0)
    else:
        model.fit(X_pre_train, y_pre_train)
    time_pre = (time.perf_counter() - start_pre) * 1000

    # Scores Before Fine-Tuning
    pre_f1_before = get_f1(model, X_pre_test, y_pre_test, is_dl)
    ft_f1_before  = get_f1(model, X_ft_test, y_ft_test, is_dl)

    # --- FINE-TUNING ---
    # Keras simply keeps training the same weights; the boosters are told to
    # continue from their already-fitted state instead of starting over.
    start_ft = time.perf_counter()
    if is_dl:
        model.fit(X_ft_train, y_ft_train, epochs=FINETUNE_EPOCHS, verbose=0)
    elif isinstance(model, xgb.XGBModel):
        model.fit(X_ft_train, y_ft_train, xgb_model=model)
    else:
        # LightGBM and CatBoost both take the starting model as `init_model`.
        model.fit(X_ft_train, y_ft_train, init_model=model)
    time_ft = (time.perf_counter() - start_ft) * 1000

    # Scores After Fine-Tuning (the pre-training score shows how much was forgotten)
    pre_f1_after = get_f1(model, X_pre_test, y_pre_test, is_dl)
    ft_f1_after  = get_f1(model, X_ft_test, y_ft_test, is_dl)

    detailed_results.append({
        "Model": name,
        "Pre_Time(ms)": round(time_pre, 2),
        "FT_Time(ms)": round(time_ft, 2),
        "PreData_BeforeFT": pre_f1_before,
        "PreData_AfterFT": pre_f1_after,
        "FTData_BeforeFT": ft_f1_before,
        "FTData_AfterFT": ft_f1_after
    })

# --- Final Performance & Timing Report ---
print("\n" + "="*100)
print("FINAL PERFORMANCE AND TIMING MATRIX")
print("="*100)
summary_df = pd.DataFrame(detailed_results)
print(summary_df.to_string(index=False))
print("="*100)


FINAL PERFORMANCE AND TIMING MATRIX
        Model  Pre_Time(ms)  FT_Time(ms)  PreData_BeforeFT  PreData_AfterFT  FTData_BeforeFT  FTData_AfterFT
      XGBoost         20.02        20.27            0.9668           0.9238           0.6163          0.9398
     LightGBM         14.64         9.98            0.9485           0.9171           0.8358          0.9375
     CatBoost          6.99         5.01            0.9601           0.9183           0.7290          0.9333
Deep Learning       2255.80       356.91            0.9123           0.9096           0.8344          0.9291
