In [None]:
import pandas as pd
import time
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer

In [4]:
def create_deep_learning_model(input_shape):
    """Build and compile the feed-forward binary classifier.

    Args:
        input_shape: Number of input features per sample; it is wrapped into
            a one-element shape tuple for the input layer.

    Returns:
        A compiled ``keras.Sequential`` model made of two 50-unit ReLU dense
        layers and one sigmoid output unit. It is compiled with the Nadam
        optimizer, binary cross-entropy loss, and an AUC metric reported
        under the name ``auc_score``.
    """
    # Layers are instantiated in the same order in which they are stacked.
    feature_input = keras.Input(shape=(input_shape,))
    hidden_blocks = [layers.Dense(50, activation="relu") for _ in range(2)]
    output_unit = layers.Dense(1, activation="sigmoid")

    model = keras.Sequential([feature_input, *hidden_blocks, output_unit])

    auc_metric = keras.metrics.AUC(name="auc_score")
    model.compile(
        optimizer="Nadam",
        loss="binary_crossentropy",
        metrics=[auc_metric],
    )
    return model

def load_dataset(file_paths, separator=";"):
    """Load CSV files into one shuffled, all-numeric feature/label pair.

    Every file is read with ``pd.read_csv``, the frames are concatenated,
    the rows are shuffled with a fixed seed (``random_state=42``), and every
    column is converted with ``pd.to_numeric(errors="coerce")``. Values that
    cannot be parsed, and values that are missing, end up as ``0``. Note that
    this also applies to the ``label`` column.

    Args:
        file_paths: A single path (``str`` or ``os.PathLike``) or an iterable
            of paths to CSV files.
        separator: Field delimiter passed to ``pd.read_csv`` (default ``";"``).

    Returns:
        Tuple ``(X, y)``: ``X`` is the DataFrame of all columns except
        ``label``; ``y`` is the ``label`` Series. Both share a fresh
        0..n-1 index.

    Raises:
        ValueError: If no file path is supplied.
        KeyError: If the combined data has no ``label`` column.
    """
    import os  # local import: only needed for the single-path check

    # A bare path would otherwise be iterated character by character.
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]
    else:
        file_paths = list(file_paths)
    if not file_paths:
        raise ValueError("load_dataset requires at least one file path")

    df_list = [pd.read_csv(path, sep=separator) for path in file_paths]
    combined_df = pd.concat(df_list, ignore_index=True)
    if "label" not in combined_df.columns:
        raise KeyError(
            f"'label' column not found; available columns: {list(combined_df.columns)}"
        )

    # Deterministic shuffle so rows from different files are interleaved.
    combined_df = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)
    # Force every column to numeric; unparseable or missing cells become 0.
    combined_df = combined_df.apply(pd.to_numeric, errors="coerce").fillna(0)

    X = combined_df.drop(columns=["label"])
    y = combined_df["label"]
    return X, y

def calculate_all_metrics(y_true, y_pred):
    """Evaluate predictions against the ground truth with five metrics.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, confusion_matrix)``, in
        that order.

    Note:
        The metric functions are not imported in the visible cells;
        presumably they come from ``sklearn.metrics`` (confirm).
    """
    scorers = (accuracy_score, precision_score, recall_score, f1_score)

    cm = confusion_matrix(y_true, y_pred)
    ar, precision, recall, f1 = (scorer(y_true, y_pred) for scorer in scorers)
    return ar, precision, recall, f1, cm

def print_results(model_name, ar, duration, precision, recall, f1):
    """Write a short plain-text report for one model to stdout.

    Args:
        model_name: Label shown in the report header.
        ar: Accuracy, printed with four decimals.
        duration: Training duration in milliseconds, printed as given.
        precision: Precision, printed with four decimals.
        recall: Recall, printed with four decimals.
        f1: F1 score, printed with four decimals.
    """
    scored_rows = (
        ("Accuracy Rate", ar),
        ("Precision", precision),
        ("Recall", recall),
        ("F1-Score", f1),
    )

    print(f"--- {model_name} Results ---")
    for caption, value in scored_rows:
        print(f"{caption}: {value:.4f}")
    print(f"Training Duration: {duration} ms")
    print("-" * 30)

def get_f1(model, X, y, is_dl=False, threshold=0.45):
    """Return the model's F1 score on ``(X, y)``, rounded to four decimals.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        X: Features, passed unchanged to ``model.predict``.
        y: Ground-truth labels.
        is_dl: When true, the output of ``model.predict`` is treated as a
            score and binarised with ``threshold``; otherwise it is used
            as the predicted labels directly.
        threshold: Cut-off applied when ``is_dl`` is true; scores strictly
            greater than it become 1, all others 0. Defaults to 0.45, the
            value that was previously hard-coded.

    Returns:
        The F1 score rounded to four decimal places.
    """
    raw_output = model.predict(X)
    if is_dl:
        y_pred = (raw_output > threshold).astype(int)
    else:
        y_pred = raw_output

    # calculate_all_metrics returns (accuracy, precision, recall, f1, cm);
    # only the F1 entry is needed here.
    _, _, _, f1, _ = calculate_all_metrics(y, y_pred)
    return round(f1, 4)

In [9]:
# CSV files that are pooled into a single dataset.
all_data_files = [
    "../datasets/DecreasedRankAttack.csv",
    "../datasets/HelloFloodAttack.csv",
    "../datasets/VersionNumberAttack.csv",
]

# Probability cut-off used to binarise the network's sigmoid output.
DL_DECISION_THRESHOLD = 0.45

X_full, y_full = load_dataset(all_data_files)
X_train, X_test, y_train, y_test = train_test_split(
    X_full, y_full, test_size=0.3, random_state=13, stratify=y_full
)
# Take ownership of the split frames before writing columns into them;
# assigning into the frames returned by the split can otherwise trigger
# pandas' chained-assignment (SettingWithCopy) warnings.
X_train = X_train.copy()
X_test = X_test.copy()

# Normalizer rescales each sample (row) to unit norm; it learns nothing in
# fit, so fit_transform/transform apply the same per-row operation.
# The first two columns are left untouched — NOTE(review): presumably
# identifier-like fields; confirm against the dataset schema.
scaler = Normalizer()
cols_to_norm = X_train.columns[2:].tolist()
X_train[cols_to_norm] = scaler.fit_transform(X_train[cols_to_norm])
X_test[cols_to_norm] = scaler.transform(X_test[cols_to_norm])

# --- Model Initialization ---

models = {
    "XGBoost": xgb.XGBClassifier(max_depth=3, n_estimators=10, random_state=3),
    "LightGBM": lgb.LGBMClassifier(max_depth=3, n_estimators=8, random_state=3, verbosity=-1),
    "CatBoost": CatBoostClassifier(max_depth=3, n_estimators=8, random_state=3, verbose=0),
    "Deep Learning": create_deep_learning_model(X_train.shape[1]),
}

standard_results = []

# --- Standard Training Loop ---
for name, model in models.items():
    is_deep_learning = name == "Deep Learning"

    # Time the fit only: the reported column is "Train_Time(ms)", so
    # test-set inference must not be counted. perf_counter is the monotonic
    # high-resolution clock intended for measuring durations.
    start_time = time.perf_counter()
    if is_deep_learning:
        model.fit(X_train, y_train, epochs=70, verbose=0)
    else:
        model.fit(X_train, y_train)
    execution_time = (time.perf_counter() - start_time) * 1000

    # Inference happens outside the timed region.
    if is_deep_learning:
        y_pred = (model.predict(X_test) > DL_DECISION_THRESHOLD).astype(int)
    else:
        y_pred = model.predict(X_test)

    # Calculate Metrics
    ar, precision, recall, f1, cm = calculate_all_metrics(y_test, y_pred)

    standard_results.append({
        "Model": name,
        "Train_Time(ms)": round(execution_time, 2),
        "F1-Score": round(f1, 4),
        "Accuracy": round(ar, 4),
        "Precision": round(precision, 4),
        "Recall": round(recall, 4)
    })

# --- Final Summary Table ---

print("\n" + "="*40)
print("STANDARD TRAINING SUMMARY REPORT")
print("="*40)
summary_df = pd.DataFrame(standard_results)
print(summary_df.to_string(index=False))
print("="*40)


STANDARD TRAINING SUMMARY REPORT
        Model  Train_Time(ms)  F1-Score  Accuracy  Precision  Recall
      XGBoost           32.35    0.9410    0.9012     0.9473  0.9348
     LightGBM           17.89    0.9150    0.8432     0.8432  1.0000
     CatBoost            8.73    0.9319    0.8841     0.9231  0.9410
Deep Learning         4314.26    0.9209    0.8596     0.8772  0.9692
