In [None]:
# Relative path to the test-set CSV; it is resolved against the working
# directory of the kernel running this notebook.
test_set_path = "../../../datasets/test_set.csv"

# NOTE(review): not referenced by any of the visible cells — presumably a
# leftover from the training notebook; confirm before removing.
train_set_sample = 0.5

In [None]:
import pandas as pd

# Read the whole test set into memory from the configured CSV path.
test_df = pd.read_csv(test_set_path)
print(f"test set count: {test_df.shape[0]:,}")

# Shape is (rows, columns); the column count here still includes the label
# columns that are dropped later when the feature matrix is built.
print("Test Set Used:", test_df.shape)

In [None]:
# Feature matrix: every column of test_df except the three label columns.
X_test = test_df.drop(
    columns=["attack_binary", "attack_categorical", "attack_class"]
).values

# Binary target that the predictions are scored against.
y_test = test_df["attack_binary"].values

# Per-row attack class label (not used by the evaluation cells visible here).
y_test_class = test_df["attack_class"].values

# Report the width of the feature matrix rather than of test_df: test_df still
# carries the three label columns, so its column count overstates the number
# of model inputs by three.
print(f"test set count: {X_test.shape[0]:,} with {X_test.shape[1]:,} features")
print(f"unique values: {test_df['attack_class'].value_counts()}")
test_df.head(3)

In [None]:
import numpy as np
import onnxruntime as ort

# ONNX model used to encode the test features. The file name suggests it is the
# encoder half of an autoencoder for CIDDS-001.
# NOTE(review): the DBOCSVM artifact loaded later is named for NSL-KDD — confirm
# that both artifacts were trained on the same dataset.
autoencoder_onnx_path = "saved_models/onnx/autoencoder_encoder_cidds_001.onnx"


def extract_latent_features(onnx_model_path, input_data):
    """
    Run an ONNX model over ``input_data`` and return its first output.

    Args:
        onnx_model_path: Path to the ONNX model file
        input_data: Array-like holding the model input. It is converted to a
            C-contiguous float32 NumPy array before inference, so nested
            lists and other array-likes are accepted as well as ndarrays.
            Presumably 2-D (n_samples, n_features); the required width is
            fixed by the exported model, not by this function.

    Returns:
        The first output produced by the model, as returned by ONNX Runtime.
        For an encoder-only export this is the latent representation.
    """
    # A fresh inference session is created on every call.
    session = ort.InferenceSession(onnx_model_path)

    # The data is fed to the model's first declared input.
    input_name = session.get_inputs()[0].name

    # ascontiguousarray accepts any array-like, casts to float32 and avoids a
    # copy when the data already is a contiguous float32 array.
    features = np.ascontiguousarray(input_data, dtype=np.float32)

    # Passing None as the output names requests every model output; only the
    # first one is returned to the caller.
    outputs = session.run(None, {input_name: features})
    return outputs[0]

In [None]:
# Encode the raw test features with the ONNX model configured above.
X_test_encoded = extract_latent_features(autoencoder_onnx_path, X_test)

# NOTE(review): the expected shape in the trailing comment is unverified —
# confirm it against the exported encoder's actual output width.
print(f"Latent representation shape: {X_test_encoded.shape}")  # should be (11271, 16) — TODO confirm

In [None]:
import joblib


def load_dbocsvm_model(filename):
    """
    Deserialize a previously saved DBOCSVM model with joblib

    Parameters:
    -----------
    filename : str
        Location of the serialized model on disk

    Returns:
    --------
    object
        Whatever ``joblib.load`` reconstructs from the file
        (expected to be a DBOCSVM instance)

    Notes:
    ------
    joblib files are pickle-based, so only load files from a trusted source.
    """
    model = joblib.load(filename)
    return model


# NOTE(review): this file name refers to NSL-KDD while the encoder path used
# for feature extraction refers to CIDDS-001 — confirm both artifacts come
# from the same dataset.
# NOTE(review): unpickling presumably needs the DBOCSVM class to be importable
# in this kernel; no such import is visible in these cells — verify.
dbocsvm_model_path = "saved_models/joblib/dbocsvm_nsl_kdd.joblib"
loaded_dbocsvm_model = load_dbocsvm_model(dbocsvm_model_path)

In [None]:
# Score the encoded test rows with the loaded model.
# Presumably predict() returns -1 (anomaly) / 1 (normal), matching the labels
# used by the evaluation cells — TODO confirm.
y_pred = loaded_dbocsvm_model.predict(X_test_encoded)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Rows are actual labels and columns are predicted labels, both in the
# order [-1, 1].
# NOTE(review): this assumes "attack_binary" is encoded as -1/1; samples whose
# label is outside `labels` would not be counted — verify the encoding.
cm = confusion_matrix(y_test, y_pred, labels=[-1, 1])


def plot_confusion_matrix(cm, labels, title):
    """
    Draw a confusion matrix as an annotated heatmap and display it.

    Args:
        cm: The confusion matrix to draw. Cells are annotated with the
            integer format code "d".
        labels: Tick labels, used for both the x axis and the y axis.
        title: Title shown above the heatmap.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    heatmap_options = {
        "annot": True,
        "fmt": "d",
        "cmap": "Blues",
        "xticklabels": labels,
        "yticklabels": labels,
    }

    # Open a new 5x4 figure so the heatmap does not draw onto an earlier one.
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, **heatmap_options)

    plt.title(title)
    plt.ylabel("Actual Label")
    plt.xlabel("Predicted Label")
    plt.show()


# NOTE(review): the heading says "ONNX OCSVM", but y_pred comes from the
# joblib-loaded DBOCSVM model; only the feature encoder runs through ONNX.
print("Confusion Matrix of ONNX OCSVM Predictions")
# Tick labels follow the [-1, 1] label order of the matrix: -1 -> "Anomaly", 1 -> "Normal".
plot_confusion_matrix(cm, ["Anomaly", "Normal"], "Confusion Matrix (Anomaly vs Normal)")

In [None]:
from sklearn.metrics import (
    classification_report,
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
)

print("Classification Report ONNX:")
# Pin the label order explicitly so that "Anomaly" always maps to -1 and
# "Normal" to 1, matching the labels used for the confusion matrix, instead of
# relying on the implicit sorted order of whatever labels appear in the data.
print(
    classification_report(
        y_test, y_pred, labels=[-1, 1], target_names=["Anomaly", "Normal"]
    )
)

# The anomaly class (-1) is treated as the positive class for the headline metrics.
precision = precision_score(y_test, y_pred, pos_label=-1)
recall = recall_score(y_test, y_pred, pos_label=-1)
f1 = f1_score(y_test, y_pred, pos_label=-1)

print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1 * 100:.2f}%")
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")