In [10]:
# BEFORE PERTURBATIONS

import os
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier

# Directory paths
models_dir = "../models"
data_dir = "../data/processed"

# Loading models
models = {file.split('_model.cbm')[0]: CatBoostClassifier().load_model(os.path.join(models_dir, file))
          for file in os.listdir(models_dir) if file.endswith('.cbm')}

# Testing models
accuracies = []
for model_name, model in models.items():
    # File name of the test data corresponds to the model name
    test_file_name = f"{model_name}_processed.csv"
    test_file_path = os.path.join(data_dir, test_file_name)
    
    # Loading test data
    X_test = pd.read_csv(test_file_path)
    y_test = np.ones(X_test.shape[0])  # Assuming all samples are malicious, so y_test should be all ones
    
    # Model prediction
    predictions = model.predict(X_test)
    
    # Calculating accuracy
    accuracy = np.mean(predictions == y_test)
    accuracies.append(accuracy)
    
    # Detailed logging
    print(f"Testing {model_name}:")
    print(f"Accuracy: {accuracy:.5%}")
    print(f"Predicted distribution: {np.unique(predictions, return_counts=True)}")
    print(f"Number of samples: {len(predictions)}")
    print(f"Number of malicious (1) predictions: {np.sum(predictions == 1)}")
    print(f"Number of benign (0) predictions: {np.sum(predictions == 0)}\n")

# Average accuracy
mean_accuracy = np.mean(accuracies)
print(f"\nAverage accuracy across all test cases: {mean_accuracy:.5%}")


Testing botnet-capture-20110815-fast-flux:
Accuracy: 100.00000%
Predicted distribution: (array([1]), array([887]))
Number of samples: 887
Number of malicious (1) predictions: 887
Number of benign (0) predictions: 0

Testing botnet-capture-20110812-rbot:
Accuracy: 99.96874%
Predicted distribution: (array([0, 1]), array([   2, 6396]))
Number of samples: 6398
Number of malicious (1) predictions: 6396
Number of benign (0) predictions: 2

Testing botnet-capture-20110815-fast-flux-2:
Accuracy: 100.00000%
Predicted distribution: (array([1]), array([37718]))
Number of samples: 37718
Number of malicious (1) predictions: 37718
Number of benign (0) predictions: 0

Testing botnet-capture-20110818-bot-2:
Accuracy: 100.00000%
Predicted distribution: (array([1]), array([259]))
Number of samples: 259
Number of malicious (1) predictions: 259
Number of benign (0) predictions: 0

Testing botnet-capture-20110815-rbot-dos:
Accuracy: 100.00000%
Predicted distribution: (array([1]), array([215]))
Number of sa

In [11]:
# for control

df_norm = pd.read_csv("../data/processed/CTU13_Normal_Traffic_20.csv")

X_test = df_norm
y_test = np.zeros(X_test.shape[0])

# Model prediction
predictions = model.predict(X_test)
    
    # Calculating accuracy
accuracy = np.mean(predictions == y_test)
accuracies.append(accuracy)
    
    # Detailed logging

print(f"Accuracy: {accuracy:.5%}")
print(f"Predicted distribution: {np.unique(predictions, return_counts=True)}")
print(f"Number of samples: {len(predictions)}")
print(f"Number of malicious (1) predictions: {np.sum(predictions == 1)}")
print(f"Number of benign (0) predictions: {np.sum(predictions == 0)}\n")


Accuracy: 99.98124%
Predicted distribution: (array([0, 1]), array([10661,     2]))
Number of samples: 10663
Number of malicious (1) predictions: 2
Number of benign (0) predictions: 10661

