# Testing

## Importing libraries

In [None]:
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn.metrics import (
    accuracy_score, balanced_accuracy_score, classification_report,
    confusion_matrix, precision_score, recall_score, f1_score
)
from joblib import load

## Selecting the dataset

In [None]:
# Dataset identifiers; each one appears with and without the 'binary_' prefix
datasets = [
    'binary_cic_ids_2017',
    'cic_ids_2017',
    'binary_nsl_kdd',
    'nsl_kdd',
    'binary_unsw_nb15',
    'unsw_nb15',
]

# Change the index to evaluate a different dataset
dataset = datasets[0]

## Selecting the classifier

In [None]:
# Short classifier codes
# (presumably AdaBoost, k-NN, LDA, Logistic Regression, Naive Bayes and
# Random Forest -- confirm against the training notebook)
classifiers = [
    'AB',
    'KNN',
    'LDA',
    'LR',
    'NB',
    'RF',
]

# Change the index to evaluate a different classifier
classifier = classifiers[0]

## Loading the data

In [None]:
# Both paths are derived from the selected dataset and classifier
model_path = f'../trained-models/{dataset}_{classifier}.joblib'
test_path = f"../processed-datasets/{dataset}_test_normalized.csv"

# Trained model first, then the test split
# NOTE: joblib files are pickle-based -- only load models from a trusted source
model = load(model_path)
test = pd.read_csv(test_path)

## Splitting into X and y

In [None]:
# Every column except the last one is a feature; the last column is the label
n_features = test.shape[1] - 1
X_test = test.iloc[:, :n_features]
y_test = test.iloc[:, -1]

## Testing

In [None]:
# Time only the prediction call.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals, so the result is not distorted if the system clock is
# adjusted while the model is predicting (which time.time() would be).
start_time_test = time.perf_counter()

# Make predictions on the test data
y_pred = model.predict(X_test)

end_time_test = time.perf_counter()

# Elapsed prediction time in seconds
testing_time = end_time_test - start_time_test

print(f'Testing Time: {testing_time} seconds')

## Adding the testing time to a DataFrame

In [None]:
# Load the timings recorded by earlier runs, if there are any
try:
    times = pd.read_csv(f"../results/{dataset}_testing_times.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No file yet (or an empty one): start from an empty DataFrame.
    # Any other failure is deliberately left to propagate; swallowing it
    # would let the save step further down overwrite the existing results
    # with a table that contains only the current run.
    times = pd.DataFrame()

In [None]:
# One-row table describing the current run
timing_row = {
    'Classifier': [str(classifier)],
    'Testing Time': [round(testing_time, 4)],
}
new_line = pd.DataFrame(timing_row)

# Append it below the rows loaded earlier, renumbering the index from 0
times = pd.concat((times, new_line), ignore_index=True)

In [None]:
# Bare expression: the notebook renders the timing table as the cell output
times

In [None]:
# Write the updated timing table back to the CSV it was loaded from,
# leaving the row index out of the file
times_csv_path = f"../results/{dataset}_testing_times.csv"
times.to_csv(times_csv_path, index=False)

## Model Evaluation

### Accuracy, Balanced Accuracy, and Accuracy per Class

In [None]:
# Overall accuracy on the test set
total_accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {round(total_accuracy, 4)}')

# Balanced accuracy
balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
print(f'Balanced Accuracy: {round(balanced_accuracy, 4)}')

# Unique classes present in the true labels, in sorted order
class_labels = sorted(set(y_test))

# Plain positional arrays, so a boolean mask selects the same samples from
# both, whatever index the pandas objects carry
y_true_values = y_test.to_numpy()
y_pred_values = pd.Series(y_pred).to_numpy()

# Accuracy of each class: among the samples whose true label is that class,
# the fraction that was predicted correctly
class_accuracies = {}
for class_label in class_labels:
    # One vectorized comparison per class instead of a Python-level scan
    in_class = y_true_values == class_label
    class_accuracies[class_label] = accuracy_score(
        y_true_values[in_class], y_pred_values[in_class]
    )

# Rounding the values in the dictionary
rounded_dictionary = {key: round(value, 4) for key, value in class_accuracies.items()}

# Print the accuracy of each class, rounded like the metrics printed above
for class_label, accuracy in rounded_dictionary.items():
    print(f'Accuracy of class {class_label}: {accuracy}')

### Precision, Recall, and F1-Score

In [None]:
# Precision, recall and F1-score, each computed with average='weighted'
precision, recall, f1 = (
    scorer(y_test, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

for metric_name, metric_value in (('Precision', precision), ('Recall', recall)):
    print(f'{metric_name}: {round(metric_value, 4)}')
# The trailing newline leaves a blank line after the summary
print(f'F1-score: {round(f1, 4)}\n')

### Metrics per class

In [None]:
# Text report of the per-class metrics, printed under a heading line
report = classification_report(y_test, y_pred)
print('Metrics per class:', report, sep='\n')

### Confusion Matrix

In [None]:
# Confusion Matrix
# The matrix spans every label that occurs in the true OR the predicted
# values. Building the label list from that union and passing it explicitly
# keeps the tick labels aligned with the rows/columns even when the model
# predicts a class that never appears in y_test.
matrix_labels = sorted(set(y_test) | set(y_pred))
conf_matrix = confusion_matrix(y_test, y_pred, labels=matrix_labels)

plt.figure(figsize=(12, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",  # cell counts are integers
    cmap="Blues",
    xticklabels=matrix_labels,
    yticklabels=matrix_labels,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

## Adding the metric values to a DataFrame

In [None]:
# Load the evaluation results recorded by earlier runs, if there are any
try:
    evaluation = pd.read_csv(f"../results/{dataset}_models_evaluation.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No file yet (or an empty one): start from an empty DataFrame.
    # Any other failure is deliberately left to propagate; swallowing it
    # would let the save step further down overwrite the existing results
    # with a table that contains only the current run.
    evaluation = pd.DataFrame()

In [None]:
# Metrics of the current run, collected as one future DataFrame row
row_data = {
    'Classifier': classifier,
    'Accuracy': round(total_accuracy, 4),
    'Balanced Accuracy': round(balanced_accuracy, 4),
    'Precision': round(precision, 4),
    'Recall': round(recall, 4),
    'F1-Score': round(f1, 4)
}

# One extra column per class, holding that class's accuracy.
# - Keys are converted to str: column names read back from the CSV are always
#   strings, so a numeric label used as-is would not match the stored column
#   and the next concat would create a duplicate column for it.
# - Values are rounded to 4 decimals like the other metrics in the row.
row_data.update(
    {str(class_label): round(value, 4) for class_label, value in class_accuracies.items()}
)

In [None]:
# Wrapping the metrics dict in a list turns it into a single-row table
new_line = pd.DataFrame([row_data])

# Append it below the rows loaded earlier, renumbering the index from 0
frames = (evaluation, new_line)
evaluation = pd.concat(frames, ignore_index=True)

In [None]:
# Bare expression: the notebook renders the evaluation table as the cell output
evaluation

In [None]:
# Write the updated evaluation table back to the CSV it was loaded from,
# leaving the row index out of the file
evaluation_csv_path = f"../results/{dataset}_models_evaluation.csv"
evaluation.to_csv(evaluation_csv_path, index=False)