# Notebook 05: Visualization Analysis

This notebook supports the results and discussion sections of the paper titled:

**Performance Assessment of Machine Learning Models for Network Anomaly Detection: A Case Study with CICIDS2017**

We visualize the anomaly-detection results of four models — Isolation Forest (IF), One-Class SVM (SVM), K-Means, and Local Outlier Factor (LOF) — across four protocols (HTTP, DNS, DHCP, BROWSER).


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import os

# Plot styling shared by every figure in this notebook.
# `set_theme` is the preferred spelling; `sns.set` is only a legacy alias for it.
sns.set_theme(style='whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)

# Load data
# The path is resolved relative to the kernel's working directory.
DATA_PATH = 'output_packets_data.csv'
df = pd.read_csv(DATA_PATH)

# Fail fast with a readable message if the columns the anomaly-count plot
# relies on are absent, instead of hitting a bare KeyError further down.
REQUIRED_COLUMNS = {'protocol', 'model', 'anomaly'}
missing_columns = REQUIRED_COLUMNS - set(df.columns)
assert not missing_columns, f"{DATA_PATH} is missing expected columns: {sorted(missing_columns)}"

df.head()

## 1. Anomaly Count per Protocol and Model

In [None]:
# Keep only the rows flagged as anomalous, then tally them per (protocol, model) pair.
# NOTE(review): assumes `anomaly == 1` marks an anomaly — TODO confirm the encoding
# used when the CSV was written (scikit-learn detectors emit -1 for outliers).
anomalous_rows = df.loc[df['anomaly'].eq(1)]
anomaly_counts = (
    anomalous_rows
    .groupby(['protocol', 'model'])
    .size()
    .rename('count')
    .reset_index()
)

# Grouped bars: one cluster per protocol, one bar per model.
# Pairs with no flagged rows do not appear in `anomaly_counts`, so they get no bar.
ax = sns.barplot(x='protocol', y='count', hue='model', data=anomaly_counts)
ax.set(
    title='Anomalies Detected per Protocol by Model',
    xlabel='Protocol',
    ylabel='Anomaly Count',
)
ax.legend(title='Model')
ax.figure.tight_layout()
plt.show()

## 2. Classification Report Comparison

In [None]:
# Summary scores per model, expanded into one record per model, e.g.
# {'model': 'IF', 'precision': 0.91, 'recall': 0.84, 'f1': 0.87}.
# NOTE(review): these values are typed in by hand, not computed from `df`;
# confirm they match the saved classification reports.
metric_names = ('precision', 'recall', 'f1')
scores_by_model = {
    'IF': (0.91, 0.84, 0.87),
    'SVM': (0.95, 0.78, 0.86),
    'KMeans': (0.89, 0.92, 0.90),
    'LOF': (0.80, 0.65, 0.71),
}
metrics = [
    {'model': model_name, **dict(zip(metric_names, scores))}
    for model_name, scores in scores_by_model.items()
]

# Wide table (one row per model) -> long table (one row per model/metric pair),
# so each metric can be drawn as its own bar within a model's cluster.
metric_df = pd.DataFrame(metrics)
metric_df_melted = pd.melt(metric_df, id_vars='model', var_name='metric', value_name='score')

ax = sns.barplot(x='model', y='score', hue='metric', data=metric_df_melted)
# The y-axis is pinned to [0, 1] so bar heights are comparable across metrics.
ax.set(
    title='Model Performance Comparison',
    xlabel='Model',
    ylabel='Score',
    ylim=(0, 1),
)
ax.figure.tight_layout()
plt.show()