# In [None]:
# IDS using Isolation Forest on pcap_data.csv
# -------------------------------------------------------------

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest

# -------------------------------------------------------------
# 1. Load dataset
# -------------------------------------------------------------

# Read the exported CSV into a DataFrame. The path is machine-specific;
# adjust it when running on another host.
df = pd.read_csv(r"C:\Users\ΝΙΚΟΣ\Desktop\pcap_data.csv")

print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; here it is not, so print the preview explicitly.
print(df.head())

# -------------------------------------------------------------
# 2. Preprocessing
# -------------------------------------------------------------

# Build the feature matrix from numeric/boolean columns only: StandardScaler
# cannot convert string columns to float, so passing `df.values` wholesale
# raises as soon as the CSV contains one.
numeric_df = df.select_dtypes(include=["number", "bool"]).astype(float)
if numeric_df.shape[1] == 0:
    raise ValueError(
        "No numeric columns found in dataset; cannot build feature matrix"
    )

skipped_columns = [col for col in df.columns if col not in numeric_df.columns]
if skipped_columns:
    # Make the column drop visible instead of silently changing the features.
    print("Skipping non-numeric columns:", skipped_columns)

# Infinite values are rejected by the scaler, so treat them as missing, then
# impute with the column median. Rows are never dropped, which keeps the
# feature matrix aligned with `df` for the later prediction column.
numeric_df = numeric_df.replace([np.inf, -np.inf], np.nan)
# A column that is entirely missing has a NaN median; fall back to 0.
numeric_df = numeric_df.fillna(numeric_df.median()).fillna(0.0)

# Convert to numpy
X_raw = numeric_df.to_numpy()

# Normalize each feature to zero mean and unit variance
scaler = StandardScaler()
X = scaler.fit_transform(X_raw)

print("Feature matrix shape:", X.shape)

# -------------------------------------------------------------
# 3. Train IDS model (Isolation Forest)
# -------------------------------------------------------------

# Hyper-parameters for the Isolation Forest, collected in one place.
forest_params = {
    "n_estimators": 200,
    "contamination": 0.05,
    "random_state": 42,
}

model = IsolationForest(**forest_params)
model.fit(X)

# Score the same rows the model was fitted on.
# Encoding of the labels: -1 = anomaly, 1 = normal
y_pred = model.predict(X)
df["prediction"] = y_pred

prediction_counts = df["prediction"].value_counts()
print(prediction_counts)

# -------------------------------------------------------------
# 4. Visualization
# -------------------------------------------------------------

# Plot the per-row label (1 / -1) against the row index.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df["prediction"], marker="o", linestyle="-", markersize=2)
ax.set_title("IDS Anomaly Detection Output")
ax.set_xlabel("Flow Index")
ax.set_ylabel("Normal (1) / Anomaly (-1)")
ax.grid(True)
plt.show()

# -------------------------------------------------------------
# 5. Percentage of anomalies
# -------------------------------------------------------------

# Share of rows labelled -1, expressed as a percentage.
is_anomaly = df["prediction"].eq(-1)
anomaly_ratio = is_anomaly.mean() * 100
print(f"Anomaly percentage: {anomaly_ratio:.2f}%")

# -------------------------------------------------------------
# 6. Save results
# -------------------------------------------------------------

# Write the DataFrame (including the added "prediction" column) to the
# current working directory, without the index column.
df.to_csv(path_or_buf="ids_output.csv", index=False)
print("Results saved to ids_output.csv")
