In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# --- Imports ---
import numpy as np
import pandas as pd
from pathlib import Path
import joblib
from sklearn.ensemble import IsolationForest
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score, f1_score
)

In [5]:
DATA_DIR = Path("/content/drive/MyDrive/zeusOps/data/UNSW-NB15")
MODEL_DIR = Path("/content/drive/MyDrive/zeusOps/models")

# --- Load Preprocessed Data ---
X_train = pd.read_pickle(DATA_DIR / "unsw_x_train_v2.pkl")
X_test  = pd.read_pickle(DATA_DIR / "unsw_x_test_v2.pkl")
y_test  = pd.read_pickle(DATA_DIR / "unsw_y_test_v2.pkl")

# Convert to numpy if needed
X_train = X_train.values if hasattr(X_train, "values") else np.array(X_train)
X_test  = X_test.values if hasattr(X_test, "values") else np.array(X_test)

In [6]:
# Binary labels: 0 = normal (label=7), 1 = attack
y_test_binary = np.where(y_test == 7, 0, 1)

# --- Hyperparameter Grid ---
param_grid = {
    "n_estimators": [200, 400],
    "max_samples": [0.6, 0.8, 1.0],
    "contamination": [0.05, 0.07, 0.1]
}

best_model = None
best_f1 = -1
best_params = {}

In [7]:
# --- Grid Search ---
for n in param_grid["n_estimators"]:
    for ms in param_grid["max_samples"]:
        for c in param_grid["contamination"]:
            print(f"Training IF with n={n}, max_samples={ms}, contamination={c}")

            if_model = IsolationForest(
                n_estimators=n,
                max_samples=ms,
                contamination=c,
                random_state=42,
                n_jobs=-1
            )
            if_model.fit(X_train)

            # Predict: IF outputs {1 = normal, -1 = anomaly}
            y_pred = if_model.predict(X_test)
            y_pred_binary = np.where(y_pred == 1, 0, 1)

            f1 = f1_score(y_test_binary, y_pred_binary)
            print(f"F1 = {f1:.4f}")

            if f1 > best_f1:
                best_f1 = f1
                best_model = if_model
                best_params = {"n_estimators": n, "max_samples": ms, "contamination": c}

print("\nBest Params:", best_params)
print("Best F1 Score:", best_f1)

Training IF with n=200, max_samples=0.6, contamination=0.05
F1 = 0.2462
Training IF with n=200, max_samples=0.6, contamination=0.07
F1 = 0.2531
Training IF with n=200, max_samples=0.6, contamination=0.1
F1 = 0.2478
Training IF with n=200, max_samples=0.8, contamination=0.05
F1 = 0.2391
Training IF with n=200, max_samples=0.8, contamination=0.07
F1 = 0.2501
Training IF with n=200, max_samples=0.8, contamination=0.1
F1 = 0.2454
Training IF with n=200, max_samples=1.0, contamination=0.05
F1 = 0.2409
Training IF with n=200, max_samples=1.0, contamination=0.07
F1 = 0.2511
Training IF with n=200, max_samples=1.0, contamination=0.1
F1 = 0.2475
Training IF with n=400, max_samples=0.6, contamination=0.05
F1 = 0.2486
Training IF with n=400, max_samples=0.6, contamination=0.07
F1 = 0.2560
Training IF with n=400, max_samples=0.6, contamination=0.1
F1 = 0.2510
Training IF with n=400, max_samples=0.8, contamination=0.05
F1 = 0.2307
Training IF with n=400, max_samples=0.8, contamination=0.07
F1 = 0.2

In [8]:
# --- Final Evaluation with best model ---
y_pred_best = best_model.predict(X_test)
y_pred_binary_best = np.where(y_pred_best == 1, 0, 1)

print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_binary_best))
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_binary_best, digits=4))
print("ROC-AUC:", roc_auc_score(y_test_binary, y_pred_binary_best))

# --- Save Best Model ---
joblib.dump(best_model, MODEL_DIR / "if_tuned.pkl")



Confusion Matrix:
[[368810  23144]
 [ 13606   6323]]

Classification Report:
              precision    recall  f1-score   support

           0     0.9644    0.9410    0.9525    391954
           1     0.2146    0.3173    0.2560     19929

    accuracy                         0.9108    411883
   macro avg     0.5895    0.6291    0.6043    411883
weighted avg     0.9281    0.9108    0.9188    411883

ROC-AUC: 0.6291142927881331


['/content/drive/MyDrive/zeusOps/models/if_tuned.pkl']