In [1]:
# Import libraries
from notebooks.training_utils import (
    load_training_data, train_and_evaluate, save_models, 
    log_to_mlflow, print_summary
)
import numpy as np
import xgboost as xgb
import mlflow
import mlflow.xgboost

# Configure MLflow before any run is created: point the client at a local
# file-based store and select the experiment all runs in this notebook use.
# NOTE(review): the URI is a relative path, so the store location presumably
# depends on the kernel's working directory — confirm the launch directory.
TRACKING_URI = "file:./mlruns"
EXPERIMENT_NAME = "network-intrusion-detection"

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

# Echo the effective configuration so it is visible in the cell output.
print("✅ Libraries imported")
print(f"MLflow Tracking URI: {mlflow.get_tracking_uri()}")

✅ Libraries imported
MLflow Tracking URI: file:./mlruns


  return FileStore(store_uri, store_uri)


## 1. Load Data

In [2]:
# Load the SMOTE-resampled training split together with the test split and
# the project root used later when saving models.
X_train_smote, X_test, y_train_smote, y_test, project_root = load_training_data(use_smote=True)

# Load the original (non-resampled) training split for the scale_pos_weight
# strategy. The test split and project root returned by this second call are
# discarded; the ones from the SMOTE call above are used throughout.
X_train, _, y_train, _, _ = load_training_data(use_smote=False)

# scale_pos_weight = (# label-0 samples) / (# label-1 samples), counted on the
# original, imbalanced training labels.
n_negative = int(np.sum(y_train == 0))
n_positive = int(np.sum(y_train == 1))
if n_positive == 0:
    # Dividing NumPy scalars by zero only emits a RuntimeWarning and yields
    # inf/nan, which would then be handed to XGBoost as a weight. Fail loudly.
    raise ValueError(
        "Cannot compute scale_pos_weight: y_train contains no positive (label == 1) samples"
    )
scale_pos_weight = n_negative / n_positive
print(f"Scale pos weight: {scale_pos_weight:.2f}")

Loading SMOTE training data...
  Training set: (446182, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=223091
Loading original training data...
  Training set: (231839, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=8748
Scale pos weight: 25.50


## 2. Train XGBoost Models

In [3]:
# Hyperparameters common to both imbalance-handling strategies, kept in one
# place so the two models differ only in how class imbalance is addressed.
common_xgb_kwargs = dict(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    random_state=42,
    n_jobs=-1,
    eval_metric='logloss',
)

# NOTE(review): the recorded output for this cell shows accuracy / recall /
# ROC-AUC / PR-AUC of 1.0000 for both models on the test set. Scores this
# perfect are worth a leakage check (e.g. label-derived features) — confirm.

# Strategy 1: fit on the SMOTE-resampled training data.
xgb_smote = xgb.XGBClassifier(**common_xgb_kwargs)
xgb_smote, metrics_smote = train_and_evaluate(
    xgb_smote,
    X_train_smote,
    y_train_smote,
    X_test,
    y_test,
    "XGBoost - SMOTE Strategy",
)

# Strategy 2: fit on the original training data and let XGBoost re-weight the
# positive class via scale_pos_weight.
xgb_weighted = xgb.XGBClassifier(
    scale_pos_weight=scale_pos_weight,
    **common_xgb_kwargs,
)
xgb_weighted, metrics_weighted = train_and_evaluate(
    xgb_weighted,
    X_train,
    y_train,
    X_test,
    y_test,
    "XGBoost - Scale Pos Weight Strategy",
)

TRAINING: XGBoost - SMOTE Strategy
✅ Training completed in 4.21 seconds

Test Set Metrics:
  accuracy: 1.0000
  precision: 0.9991
  recall: 1.0000
  f1: 0.9995
  roc_auc: 1.0000
  pr_auc: 1.0000
  train_time: 4.21s
TRAINING: XGBoost - Scale Pos Weight Strategy
✅ Training completed in 2.71 seconds

Test Set Metrics:
  accuracy: 1.0000
  precision: 0.9991
  recall: 1.0000
  f1: 0.9995
  roc_auc: 1.0000
  pr_auc: 1.0000
  train_time: 2.71s


## 3. Save Models

In [4]:
# Persist both fitted models and their metrics under the project root.
# The recorded output shows the artifacts written as xgb_smote.pkl,
# xgb_weighted.pkl and xgb_metrics.pkl, so the prefix below presumably drives
# the file names — verify against save_models.
# NOTE(review): artifacts are pickles; only load them from trusted locations.
model_prefix = 'xgb'
save_models(
    xgb_smote,
    xgb_weighted,
    metrics_smote,
    metrics_weighted,
    model_prefix,
    project_root,
)

✅ Saved: /Users/matthewweaver/Repositories/nidstream/models/xgb_smote.pkl
✅ Saved: /Users/matthewweaver/Repositories/nidstream/models/xgb_weighted.pkl
✅ Saved metrics: /Users/matthewweaver/Repositories/nidstream/models/xgb_metrics.pkl


## 4. Log to MLflow

In [5]:
# Hyperparameters recorded alongside each run. These are written out by hand
# and must be kept in sync with the values used when the models were built.
shared_logged_params = {"n_estimators": 100, "max_depth": 6, "learning_rate": 0.1}
weighted_logged_params = {**shared_logged_params, "scale_pos_weight": float(scale_pos_weight)}

# One entry per model, in logging order: SMOTE first, then the weighted model.
# Each tuple holds the arguments that differ between the two log_to_mlflow calls.
mlflow_run_specs = [
    (xgb_smote, metrics_smote, "XGB_SMOTE", "SMOTE",
     shared_logged_params, X_train_smote, y_train_smote),
    (xgb_weighted, metrics_weighted, "XGB_ScalePosWeight", "Scale_Pos_Weight",
     weighted_logged_params, X_train, y_train),
]

for (fitted_model, run_metrics, run_label, strategy_label,
     logged_params, train_features, train_labels) in mlflow_run_specs:
    log_to_mlflow(
        fitted_model, run_metrics, run_label, "XGBoost", strategy_label,
        logged_params,
        train_features, X_test, train_labels,
        mlflow.xgboost
    )

print("\n✅ All models logged to MLflow")

Logging XGB_SMOTE to MLflow...




  ✅ Run ID: 2b41b312dbcc492db690f8de36be99b7
Logging XGB_ScalePosWeight to MLflow...




  ✅ Run ID: 6df7d422b4bf4248964d1d11a4bd3397

✅ All models logged to MLflow


## 5. Summary

In [6]:
# Print the final comparison of the two imbalance-handling strategies.
# NOTE(review): in the recorded output both strategies report identical
# PR-AUC, F1 and Recall, yet a "better" strategy is still announced, and the
# scale_pos_weight run is labelled "Class Weight Strategy". Both behaviours
# come from print_summary — confirm its tie handling and labels there.
model_label = "XGBoost"
print_summary(
    metrics_smote,
    metrics_weighted,
    model_label,
)


XGBOOST TRAINING COMPLETE

SMOTE Strategy:
  PR-AUC: 1.0000
  F1 Score: 0.9995
  Recall: 1.0000

Class Weight Strategy:
  PR-AUC: 1.0000
  F1 Score: 0.9995
  Recall: 1.0000

✅ Better strategy for XGBoost: Class Weight
