In [1]:
# Import libraries
from notebooks.training_utils import (
    load_training_data, train_and_evaluate, save_models, 
    log_to_mlflow, print_summary
)
from sklearn.ensemble import RandomForestClassifier
import mlflow
import mlflow.sklearn

# Configure MLflow tracking. The store URI is a relative file path, so the
# run directory it points at depends on the kernel's working directory.
mlflow_store_uri = "file:./mlruns"
mlflow_experiment = "network-intrusion-detection"
mlflow.set_tracking_uri(mlflow_store_uri)
mlflow.set_experiment(mlflow_experiment)

# Echo the URI reported back by MLflow so the store location is visible in the output.
print(
    "✅ Libraries imported",
    f"MLflow Tracking URI: {mlflow.get_tracking_uri()}",
    sep="\n",
)

✅ Libraries imported
MLflow Tracking URI: file:./mlruns


  return FileStore(store_uri, store_uri)


## 1. Load Data

In [2]:
# Load the SMOTE-resampled training split together with the held-out test split.
X_train_smote, X_test, y_train_smote, y_test, project_root = load_training_data(use_smote=True)

# Load the original (un-resampled) training split for the class-weight strategy.
# The test split returned by this second call is kept just long enough to check
# it against the one above: both models are later evaluated on X_test / y_test,
# so the strategy comparison is only meaningful if the two loads agree on the
# test set.
X_train, X_test_orig, y_train, y_test_orig, _ = load_training_data(use_smote=False)

# NOTE: relies on the feature matrices exposing `.shape`, as the recorded
# "(57960, 334)" output suggests — confirm against training_utils.
assert X_test_orig.shape == X_test.shape, (
    f"Test feature shapes differ between loads: {X_test_orig.shape} vs {X_test.shape}"
)
assert len(y_test_orig) == len(y_test), (
    f"Test label counts differ between loads: {len(y_test_orig)} vs {len(y_test)}"
)

# Release the duplicate test split once it has been checked.
del X_test_orig, y_test_orig

Loading SMOTE training data...
  Training set: (446182, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=223091
Loading original training data...
  Training set: (231839, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=8748


## 2. Train Random Forest Models

In [3]:
# Hyperparameters shared by both imbalance-handling strategies, so the two
# forests differ only in how class imbalance is addressed.
shared_rf_kwargs = dict(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)

# NOTE(review): the recorded output shows recall and ROC-AUC of 1.0000 for both
# strategies; confirm that no feature leaks the label before trusting these numbers.

# Strategy 1 - SMOTE: fit on the resampled training split.
rf_smote, metrics_smote = train_and_evaluate(
    RandomForestClassifier(**shared_rf_kwargs),
    X_train_smote, y_train_smote, X_test, y_test,
    "Random Forest - SMOTE Strategy",
)

# Strategy 2 - class weights: fit on the original split with class_weight='balanced'.
rf_weighted, metrics_weighted = train_and_evaluate(
    RandomForestClassifier(**shared_rf_kwargs, class_weight='balanced'),
    X_train, y_train, X_test, y_test,
    "Random Forest - Class Weight Strategy",
)

TRAINING: Random Forest - SMOTE Strategy
✅ Training completed in 8.55 seconds

Test Set Metrics:
  accuracy: 0.9960
  precision: 0.9048
  recall: 1.0000
  f1: 0.9500
  roc_auc: 1.0000
  pr_auc: 1.0000
  train_time: 8.55s
TRAINING: Random Forest - Class Weight Strategy
✅ Training completed in 4.46 seconds

Test Set Metrics:
  accuracy: 0.9919
  precision: 0.8240
  recall: 1.0000
  f1: 0.9035
  roc_auc: 1.0000
  pr_auc: 0.9999
  train_time: 4.46s


## 3. Save Models

In [4]:
# Persist both forests and their metrics through the shared helper.
# 'rf' appears to be the artifact filename prefix (the recorded output lists
# rf_smote.pkl, rf_weighted.pkl and rf_metrics.pkl) — confirm in training_utils.
save_models(
    rf_smote,
    rf_weighted,
    metrics_smote,
    metrics_weighted,
    'rf',
    project_root,
)

✅ Saved: /Users/matthewweaver/Repositories/nidstream/models/rf_smote.pkl
✅ Saved: /Users/matthewweaver/Repositories/nidstream/models/rf_weighted.pkl
✅ Saved metrics: /Users/matthewweaver/Repositories/nidstream/models/rf_metrics.pkl


## 4. Log to MLflow

In [5]:
# One entry per imbalance strategy, in logging order:
# (model, metrics, run name, strategy label, logged hyperparameters,
#  training features, training labels)
# NOTE(review): the hyperparameter literals repeat the constructor arguments
# used in the training cell; keep the two in sync when tuning.
mlflow_runs = (
    (
        rf_smote, metrics_smote, "RF_SMOTE", "SMOTE",
        {"n_estimators": 100, "max_depth": 10},
        X_train_smote, y_train_smote,
    ),
    (
        rf_weighted, metrics_weighted, "RF_Weighted", "Class_Weight",
        {"n_estimators": 100, "max_depth": 10, "class_weight": "balanced"},
        X_train, y_train,
    ),
)

# Both runs share the model-type label, the test features and the sklearn flavor.
for run_model, run_metrics, run_name, strategy_label, run_params, train_X, train_y in mlflow_runs:
    log_to_mlflow(
        run_model, run_metrics, run_name, "RandomForest", strategy_label,
        run_params,
        train_X, X_test, train_y,
        mlflow.sklearn,
    )

print("\n✅ All models logged to MLflow")

Logging RF_SMOTE to MLflow...




  ✅ Run ID: 8c6680de25af489bb4e9c67d45685e29
Logging RF_Weighted to MLflow...




  ✅ Run ID: 53b41a442bb9473d88b6f88b62a71ad8

✅ All models logged to MLflow


## 5. Summary

In [6]:
# Print the final comparison of the two strategies; the recorded output reports
# PR-AUC, F1 and recall per strategy and names the better one.
print_summary(
    metrics_smote,
    metrics_weighted,
    "Random Forest",
)


RANDOM FOREST TRAINING COMPLETE

SMOTE Strategy:
  PR-AUC: 1.0000
  F1 Score: 0.9500
  Recall: 1.0000

Class Weight Strategy:
  PR-AUC: 0.9999
  F1 Score: 0.9035
  Recall: 1.0000

✅ Better strategy for Random Forest: SMOTE
