In [7]:
from notebooks.training_utils import (
    load_training_data, train_and_evaluate, save_models, 
    log_to_mlflow, print_summary
)
from sklearn.linear_model import LogisticRegression
import mlflow
import mlflow.sklearn

# MLflow configuration: a local file-based tracking store, and the experiment
# under which every run from this notebook is grouped.
# NOTE(review): "./mlruns" is a relative path, so it presumably resolves
# against the kernel's working directory — confirm the notebook is always
# started from the same location.
MLFLOW_TRACKING_URI = "file:./mlruns"
MLFLOW_EXPERIMENT_NAME = "network-intrusion-detection"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

# Echo the effective tracking URI so the cell output records where runs go.
print("✅ Libraries imported")
print(f"MLflow Tracking URI: {mlflow.get_tracking_uri()}")

✅ Libraries imported
MLflow Tracking URI: file:./mlruns


## 1. Load Data

In [8]:
# SMOTE-resampled training split (classes balanced) together with the test
# split and the project root that later cells reuse.
(
    X_train_smote,
    X_test,
    y_train_smote,
    y_test,
    project_root,
) = load_training_data(use_smote=True)

# Original, imbalanced training split for the class-weight strategy. Only the
# training features and labels are kept; the other three return values are
# discarded.
X_train, _, y_train, _, _ = load_training_data(use_smote=False)

Loading SMOTE training data...
  Training set: (446182, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=223091
Loading original training data...
  Training set: (231839, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=8748


## 2. Train Logistic Regression Models

In [9]:
# Settings shared by both imbalance-handling strategies, kept in one place so
# the two estimators cannot drift apart.
# NOTE(review): n_jobs in LogisticRegression parallelizes across classes for
# one-vs-rest fits, so it likely has no effect on this two-class task — confirm.
LR_SHARED_PARAMS = {"max_iter": 1000, "random_state": 42, "n_jobs": -1}

# SMOTE strategy: default class weights, fitted on the resampled training set.
lr_smote, metrics_smote = train_and_evaluate(
    LogisticRegression(**LR_SHARED_PARAMS),
    X_train_smote,
    y_train_smote,
    X_test,
    y_test,
    "Logistic Regression - SMOTE Strategy",
)

# Class-weight strategy: the original training set, with class_weight="balanced"
# compensating for the imbalance instead of resampling.
lr_weighted, metrics_weighted = train_and_evaluate(
    LogisticRegression(class_weight="balanced", **LR_SHARED_PARAMS),
    X_train,
    y_train,
    X_test,
    y_test,
    "Logistic Regression - Class Weight Strategy",
)

TRAINING: Logistic Regression - SMOTE Strategy
✅ Training completed in 8.98 seconds

Test Set Metrics:
  accuracy: 0.9999
  precision: 0.9982
  recall: 1.0000
  f1: 0.9991
  roc_auc: 1.0000
  pr_auc: 0.9997
  train_time: 8.98s
TRAINING: Logistic Regression - Class Weight Strategy
✅ Training completed in 4.46 seconds

Test Set Metrics:
  accuracy: 0.9999
  precision: 0.9986
  recall: 1.0000
  f1: 0.9993
  roc_auc: 1.0000
  pr_auc: 1.0000
  train_time: 4.46s


## 3. Save Models

In [10]:
# Persist both fitted models and their metrics; 'lr' is the filename prefix
# (the cell output lists lr_smote.pkl, lr_weighted.pkl and lr_metrics.pkl
# under the project's models/ directory).
save_models(
    lr_smote,
    lr_weighted,
    metrics_smote,
    metrics_weighted,
    'lr',
    project_root,
)

✅ Saved: /Users/matthewweaver/Repositories/nidstream/models/lr_smote.pkl
✅ Saved: /Users/matthewweaver/Repositories/nidstream/models/lr_weighted.pkl
✅ Saved metrics: /Users/matthewweaver/Repositories/nidstream/models/lr_metrics.pkl


## 4. Log to MLflow

In [11]:
# The params dicts below are hand-maintained copies of the estimator settings
# used when the models were constructed. They record the seed (random_state=42)
# alongside max_iter so that a tracked run shows everything needed to
# reproduce the fit.
# NOTE(review): presumably log_to_mlflow forwards this dict to MLflow as run
# parameters — confirm against notebooks.training_utils.

# Log SMOTE model
log_to_mlflow(
    lr_smote, metrics_smote, "LR_SMOTE", "LogisticRegression", "SMOTE",
    {"max_iter": 1000, "random_state": 42},
    X_train_smote, X_test, y_train_smote,
    mlflow.sklearn
)

# Log Weighted model
log_to_mlflow(
    lr_weighted, metrics_weighted, "LR_Weighted", "LogisticRegression", "Class_Weight",
    {"max_iter": 1000, "random_state": 42, "class_weight": "balanced"},
    X_train, X_test, y_train,
    mlflow.sklearn
)

print("\n✅ All models logged to MLflow")

Logging LR_SMOTE to MLflow...




  ✅ Run ID: d2ce6a8e11c343988962c66b199c5fa3
Logging LR_Weighted to MLflow...




  ✅ Run ID: 28eaed66932d4597bac1aeb03ac9f9ce

✅ All models logged to MLflow


## 5. Summary

In [12]:
# Print the side-by-side recap of the two strategies and which one is better.
# NOTE(review): both strategies score ~1.0 on every reported test metric, which
# is unusually high for a linear model — worth checking for feature leakage or
# train/test overlap before relying on these numbers.
print_summary(
    metrics_smote,
    metrics_weighted,
    "Logistic Regression",
)


LOGISTIC REGRESSION TRAINING COMPLETE

SMOTE Strategy:
  PR-AUC: 0.9997
  F1 Score: 0.9991
  Recall: 1.0000

Class Weight Strategy:
  PR-AUC: 1.0000
  F1 Score: 0.9993
  Recall: 1.0000

✅ Better strategy for Logistic Regression: Class Weight
