In [1]:
# Import libraries
from notebooks.training_utils import (
    load_training_data, train_and_evaluate, save_models, 
    log_to_mlflow, print_summary
)
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
import mlflow
import mlflow.sklearn

# Setup MLflow
mlflow.set_tracking_uri("file:./mlruns")
mlflow.set_experiment("network-intrusion-detection")

print("✅ Libraries imported")
print(f"MLflow Tracking URI: {mlflow.get_tracking_uri()}")

✅ Libraries imported
MLflow Tracking URI: file:./mlruns


  return FileStore(store_uri, store_uri)


## 1. Load Data

In [2]:
# Load SMOTE data
X_train_smote, X_test, y_train_smote, y_test, project_root = load_training_data(use_smote=True)

# Load original data for class weight strategy
X_train, _, y_train, _, _ = load_training_data(use_smote=False)

print("\n✅ LinearSVC trains much faster than RBF kernel SVM")

Loading SMOTE training data...
  Training set: (446182, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=223091
Loading original training data...
  Training set: (231839, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=8748

✅ LinearSVC trains much faster than RBF kernel SVM


## 2. Train Linear SVM Models

Linear SVM is 100x faster than RBF kernel SVM and performs well on high-dimensional data.

In [3]:
# SMOTE Strategy
svm_smote_base = LinearSVC(
    C=1.0,
    max_iter=2000,
    dual='auto',
    random_state=42,
    verbose=1
)
# Wrap with CalibratedClassifierCV to get predict_proba
svm_smote = CalibratedClassifierCV(svm_smote_base, cv=3)
svm_smote, metrics_smote = train_and_evaluate(
    svm_smote, X_train_smote, y_train_smote, X_test, y_test,
    "LinearSVM - SMOTE Strategy"
)

# Class Weight Strategy
svm_weighted_base = LinearSVC(
    C=1.0,
    max_iter=2000,
    dual='auto',
    class_weight='balanced',
    random_state=42,
    verbose=1
)
# Wrap with CalibratedClassifierCV to get predict_proba
svm_weighted = CalibratedClassifierCV(svm_weighted_base, cv=3)
svm_weighted, metrics_weighted = train_and_evaluate(
    svm_weighted, X_train, y_train, X_test, y_test,
    "LinearSVM - Class Weight Strategy"
)

TRAINING: LinearSVM - SMOTE Strategy
[LibLinear]iter  1 act 2.427e+05 pre 2.365e+05 delta 4.964e-01 f 2.975e+05 |g| 2.276e+06 CG   5
cg reaches trust region boundary
iter  2 act 2.866e+04 pre 2.517e+04 delta 6.083e-01 f 5.476e+04 |g| 1.624e+05 CG   8
cg reaches trust region boundary
iter  3 act 1.544e+04 pre 1.257e+04 delta 9.876e-01 f 2.610e+04 |g| 5.806e+04 CG   6
cg reaches trust region boundary
iter  4 act 1.020e+04 pre 1.019e+04 delta 9.890e-01 f 1.067e+04 |g| 1.224e+05 CG  12
iter  5 act 2.272e+02 pre 2.141e+02 delta 9.890e-01 f 4.668e+02 |g| 5.861e+04 CG   5
iter  6 act 9.841e+01 pre 8.981e+01 delta 9.890e-01 f 2.396e+02 |g| 1.079e+04 CG  12
iter  7 act -3.614e+03 pre 8.943e+01 delta 6.646e-02 f 1.412e+02 |g| 3.346e+03 CG  19
cg reaches trust region boundary
iter  7 act -5.028e+03 pre 5.066e+01 delta 1.661e-02 f 1.412e+02 |g| 3.346e+03 CG   8
cg reaches trust region boundary
iter  7 act -7.515e+02 pre 2.043e+01 delta 4.153e-03 f 1.412e+02 |g| 3.346e+03 CG   3
cg reaches trust re

## 3. Save Models

In [4]:
save_models(svm_smote, svm_weighted, metrics_smote, metrics_weighted, 'svm', project_root)

✅ Saved: /Users/matthewweaver/Repositories/nidstream/models/svm_smote.pkl
✅ Saved: /Users/matthewweaver/Repositories/nidstream/models/svm_weighted.pkl
✅ Saved metrics: /Users/matthewweaver/Repositories/nidstream/models/metrics/svm_metrics.pkl


## 4. Log to MLflow

In [5]:
# Log SMOTE model
log_to_mlflow(
    svm_smote, metrics_smote, "LinearSVM_SMOTE", "LinearSVM", "SMOTE",
    {"kernel": "linear", "C": 1.0, "max_iter": 2000},
    X_train_smote, X_test, y_train_smote,
    mlflow.sklearn
)

# Log Weighted model
log_to_mlflow(
    svm_weighted, metrics_weighted, "LinearSVM_Weighted", "LinearSVM", "Class_Weight",
    {"kernel": "linear", "C": 1.0, "max_iter": 2000, "class_weight": "balanced"},
    X_train, X_test, y_train,
    mlflow.sklearn
)

print("\n✅ All models logged to MLflow")

Logging LinearSVM_SMOTE to MLflow...




  ✅ Run ID: 7e850aa69a414ecd84c85e84a79a1b1f
Logging LinearSVM_Weighted to MLflow...




  ✅ Run ID: a73b5d9f8c35444d92ef901c318a8d34

✅ All models logged to MLflow


## 5. Summary

In [6]:
print_summary(metrics_smote, metrics_weighted, "LinearSVM")

print("\n💡 LinearSVM is optimized for large datasets and trains ~100x faster than RBF kernel SVM.")
print("   It performs well on high-dimensional data like network traffic features.")


LINEARSVM TRAINING COMPLETE

SMOTE Strategy:
  PR-AUC: 1.0000
  F1 Score: 0.9998
  Recall: 1.0000

Class Weight Strategy:
  PR-AUC: 1.0000
  F1 Score: 0.9998
  Recall: 1.0000

✅ Better strategy for LinearSVM: Class Weight

💡 LinearSVM is optimized for large datasets and trains ~100x faster than RBF kernel SVM.
   It performs well on high-dimensional data like network traffic features.
