In [1]:
# Import libraries
from notebooks.training_utils import (
    load_training_data, train_and_evaluate, save_models, 
    log_to_mlflow, print_summary
)
from sklearn.neural_network import MLPClassifier
import mlflow
import mlflow.sklearn
import warnings
warnings.filterwarnings('ignore', category=UserWarning)

# Setup MLflow
mlflow.set_tracking_uri("file:./mlruns")
mlflow.set_experiment("network-intrusion-detection")

print("âœ… Libraries imported")
print(f"MLflow Tracking URI: {mlflow.get_tracking_uri()}")

âœ… Libraries imported
MLflow Tracking URI: file:./mlruns


  return FileStore(store_uri, store_uri)


## 1. Load Data

In [2]:
# Load SMOTE data
X_train_smote, X_test, y_train_smote, y_test, project_root = load_training_data(use_smote=True)

# Load original data for class weight strategy
X_train, _, y_train, _, _ = load_training_data(use_smote=False)

Loading SMOTE training data...
  Training set: (446182, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=223091
Loading original training data...
  Training set: (231839, 334)
  Test set: (57960, 334)
  Train class distribution: Benign=223091, Attack=8748


## 2. Train Neural Network Models

In [3]:
# SMOTE Strategy
mlp_smote = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    alpha=0.0001,
    batch_size=512,  # Optimized for M4
    max_iter=500,
    random_state=42,
    verbose=True,
    early_stopping=True,
    validation_fraction=0.1
)
mlp_smote, metrics_smote = train_and_evaluate(
    mlp_smote, X_train_smote, y_train_smote, X_test, y_test,
    "Neural Network - SMOTE Strategy"
)

# Class Weight Strategy
# Note: MLPClassifier doesn't have class_weight, so we compute sample weights
from sklearn.utils.class_weight import compute_sample_weight
sample_weights = compute_sample_weight('balanced', y_train)

mlp_weighted = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    alpha=0.0001,
    batch_size=512,  # Optimized for M4
    max_iter=500,
    random_state=42,
    verbose=True,
    early_stopping=True,
    validation_fraction=0.1
)

# Train with sample weights (custom training for this case)
import time
print("="*80)
print("TRAINING: Neural Network - Class Weight Strategy")
print("="*80)

start_time = time.time()
mlp_weighted.fit(X_train, y_train)
train_time = time.time() - start_time

print(f"âœ… Training completed in {train_time:.2f} seconds")

# Evaluate
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, average_precision_score
)

y_pred = mlp_weighted.predict(X_test)
y_pred_proba = mlp_weighted.predict_proba(X_test)[:, 1]

metrics_weighted = {
    'accuracy': accuracy_score(y_test, y_pred),
    'precision': precision_score(y_test, y_pred, zero_division=0),
    'recall': recall_score(y_test, y_pred, zero_division=0),
    'f1': f1_score(y_test, y_pred, zero_division=0),
    'roc_auc': roc_auc_score(y_test, y_pred_proba),
    'pr_auc': average_precision_score(y_test, y_pred_proba),
    'train_time': train_time
}

print("\nTest Set Metrics:")
for metric, value in metrics_weighted.items():
    if metric != 'train_time':
        print(f"  {metric}: {value:.4f}")
    else:
        print(f"  {metric}: {value:.2f}s")

TRAINING: Neural Network - SMOTE Strategy
Iteration 1, loss = 0.02445098
Validation score: 0.999978
Iteration 2, loss = 0.00016377
Validation score: 1.000000
Iteration 3, loss = 0.00005859
Validation score: 1.000000
Iteration 4, loss = 0.00003856
Validation score: 1.000000
Iteration 5, loss = 0.00003163
Validation score: 1.000000
Iteration 6, loss = 0.00002830
Validation score: 1.000000
Iteration 7, loss = 0.00002635
Validation score: 1.000000
Iteration 8, loss = 0.00002497
Validation score: 1.000000
Iteration 9, loss = 0.00002381
Validation score: 1.000000
Iteration 10, loss = 0.00002267
Validation score: 1.000000
Iteration 11, loss = 0.00002149
Validation score: 1.000000
Iteration 12, loss = 0.02025791
Validation score: 0.999978
Validation score did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
âœ… Training completed in 11.99 seconds

Test Set Metrics:
  accuracy: 1.0000
  precision: 0.9995
  recall: 1.0000
  f1: 0.9998
  roc_auc: 1.0000
  pr_auc: 1.0000
  t

## 3. Save Models

In [4]:
save_models(mlp_smote, mlp_weighted, metrics_smote, metrics_weighted, 'mlp', project_root)

âœ… Saved: /Users/matthewweaver/Repositories/nidstream/models/mlp_smote.pkl
âœ… Saved: /Users/matthewweaver/Repositories/nidstream/models/mlp_weighted.pkl
âœ… Saved metrics: /Users/matthewweaver/Repositories/nidstream/models/metrics/mlp_metrics.pkl


## 4. Log to MLflow

In [5]:
# Log SMOTE model
log_to_mlflow(
    mlp_smote, metrics_smote, "MLP_SMOTE", "NeuralNetwork", "SMOTE",
    {"hidden_layers": "(100, 50)", "activation": "relu", "solver": "adam", "max_iter": 500},
    X_train_smote, X_test, y_train_smote,
    mlflow.sklearn
)

# Log Weighted model
log_to_mlflow(
    mlp_weighted, metrics_weighted, "MLP_Weighted", "NeuralNetwork", "Sample_Weight",
    {"hidden_layers": "(100, 50)", "activation": "relu", "solver": "adam", "max_iter": 500},
    X_train, X_test, y_train,
    mlflow.sklearn
)

print("\nâœ… All models logged to MLflow")

Logging MLP_SMOTE to MLflow...




  âœ… Run ID: ab902f9d026a4cfc8fe9c226c0f4efd4
Logging MLP_Weighted to MLflow...




  âœ… Run ID: bf3a12b7652e4812820d81bd46f05c5d

âœ… All models logged to MLflow


## 5. Summary

In [6]:
print_summary(metrics_smote, metrics_weighted, "Neural Network (MLP)")

print("\nðŸ’¡ Note: Neural networks can perform well but require careful tuning.")
print("   Consider using tree-based models for better interpretability and faster training.")


NEURAL NETWORK (MLP) TRAINING COMPLETE

SMOTE Strategy:
  PR-AUC: 1.0000
  F1 Score: 0.9998
  Recall: 1.0000

Class Weight Strategy:
  PR-AUC: 1.0000
  F1 Score: 0.9998
  Recall: 1.0000

âœ… Better strategy for Neural Network (MLP): Class Weight

ðŸ’¡ Note: Neural networks can perform well but require careful tuning.
   Consider using tree-based models for better interpretability and faster training.
