#### Imports

In [1]:
import joblib
import json
from pathlib import Path
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score, precision_recall_curve, auc
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler

# TensorFlow / Keras for MLP & Autoencoder
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

2026-01-20 19:42:49.811004: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2026-01-20 19:43:14.613088: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-20 19:43:26.383614: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# -----------------------------
# Helper function for PR AUC
# -----------------------------
def compute_pr_auc(y_true, y_scores):
    """Return the area under the precision-recall curve.

    Args:
        y_true: Ground-truth binary labels.
        y_scores: Continuous scores where a higher value means "more likely positive".

    Returns:
        The area under the precision-recall curve, obtained by passing the
        (recall, precision) points to ``sklearn.metrics.auc``.
    """
    # precision_recall_curve and auc are already imported in the notebook's
    # import cell, so no function-local import is needed.
    precision, recall, _ = precision_recall_curve(y_true, y_scores)
    return auc(recall, precision)


####  Ensure folders exist

In [4]:
# Output locations, relative to the notebook's working directory.
ARTIFACTS_DIR = Path("artifacts")
MODELS_DIR = Path("models")

# Create both folders up front; exist_ok=True keeps re-runs from failing.
for _out_dir in (ARTIFACTS_DIR, MODELS_DIR):
    _out_dir.mkdir(exist_ok=True)

#### Load preprocessed data

In [5]:
# Load the stored train/validation splits.
# NOTE(review): joblib.load unpickles the file, so only load splits from a trusted source.
data = joblib.load(ARTIFACTS_DIR / "data_splits.pkl")
X_train = data["X_train"]
y_train = data["y_train"]
X_val = data["X_val"]
y_val = data["y_val"]

# Standardise features for the MLP / Autoencoder / SVM. The scaler is fitted on
# the training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Persist the fitted scaler so it can be reused at deployment time.
joblib.dump(scaler, MODELS_DIR / "scaler.pkl")

['models/scaler.pkl']

In [6]:
# Metrics collected per model: each cell below stores its results under the model's name.
metrics_deep_anomaly = dict()


## MLP Neural Network

In [8]:
# Seed Python's random module, NumPy and TensorFlow so that weight initialisation
# and batch shuffling (and therefore the reported metrics) repeat across runs.
tf.keras.utils.set_random_seed(42)

# Feed-forward binary classifier: two hidden ReLU layers and one sigmoid output unit.
# An explicit Input layer is used instead of Dense(..., input_shape=...), which
# Keras 3 warns against for Sequential models.
mlp = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
mlp.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['AUC'])

# Supervised training on the scaled training split; the validation split is only monitored.
mlp.fit(X_train_scaled, y_train, validation_data=(X_val_scaled, y_val), epochs=20, batch_size=32, verbose=0)

# Sigmoid outputs are used as scores; predictions are binarised at 0.5.
y_val_proba_mlp = mlp.predict(X_val_scaled).ravel()
y_val_pred_mlp = (y_val_proba_mlp >= 0.5).astype(int)

# Ranking metrics use the raw probabilities, threshold metrics use the binary labels.
metrics_deep_anomaly["MLP"] = {
    "roc_auc": roc_auc_score(y_val, y_val_proba_mlp),
    "pr_auc": compute_pr_auc(y_val, y_val_proba_mlp),
    "f1": f1_score(y_val, y_val_pred_mlp),
    "precision": precision_score(y_val, y_val_pred_mlp),
    "recall": recall_score(y_val, y_val_pred_mlp)
}

mlp.save(MODELS_DIR / "mlp.h5")
print("MLP metrics:", metrics_deep_anomaly["MLP"])

2026-01-20 19:46:45.001900: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 687us/step




MLP metrics: {'roc_auc': 0.9078172261301838, 'pr_auc': 0.553612900240347, 'f1': 0.5503432494279176, 'precision': 0.492827868852459, 'recall': 0.6230569948186528}


## IsolationForest (Anomaly Detection)

In [9]:
# Fit the forest on the scaled training features; no labels are passed to fit.
iso = IsolationForest(
    n_estimators=200,
    contamination=0.1,
    random_state=42,
).fit(X_train_scaled)

# Negate decision_function so that a larger score means "more likely positive".
y_val_scores_iso = np.negative(iso.decision_function(X_val_scaled))

# Binarise at the median validation score: samples at or above it are predicted positive.
# NOTE(review): this threshold is computed from the validation scores themselves
# and does not use the contamination setting above; confirm this is intended.
threshold = np.median(y_val_scores_iso)
y_val_pred_iso = (y_val_scores_iso >= threshold).astype(int)

# Ranking metrics use the continuous scores, threshold metrics use the binary labels.
metrics_deep_anomaly["IsolationForest"] = dict(
    roc_auc=roc_auc_score(y_val, y_val_scores_iso),
    pr_auc=compute_pr_auc(y_val, y_val_scores_iso),
    f1=f1_score(y_val, y_val_pred_iso),
    precision=precision_score(y_val, y_val_pred_iso),
    recall=recall_score(y_val, y_val_pred_iso),
)

joblib.dump(iso, MODELS_DIR / "isolation_forest.pkl")
print("IsolationForest metrics:", metrics_deep_anomaly["IsolationForest"])

IsolationForest metrics: {'roc_auc': 0.6029977655197114, 'pr_auc': 0.21658276280320338, 'f1': 0.2486910994764398, 'precision': 0.15583989501312337, 'recall': 0.6152849740932642}


## One-Class SVM (Anomaly Detection)

In [10]:
# Fit an RBF one-class SVM on the scaled training features; no labels are passed to fit.
ocsvm = OneClassSVM(
    kernel='rbf',
    gamma='scale',
    nu=0.05,
).fit(X_train_scaled)

# Negate decision_function so that a larger score means "more likely positive".
y_val_scores_ocsvm = np.negative(ocsvm.decision_function(X_val_scaled))

# Binarise at the median validation score: samples at or above it are predicted positive.
# NOTE(review): the threshold is computed from the validation scores themselves.
threshold_ocsvm = np.median(y_val_scores_ocsvm)
y_val_pred_ocsvm = (y_val_scores_ocsvm >= threshold_ocsvm).astype(int)

# Ranking metrics use the continuous scores, threshold metrics use the binary labels.
metrics_deep_anomaly["OneClassSVM"] = dict(
    roc_auc=roc_auc_score(y_val, y_val_scores_ocsvm),
    pr_auc=compute_pr_auc(y_val, y_val_scores_ocsvm),
    f1=f1_score(y_val, y_val_pred_ocsvm),
    precision=precision_score(y_val, y_val_pred_ocsvm),
    recall=recall_score(y_val, y_val_pred_ocsvm),
)

joblib.dump(ocsvm, MODELS_DIR / "one_class_svm.pkl")
print("One-Class SVM metrics:", metrics_deep_anomaly["OneClassSVM"])

One-Class SVM metrics: {'roc_auc': 0.6043174324497923, 'pr_auc': 0.19582352711694906, 'f1': 0.2591623036649215, 'precision': 0.1624015748031496, 'recall': 0.6411917098445595}


## Autoencoder (Unsupervised)

In [12]:
# Seed Python's random module, NumPy and TensorFlow so the autoencoder's initial
# weights and batch order are repeatable, even when this cell is re-run on its own.
tf.keras.utils.set_random_seed(42)

input_dim = X_train_scaled.shape[1]
encoding_dim = 32

# Symmetric dense autoencoder: input -> 64 -> encoding_dim -> 64 -> input.
# The output layer is linear because the targets are standardised features.
input_layer = Input(shape=(input_dim,))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(encoding_dim, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# The network is trained to reproduce its own input; labels are not used.
autoencoder.fit(X_train_scaled, X_train_scaled,
                validation_data=(X_val_scaled, X_val_scaled),
                epochs=20, batch_size=32, verbose=0)

# Reconstruction error as anomaly score (mean squared error per sample)
X_val_pred = autoencoder.predict(X_val_scaled)
reconstruction_error = np.mean((X_val_scaled - X_val_pred)**2, axis=1)

# Binarise at the median validation error: samples at or above it are predicted positive.
# NOTE(review): the threshold is computed from the validation scores themselves.
threshold_ae = np.median(reconstruction_error)
y_val_pred_ae = (reconstruction_error >= threshold_ae).astype(int)

# Ranking metrics use the continuous error, threshold metrics use the binary labels.
metrics_deep_anomaly["Autoencoder"] = {
    "roc_auc": roc_auc_score(y_val, reconstruction_error),
    "pr_auc": compute_pr_auc(y_val, reconstruction_error),
    "f1": f1_score(y_val, y_val_pred_ae),
    "precision": precision_score(y_val, y_val_pred_ae),
    "recall": recall_score(y_val, y_val_pred_ae)
}

autoencoder.save(MODELS_DIR / "autoencoder.h5")
print("Autoencoder metrics:", metrics_deep_anomaly["Autoencoder"])

# -----------------------------
# Save metrics to JSON
# -----------------------------
# Writes every entry currently held in metrics_deep_anomaly, not only the autoencoder's.
with open(ARTIFACTS_DIR / "metrics_deep_anomaly.json", "w") as f:
    json.dump(metrics_deep_anomaly, f, indent=4)

[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 775us/step




Autoencoder metrics: {'roc_auc': 0.6452901223514207, 'pr_auc': 0.22554583633013764, 'f1': 0.268586387434555, 'precision': 0.16830708661417323, 'recall': 0.6645077720207254}


In [13]:
# Final status message; printed once every cell above has run.
print("✅ Deep Learning and anomaly models trained, metrics and models saved")

✅ Deep Learning and anomaly models trained, metrics and models saved


✅ Key Notes

MLP → simple feedforward neural network for binary classification.

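IsolationForest & One-Class SVM → unsupervised anomaly detection; anomaly scores are converted to binary predictions by thresholding at the median validation score, so roughly half of the validation samples are flagged as positive.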

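Autoencoder → per-sample mean squared reconstruction error is used as the anomaly score and is binarized at the median validation error, like the other anomaly detectors.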

Metrics → ROC AUC, PR AUC, F1, Precision, Recall

Saved artifacts:

artifacts/metrics_deep_anomaly.json

models/mlp.h5

models/isolation_forest.pkl

models/one_class_svm.pkl

models/autoencoder.h5

Scaler saved (models/scaler.pkl) → lets downstream notebooks and deployment reuse the same preprocessing.