In [3]:
import os
import sys

parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(parent_dir)

import pandas as pd
from src.data import load_scaler
from src.cluster import load_kmeans_model
from src.eval import final_run
from src.model import AutoEncoder
import torch
import numpy as np
import torch.nn.functional as F
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix




In [19]:
def _coerce_config_value(text):
    """Convert a raw config value to ``int``, else ``float``, else return it unchanged.

    ``int`` is attempted first so that signed integers such as ``-3`` stay
    integers instead of silently becoming ``-3.0``.
    """
    for cast in (int, float):
        try:
            return cast(text)
        except ValueError:
            continue
    return text


def load_config(cluster_no, models_dir='../models'):
    """Read the ``key: value`` config file stored next to a cluster's autoencoder.

    The file is read from
    ``{models_dir}/ae_cluster_{cluster_no}/ae_cluster_{cluster_no}_config.txt``.
    Each line that contains a colon is split on the FIRST colon only, so values
    may themselves contain colons. Lines without a colon are ignored. Keys and
    values are stripped of surrounding whitespace; if a key occurs more than
    once, the last occurrence wins.

    Args:
        cluster_no: Cluster index used to build the directory and file name.
        models_dir: Directory that contains the ``ae_cluster_<n>`` folders.
            Defaults to ``'../models'`` (relative to the current working
            directory).

    Returns:
        dict mapping each key to an ``int`` when the value parses as an
        integer, a ``float`` when it parses as a float, and the stripped
        string otherwise.

    Raises:
        OSError: if the config file cannot be opened (e.g. it does not exist).
    """
    config_path = f'{models_dir}/ae_cluster_{cluster_no}/ae_cluster_{cluster_no}_config.txt'
    config = {}
    with open(config_path, "r") as f:
        for line in f:
            key, separator, value = line.partition(":")
            if not separator:
                # No colon on this line: not a key/value entry.
                continue
            config[key.strip()] = _coerce_config_value(value.strip())
    return config


In [None]:
# Per-cluster anomaly detection on the tuning set.
# Each sample is scaled, assigned to a k-means cluster, reconstructed by that
# cluster's autoencoder, and flagged as positive (1) when its mean squared
# reconstruction error exceeds the cluster's threshold.
# NOTE(review): this looks like an inlined copy of src.eval.final_run
# (same defaults for thresholds / tune_data_path) - confirm and deduplicate.
thresholds = [0.5, 0.5, 0.5, 0.5]   # threshold for each cluster
tune_data_path = '../data/tune_data.csv'
model_paths = '../checkpoints/'     # NOTE(review): not read in this cell; weights are loaded from ../models/ below

# -- Load data --
df = pd.read_csv(tune_data_path)

# Separate features and labels
X = df.drop(columns=["Class"]).values
y = df["Class"].values

# -- Scale & cluster data --
scaler = load_scaler()
X_scaled = scaler.transform(X)

kmeans_model = load_kmeans_model()
cluster_labels = kmeans_model.predict(X_scaled)

# Array for global predictions (stays 0 for any sample that is never scored)
y_pred = np.zeros_like(y)
cluster_metrics = {}

for i, threshold in enumerate(thresholds):
    print(f"Evaluating for cluster {i} with threshold {threshold}")

    # mask for points in cluster i
    mask = (cluster_labels == i)
    if not mask.any():
        # Metrics are undefined for zero samples and there is nothing to
        # predict, so do not load a model for an empty cluster.
        print(f"  Cluster {i} - n=0 (no samples assigned, skipping)")
        continue

    X_cluster = X_scaled[mask]
    y_cluster = y[mask]

    # -- Load for this cluster --
    # NOTE(security): torch.load unpickles the file, so only load checkpoints
    # from a trusted source (consider weights_only=True where the installed
    # PyTorch supports it).
    weights_path = f'../models/ae_cluster_{i}/ae_cluster_{i}.pt'
    weights = torch.load(weights_path, map_location=torch.device('cpu'))

    # -- Load config --
    cfg = load_config(cluster_no=i)

    # Initialize model with the saved architecture and load weights
    model = AutoEncoder(
        in_dim=X_cluster.shape[1],
        hidden_units=cfg['hidden_dim'],
        latent_features=cfg['latent'],
        num_layers=cfg['num_layers'],
    )
    model.load_state_dict(weights)
    model.eval()

    # Forward pass and compute reconstruction error
    X_cluster_t = torch.tensor(X_cluster, dtype=torch.float32)

    with torch.no_grad():
        out = model(X_cluster_t)          # out is a dict: {'z': ..., 'x_hat': ...}
        reconstructed = out['x_hat']      # (batch_size, in_dim)
        reconstruction_error = F.mse_loss(
            reconstructed,
            X_cluster_t,
            reduction='none'
        ).mean(dim=1)  # mean over features -> one error per sample

    # -- Thresholding --
    reconstruction_error_np = reconstruction_error.cpu().numpy()
    pred_cluster = (reconstruction_error_np > threshold).astype(int)

    # Put the predictions back into the global array
    y_pred[mask] = pred_cluster

    acc_c = accuracy_score(y_cluster, pred_cluster)
    prec_c = precision_score(y_cluster, pred_cluster, zero_division=0)
    rec_c = recall_score(y_cluster, pred_cluster, zero_division=0)
    f1_c = f1_score(y_cluster, pred_cluster, zero_division=0)

    cluster_metrics[i] = {
        "accuracy": acc_c,
        "precision": prec_c,
        "recall": rec_c,
        "f1": f1_c,
        "n_samples": len(y_cluster)
    }

    print(f"  Cluster {i} - n={len(y_cluster)}")
    print(f"    Accuracy : {acc_c:.4f}")
    print(f"    Precision: {prec_c:.4f}")
    print(f"    Recall   : {rec_c:.4f}")
    print(f"    F1       : {f1_c:.4f}")

# Samples whose cluster index has no threshold were never scored and keep the
# default prediction 0; surface that instead of letting it skew the metrics silently.
n_unscored = int((cluster_labels >= len(thresholds)).sum())
if n_unscored:
    print(f"WARNING: {n_unscored} samples fall in clusters without a threshold; predicted 0 by default")

# --- Global metrics over all clusters ---
acc = accuracy_score(y, y_pred)
prec = precision_score(y, y_pred, zero_division=0)
rec = recall_score(y, y_pred, zero_division=0)
f1 = f1_score(y, y_pred, zero_division=0)
# Fix the label order so the matrix is always 2x2 and matches the legend
# printed below, even if one class is absent from both y and y_pred.
cm = confusion_matrix(y, y_pred, labels=[0, 1])

error_rate = 1.0 - acc

print("\n=== Overall metrics ===")
print(f"Accuracy   : {acc:.4f}")
print(f"Error rate : {error_rate:.4f}")
print(f"Precision  : {prec:.4f}")
print(f"Recall     : {rec:.4f}")
print(f"F1-score   : {f1:.4f}")
print("Confusion matrix [ [TN FP], [FN TP] ]:")
print(cm)



Evaluating for cluster 0 with threshold 0.5
Evaluating for cluster 1 with threshold 0.5
Evaluating for cluster 2 with threshold 0.5
Evaluating for cluster 3 with threshold 0.5
  Cluster 3 - n=718
    Accuracy : 0.9485
    Precision: 0.7339
    Recall   : 0.9091
    F1       : 0.8122

=== Overall metrics ===
Accuracy   : 0.7439
Error rate : 0.2561
Precision  : 0.4307
Recall     : 0.8720
F1-score   : 0.5766
Confusion matrix [ [TN FP], [FN TP] ]:
[[1401  567]
 [  63  429]]


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
# Scratch sanity check: rebuild an autoencoder, load the weights left over from
# the evaluation cell above, and compute the per-sample reconstruction error in
# two equivalent ways.
# Depends on notebook state from the evaluation cell: `weights`, `X_cluster`
# and `X_scaled` (i.e. the LAST cluster that was evaluated).
# NOTE(review): the hyper-parameters below are hard-coded; load_state_dict will
# fail unless they match the architecture the checkpoint was saved with - confirm
# or switch to the values from load_config.

# Load the model
model = AutoEncoder(in_dim=X_scaled.shape[1], hidden_units=64, latent_features=2, num_layers=1)
model.load_state_dict(weights)

# Evaluate model on X_cluster
X_cluster_t = torch.tensor(X_cluster, dtype=torch.float32)
model.eval()

# No gradients needed
with torch.no_grad():
    outputs = model(X_cluster_t)          # dict with the reconstruction under "x_hat"
    reconstructed = outputs["x_hat"]

    # Reconstruction error per sample
    reconstruction_error = F.mse_loss(
        reconstructed,
        X_cluster_t,
        reduction='none'
    ).mean(dim=1)   # mean across features for each sample

    # Same quantity written out by hand, as a NumPy array
    X_hat = outputs["x_hat"]
    errors = torch.mean((X_hat - X_cluster_t)**2, dim=1).cpu().numpy()