In [2]:
import pickle
import numpy as np
import os
import glob
from collections import defaultdict

# --- SMD windowing parameters ---
window_size = 100     # samples covered by one sliding window
stride = 50           # hop, in samples, between consecutive window starts
Ttotal = 27600        # series length in samples (extends past the last labelled anomaly)

# --- Labelled anomalous sample ranges (SMD ground truth), as (start, end) pairs ---
anomalies = [
    (16964, 17515),
    (18072, 18528),
    (19368, 20088),
    (20787, 21195),
    (24680, 24682),
    (26115, 26116),
    (27555, 27556),
]

# Number of complete windows that fit inside the series.
total_windows = (Ttotal - window_size) // stride + 1

# --- Translate the labelled ranges into window indices ---
# Start sample of window k is k * stride.
start_times = list(range(0, total_windows * stride, stride))

# A window is anomalous when its span [start, start + window_size) has a
# strictly positive-length intersection with at least one labelled range;
# spans that merely touch at an end point do not count.
# NOTE(review): this treats each label's end value as exclusive - confirm
# against the convention used by the label source.
anomalous_indices = {
    window_idx
    for window_idx, window_start in enumerate(start_times)
    if any(
        min(window_start + window_size, range_end) > max(window_start, range_start)
        for range_start, range_end in anomalies
    )
}

print(f"Total windows: {total_windows}")
print(f"Ground truth anomalous windows: {len(anomalous_indices)}\n")

# --- Helpers (pure functions: no file access, no printing) ---
def accumulate_anomaly_scores(all_results):
    """Accumulate per-index deviation scores over every iteration and bucket.

    Within each bucket, every entry of ``final_results`` is turned into an
    absolute z-score against that bucket's own mean and (population) standard
    deviation, and credited to the index stored at the same position in
    ``buckets[bucket_idx]``.

    Parameters
    ----------
    all_results : iterable of dict
        Each item provides ``'buckets'`` (indexable by bucket index) and
        ``'bucket_results'`` (iterable of dicts carrying ``'bucket_idx'`` and
        ``'final_results'``).

    Returns
    -------
    collections.defaultdict
        Maps each index to ``{'score_sum': float, 'count': int}``.
    """
    anomaly_scores = defaultdict(lambda: {'score_sum': 0.0, 'count': 0})

    for iteration_result in all_results:
        buckets = iteration_result['buckets']

        for bucket_result in iteration_result['bucket_results']:
            final_results = bucket_result['final_results']
            indices_in_bucket = buckets[bucket_result['bucket_idx']]

            mean = np.mean(final_results)
            # Computed once per bucket; a zero spread is replaced by a tiny
            # epsilon so the division below stays defined.
            std = np.std(final_results)
            if std == 0:
                std = 1e-8

            for i, idx in enumerate(indices_in_bucket):
                deviation = abs(final_results[i] - mean) / std
                anomaly_scores[idx]['score_sum'] += deviation
                anomaly_scores[idx]['count'] += 1

    return anomaly_scores


def score_threshold(score_values, percentile):
    """Return the ``percentile``-th percentile of ``score_values``.

    An empty input has no percentile, so NaN is returned instead of handing
    the empty sequence to ``np.percentile``. Every ``score >= nan`` comparison
    is False, which yields zero detections downstream.
    """
    if len(score_values) == 0:
        return float("nan")
    return np.percentile(score_values, percentile)


def detection_metrics(detected_indices, anomalous_indices):
    """Compare detected indices with ground-truth indices.

    Parameters
    ----------
    detected_indices : set
        Indices flagged by the detector.
    anomalous_indices : set
        Ground-truth anomalous indices.

    Returns
    -------
    tuple
        ``(true_positives, false_positives, false_negatives, precision,
        recall, f1_score)``; the last three are percentages, and each is 0
        when its denominator would be zero.
    """
    true_positives = len(detected_indices & anomalous_indices)
    false_positives = len(detected_indices - anomalous_indices)
    false_negatives = len(anomalous_indices - detected_indices)

    precision = true_positives / len(detected_indices) * 100 if detected_indices else 0
    recall = true_positives / len(anomalous_indices) * 100 if anomalous_indices else 0
    f1_score = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0

    return true_positives, false_positives, false_negatives, precision, recall, f1_score


# --- Loop over all result files automatically ---
# Sorted so runs are evaluated in a reproducible order; glob.glob alone
# returns matches in a filesystem-dependent order.
result_files = sorted(glob.glob("results/SMD/ensemble_res_*.pkl"))

for file in result_files:
    # Run identifier: the text after the last underscore in the file stem.
    x = os.path.splitext(os.path.basename(file))[0].split("_")[-1]

    print(f"\n=== Evaluating file {file} (Run {x}) ===")

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only evaluate result files produced by a trusted run.
    with open(file, "rb") as f:
        all_results = pickle.load(f)

    # --- Compute anomaly scores ---
    anomaly_scores = accumulate_anomaly_scores(all_results)

    # Mean deviation per index across all buckets it appeared in.
    final_scores = {
        idx: score_data['score_sum'] / score_data['count']
        for idx, score_data in anomaly_scores.items()
    }

    # --- Percentile threshold based on anomaly ratio ---
    # NOTE(review): the ratio is taken over total_windows, but the percentile
    # is applied only to the indices that actually received a score. When
    # fewer indices are scored than there are windows, fewer windows get
    # flagged than the ground truth contains (the recorded run flags 15
    # against 55 labelled, with 0 true positives). Confirm that the bucket
    # indices live in the same index space as the window indices.
    true_anomaly_count = len(anomalous_indices)
    PERCENTILE = 100 - (true_anomaly_count / total_windows) * 100
    all_score_values = list(final_scores.values())
    threshold = score_threshold(all_score_values, PERCENTILE)

    # --- Detected anomalous windows ---
    detected_windows = {
        idx: score for idx, score in final_scores.items() if score >= threshold
    }
    detected_indices = set(detected_windows.keys())

    # --- Metrics ---
    (true_positives, false_positives, false_negatives,
     precision, recall, f1_score) = detection_metrics(detected_indices, anomalous_indices)

    print(f"Detected {len(detected_windows)} anomalous windows above {PERCENTILE:.2f}th percentile (threshold ≈ {threshold:.4f})")
    print(f"True Positives: {true_positives}")
    print(f"False Positives: {false_positives}")
    print(f"False Negatives: {false_negatives}")
    print(f"Precision: {precision:.2f}%")
    print(f"Recall: {recall:.2f}%")
    print(f"F1 Score: {f1_score:.2f}%")

    # --- Save results ---
    output_dir = "evaluations/SMD/"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = f"results_w{x}.txt"
    output_path = os.path.join(output_dir, output_filename)

    # One line per detected window (ascending index), then the summary metrics.
    with open(output_path, "w") as f:
        for idx, score in sorted(detected_windows.items()):
            f.write(f"Window {idx} - Score: {score:.4f}\n")
        f.write(f"\nTrue Positives: {true_positives}")
        f.write(f"\nFalse Positives: {false_positives}")
        f.write(f"\nFalse Negatives: {false_negatives}")
        f.write(f"\nPrecision: {precision:.2f}%")
        f.write(f"\nRecall: {recall:.2f}%")
        f.write(f"\nF1 Score: {f1_score:.2f}%")

    print(f"Saved to {output_path}")


Total windows: 551
Ground truth anomalous windows: 55


=== Evaluating file results/SMD\ensemble_res_1.pkl (Run 1) ===
Detected 15 anomalous windows above 90.02th percentile (threshold ≈ 0.8874)
True Positives: 0
False Positives: 15
False Negatives: 55
Precision: 0.00%
Recall: 0.00%
F1 Score: 0.00%
Saved to evaluations/SMD/results_w1.txt

=== Evaluating file results/SMD\ensemble_res_2.pkl (Run 2) ===
Detected 15 anomalous windows above 90.02th percentile (threshold ≈ 0.9099)
True Positives: 0
False Positives: 15
False Negatives: 55
Precision: 0.00%
Recall: 0.00%
F1 Score: 0.00%
Saved to evaluations/SMD/results_w2.txt
