In [2]:
import pickle
import numpy as np
import os
import glob
from collections import defaultdict
from sklearn.metrics import roc_auc_score

# --- Settings for SMD ---
window_size = 100     # SMD window size
stride = 50           # SMD stride
Ttotal = 27600        # total length in samples (longer than last anomaly)

# --- Anomalous ranges (ground truth for SMD) ---
anomalies = [
    (16964, 17515),
    (18072, 18528),
    (19368, 20088),
    (20787, 21195),
    (24680, 24682),
    (26115, 26116),
    (27555, 27556),
]

total_windows = (Ttotal - window_size) // stride + 1

# --- Ground truth anomalous windows ---
start_times = [i * stride for i in range(total_windows)]
anomalous_indices = set()
for idx, start in enumerate(start_times):
    end = start + window_size
    for Astart, Aend in anomalies:
        if min(end, Aend) > max(start, Astart):  # overlap check
            anomalous_indices.add(idx)

print(f"Total windows: {total_windows}")
print(f"Ground truth anomalous windows: {len(anomalous_indices)}\n")

# --- Loop over all result files automatically ---
result_files = glob.glob("results/SMD1/ensemble_res_*.pkl")

for file in result_files:
    x = os.path.splitext(os.path.basename(file))[0].split("_")[-1]

    print(f"\n=== Evaluating file {file} (Run {x}) ===")

    with open(file, "rb") as f:
        all_results = pickle.load(f)

    # --- Compute anomaly scores ---
    anomaly_scores = defaultdict(lambda: {'score_sum': 0.0, 'count': 0})

    for iteration_result in all_results:
        buckets = iteration_result['buckets']
        bucket_results = iteration_result['bucket_results']

        for bucket_result in bucket_results:
            bucket_idx = bucket_result['bucket_idx']
            final_results = bucket_result['final_results']
            indices_in_bucket = buckets[bucket_idx]

            mean = np.mean(final_results)
            std = np.std(final_results) if np.std(final_results) != 0 else 1e-8

            for i, idx in enumerate(indices_in_bucket):
                sim = final_results[i]
                deviation = abs(sim - mean) / std
                anomaly_scores[idx]['score_sum'] += deviation
                anomaly_scores[idx]['count'] += 1

    final_scores = {
        idx: score_data['score_sum'] / score_data['count']
        for idx, score_data in anomaly_scores.items()
    }

    # --- Percentile threshold based on anomaly ratio ---
    true_anomaly_count = len(anomalous_indices)
    PERCENTILE = 100 - (true_anomaly_count / total_windows) * 100
    all_score_values = list(final_scores.values())
    threshold = np.percentile(all_score_values, PERCENTILE)

    # --- Detected anomalous windows ---
    detected_windows = {
        idx: score for idx, score in final_scores.items() if score >= threshold
    }
    detected_indices = set(detected_windows.keys())


    # --- Metrics ---
    true_positives = len(detected_indices & anomalous_indices)
    false_positives = len(detected_indices - anomalous_indices)
    false_negatives = len(anomalous_indices - detected_indices)

    true_negatives = total_windows - (true_positives + false_positives + false_negatives)

    precision = true_positives / len(detected_indices) * 100 if detected_indices else 0
    recall = true_positives / len(anomalous_indices) * 100 if anomalous_indices else 0
    f1_score = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0

    accuracy = (true_positives + true_negatives) / total_windows * 100

    # Balanced Accuracy
    tpr = recall / 100  # already in percent, divide by 100
    tnr = true_negatives / (true_negatives + false_positives) if (true_negatives + false_positives) > 0 else 0
    balanced_accuracy = (tpr + tnr) / 2 * 100

    # AUC (needs raw scores, not thresholded)
    labels = [1 if i in anomalous_indices else 0 for i in range(total_windows)]
    scores = [final_scores.get(i, 0) for i in range(total_windows)]
    auc = roc_auc_score(labels, scores) * 100


    # --- Save results ---
    output_dir = "evaluations/SMD1/"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = f"results_w{x}.txt"
    output_path = os.path.join(output_dir, output_filename)

    with open(output_path, "w") as f:
        for idx, score in sorted(detected_windows.items()):
            f.write(f"Window {idx} - Score: {score:.4f}\n")
        f.write(f"\nTrue Positives: {true_positives}")
        f.write(f"\nFalse Positives: {false_positives}")
        f.write(f"\nFalse Negatives: {false_negatives}")
        f.write(f"\nTrue Negatives: {true_negatives}")
        f.write(f"\nPrecision: {precision:.2f}%")
        f.write(f"\nRecall: {recall:.2f}%")
        f.write(f"\nF1 Score: {f1_score:.2f}%")
        f.write(f"\nAccuracy: {accuracy:.2f}%")
        f.write(f"\nBalanced Accuracy: {balanced_accuracy:.2f}%")
        f.write(f"\nROC AUC: {auc:.2f}%")



Total windows: 551
Ground truth anomalous windows: 55


=== Evaluating file results/SMD1\ensemble_res_1.pkl (Run 1) ===

=== Evaluating file results/SMD1\ensemble_res_10.pkl (Run 10) ===

=== Evaluating file results/SMD1\ensemble_res_2.pkl (Run 2) ===

=== Evaluating file results/SMD1\ensemble_res_3.pkl (Run 3) ===

=== Evaluating file results/SMD1\ensemble_res_4.pkl (Run 4) ===

=== Evaluating file results/SMD1\ensemble_res_5.pkl (Run 5) ===

=== Evaluating file results/SMD1\ensemble_res_6.pkl (Run 6) ===

=== Evaluating file results/SMD1\ensemble_res_7.pkl (Run 7) ===

=== Evaluating file results/SMD1\ensemble_res_8.pkl (Run 8) ===

=== Evaluating file results/SMD1\ensemble_res_9.pkl (Run 9) ===


In [1]:
import pickle
import numpy as np
import os
import glob
from collections import defaultdict
from sklearn.metrics import roc_auc_score

# --- Settings for SMD ---
window_size = 100     # SMD window size
stride = 50           # SMD stride
Ttotal = 27600        # total length in samples (longer than last anomaly)

# --- Anomalous ranges (ground truth for SMD) ---
anomalies = [
    (4630, 4688),
    (5487, 5491),
    (5876, 5951),
    (15416, 15418),
    (15541, 15605),
    (15926, 15973),
    (18646, 18801),
    (20236, 20271),
    (22265, 22336),
    (23094, 23115),

]

total_windows = (Ttotal - window_size) // stride + 1

# --- Ground truth anomalous windows ---
start_times = [i * stride for i in range(total_windows)]
anomalous_indices = set()
for idx, start in enumerate(start_times):
    end = start + window_size
    for Astart, Aend in anomalies:
        if min(end, Aend) > max(start, Astart):  # overlap check
            anomalous_indices.add(idx)

print(f"Total windows: {total_windows}")
print(f"Ground truth anomalous windows: {len(anomalous_indices)}\n")

# --- Loop over all result files automatically ---
result_files = glob.glob("results/SMD2/ensemble_res_*.pkl")

for file in result_files:
    x = os.path.splitext(os.path.basename(file))[0].split("_")[-1]

    print(f"\n=== Evaluating file {file} (Run {x}) ===")

    with open(file, "rb") as f:
        all_results = pickle.load(f)

    # --- Compute anomaly scores ---
    anomaly_scores = defaultdict(lambda: {'score_sum': 0.0, 'count': 0})

    for iteration_result in all_results:
        buckets = iteration_result['buckets']
        bucket_results = iteration_result['bucket_results']

        for bucket_result in bucket_results:
            bucket_idx = bucket_result['bucket_idx']
            final_results = bucket_result['final_results']
            indices_in_bucket = buckets[bucket_idx]

            mean = np.mean(final_results)
            std = np.std(final_results) if np.std(final_results) != 0 else 1e-8

            for i, idx in enumerate(indices_in_bucket):
                sim = final_results[i]
                deviation = abs(sim - mean) / std
                anomaly_scores[idx]['score_sum'] += deviation
                anomaly_scores[idx]['count'] += 1

    final_scores = {
        idx: score_data['score_sum'] / score_data['count']
        for idx, score_data in anomaly_scores.items()
    }

    # --- Percentile threshold based on anomaly ratio ---
    true_anomaly_count = len(anomalous_indices)
    PERCENTILE = 100 - (true_anomaly_count / total_windows) * 100
    all_score_values = list(final_scores.values())
    threshold = np.percentile(all_score_values, PERCENTILE)

    # --- Detected anomalous windows ---
    detected_windows = {
        idx: score for idx, score in final_scores.items() if score >= threshold
    }
    detected_indices = set(detected_windows.keys())


    # --- Metrics ---
    true_positives = len(detected_indices & anomalous_indices)
    false_positives = len(detected_indices - anomalous_indices)
    false_negatives = len(anomalous_indices - detected_indices)

    true_negatives = total_windows - (true_positives + false_positives + false_negatives)

    precision = true_positives / len(detected_indices) * 100 if detected_indices else 0
    recall = true_positives / len(anomalous_indices) * 100 if anomalous_indices else 0
    f1_score = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0

    accuracy = (true_positives + true_negatives) / total_windows * 100

    # Balanced Accuracy
    tpr = recall / 100  # already in percent, divide by 100
    tnr = true_negatives / (true_negatives + false_positives) if (true_negatives + false_positives) > 0 else 0
    balanced_accuracy = (tpr + tnr) / 2 * 100

    # AUC (needs raw scores, not thresholded)
    labels = [1 if i in anomalous_indices else 0 for i in range(total_windows)]
    scores = [final_scores.get(i, 0) for i in range(total_windows)]
    auc = roc_auc_score(labels, scores) * 100


    # --- Save results ---
    output_dir = "evaluations/SMD2/"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = f"results_w{x}.txt"
    output_path = os.path.join(output_dir, output_filename)

    with open(output_path, "w") as f:
        for idx, score in sorted(detected_windows.items()):
            f.write(f"Window {idx} - Score: {score:.4f}\n")
        f.write(f"\nTrue Positives: {true_positives}")
        f.write(f"\nFalse Positives: {false_positives}")
        f.write(f"\nFalse Negatives: {false_negatives}")
        f.write(f"\nTrue Negatives: {true_negatives}")
        f.write(f"\nPrecision: {precision:.2f}%")
        f.write(f"\nRecall: {recall:.2f}%")
        f.write(f"\nF1 Score: {f1_score:.2f}%")
        f.write(f"\nAccuracy: {accuracy:.2f}%")
        f.write(f"\nBalanced Accuracy: {balanced_accuracy:.2f}%")
        f.write(f"\nROC AUC: {auc:.2f}%")


Total windows: 551
Ground truth anomalous windows: 33


=== Evaluating file results/SMD2\ensemble_res_1.pkl (Run 1) ===

=== Evaluating file results/SMD2\ensemble_res_10.pkl (Run 10) ===

=== Evaluating file results/SMD2\ensemble_res_2.pkl (Run 2) ===

=== Evaluating file results/SMD2\ensemble_res_3.pkl (Run 3) ===

=== Evaluating file results/SMD2\ensemble_res_4.pkl (Run 4) ===

=== Evaluating file results/SMD2\ensemble_res_5.pkl (Run 5) ===

=== Evaluating file results/SMD2\ensemble_res_6.pkl (Run 6) ===

=== Evaluating file results/SMD2\ensemble_res_7.pkl (Run 7) ===

=== Evaluating file results/SMD2\ensemble_res_8.pkl (Run 8) ===

=== Evaluating file results/SMD2\ensemble_res_9.pkl (Run 9) ===


In [11]:
import pickle
import numpy as np
import os
import glob
from collections import defaultdict
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, balanced_accuracy_score
from sklearn.metrics import roc_auc_score

# --- Settings for SMD ---
window_size = 100
stride = 50
Ttotal = 27600

# --- Ground truth anomaly ranges ---
anomalies = [
    (16964, 17515),
    (18072, 18528),
    (19368, 20088),
    (20787, 21195),
    (24680, 24682),
    (26115, 26116),
    (27555, 27556),
]

total_windows = (Ttotal - window_size) // stride + 1
start_times = [i * stride for i in range(total_windows)]
anomalous_indices = set()
for idx, start in enumerate(start_times):
    end = start + window_size
    for Astart, Aend in anomalies:
        if min(end, Aend) > max(start, Astart):  # overlap check
            anomalous_indices.add(idx)

print(f"Total windows: {total_windows}")
print(f"Ground truth anomalous windows: {len(anomalous_indices)}\n")

# --- Load results ---
result_files = glob.glob("results/SMD_s5/ensemble_res_*.pkl")

for file in result_files:
    x = os.path.splitext(os.path.basename(file))[0].split("_")[-1]
    print(f"\n=== Evaluating file {file} (Run {x}) ===")

    with open(file, "rb") as f:
        all_results = pickle.load(f)

    # --- Compute anomaly scores ---
    anomaly_scores = defaultdict(lambda: {'score_sum': 0.0, 'count': 0})
    for iteration_result in all_results:
        buckets = iteration_result['buckets']
        bucket_results = iteration_result['bucket_results']
        for bucket_result in bucket_results:
            bucket_idx = bucket_result['bucket_idx']
            final_results = bucket_result['final_results']
            indices_in_bucket = buckets[bucket_idx]
            mean = np.mean(final_results)
            std = np.std(final_results) if np.std(final_results) != 0 else 1e-8
            for i, idx in enumerate(indices_in_bucket):
                sim = final_results[i]
                deviation = abs(sim - mean) / std
                anomaly_scores[idx]['score_sum'] += deviation
                anomaly_scores[idx]['count'] += 1

    final_scores = {
        idx: score_data['score_sum'] / score_data['count']
        for idx, score_data in anomaly_scores.items()
    }

    # --- Ground truth labels (0 = normal, 1 = anomaly) ---
    y_true = np.array([1 if idx in anomalous_indices else 0 for idx in range(total_windows)])
    y_scores = np.array([final_scores.get(idx, 0.0) for idx in range(total_windows)])

    # --- Thresholding based on the 99th percentile of scores ---
    threshold = np.percentile(list(final_scores.values()), 90)  # Use the 99th percentile as threshold
    y_pred = (y_scores >= threshold).astype(int)

    # --- Metrics ---
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    bal_accuracy = balanced_accuracy_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_scores)

    print(f"Threshold (99th percentile): {threshold:.4f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Balanced Accuracy: {bal_accuracy:.2f}")
    print(f"AUC: {auc:.2f}")

    # --- Save results ---
    output_dir = "evaluations/SMD_s5/"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = f"results_w{x}_99th_percentile.txt"
    output_path = os.path.join(output_dir, output_filename)

    with open(output_path, "w") as f:
        f.write(f"Threshold (99th percentile): {threshold:.4f}\n")
        f.write(f"Precision: {precision:.2f}\n")
        f.write(f"Recall: {recall:.2f}\n")
        f.write(f"F1 Score: {f1:.2f}\n")
        f.write(f"Accuracy: {accuracy:.2f}\n")
        f.write(f"Balanced Accuracy: {bal_accuracy:.2f}\n")
        f.write(f"AUC: {auc:.2f}\n")

    anomalous_percentage = (len(anomalous_indices) / total_windows) * 100
    print(f"Percentage of anomalous windows: {anomalous_percentage:.2f}%")

    print(f"Saved to {output_path}")


Total windows: 551
Ground truth anomalous windows: 55


=== Evaluating file results/SMD_s5\ensemble_res_1.pkl (Run 1) ===
Threshold (99th percentile): 0.8407
Precision: 0.49
Recall: 0.51
F1 Score: 0.50
Accuracy: 0.90
Balanced Accuracy: 0.73
AUC: 0.87
Percentage of anomalous windows: 9.98%
Saved to evaluations/SMD_s5/results_w1_99th_percentile.txt

=== Evaluating file results/SMD_s5\ensemble_res_10.pkl (Run 10) ===
Threshold (99th percentile): 0.8409
Precision: 0.50
Recall: 0.51
F1 Score: 0.50
Accuracy: 0.90
Balanced Accuracy: 0.73
AUC: 0.83
Percentage of anomalous windows: 9.98%
Saved to evaluations/SMD_s5/results_w10_99th_percentile.txt

=== Evaluating file results/SMD_s5\ensemble_res_2.pkl (Run 2) ===
Threshold (99th percentile): 0.8392
Precision: 0.46
Recall: 0.47
F1 Score: 0.46
Accuracy: 0.89
Balanced Accuracy: 0.71
AUC: 0.82
Percentage of anomalous windows: 9.98%
Saved to evaluations/SMD_s5/results_w2_99th_percentile.txt

=== Evaluating file results/SMD_s5\ensemble_res_3.pkl (Run 

In [None]:
import pickle
import numpy as np
import os
import glob
from collections import defaultdict
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, balanced_accuracy_score
from sklearn.metrics import roc_auc_score

# --- Settings for SMD ---
window_size = 100
stride = 50
Ttotal = 23694

# --- Ground truth anomaly ranges ---
anomalies = [
    (4630, 4688),
    (5487, 5491),
    (5876, 5951),
    (15416, 15418),
    (15541, 15605),
    (15926, 15973),
    (18646, 18801),
    (20236, 20271),
    (22265, 22336),
    (23094, 23115),

]

total_windows = (Ttotal - window_size) // stride + 1
start_times = [i * stride for i in range(total_windows)]
anomalous_indices = set()
for idx, start in enumerate(start_times):
    end = start + window_size
    for Astart, Aend in anomalies:
        if min(end, Aend) > max(start, Astart):  # overlap check
            anomalous_indices.add(idx)

print(f"Total windows: {total_windows}")
print(f"Ground truth anomalous windows: {len(anomalous_indices)}\n")

# --- Load results ---
result_files = glob.glob("results/SMD2/ensemble_res_*.pkl")

for file in result_files:
    x = os.path.splitext(os.path.basename(file))[0].split("_")[-1]
    print(f"\n=== Evaluating file {file} (Run {x}) ===")

    with open(file, "rb") as f:
        all_results = pickle.load(f)

    # --- Compute anomaly scores ---
    anomaly_scores = defaultdict(lambda: {'score_sum': 0.0, 'count': 0})
    for iteration_result in all_results:
        buckets = iteration_result['buckets']
        bucket_results = iteration_result['bucket_results']
        for bucket_result in bucket_results:
            bucket_idx = bucket_result['bucket_idx']
            final_results = bucket_result['final_results']
            indices_in_bucket = buckets[bucket_idx]
            mean = np.mean(final_results)
            std = np.std(final_results) if np.std(final_results) != 0 else 1e-8
            for i, idx in enumerate(indices_in_bucket):
                sim = final_results[i]
                deviation = abs(sim - mean) / std
                anomaly_scores[idx]['score_sum'] += deviation
                anomaly_scores[idx]['count'] += 1

    final_scores = {
        idx: score_data['score_sum'] / score_data['count']
        for idx, score_data in anomaly_scores.items()
    }

    # --- Ground truth labels (0 = normal, 1 = anomaly) ---
    y_true = np.array([1 if idx in anomalous_indices else 0 for idx in range(total_windows)])
    y_scores = np.array([final_scores.get(idx, 0.0) for idx in range(total_windows)])

    # --- Thresholding based on the 99th percentile of scores ---
    threshold = np.percentile(list(final_scores.values()), 90)  # Use the 99th percentile as threshold
    y_pred = (y_scores >= threshold).astype(int)

    # --- Metrics ---
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    bal_accuracy = balanced_accuracy_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_scores)

    print(f"Threshold (99th percentile): {threshold:.4f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Balanced Accuracy: {bal_accuracy:.2f}")
    print(f"AUC: {auc:.2f}")

    # --- Save results ---
    output_dir = "evaluations/SMD2/"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = f"results_w{x}_99th_percentile.txt"
    output_path = os.path.join(output_dir, output_filename)

    with open(output_path, "w") as f:
        f.write(f"Threshold (99th percentile): {threshold:.4f}\n")
        f.write(f"Precision: {precision:.2f}\n")
        f.write(f"Recall: {recall:.2f}\n")
        f.write(f"F1 Score: {f1:.2f}\n")
        f.write(f"Accuracy: {accuracy:.2f}\n")
        f.write(f"Balanced Accuracy: {bal_accuracy:.2f}\n")
        f.write(f"AUC: {auc:.2f}\n")

    anomalous_percentage = (len(anomalous_indices) / total_windows) * 100
    print(f"Percentage of anomalous windows: {anomalous_percentage:.2f}%")

    print(f"Saved to {output_path}")


Total windows: 551
Ground truth anomalous windows: 24

