# Code to generate possible ensemble results based on previously stored scores

This code allows you to compute the optimal ensemble (in terms of average AUC-ROC) from any algorithms you wish to include. Note, however, that the scores for each algorithm must be computed and stored first. This can be done either by running the gridsearch, which lets you determine the optimal parameters for an algorithm and obtain the associated scores on each dataset for that set of parameters, or by running get_default_scores, which gives you the scores obtained with the default parameters of that algorithm.

## Import libraries

In [8]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
from collections import defaultdict
import traceback
import gc

## Function that evaluates the algorithm based on the produced anomaly scores

In [9]:
def evaluate_algorithm(scores, data_path):
    """Compare anomaly scores with the ground-truth labels stored in a CSV file.

    Parameters
    ----------
    scores : array-like
        Anomaly scores, passed unchanged to the scikit-learn metric functions
        together with the 'is_anomaly' column.
    data_path : str
        Path of the CSV file to read; it must have an 'is_anomaly' column.

    Returns
    -------
    tuple
        ``(auc_roc, auc_pr)``: the area under the ROC curve and the area under
        the precision-recall curve.

    Raises
    ------
    ValueError
        If the CSV file has no 'is_anomaly' column.
    """
    frame = pd.read_csv(data_path)

    # Without ground-truth labels there is nothing to evaluate against
    if 'is_anomaly' in frame.columns:
        labels = frame['is_anomaly']
    else:
        raise ValueError(f"The test data at {data_path} must contain an 'is_anomaly' column.")

    # Area under the ROC curve
    roc_area = roc_auc_score(labels, scores)

    # Area under the precision-recall curve (recall on the x-axis)
    pr_curve = precision_recall_curve(labels, scores)
    pr_area = auc(pr_curve[1], pr_curve[0])

    return roc_area, pr_area

## General function allowing you to compute the best ensemble in terms of average AUC-ROC

In [13]:
def _load_algorithm_scores(algo):
    """Load the stored per-dataset anomaly scores of one algorithm.

    The grid-search result file is preferred; the default-parameter file is
    used when the former does not exist.

    Returns a plain ``dict`` mapping each key of the ``.npz`` archive to its
    array. Raises ``FileNotFoundError`` when neither file exists.
    """
    algo_key = algo.lower()
    candidate_paths = (
        # Very important that your scores are saved in this format if you have
        # already performed grid search!
        f'./results/best_{algo_key}_scores.npz',
        # Very important that your scores are saved in this format if you have
        # already performed default score calculation!
        f'./results/default_{algo_key}_scores.npz',
    )
    for file_path in candidate_paths:
        if os.path.exists(file_path):
            # Read every array eagerly so the archive can be closed right away
            with np.load(file_path) as stored:
                return {key: stored[key] for key in stored.files}
    raise FileNotFoundError(f"No stored scores found for algorithm {algo} in either file.")


def _generate_weight_combinations(n_algos, weight_points):
    """Build the candidate weight vectors (each summing to 1) for n_algos algorithms.

    The first weight takes every value in ``weight_points``; the remaining
    ``1 - w`` is distributed according to each combination generated for
    ``n_algos - 1`` algorithms. Exact duplicates (for example every row
    produced when ``w == 1``) are emitted only once, keeping the first
    occurrence so the enumeration order is otherwise unchanged.
    """
    if n_algos < 1:
        raise ValueError("At least one algorithm is required to build an ensemble.")
    if n_algos == 1:
        # A single algorithm can only receive the full weight
        return np.array([[1.0]])
    if n_algos == 2:
        return np.array([[w, 1 - w] for w in weight_points])

    # The sub-grid does not depend on w, so compute it once
    sub_weights = _generate_weight_combinations(n_algos - 1, weight_points)

    weights = []
    seen = set()
    for w in weight_points:
        for sw in sub_weights:
            # Sub-combinations sum to 1 up to rounding; renormalising (rather
            # than filtering on an exact `<= 1`) keeps combinations whose sum
            # is off by a floating-point epsilon.
            combo = [w] + list((1 - w) * sw / sum(sw))
            key = tuple(combo)
            if key not in seen:
                seen.add(key)
                weights.append(combo)
    return np.array(weights)


def combine_algorithms(algorithm_names, family="distance", n_weight_points=11):
    """Search for the weighted score ensemble with the best mean AUC-ROC.

    For every algorithm the stored scores are loaded from
    ``./results/best_<name>_scores.npz`` (or ``./results/default_<name>_scores.npz``
    if the former is absent). The datasets evaluated are the keys found in the
    first algorithm's score file; each key is used as the path of the CSV file
    holding the 'is_anomaly' labels. Scores are min-max normalised per dataset
    and per algorithm (left unchanged when constant); an algorithm without
    scores for a dataset contributes zeros. Every candidate weight vector is
    evaluated through ``evaluate_algorithm`` and the first one reaching the
    highest mean AUC-ROC is kept.

    The best weights, per-dataset metrics and combined scores are written to
    ``./results/best_algorithm_combination_<family>_{weights.csv,metrics.csv,scores.npz}``;
    a failure while saving is printed and does not prevent the return.

    Parameters
    ----------
    algorithm_names : sequence of str
        Names of the algorithms to combine (at least one).
    family : str, default "distance"
        Label inserted in the names of the output files.
    n_weight_points : int, default 11
        Number of evenly spaced values in [0, 1] tried for each weight.

    Returns
    -------
    dict
        Keys ``'weights'``, ``'mean_auc_roc'``, ``'dataset_scores'`` and
        ``'metrics'`` describing the best combination. The mean is taken over
        the datasets that could be evaluated (0.0 if none could).

    Raises
    ------
    ValueError
        If ``algorithm_names`` is empty, ``n_weight_points`` is smaller than 1,
        or the first algorithm's score file contains no dataset.
    FileNotFoundError
        If no stored scores exist for one of the algorithms.
    """
    n_algorithms = len(algorithm_names)
    if n_algorithms == 0:
        raise ValueError("algorithm_names must contain at least one algorithm.")
    if n_weight_points < 1:
        raise ValueError("n_weight_points must be at least 1.")

    # Load scores for each algorithm upfront. A missing file is fatal: the
    # search cannot produce a meaningful result with a partial set of scores.
    algorithm_scores = {algo: _load_algorithm_scores(algo) for algo in algorithm_names}

    datasets = list(algorithm_scores[algorithm_names[0]].keys())
    if not datasets:
        raise ValueError(
            f"No datasets found in the stored scores of {algorithm_names[0]}."
        )

    # Generate weight combinations
    weight_points = np.linspace(0, 1, n_weight_points)
    weight_combinations = _generate_weight_combinations(n_algorithms, weight_points)

    # Normalise the scores once per dataset: this work does not depend on the
    # weights, so there is no reason to redo it for every combination.
    prepared_scores = {}
    for dataset in datasets:
        try:
            true_labels = pd.read_csv(dataset)['is_anomaly'].values

            normalised = []
            for algo in algorithm_names:
                if dataset in algorithm_scores[algo]:
                    # Force float so that integer scores can be accumulated
                    # into the (float) weighted sum below
                    scores = np.asarray(algorithm_scores[algo][dataset], dtype=float)
                else:
                    scores = np.zeros(len(true_labels))
                min_val, max_val = np.min(scores), np.max(scores)
                if min_val != max_val:
                    scores = (scores - min_val) / (max_val - min_val)
                normalised.append(scores)
            prepared_scores[dataset] = normalised
        except Exception as e:
            print(f"Error processing dataset {dataset}: {e}")
            traceback.print_exc()

    # The raw scores are no longer needed once normalised
    del algorithm_scores

    # Track the best results
    best_mean_auc_roc = -1
    best_results = {
        'weights': None,
        'mean_auc_roc': -1,
        'dataset_scores': {},
        'metrics': {}
    }

    # Process datasets for all weight combinations
    for weights in weight_combinations:
        print(f"Testing weights: {weights}")
        total_auc_roc = 0.0
        n_evaluated = 0
        current_scores = {}
        current_metrics = {}

        for dataset, algo_scores in prepared_scores.items():
            try:
                # Compute the weighted sum of scores
                combined_scores = np.zeros_like(algo_scores[0], dtype=float)
                for w, scores in zip(weights, algo_scores):
                    combined_scores += w * scores

                auc_roc, auc_pr = evaluate_algorithm(combined_scores, dataset)
            except Exception as e:
                print(f"Error processing dataset {dataset}: {e}")
                traceback.print_exc()
                continue

            # Store results
            current_scores[dataset] = combined_scores
            current_metrics[dataset] = {
                'AUC-ROC': auc_roc,
                'AUC-PR': auc_pr
            }
            total_auc_roc += auc_roc
            n_evaluated += 1

        # Average over the datasets that were actually evaluated, so that a
        # failed dataset is not counted as an AUC-ROC of 0
        current_mean_auc_roc = total_auc_roc / n_evaluated if n_evaluated else 0.0

        print(f"Mean AUC-ROC for weights {weights}: {current_mean_auc_roc:.3f}")

        # Update best results if current results are better
        if current_mean_auc_roc > best_mean_auc_roc:
            print(f"New best configuration found! Previous best: {best_mean_auc_roc:.3f}")
            best_mean_auc_roc = current_mean_auc_roc
            best_results = {
                'weights': weights,
                'mean_auc_roc': current_mean_auc_roc,
                'dataset_scores': current_scores,
                'metrics': current_metrics
            }

        # Clear memory
        gc.collect()

    # Save final results
    try:
        os.makedirs('./results', exist_ok=True)

        # Save weights
        weights_df = pd.DataFrame({
            'Algorithm': algorithm_names,
            'Weight': best_results['weights']
        })
        weights_df.to_csv(f'./results/best_algorithm_combination_{family}_weights.csv', index=False)

        # Save detailed metrics for each dataset
        detailed_metrics = [
            {
                'Dataset': dataset,
                'AUC-ROC': metrics['AUC-ROC'],
                'AUC-PR': metrics['AUC-PR']
            }
            for dataset, metrics in best_results['metrics'].items()
        ]
        pd.DataFrame(detailed_metrics).to_csv(
            f'./results/best_algorithm_combination_{family}_metrics.csv', index=False
        )

        # Save anomaly scores for each dataset
        np.savez(
            f'./results/best_algorithm_combination_{family}_scores.npz',
            **best_results['dataset_scores']
        )

    except Exception as e:
        print(f"Error saving results: {e}")
        traceback.print_exc()

    return best_results

## Example of ensemble for 3 distance-based methods: Sub-LOF, kMeans & PS-SVM

In [15]:
# Ensemble of three distance-based detectors, 11 candidate values per weight
algorithms = ["Sub-LOF", "kMeans", "PS-SVM"]
distance_results = combine_algorithms(algorithms, n_weight_points=11)

# Pull out what is reported below
best_weights = distance_results['weights']
best_mean = distance_results['mean_auc_roc']

print("\nBest combination found:")
for algo, weight in zip(algorithms, best_weights):
    print(f"{algo}: {weight:.3f}")
print(f"Mean AUC-ROC: {best_mean:.3f}")

Testing weights: [0. 0. 1.]
Mean AUC-ROC for weights [0. 0. 1.]: 0.679
New best configuration found! Previous best: -1.000
Testing weights: [0.  0.1 0.9]
Mean AUC-ROC for weights [0.  0.1 0.9]: 0.710
New best configuration found! Previous best: 0.679
Testing weights: [0.  0.2 0.8]
Mean AUC-ROC for weights [0.  0.2 0.8]: 0.725
New best configuration found! Previous best: 0.710
Testing weights: [0.  0.3 0.7]
Mean AUC-ROC for weights [0.  0.3 0.7]: 0.738
New best configuration found! Previous best: 0.725
Testing weights: [0.  0.4 0.6]
Mean AUC-ROC for weights [0.  0.4 0.6]: 0.748
New best configuration found! Previous best: 0.738
Testing weights: [0.  0.5 0.5]
Mean AUC-ROC for weights [0.  0.5 0.5]: 0.757
New best configuration found! Previous best: 0.748
Testing weights: [0.  0.6 0.4]
Mean AUC-ROC for weights [0.  0.6 0.4]: 0.764
New best configuration found! Previous best: 0.757
Testing weights: [0.  0.7 0.3]
Mean AUC-ROC for weights [0.  0.7 0.3]: 0.766
New best configuration found! Pr

## Example of ensemble for 3 methods from different families: Sub-LOF, GrammarViz3 & DWT-MLEAD

In [14]:
# Ensemble of three detectors from different families; output files use the 'all' label
algorithms = ["Sub-LOF", "GrammarViz3", "DWT-MLEAD"]
all_results = combine_algorithms(algorithms, family='all', n_weight_points=11)

# Pull out what is reported below
best_weights = all_results['weights']
best_mean = all_results['mean_auc_roc']

print("\nBest combination found:")
for algo, weight in zip(algorithms, best_weights):
    print(f"{algo}: {weight:.3f}")
print(f"Mean AUC-ROC: {best_mean:.3f}")

Testing weights: [0. 0. 1.]
Mean AUC-ROC for weights [0. 0. 1.]: 0.760
New best configuration found! Previous best: -1.000
Testing weights: [0.  0.1 0.9]
Mean AUC-ROC for weights [0.  0.1 0.9]: 0.788
New best configuration found! Previous best: 0.760
Testing weights: [0.  0.2 0.8]
Mean AUC-ROC for weights [0.  0.2 0.8]: 0.790
New best configuration found! Previous best: 0.788
Testing weights: [0.  0.3 0.7]
Mean AUC-ROC for weights [0.  0.3 0.7]: 0.787
Testing weights: [0.  0.4 0.6]
Mean AUC-ROC for weights [0.  0.4 0.6]: 0.782
Testing weights: [0.  0.5 0.5]
Mean AUC-ROC for weights [0.  0.5 0.5]: 0.779
Testing weights: [0.  0.6 0.4]
Mean AUC-ROC for weights [0.  0.6 0.4]: 0.778
Testing weights: [0.  0.7 0.3]
Mean AUC-ROC for weights [0.  0.7 0.3]: 0.776
Testing weights: [0.  0.8 0.2]
Mean AUC-ROC for weights [0.  0.8 0.2]: 0.772
Testing weights: [0.  0.9 0.1]
Mean AUC-ROC for weights [0.  0.9 0.1]: 0.766
Testing weights: [0. 1. 0.]
Mean AUC-ROC for weights [0. 1. 0.]: 0.751
Testing wei