In [None]:
import os
import sys
from functools import partial
# Set CUDA_VISIBLE_DEVICES to "2" for this process. The assignment sits in the
# first cell, ahead of the benchmark imports in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [None]:
import pandas as pd
from aer.benchmark import benchmark, BENCHMARK_DATA, METRICS
from orion.evaluation import contextual_confusion_matrix
from orion.evaluation.contextual import record_observed, record_expected

# Dataset names, grouped by the collection they belong to. These strings are
# used as keys into BENCHMARK_DATA when an experiment is launched.
NAB = [
    'artificialWithAnomaly',
    'realAdExchange',
    'realAWSCloudwatch',
    'realTraffic',
    'realTweets',
]
NASA = ['MSL', 'SMAP']
YAHOO = [f'YAHOOA{index}' for index in range(1, 5)]
UCR = ['UCR']

# Every dataset, in the order NAB, NASA, YAHOO, UCR.
ALL_DATASETS = [*NAB, *NASA, *YAHOO, *UCR]

# Absolute path of the `results` folder under the current working directory.
RESULTS_DIRECTORY = os.path.join(os.getcwd(), 'results')

# Additional Metrics
# `METRICS` is the dict imported from `aer.benchmark` and is edited in place
# below. `pop(..., None)` is used instead of `del` so that re-running this cell
# (which re-imports the already edited dict) does not raise KeyError.
METRICS.pop('accuracy', None)
METRICS['confusion_matrix'] = contextual_confusion_matrix
METRICS['observed'] = record_observed
METRICS['expected'] = record_expected

# Rebind the notebook-level name to a new dict in which every metric callable
# has `weighted=False` pre-bound; the imported dict itself keeps the raw callables.
METRICS = {name: partial(metric, weighted=False) for name, metric in METRICS.items()}


def run_experiment(experiment_name: str, pipelines: dict, datasets: list, metrics: dict,
                   results_directory: str = RESULTS_DIRECTORY, workers: int = 1,
                   tqdm_log_file: str = 'output.txt'):
    """Run `benchmark` for the given pipelines on a selection of datasets.

    Args:
        experiment_name: Label of the experiment. Not used inside this
            function; callers use it to tag and name their outputs.
        pipelines: Mapping passed to `benchmark` as `pipelines`.
        datasets: Dataset names; each must be a key of `BENCHMARK_DATA`.
        metrics: Mapping passed to `benchmark` as `metrics`.
        results_directory: Directory the callers write their CSV files to.
            It is created here if it does not exist yet.
        workers: Passed to `benchmark` as `workers`.
        tqdm_log_file: Passed to `benchmark` as `tqdm_log_file`.

    Returns:
        Whatever `benchmark` returns (the cells in this notebook treat it as
        a pandas DataFrame).

    Raises:
        KeyError: If a name in `datasets` is not a key of `BENCHMARK_DATA`.
    """
    # Create the output directory before the (potentially long) benchmark run,
    # so a missing or unwritable folder fails fast instead of at save time.
    os.makedirs(results_directory, exist_ok=True)

    selected = {name: BENCHMARK_DATA[name] for name in datasets}
    return benchmark(
        pipelines=pipelines,
        datasets=selected,
        metrics=metrics,
        rank='f1',
        show_progress=True,
        workers=workers,
        tqdm_log_file=tqdm_log_file,
    )

In [None]:
# Run the 'aer_ablation-mult' pipeline on all datasets. This cell assigns
# `results` afresh; the following experiment cells append their rows to it.
experiment_name = "AER (MULT)"
pipelines = {
    'aer': 'aer_ablation-mult'
}
results = run_experiment(
    experiment_name=experiment_name,
    pipelines=pipelines,
    datasets=ALL_DATASETS,
    metrics=METRICS,
    results_directory=RESULTS_DIRECTORY,
    workers=1,
    tqdm_log_file=f'{experiment_name}.txt',
)
# Label every row with the experiment name so rows remain attributable once
# the per-experiment frames are combined.
results['pipeline'] = experiment_name
# Write into the configured results directory rather than a hardcoded relative path.
results.to_csv(os.path.join(RESULTS_DIRECTORY, f'{experiment_name}_results.csv'), index=False)

In [None]:
# Run the 'aer_ablation-sum' pipeline on all datasets and add its rows to `results`.
experiment_name = "AER (SUM)"
pipelines = {
    'aer': 'aer_ablation-sum'
}
_results = run_experiment(
    experiment_name=experiment_name,
    pipelines=pipelines,
    datasets=ALL_DATASETS,
    metrics=METRICS,
    results_directory=RESULTS_DIRECTORY,
    workers=1,
    tqdm_log_file=f'{experiment_name}.txt',
)
_results['pipeline'] = experiment_name
# Write into the configured results directory rather than a hardcoded relative path.
_results.to_csv(os.path.join(RESULTS_DIRECTORY, f'{experiment_name}_results.csv'), index=False)
# Drop rows left over from an earlier run of this cell before appending, so
# re-executing the cell does not duplicate this experiment in `results`.
results = pd.concat(
    [results[results['pipeline'] != experiment_name], _results],
    ignore_index=True,
)

In [None]:
# Run the 'aer_ablation-pred' pipeline on all datasets and add its rows to `results`.
experiment_name = "AER (PRED)"
pipelines = {
    'aer': 'aer_ablation-pred'
}
_results = run_experiment(
    experiment_name=experiment_name,
    pipelines=pipelines,
    datasets=ALL_DATASETS,
    metrics=METRICS,
    results_directory=RESULTS_DIRECTORY,
    workers=1,
    tqdm_log_file=f'{experiment_name}.txt',
)
_results['pipeline'] = experiment_name
# Write into the configured results directory rather than a hardcoded relative path.
_results.to_csv(os.path.join(RESULTS_DIRECTORY, f'{experiment_name}_results.csv'), index=False)
# Drop rows left over from an earlier run of this cell before appending, so
# re-executing the cell does not duplicate this experiment in `results`.
results = pd.concat(
    [results[results['pipeline'] != experiment_name], _results],
    ignore_index=True,
)

In [None]:
# Run the 'aer_ablation-rec' pipeline on all datasets and add its rows to `results`.
experiment_name = "AER (REC)"
pipelines = {
    'aer': 'aer_ablation-rec'
}
_results = run_experiment(
    experiment_name=experiment_name,
    pipelines=pipelines,
    datasets=ALL_DATASETS,
    metrics=METRICS,
    results_directory=RESULTS_DIRECTORY,
    workers=1,
    tqdm_log_file=f'{experiment_name}.txt',
)
_results['pipeline'] = experiment_name
# Write into the configured results directory rather than a hardcoded relative path.
_results.to_csv(os.path.join(RESULTS_DIRECTORY, f'{experiment_name}_results.csv'), index=False)
# Drop rows left over from an earlier run of this cell before appending, so
# re-executing the cell does not duplicate this experiment in `results`.
results = pd.concat(
    [results[results['pipeline'] != experiment_name], _results],
    ignore_index=True,
)

In [None]:
# Save the combined rows of all experiments above into the configured results
# directory (the original used an f-string with no placeholders and a
# hardcoded relative path).
results.to_csv(os.path.join(RESULTS_DIRECTORY, 'Table_IV_B_results.csv'), index=False)