In [1]:
import os
import json
import logging
import pickle
import warnings
from functools import partial
from pathlib import Path

from IPython.display import display
import pandas as pd
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

# Suppress every warning category for the rest of the session so the
# rendered tables are not interleaved with library warnings.
warnings.simplefilter(action='ignore')

# Logger named after the running module.
LOGGER = logging.getLogger(name=__name__)

### Table IV - A: Masking and Bi-directional scoring comparison

In [6]:
# Directory holding one `<pipeline>_results.csv` file per pipeline.
# Alternative: 'results' (real-time benchmarking results).
RESULTS_PATH = 'paper-reported-results'

# Pipelines to compare; this is also the row order of the final table.
# NOTE(review): "(M)" presumably marks the masking variant named in the
# section title -- confirm.
result_files = ['ARIMA', 'ARIMA (M)', 'LSTM-DT', 'LSTM-DT (M)',
                'LSTM-AE', 'LSTM-AE (M)', 'LSTM-VAE', 'LSTM-VAE (M)',
                'TadGAN', 'TadGAN (M)']
order_pipelines = list(result_files)

# Read every per-pipeline file, tag its rows with the pipeline name, and
# concatenate once at the end (instead of re-concatenating on every iteration).
frames = []
for filename in result_files:
    result = pd.read_csv(f"{RESULTS_PATH}/{filename}_results.csv")
    result['pipeline'] = filename
    frames.append(result)
results = pd.concat(frames)

# Raw dataset name -> benchmark family.
family = {
    "MSL": "NASA",
    "SMAP": "NASA",
    "YAHOOA1": "YAHOO",
    "YAHOOA2": "YAHOO",
    "YAHOOA3": "YAHOO",
    "YAHOOA4": "YAHOO",
    "artificialWithAnomaly": "NAB",
    "realAWSCloudwatch": "NAB",
    "realAdExchange": "NAB",
    "realTraffic": "NAB",
    "realTweets": "NAB",
    "UCR": "UCR"
}

# Raw dataset name -> short column label used in the table.
# The AWS / AdEx labels were previously swapped; they now match the keys
# (and the mapping used for Table IV - B).
dataset_renames = {
    "MSL": "MSL",
    "SMAP": "SMAP",
    "YAHOOA1": "A1",
    "YAHOOA2": "A2",
    "YAHOOA3": "A3",
    "YAHOOA4": "A4",
    "artificialWithAnomaly": "Art",
    "realAWSCloudwatch": "AWS",
    "realAdExchange": "AdEx",
    "realTraffic": "Traffic",
    "realTweets": "Tweets",
    "UCR": "UCR"
}

# Column order of the final table; a real list rather than a dict view so it
# is a well-defined column indexer.
order_datasets = list(dataset_renames.values())

df = results.copy(deep=True)
df['group'] = df['dataset'].map(family)
df['dataset'] = df['dataset'].map(dataset_renames)

# Pool the confusion counts of all rows belonging to the same dataset and
# pipeline, then derive the scores from the pooled counts.
df = df.groupby(['group', 'dataset', 'pipeline'])[['fp', 'fn', 'tp']].sum().reset_index()
df['precision'] = df.eval('tp / (tp + fp)')
df['recall'] = df.eval('tp / (tp + fn)')
df['f1'] = df.eval('2 * (precision * recall) / (precision + recall)')

# Pivot to one row per pipeline and one F1 column per dataset.
df = df.set_index(['dataset', 'pipeline'])['f1'].unstack(0)
df = df[order_datasets]

# Both summary statistics are taken over the dataset columns only, so neither
# depends on the order in which the summary columns are added.
# ddof=0 keeps the previously reported SD values: the earlier code applied the
# default sample SD to a frame that already contained the AVG column, which is
# numerically the population SD of the per-dataset scores.
df['AVG (F1)'] = df[order_datasets].mean(axis=1)
df['SD (F1)'] = df[order_datasets].std(axis=1, ddof=0)

# Arrange the rows in the presentation order.
df = df.loc[order_pipelines]
df

dataset,MSL,SMAP,A1,A2,A3,A4,Art,AWS,AdEx,Traffic,Tweets,UCR,AVG (F1),SD (F1)
pipeline,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
ARIMA,0.442105,0.333333,0.733167,0.807425,0.81761,0.699774,0.352941,0.517647,0.740741,0.5,0.567164,0.123596,0.552959,0.206805
ARIMA (M),0.456522,0.358974,0.751918,0.809302,0.806861,0.689917,0.5,0.517647,0.769231,0.5,0.575758,0.148148,0.57369,0.193286
LSTM-DT,0.515464,0.707483,0.720627,0.9801,0.744186,0.637904,0.4,0.512821,0.740741,0.666667,0.57971,0.390805,0.633042,0.15887
LSTM-DT (M),0.520833,0.753623,0.728723,0.987469,0.733513,0.637838,0.6,0.512821,0.769231,0.685714,0.588235,0.446125,0.663677,0.139813
LSTM-AE,0.457143,0.725926,0.639053,0.960976,0.59375,0.367837,0.444444,0.678571,0.642857,0.592593,0.535714,0.329208,0.580673,0.165145
LSTM-AE (M),0.457143,0.771654,0.644776,0.963325,0.59375,0.367006,0.444444,0.678571,0.642857,0.615385,0.535714,0.333333,0.58733,0.169305
LSTM-VAE,0.432432,0.605634,0.61708,0.912442,0.597651,0.326336,0.444444,0.701754,0.758621,0.592593,0.535714,0.354029,0.573228,0.16246
LSTM-VAE (M),0.438356,0.704918,0.629213,0.940618,0.594555,0.327931,0.444444,0.714286,0.758621,0.615385,0.535714,0.3607,0.588728,0.170926
TadGAN,0.58427,0.61745,0.532508,0.842105,0.390698,0.296943,0.571429,0.677419,0.72,0.580645,0.588235,0.162476,0.547015,0.177678
TadGAN (M),0.58427,0.630137,0.534161,0.846154,0.394984,0.291262,0.615385,0.677419,0.72,0.580645,0.588235,0.164223,0.55224,0.179607


### Table IV - B: AER Ablation Study

In [2]:
# Directory holding one `<pipeline>_results.csv` file per pipeline.
# Alternative: 'paper-reported-results'.
RESULTS_PATH = 'results'   # real-time benchmarking results

# AER variants to compare; this is also the row order of the final table.
result_files = ['AER (PRED)', 'AER (SUM)', 'AER (REC)', 'AER (MULT)']
order_pipelines = list(result_files)

# Read every per-pipeline file, tag its rows with the pipeline name, and
# concatenate once at the end (instead of re-concatenating on every iteration).
frames = []
for filename in result_files:
    result = pd.read_csv(f"{RESULTS_PATH}/{filename}_results.csv")
    result['pipeline'] = filename
    frames.append(result)
results = pd.concat(frames)

# Raw dataset name -> benchmark family.
family = {
    "MSL": "NASA",
    "SMAP": "NASA",
    "YAHOOA1": "YAHOO",
    "YAHOOA2": "YAHOO",
    "YAHOOA3": "YAHOO",
    "YAHOOA4": "YAHOO",
    "artificialWithAnomaly": "NAB",
    "realAWSCloudwatch": "NAB",
    "realAdExchange": "NAB",
    "realTraffic": "NAB",
    "realTweets": "NAB",
    "UCR": "UCR"
}

# Raw dataset name -> short column label used in the table.
dataset_renames = {
    "MSL": "MSL",
    "SMAP": "SMAP",
    "YAHOOA1": "A1",
    "YAHOOA2": "A2",
    "YAHOOA3": "A3",
    "YAHOOA4": "A4",
    "artificialWithAnomaly": "Art",
    "realAWSCloudwatch": "AWS",
    "realAdExchange": "AdEx",
    "realTraffic": "Traffic",
    "realTweets": "Tweets",
    "UCR": "UCR"
}

# Column order of the final table; a real list rather than a dict view so it
# is a well-defined column indexer.
order_datasets = list(dataset_renames.values())

df = results.copy(deep=True)
df['group'] = df['dataset'].map(family)
df['dataset'] = df['dataset'].map(dataset_renames)

# Pool the confusion counts of all rows belonging to the same dataset and
# pipeline, then derive the scores from the pooled counts.
df = df.groupby(['group', 'dataset', 'pipeline'])[['fp', 'fn', 'tp']].sum().reset_index()
df['precision'] = df.eval('tp / (tp + fp)')
df['recall'] = df.eval('tp / (tp + fn)')
df['f1'] = df.eval('2 * (precision * recall) / (precision + recall)')

# Pivot to one row per pipeline and one F1 column per dataset.
df = df.set_index(['dataset', 'pipeline'])['f1'].unstack(0)
df = df[order_datasets]

# Both summary statistics are taken over the dataset columns only, so neither
# depends on the order in which the summary columns are added.
# ddof=0 keeps the previously reported SD values: the earlier code applied the
# default sample SD to a frame that already contained the AVG column, which is
# numerically the population SD of the per-dataset scores.
df['AVG (F1)'] = df[order_datasets].mean(axis=1)
df['SD (F1)'] = df[order_datasets].std(axis=1, ddof=0)

# Arrange the rows in the presentation order.
df = df.loc[order_pipelines]
df

dataset,MSL,SMAP,A1,A2,A3,A4,Art,AWS,AdEx,Traffic,Tweets,UCR,AVG (F1),SD (F1)
pipeline,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AER (PRED),0.510638,0.671329,0.705882,0.982544,0.865027,0.721408,0.666667,0.714286,0.733333,0.666667,0.56338,0.45469,0.687988,0.136896
AER (SUM),0.457831,0.727273,0.720867,0.9801,0.889671,0.7306,0.8,0.740741,0.714286,0.685714,0.567164,0.423022,0.703106,0.153163
AER (REC),0.441558,0.736,0.697143,0.985,0.896673,0.712929,0.8,0.701754,0.714286,0.628571,0.509804,0.354497,0.681518,0.17198
AER (MULT),0.591549,0.746032,0.788571,0.992443,0.87929,0.707715,0.714286,0.727273,0.689655,0.702703,0.571429,0.478191,0.715761,0.130149
