In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path
# Parent directory of the notebook's current working directory, as a string.
# It is put on `sys.path` so that the `src` package imported below resolves,
# and it is later passed as `root_path` to the evaluation calls.
# NOTE(review): presumably this is the repository root - confirm the notebook
# is always launched from a direct sub-directory of it.
module_path = str(Path.cwd().parents[0])
# Guard against appending the same entry again when the cell is re-run.
if module_path not in sys.path:
    sys.path.append(module_path)

import numpy as np

from src.eval_simple_baselines import evaluate_simple_baselines_on_all_paper_datasets

# Evaluate the simple baselines
In this notebook we run all our simple baselines on the datasets appearing in the paper. The main purpose of the notebook is reproducibility. If you want to use your own datasets or your own methods, or even combine them with ours, please see the `simple_baselines_example_usage.ipynb` notebook for inspiration.

In [7]:
def highlight_max(s, props=''):
    """Return per-entry CSS strings that mark the largest value(s) of ``s``.

    Written for use with ``Styler.apply``: every entry equal to the
    NaN-ignoring maximum of ``s`` gets ``props``; all other entries
    (including NaNs) get the empty string.

    Parameters
    ----------
    s : Series-like object exposing ``.values``
        The values to inspect (one column when applied with ``axis=0``).
    props : str, default ''
        CSS declaration(s) to assign to the maximal entries.

    Returns
    -------
    numpy.ndarray
        Array of strings with the same length as ``s``.
    """
    # nanmax so that missing scores do not mask the real maximum.
    column_max = np.nanmax(s.values)
    is_max = s == column_max
    return np.where(is_max, props, '')

## Evaluate on a single dataset
Full evaluation on all datasets can take some time. You can first run on a single dataset of interest to get some fast results. Below is an example of evaluating the methods on SWAT only. In the first part, we evaluate using the optimal score normalization; in the second part, we return the scores for every score normalization option to check their impact.

#### Optimal normalization

In [8]:
# Quick run restricted to SWAT. `dataset_names` takes one or more names,
# e.g. ['swat', 'wadi_127', 'wadi_112', 'smd', 'ucr_IB'] (see the dataset_reader enum).
# score_normalization='optimal' returns only the scores obtained with the
# optimal score normalization method.
df_std = evaluate_simple_baselines_on_all_paper_datasets(
    dataset_names=['swat'],
    root_path=module_path,
    eval_method='point_wise',
    data_normalization='0-1',
    score_normalization='optimal',
    verbose=True,
)

[INFO]: Evaluating 1 datasets
Evaluating on SWAT.
Evaluation on SWAT finished


In [9]:
# Show the SWAT results with 3 decimals; the best value of each column is highlighted.
df_std.style.format(precision=3).apply(
    highlight_max,
    axis=0,
    props='color:white;background-color:darkblue',
)

Unnamed: 0_level_0,SWAT,SWAT,SWAT,SWAT
Unnamed: 0_level_1,F1,P,R,AUPRC
Sensor Range Deviation,0.231,0.131,0.979,0.556
Simple L2_norm,0.782,0.985,0.648,0.715
1-NN Distance,0.782,0.984,0.649,0.726
PCA_Error,0.833,0.965,0.733,0.745


#### See the impact of different score normalizations

In [10]:
# Same SWAT run as above, but keep the result of every score normalization.
# Fix: the function imported at the top of the notebook is
# `evaluate_simple_baselines_on_all_paper_datasets`; the previous name
# (`evaluate_baselines_on_all_paper_datasets`) is never imported and raises
# NameError on a fresh kernel.
df_std = evaluate_simple_baselines_on_all_paper_datasets(
    root_path=module_path,
    dataset_names=['swat'],
    data_normalization="0-1",
    eval_method='point_wise',
    # 'all' returns the scores of all score normalization methods for
    # baselines which return multiple outputs (in this case only PCA_Error).
    score_normalization='all',
    verbose=True
)

[INFO]: Evaluating 1 datasets
Evaluating on SWAT.
Evaluation on SWAT finished


In [11]:
# One row per (baseline, score normalization) pair; the column-wise best is highlighted.
df_std.style.format(precision=3).apply(
    highlight_max,
    axis=0,
    props='color:white;background-color:darkblue',
)

Unnamed: 0_level_0,SWAT,SWAT,SWAT,SWAT
Unnamed: 0_level_1,F1,P,R,AUPRC
Sensor Range Deviation,0.231,0.131,0.979,0.556
Simple L2_norm,0.782,0.985,0.648,0.715
1-NN Distance,0.782,0.984,0.649,0.726
PCA_Error (median-iqr norm),0.756,0.995,0.61,0.723
PCA_Error (mean-std norm),0.833,0.965,0.733,0.745
PCA_Error (no norm),0.77,0.985,0.632,0.714


## Evaluate on all datasets - Point-Wise metrics
Here we reproduce the point-wise F1 score of all simple baselines on all datasets.

In [37]:
# Point-wise evaluation of every simple baseline; `dataset_names=None` is
# passed here for the run over all paper datasets.
# Fix: call the function that is actually imported
# (`evaluate_simple_baselines_on_all_paper_datasets`); the previous name was
# undefined and raised NameError on a fresh kernel.
df_point_wise = evaluate_simple_baselines_on_all_paper_datasets(
    root_path=module_path,
    dataset_names=None,
    data_normalization="0-1",
    eval_method='point_wise',
    score_normalization='optimal',
    verbose=True
)

[INFO]: Evaluating 7 datasets
[INFO:] SMD contains 28 data traces.
[INFO:] MSL contains 27 data traces.
[INFO:] SMAP contains 54 data traces.
[INFO:] UCR contains 4 data traces
Evaluating on SWAT.
Evaluation on SWAT finished
Evaluating on WADI_127.
Evaluation on WADI_127 finished
Evaluating on WADI_112.
Evaluation on WADI_112 finished
Evaluating on SMD.


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [01:28<00:00,  3.15s/it]


Evaluation on SMD finished: on 28 traces,  results are averaged
Evaluating on MSL.


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:07<00:00,  3.78it/s]


Evaluation on MSL finished: on 27 traces,  results are averaged
Evaluating on SMAP.


  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ratio_ = explained_variance_ / total_var
100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 54/54 [00:18<00:00,  2.95it/s]


Evaluation on SMAP finished: on 54 traces,  results are averaged
Evaluating on UCR_IB.


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00,  5.80it/s]

Evaluation on UCR_IB finished: on 4 traces,  results are averaged





In [38]:
# Full point-wise table (F1, P, R, AUPRC per dataset) with the best score per column highlighted.
df_point_wise.style.format(precision=3).apply(
    highlight_max,
    axis=0,
    props='color:white;background-color:darkblue',
)

Unnamed: 0_level_0,SWAT,SWAT,SWAT,SWAT,WADI_127,WADI_127,WADI_127,WADI_127,WADI_112,WADI_112,WADI_112,WADI_112,SMD,SMD,SMD,SMD,MSL,MSL,MSL,MSL,SMAP,SMAP,SMAP,SMAP,UCR_IB,UCR_IB,UCR_IB,UCR_IB
Unnamed: 0_level_1,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC
Sensor Range Deviation,0.231,0.131,0.979,0.556,0.101,0.053,1.0,0.317,0.465,0.567,0.394,0.497,0.132,0.11,0.682,0.321,0.328,0.42,0.736,0.537,0.273,0.423,0.75,0.588,0.033,0.056,0.764,0.037
Simple L2_norm,0.782,0.985,0.648,0.715,0.281,1.0,0.163,0.21,0.513,0.887,0.361,0.474,0.404,0.569,0.455,0.343,0.395,0.451,0.665,0.305,0.351,0.389,0.673,0.253,0.037,0.019,0.885,0.014
1-NN Distance,0.782,0.984,0.649,0.726,0.281,1.0,0.163,0.211,0.568,0.779,0.447,0.501,0.463,0.626,0.458,0.389,0.404,0.449,0.59,0.324,0.352,0.459,0.61,0.306,0.879,0.824,0.95,0.803
PCA_Error,0.833,0.965,0.733,0.745,0.501,0.884,0.35,0.473,0.655,0.752,0.58,0.575,0.571,0.612,0.582,0.508,0.426,0.411,0.61,0.315,0.382,0.412,0.657,0.267,0.928,0.882,1.0,0.933


In [39]:
# Keep only the F1 columns (drop P, R and AUPRC on the second column level)
# and show them with 5 decimals.
(
    df_point_wise
    .drop(columns=['P', 'R', 'AUPRC'], level=1)
    .style
    .format(precision=5)
    .apply(highlight_max, axis=0, props='color:white;background-color:darkblue')
)

Unnamed: 0_level_0,SWAT,WADI_127,WADI_112,SMD,MSL,SMAP,UCR_IB
Unnamed: 0_level_1,F1,F1,F1,F1,F1,F1,F1
Sensor Range Deviation,0.23114,0.10089,0.4653,0.13189,0.32841,0.27346,0.03295
Simple L2_norm,0.78159,0.2809,0.51278,0.40361,0.39535,0.35097,0.03655
1-NN Distance,0.78222,0.2809,0.56787,0.4625,0.40367,0.3519,0.87935
PCA_Error,0.83321,0.50117,0.65477,0.57123,0.42614,0.38236,0.92849


## Evaluate on all datasets - Range-Wise metrics
Here we reproduce the range-wise scores of all simple baselines on all datasets.

In [None]:
# Range-wise evaluation of every simple baseline; `dataset_names=None` is
# passed here for the run over all paper datasets.
# Fix: call the function that is actually imported
# (`evaluate_simple_baselines_on_all_paper_datasets`); the previous name was
# undefined and raised NameError on a fresh kernel.
df_range_wise = evaluate_simple_baselines_on_all_paper_datasets(
    root_path=module_path,
    dataset_names=None,
    data_normalization="0-1",
    eval_method='range_wise',
    score_normalization='optimal',
    verbose=True
)

In [None]:
# Full range-wise table (F1, P, R, AUPRC per dataset) with the best score per column highlighted.
df_range_wise.style.format(precision=3).apply(
    highlight_max,
    axis=0,
    props='color:white;background-color:darkblue',
)

In [None]:
# Keep only the F1 columns (drop P, R and AUPRC on the second column level).
(
    df_range_wise
    .drop(columns=['P', 'R', 'AUPRC'], level=1)
    .style
    .format(precision=3)
    .apply(highlight_max, axis=0, props='color:white;background-color:darkblue')
)

In [13]:
# `Styler.set_precision` was deprecated in pandas 1.3 and removed in pandas 2.0;
# `Styler.format(precision=...)` is the supported equivalent and is what the
# other cells of this notebook already use.
(
    df_range_wise
    .style
    .format(precision=3)
    .apply(highlight_max, props='color:white;background-color:darkblue', axis=0)
)

Unnamed: 0_level_0,SWAT,SWAT,SWAT,SWAT,WADI_127,WADI_127,WADI_127,WADI_127,WADI_112,WADI_112,WADI_112,WADI_112,SMD,SMD,SMD,SMD,MSL,MSL,MSL,MSL,SMAP,SMAP,SMAP,SMAP,UCR_IB,UCR_IB,UCR_IB,UCR_IB
Unnamed: 0_level_1,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC,F1,P,R,AUPRC
Sensor Range Deviation,0.23,0.131,0.928,0.534,0.098,0.053,0.678,0.374,0.526,0.569,0.489,0.543,0.116,0.121,0.508,0.325,0.217,0.391,0.656,0.533,0.173,0.416,0.68,0.588,0.023,0.088,0.764,0.056
Simple L2_norm,0.366,0.898,0.23,0.367,0.41,1.0,0.258,0.3,0.607,0.908,0.456,0.582,0.338,0.461,0.411,0.281,0.419,0.493,0.61,0.331,0.361,0.385,0.629,0.259,0.09,0.049,0.882,0.038
1-NN Distance,0.372,0.937,0.232,0.391,0.41,1.0,0.258,0.301,0.618,0.915,0.467,0.564,0.384,0.517,0.389,0.327,0.414,0.524,0.425,0.372,0.362,0.496,0.552,0.316,0.882,0.806,0.977,0.806
PCA_Error,0.574,0.918,0.417,0.504,0.557,0.884,0.406,0.543,0.699,0.752,0.652,0.64,0.58,0.641,0.597,0.516,0.51,0.487,0.609,0.383,0.393,0.429,0.649,0.284,0.928,0.882,1.0,0.933


In [14]:
# F1-only view of the range-wise results.
# `Styler.set_precision` was deprecated in pandas 1.3 and removed in pandas 2.0;
# `Styler.format(precision=...)` is the supported equivalent and is what the
# other cells of this notebook already use.
(
    df_range_wise
    .drop(['P', 'R', 'AUPRC'], axis=1, level=1)
    .style
    .format(precision=3)
    .apply(highlight_max, props='color:white;background-color:darkblue', axis=0)
)

Unnamed: 0_level_0,SWAT,WADI_127,WADI_112,SMD,MSL,SMAP,UCR_IB
Unnamed: 0_level_1,F1,F1,F1,F1,F1,F1,F1
Sensor Range Deviation,0.23,0.098,0.526,0.116,0.217,0.173,0.023
Simple L2_norm,0.366,0.41,0.607,0.338,0.419,0.361,0.09
1-NN Distance,0.372,0.41,0.618,0.384,0.414,0.362,0.882
PCA_Error,0.574,0.557,0.699,0.58,0.51,0.393,0.928
