# TimeEval shared parameter optimization result analysis

In [1]:
# Automatically reload packages:
# (autoreload mode 2 re-imports modules before each cell is executed, so edits to
# local packages are picked up without restarting the kernel)
%load_ext autoreload
%autoreload 2

In [2]:
# imports
import json
import warnings
import pandas as pd
import numpy as np
import scipy as sp
import plotly.offline as py
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
from plotly.subplots import make_subplots
from pathlib import Path
from timeeval import Datasets

## Configuration

Target parameters that were optimized in this run (per algorithm):

In [3]:
# Algorithm name -> names of the hyper-parameters that were varied for it in this run.
# The list order matters: extract_hyper_params uses the first name that is present
# in a run's hyper-parameter JSON.
algo_param_mapping = {
    "HBOS": ["n_bins"],
    "MultiHMM": ["n_bins"],
    "MTAD-GAT": [
        "context_window_size",
        "mag_window_size",
        "score_window_size",
    ],
    "PST": ["n_bins"],
}

Define data and results folder:

In [4]:
# constants and configuration
# (all paths are relative to the notebook's working directory)
data_path = Path("../../data") / "test-cases"
result_root_path = Path("../timeeval_experiments/results")
experiment_result_folder = "2021-10-04_shared-optim2"

# build paths
# Path.iterdir() yields entries in arbitrary order; sort them so that the listing
# below is identical on every re-run of the notebook.
result_paths = sorted(d for d in result_root_path.iterdir() if d.is_dir())
print("Available result directories:")
display(result_paths)

# folder of the experiment run that is analyzed in the rest of the notebook
result_path = result_root_path / experiment_result_folder
print("\nSelecting:")
print(f"Data path: {data_path.resolve()}")
print(f"Result path: {result_path.resolve()}")

Available result directories:


[PosixPath('../timeeval_experiments/results/2021-09-30-torsk'),
 PosixPath('../timeeval_experiments/results/2021-09-27_shared-optim'),
 PosixPath('../timeeval_experiments/results/2021-10-04_shared-optim2')]


Selecting:
Data path: /home/sebastian/Documents/Projects/akita/data/test-cases
Result path: /home/sebastian/Documents/Projects/akita/timeeval/timeeval_experiments/results/2021-10-04_shared-optim2


Load results and dataset metadata:

In [5]:
def extract_hyper_params(param_names):
    """Build a function that picks the optimized parameter out of a hyper-parameter JSON string.

    Parameters
    ----------
    param_names : iterable of str
        Candidate parameter names, tried in the given order.

    Returns
    -------
    callable
        ``extract(value)`` decodes the JSON string ``value`` and returns a
        ``pd.Series`` with the entries ``optim_param_name`` and
        ``optim_param_value`` for the first candidate name that is a key of the
        decoded object. It raises ``ValueError`` if none of the candidates is
        present.
    """
    labels = ["optim_param_name", "optim_param_value"]

    def extract(value):
        decoded = json.loads(value)
        for candidate in param_names:
            try:
                found = decoded[candidate]
            except KeyError:
                # this candidate was not set for the run, try the next one
                continue
            # only the first matching candidate is reported
            return pd.Series([candidate, found], index=labels)
        raise ValueError(f"Parameters {param_names} not found in '{value}'")

    return extract

# load results (one row per algorithm run)
results_file = result_path / "results.csv"
print(f"Reading results from {result_path.resolve()}")
df = pd.read_csv(results_file)

# add dataset_name column: the part of the dataset identifier in front of the first "."
df["dataset_name"] = df["dataset"].str.split(".").str[0]

# add optim_params columns: default to "" and fill them per algorithm with the
# parameter that extract_hyper_params finds in the run's hyper_params JSON
optim_columns = ["optim_param_name", "optim_param_value"]
df[optim_columns] = ""
for algo, optimized_names in algo_param_mapping.items():
    df_algo = df.loc[df["algorithm"] == algo]
    extractor = extract_hyper_params(optimized_names)
    df.loc[df_algo.index, optim_columns] = df_algo["hyper_params"].apply(extractor)

# load dataset metadata
dmgr = Datasets(data_path)

Reading results from /home/sebastian/Documents/Projects/akita/timeeval/timeeval_experiments/results/2021-10-04_shared-optim2


Define plotting functions:

In [6]:
def load_scores_df(algorithm_name, dataset_id, optim_params, repetition=1):
    """Read the ``anomaly_scores.ts`` file of a single algorithm run.

    The run is looked up in the global results frame ``df`` to obtain its
    ``hyper_params_id``, which is part of the result folder layout below
    ``result_path``.

    Parameters
    ----------
    algorithm_name : str
        Value of the ``algorithm`` column.
    dataset_id : sequence
        ``dataset_id[0]`` is matched against ``collection`` and
        ``dataset_id[1]`` against ``dataset``.
    optim_params : sequence
        ``optim_params[0]`` is matched against ``optim_param_name`` and
        ``optim_params[1]`` against ``optim_param_value``.
    repetition : int, default 1
        Repetition number; used as a folder name.

    Returns
    -------
    pd.DataFrame
        Content of the scores file, read without a header row.

    Raises
    ------
    ValueError
        If the lookup does not match exactly one row (raised by ``.item()``).
    """
    run_mask = (
        (df["algorithm"] == algorithm_name)
        & (df["collection"] == dataset_id[0])
        & (df["dataset"] == dataset_id[1])
        & (df["optim_param_name"] == optim_params[0])
        & (df["optim_param_value"] == optim_params[1])
    )
    params_id = df.loc[run_mask, "hyper_params_id"].item()
    scores_file = result_path.joinpath(
        algorithm_name,
        params_id,
        dataset_id[0],
        dataset_id[1],
        str(repetition),
        "anomaly_scores.ts",
    )
    return pd.read_csv(scores_file, header=None)

def plot_scores(algorithm_name, dataset_name):
    """Plot a GutenTAG dataset together with the anomaly scores of selected runs.

    The upper subplot shows the time series (all channels for multivariate
    datasets), the lower subplot shows the ``is_anomaly`` label and one score
    trace per run. Runs without a ROC_AUC entry or without a readable scores
    file trigger a warning and are left out of the plot.

    Parameters
    ----------
    algorithm_name : tuple or list of tuple
        One ``(algorithm_name, optim_param_name, optim_param_value)`` triple or
        a list of such triples.
    dataset_name : str
        Dataset name as found in the ``dataset_name`` column of ``df``.

    Returns
    -------
    The result of ``py.iplot(fig)``.

    Raises
    ------
    ValueError
        If ``algorithm_name`` is neither a tuple nor a list.
    """
    # normalize the first argument to a list of triples
    if isinstance(algorithm_name, tuple):
        requested = [algorithm_name]
    elif isinstance(algorithm_name, list):
        requested = algorithm_name
    else:
        raise ValueError("Please supply a tuple (algorithm_name, optim_param_name, optim_param_value) or a list thereof as first argument!")

    # the time series and labels are always taken from the ".unsupervised" dataset variant
    df_dataset = dmgr.get_dataset_df(("GutenTAG", f"{dataset_name}.unsupervised"))

    # check if dataset is multivariate (.item() requires a single distinct value)
    dimensionality = df.loc[df["dataset_name"] == dataset_name, "dataset_input_dimensionality"].unique().item().lower()

    auroc = {}
    df_scores = pd.DataFrame(index=df_dataset.index)
    seen = []
    skipped = []
    for algo, optim_param_name, optim_param_value in requested:
        optim_params = f"{optim_param_name}={optim_param_value}"
        key = (algo, optim_params)
        seen.append(key)

        # get algorithm metric results
        try:
            run_mask = (
                (df["algorithm"] == algo)
                & (df["dataset_name"] == dataset_name)
                & (df["optim_param_name"] == optim_param_name)
                & (df["optim_param_value"] == optim_param_value)
            )
            auroc[key] = df.loc[run_mask, "ROC_AUC"].item()
        except ValueError:
            warnings.warn(f"No ROC_AUC score found! Probably {algo} with params {optim_params} was not executed on {dataset_name}.")
            auroc[key] = -1
            skipped.append(key)
            continue

        # load scores from the dataset variant that matches the algorithm's training type
        training_type = df.loc[df["algorithm"] == algo, "algo_training_type"].values[0].lower().replace("_", "-")
        try:
            scores = load_scores_df(
                algo,
                ("GutenTAG", f"{dataset_name}.{training_type}"),
                (optim_param_name, optim_param_value),
            )
            df_scores[key] = scores.iloc[:, 0]
        except (ValueError, FileNotFoundError):
            warnings.warn(f"No anomaly scores found! Probably {algo} was not executed on {dataset_name} with params {optim_params}.")
            df_scores[key] = np.nan
            skipped.append(key)
    plotted = [key for key in seen if key not in skipped]

    # Create plot
    fig = make_subplots(rows=2, cols=1)
    if dimensionality == "multivariate":
        # columns 1 .. n-2 are plotted as channels (first and last column are excluded)
        for channel in range(1, df_dataset.shape[1]-1):
            channel_trace = go.Scatter(x=df_dataset.index, y=df_dataset.iloc[:, channel], name=f"channel-{channel}")
            fig.add_trace(channel_trace, row=1, col=1)
    else:
        series_trace = go.Scatter(x=df_dataset.index, y=df_dataset.iloc[:, 1], name="timeseries")
        fig.add_trace(series_trace, row=1, col=1)
    label_trace = go.Scatter(x=df_dataset.index, y=df_dataset["is_anomaly"], name="label")
    fig.add_trace(label_trace, row=2, col=1)

    for algo, optim_params in plotted:
        key = (algo, optim_params)
        score_trace = go.Scatter(x=df_scores.index, y=df_scores[key], name=f"{algo}={auroc[key]:.4f} ({optim_params})")
        fig.add_trace(score_trace, row=2, col=1)
    # link the x axes of both subplots
    fig.update_xaxes(matches="x")
    fig.update_layout(
        title=f"Results of {','.join(np.unique([a for a, _ in plotted]))} on {dataset_name}",
        height=400
    )
    return py.iplot(fig)

## Analyze TimeEval results

In [7]:
# overview of the loaded results, restricted to the columns used in this analysis
df[[
    "algorithm",
    "dataset_name",
    "status",
    "AVERAGE_PRECISION",
    "PR_AUC",
    "RANGE_PR_AUC",
    "ROC_AUC",
    "execute_main_time",
    "optim_param_name",
    "optim_param_value",
]]

Unnamed: 0,algorithm,dataset_name,status,AVERAGE_PRECISION,PR_AUC,RANGE_PR_AUC,ROC_AUC,execute_main_time,optim_param_name,optim_param_value
0,HBOS,ecg-channels-all-of-3,Status.OK,0.032220,0.150289,0.143813,0.544104,26.899593,n_bins,5
1,HBOS,ecg-channels-single-of-10,Status.OK,0.012512,0.012796,0.010695,0.577128,31.133166,n_bins,5
2,HBOS,ecg-channels-single-of-2,Status.OK,0.010035,0.010804,0.012908,0.530409,33.759874,n_bins,5
3,HBOS,ecg-channels-single-of-20,Status.OK,0.012632,0.012819,0.011020,0.577631,33.053460,n_bins,5
4,HBOS,ecg-channels-single-of-5,Status.OK,0.010447,0.011373,0.010637,0.547163,29.512747,n_bins,5
...,...,...,...,...,...,...,...,...,...,...
3480,PST,sinus-type-pattern-shift,Status.OK,0.014713,0.014132,0.011032,0.472318,26.676900,n_bins,20
3481,PST,sinus-type-pattern,Status.OK,0.005067,0.005017,0.028875,0.001809,23.909352,n_bins,20
3482,PST,sinus-type-platform,Status.OK,0.005106,0.005054,0.005189,0.034916,23.341548,n_bins,20
3483,PST,sinus-type-trend,Status.OK,0.011853,0.011658,0.015139,0.577974,22.664949,n_bins,20


---

### Errors

In [8]:
# number of runs per (training type, algorithm) and status; combinations without
# any run are reported as 0 instead of NaN
df_error_counts = (
    df.pivot_table(
        index=["algo_training_type", "algorithm"],
        columns=["status"],
        values="repetition",
        aggfunc="count",
    )
    .fillna(value=0)
    .astype(np.int64)
)

#### Aggregation of errors per algorithm grouped by algorithm training type

In [9]:
# show the status counts separately for each training type that occurs in the results
for tpe in ("SEMI_SUPERVISED", "SUPERVISED", "UNSUPERVISED"):
    if tpe not in df_error_counts.index:
        continue
    print(tpe)
    display(df_error_counts.loc[tpe])

SEMI_SUPERVISED


status,Status.ERROR,Status.OK,Status.TIMEOUT
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MTAD-GAT,180,480,1045


SUPERVISED


status,Status.ERROR,Status.OK,Status.TIMEOUT
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MultiHMM,125,494,1


UNSUPERVISED


status,Status.ERROR,Status.OK,Status.TIMEOUT
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HBOS,0,620,0
PST,0,540,0


#### Slow algorithms

Algorithms for which more than 50% of all executions ran into the timeout.

In [10]:
# more timeouts than all other outcomes combined, i.e. more than half of the runs timed out
df_error_counts.loc[
    df_error_counts["Status.TIMEOUT"]
    > (df_error_counts["Status.ERROR"] + df_error_counts["Status.OK"])
]

Unnamed: 0_level_0,status,Status.ERROR,Status.OK,Status.TIMEOUT
algo_training_type,algorithm,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SEMI_SUPERVISED,MTAD-GAT,180,480,1045


#### Broken algorithms

Algorithms that failed in more than 50% of the executions.

In [11]:
# share of failed runs above which an algorithm is listed (strictly greater than)
error_threshold = 0.5
executions_total = (
    df_error_counts["Status.TIMEOUT"]
    + df_error_counts["Status.ERROR"]
    + df_error_counts["Status.OK"]
)
df_error_counts.loc[df_error_counts["Status.ERROR"] > error_threshold*executions_total]

Unnamed: 0_level_0,status,Status.ERROR,Status.OK,Status.TIMEOUT
algo_training_type,algorithm,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


#### Detail errors

In [12]:
algo_list = ["MTAD-GAT", "MultiHMM"]

error_list = ["OOM", "Segfault", "ZeroDivisionError", "IncompatibleParameterConfig", "WrongDBNState", "SyntaxError", "other"]
# (log fragment, error category) pairs; they are checked in this order and the
# first fragment contained in a log decides the category
log_markers = [
    ("status code '139'", "Segfault"),
    ("status code '137'", "OOM"),
    ("Expected n_neighbors <= n_samples", "IncompatibleParameterConfig"),
    ("ZeroDivisionError", "ZeroDivisionError"),
    ("does not have key", "WrongDBNState"),
    ("NameError", "SyntaxError"),
]
errors = pd.DataFrame(0, index=error_list, columns=algo_list, dtype=np.int_)
for algo in algo_list:
    # all failed runs of this algorithm
    df_tmp = df[(df["algorithm"] == algo) & (df["status"] == "Status.ERROR")]
    for row_id, run in df_tmp.iterrows():
        path = result_path.joinpath(
            run["algorithm"],
            run["hyper_params_id"],
            run["collection"],
            run["dataset"],
            str(run["repetition"]),
            "execution.log",
        )
        log = path.read_text()
        for fragment, category in log_markers:
            if fragment in log:
                errors.loc[category, algo] += 1
                break
        else:
            # unknown failure: print the complete log for manual inspection
            print(f'\n\n#### {run["dataset"]} ({run["optim_param_name"]}:{run["optim_param_value"]})')
            print(log)
            errors.loc["other", algo] += 1
errors.T

Unnamed: 0,OOM,Segfault,ZeroDivisionError,IncompatibleParameterConfig,WrongDBNState,SyntaxError,other
MTAD-GAT,180,0,0,0,0,0,0
MultiHMM,0,0,0,0,0,125,0


---

### Parameter assessment

In [13]:
# column (metric, aggregation) that ranks the parameter values within an algorithm
sort_by = ("ROC_AUC", "mean")
metric_agg_type = ["mean", "median"]
time_agg_type = "mean"
# quality metrics get mean and median, runtimes the mean, and "repetition" is
# counted to show how many runs went into each row
aggs = {
    "AVERAGE_PRECISION": metric_agg_type,
    "RANGE_PR_AUC": metric_agg_type,
    "PR_AUC": metric_agg_type,
    "ROC_AUC": metric_agg_type,
    "train_main_time": time_agg_type,
    "execute_main_time": time_agg_type,
    "repetition": "count"
}

df_tmp = (
    df.reset_index()
    .groupby(by=["algorithm", "optim_param_name", "optim_param_value"])
    .agg(aggs)
    .reset_index()
    # descending, so the best value by `sort_by` comes first within each algorithm/parameter
    .sort_values(by=["algorithm", "optim_param_name", sort_by], ascending=False)
    .set_index(["algorithm", "optim_param_name", "optim_param_value"])
)

# lift the display limits so that the table is shown without truncation
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(df_tmp)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,AVERAGE_PRECISION,AVERAGE_PRECISION,RANGE_PR_AUC,RANGE_PR_AUC,PR_AUC,PR_AUC,ROC_AUC,ROC_AUC,train_main_time,execute_main_time,repetition
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,median,mean,median,mean,median,mean,median,mean,mean,count
algorithm,optim_param_name,optim_param_value,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
PST,n_bins,5,0.39708,0.317714,0.385879,0.302098,0.396243,0.317273,0.802249,0.89486,,34.725921,135
PST,n_bins,8,0.272842,0.076066,0.268959,0.120117,0.271491,0.074917,0.698583,0.794044,,35.959607,135
PST,n_bins,10,0.202958,0.035208,0.220957,0.101009,0.202411,0.034813,0.592948,0.689568,,37.905403,135
PST,n_bins,20,0.072902,0.014033,0.119969,0.064917,0.072639,0.013889,0.399095,0.29868,,46.914917,135
MultiHMM,n_bins,8,0.039097,0.014123,0.133103,0.026042,0.038271,0.009875,0.476832,0.471447,363.003773,6.446919,155
MultiHMM,n_bins,5,0.069705,0.016897,0.130878,0.022867,0.070247,0.011805,0.470824,0.480316,97.734562,6.673174,155
MultiHMM,n_bins,10,0.031629,0.013809,0.1435,0.027247,0.033808,0.009214,0.461211,0.460252,342.761122,6.536975,155
MultiHMM,n_bins,20,0.031147,0.011715,0.156079,0.039948,0.029807,0.009737,0.452648,0.467494,427.667099,6.671899,155
MTAD-GAT,score_window_size,52,0.208309,0.094298,0.219297,0.137429,0.206107,0.092904,0.650048,0.653008,7211.930384,503.538815,155
MTAD-GAT,score_window_size,28,0.208844,0.09168,0.22561,0.151763,0.20756,0.089998,0.612438,0.627799,7211.671927,564.45877,155


#### Selected parameters

- HBOS: `n_bins=20` (more is better)
- MultiHMM: `n_bins=5` (8 is slightly better, but takes way longer. The scores are very bad anyway!)
- MTAD-GAT: `context_window_size=30,mag_window_size=40,score_window_size=52` (very slow)
- PST: `n_bins=5` (less is better)

> **Note**
>
> MTAD-GAT is very slow! Exclude from further runs!

In [None]:
# compare the MultiHMM anomaly scores for n_bins=5 and n_bins=8 on the sinus-type-mean dataset
plot_scores([("MultiHMM", "n_bins", 5), ("MultiHMM", "n_bins", 8)], "sinus-type-mean")

In [None]:
# compare the MTAD-GAT anomaly scores for context_window_size=30 and 40 on the sinus-type-mean dataset
plot_scores([("MTAD-GAT", "context_window_size", 30), ("MTAD-GAT", "context_window_size", 40)], "sinus-type-mean")