In [None]:
# imports
import re
import json
import copy
import warnings
import pandas as pd
import numpy as np
import scipy as sp
import os

from IPython.display import display, Markdown, Latex
from collections import defaultdict

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.lines import Line2D
from matplotlib.markers import MarkerStyle
from matplotlib.legend_handler import HandlerBase
from matplotlib.patches import Rectangle

import plotly.offline as py
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
from plotly.subplots import make_subplots

from itertools import cycle
from pathlib import Path

from timeeval import DatasetManager, DefaultMetrics, TimeEval, Status, Datasets
from timeeval import TimeEval, ResourceConstraints, DefaultMetrics, Algorithm, TrainingType, InputDimensionality
from timeeval.timeeval import Experiment
from timeeval.timeeval import Times
from timeeval.utils.datasets import extract_labels
import utils

In [None]:


# Static metadata for every evaluated algorithm, keyed by algorithm name.
# Each entry carries the same keys: name, research_area, method_family,
# image_name, display_name and tex_command.
# NOTE(review): the "Roburst PCA" spelling is kept on purpose - the key is used
# for lookups and presumably has to match the algorithm name stored in the
# result files; confirm before renaming it.
algo_meta = {
    "DeepAnT": {"name": "DeepAnT", "research_area": "Deep Learning", "method_family": "forecasting", "image_name": "deepant", "display_name": "DeepAnT", "tex_command": "\\deepant[cite]{}"},
    "EncDec-AD": {"name": "EncDec-AD", "research_area": "Deep Learning", "method_family": "reconstruction", "image_name": "encdec_ad", "display_name": "EncDec-AD", "tex_command": "\\encdecad[cite]{}"},
    "FastMCD": {"name": "FastMCD", "research_area": "Statistics (Regression & Forecasting)", "method_family": "distribution", "image_name": "fast_mcd", "display_name": "FastMCD", "tex_command": "\\fastmcd[cite]{}"},
    "IF-LOF": {"name": "IF-LOF", "research_area": "Outlier Detection", "method_family": "trees", "image_name": "if_lof", "display_name": "IF-LOF", "tex_command": "\\iflof[cite]{}"},
    "iForest": {"name": "Isolation Forest (iForest)", "research_area": "Outlier Detection", "method_family": "trees", "image_name": "iforest", "display_name": "iForest", "tex_command": "\\iforest[cite]{}"},
    "KMeans": {"name": "KMeans", "research_area": "Classic Machine Learning", "method_family": "distance", "image_name": "kmeans", "display_name": "KMeans", "tex_command": "\\kmeans[cite]{}"},
    "KNN": {"name": "KNN", "research_area": "Classic Machine Learning", "method_family": "distance", "image_name": "knn", "display_name": "KNN", "tex_command": "\\knn[cite]{}"},
    "LOF": {"name": "LOF", "research_area": "Outlier Detection", "method_family": "distance", "image_name": "lof", "display_name": "LOF", "tex_command": "\\lof[cite]{}"},
    "LSTM-AD": {"name": "LSTM-AD", "research_area": "Deep Learning", "method_family": "forecasting", "image_name": "lstm_ad", "display_name": "LSTM-AD", "tex_command": "\\lstmad[cite]{}"},
    "PCC": {"name": "PCC", "research_area": "Classic Machine Learning", "method_family": "reconstruction", "image_name": "pcc", "display_name": "PCC", "tex_command": "\\pcc[cite]{}"},
    "Roburst PCA": {"name": "Roburst PCA", "research_area": "Classic Machine Learning", "method_family": "reconstruction", "image_name": "robust_pca", "display_name": "Roburst PCA", "tex_command": "\\robustpca[cite]{}"},
}

# All algorithms covered by this notebook, in plotting order.
algorithm_names = [
    'DeepAnT',
    'EncDec-AD',
    'KMeans',
    'KNN',
    'LOF',
    'LSTM-AD',
    'FastMCD',
    'PCC',
    'iForest',
    'IF-LOF',
    'Roburst PCA',
]

# Split of the algorithms by training type; each group is evaluated on the
# dataset id defined for it below.
unsupervised_algorithm_names = ['KMeans', 'KNN', 'LOF', 'PCC', 'iForest', 'IF-LOF']
semi_supervised_algorithm_names = [
    'DeepAnT',
    'EncDec-AD',
    'LSTM-AD',
    'FastMCD',
    'Roburst PCA',
]

# Dataset ids as (collection name, dataset name) pairs.
unsupervised_dataset_id = ('custom', 'MOVE-II_unsupervised')
semi_supervised_dataset_id = ('custom', 'MOVE-II_semi-supervised')

# Directory that receives every exported plot.
storePlotDir = 'plots'

class CalculatedMetrics:
    """Names of the result columns that hold the computed metrics.

    The attributes are set per instance; the notebook uses the shared
    ``CALCULATED_METRICS`` instance created right after the class.
    """

    def __init__(self):
        # Quality metrics.
        self.ROC_AUC, self.PR_AUC, self.RANGE_PR_AUC = (
            'ROC_AUC', 'PR_AUC', 'RANGE_PR_AUC')
        # Runtime column.
        self.OVERALL_RUN_TIME = 'overall_time'


# Shared instance used throughout the notebook.
CALCULATED_METRICS = CalculatedMetrics()


In [None]:
# Figure widths used for single- and double-column plots.
single_column_figwidth = 10
double_column_figwidth = 20

# Global matplotlib defaults for this notebook.
plt.rcParams["font.size"] = 12
plt.rcParams["font.weight"] = "regular"  # "bold"
plt.rcParams["figure.figsize"] = (double_column_figwidth, 10)

# Make sure the export directory exists before any plot is written.
Path(storePlotDir).mkdir(parents=True, exist_ok=True)

In [None]:
# constants and configuration
data_path = Path("data")
result_root_path = Path("results")

# List every run directory below the result root for reference.
result_paths = [entry for entry in result_root_path.iterdir() if entry.is_dir()]
print(result_paths)

# The run analysed by this notebook (hard-coded run folder name).
result_path = result_root_path.joinpath("2024_07_20_13_41_02")
print(f"Reading results from {result_path.resolve()}")

In [None]:
# Result columns as noted by the original author (not necessarily complete):
#   algorithm, collection, dataset, algo_training_type,
#   algo_input_dimensionality, dataset_training_type,
#   dataset_input_dimensionality, train_preprocess_time, train_main_time,
#   execute_preprocess_time, repetition, hyper_params, hyper_params_id,
#   ROC_AUC, PR_AUC, RANGE_PR_AUC, train_postprocess_time, dataset_name,
#   overall_time, algorithm-index

# Dataset manager rooted at ./data, with the custom dataset index file.
custom_dataset = Path("data/datasets.json")
dm = DatasetManager(
    Path.cwd().joinpath("data"),
    create_if_missing=True,
    custom_datasets_file=custom_dataset,
)

# Results of the selected run plus the two dataset frames used for labels.
RESULT_DF = utils.load_results(result_path)
MOVE_II_DF = dm.get_dataset_df(unsupervised_dataset_id)
MOVE_II_SEMI_SUPERVISE_DF = dm.get_dataset_df(semi_supervised_dataset_id)

In [None]:

# algorithm_names = ['LOF']
# NOTE(review): these imports live mid-notebook; consider moving them to the
# import cell at the top.
from sklearn.metrics import auc, roc_curve, precision_recall_curve
from utils import range_precision_recall_curve

# Curve function to use for each metric name.
metrics_functions = {
    CALCULATED_METRICS.ROC_AUC: roc_curve,
    CALCULATED_METRICS.PR_AUC: precision_recall_curve,
    CALCULATED_METRICS.RANGE_PR_AUC: range_precision_recall_curve,
}


def plot_curves(metric: str, store_plot: bool = False):
    """Draw the ``metric`` curve of every algorithm into one plotly figure.

    For each entry of ``algorithm_names`` the scores are loaded from the
    result folder, scaled together with the labels of the matching dataset
    and turned into a curve by the function registered for ``metric`` in
    ``metrics_functions``. Each trace is named after the algorithm and the
    area returned by ``utils.curve_store``.

    Args:
        metric: Key into ``metrics_functions``.
        store_plot: When true, the figure is also written to
            ``{storePlotDir}/{metric}_curves_plot.pdf``.

    Raises:
        KeyError: If ``metric`` has no entry in ``metrics_functions``.
        ValueError: If an algorithm is listed in neither
            ``unsupervised_algorithm_names`` nor
            ``semi_supervised_algorithm_names``.
    """
    # Resolve the curve function once, so an unknown metric fails before any
    # scores are loaded.
    curve_function = metrics_functions[metric]

    fig = go.Figure()
    for algo in algorithm_names:
        if algo in unsupervised_algorithm_names:
            dataset_id, dataset_df = unsupervised_dataset_id, MOVE_II_DF
        elif algo in semi_supervised_algorithm_names:
            dataset_id, dataset_df = semi_supervised_dataset_id, MOVE_II_SEMI_SUPERVISE_DF
        else:
            # Without this guard the scores and labels of the previous
            # algorithm would be plotted under this algorithm's name.
            raise ValueError(
                f"Algorithm {algo!r} is listed in neither "
                "unsupervised_algorithm_names nor semi_supervised_algorithm_names")

        # Only the first column of the loaded score frame is used.
        scores = utils.load_scores_df(
            RESULT_DF, result_path, algo, dataset_id).iloc[:, 0]
        labels = extract_labels(dataset_df)
        y_true, y_scores = Experiment.scale_scores(labels, scores)
        x, y, area = utils.curve_store(y_true, y_scores, curve_function)
        fig.add_trace(go.Scatter(x=x, y=y,
                                 mode='lines',
                                 name=f'{algo} | area = {area:.4f}'))

    fig.update_layout(
        title={"text": f"{metric} curves",
               "xanchor": "center", "x": 0.5},
        # Every legend entry is one algorithm.
        legend_title="Algorithms")
    fig.show()
    if store_plot:
        fig.write_image(file=f'{storePlotDir}/{metric}_curves_plot.pdf',
                        format='pdf')


# Plot and store the curves of all three quality metrics.
plot_curves(metric=CALCULATED_METRICS.ROC_AUC, store_plot=True)
plot_curves(metric=CALCULATED_METRICS.PR_AUC, store_plot=True)
plot_curves(metric=CALCULATED_METRICS.RANGE_PR_AUC, store_plot=True)

# algorithm_names = ['EncDec-AD']

#  ROC Scores 
# fig = go.Figure()

# for algo in algorithm_names:
#     # scoreDf: np.ndarray = utils.load_scores_df(
#     #     df, result_path, algo, dataset_id).iloc[:, 0]

#     if algo in unsupervised_algorithm_names:
#         scoreDf: np.ndarray = utils.load_scores_df(
#             RESULT_DF, result_path, algo, unsupervised_dataset_id).iloc[:, 0]
#         labels = extract_labels(MOVE_II_DF)
#     elif algo in semi_supervised_algorithm_names:
#         scoreDf: np.ndarray = utils.load_scores_df(
#             RESULT_DF, result_path, algo, semi_supervised_dataset_id).iloc[:, 0]
#         labels = extract_labels(MOVE_II_SEMI_SUPERVISE_DF)

#     y_true, y_scores = Experiment.scale_scores(labels, scoreDf)
#     # fig_curv = go.Figure()

#     # roc_curve_x, roc_curve_y, roc_curve_area = utils.curve_store(y_true, y_scores, roc_curve)
#     # roc_curve_name = f'{algo} {roc_curve.__name__} | area = {roc_curve_area:.4f}'
#     # fig.add_trace(go.Scatter(x=roc_curve_x, y=roc_curve_y,
#     #                          mode='lines',
#     #                               name=roc_curve_name))
    

#     # pr_curve_x, pr_curve_y, pr_curve_area = utils.curve_store(
#     #     y_true, y_scores, precision_recall_curve)
#     # pr_curve_name = f'{precision_recall_curve.__name__} | area = {pr_curve_area:.4f}'
#     # fig_curv.add_trace(go.Scatter(x=pr_curve_x, y=pr_curve_y,
#     #                          mode='lines',
#     #                               name=pr_curve_name))

#     # range_pr_curve_x, range_pr_curve_y, range_pr_curve_area = utils.curve_store(
#     #     y_true, y_scores, range_precision_recall_curve)
#     # range_pr_curve_name = f'{range_precision_recall_curve.__name__} | area = {range_pr_curve_area:.4f}'
#     # fig_curv.add_trace(go.Scatter(x=range_pr_curve_x, y=range_pr_curve_y,
#     #                          mode='lines',
#     #                               name=range_pr_curve_name))

#     # utils.auc_plot_matplotlib(algo, y_true, y_scores,
#     #                           roc_curve, True, True, storePlotDir, 'png')
#     # utils.auc_plot_matplotlib(algo, y_true, y_scores,
#     #                           precision_recall_curve, True, True, storePlotDir, 'png')
#     # utils.auc_plot_matplotlib(algo, y_true, y_scores,
#     #                           range_precision_recall_curve, True, True, storePlotDir, 'png')

#     # roc_score, roc_fig = utils.auc_plot(y_true, y_scores, roc_curve)
#     # pr_score, pr_fig = utils.auc_plot(y_true, y_scores, precision_recall_curve)
#     # range_pr_score, range_pr_fig = utils.auc_plot(y_true, y_scores, range_precision_recall_curve)
#     # print(
#     #     f'Algorithm:{algo} => ROC_AUC = {roc_score}:  PR_AUC={pr_score}: RANGE_PR_AUC={range_pr_score}')
#     # roc_fig.show()
#     # pr_fig.show()
#     # range_pr_fig.show()

# # fig.update_layout(
# #     title={"text": f"{CALCULATED_METRICS.ROC_AUC}Evaluation evaluation curves",
# #                "xanchor": "center", "x": 0.5},
# #         legend_title="Metrics")
# # fig.show()
# # fig.write_image(file=f'{storePlotDir}/{CALCULATED_METRICS.ROC_AUC}_metrics_curves_plot.png',
# #                          format='png')

In [None]:


# Plot the anomaly scores of every algorithm on the dataset that belongs to its
# training type, show the figure and export it as PNG.
for algo in algorithm_names:
    if algo in unsupervised_algorithm_names:
        collection, dataset_name = unsupervised_dataset_id
    elif algo in semi_supervised_algorithm_names:
        collection, dataset_name = semi_supervised_dataset_id
    else:
        # Without this guard the collection/dataset of the previous algorithm
        # would silently be reused (or a NameError raised on the first pass).
        raise ValueError(
            f"Algorithm {algo!r} is listed in neither "
            "unsupervised_algorithm_names nor semi_supervised_algorithm_names")

    fig = utils.plot_scores(
        algorithm_name=algo, collection_name=collection, dataset_name=dataset_name, df=RESULT_DF,
        dmgr=dm, result_path=result_path)
    print(f'{algo} Plot')
    fig.show()
    # Export at 1920 x 1080 pixels.
    fig.write_image(file=f'{storePlotDir}/{algo}_score_plot.png',
                    format='png', scale=1.0, width=1920, height=1080)

In [None]:
# Attach per-algorithm metadata from algo_meta to every result row.
# An algorithm without an algo_meta entry raises KeyError.
RESULT_DF["algo_family"] = RESULT_DF["algorithm"].map(
    lambda name: algo_meta[name]["method_family"])
RESULT_DF["algo_area"] = RESULT_DF["algorithm"].map(
    lambda name: algo_meta[name]["research_area"])
RESULT_DF["algo_display_name"] = RESULT_DF["algorithm"].map(
    lambda name: algo_meta[name]["display_name"])

# Show the algorithm column next to the derived ones.
RESULT_DF[["algorithm", "algo_display_name", "algo_family", "algo_area"]]

### Errors

In [None]:
# Number of runs per status for every (training type, algorithm) pair.
# aggfunc="count" is required here: pivot_table defaults to the mean, which
# would yield the average repetition number instead of a run count.
df_error_counts = RESULT_DF.pivot_table(
    index=["algo_training_type", "algorithm"],
    columns=["status"],
    values="repetition",
    aggfunc="count")
# Status values that never occurred for an algorithm count as zero.
df_error_counts = df_error_counts.fillna(value=0).astype(np.int64)

### Aggregation of errors per algorithm grouped by algorithm training type

In [None]:
# One table per training type: select that type's rows of df_error_counts,
# store the table as PNG and show it inline.
for training_type in ("SEMI_SUPERVISED", "UNSUPERVISED"):
    print(training_type)
    counts_for_type = df_error_counts.loc[training_type]
    fig = ff.create_table(counts_for_type, index=True)
    fig.write_image(file=f'{storePlotDir}/{training_type}_Run_plot.png', format='png')
    py.iplot(fig)

#### Algorithm quality assessment
Overall algorithm performance based on ROC_AUC

In [None]:
# aggregatemetric = 'ROC_AUC'  # ROC_AUC

# Per-algorithm summary statistics of ROC_AUC, best median first.
aggregations = ["min", "mean", "median", "max"]
df_overall_scores = (
    RESULT_DF
    .pivot_table(index="algorithm",
                 values=CALCULATED_METRICS.ROC_AUC,
                 aggfunc=aggregations)
    # Replace the column labels produced by pivot_table with the plain
    # aggregation names.
    .set_axis(aggregations, axis="columns")
    .sort_values(by="median", ascending=False)
)

df_overall_scores.head()

In [None]:
# ROC_AUC per algorithm (rows) and dataset (columns); fully empty rows and
# columns are dropped, algorithms are ordered by ascending median score, and
# the result is transposed so that algorithms become the columns.
df_asl = (
    RESULT_DF
    .pivot(index="algorithm", columns="dataset_name",
           values=CALCULATED_METRICS.ROC_AUC)
    .dropna(axis=0, how="all")
    .dropna(axis=1, how="all")
    # Temporary helper column, only needed for the ordering.
    .assign(median=lambda scores: scores.median(axis=1))
    .sort_values(by="median", ascending=True)
    .drop(columns="median")
    .T
)

In [None]:
# NOTE(review): this loads plotly express' bundled "tips" example data, which
# no other live cell visible here references - it looks like a leftover from
# experimenting; confirm and remove the cell.
df = px.data.tips()
# Preview only, instead of dumping the whole frame into the cell output.
df.head()

In [None]:
def _show_and_store_png(figure, file_stem):
    """Show ``figure`` inline and write it to ``storePlotDir`` as a 1920 x 1080 PNG."""
    py.iplot(figure)
    figure.write_image(file=f'{storePlotDir}/{file_stem}.png',
                       format='png', scale=1.0, width=1920, height=1080)


# One bar plot per quality metric; title and y-axis label are derived from the
# metric name.
roc_auc_fig, pr_auc_fig, range_pr_auc_fig = (
    utils.plot_barplot(RESULT_DF, title=f"{quality_metric} Bar plot",
                       ax_label="Algorithms", ay_label=f"{quality_metric} score",
                       metric=quality_metric)
    for quality_metric in (CALCULATED_METRICS.ROC_AUC,
                           CALCULATED_METRICS.PR_AUC,
                           CALCULATED_METRICS.RANGE_PR_AUC)
)

# The runtime plot uses its own wording for title and axis label.
overall_time_plot = utils.plot_barplot(
    RESULT_DF,
    title="Overall runtime Bar plot",
    ax_label="Algorithms",
    ay_label="Overall runtime (in seconds)",
    metric=CALCULATED_METRICS.OVERALL_RUN_TIME)

_show_and_store_png(roc_auc_fig, f'{CALCULATED_METRICS.ROC_AUC}_score_plot')
_show_and_store_png(pr_auc_fig, f'{CALCULATED_METRICS.PR_AUC}_score_plot')
_show_and_store_png(range_pr_auc_fig, f'{CALCULATED_METRICS.RANGE_PR_AUC}_score_plot')
_show_and_store_png(overall_time_plot, f'{CALCULATED_METRICS.OVERALL_RUN_TIME}_plot')

In [None]:
# df_datasets = RESULT_DF.pivot(index="dataset_name",
#                        columns="algorithm", values=aggregatemetric)
# # df_datasets = df_datasets.dropna(axis=0, how="all").dropna(axis=1, how="all")
# df_datasets["median"] = df_datasets.median(axis=1)
# df_datasets = df_datasets.sort_values(by="median", ascending=True)
# df_datasets = df_datasets.drop(columns="median").T


# CALCULATED_METRICS.RANGE_PR_AUC


# def plot_dataset_boxplot(characteristic):
#     df_c = df_datasets.drop(
#         columns=[c for c in df_datasets.columns if characteristic != c.split("-")[1]])
#     fig = go.Figure()
#     for i, c in enumerate(df_c.columns):
#         base_osci = c.split("-")[0]
#         fig.add_trace(go.Box(
#             x=df_c[c],
#             name=c,
#             boxpoints=False,
#             legendgroup=base_osci,
#             visible="legendonly" if base_osci != "sinus" else None
#         ))
#     fig.update_layout(
#         title={"text": f"Dataset scores by characteristic '{characteristic}'",
#                "xanchor": "center", "x": 0.5},
#         xaxis_title=f"{aggregatemetric} score",
#         legend_title="Datasets"
#     )
#     return py.iplot(fig)

def plot_box_for_metrics_base_on_learning_type(metric: str):
    """Build a box plot of ``metric`` with one box per algorithm training type.

    ``RESULT_DF`` is pivoted to training types (rows) by algorithms (columns),
    the training types are ordered by their ascending median score, and one
    horizontal box is added per training type over its algorithms' scores.

    Args:
        metric: Name of the ``RESULT_DF`` column to plot.

    Returns:
        The plotly figure; it is neither shown nor stored here.
    """
    scores_by_type = (
        RESULT_DF
        .pivot(index="algo_training_type", columns="algorithm", values=metric)
        # Temporary helper column, only needed for the ordering.
        .assign(median=lambda wide: wide.median(axis=1))
        .sort_values(by="median", ascending=True)
        .drop(columns="median")
        # After transposing, every column holds the scores of one training type.
        .T
    )

    fig = go.Figure()
    for training_type in scores_by_type.columns:
        box = go.Box(x=scores_by_type[training_type],
                     name=training_type,
                     boxpoints=False)
        fig.add_trace(box)

    fig.update_layout(
        title={"text": f"{metric} box plots by Algorithm learning type",
               "xanchor": "center", "x": 0.5},
        xaxis_title=f"{metric} score",
        legend_title="Algorithm Learning Type"
    )
    return fig

In [None]:

# Build the three box plots first (same order as the metrics below) ...
roc_auc_box_fig, pr_auc__box_fig, range_pr_auc_box_fig = map(
    plot_box_for_metrics_base_on_learning_type,
    (CALCULATED_METRICS.ROC_AUC,
     CALCULATED_METRICS.PR_AUC,
     CALCULATED_METRICS.RANGE_PR_AUC))

# ... then show each one inline and export it as a 1920 x 1080 PNG.
for box_metric, box_fig in (
        (CALCULATED_METRICS.ROC_AUC, roc_auc_box_fig),
        (CALCULATED_METRICS.PR_AUC, pr_auc__box_fig),
        (CALCULATED_METRICS.RANGE_PR_AUC, range_pr_auc_box_fig)):
    py.iplot(box_fig)
    box_fig.write_image(
        file=f'{storePlotDir}/{box_metric}_score_Algorithms_box_plot.png',
        format='png', scale=1.0, width=1920, height=1080)

In [None]:
# ok = "- OK -"
# oom = "- OOM -"
# timeout = "- TIMEOUT -"
# error_mapping = {
#     "TimeoutError": timeout,
#     "status code '137'": oom,
#     "MemoryError: Unable to allocate": oom,
#     "ValueError: Expected 2D array, got 1D array instead": "Wrong shape error",
#     "could not broadcast input array from shape": "Wrong shape error",
#     # shapes (20,) and (19,500) not aligned
#     "not aligned": "Wrong shape error",
#     "array must not contain infs or NaNs": "unexpected Inf or NaN",
#     "contains NaN": "unexpected Inf or NaN",
#     "cannot convert float NaN to integer": "unexpected Inf or NaN",
#     "Error(s) in loading state_dict": "Model loading error",
#     "EOFError": "Model loading error",
#     "Restoring from checkpoint failed": "Model loading error",
#     "RecursionError: maximum recursion depth exceeded in comparison": "Max recursion depth exceeded",
#     # ValueError: X has 44 features, but PCA is expecting 43 features as input.
#     "but PCA is expecting": "BROKEN Exathlon DATASETS",
#     "input.size(-1) must be equal to input_size": "BROKEN Exathlon DATASETS",
#     "ValueError: The condensed distance matrix must contain only finite values.": "LinAlgError",
#     "LinAlgError": "LinAlgError",
#     "NameError: name 'nan' is not defined": "Not converged",
#     "Could not form valid cluster separation": "Not converged",
#     "contamination must be in": "Invariance/assumption not met",
#     "Data must not be constant": "Invariance/assumption not met",
#     "Cannot compute initial seasonals using heuristic method with less than two full seasonal cycles in the data": "Invariance/assumption not met",
#     "ValueError: Anom detection needs at least 2 periods worth of data": "Invariance/assumption not met",
#     "`dataset` input should have multiple elements": "Invariance/assumption not met",
#     "Cannot take a larger sample than population": "Invariance/assumption not met",
#     "num_samples should be a positive integer value": "Invariance/assumption not met",
#     "Cannot use heuristic method to compute initial seasonal and levels with less than periods + 10 datapoints": "Invariance/assumption not met",
#     "ValueError: The window size must be less than or equal to 0": "Invariance/assumption not met",
#     "The window size must be less than or equal to": "Incompatible parameters",
#     "window_size has to be greater": "Incompatible parameters",
#     "Set a higher piecewise_median_period_weeks": "Incompatible parameters",
#     "OutOfBoundsDatetime: cannot convert input with unit 'm'": "Incompatible parameters",
#     "`window_size` must be at least 4": "Incompatible parameters",
#     "elements of 'k' must be between": "Incompatible parameters",
#     "Expected n_neighbors <= n_samples": "Incompatible parameters",
#     "PAA size can't be greater than the timeseries size": "Incompatible parameters",
#     "All window sizes must be greater than or equal to": "Incompatible parameters",
#     "ValueError: __len__() should return >= 0": "Bug",
#     "stack expects a non-empty TensorList": "Bug",
#     "expected non-empty vector": "Bug",
#     "Found array with 0 feature(s)": "Bug",
#     "ValueError: On entry to DLASCL parameter number 4 had an illegal value": "Bug",
#     "Sample larger than population or is negative": "Bug",
#     "ZeroDivisionError": "Bug",
#     "IndexError": "Bug",
#     "status code '139'": "Bug",  # segfault
#     "replacement has length zero": "Bug",
#     "missing value where TRUE/FALSE needed": "Bug",
#     "invalid subscript type 'list'": "Bug",
#     "subscript out of bounds": "Bug",
#     "invalid argument to unary operator": "Bug",
#     "negative length vectors are not allowed": "Bug",
#     "negative dimensions are not allowed": "Bug",
#     "`std` must be positive": "Bug",
#     "does not have key": "Bug",  # State '1' does not have key '1'
#     "Less than 2 uniques breaks left": "Bug",
#     "The encoder for value is invalid": "Bug",
#     "arange: cannot compute length": "Bug",
#     "n_components=3 must be between 0 and min(n_samples, n_features)": "Bug",
#     "must match the size of tensor b": "Wrong shape error",
# }

# def get_folder(df, index):
#     series = df.loc[index]
#     # result_path = result_path
#     dataset_name = series["dataset"]
#     path = (
#         result_path /
#         series["algorithm"] /
#         series["hyper_params_id"] /
#         series["collection"] /
#         dataset_name /
#         str(series["repetition"])
#     )
#     return path


# def category_from_logfile(logfile):
#     with logfile.open() as fh:
#         log = fh.read()
#     for error in error_mapping:
#         if error in log:
#             return error_mapping[error]
#     # print(log)
#     return "other"


# def extract_category(series):
#     status = series["status"]
#     msg = series["error_message"]
#     if status == "Status.OK":
#         return ok
#     elif status == "Status.TIMEOUT":
#         return timeout
#     # status is ERROR:
#     elif "DockerAlgorithmFailedError" in msg:
#         path = get_folder(series.name) / "execution.log"
#         if path.exists():
#             return category_from_logfile(path)
#         return "DockerAlgorithmFailedError"
#     else:
#         m = re.search("^([\w]+)\(.*\)", msg)
#         if m:
#             error = m.group(1)
#         else:
#             error = msg
#         return f"TimeEval:{error}"


# RESULT_DF["error_category"] = RESULT_DF.apply(
#     extract_category, axis="columns", raw=False)
# df_error_category_overview = RESULT_DF.pivot_table(
#     index="error_category", columns="algorithm", values="repetition", aggfunc="count")
# df_error_category_overview.insert(
#     0, "ALL (sum)", df_error_category_overview.sum(axis=1))

# with pd.option_context("display.max_rows", None, "display.max_columns", None):
#     display(df_error_category_overview.style.format("{:.0f}", na_rep=""))

# df_error_summary = pd.DataFrame(df_error_category_overview.sum(axis=1))
# df_error_summary.columns = ["count"]

# df_error_summary.loc["- ERROR -",
#                      "count"] = df_error_summary[~df_error_summary.index.str.startswith("- ")].sum().item()
# df_error_summary = df_error_summary.drop(
#     df_error_summary[~df_error_summary.index.str.startswith("- ")].index)

# all_count = df_error_summary.sum().item()
# df_error_summary["percentage"] = df_error_summary / all_count
# df_error_summary["count"] = df_error_summary["count"].astype(np.int_)
# df_error_summary.style.format({"percentage": "{:06.2%}".format})

In [None]:
# dominant_aggregation = "mean"
# index_columns = ["algo_input_dimensionality",
#                  "algo_training_type", "algorithm"]


# df_asl = RESULT_DF.pivot(index=index_columns, columns=[
#                   "collection", "dataset"], values="ROC_AUC")
# df_asl = df_asl.dropna(axis=0, how="all").dropna(axis=1, how="all")
# df_asl[dominant_aggregation] = df_asl.agg(dominant_aggregation, axis=1)
# df_asl = df_asl.reset_index().sort_values(
#     by=index_columns[:-1] + [dominant_aggregation], ascending=True).set_index(index_columns)
# df_asl = df_asl.drop(columns=dominant_aggregation)

In [None]:
# df_asl_pr = RESULT_DF.pivot(index=index_columns, columns=[
#                      "collection", "dataset"], values="PR_AUC")
# df_asl_pr = df_asl_pr.dropna(axis=0, how="all").dropna(axis=1, how="all")
# df_asl_pr = df_asl_pr.reindex(df_asl.index)

In [None]:
# df_asl_rangepr = RESULT_DF.pivot(index=index_columns, columns=[
#                           "collection", "dataset"], values="RANGE_PR_AUC")
# df_asl_rangepr = df_asl_rangepr.dropna(
#     axis=0, how="all").dropna(axis=1, how="all")
# df_asl_rangepr = df_asl_rangepr.reindex(df_asl.index)

In [None]:
# df_asl_gt = RESULT_DF[RESULT_DF["collection"] == "custom"].pivot(
#     index=index_columns, columns=["collection", "dataset"], values="ROC_AUC")
# df_asl_gt = df_asl_gt.dropna(axis=0, how="all").dropna(axis=1, how="all")
# df_asl_gt = df_asl_gt.reindex(df_asl.index)

In [None]:
# relative = True

# df_algorithm_error_counts = RESULT_DF.pivot_table(index=["algorithm"], columns=[
#                                            "error_category"], values="repetition", aggfunc="count")
# df_algorithm_error_counts = df_algorithm_error_counts.fillna(
#     value=0).astype(np.int64)
# error_categories = [
#     c for c in df_algorithm_error_counts.columns if not c.startswith("-")]
# df_algorithm_error_counts["- ERROR -"] = df_algorithm_error_counts[error_categories].sum(
#     axis=1)
# df_algorithm_error_counts = df_algorithm_error_counts.drop(
#     columns=error_categories)
# df_algorithm_error_counts["- ALL -"] = df_algorithm_error_counts.sum(axis=1)
# df_algorithm_error_counts.columns = [
#     c.split(" ")[1] for c in df_algorithm_error_counts.columns]


# def get_error_count(algo, tpe="ERROR"):
#     if relative:
#         return df_algorithm_error_counts.loc[algo, tpe] / df_algorithm_error_counts.loc[algo, "ALL"]
#     else:
#         return df_algorithm_error_counts.loc[algo, tpe]

# # overview = []
# # for d, l, a in df_asl.index:
# #     overview.append([d,l,a])
# # df_overview_table = pd.DataFrame(overview, columns=["dimensionality", "learning type", "algorithm"])
# # df_overview_table["# TIMEOUT"] = df_overview_table["algorithm"].apply(get_error_count, tpe="TIMEOUT")
# # df_overview_table["# OOM"] = df_overview_table["algorithm"].apply(get_error_count, tpe="OOM")
# # df_overview_table["# ERROR"] = df_overview_table["algorithm"].apply(get_error_count, tpe="ERROR")
# # df_overview_table["algorithm"] = df_overview_table["algorithm"].apply(lambda algo: algo_meta[algo]["display_name"])

# # percent_format = "{:03.0%}"
# # with pd.option_context("display.max_rows", None, "display.max_columns", None):
# #     display(df_overview_table.style.format({"# TIMEOUT": percent_format, "# OOM": percent_format, "# ERROR": percent_format}))

In [None]:
# fliers = False
# show_labels = False
# title = "Overall algorithm quality"
# df_boxplot = df_asl.T

# labels = df_boxplot.columns
# labels = [f"{c[2]}" for c in labels]

# fig, axs = plt.subplots(1, 4, figsize=(20, 20), sharey=True)
# ax = axs[0]
# ax.boxplot([df_boxplot[c].dropna().values for c in df_boxplot.columns], labels=labels,
#            sym=None if fliers else "", vert=False, meanline=True, showmeans=True, showfliers=fliers, manage_ticks=True)
# ax.set_xlabel("ROC_AUC")
# ax.set_title("ROC_AUC all datasets")
# ax.set_xlim(-0.05, 1.05)
# if not show_labels:
#     ax.tick_params(axis="y", which="both", left=True,
#                    right=False, labelleft=False, labelright=False)

# ax = axs[1]
# ax.boxplot([df_asl_pr.T[c].dropna().values for c in df_boxplot.columns], labels=labels,
#            sym=None if fliers else "", vert=False, meanline=True, showmeans=True, showfliers=fliers, manage_ticks=True)
# ax.set_xlabel("PR_AUC")
# ax.set_title("PR_AUC all datasets")
# ax.set_xlim(-0.05, 1.05)
# ax.tick_params(axis="y", which="both", left=False,
#                right=False, labelleft=False, labelright=False)

# ax = axs[2]
# ax.boxplot([df_asl_rangepr.T[c].dropna().values for c in df_boxplot.columns], labels=labels,
#            sym=None if fliers else "", vert=False, meanline=True, showmeans=True, showfliers=fliers, manage_ticks=True)
# ax.set_xlabel("RANGE_PR_AUC")
# ax.set_title("RANGE_PR_AUC all datasets")
# ax.set_xlim(-0.05, 1.05)
# ax.tick_params(axis="y", which="both", left=False,
#                right=False, labelleft=False, labelright=False)

# ax = axs[3]
# ax.boxplot([df_asl_gt.T[c].dropna().values for c in df_boxplot.columns], labels=labels,
#            sym=None if fliers else "", vert=False, meanline=True, showmeans=True, showfliers=fliers, manage_ticks=True)
# ax.set_xlabel("ROC_AUC")
# # ax.xaxis.set_label_position("top")
# ax.set_title("ROC_AUC GutenTAG datasets only")
# ax.set_xlim(-0.05, 1.05)
# ax.tick_params(axis="y", which="both", left=False,
#                right=False, labelleft=False, labelright=False)

# # ax = axs[3]
# # ax.axis("tight")
# # ax.set_axis_off()
# table = ax.table(cellText=df_boxplot.columns,
#                  loc="center", edges="open", fontsize=200)

# # fig.tight_layout()
# fig.show()

In [None]:
# df_tmp = RESULT_DF.groupby(by="algorithm")[["ROC_AUC"]].mean()
# df_tmp = df_tmp.sort_values(by="ROC_AUC", ascending=False)
# df_tmp["Rank"] = df_tmp.rank(ascending=False)

# df_tmp_gt = RESULT_DF[RESULT_DF["collection"] == "custom"].groupby(by="algo_display_name")[
#     ["ROC_AUC"]].mean()
# df_tmp_gt = df_tmp_gt.sort_values(by="ROC_AUC", ascending=False)
# df_tmp_gt["Rank"] = df_tmp_gt.rank(ascending=False)
# df_tmp = pd.merge(df_tmp_gt, df_tmp, left_index=True,
#                   right_index=True, how="inner", suffixes=("_gt", "_all"))
# df_tmp["Diff_rank"] = df_tmp["Rank_all"] - df_tmp["Rank_gt"]
# df_tmp["Diff ROC_AUC"] = df_tmp["ROC_AUC_all"] - df_tmp["ROC_AUC_gt"]
# df_tmp.sort_values("Diff ROC_AUC").style.format(
#     {"Diff_rank": "{:+03.0f}".format, "Diff ROC_AUC": "{:+0.2f}".format})

### Reliability of our metric scores and ranking

In [None]:
# method_family_colormap = plt.get_cmap("Dark2")
# learning_type_colormap = plt.get_cmap("tab20")

In [None]:
# def method_family_marker_map(family):
#     mapping = {
#         "encoding": "v",
#         "distance": "o",
#         "distribution": "^",
#         "forecasting": "*",
#         "reconstruction": "s",
#         "trees": "P"
#     }
#     return mapping[family]

In [None]:
# colormap = method_family_colormap
# method_families = sorted(
#     set([algo_meta[algo]["method_family"] for algo in algo_meta]) - {"baseline"})
# # annotated_algorithms = [
# #     "LOF",
# #     "KNN",
# #     "KMeans",
# #     "PCC",
# #     "iForest",
# #     "IF-LOF",
# #     "LSTM-AD",
# #     "Roburst PCA",
# #     "FastMCD",
# #     "EncDec-AD",
# #     "DeepAnT",
# # ]

# # annotated_algorithms_supervised = []
# # annotated_algorithms_unsupervised = []

# cycler = plt.cycler(marker=["o", "+", "*", "x", ".", "X"]) * \
#     plt.cycler(color=plt.get_cmap("tab20").colors)

# algo_auroc = df_asl.T.mean()
# dataset_count_lut = df_algorithm_error_counts["OK"] / \
#     df_algorithm_error_counts["ALL"]


# df_tmp = algo_auroc.copy().to_frame("auroc")
# df_tmp.loc[:, "reliability"] = df_tmp.index.map(
#     lambda x: dataset_count_lut[x[2]])
# df_tmp

In [None]:
# def percentage_above(pct):
#     print(
#         f"{len(df_tmp[df_tmp.reliability > pct]) / len(df_tmp):.0%} of all algorithms reached a reliability of above {pct}.")


# def percentage_below(pct):
#     print(f"{len(df_tmp[df_tmp.reliability < pct]) / len(df_tmp):.0%} ({len(df_tmp[df_tmp.reliability < pct])}) of all algorithms reached a reliability of below {pct}.")


# percentage_above(0.7)
# percentage_above(0.99)


# def algorithm_auroc_reliability(alg):
#     s = df_tmp.xs(alg, level="algorithm")
#     print(
#         f"{alg} has a ROC_AUC of {s.auroc.iloc[0]:.2f} with a reliability of {s.reliability.iloc[0]:.0%}.")


# algorithm_auroc_reliability("LOF")
# algorithm_auroc_reliability("Roburst PCA")
# algorithm_auroc_reliability("EncDec-AD")
# algorithm_auroc_reliability("KNN")

# percentage_below(0.52)

In [None]:
# fig = plt.figure(figsize=(double_column_figwidth, 10))
# ax = fig.gca()
# ax.set_prop_cycle(cycler)

# scatter_plots = defaultdict(lambda: defaultdict(list))
# algo_positions = {}

# for (_, _, algo), r in df_tmp.iterrows():
#     algo_metadata = algo_meta[algo]
#     name = algo_metadata["display_name"]
#     method_family = algo_metadata["method_family"]
#     x = r.auroc
#     y = r.reliability
#     scatter_plots[method_family]["xs"].append(x)
#     scatter_plots[method_family]["ys"].append(y)
#     scatter_plots[method_family]["cs"].append(
#         colormap(method_families.index(method_family)))
#     if name in algorithm_names:
#         algo_positions[name] = (x, y)

# for family, scatter in scatter_plots.items():
#     xs = scatter["xs"]
#     ys = scatter["ys"]
#     cs = scatter["cs"]
#     ax.scatter(xs, ys, color=cs, s=40, marker=method_family_marker_map(family))

# ax.legend(handles=[
#     Line2D([0], [0], color="w", marker=method_family_marker_map(
#         fam), markerfacecolor=colormap(i), markeredgecolor=colormap(i), label=fam, markersize=8)
#     for i, fam in enumerate(method_families)
# ], loc="upper right")

# for a in algorithm_names:
#     ha = "left"
#     text_position = (2, -15)
#     # if a in ["LSTM-AD", "EncDec-AD", "IF-LOF", "KNN"]:
#     #     text_position = (0, -15)
#     # if a in ["KMeans"]:
#     #     ha = "right"
#     ax.annotate(
#         a, algo_positions[a], textcoords="offset points", xytext=text_position, ha=ha)

# # add vline to separate bad and good algos
# # ax.vlines([0.75], 0, 1, colors="grey", linestyles="dashed")
# ax.hlines([0.5], 0.45, 1, colors="grey", linestyles="dashed")
# ax.set_xlabel("AUC-ROC")
# ax.set_ylabel("Successfully processed algorithms on MOVE II dataset (relative)")
# # ax.set_title("Reliability of the ROC_AUC values")
# ax.set_xlim(0.44, 1.065)
# ax.set_ylim(-0.05, 1.08)
# # ax.legend(ncol=2, loc="upper left", bbox_to_anchor=(1, 1.01))
# fig.savefig(r"plots/reliability.pdf", bbox_inches="tight")
# fig.show()