In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Directory that is searched recursively for experiment metric CSV files.
# It is a relative path, so it is resolved against the current working directory.
root = "../models"

In [3]:
# Materialise the matches into a sorted list: rglob() returns a one-shot
# generator, so iterating it a second time (e.g. when re-running a later cell)
# would silently yield nothing, and its traversal order depends on the file system.
stat_files = sorted(Path(root).rglob("*/ares/metrics_*.csv"))

In [4]:
import re

In [5]:
def fix_valid_cf_disc(column: "str | float") -> float:
    """Extract the numeric value from one raw ``valid_cf_disc`` cell.

    Despite the parameter name, this is called once per cell (through
    ``Series.apply``), not on a whole column.

    Args:
        column: A single cell value. Strings are searched for the first
            number they contain (optional sign, decimal point and exponent);
            any other value is passed straight to ``float``.

    Returns:
        The value as a float.

    Raises:
        ValueError: If a string cell contains no number.
    """
    if not isinstance(column, str):
        # Cells that were already parsed as numbers would make re.search raise TypeError.
        return float(column)
    # The exponent part matters: without it "1.0000e-04" would be read as 1.0.
    match = re.search(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?", column)
    if match is None:
        raise ValueError(f"no numeric value found in valid_cf_disc cell: {column!r}")
    return float(match.group())

In [6]:
# NOTE(review): not referenced in any of the visible cells — presumably filled
# in later or a leftover; confirm before removing.
metric_means = {}

In [7]:
# Maps each metrics CSV path (as a string) to its one-row "mean ± std" summary DataFrame.
results = {}

In [8]:
# Summarise every metrics CSV as a single row of "mean ± std" strings.
# format_value is applied to each mean/std before it is turned into text; it is
# the identity for now — swap in e.g. ``lambda x: f"{x:.3g}"`` to round the values.
format_value = lambda x: x

for stat_file in stat_files:
    df = pd.read_csv(stat_file)
    # fix_valid_cf_disc pulls the numeric value out of each raw cell.
    df["valid_cf_disc"] = df["valid_cf_disc"].apply(fix_valid_cf_disc)

    means = df.mean().map(format_value).astype(str)
    stds = df.std().map(format_value).astype(str)
    # One row (index 0) with one "mean ± std" string per metric column.
    new_df = (means + " ± " + stds).to_frame().T

    # The dataset name is the first directory below `root`. Reading it from the
    # path parts (instead of splitting the string on "/") keeps this correct on
    # Windows and when `root` has a different depth.
    new_df.insert(0, "Dataset", stat_file.relative_to(root).parts[0])
    results[str(stat_file)] = new_df

In [9]:
# Collect the summary rows of every run whose metrics path mentions
# "LogisticRegression" and stack them into one table.
lr_experiments = [
    summary for path, summary in results.items() if "LogisticRegression" in path
]

lr_experiments_df = pd.concat(lr_experiments)
lr_experiments_df

Unnamed: 0,Dataset,model_returned_smth,valid_cf_disc,dissimilarity_proximity_categorical_hamming,dissimilarity_proximity_categorical_jaccard,dissimilarity_proximity_continuous_manhatan,dissimilarity_proximity_continuous_euclidean,dissimilarity_proximity_continuous_mad,distance_l2_jaccard,distance_mad_hamming,sparsity,time,flow_log_density_cfs,flow_log_density_xs,flow_prob_condition_acc,lof_scores_xs,lof_scores_cfs,isolation_forest_scores_xs,isolation_forest_scores_cfs
0,DigitsDataset,1.0 ± 0.0,0.2 ± 0.44721359549995804,0.00625 ± 0.013975424859373689,0.01087945642422602 ± 0.02432720412281617,0.3711902839116237 ± 0.8300067074138371,0.2972180791233736 ± 0.6645998290617745,0.5625597774704253 ± 1.2579219038310256,0.3080975355475996 ± 0.6889270331845907,0.5688097774704253 ± 1.2718973286903994,0.00625 ± 0.013975424859373689,0.13968615531921386 ± 0.007276296827309462,,,,,,,
0,HelocDataset,1.0 ± 0.0,0.00078 ± 0.0017441330224498358,nan ± nan,0.0 ± 0.0,0.005503597746745899 ± 0.005881953991783133,0.005503597746745899 ± 0.005881953991783133,0.04296111135194098 ± 0.020339545270537853,0.005503597746745899 ± 0.005881953991783133,nan ± nan,0.0017110566623497199 ± 0.0008868931414693399,15.20339331626892 ± 6.940537758825136,-9696722.192382812 ± 18764236.573674582,-4880.290170669556 ± 6714.008198195724,0.42916437089443205 ± 0.30251393647040775,1.0966964799999999 ± 0.004707008155612249,1.0981810724507026 ± 0.008372413226685173,0.06889611653097916 ± 0.0012202981847383998,0.054091671638493974 ± 0.00044473507041606374


In [10]:
# Collect the summary rows of every run whose metrics path mentions
# "MultilayerPerceptron" and stack them into one table.
mlp_experiments = [
    summary for path, summary in results.items() if "MultilayerPerceptron" in path
]

mlp_experiments_df = pd.concat(mlp_experiments)
mlp_experiments_df

Unnamed: 0,Dataset,model_returned_smth,valid_cf_disc,dissimilarity_proximity_categorical_hamming,dissimilarity_proximity_categorical_jaccard,dissimilarity_proximity_continuous_manhatan,dissimilarity_proximity_continuous_euclidean,dissimilarity_proximity_continuous_mad,distance_l2_jaccard,distance_mad_hamming,sparsity,flow_log_density_cfs,flow_log_density_xs,flow_prob_condition_acc,lof_scores_xs,lof_scores_cfs,isolation_forest_scores_xs,isolation_forest_scores_cfs,time
0,HelocDataset,1.0 ± 0.0,0.06872 ± 0.05595477638236079,nan ± nan,0.0 ± 0.0,0.003963731734625419 ± 0.002351888594076475,0.003963731734625419 ± 0.002351888594076475,0.0663702852300738 ± 0.04882405465361031,0.003963731734625419 ± 0.002351888594076475,nan ± nan,0.0020932915331366 ± 0.001325164110814745,-30.4569393157959 ± 12.194132129331317,-16.005667209625244 ± 7.5793854832163285,0.6179763197898864 ± 0.11557020259760176,1.0966964799999999 ± 0.004707008155612249,1.100323851293803 ± 0.005566848938804063,0.06889611653097916 ± 0.0012202981847383998,0.049608271101776 ± 0.010547651991774635,16.329120874404907 ± 7.05394608038449


In [11]:
# Collect the summary rows of every run whose metrics path mentions "NODE"
# and stack them into one table.
node_experiments = [
    summary for path, summary in results.items() if "NODE" in path
]

node_experiments_df = pd.concat(node_experiments)
node_experiments_df

Unnamed: 0,Dataset,model_returned_smth,valid_cf_disc,dissimilarity_proximity_categorical_hamming,dissimilarity_proximity_categorical_jaccard,dissimilarity_proximity_continuous_manhatan,dissimilarity_proximity_continuous_euclidean,dissimilarity_proximity_continuous_mad,distance_l2_jaccard,distance_mad_hamming,sparsity,flow_log_density_cfs,flow_log_density_xs,flow_prob_condition_acc,lof_scores_xs,lof_scores_cfs,isolation_forest_scores_xs,isolation_forest_scores_cfs,time
0,HelocDataset,1.0 ± 0.0,0.00074 ± 0.0016546903033498443,nan ± nan,0.0 ± 0.0,0.005854576580767901 ± 0.004946086955006457,0.005854576580767901 ± 0.004946086955006457,0.06181067315197242 ± 0.04988998098474981,0.005854576580767901 ± 0.004946086955006457,nan ± nan,0.0027706532802618403 ± 0.0030172483921719205,-20.793095302581786 ± 18.244431040349657,-18.476094245910645 ± 12.12454130228529,0.5827072635293007 ± 0.3928742277503811,1.0966964799999999 ± 0.004707008155612249,1.100894461771989 ± 0.006752034341339176,0.06889611653097916 ± 0.0012202981847383998,0.05395013632641622 ± 0.0002718810409178794,19.449492025375367 ± 7.338908322784154


In [12]:
# Collect the summary rows of every run whose metrics path mentions "SDT"
# and stack them into one table.
sdt_experiments = [
    summary for path, summary in results.items() if "SDT" in path
]

sdt_experiments_df = pd.concat(sdt_experiments)
sdt_experiments_df

Unnamed: 0,Dataset,model_returned_smth,valid_cf_disc,dissimilarity_proximity_categorical_hamming,dissimilarity_proximity_categorical_jaccard,dissimilarity_proximity_continuous_manhatan,dissimilarity_proximity_continuous_euclidean,dissimilarity_proximity_continuous_mad,distance_l2_jaccard,distance_mad_hamming,sparsity,flow_log_density_cfs,flow_log_density_xs,flow_prob_condition_acc,lof_scores_xs,lof_scores_cfs,isolation_forest_scores_xs,isolation_forest_scores_cfs,time
0,HelocDataset,1.0 ± 0.0,0.0 ± 0.0,nan ± nan,0.0 ± 0.0,0.0074879287130473595 ± 0.006312003983112528,0.0074879287130473595 ± 0.006312003983112528,0.13657256722682104 ± 0.10968380098226321,0.0074879287130473595 ± 0.006312003983112528,nan ± nan,0.0034182589693644004 ± 0.0027548116559436534,-9817752.38671875 ± 19083948.513086487,-4917.661817550659 ± 6781.776461869365,0.41448152959346773 ± 0.2923307018440938,1.0966964799999999 ± 0.004707008155612249,1.0978987115507644 ± 0.005670395961142907,0.06889611653097916 ± 0.0012202981847383998,0.05361052847812444 ± 0.0008880190340256307,19.582036113739015 ± 6.941836429836783
