# Results Neurocomputing/ESANN 2024

This notebook contains the code to analyse the results of the 
Neurocomputing/ESANN 2024 paper, and it is responsible for generating
the figures and tables in the paper.

The notebook is organised as follows:

1. The first section contains imports, constants, helper functions and load the 
    data.

2. We show that the dict-wisard has competitive performance with the 
    classical machine learning algorithms.

## 1. General constants, hhelper functions, and data loading

Imports, global constants and packages' configuration.

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from typing import List, Union
import plotly.graph_objects as go
import plotly.express as px
import json

from utils import write_figure, write_latex_table, aggregate_mean_std

In [2]:
# Configs
pd.set_option("display.float_format", lambda x: "%.4f" % x)

# ---------- Paths -------------
# -- Inputs
datasets_info_path = Path("datasets_info.json")
results_path = Path("results_wisard_folded.csv")
results_sklearn_path = Path("results_sklearn_folded.csv")

### Read inputs and create a full dataframe

1. Read the datasets specifications (`dataset_info`)
2. Read the wisard results (`wisard_results`)
3. Read the sklearn results (`sklearn_results`)
4. Create a results dataframe, mergind dataset_info, wisard_results and sklearn_results

#### Dataset information

In [3]:
# Datasets information
datasets_info = pd.read_json(datasets_info_path, orient="index").reset_index(drop=True)
datasets_info.rename(columns={"name": "dataset_name"}, inplace=True)
datasets_info.head(n=3)

Unnamed: 0,dataset_name,size,features,num_classes,train_size,test_size,balanced,metric
0,breast_cancer,141416,30,3,398,171,False,f1 weighted
1,dry_bean,1773910,16,7,10888,2723,False,f1 weighted
2,glass,17413,9,24,149,65,False,f1 weighted


In [4]:
info = datasets_info[
    ["dataset_name", "features", "size", "num_classes", "balanced"]
]
info.loc[:, "size"] = info["size"] / 1024

info = info.rename(
    columns={
        "dataset_name": "Dataset",
        "features": "Features",
        "size": "Size (KB)",
        "num_classes": "Classes",
        "balanced": "Is Balanced?",
    }
)

latex_str = info.to_latex(
    index=False,
    escape=True,
    caption="Datasets information",
    label="tab:datasets_info",
    float_format="%.2f",
)

write_latex_table("datasets_info.tex", latex_str)

Table written to: tables/datasets_info.tex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  info.loc[:, "size"] = info["size"] / 1024
  latex_str = info.to_latex(


#### Wisard results

Read and parse wisard result to `wisard_results` dataframe.

**Note**: The `wisard_results` already has aggregated results for each dataset.

In [5]:
def parse_wisard_config_name(row) -> str:
    """Given a row, parse the name of configuration.

    Parameters
    ----------
    row : pd.Series
        The row of the dataframe.

    Returns
    -------
    str
        The name of the configuration.
    """

    names = []
    if not pd.isna(row["num_hitters"]):
        names.append(f"NR: {row['num_hitters']}")
    if not pd.isna(row["width"]):
        names.append(f"W: {row['width']}")
    if not pd.isna(row["depth"]):
        names.append(f"D: {row['depth']}")
    if not pd.isna(row["capacity"]):
        names.append(f"C: {row['capacity']}")
    if not pd.isna(row["bucket_size"]):
        names.append(f"BS: {row['bucket_size']}")
    if not pd.isna(row["threshold"]):
        names.append(f"T: {row['threshold']}")
    if not pd.isna(row["est_elements"]):
        names.append(f"EST: {row['est_elements']}")
    if not pd.isna(row["false_positive_rate"]):
        names.append(f"FPR: {row['false_positive_rate']}")

    if names:
        names = ", ".join(names)
        return f"{row['ram']} ({names})"
    else:
        return row["ram"]


# --- Read results and add a column with the name of the configuration ---
wisard_results = pd.read_csv(results_path).drop_duplicates()

# --- Add useful columns ---
wisard_results["tuple_size"] = (
    wisard_results["resolution"] / wisard_results["tuple_resolution_factor"]
)
wisard_results["config_name"] = wisard_results.apply(
    parse_wisard_config_name, axis=1
)

# --- Select the columns of interest ---
wisard_results = wisard_results[
    [
        "dataset_name",
        "config_name",
        "test_accuracy_mean",
        "test_accuracy_std",
        "test_f1 weighted_mean",
        "test_f1 weighted_std",
        "test_model size_mean",
        "test_model size_std",
        "test_ties_mean",
        "test_ties_std",
        "tuple_size",
        "encoder",
        "resolution",
        "bleach",
        "rams per discriminator",
        "ram",
    ]
]

# --- Rename columns ---
wisard_results = wisard_results.rename(
    columns={
        "dataset_name": "dataset",
        "ram": "model",
        "test_ties_mean": "ties",
        "test_ties_std": "ties_std",
        "test_accuracy_mean": "accuracy",
        "test_accuracy_std": "accuracy_std",
        "test_f1 weighted_mean": "f1",
        "test_f1 weighted_std": "f1_std",
        "test_model size_mean": "model_size",
        "test_model size_std": "model_size_std",
    }
)

# --- Add model column and drop duplicates ---
wisard_results["model"] = "Wisard"
wisard_results.drop_duplicates(inplace=True)

# Split dataset name from fold
wisard_results[["dataset", "fold"]] = wisard_results["dataset"].str.split(
    "_fold_", expand=True
)
wisard_results["fold"] = wisard_results["fold"].astype(int)


# Rearange columns
wisard_results = wisard_results[[
    "dataset",
    "fold",
    "model",
    "config_name",
    "tuple_size",
    "encoder",
    "resolution",
    "bleach",
    "rams per discriminator",
    "accuracy",
    "accuracy_std",
    "f1",
    "f1_std",
    "model_size",
    "model_size_std",
    "ties",
    "ties_std",
]]

wisard_results.sample(n=2)

Unnamed: 0,dataset,fold,model,config_name,tuple_size,encoder,resolution,bleach,rams per discriminator,accuracy,accuracy_std,f1,f1_std,model_size,model_size_std,ties,ties_std
11970,wine,1,Wisard,"StreamThreshold (W: 16.0, D: 1.0, T: 145.0)",20.0,thermometer,20,12,13,0.7222,0.0,0.6102,0.0,3120.0,0.0,13.0,0.8165
10246,segment,1,Wisard,"HeavyHitters (NR: 995.0, W: 73.0, D: 3.0)",32.0,distributive-thermometer,64,9,38,0.855,0.0077,0.8451,0.0115,237272.0,0.0,94.0,5.6569


In [6]:
dict_wisard_results = wisard_results[(wisard_results["config_name"] == "Dict")]

lines = []
for (dataset, fold), dataset_df in dict_wisard_results.groupby(["dataset", "fold"]):
    metric_name = datasets_info.loc[datasets_info["dataset_name"] == dataset, "metric"].iloc[0]
    if metric_name == "f1 weighted":
        metric_name = "f1"
    line = dataset_df.sort_values(by=metric_name, ascending=False).iloc[0]
    lines.append(line)

dict_wisard_results = pd.DataFrame(lines)

dict_wisard_results = aggregate_mean_std(
    dict_wisard_results, 
    group_by=["dataset"],
    keys_to_aggregate=["accuracy", "f1", "model_size"]
)

dict_wisard_results["model"] = "Dict-Wisard"

# Rearange columns
dict_wisard_results = dict_wisard_results[[
    "dataset",
    "model",
    "accuracy",
    "accuracy_std",
    "f1",
    "f1_std",
    "model_size",
    "model_size_std",
]]

dict_wisard_results

Unnamed: 0,dataset,model,accuracy,accuracy_std,f1,f1_std,model_size,model_size_std
0,breast_cancer,Dict-Wisard,0.9537,0.0179,0.9537,0.0177,61675.6667,64519.6794
1,dry_bean,Dict-Wisard,0.9027,0.004,0.9028,0.0035,968603.5333,443718.5251
2,glass,Dict-Wisard,0.6842,0.0509,0.6333,0.0481,53000.8667,22996.4997
3,image_segmentation,Dict-Wisard,0.8556,0.0181,0.8526,0.0223,142505.2,46475.5446
4,iris,Dict-Wisard,0.98,0.0277,0.98,0.0277,2949.1333,3307.9763
5,letter,Dict-Wisard,0.8739,0.0291,0.8751,0.0286,7193661.8667,4057207.8151
6,motion_sense,Dict-Wisard,0.738,0.0324,0.7261,0.0329,52461826.4,14175681.6535
7,optical_handwritten,Dict-Wisard,0.9715,0.0082,0.9715,0.0082,11184680.4667,13594443.0243
8,rice,Dict-Wisard,0.9178,0.0145,0.9176,0.0146,46699.2,29841.781
9,satimage,Dict-Wisard,0.8904,0.0052,0.8864,0.0058,6291516.6,4400154.0757


#### Scikit Learn results

Read and parse sklearn result to `sklearn_results` dataframe.

In [7]:
# Read sklearn results and aggregate multiple runs
sklearn_results = pd.read_csv(results_sklearn_path).drop_duplicates()

# Aggregate metric for multiple runs
sklearn_results = aggregate_mean_std(
    df=sklearn_results,
    group_by=[
        "model",
        "model kwargs",
        "dataset name",
        "experiment name",
    ],
    keys_to_aggregate=[
        "accuracy",
        "f1 weighted",
        "f1 macro",
        "f1 micro",
        "train time",
        "predict time",
        "model size",
    ],
)

# Select columns of interest
sklearn_results = sklearn_results[
    [
        "dataset name",
        "model",
        "model kwargs",
        "accuracy",
        "accuracy_std",
        "f1 weighted",
        "f1 weighted_std",
        "model size",
        "model size_std",
    ]
]

# Rename columns
sklearn_results.rename(
    columns={
        "dataset name": "dataset",
        "model kwargs": "config_name",
        "f1 weighted": "f1",
        "f1 weighted_std": "f1_std",
        "model size": "model_size",
        "model size_std": "model_size_std",
    },
    inplace=True,
)

def transform_config_to_model_name(row):
    config = json.loads(row["config_name"])
    if row["model"] == "knn":
        row["model"] = f"KNN-{config['n_neighbors']}"
    elif row["model"] == "mlp":
        row["model"] = f"MLP-{len(config['hidden_layer_sizes'])}L"
    elif row["model"] == "random-forest":
        row["model"] = "Random Forest"
    elif row["model"] == "svm":
        row["model"] = f"SVM-{config.get('kernel', 'rbf')}"
        
    row = row.drop("config_name")
    return row
        

# Transform config to model name
sklearn_results = sklearn_results.apply(transform_config_to_model_name, axis=1)

# Split dataset name from fold
sklearn_results[['dataset', 'fold']] = sklearn_results['dataset'].str.split('_fold_', expand=True)
sklearn_results['fold'] = sklearn_results['fold'].astype(int)

# Aggregate folds
n_folds = sklearn_results["fold"].nunique()
old_len = len(sklearn_results)

sklearn_results = aggregate_mean_std(
    df=sklearn_results,
    group_by=["model", "dataset"],
    keys_to_aggregate=["accuracy", "f1", "model_size"]
)

# Check if the aggregation was correct
assert len(sklearn_results) == old_len / n_folds, f"Expected {old_len / n_folds} got {len(sklearn_results)}"

# Rearange columns
sklearn_results = sklearn_results[[
    "dataset",
    "model",
    "accuracy",
    "accuracy_std",
    "f1",
    "f1_std",
    "model_size",
    "model_size_std",
]]

sklearn_results.sample(n=2)

Unnamed: 0,dataset,model,accuracy,accuracy_std,f1,f1_std,model_size,model_size_std
14,yeast,KNN-10,0.502,0.0866,0.4983,0.0748,182341.8,64.3988
34,iris,MLP-1L,0.98,0.0298,0.9798,0.0301,27602.5333,0.5055


In [8]:
# Merge results
results_df = pd.concat([dict_wisard_results, sklearn_results])
results_df = results_df.sort_values(by=["dataset", "model"]).reset_index(drop=True)
results_df.sample(n=3)

Unnamed: 0,dataset,model,accuracy,accuracy_std,f1,f1_std,model_size,model_size_std
42,iris,Random Forest,0.9467,0.0506,0.9465,0.0506,159934.4667,13085.7338
30,image_segmentation,MLP-1L,0.6429,0.2614,0.6391,0.2608,73339.4,0.5477
5,breast_cancer,MLP-3L,0.9087,0.0396,0.9082,0.0406,567566.0,260.416


In [9]:
# Add metric column based on dataset info

dfs = []

for _, row in datasets_info.iterrows():
    df = results_df[results_df["dataset"] == row["dataset_name"]].copy()
    if row["metric"] == "f1 weighted":
        metric = "f1"
        metric_std = "f1_std"
    else:
        metric = "accuracy"
        metric_std = "accuracy_std"
    
    df["metric"] = df[metric]
    df["metric_std"] = df[metric_std]
    df["performance_metric"] = metric
    dfs.append(df.reset_index(drop=True))

results_df = pd.concat(dfs).reset_index(drop=True)

# Some beautify
results_df.dataset = results_df.dataset.str.replace("_", " ")
results_df.dataset = results_df.dataset.str.title()
results_df

results_df.to_csv("results.csv", index=False)
print("Results saved to results.csv")

Results saved to results.csv


### Relative performance (per dataset, normalized by model with best metric value)

In [10]:
relative_results_df = results_df.copy()


for dset, df in relative_results_df.groupby("dataset"):
    highest_metric = df["metric"].idxmax()
    
    for metric in ["accuracy", "f1", "model_size", "metric"]:
        relative_results_df.loc[df.index, f"{metric}_relative"] = df[metric] / df.loc[highest_metric, metric]
        
relative_results_df.to_csv("results_relative.csv", index=False)
print("Results saved to results_relative.csv")

Results saved to results_relative.csv


In [11]:
relative_results_df.sample(n=3)

Unnamed: 0,dataset,model,accuracy,accuracy_std,f1,f1_std,model_size,model_size_std,metric,metric_std,performance_metric,accuracy_relative,f1_relative,model_size_relative,metric_relative
33,Image Segmentation,Random Forest,0.7286,0.2938,0.7228,0.3005,517000.6,25632.6888,0.7286,0.2938,accuracy,0.8516,0.8479,3.6279,0.8516
90,Segment,Dict-Wisard,0.873,0.014,0.8653,0.0186,131163.3333,34125.9199,0.873,0.014,accuracy,0.8883,0.8804,0.0648,0.8883
0,Breast Cancer,Dict-Wisard,0.9537,0.0179,0.9537,0.0177,61675.6667,64519.6794,0.9537,0.0177,f1,0.9896,0.9898,0.1969,0.9898


# 2. Wisard is competitive with classical machine learning algorithms

### Size and performance

In [34]:
def add_mean_line(df):
    line = {"dataset": "Mean"}
    for c in df.columns:
        if c != "dataset":
            line[c] = df[c].mean()
    df.loc[len(df)] = line
    return df

def raw_relative_table(df, raw_metric, relative_metric, order_of_models: List[str]):
    # Pivot the DataFrame to create the raw metric table and relative table
    raw_df = (
        df.pivot(index="dataset", columns="model", values=raw_metric)
        .rename_axis(None, axis=1)
        .reset_index()
    )
    raw_df = raw_df[["dataset"] + order_of_models]
    raw_df = add_mean_line(raw_df)
    raw_df.set_index("dataset", inplace=True)

    relative_df = (
        df.pivot(
            index="dataset", columns="model", values=relative_metric
        )
        .rename_axis(None, axis=1)
        .reset_index()
    )
    relative_df = relative_df[["dataset"] + order_of_models]
    relative_df = add_mean_line(relative_df)
    relative_df.set_index("dataset", inplace=True)
    
    # Concatenating the DataFrames
    final_df = pd.concat([raw_df, relative_df], axis=1)

    final_df.columns = pd.MultiIndex.from_product(
        [["Absolute", "Relative"], raw_df.columns.str.split("_").str[0]]
    )
    return final_df

In [35]:
df = pd.read_csv("results_relative.csv")
df = df[~df["model"].isin(["KNN-10", "MLP-3L", "SVM-poly"])]
df.loc[df["model"] == "KNN-5", "model"] = "KNN"
df.loc[df["model"] == "SVM-rbf", "model"] = "SVM"
df.loc[df["model"] == "Random Forest", "model"] = "RF"
df.loc[df["model"] == "Dict-Wisard", "model"] = "Wisard"

order_of_models = [
    "Wisard",
    "RF",
    "KNN",
    "MLP-1L",
    "MLP-2L",
    "SVM"
]

performance_df = raw_relative_table(df, "metric", "metric_relative", order_of_models)
order_of_datasets = performance_df["Relative"]["Wisard"].sort_values(ascending=False).keys().to_list()
order_of_datasets.remove("Mean")
order_of_datasets.append("Mean")
performance_df.index = order_of_datasets
write_latex_table("performance_table.tex", performance_df.to_latex(float_format="%.2f"))
performance_df

Table written to: tables/performance_table.tex


  write_latex_table("performance_table.tex", performance_df.to_latex(float_format="%.2f"))


Unnamed: 0_level_0,Absolute,Absolute,Absolute,Absolute,Absolute,Absolute,Relative,Relative,Relative,Relative,Relative,Relative
Unnamed: 0_level_1,Wisard,RF,KNN,MLP-1L,MLP-2L,SVM,Wisard,RF,KNN,MLP-1L,MLP-2L,SVM
Dry Bean,0.9537,0.9635,0.9308,0.9245,0.9037,0.9155,0.9898,1.0,0.966,0.9595,0.9379,0.9501
Image Segmentation,0.9028,0.8322,0.6526,0.3599,0.3804,0.5391,1.0,0.9218,0.7229,0.3987,0.4213,0.5971
Iris,0.6333,0.7807,0.607,0.4029,0.4016,0.1862,0.8113,1.0,0.7775,0.5161,0.5144,0.2385
Rice,0.8556,0.7286,0.6,0.6429,0.6397,0.5762,1.0,0.8516,0.7013,0.7514,0.7477,0.6735
Sepsis,0.98,0.9467,0.9467,0.98,0.98,0.9533,1.0,0.966,0.966,1.0,1.0,0.9728
Yeast,0.8739,0.9633,0.9519,0.9245,0.9536,0.9267,0.9072,1.0,0.9882,0.9597,0.9899,0.962
Breast Cancer,0.738,0.8995,0.7706,0.8455,0.8478,0.7537,0.8205,1.0,0.8567,0.94,0.9425,0.8379
Optical Handwritten,0.9715,0.9832,0.9872,0.9829,0.9841,0.9886,0.9825,0.9943,0.9984,0.994,0.9953,0.9998
Wine,0.9176,0.7589,0.7361,0.454,0.3829,0.736,1.0,0.8271,0.8023,0.4948,0.4173,0.8022
Satimage,0.8864,0.9137,0.9082,0.91,0.9112,0.8956,0.9701,1.0,0.994,0.996,0.9973,0.9802


In [39]:
size_df = raw_relative_table(df, "model_size", "model_size_relative", order_of_models)
size_df["Absolute"] = size_df["Absolute"] / 1024
size_df.index = order_of_datasets
write_latex_table("size_table.tex", size_df.to_latex(float_format="%.2f"))
size_df

Table written to: tables/size_table.tex


  write_latex_table("size_table.tex", size_df.to_latex(float_format="%.2f"))


Unnamed: 0_level_0,Absolute,Absolute,Absolute,Absolute,Absolute,Absolute,Relative,Relative,Relative,Relative,Relative,Relative
Unnamed: 0_level_1,Wisard,RF,KNN,MLP-1L,MLP-2L,SVM,Wisard,RF,KNN,MLP-1L,MLP-2L,SVM
Dry Bean,60.2301,305.8702,110.9039,81.8907,317.5171,32.124,0.1969,1.0,0.3626,0.2677,1.0381,0.105
Image Segmentation,945.9019,14015.5705,1446.8768,62.037,299.4306,1466.3193,1.0,14.8172,1.5296,0.0656,0.3166,1.5502
Iris,51.7587,781.1236,29.166,44.5428,282.0652,20.8877,0.0663,1.0,0.0373,0.057,0.3611,0.0267
Rice,139.1652,504.8834,26.9375,71.6205,308.5584,29.1854,1.0,3.6279,0.1936,0.5146,2.2172,0.2097
Sepsis,2.88,156.186,10.8428,26.9556,263.9021,3.8918,1.0,54.231,3.7648,9.3595,91.6323,1.3513
Yeast,7025.0604,111966.4944,2125.8564,109.7174,346.6115,2958.7748,0.0627,1.0,0.019,0.001,0.0031,0.0264
Breast Cancer,51232.2523,9875.4682,10004.9059,868.4888,1104.537,8880.3355,5.1878,1.0,1.0131,0.0879,0.1118,0.8992
Optical Handwritten,10922.5395,11446.8281,2283.8477,182.5983,418.5857,706.4299,22.367,23.4406,4.6768,0.3739,0.8572,1.4466
Wine,45.6047,2856.4433,400.0693,26.071,262.8928,75.6672,1.0,62.6349,8.7725,0.5717,5.7646,1.6592
Satimage,6144.0592,8897.3865,1487.6289,109.0923,346.0239,535.6561,0.6905,1.0,0.1672,0.0123,0.0389,0.0602


### Pareto frontier

In [42]:
# Identify Pareto frontier
def is_pareto_efficient(costs):
    is_efficient = np.ones(costs.shape[0], dtype=bool)
    for i, c in enumerate(costs):
        if is_efficient[i]:
            is_efficient[is_efficient] = np.any(costs[is_efficient] < c, axis=1)
            is_efficient[i] = True  # Keep the current point
    return is_efficient

for dset_name, dset_df in df.groupby("dataset"):
    costs = dset_df[["model_size", "metric"]].to_numpy()
    # Invert metric (lower is better)
    costs[:, 1] = 1 / costs[:, 1]
    pareto = is_pareto_efficient(costs)
    df.loc[dset_df.index, "pareto"] = pareto
    
df.to_csv("results_relative_pareto.csv", index=False)
print(f"Csv written to results_relative_pareto.csv")
df.head(n=12)

Csv written to results_relative_pareto.csv


Unnamed: 0,dataset,model,accuracy,accuracy_std,f1,f1_std,model_size,model_size_std,metric,metric_std,performance_metric,accuracy_relative,f1_relative,model_size_relative,metric_relative,pareto
0,Breast Cancer,Wisard,0.9537,0.0179,0.9537,0.0177,61675.6667,64519.6794,0.9537,0.0177,f1,0.9896,0.9898,0.1969,0.9898,True
2,Breast Cancer,KNN,0.9315,0.0294,0.9308,0.0307,113565.6,110.909,0.9308,0.0307,f1,0.9666,0.966,0.3626,0.966,False
3,Breast Cancer,MLP-1L,0.9251,0.042,0.9245,0.0424,83856.0667,755.9721,0.9245,0.0424,f1,0.96,0.9595,0.2677,0.9595,False
4,Breast Cancer,MLP-2L,0.9045,0.0294,0.9037,0.03,325137.5333,385.8725,0.9037,0.03,f1,0.9386,0.9379,1.0381,0.9379,False
6,Breast Cancer,RF,0.9637,0.0141,0.9635,0.0144,313211.1333,20666.8041,0.9635,0.0144,f1,1.0,1.0,1.0,1.0,True
8,Breast Cancer,SVM,0.9174,0.0169,0.9155,0.0184,32895.0,1392.8819,0.9155,0.0184,f1,0.952,0.9501,0.105,0.9501,True
9,Dry Bean,Wisard,0.9027,0.004,0.9028,0.0035,968603.5333,443718.5251,0.9028,0.0035,f1,1.0,1.0,1.0,1.0,True
11,Dry Bean,KNN,0.6562,0.1059,0.6526,0.1042,1481601.8,60.821,0.6526,0.1042,f1,0.727,0.7229,1.5296,0.7229,False
12,Dry Bean,MLP-1L,0.4367,0.1268,0.3599,0.1272,63525.8667,247.8536,0.3599,0.1272,f1,0.4838,0.3987,0.0656,0.3987,True
13,Dry Bean,MLP-2L,0.4398,0.0603,0.3804,0.051,306616.9333,270.8753,0.3804,0.051,f1,0.4872,0.4213,0.3166,0.4213,True


## 2. Wisard has competitive results with SKLearn

Here we show that the dict-wisard has competitive performance with the
classical machine learning algorithms.

To do that, we plot the accuracy of the wisard and sklearn algorithms for each
dataset.

In [None]:
# Create a dataframe with the best performance for each dataset and model
best_metric_df = (
    results.groupby(["dataset", "model"])
    .apply(lambda group: group.loc[group["metric"].idxmax()])
    .reset_index(drop=True)
)

best_metric_df.value_counts("model")

In [None]:
# Assume 'results' DataFrame with columns: 'dataset', 'model', 'accuracy'

# Create a grouped bar chart for accuracy per model and dataset
fig_grouped_bar = px.bar(
    best_metric_df,
    x="dataset",
    y="metric",
    error_y="metric_std",
    color="model",
    #  title='Metric Comparison by Model and Dataset',
    labels={"metric": "Performance", "dataset": "Dataset", "model": ""},
    barmode="group",
    color_discrete_sequence=px.colors.qualitative.Prism,
)


# Display the plot
fig_grouped_bar.update_layout(
    legend=dict(
        orientation="h", yanchor="top", y=1.20, xanchor="center", x=0.5
    ),
    height=400,
    width=2480 / 2.5,
    font=dict(family="Times New Roman", size=14),
)

write_figure("models_performance.pdf", fig_grouped_bar)
fig_grouped_bar.show()

In [None]:
# Assume 'results' DataFrame with columns: 'dataset', 'model', 'accuracy'

# Create a grouped horizontal bar chart for accuracy per model and dataset with reversed bar groups
fig_grouped_bar = px.bar(
    best_metric_df,
    y="dataset",
    x="metric",
    error_x="metric_std",
    color="model",
    #  title='Metric Comparison by Model and Dataset',
    labels={"metric": "Performance", "dataset": "Dataset", "model": ""},
    barmode="group",
    orientation='h',
    color_discrete_sequence=px.colors.qualitative.Prism,
)


# Reverse the order of the bar groups
fig_grouped_bar.update_layout(
    yaxis=dict(autorange="reversed"),
)

# Display the plot
fig_grouped_bar.update_layout(
    legend=dict(
        orientation="h", yanchor="top", y=1.05, xanchor="center", x=0.5
    ),
    height=1200,
    width=2480 / 4,
    font=dict(family="Times New Roman", size=14),
)

write_figure("models_performance_horizontal.pdf", fig_grouped_bar)
fig_grouped_bar.show()

In [None]:
# Count how many times per dataset, wisard is the best model

print("How many times per dataset, each model is the best?")
best_metric_df.loc[
    best_metric_df.groupby("dataset")["metric"].idxmax()
].value_counts("model").to_frame().reset_index()

## 3. Wisard has competitive results with SKLearn and is smallest

Here we show that costing up to 2% of performance, the dict-wisard is much
smaller than the sklearn algorithms.

In [None]:
# Up to 2% of accuracy loss
metric_threshold = 0.01

In [None]:
# Read results and filter bloom filter results
results = base_results.copy()
results = results[
    (results["model"] != "wisard") | (results["config_name"] == "Dict")
]
results.shape

In [None]:
# Create a dataframe with the best performance for each dataset and model
best_metric_dataset_model = (
    results.groupby(["dataset", "model"])
    .apply(lambda group: group.loc[group["metric"].idxmax()])
    .reset_index(drop=True)
)

best_metric_dataset_model.value_counts("model")

In [None]:
# Add the model_size_ratio column. This column is the ratio between the model
# size of each model and the model size of the best model for each dataset
temp = []

for dset_name, dset_df in best_metric_dataset_model.groupby("dataset"):
    best_row = dset_df.sort_values(by="metric", ascending=False).iloc[0]
    dset_df["model_size_ratio"] = dset_df["model_size"] / best_row["model_size"]
    # Min max normalization
    dset_df["normalized_model_size_ratio"] = (
        dset_df["model_size_ratio"] - dset_df["model_size_ratio"].min()
    ) / (dset_df["model_size_ratio"].max() - dset_df["model_size_ratio"].min())
    dset_df["best_tradeoff"] = False

    best_tradeoff = (
        dset_df[dset_df["metric"] >= best_row["metric"] - metric_threshold]
        .sort_values(by="normalized_model_size_ratio", ascending=True)
        .iloc[0]
    )
    dset_df.loc[best_tradeoff.name, "best_tradeoff"] = True

    temp.append(dset_df)

best_metric_dataset_model = pd.concat(temp)
best_metric_dataset_model.head(n=8)

In [None]:
# pd.set_option('display.max_rows', 100)
dfs = {}
for dset_name, dset_df in best_metric_dataset_model.groupby("dataset"):
    dset_df["model_size"] = (dset_df["model_size"] / 1024)
    max_val = dset_df["metric"].max()
    max_size =  dset_df["model_size"].max()
    dset_df["relative performance"] = dset_df["metric"]  / max_val
    dset_df["relative size"] = dset_df["model_size"]  / max_size
    # dset_df.index = dset_df["model"]
    dset_df = dset_df[["model", "metric",  "model_size", "relative performance", "relative size", "accuracy", "f1"]]
    dfs[dset_name] = dset_df
    
result_df = pd.concat(dfs.values(), keys=dfs.keys())
result_df.reset_index(level=0, inplace=True)
result_df = result_df.rename(columns={"level_0": "dataset"})
result_df.to_csv("temp.csv", index=False)
print(f"Results written to temp.csv")
result_df

In [None]:
# # Pivot the DataFrame to create the raw metric table
# table_df_raw = result_df.pivot(index='dataset', columns='model', values='metric')

# # Pivot the DataFrame to create the relative performance metric table
# table_df_relative_performance = result_df.pivot(index='dataset', columns='model', values='relative performance')

# # Join the two tables based on dataset
# joined_df = table_df_raw.join(table_df_relative_performance, lsuffix="_raw", rsuffix="_relative_performance")


# # joined_df.to_csv("temp.csv", index=True)

# # joined_df = joined_df.reset_index()

# joined_df = joined_df.rename_axis(None, axis=1).reset_index()
# # joined_df.index = range(len(joined_df))

# # joined_df.columns = joined_df.columns.to_list()


# raw_df = joined_df[['dataset', 'knn_raw', 'mlp_raw', 'random-forest_raw', 'svm_raw', 'wisard_raw']]
# relative_df = joined_df[['dataset', 'knn_relative_performance', 'mlp_relative_performance', 'random-forest_relative_performance', 'svm_relative_performance', 'wisard_relative_performance']]

# raw_df["dataset"] = raw_df["dataset"].str.replace("_", " ")
# relative_df["dataset"] = relative_df["dataset"].str.replace("_", " ")
# raw_df.columns = raw_df.columns.str.replace("-", " ")
# relative_df.columns = relative_df.columns.str.replace("-", " ")

# line = {"dataset": "Mean"}
# for c in raw_df.columns:
#     if c != "dataset":
#         line[c] = raw_df[c].mean()
# raw_df.loc[len(raw_df)] = line

# line = {"dataset": "Mean"}
# for c in relative_df.columns:
#     if c != "dataset":
#         line[c] = relative_df[c].mean()
# relative_df.loc[len(relative_df)] = line


# # Setting the dataset column as the index
# raw_df.set_index('dataset', inplace=True)
# relative_df.set_index('dataset', inplace=True)

# # # Concatenating the DataFrames
# final_df = pd.concat([raw_df, relative_df], axis=1)

# # # Renaming the columns
# final_df.columns = pd.MultiIndex.from_product([['Raw', 'Relative'], raw_df.columns.str.split('_').str[0]])

# write_latex_table("performance_table.tex", final_df.to_latex(float_format="%.2f"))
# final_df

In [None]:
def add_mean_line(df):
    line = {"dataset": "Mean"}
    for c in df.columns:
        if c != "dataset":
            line[c] = df[c].mean()
    df.loc[len(df)] = line
    return df

def raw_relative_table(df, raw_metric, relative_metric):
    # Pivot the DataFrame to create the raw metric table and relative table
    raw_df = (
        df.pivot(index="dataset", columns="model", values=raw_metric)
        .rename_axis(None, axis=1)
        .reset_index()
    )
    raw_df["dataset"] = raw_df["dataset"].str.replace("_", " ")
    raw_df.columns = raw_df.columns.str.replace("-", " ")
    raw_df = raw_df[["dataset", "svm", "mlp", "knn", "random forest", "wisard"]]
    raw_df = add_mean_line(raw_df)
    raw_df.set_index("dataset", inplace=True)

    relative_df = (
        df.pivot(
            index="dataset", columns="model", values=relative_metric
        )
        .rename_axis(None, axis=1)
        .reset_index()
    )

    relative_df["dataset"] = relative_df["dataset"].str.replace("_", " ")
    relative_df.columns = relative_df.columns.str.replace("-", " ")
    relative_df = relative_df[["dataset", "svm", "mlp", "knn", "random forest", "wisard"]]
    relative_df = add_mean_line(relative_df)
    relative_df.set_index("dataset", inplace=True)
    
    # Concatenating the DataFrames
    final_df = pd.concat([raw_df, relative_df], axis=1)

    final_df.columns = pd.MultiIndex.from_product(
        [["Absolute", "Relative"], raw_df.columns.str.split("_").str[0]]
    )
    return final_df

performance_df = raw_relative_table(result_df.copy(), "metric", "relative performance")
order_of_datasets = performance_df["Relative"]["wisard"].sort_values(ascending=False).keys().to_list()
order_of_datasets.remove("Mean")
order_of_datasets.append("Mean")
performance_df.index = order_of_datasets
write_latex_table("performance_table.tex", performance_df.to_latex(float_format="%.2f"))

size_df = raw_relative_table(result_df.copy(), "model_size", "relative size")
size_df.index = order_of_datasets
write_latex_table("size_table.tex", size_df.to_latex(float_format="%.2f"))

In [None]:
performance_df

In [None]:
# dfs = []

# def belongs_to_pareto(df, model):
#     metric = df[df["model"] == model]["metric"].iloc[0]
#     size = df[df["model"] == model]["model_size"].iloc[0]
    
#     for r_index, row in df.iterrows():
#         if row["metric"] > metric and row["model_size"] < size:
#             return False
        
#     return True
    
# columns = result_df["dataset"].unique()
    

# models = ["wisard", "random-forest", "svm", "mlp", "knn"]
# for c in columns:
#     x_df = result_df[result_df["dataset"] == c]
#     if belongs_to_pareto(x_df, "wisard"):
#         print(f"Wisard belongs to pareto in {c}")
#         # x_df.index = x_df.index.str.upper()
    
#     lines = []
#     for m in models:
#         line = x_df[x_df["model"] == m]
#         if belongs_to_pareto(x_df, m):
#             print(f"Wisard belongs to pareto in {c}")
#             line["pareto"] = True
#         else:
#             line["pareto"] = False
#         lines.append(line)
        
        
#     x_df = pd.concat(lines)
    
#     dfs.append(x_df)
    
# n = pd.concat(dfs).reset_index()
    
# # n["dataset"] = n["level_0"]
# # n["pareto"] = n["pareto"].astype(int)
# n

In [None]:
# # Assume 'results' DataFrame with columns: 'dataset', 'model', 'accuracy'

# # Create a grouped horizontal bar chart for accuracy per model and dataset with reversed bar groups
# fig_grouped_bar = px.bar(
#     n,
#     y="dataset",
#     x="metric",
#     # error_x="metric_std",
#     color="model",
#     #  title='Metric Comparison by Model and Dataset',
#     labels={"metric": "Performance", "dataset": "Dataset", "model": ""},
#     barmode="group",
#     orientation='h',
#     color_discrete_sequence=px.colors.qualitative.Prism,
# )


# # Reverse the order of the bar groups
# fig_grouped_bar.update_layout(
#     yaxis=dict(autorange="reversed"),
# )

# # Display the plot
# fig_grouped_bar.update_layout(
#     legend=dict(
#         orientation="h", yanchor="top", y=1.05, xanchor="center", x=0.5
#     ),
#     height=1200,
#     width=2480 / 4,
#     font=dict(family="Times New Roman", size=14),
# )

# write_figure("models_performance_horizontal.pdf", fig_grouped_bar)
# fig_grouped_bar.show()

In [None]:
result_df

In [None]:
# Identify Pareto frontier
def is_pareto_efficient(costs):
    is_efficient = np.ones(costs.shape[0], dtype=bool)
    for i, c in enumerate(costs):
        if is_efficient[i]:
            is_efficient[is_efficient] = np.any(costs[is_efficient] < c, axis=1)
            is_efficient[i] = True  # Keep the current point
    return is_efficient

for dset_name, dset_df in result_df.groupby("dataset"):
    costs = dset_df[["model_size", "metric"]].to_numpy()
    # Invert metric (lower is better)
    costs[:, 1] = 1 / costs[:, 1]
    pareto = is_pareto_efficient(costs)
    result_df.loc[dset_df.index, "pareto"] = pareto
    
result_df.to_csv("temp.csv", index=False)
print(f"Csv written to temp.csv")
result_df.head(n=10)

In [None]:
result_df.groupby("model").pareto.value_counts().to_frame()

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Assuming 'data' is our DataFrame
data = result_df.copy()

# dfs = []
# for dset in order_of_datasets:
#     x = data[data["dataset"] == dset].copy()
#     dfs.append(x)
# data = pd.concat(dfs).reset_index(drop=True)

# Define marker symbols for each model
marker_symbols = {
    "knn": "circle",
    "mlp": "square",
    "random-forest": "diamond",
    "svm": "cross",
    "wisard": "x",
}

# Define model names for legend
model_names = {
    "knn": "KNN",
    "mlp": "MLP",
    "random-forest": "Random Forest",
    "svm": "SVM",
    "wisard": "Wisard",
}

pareto_colors = {
    True: px.colors.qualitative.Plotly[1],
    False: px.colors.qualitative.Plotly[0],
}

# data["relative size"] = np.log(data["relative size"])

fig = px.scatter(
    data,
    x="relative size",
    y="metric",
    symbol="model",
    symbol_map=marker_symbols,
    color="pareto",
    color_discrete_map=pareto_colors,
    facet_col="dataset",
    facet_col_wrap=3,
    height=1000,
    width=900,
    facet_row_spacing=0.03,
)

fig.update_traces(
    marker=dict(size=7),
    selector=dict(mode="markers"),
    showlegend=False,  # Hides the legend entries created by Plotly Express
)

for anno in fig["layout"]["annotations"]:
    anno["text"] = anno["text"].split("=")[1].replace("_", " ")

# Manually map symbols to names in the legend
legend_labels = {
    symbol: model_names[model] for model, symbol in marker_symbols.items()
}

# Create a custom legend
custom_legend = []
for symbol, model_name in legend_labels.items():
    custom_legend.append(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(
                symbol=symbol, size=12, color=px.colors.qualitative.Plotly[0]
            ),
            name=model_name,
        )
    )

# Add custom legend to the figure
for trace in custom_legend:
    fig.add_trace(trace)

fig.update_layout(
    legend=dict(
        title="",  # Set title to empty string to remove the legend title
        orientation="h",
        yanchor="top",
        y=1.07,
        xanchor="center",
        x=0.5,
        traceorder="normal",  # Set trace order to normal to arrange legend entries horizontally
    ),
    margin=dict(l=10, r=10, t=10, b=10),
    font=dict(family="Times New Roman", size=14),
)

write_figure("model_metric_size.pdf", fig)

fig.show()

In [None]:
data

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Assuming 'data' is our DataFrame
data = result_df.copy()

# Define marker symbols for each model
marker_symbols = {
    "knn": "circle",
    "mlp": "square",
    "random-forest": "diamond",
    "svm": "cross",
    "wisard": "x",
}

# Define model names for legend
model_names = {
    "knn": "KNN",
    "mlp": "MLP",
    "random-forest": "Random Forest",
    "svm": "SVM",
    "wisard": "Wisard",
}

pareto_colors = {
    True: px.colors.qualitative.Plotly[1],
    False: px.colors.qualitative.Plotly[0],
}


rows = 5
cols = 3

fig = go.Figure()







# data["relative size"] = np.log(data["relative size"])

fig = px.scatter(
    data,
    x="relative size",
    y="metric",
    symbol="model",
    symbol_map=marker_symbols,
    color="pareto",
    color_discrete_map=pareto_colors,
    facet_col="dataset",
    facet_col_wrap=3,
    height=1000,
    width=900,
    facet_row_spacing=0.03,
)

fig.update_traces(
    marker=dict(size=7),
    selector=dict(mode="markers"),
    showlegend=False,  # Hides the legend entries created by Plotly Express
)

for anno in fig["layout"]["annotations"]:
    anno["text"] = anno["text"].split("=")[1].replace("_", " ")

# Manually map symbols to names in the legend
legend_labels = {
    symbol: model_names[model] for model, symbol in marker_symbols.items()
}

# Create a custom legend
custom_legend = []
for symbol, model_name in legend_labels.items():
    custom_legend.append(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(
                symbol=symbol, size=12, color=px.colors.qualitative.Plotly[0]
            ),
            name=model_name,
        )
    )

# Add custom legend to the figure
for trace in custom_legend:
    fig.add_trace(trace)

fig.update_layout(
    legend=dict(
        title="",  # Set title to empty string to remove the legend title
        orientation="h",
        yanchor="top",
        y=1.07,
        xanchor="center",
        x=0.5,
        traceorder="normal",  # Set trace order to normal to arrange legend entries horizontally
    ),
    margin=dict(l=10, r=10, t=10, b=10),
    font=dict(family="Times New Roman", size=14),
)

write_figure("model_metric_size.pdf", fig)

fig.show()

In [None]:
dfs = {}
for dset_name, dset_df in best_metric_dataset_model.groupby("dataset"):
    max_val = dset_df["metric"].max()
    dset_df["model_size (KB)"] = (dset_df["model_size"] / 1024)
    dset_df["relative performance"] = dset_df["metric"]  / max_val
    dset_df = dset_df[["relative performance", "model_size (KB)"]]
    dfs[dset_name] = dset_df
    
result_df = pd.concat(dfs.values(), keys=dfs.keys()).reset_index()
# result_df.reset_index(level=0, inplace=True)
result_df

In [None]:
print("How many times per dataset, each model is the best tradeoff?")
best_metric_dataset_model[
    best_metric_dataset_model.best_tradeoff == True
].value_counts("model").to_frame().reset_index()

In [None]:
facets = 3
cmap = px.colors.qualitative.Prism

colors = {
    name: cmap[i]
    for i, name in enumerate(sorted(best_metric_dataset_model.model.unique()))
}

# Scatter plot for trade-off with normalized model size
fig_tradeoff_normalized = px.scatter(
    best_metric_dataset_model,
    x="metric",
    y="normalized_model_size_ratio",
    color="model",
    facet_col="dataset",
    facet_col_wrap=facets,
    # title="Trade-off Between Metric and Normalized Model Size Across Datasets",
    labels={
        "metric": "Performance",
        "normalized_model_size_ratio": "Size Ratio (normalized)",
        "model": "",
    },
    facet_row_spacing=0.07,
    width=1400,
    height=800,
    color_discrete_sequence=cmap,
)


fig_tradeoff_normalized.update_traces(
    marker=dict(size=7.5),
)

fig_tradeoff_normalized.update_xaxes(showticklabels=True)

fig_tradeoff_normalized.update_yaxes(showticklabels=True)

fig_tradeoff_normalized.for_each_annotation(
    lambda a: a.update(text=a.text.split("=")[-1])
)

# fig_tradeoff_normalized.update_layout(
#     legend=dict(
#         orientation="h", yanchor="bottom", y=-0.15, xanchor="center", x=0.5,
#     ),

# )

fig_tradeoff_normalized.update_layout(
    legend=dict(
        orientation="h",
        yanchor="top",
        y=1.05,
        xanchor="center",
        x=0.5,
        itemsizing="constant",  # Set this to "constant" to show only one item for color
        font=dict(family="Times New Roman", size=14),
    ),
    height=1200,
    width=2480 /2.5,
    font=dict(family="Times New Roman", size=14),
)
num_rows = len(results.dataset.unique()) // facets
num_cols = facets
# datasets = list(sorted(results.dataset.unique(), reverse=False))

for r in range(num_rows):
    for c in range(num_cols):
       
        facet = r * num_cols + c
        dset = fig_tradeoff_normalized.layout.annotations[facet]['text']

        x_line = (
            results[results["dataset"] == dset]["metric"].max()
            - metric_threshold
        )
        
 # Scatter plot for best tradeoff points with a cross
        best_tradeoff_points = best_metric_dataset_model[
            (best_metric_dataset_model["dataset"] == dset)
            & (best_metric_dataset_model["best_tradeoff"] == True)
        ]
        
        best_tradeoff_model = best_tradeoff_points.model.iloc[0]
        
        # print(f"Facet {facet}: {dset} - {best_tradeoff_model}. Max: {results[results['dataset'] == dset]['metric'].max()}, line: {x_line}")
        
        fig_tradeoff_normalized.add_trace(
            go.Scatter(
                x=best_tradeoff_points["metric"],
                y=best_tradeoff_points["normalized_model_size_ratio"],
                mode="markers",
                marker=dict(
                    size=10,
                    symbol="x",
                    color=colors[best_tradeoff_model]
                ),
                showlegend=False,  # To not duplicate in the legend
            ),
            row=r + 1,
            col=c + 1,
        )
        
        fig_tradeoff_normalized.add_vline(
            x=x_line, line_dash="dot", row=r + 1, col=c + 1, line_width=1
        )

# Display the plot
fig_tradeoff_normalized.show()

write_figure("performance_size_tradeoff_normalized.pdf", fig_tradeoff_normalized)

In [None]:
best_metric_dataset_model[
    best_metric_dataset_model.best_tradeoff == True
][["dataset", "model", "config_name", "metric", "model_size", "model_size_ratio"]]

In [None]:
best_metric_dataset_model[best_metric_dataset_model ["dataset"] == "iris"].dropna(axis=1)[["model", "metric", "model_size", "model_size_ratio", "normalized_model_size_ratio"]]

In [None]:
best_metric_dataset_model[["dataset", "model", "config_name", "metric", "model_size", "model_size_ratio"]]

In [None]:
best_metric_dataset_model[
    (best_metric_dataset_model.best_tradeoff == True) & (best_metric_dataset_model.model == "wisard")
]["model_size_ratio"]

In [None]:
encoder_info = best_metric_dataset_model[best_metric_dataset_model.model == "wisard"]
encoder_info = encoder_info[["dataset", "encoder", "resolution", "tuple_size", "bleach"]].reset_index(drop=True)
encoder_info.rename(columns={"encoder": "Encoder", "resolution": "Resolution", "tuple_size": "Tuple Size", "bleach": "Bleach"}, inplace=True)
encoder_info["Encoder"] = encoder_info["Encoder"].apply(lambda x: "Distributive Thermometer" if x == "distributive-thermometer" else "Thermometer")
encoder_info["Resolution"] = encoder_info["Resolution"].astype(int)
encoder_info["Tuple Size"] = encoder_info["Tuple Size"].astype(int)
encoder_info["Bleach"] = encoder_info["Bleach"].astype(int)
encoder_info

In [None]:
latex_str = encoder_info.to_latex(
    index=False,
    escape=True,
    caption="Parameters used for each experiment",
    label="tab:experiment-parameters",
    float_format="%.2f",
)
write_latex_table("experiment_parameters.tex", latex_str)

## 4. Select the wisard with best bloom filter

Here we show that, costing up to 1% of performance of the best dict-wisard, a 
space-efficient bloom filter achieves the best results.

In [None]:
# Up to 1% of accuracy loss
metric_threshold = 0.01

In [None]:
results = base_results.copy()
best_results = best_metric_dataset_model.copy()

results = results[results["model"] == "wisard"]
best_results = best_results[best_results["model"] == "wisard"]

results["bloom-filter"] = results["config_name"].apply(lambda x: x.split(" ")[0])
best_results["bloom-filter"] = best_results["config_name"].apply(lambda x: x.split(" ")[0])

In [None]:
temp = []


for dset_name, dset_df in results.groupby("dataset"):
    best_model = best_results[best_results["dataset"] == dset_name].iloc[0]
    # print(f"*** Dataset: {dset_name} with metric: {best_model['metric']} and model size: {int(best_model['model_size'])}")
    bests = dset_df[dset_df["metric"] >= best_model["metric"] - metric_threshold]
    bests["metric_improvement"] = bests["metric"]/ best_model["metric"]
    bests["model_size_improvement"] = bests["model_size"]/ best_model["model_size"]
    bests["best_metric"]  = best_model["metric"]
    bests["best_model_size"]  = best_model["model_size"]
    temp.append(bests)
    
results = pd.concat(temp, ignore_index=True)
results.sample(n=3)

In [None]:
best_results = results.loc[results.groupby("dataset")["model_size_improvement"].idxmin()]
best_results["bloom-filter"].value_counts()

In [None]:
best_results_table = best_results.sort_values(by=["dataset"])[["dataset", "config_name", "model_size_improvement"]]

best_results_table.rename(
    columns={
        "dataset": "Dataset",
        "config_name": "Bloom Filter",
        "model_size_improvement": "Size Ratio",
    },
    inplace=True,
)

best_results_table

In [None]:
latex_str = best_results_table.to_latex(
    index=False,
    escape=True,
    caption="Best Bloom Filter configuration for each dataset",
    label="tab:best_bloom_filter",
    float_format="%.2f",
)

write_latex_table("best_bloom_filter.tex", latex_str)

In [None]:
best_results_table = best_results.sort_values(by=["dataset"])[["dataset", "bloom-filter", "config_name", "model_size_improvement"]]
best_results_table.groupby("bloom-filter")["model_size_improvement"].agg(["mean", "std", "count"])

# 5. Select the wisard with best aggregated bloom filter

Here we show that, costing up to 1% of performance of the best dict-wisard, a 
space-efficient bloom filter achieves the best results.

In [None]:
# Up to 2% of accuracy loss
metric_threshold = 0.01

In [None]:
results = base_results.copy()
best_results = best_metric_dataset_model.copy()

results = results[results["model"] == "wisard"]
best_results = best_results[best_results["model"] == "wisard"]

results["bloom-filter"] = results["config_name"].apply(lambda x: x.split(" ")[0])
best_results["bloom-filter"] = best_results["config_name"].apply(lambda x: x.split(" ")[0])

# Filter: only Dict, CountingBloomFilter and CountMinSketch
results = results[results["bloom-filter"].isin(["Dict", "CountingBloomFilter", "CountMinSketch"])]
best_results = best_results[best_results["bloom-filter"].isin(["Dict", "CountingBloomFilter", "CountMinSketch"])]

In [None]:
temp = []


for dset_name, dset_df in results.groupby("dataset"):
    best_model = best_results[best_results["dataset"] == dset_name].iloc[0]
    # print(f"*** Dataset: {dset_name} with metric: {best_model['metric']} and model size: {int(best_model['model_size'])}")
    bests = dset_df[dset_df["metric"] >= best_model["metric"] - metric_threshold]
    bests["metric_improvement"] = bests["metric"]/ best_model["metric"]
    bests["model_size_improvement"] = bests["model_size"]/ best_model["model_size"]
    bests["best_metric"]  = best_model["metric"]
    bests["best_model_size"]  = best_model["model_size"]
    temp.append(bests)
    
results = pd.concat(temp, ignore_index=True)
results.sample(n=3)

In [None]:
best_results = results.loc[results.groupby("dataset")["model_size_improvement"].idxmin()]
best_results["bloom-filter"].value_counts()

In [None]:
best_results_table = best_results.sort_values(by=["dataset"])[["dataset", "config_name", "model_size_improvement"]]

best_results_table.rename(
    columns={
        "dataset": "Dataset",
        "config_name": "Bloom Filter Configuration",
        "model_size_improvement": "Size Ratio",
    },
    inplace=True,
)

best_results_table

In [None]:
latex_str = best_results_table.to_latex(
    index=False,
    escape=True,
    caption="Best Bloom Filter configuration for each dataset",
    label="tab:best_bloom_filter_agg",
    float_format="%.2f",
)

write_latex_table("best_bloom_filter_agg.tex", latex_str)

In [None]:
best_results_table = best_results.sort_values(by=["dataset"])[["dataset", "bloom-filter", "config_name", "model_size_improvement"]]
best_results_table.groupby("bloom-filter")["model_size_improvement"].agg(["mean", "std", "count"])