# Results Neurocomputing/ESANN 2024

This notebook contains the code to analyse the results of the 
Neurocomputing/ESANN 2024 paper, and it is responsible for generating
the figures and tables in the paper.

The notebook is organised as follows:

1. The first section contains imports, constants, helper functions and load the 
    data.

2. We show that the dict-wisard has competitive performance with the 
    classical machine learning algorithms.

## 1. General constants, hhelper functions, and data loading

Imports, global constants and packages' configuration.

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from typing import List, Union
import plotly.graph_objects as go
import plotly.express as px

In [2]:
# Configs
# pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", lambda x: "%.4f" % x)

# ---------- Paths -------------
# -- Inputs
datasets_info_path = Path("../experiments/datasets_info.json")

results_path = Path("../experiments_2/results_tune/results.csv")
results_sklearn_path = Path(
    "../experiments/results_sklearn/results_sklearn.csv"
)

# --- Ouptuts
latex_tables_path = Path("tables")
figures_path = Path("figures")

Useful functions

In [3]:
def write_figure(
    filename: str, fig: go.Figure, path: Union[Path, str] = figures_path
):
    """Write a Figure to a file.

    Parameters
    ----------
    filename : str
        The name of the file to write to.
    fig : go.Figure
        The plotly figure object.
    path : Union[Path, str], optional
        The path where the file will be stored, by default figures_path
    """
    path = Path(path)
    path.mkdir(exist_ok=True, parents=True)
    fname = path / filename
    fig.write_image(fname)

    print(f"Figure written to: {fname}")
    print(f"Filename   :", filename)
    print(f"Latex label:", filename.replace(".pdf", ""))


def write_latex_table(
    filename: str, table: str, path: Union[Path, str] = latex_tables_path
):
    """Write a latex table to a file.

    Parameters
    ----------
    filename : str
        The name of the file to write to.
    table : str
        The table, as a string.
    path : Union[Path, str], optional
        The path where the file will be stored, by default latex_tables_path
    """
    path = Path(path)
    path.mkdir(exist_ok=True, parents=True)
    fname = path / filename
    with fname.open("w") as f:
        f.write(
            "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"
        )
        f.write(
            "%% WARNING: DO NOT CHANGE THIS FILE. IT IS GENERATED AUTOMATICALLY %\n"
        )
        f.write(
            "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"
        )
        f.write(table)
    print(f"Table written to: {fname}")


def aggregate_mean_std(
    df: pd.DataFrame,
    group_by: List[str],
    keys_to_aggregate: List[str],
) -> pd.DataFrame:
    """Group and aggregate columns of a dataframe, using mean and std.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to aggregate.
    group_by : List[str]
        The columns used to group the dataframe.
    keys_to_aggregate : List[str]
        The column names that will be aggregated.

    Returns
    -------
    pd.DataFrame
        A dataframe with the aggregated values.

    Example
    -------
    >>> df = pd.DataFrame({
    ...     "a": [1, 1, 2, 2],
    ...     "b": [1, 2, 3, 4],
    ...     "c": [5, 6, 7, 8],
    ... })
    >>> aggregate_mean_std(df, ["a"], ["b", "c"])
       a    b  b_std    c  c_std
    0  1 1.500  0.707 5.500  0.707
    1  2 3.500  0.707 7.500  0.707

    """
    x = (
        df.groupby(group_by)[keys_to_aggregate]
        .agg("mean")
        .join(
            df.groupby(group_by)[keys_to_aggregate].agg("std"), rsuffix="_std"
        )
    )
    return x.reset_index()

### Read inputs and create a full dataframe

1. Read the datasets specifications (`dataset_info`)
2. Read the wisard results (`wisard_results`)
3. Read the sklearn results (`sklearn_results`)
4. Create a results dataframe, mergind dataset_info, wisard_results and sklearn_results

#### Dataset information

In [4]:
# Datasets information
datasets_info = pd.read_json(datasets_info_path, orient="index").reset_index(drop=True)
datasets_info.rename(columns={"name": "dataset_name"}, inplace=True)
datasets_info.head(n=3)

Unnamed: 0,dataset_name,size,features,num_classes,train_size,test_size,balanced,metric
0,breast_cancer,141416,30,3,398,171,False,f1 weighted
1,dry_bean,1773910,16,7,10888,2723,False,f1 weighted
2,glass,17413,9,24,149,65,False,f1 weighted


In [5]:
info = datasets_info[
    ["dataset_name", "features", "size", "num_classes", "balanced"]
]
info["size"] = info["size"] / 1024

info = info.rename(
    columns={
        "dataset_name": "Dataset",
        "features": "Features",
        "size": "Size (KB)",
        "num_classes": "Classes",
        "balanced": "Is Balanced?",
    }
)

latex_str = info.to_latex(
    index=False,
    escape=True,
    caption="Datasets information",
    label="tab:datasets_info",
    float_format="%.2f",
)

write_latex_table("datasets_info.tex", latex_str)

Table written to: tables/datasets_info.tex


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  info["size"] = info["size"] / 1024
  latex_str = info.to_latex(


#### Wisard results

Read and parse wisard result to `wisard_results` dataframe.

**Note**: The `wisard_results` already has aggregated results for each dataset.

In [6]:
def parse_wisard_config_name(row) -> str:
    """Given a row, parse the name of configuration.

    Parameters
    ----------
    row : pd.Series
        The row of the dataframe.

    Returns
    -------
    str
        The name of the configuration.
    """

    names = []
    if not pd.isna(row["num_hitters"]):
        names.append(f"NR: {row['num_hitters']}")
    if not pd.isna(row["width"]):
        names.append(f"W: {row['width']}")
    if not pd.isna(row["depth"]):
        names.append(f"D: {row['depth']}")
    if not pd.isna(row["capacity"]):
        names.append(f"C: {row['capacity']}")
    if not pd.isna(row["bucket_size"]):
        names.append(f"BS: {row['bucket_size']}")
    if not pd.isna(row["threshold"]):
        names.append(f"T: {row['threshold']}")
    if not pd.isna(row["est_elements"]):
        names.append(f"EST: {row['est_elements']}")
    if not pd.isna(row["false_positive_rate"]):
        names.append(f"FPR: {row['false_positive_rate']}")

    if names:
        names = ", ".join(names)
        return f"{row['ram']} ({names})"
    else:
        return row["ram"]


# --- Read results and add a column with the name of the configuration ---
wisard_results = pd.read_csv(results_path).drop_duplicates()

# --- Add useful columns ---
wisard_results["tuple_size"] = (
    wisard_results["resolution"] / wisard_results["tuple_resolution_factor"]
)
wisard_results["config_name"] = wisard_results.apply(
    parse_wisard_config_name, axis=1
)

# --- Select the columns of interest ---
wisard_results = wisard_results[
    [
        "dataset_name",
        "config_name",
        "test_accuracy_mean",
        "test_accuracy_std",
        "test_f1 weighted_mean",
        "test_f1 weighted_std",
        "test_model size_mean",
        "test_model size_std",
        "test_ties_mean",
        "test_ties_std",
        "tuple_size",
        "encoder",
        "resolution",
        "bleach",
        "rams per discriminator",
        "ram",
    ]
]

# --- Rename columns ---
wisard_results = wisard_results.rename(
    columns={
        "dataset_name": "dataset",
        "ram": "model",
        "test_ties_mean": "ties",
        "test_ties_std": "ties_std",
        "test_accuracy_mean": "accuracy",
        "test_accuracy_std": "accuracy_std",
        "test_f1 weighted_mean": "f1",
        "test_f1 weighted_std": "f1_std",
        "test_model size_mean": "model_size",
        "test_model size_std": "model_size_std",
    }
)

# --- Add model column and drop duplicates ---
wisard_results["model"] = "wisard"
wisard_results.drop_duplicates(inplace=True)

wisard_results.sample(n=2)

Unnamed: 0,dataset,config_name,accuracy,accuracy_std,f1,f1_std,model_size,model_size_std,ties,ties_std,tuple_size,encoder,resolution,bleach,rams per discriminator,model
38381,vehicle,"HeavyHitters (NR: 310.0, W: 154.0, D: 4.0)",0.5373,0.0723,0.4873,0.0813,267840.0,0.0,121.0,14.8997,20.0,thermometer,40,2,36,wisard
38326,vehicle,"HeavyHitters (NR: 5.0, W: 253.0, D: 3.0)",0.4,0.0127,0.3431,0.0151,329616.0,0.0,140.3333,7.4087,13.0,thermometer,26,3,36,wisard


#### Scikit Learn results

Read and parse sklearn result to `sklearn_results` dataframe.

In [7]:
# Read sklearn results and aggregate multiple runs
sklearn_results = pd.read_csv(results_sklearn_path).drop_duplicates()

# Aggregate metric for multiple runs
sklearn_results = aggregate_mean_std(
    df=sklearn_results,
    group_by=[
        "model",
        "model kwargs",
        "dataset name",
        "experiment name",
    ],
    keys_to_aggregate=[
        "accuracy",
        "f1 weighted",
        "f1 macro",
        "f1 micro",
        "train time",
        "predict time",
        "model size",
    ],
)

# Select columns of interest
sklearn_results = sklearn_results[
    [
        "dataset name",
        "model",
        "model kwargs",
        "accuracy",
        "accuracy_std",
        "f1 weighted",
        "f1 weighted_std",
        "model size",
        "model size_std",
    ]
]

# Rename columns
sklearn_results.rename(
    columns={
        "dataset name": "dataset",
        "model kwargs": "config_name",
        "f1 weighted": "f1",
        "f1 weighted_std": "f1_std",
        "model size": "model_size",
        "model size_std": "model_size_std",
    },
    inplace=True,
)

# Add config_name (name of the model)
sklearn_results["config_name"] = ""

sklearn_results.sample(n=2)

Unnamed: 0,dataset,model,config_name,accuracy,accuracy_std,f1,f1_std,model_size,model_size_std
38,glass,logistic-regression,,0.6462,0.0,0.61,0.0,1215.0,0.0
95,letter,random-forest,,0.9607,0.0012,0.9607,0.0012,104223280.6667,35399.0349


#### Merge wisard results, sklearn results, and dataset information

Create a full dataframe with all the information named  `base_results`.

In [8]:
# results is a dataframe with the results of both wisard (only dict ram) and sklearn
# temp = wisard_results.loc[wisard_results[(wisard_results["config_name"] == "Dict")].groupby("dataset")["accuracy"].idxmax()]
# results = pd.concat([temp, sklearn_results], ignore_index=True)
base_results = pd.concat([wisard_results, sklearn_results], ignore_index=True)
base_results = pd.merge(
    base_results,
    datasets_info,
    left_on="dataset",
    right_on="dataset_name",
    how="left",
    validate="many_to_one",
)

base_results.rename(columns={"metric": "metric_to_use"}, inplace=True)

# add a metric column to results. If metric_to_use is f1 weighted, copy the f1 column to metric, else copy the accuracy column. Do it for each row
base_results["metric"] = base_results.apply(
    lambda row: row["accuracy"]
    if row["metric_to_use"] == "accuracy"
    else row["f1"],
    axis=1,
)
base_results["metric_std"] = base_results.apply(
    lambda row: row["accuracy_std"]
    if row["metric_to_use"] == "accuracy"
    else row["f1_std"],
    axis=1,
)

# Rename some columns
base_results.rename(
    columns={
        "size": "dataset_size",
    },
    inplace=True,
)

# Remove dataset_name column
base_results.drop(columns=["dataset_name"], inplace=True)

# We use metric column to compare models, instead of accuracy or f1

# Put dataset, model, config_name, metric, metric_std, model_size, model_size_std columns first
cols = base_results.columns.tolist()
cols = [
    "dataset",
    "model",
    "config_name",
    "metric",
    "metric_std",
    "model_size",
    "model_size_std",
] + cols[2:-4]
base_results = base_results[cols]

# Drop duplicates columns
base_results = base_results.loc[:,~base_results.columns.duplicated()]

base_results.sample(n=2)

Unnamed: 0,dataset,model,config_name,metric,metric_std,model_size,model_size_std,accuracy,accuracy_std,f1,...,tuple_size,encoder,resolution,bleach,rams per discriminator,dataset_size,features,num_classes,train_size,test_size
6691,sepsis,wisard,"StreamThreshold (W: 1.0, D: 5.0, T: 35.0)",0.0101,0.0,216.0,0.0,0.0737,0.0,0.0101,...,63.0,distributive-thermometer,63.0,774.0,3.0,3535311,3,2,88272,22069
6138,dry_bean,wisard,Dict,0.6787,0.0173,516411.5,13113.5,0.6952,0.0129,0.6787,...,38.0,distributive-thermometer,76.0,12.0,32.0,1773910,16,7,10888,2723


In [9]:
# Filter results
base_results = base_results[base_results["model"].isin(["wisard", "knn", "random-forest", "svm", "mlp"])]
base_results = base_results[~base_results["dataset"].isin(["mnist", "olivetti", "sensorless_drive"])]
# base_results = base_results[~base_results["dataset"].isin(["olivetti", "sensorless_drive"])]
base_results.shape

(14187, 23)

## 2. Wisard has competitive results with SKLearn

Here we show that the dict-wisard has competitive performance with the
classical machine learning algorithms.

To do that, we plot the accuracy of the wisard and sklearn algorithms for each
dataset.

In [10]:
# Read results and filter bloom filter results
results = base_results.copy()
results = results[
    (results["model"] != "wisard") | (results["config_name"] == "Dict")
]
results.shape

(3581, 23)

In [11]:
# Create a dataframe with the best performance for each dataset and model
best_metric_df = (
    results.groupby(["dataset", "model"])
    .apply(lambda group: group.loc[group["metric"].idxmax()])
    .reset_index(drop=True)
)

best_metric_df.value_counts("model")

model
knn              15
mlp              15
random-forest    15
svm              15
wisard           15
dtype: int64

In [12]:
# Assume 'results' DataFrame with columns: 'dataset', 'model', 'accuracy'

# Create a grouped bar chart for accuracy per model and dataset
fig_grouped_bar = px.bar(
    best_metric_df,
    x="dataset",
    y="metric",
    error_y="metric_std",
    color="model",
    #  title='Metric Comparison by Model and Dataset',
    labels={"metric": "Performance", "dataset": "Dataset", "model": ""},
    barmode="group",
    color_discrete_sequence=px.colors.qualitative.Prism,
)


# Display the plot
fig_grouped_bar.update_layout(
    legend=dict(
        orientation="h", yanchor="top", y=1.20, xanchor="center", x=0.5
    ),
    height=400,
    width=2480 / 2.5,
    font=dict(family="Times New Roman", size=14),
)

write_figure("models_performance.pdf", fig_grouped_bar)
fig_grouped_bar.show()

Figure written to: figures/models_performance.pdf
Filename   : models_performance.pdf
Latex label: models_performance


In [13]:
# Assume 'results' DataFrame with columns: 'dataset', 'model', 'accuracy'

# Create a grouped horizontal bar chart for accuracy per model and dataset with reversed bar groups
fig_grouped_bar = px.bar(
    best_metric_df,
    y="dataset",
    x="metric",
    error_x="metric_std",
    color="model",
    #  title='Metric Comparison by Model and Dataset',
    labels={"metric": "Performance", "dataset": "Dataset", "model": ""},
    barmode="group",
    orientation='h',
    color_discrete_sequence=px.colors.qualitative.Prism,
)


# Reverse the order of the bar groups
fig_grouped_bar.update_layout(
    yaxis=dict(autorange="reversed"),
)

# Display the plot
fig_grouped_bar.update_layout(
    legend=dict(
        orientation="h", yanchor="top", y=1.05, xanchor="center", x=0.5
    ),
    height=1200,
    width=2480 / 4,
    font=dict(family="Times New Roman", size=14),
)

write_figure("models_performance_horizontal.pdf", fig_grouped_bar)
fig_grouped_bar.show()

Figure written to: figures/models_performance_horizontal.pdf
Filename   : models_performance_horizontal.pdf
Latex label: models_performance_horizontal


In [14]:
# Count how many times per dataset, wisard is the best model

print("How many times per dataset, each model is the best?")
best_metric_df.loc[
    best_metric_df.groupby("dataset")["metric"].idxmax()
].value_counts("model").to_frame().reset_index()

How many times per dataset, each model is the best?


Unnamed: 0,model,0
0,random-forest,9
1,svm,2
2,wisard,2
3,knn,1
4,mlp,1


## 3. Wisard has competitive results with SKLearn and is smallest

Here we show that costing up to 2% of performance, the dict-wisard is much
smaller than the sklearn algorithms.

In [15]:
# Up to 2% of accuracy loss
metric_threshold = 0.01

In [16]:
# Read results and filter bloom filter results
results = base_results.copy()
results = results[
    (results["model"] != "wisard") | (results["config_name"] == "Dict")
]
results.shape

(3581, 23)

In [17]:
# Create a dataframe with the best performance for each dataset and model
best_metric_dataset_model = (
    results.groupby(["dataset", "model"])
    .apply(lambda group: group.loc[group["metric"].idxmax()])
    .reset_index(drop=True)
)

best_metric_dataset_model.value_counts("model")

model
knn              15
mlp              15
random-forest    15
svm              15
wisard           15
dtype: int64

In [18]:
# Add the model_size_ratio column. This column is the ratio between the model
# size of each model and the model size of the best model for each dataset
temp = []

for dset_name, dset_df in best_metric_dataset_model.groupby("dataset"):
    best_row = dset_df.sort_values(by="metric", ascending=False).iloc[0]
    dset_df["model_size_ratio"] = dset_df["model_size"] / best_row["model_size"]
    # Min max normalization
    dset_df["normalized_model_size_ratio"] = (
        dset_df["model_size_ratio"] - dset_df["model_size_ratio"].min()
    ) / (dset_df["model_size_ratio"].max() - dset_df["model_size_ratio"].min())
    dset_df["best_tradeoff"] = False

    best_tradeoff = (
        dset_df[dset_df["metric"] >= best_row["metric"] - metric_threshold]
        .sort_values(by="normalized_model_size_ratio", ascending=True)
        .iloc[0]
    )
    dset_df.loc[best_tradeoff.name, "best_tradeoff"] = True

    temp.append(dset_df)

best_metric_dataset_model = pd.concat(temp)
best_metric_dataset_model.head(n=8)

Unnamed: 0,dataset,model,config_name,metric,metric_std,model_size,model_size_std,accuracy,accuracy_std,f1,...,bleach,rams per discriminator,dataset_size,features,num_classes,train_size,test_size,model_size_ratio,normalized_model_size_ratio,best_tradeoff
0,breast_cancer,knn,,0.9179,0.0,99380.0,0.0,0.9181,0.0,0.9179,...,,,141416,30,3,398,171,0.3508,0.2818,False
1,breast_cancer,mlp,,0.9103,0.0,85156.0,0.0,0.9123,0.0,0.9103,...,,,141416,30,3,398,171,0.3006,0.2262,False
2,breast_cancer,random-forest,,0.9709,0.0,283286.0,0.0,0.9708,0.0,0.9709,...,,,141416,30,3,398,171,1.0,1.0,False
3,breast_cancer,svm,,0.8731,0.0,27227.0,0.0,0.8772,0.0,0.8731,...,,,141416,30,3,398,171,0.0961,0.0,False
4,breast_cancer,wisard,Dict,0.9628,0.01,94920.6667,2226.8169,0.963,0.0099,0.9628,...,4.0,30.0,141416,30,3,398,171,0.3351,0.2644,True
5,dry_bean,knn,,0.7175,0.0,1481493.0,0.0,0.7216,0.0,0.7175,...,,,1773910,16,7,10888,2723,0.1044,0.1004,False
6,dry_bean,mlp,,0.2251,0.147,63218.3333,219.978,0.3286,0.0946,0.2251,...,,,1773910,16,7,10888,2723,0.0045,0.0,False
7,dry_bean,random-forest,,0.9198,0.0017,14189404.0,54370.3571,0.9199,0.0017,0.9198,...,,,1773910,16,7,10888,2723,1.0,1.0,True


In [19]:
# pd.set_option('display.max_rows', 100)
dfs = {}
for dset_name, dset_df in best_metric_dataset_model.groupby("dataset"):
    dset_df["model_size"] = (dset_df["model_size"] / 1024)
    max_val = dset_df["metric"].max()
    max_size =  dset_df["model_size"].max()
    dset_df["relative performance"] = dset_df["metric"]  / max_val
    dset_df["relative size"] = dset_df["model_size"]  / max_size
    # dset_df.index = dset_df["model"]
    dset_df = dset_df[["model", "metric",  "model_size", "relative performance", "relative size", "accuracy", "f1"]]
    dfs[dset_name] = dset_df
    
result_df = pd.concat(dfs.values(), keys=dfs.keys())
result_df.reset_index(level=0, inplace=True)
result_df = result_df.rename(columns={"level_0": "dataset"})
result_df.to_csv("temp.csv", index=False)
print(f"Results written to temp.csv")
result_df

Results written to temp.csv


Unnamed: 0,dataset,model,metric,model_size,relative performance,relative size,accuracy,f1
0,breast_cancer,knn,0.9179,97.0508,0.9454,0.3508,0.9181,0.9179
1,breast_cancer,mlp,0.9103,83.1602,0.9376,0.3006,0.9123,0.9103
2,breast_cancer,random-forest,0.9709,276.6465,1.0000,1.0000,0.9708,0.9709
3,breast_cancer,svm,0.8731,26.5889,0.8992,0.0961,0.8772,0.8731
4,breast_cancer,wisard,0.9628,92.6960,0.9917,0.3351,0.9630,0.9628
...,...,...,...,...,...,...,...,...
70,yeast,knn,0.5464,178.0469,0.8802,0.0169,0.5522,0.5464
71,yeast,mlp,0.5896,52.9580,0.9498,0.0050,0.6027,0.5896
72,yeast,random-forest,0.6207,10533.5645,1.0000,1.0000,0.6296,0.6207
73,yeast,svm,0.5992,137.1494,0.9653,0.0130,0.6094,0.5992


In [20]:
# # Pivot the DataFrame to create the raw metric table
# table_df_raw = result_df.pivot(index='dataset', columns='model', values='metric')

# # Pivot the DataFrame to create the relative performance metric table
# table_df_relative_performance = result_df.pivot(index='dataset', columns='model', values='relative performance')

# # Join the two tables based on dataset
# joined_df = table_df_raw.join(table_df_relative_performance, lsuffix="_raw", rsuffix="_relative_performance")


# # joined_df.to_csv("temp.csv", index=True)

# # joined_df = joined_df.reset_index()

# joined_df = joined_df.rename_axis(None, axis=1).reset_index()
# # joined_df.index = range(len(joined_df))

# # joined_df.columns = joined_df.columns.to_list()


# raw_df = joined_df[['dataset', 'knn_raw', 'mlp_raw', 'random-forest_raw', 'svm_raw', 'wisard_raw']]
# relative_df = joined_df[['dataset', 'knn_relative_performance', 'mlp_relative_performance', 'random-forest_relative_performance', 'svm_relative_performance', 'wisard_relative_performance']]

# raw_df["dataset"] = raw_df["dataset"].str.replace("_", " ")
# relative_df["dataset"] = relative_df["dataset"].str.replace("_", " ")
# raw_df.columns = raw_df.columns.str.replace("-", " ")
# relative_df.columns = relative_df.columns.str.replace("-", " ")

# line = {"dataset": "Mean"}
# for c in raw_df.columns:
#     if c != "dataset":
#         line[c] = raw_df[c].mean()
# raw_df.loc[len(raw_df)] = line

# line = {"dataset": "Mean"}
# for c in relative_df.columns:
#     if c != "dataset":
#         line[c] = relative_df[c].mean()
# relative_df.loc[len(relative_df)] = line


# # Setting the dataset column as the index
# raw_df.set_index('dataset', inplace=True)
# relative_df.set_index('dataset', inplace=True)

# # # Concatenating the DataFrames
# final_df = pd.concat([raw_df, relative_df], axis=1)

# # # Renaming the columns
# final_df.columns = pd.MultiIndex.from_product([['Raw', 'Relative'], raw_df.columns.str.split('_').str[0]])

# write_latex_table("performance_table.tex", final_df.to_latex(float_format="%.2f"))
# final_df

In [21]:
def add_mean_line(df):
    line = {"dataset": "Mean"}
    for c in df.columns:
        if c != "dataset":
            line[c] = df[c].mean()
    df.loc[len(df)] = line
    return df

def raw_relative_table(df, raw_metric, relative_metric):
    # Pivot the DataFrame to create the raw metric table and relative table
    raw_df = (
        df.pivot(index="dataset", columns="model", values=raw_metric)
        .rename_axis(None, axis=1)
        .reset_index()
    )
    raw_df["dataset"] = raw_df["dataset"].str.replace("_", " ")
    raw_df.columns = raw_df.columns.str.replace("-", " ")
    raw_df = raw_df[["dataset", "svm", "mlp", "knn", "random forest", "wisard"]]
    raw_df = add_mean_line(raw_df)
    raw_df.set_index("dataset", inplace=True)

    relative_df = (
        df.pivot(
            index="dataset", columns="model", values=relative_metric
        )
        .rename_axis(None, axis=1)
        .reset_index()
    )

    relative_df["dataset"] = relative_df["dataset"].str.replace("_", " ")
    relative_df.columns = relative_df.columns.str.replace("-", " ")
    relative_df = relative_df[["dataset", "svm", "mlp", "knn", "random forest", "wisard"]]
    relative_df = add_mean_line(relative_df)
    relative_df.set_index("dataset", inplace=True)
    
    # Concatenating the DataFrames
    final_df = pd.concat([raw_df, relative_df], axis=1)

    final_df.columns = pd.MultiIndex.from_product(
        [["Absolute", "Relative"], raw_df.columns.str.split("_").str[0]]
    )
    return final_df

performance_df = raw_relative_table(result_df.copy(), "metric", "relative performance")
order_of_datasets = performance_df["Relative"]["wisard"].sort_values(ascending=False).keys().to_list()
order_of_datasets.remove("Mean")
order_of_datasets.append("Mean")
performance_df.index = order_of_datasets
write_latex_table("performance_table.tex", performance_df.to_latex(float_format="%.2f"))

size_df = raw_relative_table(result_df.copy(), "model_size", "relative size")
size_df.index = order_of_datasets
write_latex_table("size_table.tex", size_df.to_latex(float_format="%.2f"))

Table written to: tables/performance_table.tex
Table written to: tables/size_table.tex



In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.


In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



In [22]:
performance_df

Unnamed: 0_level_0,Absolute,Absolute,Absolute,Absolute,Absolute,Relative,Relative,Relative,Relative,Relative
Unnamed: 0_level_1,svm,mlp,knn,random forest,wisard,svm,mlp,knn,random forest,wisard
image segmentation,0.8731,0.9103,0.9179,0.9709,0.9628,0.8992,0.9376,0.9454,1.0,0.9917
iris,0.6014,0.2251,0.7175,0.9198,0.9024,0.6538,0.2447,0.78,1.0,0.981
sepsis,0.185,0.4554,0.7093,0.706,0.6615,0.2608,0.6421,1.0,0.9954,0.9327
breast cancer,0.5,0.5556,0.5476,0.619,0.8571,0.5833,0.6481,0.6389,0.7222,1.0
optical handwritten,1.0,0.9778,0.9556,0.9333,1.0,1.0,0.9778,0.9556,0.9333,1.0
rice,0.92,0.9205,0.9455,0.9607,0.9001,0.9576,0.9581,0.9842,1.0,0.9369
dry bean,0.5382,0.6565,0.5049,0.8078,0.65,0.6663,0.8127,0.625,1.0,0.8046
wine,0.992,0.981,0.9884,0.9828,0.978,1.0,0.9889,0.9964,0.9907,0.9859
satimage,0.8672,0.4777,0.8637,0.9153,0.8994,0.9475,0.522,0.9437,1.0,0.9827
letter,0.8925,0.9116,0.9025,0.9106,0.8867,0.9791,1.0,0.99,0.9989,0.9726


In [23]:
# dfs = []

# def belongs_to_pareto(df, model):
#     metric = df[df["model"] == model]["metric"].iloc[0]
#     size = df[df["model"] == model]["model_size"].iloc[0]
    
#     for r_index, row in df.iterrows():
#         if row["metric"] > metric and row["model_size"] < size:
#             return False
        
#     return True
    
# columns = result_df["dataset"].unique()
    

# models = ["wisard", "random-forest", "svm", "mlp", "knn"]
# for c in columns:
#     x_df = result_df[result_df["dataset"] == c]
#     if belongs_to_pareto(x_df, "wisard"):
#         print(f"Wisard belongs to pareto in {c}")
#         # x_df.index = x_df.index.str.upper()
    
#     lines = []
#     for m in models:
#         line = x_df[x_df["model"] == m]
#         if belongs_to_pareto(x_df, m):
#             print(f"Wisard belongs to pareto in {c}")
#             line["pareto"] = True
#         else:
#             line["pareto"] = False
#         lines.append(line)
        
        
#     x_df = pd.concat(lines)
    
#     dfs.append(x_df)
    
# n = pd.concat(dfs).reset_index()
    
# # n["dataset"] = n["level_0"]
# # n["pareto"] = n["pareto"].astype(int)
# n

In [24]:
# # Assume 'results' DataFrame with columns: 'dataset', 'model', 'accuracy'

# # Create a grouped horizontal bar chart for accuracy per model and dataset with reversed bar groups
# fig_grouped_bar = px.bar(
#     n,
#     y="dataset",
#     x="metric",
#     # error_x="metric_std",
#     color="model",
#     #  title='Metric Comparison by Model and Dataset',
#     labels={"metric": "Performance", "dataset": "Dataset", "model": ""},
#     barmode="group",
#     orientation='h',
#     color_discrete_sequence=px.colors.qualitative.Prism,
# )


# # Reverse the order of the bar groups
# fig_grouped_bar.update_layout(
#     yaxis=dict(autorange="reversed"),
# )

# # Display the plot
# fig_grouped_bar.update_layout(
#     legend=dict(
#         orientation="h", yanchor="top", y=1.05, xanchor="center", x=0.5
#     ),
#     height=1200,
#     width=2480 / 4,
#     font=dict(family="Times New Roman", size=14),
# )

# write_figure("models_performance_horizontal.pdf", fig_grouped_bar)
# fig_grouped_bar.show()

In [25]:
result_df

Unnamed: 0,dataset,model,metric,model_size,relative performance,relative size,accuracy,f1
0,breast_cancer,knn,0.9179,97.0508,0.9454,0.3508,0.9181,0.9179
1,breast_cancer,mlp,0.9103,83.1602,0.9376,0.3006,0.9123,0.9103
2,breast_cancer,random-forest,0.9709,276.6465,1.0000,1.0000,0.9708,0.9709
3,breast_cancer,svm,0.8731,26.5889,0.8992,0.0961,0.8772,0.8731
4,breast_cancer,wisard,0.9628,92.6960,0.9917,0.3351,0.9630,0.9628
...,...,...,...,...,...,...,...,...
70,yeast,knn,0.5464,178.0469,0.8802,0.0169,0.5522,0.5464
71,yeast,mlp,0.5896,52.9580,0.9498,0.0050,0.6027,0.5896
72,yeast,random-forest,0.6207,10533.5645,1.0000,1.0000,0.6296,0.6207
73,yeast,svm,0.5992,137.1494,0.9653,0.0130,0.6094,0.5992


In [26]:
# Identify Pareto frontier
def is_pareto_efficient(costs):
    is_efficient = np.ones(costs.shape[0], dtype=bool)
    for i, c in enumerate(costs):
        if is_efficient[i]:
            is_efficient[is_efficient] = np.any(costs[is_efficient] < c, axis=1)
            is_efficient[i] = True  # Keep the current point
    return is_efficient

for dset_name, dset_df in result_df.groupby("dataset"):
    costs = dset_df[["model_size", "metric"]].to_numpy()
    # Invert metric (lower is better)
    costs[:, 1] = 1 / costs[:, 1]
    pareto = is_pareto_efficient(costs)
    result_df.loc[dset_df.index, "pareto"] = pareto
    
result_df.to_csv("temp.csv", index=False)
print(f"Csv written to temp.csv")
result_df.head(n=10)

Csv written to temp.csv


Unnamed: 0,dataset,model,metric,model_size,relative performance,relative size,accuracy,f1,pareto
0,breast_cancer,knn,0.9179,97.0508,0.9454,0.3508,0.9181,0.9179,False
1,breast_cancer,mlp,0.9103,83.1602,0.9376,0.3006,0.9123,0.9103,True
2,breast_cancer,random-forest,0.9709,276.6465,1.0,1.0,0.9708,0.9709,True
3,breast_cancer,svm,0.8731,26.5889,0.8992,0.0961,0.8772,0.8731,True
4,breast_cancer,wisard,0.9628,92.696,0.9917,0.3351,0.963,0.9628,True
5,dry_bean,knn,0.7175,1446.7705,0.78,0.1044,0.7216,0.7175,False
6,dry_bean,mlp,0.2251,61.7367,0.2447,0.0045,0.3286,0.2251,True
7,dry_bean,random-forest,0.9198,13856.8398,1.0,1.0,0.9199,0.9198,True
8,dry_bean,svm,0.6014,1468.3936,0.6538,0.106,0.6372,0.6014,False
9,dry_bean,wisard,0.9024,455.0872,0.981,0.0328,0.9032,0.9024,True


In [27]:
result_df.groupby("model").pareto.value_counts().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,pareto
model,pareto,Unnamed: 2_level_1
knn,False,11
knn,True,4
mlp,True,12
mlp,False,3
random-forest,True,9
random-forest,False,6
svm,False,10
svm,True,5
wisard,True,8
wisard,False,7


In [47]:
import plotly.express as px
import plotly.graph_objects as go

# Assuming 'data' is our DataFrame
data = result_df.copy()

# dfs = []
# for dset in order_of_datasets:
#     x = data[data["dataset"] == dset].copy()
#     dfs.append(x)
# data = pd.concat(dfs).reset_index(drop=True)

# Define marker symbols for each model
marker_symbols = {
    "knn": "circle",
    "mlp": "square",
    "random-forest": "diamond",
    "svm": "cross",
    "wisard": "x",
}

# Define model names for legend
model_names = {
    "knn": "KNN",
    "mlp": "MLP",
    "random-forest": "Random Forest",
    "svm": "SVM",
    "wisard": "Wisard",
}

pareto_colors = {
    True: px.colors.qualitative.Plotly[1],
    False: px.colors.qualitative.Plotly[0],
}

# data["relative size"] = np.log(data["relative size"])

fig = px.scatter(
    data,
    x="relative size",
    y="metric",
    symbol="model",
    symbol_map=marker_symbols,
    color="pareto",
    color_discrete_map=pareto_colors,
    facet_col="dataset",
    facet_col_wrap=3,
    height=1000,
    width=900,
    facet_row_spacing=0.03,
)

fig.update_traces(
    marker=dict(size=7),
    selector=dict(mode="markers"),
    showlegend=False,  # Hides the legend entries created by Plotly Express
)

for anno in fig["layout"]["annotations"]:
    anno["text"] = anno["text"].split("=")[1].replace("_", " ")

# Manually map symbols to names in the legend
legend_labels = {
    symbol: model_names[model] for model, symbol in marker_symbols.items()
}

# Create a custom legend
custom_legend = []
for symbol, model_name in legend_labels.items():
    custom_legend.append(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(
                symbol=symbol, size=12, color=px.colors.qualitative.Plotly[0]
            ),
            name=model_name,
        )
    )

# Add custom legend to the figure
for trace in custom_legend:
    fig.add_trace(trace)

fig.update_layout(
    legend=dict(
        title="",  # Set title to empty string to remove the legend title
        orientation="h",
        yanchor="top",
        y=1.07,
        xanchor="center",
        x=0.5,
        traceorder="normal",  # Set trace order to normal to arrange legend entries horizontally
    ),
    margin=dict(l=10, r=10, t=10, b=10),
    font=dict(family="Times New Roman", size=14),
)

write_figure("model_metric_size.pdf", fig)

fig.show()

Figure written to: figures/model_metric_size.pdf
Filename   : model_metric_size.pdf
Latex label: model_metric_size


In [37]:
data

Unnamed: 0,level_0,index,dataset,model,metric,model_size,relative performance,relative size,accuracy,f1,pareto
0,0,20,iris,knn,0.9556,9.6777,0.9556,0.0737,0.9556,0.9556,False
1,1,21,iris,mlp,0.9778,26.9551,0.9778,0.2054,0.9778,0.9778,False
2,2,22,iris,random-forest,0.9333,131.2373,0.9333,1.0,0.9333,0.9333,False
3,3,23,iris,svm,1.0,3.6074,1.0,0.0275,1.0,1.0,False
4,4,24,iris,wisard,1.0,1.3398,1.0,0.0102,1.0,1.0,True
5,0,55,sepsis,knn,0.891,5838.1943,0.9993,0.658,0.923,0.891,False
6,1,56,sepsis,mlp,0.8909,16.7725,0.9992,0.0019,0.9263,0.8909,True
7,2,57,sepsis,random-forest,0.8908,8872.002,0.9991,1.0,0.9262,0.8908,False
8,3,58,sepsis,svm,0.8909,560.6406,0.9992,0.0632,0.9263,0.8909,False
9,4,59,sepsis,wisard,0.8916,33.5684,1.0,0.0038,0.9246,0.8916,True


In [29]:
import plotly.express as px
import plotly.graph_objects as go

# Assuming 'data' is our DataFrame
data = result_df.copy()

# Define marker symbols for each model
marker_symbols = {
    "knn": "circle",
    "mlp": "square",
    "random-forest": "diamond",
    "svm": "cross",
    "wisard": "x",
}

# Define model names for legend
model_names = {
    "knn": "KNN",
    "mlp": "MLP",
    "random-forest": "Random Forest",
    "svm": "SVM",
    "wisard": "Wisard",
}

pareto_colors = {
    True: px.colors.qualitative.Plotly[1],
    False: px.colors.qualitative.Plotly[0],
}


rows = 5
cols = 3

fig = go.Figure()







# data["relative size"] = np.log(data["relative size"])

fig = px.scatter(
    data,
    x="relative size",
    y="metric",
    symbol="model",
    symbol_map=marker_symbols,
    color="pareto",
    color_discrete_map=pareto_colors,
    facet_col="dataset",
    facet_col_wrap=3,
    height=1000,
    width=900,
    facet_row_spacing=0.03,
)

fig.update_traces(
    marker=dict(size=7),
    selector=dict(mode="markers"),
    showlegend=False,  # Hides the legend entries created by Plotly Express
)

for anno in fig["layout"]["annotations"]:
    anno["text"] = anno["text"].split("=")[1].replace("_", " ")

# Manually map symbols to names in the legend
legend_labels = {
    symbol: model_names[model] for model, symbol in marker_symbols.items()
}

# Create a custom legend
custom_legend = []
for symbol, model_name in legend_labels.items():
    custom_legend.append(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(
                symbol=symbol, size=12, color=px.colors.qualitative.Plotly[0]
            ),
            name=model_name,
        )
    )

# Add custom legend to the figure
for trace in custom_legend:
    fig.add_trace(trace)

fig.update_layout(
    legend=dict(
        title="",  # Set title to empty string to remove the legend title
        orientation="h",
        yanchor="top",
        y=1.07,
        xanchor="center",
        x=0.5,
        traceorder="normal",  # Set trace order to normal to arrange legend entries horizontally
    ),
    margin=dict(l=10, r=10, t=10, b=10),
    font=dict(family="Times New Roman", size=14),
)

write_figure("model_metric_size.pdf", fig)

fig.show()

Figure written to: figures/model_metric_size.pdf
Filename   : model_metric_size.pdf
Latex label: model_metric_size


In [None]:
dfs = {}
for dset_name, dset_df in best_metric_dataset_model.groupby("dataset"):
    max_val = dset_df["metric"].max()
    dset_df["model_size (KB)"] = (dset_df["model_size"] / 1024)
    dset_df["relative performance"] = dset_df["metric"]  / max_val
    dset_df = dset_df[["relative performance", "model_size (KB)"]]
    dfs[dset_name] = dset_df
    
result_df = pd.concat(dfs.values(), keys=dfs.keys()).reset_index()
# result_df.reset_index(level=0, inplace=True)
result_df

In [None]:
print("How many times per dataset, each model is the best tradeoff?")
best_metric_dataset_model[
    best_metric_dataset_model.best_tradeoff == True
].value_counts("model").to_frame().reset_index()

In [None]:
facets = 3
cmap = px.colors.qualitative.Prism

colors = {
    name: cmap[i]
    for i, name in enumerate(sorted(best_metric_dataset_model.model.unique()))
}

# Scatter plot for trade-off with normalized model size
fig_tradeoff_normalized = px.scatter(
    best_metric_dataset_model,
    x="metric",
    y="normalized_model_size_ratio",
    color="model",
    facet_col="dataset",
    facet_col_wrap=facets,
    # title="Trade-off Between Metric and Normalized Model Size Across Datasets",
    labels={
        "metric": "Performance",
        "normalized_model_size_ratio": "Size Ratio (normalized)",
        "model": "",
    },
    facet_row_spacing=0.07,
    width=1400,
    height=800,
    color_discrete_sequence=cmap,
)


fig_tradeoff_normalized.update_traces(
    marker=dict(size=7.5),
)

fig_tradeoff_normalized.update_xaxes(showticklabels=True)

fig_tradeoff_normalized.update_yaxes(showticklabels=True)

fig_tradeoff_normalized.for_each_annotation(
    lambda a: a.update(text=a.text.split("=")[-1])
)

# fig_tradeoff_normalized.update_layout(
#     legend=dict(
#         orientation="h", yanchor="bottom", y=-0.15, xanchor="center", x=0.5,
#     ),

# )

fig_tradeoff_normalized.update_layout(
    legend=dict(
        orientation="h",
        yanchor="top",
        y=1.05,
        xanchor="center",
        x=0.5,
        itemsizing="constant",  # Set this to "constant" to show only one item for color
        font=dict(family="Times New Roman", size=14),
    ),
    height=1200,
    width=2480 /2.5,
    font=dict(family="Times New Roman", size=14),
)
num_rows = len(results.dataset.unique()) // facets
num_cols = facets
# datasets = list(sorted(results.dataset.unique(), reverse=False))

for r in range(num_rows):
    for c in range(num_cols):
       
        facet = r * num_cols + c
        dset = fig_tradeoff_normalized.layout.annotations[facet]['text']

        x_line = (
            results[results["dataset"] == dset]["metric"].max()
            - metric_threshold
        )
        
 # Scatter plot for best tradeoff points with a cross
        best_tradeoff_points = best_metric_dataset_model[
            (best_metric_dataset_model["dataset"] == dset)
            & (best_metric_dataset_model["best_tradeoff"] == True)
        ]
        
        best_tradeoff_model = best_tradeoff_points.model.iloc[0]
        
        # print(f"Facet {facet}: {dset} - {best_tradeoff_model}. Max: {results[results['dataset'] == dset]['metric'].max()}, line: {x_line}")
        
        fig_tradeoff_normalized.add_trace(
            go.Scatter(
                x=best_tradeoff_points["metric"],
                y=best_tradeoff_points["normalized_model_size_ratio"],
                mode="markers",
                marker=dict(
                    size=10,
                    symbol="x",
                    color=colors[best_tradeoff_model]
                ),
                showlegend=False,  # To not duplicate in the legend
            ),
            row=r + 1,
            col=c + 1,
        )
        
        fig_tradeoff_normalized.add_vline(
            x=x_line, line_dash="dot", row=r + 1, col=c + 1, line_width=1
        )

# Display the plot
fig_tradeoff_normalized.show()

write_figure("performance_size_tradeoff_normalized.pdf", fig_tradeoff_normalized)

In [None]:
best_metric_dataset_model[
    best_metric_dataset_model.best_tradeoff == True
][["dataset", "model", "config_name", "metric", "model_size", "model_size_ratio"]]

In [None]:
best_metric_dataset_model[best_metric_dataset_model ["dataset"] == "iris"].dropna(axis=1)[["model", "metric", "model_size", "model_size_ratio", "normalized_model_size_ratio"]]

In [None]:
best_metric_dataset_model[["dataset", "model", "config_name", "metric", "model_size", "model_size_ratio"]]

In [None]:
best_metric_dataset_model[
    (best_metric_dataset_model.best_tradeoff == True) & (best_metric_dataset_model.model == "wisard")
]["model_size_ratio"]

In [None]:
encoder_info = best_metric_dataset_model[best_metric_dataset_model.model == "wisard"]
encoder_info = encoder_info[["dataset", "encoder", "resolution", "tuple_size", "bleach"]].reset_index(drop=True)
encoder_info.rename(columns={"encoder": "Encoder", "resolution": "Resolution", "tuple_size": "Tuple Size", "bleach": "Bleach"}, inplace=True)
encoder_info["Encoder"] = encoder_info["Encoder"].apply(lambda x: "Distributive Thermometer" if x == "distributive-thermometer" else "Thermometer")
encoder_info["Resolution"] = encoder_info["Resolution"].astype(int)
encoder_info["Tuple Size"] = encoder_info["Tuple Size"].astype(int)
encoder_info["Bleach"] = encoder_info["Bleach"].astype(int)
encoder_info

In [None]:
latex_str = encoder_info.to_latex(
    index=False,
    escape=True,
    caption="Parameters used for each experiment",
    label="tab:experiment-parameters",
    float_format="%.2f",
)
write_latex_table("experiment_parameters.tex", latex_str)

## 4. Select the wisard with best bloom filter

Here we show that, costing up to 1% of performance of the best dict-wisard, a 
space-efficient bloom filter achieves the best results.

In [None]:
# Up to 1% of accuracy loss
metric_threshold = 0.01

In [None]:
results = base_results.copy()
best_results = best_metric_dataset_model.copy()

results = results[results["model"] == "wisard"]
best_results = best_results[best_results["model"] == "wisard"]

results["bloom-filter"] = results["config_name"].apply(lambda x: x.split(" ")[0])
best_results["bloom-filter"] = best_results["config_name"].apply(lambda x: x.split(" ")[0])

In [None]:
temp = []


for dset_name, dset_df in results.groupby("dataset"):
    best_model = best_results[best_results["dataset"] == dset_name].iloc[0]
    # print(f"*** Dataset: {dset_name} with metric: {best_model['metric']} and model size: {int(best_model['model_size'])}")
    bests = dset_df[dset_df["metric"] >= best_model["metric"] - metric_threshold]
    bests["metric_improvement"] = bests["metric"]/ best_model["metric"]
    bests["model_size_improvement"] = bests["model_size"]/ best_model["model_size"]
    bests["best_metric"]  = best_model["metric"]
    bests["best_model_size"]  = best_model["model_size"]
    temp.append(bests)
    
results = pd.concat(temp, ignore_index=True)
results.sample(n=3)

In [None]:
best_results = results.loc[results.groupby("dataset")["model_size_improvement"].idxmin()]
best_results["bloom-filter"].value_counts()

In [None]:
best_results_table = best_results.sort_values(by=["dataset"])[["dataset", "config_name", "model_size_improvement"]]

best_results_table.rename(
    columns={
        "dataset": "Dataset",
        "config_name": "Bloom Filter",
        "model_size_improvement": "Size Ratio",
    },
    inplace=True,
)

best_results_table

In [None]:
latex_str = best_results_table.to_latex(
    index=False,
    escape=True,
    caption="Best Bloom Filter configuration for each dataset",
    label="tab:best_bloom_filter",
    float_format="%.2f",
)

write_latex_table("best_bloom_filter.tex", latex_str)

In [None]:
best_results_table = best_results.sort_values(by=["dataset"])[["dataset", "bloom-filter", "config_name", "model_size_improvement"]]
best_results_table.groupby("bloom-filter")["model_size_improvement"].agg(["mean", "std", "count"])

# 5. Select the wisard with best aggregated bloom filter

Here we show that, costing up to 1% of performance of the best dict-wisard, a 
space-efficient bloom filter achieves the best results.

In [None]:
# Up to 2% of accuracy loss
metric_threshold = 0.01

In [None]:
results = base_results.copy()
best_results = best_metric_dataset_model.copy()

results = results[results["model"] == "wisard"]
best_results = best_results[best_results["model"] == "wisard"]

results["bloom-filter"] = results["config_name"].apply(lambda x: x.split(" ")[0])
best_results["bloom-filter"] = best_results["config_name"].apply(lambda x: x.split(" ")[0])

# Filter: only Dict, CountingBloomFilter and CountMinSketch
results = results[results["bloom-filter"].isin(["Dict", "CountingBloomFilter", "CountMinSketch"])]
best_results = best_results[best_results["bloom-filter"].isin(["Dict", "CountingBloomFilter", "CountMinSketch"])]

In [None]:
temp = []


for dset_name, dset_df in results.groupby("dataset"):
    best_model = best_results[best_results["dataset"] == dset_name].iloc[0]
    # print(f"*** Dataset: {dset_name} with metric: {best_model['metric']} and model size: {int(best_model['model_size'])}")
    bests = dset_df[dset_df["metric"] >= best_model["metric"] - metric_threshold]
    bests["metric_improvement"] = bests["metric"]/ best_model["metric"]
    bests["model_size_improvement"] = bests["model_size"]/ best_model["model_size"]
    bests["best_metric"]  = best_model["metric"]
    bests["best_model_size"]  = best_model["model_size"]
    temp.append(bests)
    
results = pd.concat(temp, ignore_index=True)
results.sample(n=3)

In [None]:
best_results = results.loc[results.groupby("dataset")["model_size_improvement"].idxmin()]
best_results["bloom-filter"].value_counts()

In [None]:
best_results_table = best_results.sort_values(by=["dataset"])[["dataset", "config_name", "model_size_improvement"]]

best_results_table.rename(
    columns={
        "dataset": "Dataset",
        "config_name": "Bloom Filter Configuration",
        "model_size_improvement": "Size Ratio",
    },
    inplace=True,
)

best_results_table

In [None]:
latex_str = best_results_table.to_latex(
    index=False,
    escape=True,
    caption="Best Bloom Filter configuration for each dataset",
    label="tab:best_bloom_filter_agg",
    float_format="%.2f",
)

write_latex_table("best_bloom_filter_agg.tex", latex_str)

In [None]:
best_results_table = best_results.sort_values(by=["dataset"])[["dataset", "bloom-filter", "config_name", "model_size_improvement"]]
best_results_table.groupby("bloom-filter")["model_size_improvement"].agg(["mean", "std", "count"])