In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import json
import plotly.graph_objects as go
import plotly.express as px
from collections import defaultdict
from typing import Union

%matplotlib inline

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
matplotlib.rcParams["font.family"] = "Times New Roman"

In [3]:
datasets = [
    "breast_cancer",
    "glass",
    "iris",
    "letter",
    "mnist",
    "motion_sense",
    "satimage",
    "segment",
    "vehicle",
    "wine"
]

datasets_sizes = {
    "breast_cancer": 144,
    "glass": 24,
    "iris": 12,
    "letter": 2664,
    "mnist": 53672,
    "motion_sense": 12512,
    "satimage": 1864,
    "segment": 368,
    "vehicle": 132,
    "wine": 24
}

dataset_classes = {
    "breast_cancer": 2,
    "glass": 6,
    "iris": 3,
    "letter": 26,
    "mnist": 10,
    "motion_sense": 6,
    "satimage": 6,
    "segment": 7,
    "vehicle": 3,
    "wine": 3
}

dataset_train_samples = {
    "breast_cancer": 398,
    "glass": 149,
    "iris": 105,
    "letter": 1400,
    "mnist": 60000,
    "motion_sense": 3414,
    "satimage": 4501,
    "segment": 1617,
    "vehicle": 676,
    "wine": 124 
}

dataset_test_samples = {
    "breast_cancer": 171,
    "glass": 65,
    "iris": 45,
    "letter": 6000,
    "mnist": 10000,
    "motion_sense": 1020,
    "satimage": 1929,
    "segment": 693,
    "vehicle": 170,
    "wine": 54 
}

dataset_balanced = {
    "breast_cancer": "no",
    "glass": "no",
    "iris": "yes",
    "letter": "yes",
    "mnist": "yes",
    "motion_sense": "yes",
    "satimage": "no",
    "segment": "yes",
    "vehicle": "yes",
    "wine": "no"   
}

metrics_to_use = {
    "breast_cancer": "f1 weighted",
    "glass": "f1 weighted",
    "iris": "accuracy",
    "letter": "accuracy",
    "mnist": "accuracy",
    "motion_sense": "accuracy",
    "satimage": "f1 weighted",
    "segment": "accuracy",
    "vehicle": "accuracy",
    "wine": "f1 weighted"
}


dfs = [
    pd.read_csv(exp)
    for dset in datasets
    for exp in Path(f"{dset} experiment").glob("*.csv")
]

df = pd.concat(dfs)
df.replace({"mnist-dist-16": "mnist"}, inplace=True)
df
# d = json.loads(df.iloc[0]["encoder kwargs"])

Unnamed: 0,bleach,accuracy,f1 weighted,f1 macro,f1 micro,ties,run,train time,predict time,ram name,...,encoder kwargs,experiment name,model size,train samples,test samples,classes,rams per discriminator,discriminators,seed,indices
0,2,0.918129,0.918781,0.914107,0.918129,18,1,1.812207,0.481705,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9624,398,171,2,30,2,1670370805,480
1,5,0.929825,0.929825,0.925088,0.929825,7,1,1.812207,0.338999,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9624,398,171,2,30,2,1670370805,480
2,10,0.947368,0.947101,0.943263,0.947368,3,1,1.812207,0.338749,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9624,398,171,2,30,2,1670370805,480
3,2,0.929825,0.930384,0.926378,0.929825,26,2,0.497607,0.338319,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9720,398,171,2,30,2,1670370808,480
4,5,0.941520,0.941694,0.937954,0.941520,13,2,0.497607,0.339714,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9720,398,171,2,30,2,1670370808,480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430,2,0.814815,0.813095,0.819048,0.814815,8,3,0.071017,0.085170,stream-threshold,...,"{""resolution"": 16}",wine experiment,117624,124,54,3,13,3,1670371352,208
431,5,0.870370,0.869781,0.873519,0.870370,3,3,0.071017,0.087331,stream-threshold,...,"{""resolution"": 16}",wine experiment,117624,124,54,3,13,3,1670371352,208
432,8,0.888889,0.888060,0.892690,0.888889,6,3,0.071017,0.086613,stream-threshold,...,"{""resolution"": 16}",wine experiment,117624,124,54,3,13,3,1670371352,208
433,10,0.814815,0.812150,0.816460,0.814815,5,3,0.071017,0.081598,stream-threshold,...,"{""resolution"": 16}",wine experiment,117624,124,54,3,13,3,1670371352,208


In [8]:
meta = defaultdict(dict)
for dset in datasets:
    meta[dset]["size (KB)"] = datasets_sizes[dset]
    meta[dset]["classes"] = dataset_classes[dset]
    meta[dset]["train samples"] = dataset_train_samples[dset]
    meta[dset]["test samples"] = dataset_test_samples[dset]
    meta[dset]["balanced"] = dataset_balanced[dset]

meta_df = pd.DataFrame(meta).T
meta_df

Unnamed: 0,size (KB),classes,train samples,test samples,balanced
breast_cancer,144,2,398,171,no
glass,24,6,149,65,no
iris,12,3,105,45,yes
letter,2664,26,1400,6000,yes
mnist,53672,10,60000,10000,yes
motion_sense,12512,6,3414,1020,yes
satimage,1864,6,4501,1929,no
segment,368,7,1617,693,yes
vehicle,132,3,676,170,yes
wine,24,3,124,54,no


In [9]:
latex_tables_path = Path("tables")
figures_path = Path("figures")

def write_figure(filename: str, fig: go.Figure, path: Union[Path, str] = figures_path):
    """Write a Figure to a file.

    Parameters
    ----------
    filename : str
        The name of the file to write to.
    fig : go.Figure
        The plotly figure object.
    path : Union[Path, str], optional
        The path where the file will be stored, by default figures_path
    """
    path = Path(path)
    path.mkdir(exist_ok=True, parents=True)
    fname = path/filename
    fig.write_image(fname)
    print(f"Figure written to: {fname}")
    print(f"Filename   :", filename)
    print(f"Latex label:", filename.replace(".pdf",""))
    
def write_latex_table(filename: str, table: str, path: Union[Path, str] = latex_tables_path):
    """Write a latex table to a file.

    Parameters
    ----------
    filename : str
        The name of the file to write to.
    table : str
        The table, as a string.
    path : Union[Path, str], optional
        The path where the file will be stored, by default latex_tables_path
    """
    path = Path(path)
    path.mkdir(exist_ok=True, parents=True)
    fname = path/filename
    with fname.open("w") as f:
        f.write("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
        f.write("%% WARNING: DO NOT CHANGE THIS FILE. IT IS GENERATED AUTOMATICALLY %\n")
        f.write("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
        f.write(table)
    print(f"Table written to: {fname}")



In [10]:
df = df[df["dataset name"] != "segment"]
df = df[df["ram kwargs"] != '{"est_elements": 1000, "false_positive_rate": 0.05}']
df = df[df["ram kwargs"] != '{"width": 1000, "depth": 5}']
df = df[df["ram kwargs"] != '{"width": 1000, "depth": 3}']
df


Unnamed: 0,bleach,accuracy,f1 weighted,f1 macro,f1 micro,ties,run,train time,predict time,ram name,...,encoder kwargs,experiment name,model size,train samples,test samples,classes,rams per discriminator,discriminators,seed,indices
0,2,0.918129,0.918781,0.914107,0.918129,18,1,1.812207,0.481705,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9624,398,171,2,30,2,1670370805,480
1,5,0.929825,0.929825,0.925088,0.929825,7,1,1.812207,0.338999,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9624,398,171,2,30,2,1670370805,480
2,10,0.947368,0.947101,0.943263,0.947368,3,1,1.812207,0.338749,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9624,398,171,2,30,2,1670370805,480
3,2,0.929825,0.930384,0.926378,0.929825,26,2,0.497607,0.338319,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9720,398,171,2,30,2,1670370808,480
4,5,0.941520,0.941694,0.937954,0.941520,13,2,0.497607,0.339714,DictRam,...,"{""resolution"": 16}",breast_cancer experiment,9720,398,171,2,30,2,1670370808,480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430,2,0.814815,0.813095,0.819048,0.814815,8,3,0.071017,0.085170,stream-threshold,...,"{""resolution"": 16}",wine experiment,117624,124,54,3,13,3,1670371352,208
431,5,0.870370,0.869781,0.873519,0.870370,3,3,0.071017,0.087331,stream-threshold,...,"{""resolution"": 16}",wine experiment,117624,124,54,3,13,3,1670371352,208
432,8,0.888889,0.888060,0.892690,0.888889,6,3,0.071017,0.086613,stream-threshold,...,"{""resolution"": 16}",wine experiment,117624,124,54,3,13,3,1670371352,208
433,10,0.814815,0.812150,0.816460,0.814815,5,3,0.071017,0.081598,stream-threshold,...,"{""resolution"": 16}",wine experiment,117624,124,54,3,13,3,1670371352,208


In [50]:
rename_elements = {
    'DictRam {}': "Dict-WiSARD",
    'count-bloom {"est_elements": 100, "false_positive_rate": 0.02}': 'CB FPR=0.02',
    'count-bloom {"est_elements": 100, "false_positive_rate": 0.05}': 'CB FPR=0.05',
    'count-bloom {"est_elements": 100, "false_positive_rate": 0.08}': 'CB FPR=0.08',
    'count-min-sketch {"width": 20, "depth": 2}': 'CMS W=20 D=2',
    'count-min-sketch {"width": 20, "depth": 3}': 'CMS W=20 D=3',
    'count-min-sketch {"width": 50, "depth": 2}': 'CMS W=50 D=2',
    'count-min-sketch {"width": 50, "depth": 3}': 'CMS W=50 D=3',
    'count-min-sketch {"width": 100, "depth": 3}': 'CMS W=100 D=3',
    'count-min-sketch {"width": 100, "depth": 5}': 'CMS W=100 D=5',
    'count-min-sketch {"width": 500, "depth": 3}': 'CMS W=500 D=3',
    'count-min-sketch {"width": 500, "depth": 5}': 'CMS W=500 D=5',
    'count-min-sketch {"width": 1000, "depth": 3}': 'CMS W=1000 D=3',
    'count-min-sketch {"width": 1000, "depth": 5}': 'CMS W=1000 D=5',
}

element_order = [
    "Dict-WiSARD",
    'CB FPR=0.02',
    'CB FPR=0.05',
    'CB FPR=0.08',
    'CMS W=20 D=2',
    'CMS W=20 D=3',
    'CMS W=50 D=2',
    'CMS W=50 D=3',
    'CMS W=100 D=3',
    'CMS W=100 D=5',
    'CMS W=500 D=3',
    'CMS W=500 D=5',
    'CMS W=1000 D=3',
    'CMS W=1000 D=5'
]

datasets_order = [
    "iris",
    "glass",
    "wine",
    "vehicle",
    "breast_cancer",
    # "segment (368)",
    "satimage",
    'letter',
    'motion_sense',
    'mnist'
]

In [51]:
def get_best(df: pd.DataFrame, metric: str = None, improvement_col: str = "DictRam {}", valid_configs: tuple = ("DictRam", "count-min-sketch", "count-bloom")):
    if valid_configs is None:
        valid_configs = df["ram name"].unique()
    
    configs = list(k for k, _ in df[df["ram name"].isin(valid_configs)].groupby(["ram name", "ram kwargs"]))
    datasets = list(df["dataset name"].unique())

    d = defaultdict(dict)
    for c in configs:
        c_str = f"{c[0]} {c[1]}"
        for dset in datasets:
            metric_to_use = metrics_to_use[dset]
            x = df.loc[(df["ram name"] == c[0]) & (df["ram kwargs"] == c[1]) & (df["dataset name"] == dset)]
            best = x.sort_values(by=metric_to_use, ascending=False).iloc[0]
            if metric is None:
                metric = metric_to_use
            d[c_str][dset] = best[metric]
            
    d = pd.DataFrame(d)
    if improvement_col is not None:
        r = d[improvement_col]
        for c in d.columns:
            d[c] = d[c] / r
                
    return pd.DataFrame(d)

def do_rename_and_reorder(d, remove_dict: bool = False):
    d = d.rename(columns=rename_elements)
    if remove_dict:
        d = d[element_order[1:]]
    else:
        d = d[element_order]
    d = d.T
    # d = d.rename(columns=datasets_rename)
    d = d[datasets_order]
    return d


In [52]:

d = get_best(df, metric=None, improvement_col="DictRam {}", )
d = do_rename_and_reorder(d, remove_dict=False)
d["average"] = d.mean(axis=1)

d_m = get_best(df, metric="model size", improvement_col="DictRam {}")
d_m = do_rename_and_reorder(d_m, remove_dict=False)
d_m["average"] = d_m.mean(axis=1)


# Get the accuracy improvement data
accuracy_improvement = d["average"]

# Get the model size improvement data
model_size_improvement = d_m["average"]

# # Plot the graph
# plt.figure(figsize=(8, 6))
# plt.scatter(model_size_improvement, accuracy_improvement)
# plt.xlabel("Model Size Improvement")
# plt.ylabel("Accuracy Improvement")
# plt.title("Accuracy Improvement vs Model Size Improvement")
# plt.grid(True)
# plt.show()


KeyError: "['CMS W=1000 D=3', 'CMS W=1000 D=5'] not in index"

In [53]:
d = get_best(df, metric=None, improvement_col="DictRam {}", valid_configs=None)
d

Unnamed: 0,DictRam {},"count-bloom {""est_elements"": 100, ""false_positive_rate"": 0.02}","count-bloom {""est_elements"": 100, ""false_positive_rate"": 0.05}","count-bloom {""est_elements"": 100, ""false_positive_rate"": 0.08}","count-cuckoo {""capacity"": 100, ""bucket_size"": 2}","count-cuckoo {""capacity"": 100, ""bucket_size"": 4}","count-cuckoo {""capacity"": 1000, ""bucket_size"": 2}","count-cuckoo {""capacity"": 1000, ""bucket_size"": 4}","count-cuckoo {""capacity"": 500, ""bucket_size"": 2}","count-cuckoo {""capacity"": 500, ""bucket_size"": 4}",...,"stream-threshold {""threshold"": 100, ""width"": 1000, ""depth"": 3}","stream-threshold {""threshold"": 100, ""width"": 1000, ""depth"": 5}","stream-threshold {""threshold"": 100, ""width"": 250, ""depth"": 3}","stream-threshold {""threshold"": 100, ""width"": 250, ""depth"": 5}","stream-threshold {""threshold"": 50, ""width"": 100, ""depth"": 2}","stream-threshold {""threshold"": 50, ""width"": 100, ""depth"": 3}","stream-threshold {""threshold"": 50, ""width"": 1000, ""depth"": 3}","stream-threshold {""threshold"": 50, ""width"": 1000, ""depth"": 5}","stream-threshold {""threshold"": 50, ""width"": 250, ""depth"": 3}","stream-threshold {""threshold"": 50, ""width"": 250, ""depth"": 5}"
breast_cancer,1.0,1.011882,0.993383,0.999476,1.012289,0.999476,1.000139,0.987711,1.012168,0.999669,...,0.987307,1.006077,1.00593,1.018273,0.98788,0.999669,1.005389,0.993771,0.999669,1.005587
glass,1.0,1.003155,1.039505,1.046843,1.064129,1.042679,1.042679,1.025175,1.036127,1.047019,...,1.089614,1.006471,0.983756,1.105478,0.994586,1.023018,1.090723,1.025831,1.023018,1.024269
iris,1.0,1.0,1.0,1.023442,1.0,1.046729,1.046729,1.046729,0.976869,1.046729,...,1.023442,1.0,1.023442,1.023442,1.023442,1.023442,1.023442,1.023442,1.023442,1.023442
letter,1.0,0.990913,0.992477,0.999775,0.990275,0.998459,1.011001,0.985575,0.998119,0.992644,...,1.004497,0.990774,0.990189,1.005405,0.974987,0.994633,1.000403,0.995133,0.99689,0.984613
mnist,1.0,1.001253,1.000044,0.999044,1.003365,0.999815,1.000937,0.999887,1.000787,1.000478,...,1.00096,1.000465,1.00361,1.000926,0.995367,0.996819,0.999809,0.999678,0.998609,1.000862
motion_sense,1.0,0.987127,0.99423,0.997931,0.998677,0.980293,0.98993,0.975933,0.981319,0.97321,...,0.985413,0.997352,1.024057,1.004846,0.94962,0.953756,0.986481,1.006636,0.991765,1.006696
satimage,1.0,1.001291,1.00349,1.00164,1.008746,1.005668,1.004431,0.999344,1.000881,0.993955,...,1.012661,1.008101,0.999889,0.998847,0.996716,1.005526,1.002861,1.001441,1.004683,1.00069
vehicle,1.0,1.0808,1.0808,1.133835,1.069535,1.016451,1.107782,0.958017,1.070537,1.096035,...,1.009498,1.04107,1.15683,0.994764,1.046574,1.085012,1.15683,1.008552,1.079267,1.10325
wine,1.0,1.00066,0.981003,1.00066,0.981428,0.981493,0.961553,1.038868,0.98113,0.981493,...,0.943464,0.962363,0.981,0.980256,0.94092,0.962508,0.981,0.980256,0.962508,0.980256


In [41]:
model_size_improvement

Dict-WiSARD       1.000000
CB FPR=0.02      19.669741
CB FPR=0.05      15.088130
CB FPR=0.08      14.449569
CMS W=20 D=2      1.197328
CMS W=20 D=3      1.741568
CMS W=50 D=2      2.830047
CMS W=50 D=3      4.190647
CMS W=100 D=3     7.292196
CMS W=100 D=5    12.089694
Name: average, dtype: float64

In [42]:
1/model_size_improvement

Dict-WiSARD      1.000000
CB FPR=0.02      0.050840
CB FPR=0.05      0.066277
CB FPR=0.08      0.069206
CMS W=20 D=2     0.835193
CMS W=20 D=3     0.574195
CMS W=50 D=2     0.353351
CMS W=50 D=3     0.238627
CMS W=100 D=3    0.137133
CMS W=100 D=5    0.082715
Name: average, dtype: float64

In [32]:
d_m = get_best(df, metric="model size", improvement_col="DictRam {}")
d_m = do_rename_and_reorder(d_m, remove_dict=False)
d_m["average"] = d_m.mean(axis=1)
d_m = d_m.T

d_m

Unnamed: 0,Dict-WiSARD,CB FPR=0.02,CB FPR=0.05,CB FPR=0.08,CMS W=20 D=2,CMS W=20 D=3,CMS W=50 D=2,CMS W=50 D=3,CMS W=100 D=3,CMS W=100 D=5
iris,1.0,45.852749,35.172414,29.692451,2.460391,3.578751,5.815471,8.61137,16.999068,28.182665
glass,1.0,14.278114,10.952358,24.655865,2.043047,2.971705,4.829021,7.150665,5.293349,8.775816
wine,1.0,41.184804,31.591758,26.669672,2.209916,3.214424,5.223439,7.734707,15.268513,25.313587
vehicle,1.0,31.810345,24.400862,20.599138,1.706897,2.482759,4.034483,5.974138,11.793103,19.551724
breast_cancer,1.0,20.551378,15.764411,13.308271,1.102757,1.60401,2.606516,3.859649,7.619048,12.631579
satimage,1.0,6.547209,5.022188,4.239717,0.351314,0.511002,0.830378,1.229598,2.427258,4.024138
letter,1.0,9.047256,6.939907,5.85865,0.485463,0.706127,1.147457,1.699119,3.354105,5.560752
motion_sense,1.0,4.890643,3.751481,3.166989,0.262425,0.381709,0.620277,0.918487,1.813116,3.005956
mnist,1.0,2.865168,2.197793,1.855371,0.153741,0.223623,0.363387,0.538092,1.062208,1.76103
average,1.0,19.669741,15.08813,14.449569,1.197328,1.741568,2.830047,4.190647,7.292196,12.089694


In [29]:
d = get_best(df, metric=None, improvement_col="DictRam {}")
d

Unnamed: 0,DictRam {},"count-bloom {""est_elements"": 100, ""false_positive_rate"": 0.02}","count-bloom {""est_elements"": 100, ""false_positive_rate"": 0.05}","count-bloom {""est_elements"": 100, ""false_positive_rate"": 0.08}","count-min-sketch {""width"": 100, ""depth"": 3}","count-min-sketch {""width"": 100, ""depth"": 5}","count-min-sketch {""width"": 20, ""depth"": 2}","count-min-sketch {""width"": 20, ""depth"": 3}","count-min-sketch {""width"": 50, ""depth"": 2}","count-min-sketch {""width"": 50, ""depth"": 3}","count-min-sketch {""width"": 500, ""depth"": 3}","count-min-sketch {""width"": 500, ""depth"": 5}"
breast_cancer,1.0,1.011882,0.993383,0.999476,0.999264,1.012032,0.98752,0.975622,0.969388,0.993771,0.993383,1.018383
glass,1.0,1.003155,1.039505,1.046843,1.051983,1.070902,1.002355,1.006372,1.000554,1.036606,1.081003,1.070902
iris,1.0,1.0,1.0,1.023442,1.046729,1.046729,1.000208,1.000208,1.000208,1.000208,1.023442,1.046729
letter,1.0,0.990913,0.992477,0.999775,1.000607,0.99523,0.850384,0.908937,0.962972,0.971739,0.98291,0.99825
mnist,1.0,1.001253,1.000044,0.999044,0.995224,0.998697,0.907175,0.918543,0.976026,0.982934,0.999895,0.997876
motion_sense,1.0,0.987127,0.99423,0.997931,0.993908,0.970239,0.696629,0.660171,0.986759,0.949648,0.991756,0.981736
satimage,1.0,1.001291,1.00349,1.00164,0.999394,0.993321,0.957823,0.966275,0.994027,1.006444,1.000878,1.006498
vehicle,1.0,1.0808,1.0808,1.133835,1.075592,1.079648,1.063251,1.118284,1.120864,0.981948,0.973554,1.117418
wine,1.0,1.00066,0.981003,1.00066,1.038868,0.980256,1.038868,0.961004,0.961004,0.961004,0.981153,0.961879


In [69]:
d = get_best(df, metric="model size", valid_configs=None)
# d = do_rename_and_reorder(d, remove_dict=False)
# d["max"] = d.max(axis=1)
# d["average"] = d.mean(axis=1)
# d["min"] = d.min(axis=1)

d = d

fig = px.imshow(
    d,
    text_auto=".2f",
    aspect="auto",
    # color_continuous_scale='RdBu_r',
    color_continuous_scale=[(0.0, "lightgreen"), (0.0025, "lightyellow"), (1.0, "red")],
    color_continuous_midpoint=1.0,
    # zmax=2,
    zmin=0,
)

# fig.update_xaxes(side="top")
fig.update_layout(
    # xaxis_title="Filter",
    # yaxis_title="Dataset",
    # width=1000,
    # height=200,
    font_family="Times New Roman", 
    font_size=12, 
    margin=dict(l=0, r=0, t=10, b=0),
    xaxis=dict(tickangle=0, tickfont = dict(size=12)),
    yaxis=dict(tickangle=0, tickfont = dict(size=12))
)

# write_figure("size_improvement.pdf", fig)

fig.show()

In [61]:
d = get_best(df, metric=None, valid_configs=None)
# d = do_rename_and_reorder(d, remove_dict=True)
d = d.T
d

dsets = list(d.index)
fig = go.Figure()
for i, c in enumerate(d.columns):
    x = dsets
    y = d[c]
    name = c
    fig.add_trace(
        go.Bar(
            x=x,
            y=y,
            # text="pop",
            # textfont="Times New Roman",
            # textposition="auto",
            name=name,
            marker_color=px.colors.qualitative.Plotly[i]
        )
    )
    
fig.add_hline(y=1, line_width=1, line_dash="dash", line_color="red")

fig.update_yaxes(range=[0.60, 1.15]) 

fig.update_layout(
    yaxis_title="Performance improvement",
    # xaxis_title="Dataset",
    # width=800,
    # height=200,
    font_family="Times New Roman", 
    font_size=12,
    margin=dict(l=0, r=0, t=10, b=0),
    xaxis=dict(tickfont = dict(size=12))
    # legend_title_text="Filter"
)

# write_figure("accuracy_improvement.pdf", fig)
    
fig.show()

In [19]:
d = get_best(df, metric=None, improvement_col=None)
d = do_rename_and_reorder(d, remove_dict=False)
d = d.T

d_m = get_best(df, metric="model size", improvement_col=None)
d_m = do_rename_and_reorder(d_m, remove_dict=False)
d_m = d_m.T


acc_meta = meta_df.merge(d[["Dict-WiSARD"]], left_index=True, right_index=True)
acc_meta = acc_meta.rename(columns={"Dict-WiSARD": "Score"})
acc_meta = acc_meta.merge(d_m[["Dict-WiSARD"]]//1e3, left_index=True, right_index=True)
acc_meta = acc_meta.rename(columns={"Dict-WiSARD": "Memory Footprint (Kb)"})
acc_meta = acc_meta.reset_index().rename(columns={"index": "dataset"})
acc_meta = acc_meta.rename(columns={
    "dataset": "Dataset",
    "size (KB)": "Size (KB)",
    "classes": "Classes",
    "train samples": "#Train",
    "test samples": "#Test",
    "balanced": "Balanced",
})
latex_str = acc_meta.to_latex(
    float_format="%.2f",
    index=False,
    caption="Description of the datasets used in the experiments and the performance obtained using Dict WiSARD. For unbalanced datasets, the f1-score is reported as score, else the accuracy is reported.",
    label="tab:datasets",
)

# write_latex_table("datasets.tex", latex_str)

print(latex_str)

\begin{table}
\caption{Description of the datasets used in the experiments and the performance obtained using Dict WiSARD. For unbalanced datasets, the f1-score is reported as score, else the accuracy is reported.}
\label{tab:datasets}
\begin{tabular}{llllllrr}
\toprule
Dataset & Size (KB) & Classes & #Train & #Test & Balanced & Score & Memory Footprint (Kb) \\
\midrule
breast_cancer & 144 & 2 & 398 & 171 & no & 0.95 & 9.00 \\
glass & 24 & 6 & 149 & 65 & no & 0.64 & 12.00 \\
iris & 12 & 3 & 105 & 45 & yes & 0.96 & 1.00 \\
letter & 2664 & 26 & 1400 & 6000 & yes & 0.86 & 301.00 \\
mnist & 53672 & 10 & 60000 & 10000 & yes & 0.91 & 8975.00 \\
motion_sense & 12512 & 6 & 3414 & 1020 & yes & 0.63 & 1448.00 \\
satimage & 1864 & 6 & 4501 & 1929 & no & 0.87 & 108.00 \\
vehicle & 132 & 3 & 676 & 170 & yes & 0.69 & 5.00 \\
wine & 24 & 3 & 124 & 54 & no & 0.96 & 3.00 \\
\bottomrule
\end{tabular}
\end{table}

