In [1]:
from utils import *
import pandas as pd 
import numpy as np 

In [2]:
def get_resetter_params(row):
    """Extract the numeric suffix from a resetter label.

    Args:
        row: Resetter label string, e.g. ``"<name>_<value>"``.

    Returns:
        The text after the last underscore converted to ``float``, or
        ``None`` when the label contains no underscore.

    Raises:
        ValueError: If the text after the last underscore is not a valid
            float literal.
    """
    # Labels without a parameter suffix carry no value to extract.
    if "_" not in row:
        return None
    _, _, suffix = row.rpartition("_")
    return float(suffix)

In [3]:
# Load the raw detector-comparison results.
df = pd.read_csv("../reports/detector_comparison/v5.csv.zip")
# Scale accuracy by 100 (presumably from a fraction to a percentage — confirm
# against the code that wrote the CSV).
df["accuracy"] = df["accuracy"] * 100
# Split labels such as "<name>_<value>" into the numeric suffix (reset_lr) and
# the bare resetter name; labels without "_" get a missing reset_lr.
df["reset_lr"] = df["resetter"].map(get_resetter_params)
df["resetter"] = df["resetter"].map(lambda label: label.partition("_")[0])

In [4]:
# Aggregate the P-KSWIN runs for every (dataset, gamma, base_lr, reset_lr)
# combination. Judging by the output columns, the helper yields *_mean / *_std
# statistics per group — TODO confirm against utils.agg_dataframe.
df_mean = agg_dataframe(
    df.loc[df["resetter"].eq("P-KSWIN")],
    cols_to_keep=["dataset", "gamma", "base_lr", "reset_lr"],
)
# For each dataset, select the (gamma, base_lr, reset_lr) setting ranked best
# on accuracy_mean (presumably the highest — see utils.get_best_params).
df_best = get_best_params(
    df_mean,
    metric="accuracy_mean",
    fixed_params=["dataset"],
    params=["gamma", "base_lr", "reset_lr"],
)
df_best

Unnamed: 0,dataset,gamma,base_lr,reset_lr,seed_mean,seed_std,Unnamed: 0_mean,Unnamed: 0_std,batch_size_mean,batch_size_std,...,step_mean,step_std,runtime_mean,runtime_std,accuracy_mean,accuracy_std,lr_mean,lr_std,loss_mean,loss_std
0,Covertype,0.999939,0.25,1.0,2.0,1.581139,3242867.0,1719.839958,4.0,0.0,...,50050.0,0.0,0.012621,0.000256,83.529,0.181641,0.249947,1.26749e-06,0.108469,0.001479
1,Electricity,0.999756,0.03125,0.5,2.0,1.581139,2660799.0,754.397468,4.0,0.0,...,22700.0,0.0,0.012303,9.4e-05,73.9117,0.429636,0.031212,8.075448e-07,0.523307,0.004602
2,Insects abrupt,0.999756,0.25,1.0,2.0,1.581139,1200590.0,827.5685,4.0,0.0,...,26450.0,0.0,0.012201,0.00027,71.72803,0.125727,0.249698,7.877833e-06,0.211204,0.000927
3,Insects gradual,0.999939,0.5,0.5,2.0,1.581139,784450.5,404.438885,4.0,0.0,...,12100.0,0.0,0.013571,0.001452,75.420747,0.154327,0.499724,4.647804e-06,0.180773,0.000902
4,Insects incr.,0.999878,0.25,0.125,2.0,1.581139,1877898.0,951.521713,4.0,0.0,...,28550.0,0.0,0.012233,0.000184,60.589474,0.223565,0.249366,3.354569e-06,0.268621,0.000979
5,RBF abrupt,0.999756,0.25,0.5,2.0,1.581139,75533.12,311.641311,4.0,0.0,...,10050.0,0.0,0.012177,9.1e-05,94.984,0.258732,0.249435,2.325629e-05,0.066471,0.002655
6,RBF incr.,0.999939,0.25,0.0625,2.0,1.581139,326487.9,350.080904,4.0,0.0,...,10050.0,0.0,0.012242,0.000116,73.724,1.418571,0.249396,1.659723e-05,0.265326,0.008012
7,SEA,0.999756,0.03125,0.5,2.0,1.581139,627493.7,321.999271,4.0,0.0,...,10050.0,0.0,0.012023,0.000126,87.524,6.159758,0.031211,1.010644e-06,0.286425,0.143804


In [5]:
# Keep every run of the other resetters, but restrict P-KSWIN to reset_lr == 0.5.
# (0.5 is exactly representable and was produced by float() on the label
# suffix, so an exact comparison is safe here.)
df = df[(df["resetter"] != "P-KSWIN") | (df["reset_lr"] == 0.5)]
# Restrict to gamma = 1 - 2**-12. gamma was parsed from CSV text, and the
# parser is not guaranteed to round-trip floats bit-for-bit, so compare with a
# tight absolute tolerance instead of `==`. Neighbouring gamma values seen in
# the results differ by ~1e-4, far above the 1e-9 tolerance.
df = df[np.isclose(df["gamma"], 1 - 2**-12, rtol=0.0, atol=1e-9)]
# Only these columns are needed for the tables built below.
df = df[["dataset", "resetter", "accuracy", 'base_lr', 'seed']]

In [6]:
# List the distinct resetter names that remain after filtering.
pd.unique(df["resetter"])

array(['P-KSWIN', 'ADWIN', 'KSWIN', 'OneTailedADWIN', 'Ground Truth',
       'ADWIN Weight Reset'], dtype=object)

In [7]:
# Define which data to show in table
datasets = [
    "RBF abrupt",
    "RBF incr.",
    "Covertype",
    "Insects abrupt",
    "Electricity",
]
ks = [1, 3]
columns = [(dataset, "accuracy") for dataset in datasets]
for k in ks:
    df = df[df["dataset"].isin(datasets)]
    table = get_topk_results(
        df,
        other_variables=["dataset", "resetter"],
        top_variable="base_lr",
        k=k,
    )
    table = table[["dataset", "resetter", "accuracy_mean", "accuracy_std"]]
    table = col_to_header(table, "dataset", index=["resetter"])
    mask_best = get_best_within_std(
        table, params_fixed=[], metrics=columns, modes="max"
    )
    table = merge_mean_std(table, columns, formats="{:.2f}")
    style = table.style
    style = apply_style_attribute(style, mask=mask_best)
    style.to_latex(f"../pub/tables/lr_resetting_top{k}.tex", hrules=True)

In [8]:
# Display the styled table left over from the final iteration of the loop over
# `ks` (i.e. the k = 3 table); the k = 1 table is only available as a .tex file.
style

Unnamed: 0_level_0,RBF abrupt,RBF incr.,Covertype,Insects abrupt,Electricity
Unnamed: 0_level_1,accuracy,accuracy,accuracy,accuracy,accuracy
resetter,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
ADWIN,94.28±.37,69.76±.92,82.70±.27,71.27±.14,73.05±.45
ADWIN Weight Reset,66.24±.55,61.12±.38,81.71±.06,47.87±.34,69.06±.90
Ground Truth,94.58±.27,nan±nan,nan±nan,71.43±.13,nan±nan
KSWIN,94.23±.55,70.10±1.98,83.01±.06,71.38±.16,73.13±.31
OneTailedADWIN,94.25±.38,70.22±2.58,82.64±.20,71.25±.15,73.00±.50
P-KSWIN,93.86±.48,70.30±1.07,83.03±.24,71.17±.12,73.23±.49
