In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the local `utils` module used below) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
from utils import make_topk_tables, get_scheduler_params, label_resets, agg_dataframe, get_best_params, filter_dataframe
import pandas as pd 
import numpy as np

In [3]:
# Load the v5 schedule results; `df1` is the input for building the v6 file in the next cell.
df1 = pd.read_csv("../reports/schedule/v5.csv.zip")

In [7]:
# Build the v6 schedule results file:
#   * keep only the v5 rows whose drift_confidence equals 0,
#   * append every row of new_reset.csv.zip,
#   * write the combined table (without the index) to v6.csv.zip.
dfn = df1.query("drift_confidence == 0")
df2 = pd.read_csv("../reports/schedule/new_reset.csv.zip")
(
    pd.concat([dfn, df2])
    .to_csv("../reports/schedule/v6.csv.zip", index=False)
)

In [8]:
# --- Results for LR schedulers -------------------------------------------
df_schedulers = pd.read_csv("../reports/schedule/v6.csv.zip")
# The return value is discarded; presumably this helper adds/updates the
# scheduler hyperparameter columns of the frame in place -- confirm in utils.
get_scheduler_params(df_schedulers)
# Any run with gamma == 1.0 is relabelled as the "Fixed" schedule.
df_schedulers.loc[
    df_schedulers["gamma"] == 1.0,
    "schedule",
] = "Fixed"
# Recompute the schedule label row by row with `label_resets`.
df_schedulers = df_schedulers.assign(
    schedule=lambda frame: frame.apply(label_resets, axis=1)
)

# --- Results for scheduler-free training ---------------------------------
df_optims = pd.read_csv("../reports/optimizers/v7.csv.zip").assign(
    drift_confidence=0
)
# For COCOB keep only the base_lr == 100 rows; all other optimizers are kept as-is.
df_optims = df_optims[
    (df_optims["optimizer"] != "COCOB") | (df_optims["base_lr"] == 100)
]

# Stack both result sets and replace every missing value with 1.
# NOTE(review): fillna(1) is applied to *all* columns, not only to the
# hyperparameter columns that are absent from one of the two sources.
df = pd.concat([df_schedulers, df_optims]).fillna(1)

In [10]:
# Express accuracy as a percentage instead of a fraction.
# The original unconditional `*= 100` was not idempotent: re-running this cell
# without reloading `df` multiplied the column by 100 again. The guard below
# only rescales while the values still look like fractions.
# NOTE(review): assumes accuracy is loaded as a fraction in [0, 1] -- confirm.
if df["accuracy"].max() <= 1.0:
    df["accuracy"] = df["accuracy"] * 100

In [11]:
# Columns identifying one experimental setting, and the hyperparameters that
# are searched over within each setting.
GROUP_KEYS = ("dataset", "optimizer", "schedule")
TUNED_PARAMS = ("gamma", "maxlr", "base_lr", "drift_confidence")

# Aggregate the raw results, keeping the setting and hyperparameter columns.
df_mean = agg_dataframe(df, cols_to_keep=[*GROUP_KEYS, *TUNED_PARAMS])

# For each (dataset, optimizer, schedule) pick the hyperparameters according
# to the "accuracy_mean" metric.
df_best = get_best_params(
    df_mean,
    params=list(TUNED_PARAMS),
    fixed_params=list(GROUP_KEYS),
    metric="accuracy_mean",
)

In [12]:
# Export the best-parameter rows of the "Insects abrupt" dataset (index omitted).
(
    df_best.loc[df_best["dataset"] == "Insects abrupt"]
    .to_csv("../reports/step_sizes/best_params_insects_abrupt.csv", index=False)
)

In [13]:
# Most frequent best `gamma` per schedule (several values are listed on ties).
gamma_per_schedule = df_best.groupby("schedule")["gamma"]
gamma_per_schedule.agg(pd.Series.mode)

schedule
Cyclic                              1.0
Cyclic reset                        1.0
Exponential                    0.999878
Exponential reset              0.999939
Fixed                               1.0
Fixed reset                         1.0
Plateau                             1.0
Step                 [0.5, 0.75, 0.875]
Step reset                          0.5
Name: gamma, dtype: object

In [14]:
# Most frequent best `maxlr` per schedule.
maxlr_per_schedule = df_best.groupby("schedule")["maxlr"]
maxlr_per_schedule.agg(pd.Series.mode)

schedule
Cyclic               0.25
Cyclic reset         0.25
Exponential          1.00
Exponential reset    1.00
Fixed                1.00
Fixed reset          1.00
Plateau              1.00
Step                 1.00
Step reset           1.00
Name: maxlr, dtype: float64

In [15]:
# Drift-confidence value used for the "... reset" schedule filters below.
drift_confidence = 1e-4

# One entry per configuration to keep. Presumably `filter_dataframe` selects
# the rows matching all key/value pairs of an entry -- confirm in utils.
# NOTE(review): the per-schedule mode printed above for "Exponential reset" is
# ~0.999939 (about 1 - 2**-14), while 1 - 2**-12 is used here -- confirm intended.
# NOTE(review): "Fixed reset" is not restricted to `drift_confidence`, unlike
# the other reset schedules -- confirm intended.
filter_dicts = [
    dict(schedule="Step", gamma=0.75),
    dict(schedule="Step reset", gamma=0.5, drift_confidence=drift_confidence),
    dict(schedule="Exponential", gamma=1 - 2**-13),
    dict(
        schedule="Exponential reset",
        gamma=1 - 2**-12,
        drift_confidence=drift_confidence,
    ),
    dict(schedule="Cyclic", maxlr=0.25),
    dict(schedule="Cyclic reset", maxlr=0.25, drift_confidence=drift_confidence),
    dict(schedule="Fixed"),
    dict(schedule="Fixed reset"),
    dict(optimizer="COCOB", base_lr=100),
]
df_filtered = filter_dataframe(df, filter_dicts)

In [16]:
# Sanity check: list the distinct schedule labels present in the combined results.
df["schedule"].unique()

array(['Plateau', 'Exponential', 'Step', 'Cyclic', 'Fixed', 'Fixed reset',
       'Exponential reset', 'Step reset', 'Cyclic reset'], dtype=object)

In [17]:
# Select what goes into the top-k tables.
# Datasets: every dataset that survived the filtering step.
datasets_all = df_filtered["dataset"].unique()

# Schedules: each base schedule followed by its "reset" variant
# ("Plateau" is not listed here).
schedules_all = [
    "Fixed", "Fixed reset",
    "Exponential", "Exponential reset",
    "Step", "Step reset",
    "Cyclic", "Cyclic reset",
]

# Optimizers, in the order they are passed to the table builder.
optimizers_all = [
    "SGD", "Adam", "SGDHD", "COCOB",
    "WNGrad", "DAdaptSGD", "DoG", "DDoG",
    "Mechanic", "DAdaptAdam", "DAdaptLion", "AdaGrad",
]

# Build the tables for k = 1 and k = 3, tagged with the "all" suffix.
make_topk_tables(
    df_filtered,
    datasets_all,
    optimizers_all,
    schedules_all,
    ks=[1, 3],
    suffix="all",
)