In [27]:
import pandas as pd
import typing as t
import os

DATASETS = {
    "S-FMNIST", "S-CIFAR10", "S-CIFAR100", "S-CORe50", "SE-CIFAR100", "SE-CORe50"
}


In [28]:
# for all files in the directory

os.chdir("experiment_logs")
for filename in os.listdir("."):
    num, host, repo_hash, label, dataset, model, strategy = filename.split("_")

    if strategy == "cumulative":
        os.rename(filename, f"{num}_{host}_{repo_hash}_{label}_{dataset}_{model}_nonContinual")
os.chdir("..")

In [29]:
def load_clean_table():
    exp_data = pd.read_csv("results/all_experiments.csv")

    def repo_version_filter(df: pd.DataFrame, versions: t.Dict[str, t.Set[str]]) -> pd.DataFrame:
        """Filter a dataframe by repo version"""

        def _is_valid(row):
            if row["dataset"] in versions:
                return row["repo_hash"] in versions[row["dataset"]]
            return False
        return df[df.apply(_is_valid, axis=1)]

    def did_complete(row) -> bool:
        """Check if an experiment has completed"""
        return row["completed_tasks"] == row["n_experiences"]

    def remove_unused_parameters(row):
        """Remove unused hyperparameter from rows"""
        if not row["use_packnet"]:
            row["prune_proportion"] = ""
        elif row["prune_proportion"][0] == "[":
            row["prune_proportion"] = "ep"
        if not row["use_experience_replay"]:
            row["replay_buffer"] = ""
        return row

    exp_data = repo_version_filter(
            exp_data,
            {
                "S-FMNIST": {"1555acb6", "0a229afa", "4e7023cd", "4e7023cdD", "0a229afaD", "54dcf601"},
                "S-CIFAR10": {"1555acb6", "0a229afa", "4e7023cd", "4e7023cdD", "0a229afaD"},
                "S-CIFAR100": {"1555acb6", "0a229afa", "4e7023cd", "4e7023cdD", "0a229afaD"},
                "S-CORe50": {"1555acb6", "4e7023cd", "4e7023cdD"},
                "SE-CIFAR100": {"1555acb6", "0a229afa", "4e7023cd", "4e7023cdD", "0a229afaD"},
                "SE-CORe50": {"1555acb6", "4e7023cd", "4e7023cdD"},
            },
            
    )
    # remove certain experiment categories
    exp_data = exp_data[~exp_data["experiment_category"].isin(["TEST"])]
    exp_data = exp_data[~exp_data["strategy"].isin(["LwF", "SI"])]
    # Filter out incomplete experiments
    exp_data = exp_data[exp_data.apply(did_complete, axis=1)]
    # Simplify hyper-parameters
    exp_data = exp_data.apply(remove_unused_parameters, axis=1)

    # exp_data = hyper_parameters(exp_data)
    # print(exp_data.columns)
    # pd.set_option('display.max_rows', None)

    group_by_keys = [
        "strategy",
        "architecture",
        "replay_buffer",
        "prune_proportion",
        "architecture",
        "dataset",]

    """
    This is 
    """
    grouped = exp_data.groupby(group_by_keys).sample(10, replace=False, random_state=0).groupby(group_by_keys)
    table_components = {
        "final_accuracy_mean": grouped["final_accuracy"].mean(),
        "final_accuracy_std": grouped["final_accuracy"].std(),
        "count": grouped["experiment_code"].count()
    }

    table = pd.DataFrame(
        table_components
    )

    table = table.pivot_table(
            values=["final_accuracy_mean", "final_accuracy_std", "count"], 
            columns="dataset", 
            index=['strategy', 'replay_buffer', 'prune_proportion', 'architecture'])
    # table.reindex()
    return table
clean_table = load_clean_table()

In [30]:
# Add data from different repositories

from os import PathLike


def add_csv(df: pd.DataFrame, index: t.Tuple, csv_file: PathLike, ):
    data = pd.read_csv(csv_file, index_col=[0], comment="#")

    # Add rows
    for dataset in DATASETS:
        if dataset not in data.columns:
            continue
        df.loc[index, ("final_accuracy_mean",dataset)] = data.mean()[dataset]
        df.loc[index, ("final_accuracy_std",dataset)] = data.std()[dataset]
        df.loc[index, ("count", dataset)] = data.count()[dataset]

    return df
clean_table = add_csv(clean_table, ("SnB", "1000", "", "FF"), "results/SnB.csv")
clean_table = add_csv(clean_table, ("BIR", "", "", "VAE"), "results/BIR.csv")
clean_table = add_csv(clean_table, ("GR", "", "", "VAE"), "results/GR.csv")


In [31]:
clean_table = clean_table.reindex(["nonContinual", "taskOracle", "finetuning", "replay", "SnB", "GR", "BIR", "taskInference"], level="strategy")

In [32]:
DISPLAY_STRATEGY_NAMES = {
    "nonContinual": "Non-Continual",
    "taskOracle": "Task Oracle PackNet \cite{Mallya_Lazebnik_2018}",
    "replay": "Experience Replay",
    "finetuning": "Finetuning",
    "SnB": "Split and Bridge \cite{Kim_Choi_2021}",
    "GR": "Generative Replay \cite{vandevenBraininspiredReplayContinual2020}",
    "BIR": "Brain Inspired Replay \cite{vandevenBraininspiredReplayContinual2020}",
    "taskInference": "CI-PackNet (ours)"
}

In [33]:
clean_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,count,count,count,count,count,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,dataset,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50
strategy,replay_buffer,prune_proportion,architecture,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
nonContinual,,,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.87354,0.61027,0.348352,0.91969,0.46653,0.751212,0.004272,0.004829,0.024389,0.001879,0.002992,0.008263
nonContinual,,,VAE,10.0,10.0,10.0,10.0,10.0,10.0,0.87287,0.59763,0.357711,0.91974,0.45534,0.732983,0.002922,0.004862,0.028231,0.00299,0.001989,0.004642
taskOracle,,0.5,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.94553,0.62674,0.342062,0.98117,0.52854,0.405975,0.014504,0.006806,0.036982,0.01067,0.015046,0.014396
finetuning,,,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.19516,0.0861,0.067569,0.19811,0.07692,0.094674,0.003368,0.003613,0.010457,0.002802,0.003495,0.001804
finetuning,,,VAE,10.0,10.0,10.0,10.0,10.0,10.0,0.19246,0.08647,0.064834,0.19921,0.08004,0.093727,0.004916,0.004661,0.009187,0.00099,0.002007,0.003664
replay,100.0,,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.27539,0.09468,0.139542,0.58754,0.08845,0.317575,0.038332,0.005566,0.018695,0.059762,0.00409,0.029098
replay,1000.0,,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.58191,0.21352,0.298375,0.80422,0.17938,0.61577,0.033321,0.009381,0.032555,0.029193,0.008213,0.028341
replay,10000.0,,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.82199,0.49552,0.360398,0.88472,0.37252,0.691239,0.009495,0.008609,0.017623,0.010086,0.008034,0.018188
SnB,1000.0,,FF,10.0,10.0,10.0,10.0,,,0.75913,0.48274,0.17382,0.865,,,0.021474,0.005105,0.018464,0.012059,,
GR,,,VAE,10.0,10.0,10.0,10.0,10.0,10.0,0.18925,0.06915,0.04699,0.76374,0.13085,0.50066,0.010612,0.005424,0.007906,0.0187,0.005671,0.024495


In [34]:
from pandas.io.formats.style import Styler
from IPython.display import HTML
import numpy as np

# print(clean_table.index)


def create_latex_table(
        df: pd.DataFrame,
        caption: str = "",
        label: str = "",
        bolding_ignores_rows = []) -> str:
    """Convert a dataframe to latex"""

    # Make the index more readable
    df = df.reset_index()
    def _format_hp(row):
        hp = ""
        prune_proportion = row["prune_proportion"][0]
        if prune_proportion == "ep":
            hp = "Equal Prune"
        elif prune_proportion == "Best":
            hp = "Best"
        elif prune_proportion != "":
            hp = f"$\lambda$={float(prune_proportion)*100:.0f}\%"

        replay_buffer = row["replay_buffer"][0]
        if replay_buffer != "":
            hp += f"$n$={int(replay_buffer)}"
        return hp
    # The new index will consist of the strategy, hyper-parameters, and the architecture
    df["Hyper-Parameters"] = df.apply(_format_hp, axis=1)
    df["Strategy"]         = df["strategy"].apply(lambda x: DISPLAY_STRATEGY_NAMES[x])
    df["Architecture"]     = df["architecture"]
    df = df.drop(columns=["strategy", "replay_buffer", "prune_proportion", "architecture"])
    df.set_index(["Strategy", "Hyper-Parameters", "Architecture"], inplace=True)
    
    
    # Format the values into a format like '81.6±8.6'
    def _format_values(row):
        pretty_row = {}
        for dataset, value in row.groupby("dataset"):
            final_accuracy_mean = value["final_accuracy_mean"][0] * 100
            final_accuracy_std = value["final_accuracy_std"][0] * 100
            if not np.isnan(final_accuracy_mean) and not np.isnan(final_accuracy_std):
                pretty_row[dataset] = f"{final_accuracy_mean:.1f}$\pm${final_accuracy_std:.2f}"
            else:
                pretty_row[dataset] = "-"
            
        return pd.Series(pretty_row)
    
    # Save the indices of the best cells for each dataset
    relevant_rows = np.setdiff1d(list(range(df.shape[0])), bolding_ignores_rows)
    best_rows = df["final_accuracy_mean"].iloc[relevant_rows].idxmax()


    df = df.apply(_format_values, axis=1)
    style: Styler = df.style

    # Bold the best cells
    def _bold_best(row):
        return ["font-weight: bold" if row.name == best_row else "" for best_row in best_rows]
    style = style.apply(_bold_best, axis=1)

    # Export to latex
    result = style.to_latex(
        convert_css=True,
        hrules=True,
        position_float="centering",
        multirow_align="t",
        caption=caption,
        label=label,
    )
    return result

def copy_latex(latex: str):
    latex = latex.replace("\\", "\\\\")
    latex = latex.replace("\n", "\\n")
    return HTML(f"""<button onclick="navigator.clipboard.writeText('""" + latex + """')">COPY</button>""")


In [35]:
df = clean_table.copy(deep=True)
best_idx = df.groupby("strategy").idxmax().loc[("taskInference"), ("final_accuracy_mean")]

# Create new row
idx = ("taskInference", "", "Best", "AE or VAE")
df.loc[idx] = None
row = df.loc[idx]

for key, value in best_idx.items():
#     df.loc[idx] = "hi"
    row.loc["final_accuracy_std",key] = clean_table.loc[value].loc["final_accuracy_std",key]
    row.loc["final_accuracy_mean",key] = clean_table.loc[value].loc["final_accuracy_mean",key]
    row.loc["count",key] = clean_table.loc[value].loc["count",key]

df = df.reset_index()
best_vs_rest = df[~((df["strategy"] == "taskInference") & (df["prune_proportion"] != "Best"))]
best_vs_rest.set_index(["strategy", "prune_proportion", "replay_buffer", "architecture"], inplace=True)
best_vs_rest

  df.loc[idx] = None


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,count,count,count,count,count,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,dataset,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50
strategy,prune_proportion,replay_buffer,architecture,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
nonContinual,,,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.87354,0.61027,0.348352,0.91969,0.46653,0.751212,0.004272,0.004829,0.024389,0.001879,0.002992,0.008263
nonContinual,,,VAE,10.0,10.0,10.0,10.0,10.0,10.0,0.87287,0.59763,0.357711,0.91974,0.45534,0.732983,0.002922,0.004862,0.028231,0.00299,0.001989,0.004642
taskOracle,0.5,,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.94553,0.62674,0.342062,0.98117,0.52854,0.405975,0.014504,0.006806,0.036982,0.01067,0.015046,0.014396
finetuning,,,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.19516,0.0861,0.067569,0.19811,0.07692,0.094674,0.003368,0.003613,0.010457,0.002802,0.003495,0.001804
finetuning,,,VAE,10.0,10.0,10.0,10.0,10.0,10.0,0.19246,0.08647,0.064834,0.19921,0.08004,0.093727,0.004916,0.004661,0.009187,0.00099,0.002007,0.003664
replay,,100.0,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.27539,0.09468,0.139542,0.58754,0.08845,0.317575,0.038332,0.005566,0.018695,0.059762,0.00409,0.029098
replay,,1000.0,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.58191,0.21352,0.298375,0.80422,0.17938,0.61577,0.033321,0.009381,0.032555,0.029193,0.008213,0.028341
replay,,10000.0,AE,10.0,10.0,10.0,10.0,10.0,10.0,0.82199,0.49552,0.360398,0.88472,0.37252,0.691239,0.009495,0.008609,0.017623,0.010086,0.008034,0.018188
SnB,,1000.0,FF,10.0,10.0,10.0,10.0,,,0.75913,0.48274,0.17382,0.865,,,0.021474,0.005105,0.018464,0.012059,,
GR,,,VAE,10.0,10.0,10.0,10.0,10.0,10.0,0.18925,0.06915,0.04699,0.76374,0.13085,0.50066,0.010612,0.005424,0.007906,0.0187,0.005671,0.024495


In [36]:
latex_table = create_latex_table(
        best_vs_rest,
        caption="Best CI-PackNet vs Others \\\\ Mean Final Accuracy $\pm$ One Standard Deviation after 10 runs \\\\ Top: Non-Class-IL Methods, Bottom: Class-IL Methods",
        label="tab:best_vs_rest",
        bolding_ignores_rows=[0, 1, 2]
)
copy_latex(latex_table)

  df = df.drop(columns=["strategy", "replay_buffer", "prune_proportion", "architecture"])


In [37]:
latex_table = create_latex_table(
        clean_table.loc[["taskInference"]],
        caption="CI-PackNet with Different Hyper-Parameters \\\\ Mean Final Accuracy $\pm$ One Standard Deviation after 10 runs",
        label="tab:hp_ci_packnet",
)
copy_latex(latex_table)



  df = df.drop(columns=["strategy", "replay_buffer", "prune_proportion", "architecture"])
