In [3]:
import pandas as pd
import typing as t
import os
DISPLAY_NAME = "SurpriseNet"
DATASETS = [
    "S-FMNIST", "S-CIFAR10", "S-CIFAR100", "S-CORe50", "SE-CIFAR100", "SE-CORe50"
]
COLUMNS = ["final_accuracy_mean", "final_accuracy_std", "count"]
CSV_FILE = "data/ExperimentLogs.csv"

In [7]:
def load_clean_table():
    exp_data = pd.read_csv(CSV_FILE)

    def repo_version_filter(df: pd.DataFrame, versions: t.Dict[str, t.Set[str]]) -> pd.DataFrame:
        """Filter a dataframe by repo version"""

        def _is_valid(row):
            if row["dataset"] in versions:
                return row["repo_hash"] in versions[row["dataset"]]
            return False
        return df[df.apply(_is_valid, axis=1)]

    def did_complete(row) -> bool:
        """Check if an experiment has completed"""
        return row["completed_tasks"] == row["n_experiences"]

    def remove_unused_parameters(row):
        """Remove unused hyperparameter from rows"""
        if not row["use_packnet"]:
            row["prune_proportion"] = ""
        elif row["prune_proportion"][0] == "[":
            row["prune_proportion"] = "ep"
        if not row["use_experience_replay"] or pd.isna(row["replay_buffer"]):
            row["replay_buffer"] = ""
        return row

    exp_data = repo_version_filter(
            exp_data,
            {
                "S-FMNIST": {"1555acb6", "0a229afa", "4e7023cd", "4e7023cdD", "0a229afaD", "54dcf601"},
                "S-CIFAR10": {"1555acb6", "0a229afa", "4e7023cd", "4e7023cdD", "0a229afaD"},
                "S-CIFAR100": {"1555acb6", "0a229afa", "4e7023cd", "4e7023cdD", "0a229afaD"},
                "S-CORe50":    {"84260321", "a6134f6c", "a3411a3b", "ecc57a24"},
                "SE-CORe50":   {"84260321", "a6134f6c", "a3411a3b", "ecc57a24"},
                "SE-CIFAR100": {"84260321", "a6134f6c", "a3411a3b", "ecc57a24"},
            },
            
    )
    # remove certain experiment categories
    exp_data = exp_data[~exp_data["experiment_category"].isin(["TEST"])]
    exp_data = exp_data[~exp_data["strategy"].isin(["LwF", "SI"])]
    # Filter out incomplete experiments
    exp_data = exp_data[exp_data.apply(did_complete, axis=1)]
    # Simplify hyper-parameters
    exp_data = exp_data.apply(remove_unused_parameters, axis=1)


    group_by_keys = [
        "strategy",
        "architecture",
        "replay_buffer",
        "prune_proportion",
        "dataset"
    ]

    grouped = exp_data.groupby(group_by_keys, dropna=False)

    # Calculate the mean and standard deviation of the final accuracy
    table_components = {
        COLUMNS[0]: grouped["final_accuracy"].mean(),
        COLUMNS[1]: grouped["final_accuracy"].std(),
        COLUMNS[2]: grouped["experiment_code"].count()
    }

    table = pd.DataFrame(
        table_components
    )


    table = table.pivot_table(
            values=COLUMNS, 
            columns="dataset", 
            index=['strategy', 'replay_buffer', 'prune_proportion', 'architecture'])
    return table
clean_table = load_clean_table()
clean_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,count,count,count,count,count,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,dataset,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50
strategy,replay_buffer,prune_proportion,architecture,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
finetuning,,,AE,10,11,11,10,11,11,0.19516,0.086145,0.070175,0.19811,0.090545,0.094263,0.003368,0.003431,0.009488,0.002802,0.002815,0.003296
finetuning,,,VAE,10,11,10,10,10,10,0.19246,0.086718,0.06527,0.19921,0.0909,0.094561,0.004916,0.004498,0.013155,0.00099,0.002941,0.002562
nonContinual,,,AE,10,10,11,10,11,11,0.87354,0.61027,0.346282,0.91969,0.676709,0.785849,0.004272,0.004829,0.015104,0.001879,0.002918,0.006939
nonContinual,,,VAE,10,11,10,10,10,10,0.87287,0.597382,0.330374,0.91974,0.6706,0.768836,0.002922,0.004685,0.01206,0.00299,0.003152,0.006536
replay,100.0,,AE,12,12,10,14,10,10,0.280175,0.094692,0.14035,0.582514,0.13648,0.345155,0.03651,0.005055,0.018088,0.051615,0.01021,0.021992
replay,1000.0,,AE,12,10,11,12,10,10,0.582667,0.21352,0.277864,0.793942,0.34254,0.651134,0.03067,0.009381,0.020791,0.037142,0.009871,0.022488
replay,10000.0,,AE,12,10,10,12,10,10,0.821267,0.49552,0.334964,0.8856,0.60796,0.746593,0.010086,0.008609,0.015717,0.009412,0.005563,0.006231
taskInference,,0.2,AE,12,10,10,10,10,10,0.35165,0.13202,0.073608,0.50577,0.26482,0.245146,0.07952,0.032206,0.011986,0.078522,0.013427,0.010033
taskInference,,0.2,VAE,12,10,10,10,10,10,0.462833,0.17163,0.054047,0.57982,0.25751,0.231802,0.095931,0.034284,0.010272,0.035713,0.006555,0.011352
taskInference,,0.4,AE,12,10,10,10,10,10,0.522333,0.20546,0.089193,0.76119,0.38437,0.397354,0.13716,0.047249,0.014373,0.08117,0.026114,0.018962


In [10]:
# Add data from different repositories

from os import PathLike


def add_csv(df: pd.DataFrame, index: t.Tuple, csv_file: PathLike, ):
    data = pd.read_csv(csv_file, index_col=[0], comment="#")

    # Add rows
    for dataset in DATASETS:
        if dataset not in data.columns:
            continue
        df.loc[index, ("final_accuracy_mean",dataset)] = data.mean()[dataset]
        df.loc[index, ("final_accuracy_std",dataset)] = data.std()[dataset]
        df.loc[index, ("count", dataset)] = data.count()[dataset]

    return df
clean_table = add_csv(clean_table, ("SnB", "1000", "", "-"), "data/SnB.csv")
clean_table = add_csv(clean_table, ("BIR", "", "", "VAE"), "data/BIR.csv")
clean_table = add_csv(clean_table, ("GR", "", "", "VAE"), "data/GR.csv")
clean_table = add_csv(clean_table, ("ICARL", "1000", "", "-"), "data/ICARL.csv")
clean_table = add_csv(clean_table, ("taskOracle", "", "Best", "AE or VAE"), "data/BestPackNet.csv")


  df.loc[index, ("final_accuracy_mean",dataset)] = data.mean()[dataset]
  df.loc[index, ("final_accuracy_std",dataset)] = data.std()[dataset]
  df.loc[index, ("count", dataset)] = data.count()[dataset]
  df.loc[index, ("final_accuracy_mean",dataset)] = data.mean()[dataset]
  df.loc[index, ("final_accuracy_std",dataset)] = data.std()[dataset]
  df.loc[index, ("count", dataset)] = data.count()[dataset]
  df.loc[index, ("final_accuracy_mean",dataset)] = data.mean()[dataset]
  df.loc[index, ("final_accuracy_std",dataset)] = data.std()[dataset]
  df.loc[index, ("count", dataset)] = data.count()[dataset]
  df.loc[index, ("final_accuracy_mean",dataset)] = data.mean()[dataset]
  df.loc[index, ("final_accuracy_mean",dataset)] = data.mean()[dataset]
  df.loc[index, ("final_accuracy_std",dataset)] = data.std()[dataset]
  df.loc[index, ("final_accuracy_std",dataset)] = data.std()[dataset]
  df.loc[index, ("count", dataset)] = data.count()[dataset]
  df.loc[index, ("final_accuracy_mean",dataset)] =

In [11]:
DISPLAY_STRATEGY_NAMES = {
    "nonContinual": "Non-Continual",
    "taskOracle": "Task Oracle PackNet \cite{Mallya_Lazebnik_2018}",
    "PackNetBest": "PackNet \cite{Mallya_Lazebnik_2018}",
    "replay": "Experience Replay",
    "finetuning": "Finetuning",
    "SnB": "Split and Bridge \cite{Kim_Choi_2021}",
    "GR": "Generative Replay \cite{vandevenBraininspiredReplayContinual2020}",
    "BIR": "Brain Inspired Replay \cite{vandevenBraininspiredReplayContinual2020}",
    "taskInference": f"{DISPLAY_NAME} (ours)",
    "ICARL": "iCaRL \cite{rebuffiICaRLIncrementalClassifier2017}"
}

In [12]:
# Sort by strategy
strategy_order = [
    "nonContinual",
    "taskOracle",
    "PackNetBest",
    "finetuning",
    "replay",
    "SnB",
    "ICARL",
    "GR",
    "BIR",
    "taskInference",
]
clean_table = clean_table.reindex(
    strategy_order,
    level=0,
)
clean_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,count,count,count,count,count,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,dataset,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50
strategy,replay_buffer,prune_proportion,architecture,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
nonContinual,,,AE,10.0,10.0,11.0,10.0,11.0,11.0,0.87354,0.61027,0.346282,0.91969,0.676709,0.785849,0.004272,0.004829,0.015104,0.001879,0.002918,0.006939
nonContinual,,,VAE,10.0,11.0,10.0,10.0,10.0,10.0,0.87287,0.597382,0.330374,0.91974,0.6706,0.768836,0.002922,0.004685,0.01206,0.00299,0.003152,0.006536
taskOracle,,0.5,AE,10.0,10.0,11.0,10.0,11.0,11.0,0.94553,0.62674,0.5089,0.98117,0.612591,0.657953,0.014504,0.006806,0.027637,0.01067,0.019638,0.016291
taskOracle,,Best,AE or VAE,10.0,10.0,10.0,10.0,10.0,10.0,0.944,0.75,0.403,0.987,0.908,0.936,0.014298,0.014907,0.021628,0.009487,0.006325,0.005164
finetuning,,,AE,10.0,11.0,11.0,10.0,11.0,11.0,0.19516,0.086145,0.070175,0.19811,0.090545,0.094263,0.003368,0.003431,0.009488,0.002802,0.002815,0.003296
finetuning,,,VAE,10.0,11.0,10.0,10.0,10.0,10.0,0.19246,0.086718,0.06527,0.19921,0.0909,0.094561,0.004916,0.004498,0.013155,0.00099,0.002941,0.002562
replay,100.0,,AE,12.0,12.0,10.0,14.0,10.0,10.0,0.280175,0.094692,0.14035,0.582514,0.13648,0.345155,0.03651,0.005055,0.018088,0.051615,0.01021,0.021992
replay,1000.0,,AE,12.0,10.0,11.0,12.0,10.0,10.0,0.582667,0.21352,0.277864,0.793942,0.34254,0.651134,0.03067,0.009381,0.020791,0.037142,0.009871,0.022488
replay,10000.0,,AE,12.0,10.0,10.0,12.0,10.0,10.0,0.821267,0.49552,0.334964,0.8856,0.60796,0.746593,0.010086,0.008609,0.015717,0.009412,0.005563,0.006231
SnB,1000.0,,-,10.0,10.0,10.0,10.0,,,0.75913,0.48274,0.16146,0.865,,,0.021474,0.005105,0.015674,0.012059,,


In [14]:
from pandas.io.formats.style import Styler
from IPython.display import HTML
import numpy as np


def create_latex_table(
        df: pd.DataFrame,
        caption: str = "",
        label: str = "",
        bolding_ignores_rows = []) -> str:
    """Convert a dataframe to latex"""

    # Make the index more readable
    df = df.reset_index()
    def _format_hp(row):
        hp = ""
        prune_proportion = row["prune_proportion"][0]
        if prune_proportion == "ep":
            hp = "Equal Prune"
        elif prune_proportion == "Best":
            hp = "Best"
        elif prune_proportion != "":
            hp = f"$\lambda$={float(prune_proportion)*100:.0f}\%"

        replay_buffer = row["replay_buffer"][0]
        if replay_buffer != "":
            hp += f"$n$={int(replay_buffer)}"
        return hp
    # The new index will consist of the strategy, hyper-parameters, and the architecture
    df["Config"] = df.apply(_format_hp, axis=1)
    df["Strategy"]         = df["strategy"].apply(lambda x: DISPLAY_STRATEGY_NAMES[x])
    df["AE/VAE"]     = df["architecture"]
    df = df.drop(columns=["strategy", "replay_buffer", "prune_proportion", "architecture"])
    df.set_index(["Strategy", "Config", "AE/VAE"], inplace=True)
    df = df[[(col, dataset) for col in COLUMNS for dataset in DATASETS]]
    
    
    # Format the values into a format like '81.6±8.6'
    def _format_values(row):
        pretty_row = {}
        for dataset, value in row.groupby("dataset"):
            final_accuracy_mean = value["final_accuracy_mean"][0] * 100
            final_accuracy_std = value["final_accuracy_std"][0] * 100
            if not np.isnan(final_accuracy_mean) and not np.isnan(final_accuracy_std):
                pretty_row[dataset] = f"{final_accuracy_mean:.1f}$\pm${final_accuracy_std:.2f}"
            else:
                pretty_row[dataset] = "-"
            
        return pd.Series(pretty_row)
    
    # Save the indices of the best cells for each dataset
    relevant_rows = np.setdiff1d(list(range(df.shape[0])), bolding_ignores_rows)
    best_rows = df["final_accuracy_mean"].iloc[relevant_rows].idxmax()


    df = df.apply(_format_values, axis=1)
    df = df[DATASETS]
    style: Styler = df.style


    # Bold the best cells
    def _bold_best(row):
        return ["font-weight: bold" if row.name == best_row else "" for best_row in best_rows]
    style = style.apply(_bold_best, axis=1)

    # Export to latex
    result = style.to_latex(
        convert_css=True,
        hrules=True,
        position_float="centering",
        multirow_align="t",
        caption=caption,
        label=label,
    )
    return result

def copy_latex(latex: str):
    latex = latex.replace("\\", "\\\\")
    latex = latex.replace("\n", "\\n")
    return HTML(f"""<button onclick="navigator.clipboard.writeText('""" + latex + """')">COPY</button>""")


In [15]:
df = clean_table.copy(deep=True)
best_idx = df.groupby("strategy").idxmax().loc[("taskInference"), ("final_accuracy_mean")]

# Create new row
idx = ("taskInference", "", "Best", "AE or VAE")
df.loc[idx] = None
row = df.loc[idx]

for key, value in best_idx.items():
    row.loc["final_accuracy_std",key] = clean_table.loc[value].loc["final_accuracy_std",key]
    row.loc["final_accuracy_mean",key] = clean_table.loc[value].loc["final_accuracy_mean",key]
    row.loc["count",key] = clean_table.loc[value].loc["count",key]

df = df.reset_index()
best_vs_rest = df[~((df["strategy"] == "taskInference") & (df["prune_proportion"] != "Best"))]
best_vs_rest.set_index(["strategy", "prune_proportion", "replay_buffer", "architecture"], inplace=True)
best_vs_rest


  df.loc[idx] = None


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,count,count,count,count,count,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_mean,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std,final_accuracy_std
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,dataset,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50,S-CIFAR10,S-CIFAR100,S-CORe50,S-FMNIST,SE-CIFAR100,SE-CORe50
strategy,prune_proportion,replay_buffer,architecture,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
nonContinual,,,AE,10.0,10.0,11.0,10.0,11.0,11.0,0.87354,0.61027,0.346282,0.91969,0.676709,0.785849,0.004272,0.004829,0.015104,0.001879,0.002918,0.006939
nonContinual,,,VAE,10.0,11.0,10.0,10.0,10.0,10.0,0.87287,0.597382,0.330374,0.91974,0.6706,0.768836,0.002922,0.004685,0.01206,0.00299,0.003152,0.006536
taskOracle,0.5,,AE,10.0,10.0,11.0,10.0,11.0,11.0,0.94553,0.62674,0.5089,0.98117,0.612591,0.657953,0.014504,0.006806,0.027637,0.01067,0.019638,0.016291
taskOracle,Best,,AE or VAE,10.0,10.0,10.0,10.0,10.0,10.0,0.944,0.75,0.403,0.987,0.908,0.936,0.014298,0.014907,0.021628,0.009487,0.006325,0.005164
finetuning,,,AE,10.0,11.0,11.0,10.0,11.0,11.0,0.19516,0.086145,0.070175,0.19811,0.090545,0.094263,0.003368,0.003431,0.009488,0.002802,0.002815,0.003296
finetuning,,,VAE,10.0,11.0,10.0,10.0,10.0,10.0,0.19246,0.086718,0.06527,0.19921,0.0909,0.094561,0.004916,0.004498,0.013155,0.00099,0.002941,0.002562
replay,,100.0,AE,12.0,12.0,10.0,14.0,10.0,10.0,0.280175,0.094692,0.14035,0.582514,0.13648,0.345155,0.03651,0.005055,0.018088,0.051615,0.01021,0.021992
replay,,1000.0,AE,12.0,10.0,11.0,12.0,10.0,10.0,0.582667,0.21352,0.277864,0.793942,0.34254,0.651134,0.03067,0.009381,0.020791,0.037142,0.009871,0.022488
replay,,10000.0,AE,12.0,10.0,10.0,12.0,10.0,10.0,0.821267,0.49552,0.334964,0.8856,0.60796,0.746593,0.010086,0.008609,0.015717,0.009412,0.005563,0.006231
SnB,,1000.0,-,10.0,10.0,10.0,10.0,,,0.75913,0.48274,0.16146,0.865,,,0.021474,0.005105,0.015674,0.012059,,


In [17]:
latex_table = create_latex_table(
        best_vs_rest,
        caption=f"Best {DISPLAY_NAME} vs Others \\\\ Mean Final Accuracy $\pm$ One Standard Deviation after 10 runs \\\\ Top: Non-Class-IL Methods, Bottom: Class-IL Methods",
        label="tab:best_vs_rest",
        bolding_ignores_rows=[0, 1, 2, 3]
)
print("Best VS Rest")
copy_latex(latex_table)

Best VS Rest


  df = df.drop(columns=["strategy", "replay_buffer", "prune_proportion", "architecture"])


In [18]:
latex_table = create_latex_table(
        clean_table.loc[["taskInference"]],
        caption=f"{DISPLAY_NAME} with Different Hyper-Parameters \\\\ Mean Final Accuracy $\pm$ One Standard Deviation after 10 runs",
        label="tab:hp_ci_packnet",
)
copy_latex(latex_table)



  df = df.drop(columns=["strategy", "replay_buffer", "prune_proportion", "architecture"])
