In [1]:
import pandas as pd
from csv import QUOTE_NONE
from configurations.general_confs import SparseMethods

In [2]:
def get_dataset(model_name):
    """Return the first known dataset name that occurs in ``model_name``.

    The candidates are tried in the order "fashionmnist", "cifar10", "mnist".
    "fashionmnist" has to be tried before "mnist" because "mnist" is a
    substring of it.

    Args:
        model_name (str): Model name to search.

    Returns:
        str or None: The matching dataset name, or None when no candidate
        occurs in ``model_name``.
    """
    known_datasets = ("fashionmnist", "cifar10", "mnist")
    return next((name for name in known_datasets if name in model_name), None)
        
def get_table(groupby_col, transformation, best_acc_path, dataset, modelname2method, not_contain_substring, contain_substring, sort, describe):
    """ Function to get a table with statistics.

    Args:
        groupby_col (str): Name of the value column to report (the callers in this
            notebook pass "Score" or "EpochBest").
        transformation (callable): Applied element-wise to ``groupby_col`` right after loading.
        best_acc_path (str or file-like): Path to the best_acc.csv file, or any object
            accepted by ``pd.read_csv``. The file is read with ";" as separator and must
            contain a "ModelName" column and the ``groupby_col`` column.
        dataset (str): Name of the dataset to select as used in the model name.
        modelname2method (str->str): Function that processes the model name to the method in question.
        not_contain_substring (str, optional): Substring of the method that should not be in the table.
            Used to exclude methods. Matched literally; a falsy value disables the filter.
        contain_substring (str, optional): Only methods containing this substring are kept.
            Matched literally; a falsy value disables the filter.
        sort (bool): Sort the summary by descending mean. Only has an effect when ``describe`` is true.
        describe (bool): If true, return count/mean/std per method. Otherwise return the
            individual values pivoted into one column per method.

    Returns:
        pandas.DataFrame: The table, rounded to 2 decimals. Its method axis is named after
        the upper-cased dataset.
    """
    # Read the data frame
    df = pd.read_csv(best_acc_path, sep=";")
    df[groupby_col] = df[groupby_col].apply(transformation)

    # retrieve from model name the dataset and add as column
    df["dataset"] = df["ModelName"].apply(get_dataset)

    # retrieve from model name the sparse method and add as column
    df["method"] = df["ModelName"].apply(lambda x: modelname2method(x))

    # select on the dataset column
    df_sub = df[df["dataset"] == dataset]

    # only add methods that contain this substring
    # regex=False: the arguments are plain substrings. With the default regex matching,
    # the "." in "0.3-0.3" matches any character and "Entropy (0.01)" is read as a group.
    if contain_substring:
        df_sub = df_sub[df_sub["method"].str.contains(contain_substring, regex=False)]

    # remove all methods that contain this substring
    if not_contain_substring:
        df_sub = df_sub[~df_sub["method"].str.contains(not_contain_substring, regex=False)]

    # hacky solution to get nice title, rename the method column to the dataset name
    # (index=str also turns the row labels into strings)
    new_method_col = dataset.upper()
    df_sub = df_sub.rename(index=str, columns={"method": new_method_col})

    if describe:
        df_sub = df_sub.groupby([new_method_col])[groupby_col].describe()[["count", "mean", "std"]]
    else:
        df_sub = df_sub.pivot(columns=new_method_col, values=groupby_col)

    # round to 2 digits
    df_sub = df_sub.round(2)

    if describe and sort:
        df_sub = df_sub.sort_values(by="mean", ascending=False)

    return df_sub


def get_score_table(best_acc_path, dataset, modelname2method, not_contain_substring=None, contain_substring=None, sort=True, describe=True):
    """Table of the "Score" column, with every score multiplied by 100.

    Thin wrapper around ``get_table``; all arguments are forwarded unchanged.
    """
    def times_hundred(score):
        return score * 100

    return get_table(
        groupby_col="Score",
        transformation=times_hundred,
        best_acc_path=best_acc_path,
        dataset=dataset,
        modelname2method=modelname2method,
        not_contain_substring=not_contain_substring,
        contain_substring=contain_substring,
        sort=sort,
        describe=describe,
    )

def get_best_epoch_table(best_acc_path, dataset, modelname2method, not_contain_substring=None, contain_substring=None, sort=True, describe=True):
    """Table of the "EpochBest" column, values left untransformed.

    Thin wrapper around ``get_table``; all arguments are forwarded unchanged.
    """
    def unchanged(epoch):
        return epoch

    return get_table(
        groupby_col="EpochBest",
        transformation=unchanged,
        best_acc_path=best_acc_path,
        dataset=dataset,
        modelname2method=modelname2method,
        not_contain_substring=not_contain_substring,
        contain_substring=contain_substring,
        sort=sort,
        describe=describe,
    )
    

def sparse_parser(sparse_str, model_name):
    """Turn a sparse-method string into a readable label.

    ``sparse_str`` is handed to ``SparseMethods``; the result is iterated and each
    step is read through its 'target', 'method' and 'percent' keys
    (NOTE(review): the exact step structure is defined by SparseMethods — confirm there).

    Args:
        sparse_str (str): Sparse-method description taken from the model name.
        model_name (str): Full model name; when it ends in "no", " No" is appended.

    Returns:
        str: One entry per step joined by " & ". A step whose target is "none"
        becomes "none"; any other step becomes "<target> <method> <p1-p2-...>".
    """
    def describe_step(step):
        if step['target'] == "none":
            return "none"
        percents = '-'.join(map(str, step['percent']))
        return f"{step['target']} {step['method']} {percents}"

    label = " & ".join([describe_step(step) for step in SparseMethods(sparse_str)])

    if model_name.endswith("no"):
        label += " No"

    return label

## Diff sparse experiment


Define a function that extracts the sparse method from the model name.

In [27]:
def get_sparse_method(model_name):
    """ Get sparse method from model name.

    Model names have to be formatted as: {dataset}_:{sparse_method}:

    Example: cifar10_:edges_random_0.3-0.3:

    Names containing "loss" are mapped to a fixed entropy label:
    "loss_001" -> "Entropy (0.01)" and "loss_01" -> "Entropy (0.1)".

    Args:
        model_name (str): Model name in the format above.

    Returns:
        str: Readable label of the sparse method.

    Raises:
        ValueError: If the name contains "loss" but none of the known loss tags.
        IndexError: If a non-loss name contains no ":" separator.
    """
    if "loss" in model_name:
        if "loss_001" in model_name:
            return "Entropy (0.01)"
        if "loss_01" in model_name:
            return "Entropy (0.1)"
        # Previously this case fell through with the result variable unassigned and
        # surfaced as an UnboundLocalError; fail with an explicit message instead.
        raise ValueError(
            f"Unknown loss variant in model name {model_name!r}; "
            "expected 'loss_001' or 'loss_01'."
        )

    # get sparse string from model_name
    sparse_str = model_name.split(":")[1]
    return sparse_parser(sparse_str, model_name)
    

In [40]:
# Selection for the diff-sparse tables. These globals are also read by the
# following cells of this section.
dataset = "cifar10"

# Keep only methods whose label contains this text (None disables the filter).
contain_substring = "nodes random"
# contain_substring = None

# Drop methods whose label contains this text (None disables the filter).
not_contain_substring = None
# not_contain_substring = " random"

get_score_table(
    best_acc_path="./experiments/diff_sparse_new/best_acc.csv",
    dataset=dataset,
    modelname2method=get_sparse_method,
    not_contain_substring=not_contain_substring,
    contain_substring=contain_substring,
    sort=False,
)

Unnamed: 0_level_0,count,mean,std
CIFAR10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
nodes random 0.3-0.3,3.0,37.49,1.02
nodes random 0.3-0.3 No,3.0,65.89,0.81


In [41]:
# Individual scores (one column per method) instead of the count/mean/std summary.
get_score_table(
    best_acc_path="./experiments/diff_sparse_new/best_acc.csv",
    dataset=dataset,
    modelname2method=get_sparse_method,
    not_contain_substring=not_contain_substring,
    contain_substring=contain_substring,
    sort=False,
    describe=False,
)

CIFAR10,nodes random 0.3-0.3,nodes random 0.3-0.3 No
100,36.53,
95,,66.18
96,38.55,
97,,64.97
98,37.39,
99,,66.51


In [35]:
# Best-epoch summary. Uses whatever dataset / contain_substring /
# not_contain_substring currently hold in the kernel, so run the selection cell first.
get_best_epoch_table(
    best_acc_path="./experiments/diff_sparse_new/best_acc.csv",
    dataset=dataset,
    modelname2method=get_sparse_method,
    not_contain_substring=not_contain_substring,
    contain_substring=contain_substring,
)

Unnamed: 0_level_0,count,mean,std
FASHIONMNIST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
edges random 0.3-0.3,3.0,44.67,13.43
edges random 0.3-0.3 No,3.0,26.67,6.81


## Architecture experiment

In [17]:
def parse_architecture(model_name):
    """Build an "<architecture> <sparse method>" label from a model name.

    The name is split on ":". Field 1 is the sparse string passed to
    ``sparse_parser``; field 2 is the architecture string, in which every "&"
    is replaced by ";".

    Args:
        model_name (str): Model name with at least three ":"-separated fields.

    Returns:
        str: The architecture string followed by the sparse-method label.
    """
    fields = model_name.split(":")
    sparse_str = fields[1]
    arch_str = fields[2].replace("&", ";")

    return f"{arch_str} {sparse_parser(sparse_str, model_name)}"

from io import StringIO
import csv

# Workaround: the architecture notation inside the model name uses ";", which is
# also the column delimiter of best_acc.csv.
with open("./experiments/architecture_new/best_acc.csv") as f:
    # Read the csv with a delimiter that is assumed not to occur in the file ("#"),
    # so that every line comes back as a single field.
    reader = csv.reader(f, delimiter="#")

    # Blank lines come back as empty rows; indexing them with [0] raised IndexError.
    # They are dropped here, which matches pandas ignoring blank lines by default.
    non_blank_rows = (r for r in reader if r)

    # replace the first two ; with an &, skip header (row 0)
    reader_expanded = [
        r[0].replace(";", "&", 2) if ind > 0 else r[0]
        for ind, r in enumerate(non_blank_rows)
    ]

    # concat to string with newlines
    parsed_csv_str = "\n".join(reader_expanded)

    # wrap in a file-like object so that it can be passed where a path is expected
    arch_csv = StringIO(parsed_csv_str)

dataset = "mnist" #or cifar10
contain_substring = "none"
# contain_substring = None
# not_contain_substring = " No"
not_contain_substring = None


In [107]:
# arch_csv is an in-memory stream that read_csv consumes. Rewind it so this cell
# can be re-run without rebuilding the stream first.
arch_csv.seek(0)
get_score_table(arch_csv, dataset, parse_architecture, not_contain_substring, contain_substring, sort=False)

Unnamed: 0_level_0,count,mean,std
MNIST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"16,8;10,16;10,16 none",1.0,99.17,
"16,8;30,16;10,16 none",1.0,99.32,
"32,8;10,16;10,16 none",1.0,99.28,
"32,8;30,16;10,16 none",1.0,99.25,
"8,8;10,16;10,16 none",1.0,99.2,
"8,8;30,16;10,16 none",1.0,99.19,
"8,8;60,16;10,16 none",1.0,99.21,


## Sparse ratio settings

In [3]:
def get_sparse_rato(model_name):
    """Label a model by its sparse method, keeping the ratio of loss-based runs.

    Model names have to be formatted as: {dataset}_:{sparse_method}:

    Example: cifar10_:edges_random_0.3-0.3:

    When the sparse-method field contains "loss", the text after its first "_"
    is taken as the ratio and the label is "Entropy <ratio>". Every other
    sparse-method field is passed to ``sparse_parser``.

    Args:
        model_name (str): Model name in the format above.

    Returns:
        str: Readable label of the sparse method.
    """
    sparse_str = model_name.split(":")[1]

    if "loss" not in sparse_str:
        return sparse_parser(sparse_str, model_name)

    ratio = sparse_str.split("_")[1]
    return f"Entropy {ratio}"
    

In [6]:
# Keep only methods whose label contains this text (None disables the filter).
contain_substring = "nodes topk 0.0-0.5"
# contain_substring = None

# Drop methods whose label contains this text (None disables the filter).
not_contain_substring = None
# not_contain_substring = " No"

get_score_table(
    best_acc_path="./experiments/sparse_rato/best_acc.csv",
    dataset="cifar10",
    modelname2method=get_sparse_rato,
    not_contain_substring=not_contain_substring,
    contain_substring=contain_substring,
    sort=False,
)

Unnamed: 0_level_0,count,mean,std
CIFAR10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
nodes topk 0.0-0.5,3.0,65.02,0.14
nodes topk 0.0-0.5 No,3.0,65.46,0.31


In [125]:
# Individual scores (one column per method) instead of the count/mean/std summary.
get_score_table(
    best_acc_path="./experiments/sparse_rato/best_acc.csv",
    dataset="cifar10",
    modelname2method=get_sparse_rato,
    not_contain_substring=not_contain_substring,
    contain_substring=contain_substring,
    sort=False,
    describe=False,
)

CIFAR10,nodes topk 0.0-0.8
31,65.6
35,65.06
39,65.17


## Routing effect

In [110]:
def get_routing_iters_used(model_name):
    """Build a "<sparse method> <routing iters> <arch>" label from a model name.

    The name is split on ":" and the fields are used as follows:
        field 1: sparse string, converted with ``sparse_parser``
        field 2: number of routing iterations
        field 4: architecture tag
    The remaining fields are not used.

    Args:
        model_name (str): Model name with at least five ":"-separated fields.

    Returns:
        str: The three parts joined by single spaces.
    """
    parts = model_name.split(":")
    sparse_method = sparse_parser(parts[1], model_name)

    return f"{sparse_method} {parts[2]} {parts[4]}"

In [134]:
# Selection for the routing tables: drop every method whose label contains
# "edges" and apply no positive filter.
not_contain_substring = "edges"
# not_contain_substring = None

contain_substring = None
# contain_substring = "nodes"

In [135]:
# Score summary per (sparse method, routing iterations, architecture) label.
get_score_table(
    best_acc_path="./experiments/effect_routing_new/best_acc.csv",
    dataset="mnist",
    modelname2method=get_routing_iters_used,
    not_contain_substring=not_contain_substring,
    contain_substring=contain_substring,
    sort=False,
)

Unnamed: 0_level_0,count,mean,std
MNIST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
nodes topk 0.3-0.3 1 double,3.0,99.4,0.09
nodes topk 0.3-0.3 2 double,2.0,99.35,0.04
nodes topk 0.3-0.3 3 double,2.0,99.28,0.03
nodes topk 0.3-0.3 No 1 double,3.0,99.4,0.09
nodes topk 0.3-0.3 No 2 double,2.0,99.28,0.04
nodes topk 0.3-0.3 No 3 double,2.0,99.28,0.04
none 1 single,3.0,99.41,0.02
none 2 single,3.0,99.27,0.04
none 3 single,3.0,99.18,0.06


## Regular CapsNet

In [22]:
# Dataset selected for the regular-CapsNet tables in the following cells.
dataset = "fashionmnist"

In [23]:
# Every run in this file is labelled "CapsNet", so the summary has a single row.
get_score_table(
    best_acc_path="./experiments/regular_capsnet/best_acc.csv",
    dataset=dataset,
    modelname2method=lambda _: "CapsNet",
)

Unnamed: 0_level_0,count,mean,std
FASHIONMNIST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CapsNet,3.0,90.11,0.08


In [24]:
# Individual scores of the runs instead of the count/mean/std summary.
get_score_table(
    best_acc_path="./experiments/regular_capsnet/best_acc.csv",
    dataset=dataset,
    modelname2method=lambda _: "CapsNet",
    describe=False,
)

FASHIONMNIST,CapsNet
1,90.02
4,90.13
7,90.17
