In [1]:
import matplotlib
# Select the non-interactive PDF backend. This runs before matplotlib.pyplot is
# imported in the following cell.
matplotlib.use('pdf')

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import os
plt.style.use('default')

In [3]:
# Load the end-to-end benchmark results. Later cells read the columns out_path,
# num_test, test_time, num_train, train_time and dim from this frame.
df = pd.read_csv("../experiments/e2e_raiders/e2e.csv")

In [4]:
def human_format(x):
    """Format a number with three significant figures and a k/M/B suffix.

    Args:
        x: A real number (int or float).

    Returns:
        A string such as "0.116", "929", "55.2k", "1.82M" or "100M". Values
        whose magnitude is below 1000 (after rounding) carry no suffix.
    """
    # Round to three significant figures *before* picking the suffix. Choosing
    # the suffix from the raw value makes inputs just below a boundary render
    # in exponent form (e.g. 999999 -> "1e+03k" instead of "1M").
    rounded = float("{:.3g}".format(x))
    magnitude = abs(rounded)
    for threshold, suffix in ((1000000000, "B"), (1000000, "M"), (1000, "k")):
        if magnitude >= threshold:
            return "{:.3g}{}".format(rounded / threshold, suffix)
    return "{:.3g}".format(x)
def get_algorithm(path):
    """Return the first known algorithm tag that occurs as a substring of ``path``.

    Tags are tried in a fixed priority order, so a path containing several
    tags resolves to the earliest one in that order. Returns ``None`` when no
    tag occurs in ``path``.
    """
    known_tags = ("rkde", "ic2", "sklearn", "nocut", "simple", "ks")
    return next((tag for tag in known_tags if tag in path), None)
def get_dataset(path):
    """Map a result path to its dataset label via substring markers.

    The markers are checked in the order listed; the label paired with the
    first marker found in ``path`` is returned. Returns ``None`` when no
    marker occurs in ``path``.
    """
    marker_to_label = (
        ("bgauss", "gauss"),
        ("hep", "hep"),
        ("tmy3_4", "tmy3_4"),
        ("tmy3.", "tmy3"),
        ("mnist_64", "mnist"),
        ("home", "home"),
    )
    for marker, label in marker_to_label:
        if marker in path:
            return label
    return None
# Display order of the algorithms; rows are ranked by their position in this list.
algorder = [
    "ic2",
    "simple",
    "sklearn",
    "rkde",
    "nocut",
    "ks",
]

In [5]:
# Label each run with the algorithm and dataset parsed from its output path.
df["r_algorithm"] = df["out_path"].map(get_algorithm)
df["r_dataset"] = df["out_path"].map(get_dataset)
# Query throughput: test points per unit of test_time, scaled by 1000
# (presumably converting milliseconds to seconds — TODO confirm the time unit).
df["throughput"] = (
    1000 * df["num_test"] / df["test_time"]
)
# Amortized throughput: training-set size divided by the sum of train_time and
# the test time extrapolated linearly from num_test points to num_train points.
df["a_throughput"] = (
    1000 * df["num_train"] 
    / (df["test_time"]*df["num_train"]/df["num_test"] + df["train_time"])
)

In [6]:
# Rank each row by its algorithm's position in algorder. list.index raises
# ValueError for a value that is not in the list.
df["algorder"] = df["r_algorithm"].map(lambda alg: algorder.index(alg))
# Order rows by dataset label, then by the algorithm rank computed above.
df = df.sort_values(by=["r_dataset","algorder"])
# Order in which datasets are assigned to the panels of the figure.
datasetorder = ["gauss", "tmy3_4", "tmy3", "home", "hep", "mnist"]

In [7]:
from matplotlib.ticker import LogFormatter

In [13]:
# Set the default font size used by Matplotlib text to 10.
matplotlib.rcParams.update({'font.size': 10})

In [15]:
# Amortized-throughput bar chart: one panel per dataset on a 2 x 3 grid, one bar
# per algorithm, each bar annotated with its human-readable value. The figure
# is written to e2e.pdf.
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(10,5),dpi=300)
fig.subplots_adjust(hspace=0.1)
for di,dataset in enumerate(datasetorder):
    ax = axs.ravel()[di]
    cur_df = df[(df.r_dataset == dataset)]
    if cur_df.empty:
        # No runs for this dataset: blank the panel rather than failing on the
        # .iloc[0] lookups that build the title further down.
        ax.set_axis_off()
        continue
    xlocs = np.arange(0,len(cur_df))
    throughputs = cur_df.a_throughput
    # Every panel except the last one (index 5) uses a log-scaled y axis.
    uselog = di <= 4
    # Pass the bar positions positionally: newer Matplotlib releases renamed
    # this parameter from `left` to `x` and no longer accept `left=`.
    bars = ax.bar(xlocs, throughputs, log=uselog)
    # Write each bar's value just above its top edge, centred horizontally.
    for rect, label in zip(bars, throughputs):
        height = rect.get_height()
        ax.text(
            rect.get_x() + rect.get_width()/2,
            height,
            human_format(float(label)),
            ha='center', va='bottom', size = 9,
        )

    ax.set_xticks(xlocs)
    ax.set_xticklabels(
        cur_df.r_algorithm,
        rotation=-45,
    )
    # The "tmy3_4" variant is shown under the plain "tmy3" name in the title.
    if dataset == "tmy3_4":
        p_dataset = "tmy3"
    else:
        p_dataset = dataset
    ax.set_title("Dataset: {}, n={}, d={}".format(
        p_dataset,
        human_format(cur_df.num_train.iloc[0]),
        cur_df.dim.iloc[0]
    ))
    # Raise the upper y limit so the value labels fit above the tallest bar:
    # a fixed 10**8 cap for the first panel, 2x on the other log-scaled
    # panels, and 1.1x on the linear panel.
    ylim_min, ylim_max = ax.get_ylim()
    if di == 0:
        ax.set_ylim(ylim_min, 10**8)
    elif di < 5:
        ax.set_ylim(ylim_min, ylim_max*2)
    else:
        ax.set_ylim(ylim_min, ylim_max*1.1)

    # Only the bottom row gets an x label; only the left column gets a y label.
    if (di >= 3):
        ax.set_xlabel("Algorithm")
    if (di == 0 or di == 3):
        ax.set_ylabel("Throughput")

fig.tight_layout()
fig.savefig("e2e.pdf")



In [117]:
# Mean amortized throughput for each (algorithm, dataset, dim, num_train) group.
# unstack(0) moves the first group key, r_algorithm, into the columns, leaving
# one row per (r_dataset, dim, num_train) and one column per algorithm.
pivoted = df.groupby(
    ["r_algorithm", "r_dataset", "dim", "num_train"]
)["a_throughput"].mean().unstack(0)
pivoted

Unnamed: 0_level_0,Unnamed: 1_level_0,r_algorithm,ic2,nocut,rkde,simple,sklearn
r_dataset,dim,num_train,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
gauss,2,100000000,55186.145629,7.690865,4.869725,0.115648,8.956584
hep,27,10500000,12.59781,0.212976,0.345213,0.620794,0.438428
home,10,928991,1193.070282,18.639559,24.680088,10.255816,67.928238
mnist,64,70000,86.254592,,,49.09948,31.109122
tmy3,8,1822080,7760.649668,3.606713,7.911713,6.297175,7.19857


In [54]:
# Turn the index levels into ordinary columns, order the rows by dimensionality,
# and put the algorithm columns in a fixed presentation order.
# NOTE(review): this cell rebinds `pivoted` from itself, so its input is the
# output of whichever cell last assigned `pivoted`. Selecting with a fixed
# column list raises KeyError if any listed algorithm is absent from the data;
# "ks" is not selected here.
pivoted = pivoted.reset_index().sort_values(by="dim")[[
    "r_dataset",
    "dim",
    "num_train",
    "ic2",
    "simple",
    "sklearn",
    "rkde",
    "nocut"
]]
pivoted

r_algorithm,r_dataset,dim,num_train,ic2,simple,sklearn,rkde,nocut
0,gauss,2,100000000,55186.145629,0.115648,8.956584,4.869725,7.690865
1,tmy3,8,1822080,7760.649668,6.297175,7.19857,7.911713,3.606713
2,home,10,928991,1193.070282,10.255816,67.928238,24.680088,18.639559
3,hep,27,10500000,12.59781,0.620794,0.438428,0.345213,0.212976
4,mnist,64,70000,86.254592,49.09948,31.109122,,
