In [1]:
# Ask the inline backend to render figures as SVG.
%config InlineBackend.figure_formats = ['svg']

# Show matplotlib figures inline in the notebook output.
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math
from tqdm import tqdm_notebook as tqdm
import pickle
import os
from typing import List, Dict
import seaborn as sns
# Make seaborn's "colorblind" palette the default color cycle.
sns.set_palette("colorblind")

In [2]:
# Move into the benchmark results directory; the read_csv calls below use
# paths relative to it. The absolute target is resolved only once per kernel,
# so re-running this cell is harmless instead of resolving "../../" against
# the directory we already moved into (which would fail or land elsewhere).
if "RESULTS_DIR" not in globals():
    RESULTS_DIR = os.path.abspath("../../java/output/results")
os.chdir(RESULTS_DIR)

In [3]:
# Result directories to read; each one holds a load_time.csv.
datasets = [
    "l_caida_f",
    "l_zipf_f",
    "l_mnetwork_f",
    "l_mos_f",
    "l_power_q",
    "l_uniform_q",
    "l_mrecords_q",
]

# One frame per directory, tagged with the directory name in a "dataset"
# column, then stacked into a single long table.
dfs = [
    pd.read_csv("{}/load_time.csv".format(name)).assign(dataset=name)
    for name in datasets
]
df = pd.concat(dfs)

In [4]:
# Column order used when the per-sketch table is built below.
sketch_order = [
    "cooperative",
    "pps",
    "random_sample",
    "truncation",
    "dyadic_truncation",
    "yahoo_mg",
    "cms_min",
    "kll",
]

In [5]:
# Pivot the long table into dataset rows x sketch columns of construct_time,
# then order columns by sketch_order and rows by the datasets list.
df_out = (
    df.set_index(["dataset", "sketch"])["construct_time"]
    .unstack("sketch")
    [sketch_order]
    .loc[datasets]
)

In [6]:
# Display the pivoted table (rows: datasets, columns: sketches).
df_out

sketch,cooperative,pps,random_sample,truncation,dyadic_truncation,yahoo_mg,cms_min,kll
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
l_caida_f,1450.054603,522.094061,524.561016,695.976852,4002.838724,645.947853,632.050238,
l_zipf_f,1913.142789,633.660041,674.925498,717.663492,5767.335409,505.557357,635.110885,
l_mnetwork_f,235.417457,148.538049,149.393627,176.705723,992.915763,337.735338,605.919555,
l_mos_f,206.133548,171.301628,166.589718,177.071103,1517.849777,172.564066,568.682455,
l_power_q,569.828088,102.551309,125.721448,97.362771,1096.35128,,,96.362615
l_uniform_q,80228.660163,58.264661,83.476041,57.798026,903.371432,,,48.610826
l_mrecords_q,900.891877,478.610476,490.686032,477.832485,4878.212415,,,555.85777


In [24]:
# Render the table as LaTeX source with one decimal place per number and
# print it as plain text.
latex_src = df_out.to_latex(float_format="%.1f")
print(latex_src)

\begin{tabular}{lrrrrrrrr}
\toprule
sketch &  cooperative &   pps &  random\_sample &  truncation &  dyadic\_truncation &  yahoo\_mg &  cms\_min &   kll \\
dataset      &              &       &                &             &                    &           &          &       \\
\midrule
l\_caida\_f    &       1451.0 & 575.3 &          625.0 &       834.1 &             4766.0 &     723.1 &    731.7 &   NaN \\
l\_zipf\_f     &       1967.9 & 685.6 &          823.6 &       845.2 &             8741.6 &     592.8 &    792.3 &   NaN \\
l\_mnetwork\_f &        388.4 & 193.8 &          213.7 &       285.7 &             1144.2 &     433.6 &    716.7 &   NaN \\
l\_mos\_f      &        293.8 & 178.2 &          217.7 &       236.8 &             1102.2 &     248.1 &    646.2 &   NaN \\
l\_power\_q    &        669.5 & 124.7 &          176.6 &       164.7 &             1165.8 &       NaN &      NaN & 148.7 \\
l\_uniform\_q  &      47357.7 &  82.3 &          131.9 &       120.7 &              949.3 &  

In [16]:
# Second group of result directories. Note that this cell rebinds datasets,
# dfs, df, sketch_order and df_out, which earlier cells also define.
datasets = [
    "c_bcube_f",
    "c_insta_f",
    "c_mnetwork_f",
    "c_mos_f",
    "c_bcube_q",
    "c_mrecords_q",
]

# One frame per directory, tagged with the directory name in a "dataset"
# column, stacked into a single long table.
dfs = [
    pd.read_csv("{}/load_time.csv".format(name)).assign(dataset=name)
    for name in datasets
]
# build_time is the sum of the construct_time and plan_time columns.
df = pd.concat(dfs).assign(
    build_time=lambda frame: frame["construct_time"] + frame["plan_time"]
)

# Column order for the per-sketch table.
sketch_order = [
    "pps_coop", "pps",
    "random_sample", "random_sample_prop", "random_sample_strat",
    "truncation",
    "yahoo_mg", "cms_min", "kll",
]

# Pivot into dataset rows x sketch columns of build_time, ordered by
# sketch_order (columns) and datasets (rows).
df_out = (
    df.set_index(["dataset", "sketch"])["build_time"]
    .unstack("sketch")
    [sketch_order]
    .loc[datasets]
)

In [17]:
# Display the pivoted build_time table (rows: datasets, columns: sketches).
df_out

sketch,pps_coop,pps,random_sample,random_sample_prop,random_sample_strat,truncation,yahoo_mg,cms_min,kll
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
c_bcube_f,1284.581583,595.035654,638.696407,644.918182,664.620594,695.792637,441.481614,690.028579,
c_insta_f,4752.45379,2565.023171,2680.722871,2694.573582,2699.49403,3421.894625,1573.643842,2246.639947,
c_mnetwork_f,536.855223,99.744092,102.709225,101.302842,125.359945,99.574187,238.071492,630.242594,
c_mos_f,497.643675,123.131784,107.216914,106.4923,130.323085,118.695574,159.8696,628.74322,
c_bcube_q,736.381074,684.164092,698.571304,701.089895,738.759905,683.210773,,,543.695517
c_mrecords_q,455.917681,419.743674,424.305392,435.124555,445.351966,419.138596,,,492.346389


In [18]:
# Render the table as LaTeX source with one decimal place per number and
# print it as plain text.
latex_src = df_out.to_latex(float_format="%.1f")
print(latex_src)

\begin{tabular}{lrrrrrrrrr}
\toprule
sketch &  pps\_coop &    pps &  random\_sample &  random\_sample\_prop &  random\_sample\_strat &  truncation &  yahoo\_mg &  cms\_min &   kll \\
dataset      &           &        &                &                     &                      &             &           &          &       \\
\midrule
c\_bcube\_f    &    1284.6 &  595.0 &          638.7 &               644.9 &                664.6 &       695.8 &     441.5 &    690.0 &   NaN \\
c\_insta\_f    &    4752.5 & 2565.0 &         2680.7 &              2694.6 &               2699.5 &      3421.9 &    1573.6 &   2246.6 &   NaN \\
c\_mnetwork\_f &     536.9 &   99.7 &          102.7 &               101.3 &                125.4 &        99.6 &     238.1 &    630.2 &   NaN \\
c\_mos\_f      &     497.6 &  123.1 &          107.2 &               106.5 &                130.3 &       118.7 &     159.9 &    628.7 &   NaN \\
c\_bcube\_q    &     736.4 &  684.2 &          698.6 &               701.1 &    