In [102]:
def max_memory(row):
    """Estimate the maximum number of items a method keeps in memory.

    ``row`` must provide ``"method"`` and ``"K"``; every method other than
    Greedy, ThreeSieves and Random additionally needs ``"epsilon"``.

    * Greedy / ThreeSieves / Random: ``K``.
    * SieveStreaming: ``K`` for every threshold ``(1 + epsilon)^i`` that
      lies in ``[1, K)``.
    * Salsa: three times the SieveStreaming figure.
    * Any other method: ``ceil(K * ln(2) / epsilon)`` plus
      ``K / (1 + epsilon)^i`` for ``i = 0, 1, ...`` as long as
      ``i < log(K) / log(1 + epsilon)``.
    """
    method = row["method"]

    # Methods that hold a single summary of at most K items.
    if method in ("Greedy", "ThreeSieves", "Random"):
        return row["K"]

    eps = row["epsilon"]
    budget = row["K"]

    if method in ("SieveStreaming", "Salsa"):
        smallest = 1  # in all experiments we used m = 1
        largest = smallest * budget

        # Walk the geometric threshold grid and count the entries below the
        # upper bound.
        exponent = np.ceil(np.log(smallest) / np.log(1 + eps))
        n_thresholds = 0
        while np.power(1.0 + eps, exponent) < largest:
            n_thresholds += 1
            exponent += 1

        per_threshold = budget if method == "SieveStreaming" else 3 * budget
        return per_threshold * n_thresholds

    # See https://arxiv.org/pdf/1905.00948.pdf page 6 for memory computation
    total = np.ceil(budget * np.log(2) / eps)
    limit = np.log(budget) / np.log(1 + eps)
    level = 0
    while level < limit:
        total += budget / np.power(1 + eps, level)
        level += 1
    return total
        
def relative_performance(row):
    """Return ``row``'s ``fval`` as a whole percentage of the Greedy ``fval``.

    The reference is looked up in the module-level ``df``: the rows whose
    method is "Greedy" and whose ``K`` and ``sigma`` equal those of ``row``.
    The first matching row is used and the percentage is truncated to int.
    """
    is_reference = (
        (df["method"] == "Greedy")
        & (df["K"] == row["K"])
        & (df["sigma"] == row["sigma"])
    )
    greedy_fval = df.loc[is_reference]["fval"]
    ratio = row["fval"] / greedy_fval

    return int(ratio.values[0] * 100)
    
def nice_name(row):
    """Return the display label for the method in ``row``.

    Greedy, Random, SieveStreaming, SieveStreaming++ and Salsa are labelled
    with their plain method name (epsilon is not part of the label). Every
    other method gets its ``T`` value, cast to int, appended.
    """
    method = row["method"]
    plain_label_methods = ("Greedy", "Random", "SieveStreaming", "SieveStreaming++", "Salsa")
    if method in plain_label_methods:
        return method
    return "{} T = {}".format(method, int(row["T"]))

In [110]:
import pandas as pd
import numpy as np
import os
#import plotly.express as px

# Load the results of every experiment and add the derived columns used by
# the plots below.
datasets = ["forestcover", "kddcup99", "creditfraud", "fact-highlevel", "fact-lowlevel"]

# Human-readable titles; a directory name without an entry is used verbatim.
dataset_titles = {
    "forestcover": "Forestcover",
    "kddcup99": "KDDCup99",
    "creditfraud": "Creditfraud",
    "fact-highlevel": "FACT Highlevel",
    "fact-lowlevel": "FACT Lowlevel",
}

dfs = []
for d in datasets:
    # The frame must be bound to the module-level name `df`, because
    # relative_performance() looks up its Greedy reference there.
    df = pd.read_csv(os.path.join("experiments", d, "results.csv"))

    for column, row_fn in (
        ("nice_name", nice_name),
        ("relative_performance", relative_performance),
        ("max_memory", max_memory),
    ):
        df[column] = df.apply(row_fn, axis=1)

    df["dataset"] = dataset_titles.get(d, d)
    dfs.append(df)

#df = pd.concat(dfs)
#df
#print(df)



In [111]:
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

#if not os.path.exists("images"):
    #os.mkdir("images") 

# Enable the `use_xvfb` option of plotly's orca configuration.
# NOTE(review): presumably this lets fig.write_image() run orca under Xvfb on
# a machine without a display - confirm against the plotly.io.orca docs.
pio.orca.config.use_xvfb = True

def make_plots(dfs, x_name, filter_query):
    """Build a 3-row grid of line plots with one column per dataset.

    Row 1 shows ``relative_performance``, row 2 ``runtime`` and row 3
    ``max_memory``, each plotted against ``x_name`` with one trace per
    distinct ``nice_name``.

    Args:
        dfs: Non-empty sequence of result DataFrames. Each frame needs the
            columns ``dataset``, ``nice_name``, ``relative_performance``,
            ``runtime``, ``max_memory`` and the column named by ``x_name``.
        x_name: Name of the column used for the x-axis.
        filter_query: ``DataFrame.query`` expression applied to every frame
            before plotting.

    Returns:
        The figure created by ``make_subplots`` with all traces, the y-axis
        titles of the first column and the layout (900 x 1500, horizontal
        legend) applied.
    """
    # https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=9
    paired = ['#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928']

    # Assign colours in order of first appearance across ALL frames, wrapping
    # around the palette. Names of the first frame keep the colours they would
    # get from a plain zip with the palette, but a name that only occurs in a
    # later frame (or a 13th name) no longer causes a KeyError below.
    colors = {}
    for frame in dfs:
        for n in frame["nice_name"].unique():
            if n not in colors:
                colors[n] = paired[len(colors) % len(paired)]

    fig = make_subplots(
        rows=3,
        cols=len(dfs),
        subplot_titles=[str(frame["dataset"].unique()[0]) for frame in dfs],
        horizontal_spacing=0.03,
        vertical_spacing=0.05,
    )

    metrics = ("relative_performance", "runtime", "max_memory")
    in_legend = set()  # names that already own a legend entry

    for col, frame in enumerate(dfs, start=1):
        selected = frame.query(filter_query)

        for name in selected["nice_name"].unique():
            df_method = selected.loc[selected["nice_name"] == name]

            # Exactly one legend entry per method: the row-1 trace of the
            # first dataset in which the method appears.
            first_occurrence = name not in in_legend
            in_legend.add(name)

            for plot_row, metric in enumerate(metrics, start=1):
                fig.add_trace(
                    go.Scatter(
                        x=df_method[x_name],
                        y=df_method[metric],
                        name=name,
                        showlegend=first_occurrence and plot_row == 1,
                        marker=dict(color=colors[name]),
                    ),
                    row=plot_row,
                    col=col,
                )

    fig.update_yaxes(title_text="Relative Performance", row=1, col=1, title_standoff = 25, title_font = {"size": 14})
    fig.update_yaxes(title_text="Runtime [s]", row=2, col=1, title_font = {"size": 14})
    fig.update_yaxes(title_text="Max. Memory", row=3, col=1, title_font = {"size": 14})

    fig.update_layout(
        legend=dict(orientation="h",yanchor="bottom",y=-0.1,xanchor="left",x=0.1),
        margin={'l': 5, 'r': 20, 't': 20, 'b': 5},
        height=900, width=1500
    )

    return fig

# In both queries below, "x != x" is true only for NaN, so rows in which
# epsilon / T is missing are kept alongside the explicitly listed values.

# Figure 1: every metric over K (Greedy excluded, epsilon fixed to 0.001).
fig = make_plots(
    dfs,
    "K",
    'method != "Greedy" & (epsilon == 0.001 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)',
)
for col in range(1, len(dfs) + 1):
    # Runtime and memory span orders of magnitude -> logarithmic y-axes.
    for log_row in (2, 3):
        fig.update_yaxes(row=log_row, col=col, type="log")
    fig.update_yaxes(dtick=10, row=1, col=col)

    for tick_row in (1, 2, 3):
        fig.update_xaxes(dtick=10, row=tick_row, col=col)
    fig.update_xaxes(title_text="K", row=3, col=col, title_font={"size": 14})
fig.write_image("all_over_K.pdf")
fig.show()

# Figure 2: every metric over epsilon for K == 75 (Greedy and Random excluded).
fig = make_plots(
    dfs,
    "epsilon",
    'method != "Greedy" & K == 75 & method != "Random" & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)',
)
for col in range(1, len(dfs) + 1):
    for log_row in (2, 3):
        fig.update_yaxes(row=log_row, col=col, type="log")
    fig.update_yaxes(dtick=10, row=1, col=col)
    fig.update_xaxes(title_text="ε", row=3, col=col, title_font={"size": 14})
fig.write_image("all_over_eps.pdf")
fig.show()


In [134]:
# ThreeSieves-only figure for the two FACT datasets: relative performance
# over K (top row) and over epsilon (bottom row), one trace per nice_name.
# dfs[3] / dfs[4] are the "fact-highlevel" / "fact-lowlevel" frames, following
# the order of the `datasets` list used to build `dfs`.
dfFactHi = dfs[3].query("method == \"ThreeSieves\"")
dfFactLo = dfs[4].query("method == \"ThreeSieves\"")

# https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=9
paired = ['#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928']

# Colours are assigned in order of first appearance over BOTH frames and wrap
# around the palette, so a name that only occurs in the low-level frame (or a
# 13th name) cannot trigger a KeyError when it is plotted.
colors = {}
for fact_df in (dfFactHi, dfFactLo):
    for n in fact_df["nice_name"].unique():
        if n not in colors:
            colors[n] = paired[len(colors) % len(paired)]

fig = make_subplots(rows=2, cols=2, subplot_titles=["Fact Highlevel", "Fact Lowlevel"], horizontal_spacing = 0.03, vertical_spacing = 0.07)

# Fixed parameters of the two slices.
# NOTE(review): the top-row queries filter on epsilon == 0.01, whereas the
# "all_over_K" figure uses 0.001 - confirm which value is intended here.
epsilon = 0.01
K = 50

for col, fact_df in enumerate((dfFactHi, dfFactLo), start=1):
    for name in fact_df["nice_name"].unique():
        # Top row: performance over K at fixed epsilon. Only the first column
        # contributes legend entries.
        tmp = fact_df.query("nice_name == \"{}\" & epsilon == {}".format(name, epsilon))
        fig.add_trace(go.Scatter(x = tmp["K"], y = tmp["relative_performance"], name = name, showlegend = (col == 1), marker=dict(color = colors[name])), row = 1, col = col)

        # Bottom row: performance over epsilon at fixed K.
        tmp = fact_df.query("nice_name == \"{}\"& K == {}".format(name, K))
        fig.add_trace(go.Scatter(x = tmp["epsilon"], y = tmp["relative_performance"], name = name, showlegend = False, marker=dict(color = colors[name])), row = 2, col = col)

fig.update_yaxes(title_text="Relative Performance", row=1, col=1, title_standoff = 25, title_font = {"size": 14})
fig.update_yaxes(title_text="Relative Performance", row=2, col=1, title_font = {"size": 14})
fig.update_xaxes(title_text="K", row=1, col=1, title_font = {"size": 14})
fig.update_xaxes(title_text="K", row=1, col=2, title_font = {"size": 14})
fig.update_xaxes(title_text="ε", row=2, col=1, title_font = {"size": 14})
fig.update_xaxes(title_text="ε", row=2, col=2, title_font = {"size": 14})

fig.update_layout(
    legend=dict(orientation="h",yanchor="bottom",y=-0.1,xanchor="left",x=0.1),
    margin={'l': 5, 'r': 20, 't': 20, 'b': 5},
    height=900, width=1500
)

fig.show()
fig.write_image("T_over_fact.pdf")

#fig = make_plots(, "epsilon", "K == 75 & method != \"Random\"")
#for i, df in enumerate(dfs):
#    fig.update_yaxes(row=2, col=i+1, type="log")
#    fig.update_yaxes(row=3, col=i+1, type="log")
#    fig.update_yaxes(dtick = 10, row = 1, col = i + 1)
#    fig.update_xaxes(title_text="ε", row=3, col=i + 1, title_font = {"size": 14})
