In [10]:
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import json
from pandas.io.json import json_normalize 
from plotly.validators.scatter.marker import SymbolValidator

import re

def natural_sort(l): 
    convert = lambda text: int(text) if text.isdigit() else text.lower() 
    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
    return sorted(l, key = alphanum_key)

def read_data(path):
    data = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            data.append(json.loads(line))
    df = json_normalize(data)
    df = df.applymap(lambda x: x[0] if (isinstance(x, list) and len(x) == 1) else x)
    return df

def max_memory(row):
    if row["method"] in ["Greedy","ThreeSieves","Random"]:
        return row["K"]
    elif row["method"] in ["SieveStreaming", "Salsa"]:
        epsilon = row["epsilon"]
        K = row["K"]
        lower = 1 # in all experiments we used m = 1
        upper = lower * K

        ilower = np.ceil(np.log(lower) / np.log(1+epsilon))
        ts = []
        t = np.power(1.0 + epsilon, ilower)
        while (t < upper):
            ts.append(t)
            ilower += 1
            t = np.power(1.0 + epsilon, ilower)

        if row["method"] == "SieveStreaming":
            return K*len(ts)
        else:
            return 3*K*len(ts)
    else:
        # See https://arxiv.org/pdf/1905.00948.pdf page 6 for memory computation
        epsilon = row["epsilon"]
        K = row["K"]

        mem = np.ceil(K*np.log(2) / epsilon)
        upper = np.log(K) / np.log(1+epsilon)
        i = 0
        while(i < upper):
            mem += K/np.power(1+epsilon,i)
            i += 1
        return mem
        
def relative_performance(row):
    K = row["K"]
    sigma = row["sigma"]
    reference = df.loc[ (df["method"] == "Greedy") & (df["K"] == K) & (df["sigma"] == sigma) ]
    perf = row["scores.mean_fval"] / reference["scores.mean_fval"]

    return int(perf.values[0]*100)
    
def nice_name(row):
    if row["method"] in ["Greedy", "Random", "IndependentSetImprovement"]:
        return row["method"]
    elif row["method"] in ["SieveStreaming", "SieveStreaming++", "Salsa"]:
        #return "{} ε = {}".format(row["method"], row["epsilon"])
        return row["method"]
    else:
        #return "{} ε = {}, T = {}".format(row["method"], row["epsilon"], row["T"])
        return "{} T = {}".format(row["method"], int(row["T"]))

def make_plots(dfs, x_name, filter_queries, colors = None, markers= None):    
    # https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=9
    paired = ['#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928'] #
    #raw_markers = SymbolValidator().values
    raw_markers = ["circle", "square", "diamond", "cross", "x", "triangle-up", "triangle-down", "triangle-left", "triangle-right", "pentagon", "hexagon", "hexagon2", "star", "hourglass"]

    if colors is None:
        colors = {}
        for n, c in zip(natural_sort(dfs[0]["nice_name"].unique()), paired):
            colors[n] = c
    
    if markers is None:    
        markers = {}
        for n, m in zip(natural_sort(dfs[0]["nice_name"].unique()), raw_markers):
            markers[n] = m 
    
    #print(colors)

    fig = make_subplots(rows=3, cols=len(dfs), subplot_titles=[str(df["dataset"].unique()[0]) for df in dfs], horizontal_spacing = 0.03, vertical_spacing = 0.05)
    
    for i in fig['layout']['annotations']:
        i['font']['size'] = 20 

    for i in range(4):
        for j in range(len(dfs) + 1):
            fig.update_yaxes(row = i, col = j, title_font = {"size": 18}, linewidth = 2, tickfont = {"size":13})
            fig.update_xaxes(row = i, col = j, title_font = {"size": 18}, linewidth = 2, tickfont = {"size":13})

    for i, (df, query) in enumerate(zip(dfs, filter_queries)):
        dff = df.query(query)
        
        if i == 0:
            showlegend = True
        else:
            showlegend = False

        x = None
        for name in natural_sort(dff["nice_name"].unique()):
            df_method = dff.loc[ dff["nice_name"] == name]
            df_method = df_method.sort_values(by = [x_name])
            if x is None or len(x) < len(df_method[x_name].values):
                x = df_method[x_name].values

        for name in natural_sort(dff["nice_name"].unique()):
            df_method = dff.loc[ dff["nice_name"] == name]
            df_method = df_method.sort_values(by = [x_name])
            
            if len(df_method[x_name].values) == 1:
                x_method = x
                relative_performance = [df_method["scores.relative_performance"].values[0] for _ in x]
                mean_fit_time = [df_method["scores.mean_fit_time"].values[0] for _ in x]
                mean_num_elements_stored = [df_method["scores.mean_num_elements_stored"].values[0] for _ in x]

                fig.add_trace(go.Scatter(x = x_method, y = relative_performance, name = name, showlegend = showlegend, marker=dict(color = colors[name], size=7, symbol=markers[name]), mode='lines+markers', line = {"width":2}), row = 1, col = i+1)
                fig.add_trace(go.Scatter(x = x_method, y = mean_fit_time, name = name, showlegend = False, marker=dict(color = colors[name], size=7, symbol=markers[name]), mode='lines+markers', line = {"width":2}), row = 2, col = i+1)
                fig.add_trace(go.Scatter(x = x_method, y = mean_num_elements_stored, name = name, showlegend = False, marker=dict(color = colors[name], size=7, symbol=markers[name]), mode='lines+markers', line = {"width":2}), row = 3, col = i+1)
            else:
                fig.add_trace(go.Scatter(x = df_method[x_name], y = df_method["scores.relative_performance"], name = name, showlegend = showlegend, marker=dict(color = colors[name], size=7, symbol=markers[name]), mode='lines+markers', line = {"width":2}), row = 1, col = i+1)
                fig.add_trace(go.Scatter(x = df_method[x_name], y = df_method["scores.mean_fit_time"], name = name, showlegend = False, marker=dict(color = colors[name], size=7, symbol=markers[name]), mode='lines+markers', line = {"width":2}), row = 2, col = i+1)
                fig.add_trace(go.Scatter(x = df_method[x_name], y = df_method["scores.mean_num_elements_stored"], name = name, showlegend = False, marker=dict(color = colors[name], size=7, symbol=markers[name]), mode='lines+markers', line = {"width":2}), row = 3, col = i+1)

    fig.update_yaxes(title_text="Relative Performance", row=1, col=1, title_standoff = 25)
    fig.update_yaxes(title_text="Runtime [s]", row=2, col=1)
    fig.update_yaxes(title_text="Num. elements stored", row=3, col=1)

    #fig.update_layout(
    #    template="simple_white",
    #    legend=dict(orientation="h",yanchor="bottom",y=-0.1,xanchor="left",x=0.00),
    #    margin={'l': 5, 'r': 20, 't': 20, 'b': 5},
    #    height=900, width=1500
    #)

    return fig

pio.orca.config.use_xvfb = True


In [15]:
import numpy as np
import pandas as pd
import os
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

pio.orca.config.executable = "/home/buschjae/.conda/envs/pyssm/bin/orca"
datasets = ["forestcover", "kddcup99", "creditfraud", "fact-highlevel", "fact-lowlevel"] 

dfs = []
for d in datasets:
    df = read_data(os.path.join("experiments", d, "results", "results.jsonl"))
    #df = pd.read_csv(os.path.join("experiments",d,"results.csv"))
    #print(df)
    df["nice_name"] = df.apply(nice_name,axis=1)
    df["scores.relative_performance"] = df.apply(relative_performance,axis=1)
    df["scores.max_memory"] = df.apply(max_memory,axis=1)
    if d == "forestcover":
        df["dataset"] = "Forestcover"
    elif d == "kddcup99":
        df["dataset"] = "KDDCup99"
    elif d == "creditfraud":
        df["dataset"] = "Creditfraud"
    elif d == "fact-highlevel":
        df["dataset"] = "FACT Highlevel"
    elif d == "fact-lowlevel":
        df["dataset"] = "FACT Lowlevel"
    else:
        df["dataset"] = d
    dfs.append(df)

for eps in [0.001, 0.01, 0.05, 0.1]:
    queries = [
        "(epsilon == {} | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(eps) for _ in datasets
    ]
    fig = make_plots(dfs, "K", queries)
    for i, df in enumerate(dfs):
        #fig.update_yaxes(row=1, col=i+1, type="log")
        fig.update_yaxes(row=2, col=i+1, type="log")
        fig.update_yaxes(row=3, col=i+1, type="log")
        fig.update_yaxes(dtick = 10, row = 1, col = i + 1)

        fig.update_xaxes(dtick = 10, row = 1, col = i + 1)
        fig.update_xaxes(dtick = 10, row = 2, col = i + 1)
        fig.update_xaxes(dtick = 10, row = 3, col = i + 1)
        fig.update_xaxes(title_text="K", row=3, col=i + 1)

    fig.update_layout(
        template="simple_white",
        legend=dict(orientation="h",yanchor="bottom",y=-0.15,xanchor="left",x=0.07,font={"size": 15}),
        margin={'l': 5, 'r': 20, 't': 25, 'b': 5},
        height=900, width=1500
    )
    fig.write_image("all_over_K_eps_{}.pdf".format(eps))
    fig.show()

#Compute graphs for a "fair" selection of hyperparameters. 
queries = [
    "(method == \"Greedy\") | (method == \"IndependentSetImprovement\") | (method == \"Random\") | (method == \"ThreeSieves\" & (T == 1000 | T == 2500) & epsilon == 0.001) | ((method == \"Salsa\" | method == \"SieveStreaming\" | method == \"SieveStreaming++\") & (epsilon == 0.1) )" for _ in datasets
]
fig = make_plots(dfs, "K", queries)
for i, df in enumerate(dfs):
    #fig.update_yaxes(row=1, col=i+1, type="log")
    fig.update_yaxes(row=2, col=i+1, type="log")
    fig.update_yaxes(row=3, col=i+1, type="log")
    fig.update_yaxes(dtick = 10, row = 1, col = i + 1)

    fig.update_xaxes(dtick = 10, row = 1, col = i + 1)
    fig.update_xaxes(dtick = 10, row = 2, col = i + 1)
    fig.update_xaxes(dtick = 10, row = 3, col = i + 1)
    fig.update_xaxes(title_text="K", row=3, col=i + 1)

fig.update_layout(
    template="simple_white",
    legend=dict(orientation="h",yanchor="bottom",y=-0.15,xanchor="left",x=0.07,font={"size": 15}),
    margin={'l': 5, 'r': 20, 't': 25, 'b': 5},
    height=900, width=1500
)
fig.write_image("fair_selection.pdf")
fig.show()

for K in [25, 50, 75, 100]:
    queries = [
        "K == {} & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(K) for _ in datasets
        #"K == {} & method != \"IndependentSetImprovement\" & method != \"Greedy\" & method != \"Random\" & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(K) for _ in datasets
    ]
    fig = make_plots(dfs, "epsilon", queries)
    for i, df in enumerate(dfs):
        fig.update_yaxes(row=2, col=i+1, type="log")
        fig.update_yaxes(row=3, col=i+1, type="log")
        fig.update_yaxes(dtick = 10, row = 1, col = i + 1)
        fig.update_xaxes(title_text="ε", row=3, col=i + 1)

    fig.update_layout(
        template="simple_white",
        legend=dict(orientation="h",yanchor="bottom",y=-0.15,xanchor="left",x=0.07,font={"size": 15}),
        margin={'l': 5, 'r': 20, 't': 25, 'b': 5},
        height=900, width=1500
    )
    fig.write_image("all_over_eps_K_{}.pdf".format(K))
    fig.show()

# paired = ['#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928'] #
# raw_markers = ["circle", "square", "diamond", "cross", "x", "triangle-up", "triangle-down", "triangle-left", "triangle-right", "pentagon", "hexagon", "hexagon2", "star", "hourglass"]

# colors = {}
# markers = {}
# for n, c, m in zip(natural_sort(dfs[0]["nice_name"].unique()), paired, raw_markers):
#     colors[n] = c
#     markers[n] = m 

# dfFactHi = dfs[3].query("method == \"ThreeSieves\"")
# dfFactLo = dfs[4].query("method == \"ThreeSieves\"")

# fig = make_subplots(rows=2, cols=2, subplot_titles=["Fact Highlevel", "Fact Lowlevel"], horizontal_spacing = 0.03, vertical_spacing = 0.07)
# for i in fig['layout']['annotations']:
#     i['font']['size'] = 20
# for i in range(3):
#     for j in range(3):
#         fig.update_yaxes(row = i, col = j, title_font = {"size": 20}, linewidth = 2, tickfont = {"size":13})
#         fig.update_xaxes(row = i, col = j, title_font = {"size": 20}, linewidth = 2, tickfont = {"size":13})

# epsilon = 0.001
# K = 50
# for name in natural_sort(dfFactHi["nice_name"].unique()):
#     tmp = dfFactHi.query("nice_name == \"{}\" & epsilon == {}".format(name, 0.01))
#     tmp = tmp.sort_values(by = ["K"])
#     fig.add_trace(
#         go.Scatter(x = tmp["K"], y = tmp["scores.relative_performance"], name = name, mode='lines+markers', showlegend = True, 
#             marker=dict(color = colors[name], size=7, symbol=markers[name]), line = {"width":2}
#         ), row = 1, col = 1
#     )

#     tmp = dfFactHi.query("nice_name == \"{}\"& K == {}".format(name, K))
#     tmp = tmp.sort_values(by = ["epsilon"])
#     fig.add_trace(
#         go.Scatter(x = tmp["epsilon"], y = tmp["scores.relative_performance"], name = name, mode='lines+markers', showlegend = False, 
#             marker=dict(color = colors[name], size=7, symbol=markers[name]), line = {"width":2}
#         ), row = 2, col = 1
#     )
   
# for name in natural_sort(dfFactLo["nice_name"].unique()):
#     tmp = dfFactLo.query("nice_name == \"{}\" & epsilon == {}".format(name, 0.01))
#     tmp = tmp.sort_values(by = ["K"])
#     fig.add_trace(go.Scatter(x = tmp["K"], y = tmp["scores.relative_performance"], name = name, mode='lines+markers', line = {"width":2}, showlegend = False, marker=dict(color = colors[name], size=7, symbol=markers[name])), row = 1, col = 2)

#     tmp = dfFactLo.query("nice_name == \"{}\"& K == {}".format(name, K))
#     tmp = tmp.sort_values(by = ["epsilon"])
#     fig.add_trace(go.Scatter(x = tmp["epsilon"], y = tmp["scores.relative_performance"], name = name, mode='lines+markers', line = {"width":2}, showlegend = False, marker=dict(color = colors[name], size=7, symbol=markers[name])), row = 2, col = 2)

# fig.update_yaxes(title_text="Relative Performance", row=1, col=1)
# fig.update_yaxes(title_text="Relative Performance", row=2, col=1)
# fig.update_xaxes(dtick = 10, title_text="K", row=1, col=2)
# fig.update_xaxes(title_text="ε", row=2, col=2)
# fig.update_xaxes(dtick = 10, title_text="K", row=1, col=1)
# fig.update_xaxes(title_text="ε", row=2, col=1)

# fig.update_layout(
#     template="simple_white",
#     legend=dict(orientation="h",yanchor="bottom",y=-0.12,xanchor="left",x=0.12,font={"size": 15}),
#     margin={'l': 5, 'r': 20, 't': 25, 'b': 5},
#     height=900, width=1100
# )

# fig.show()
# fig.write_image("T_over_fact.pdf")




pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead


pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead


pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead


pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead


pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead



In [8]:
import numpy as np
import pandas as pd
import os
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

pio.orca.config.executable = "/home/buschjae/.conda/envs/pyssm/bin/orca"
datasets = ["forestcover", "creditfraud"] 

dfs = []
for d in datasets:
    df = read_data(os.path.join("experiments", d, "results", "results.jsonl"))
    #df = pd.read_csv(os.path.join("experiments",d,"results.csv"))

    df["nice_name"] = df.apply(nice_name,axis=1)
    df["scores.relative_performance"] = df.apply(relative_performance,axis=1)
    df["scores.max_memory"] = df.apply(max_memory,axis=1)
    if d == "forestcover":
        df["dataset"] = "Forestcover"
    elif d == "kddcup99":
        df["dataset"] = "KDDCup99"
    elif d == "creditfraud":
        df["dataset"] = "Creditfraud"
    elif d == "fact-highlevel":
        df["dataset"] = "FACT Highlevel"
    elif d == "fact-lowlevel":
        df["dataset"] = "FACT Lowlevel"
    else:
        df["dataset"] = d
    dfs.append(df)

queries = [
    "(epsilon == 0.001 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)" for _ in datasets
]
fig = make_plots(dfs, "K", queries)
for i, df in enumerate(dfs):
    #fig.update_yaxes(row=1, col=i+1, type="log")
    fig.update_yaxes(row=2, col=i+1, type="log")
    fig.update_yaxes(row=3, col=i+1, type="log")
    fig.update_yaxes(dtick = 10, row = 1, col = i + 1)

    fig.update_xaxes(dtick = 10, row = 1, col = i + 1)
    fig.update_xaxes(dtick = 10, row = 2, col = i + 1)
    fig.update_xaxes(dtick = 10, row = 3, col = i + 1)
    fig.update_xaxes(title_text="K", row=3, col=i + 1, title_font = {"size": 16})

fig.update_layout(
    template="simple_white",
    legend=dict(orientation="h",yanchor="bottom",y=-0.19,xanchor="left",x=0.05),
    margin={'l': 5, 'r': 5, 't': 20, 'b': 5},
    height=700, width=900
)
fig.write_image("all_over_K.pdf")
fig.show()

queries = [
    "K == 50 & method != \"IndependentSetImprovement\" & method != \"Greedy\" & method != \"Random\" & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)" for _ in datasets
]
fig = make_plots(dfs, "epsilon", queries)
for i, df in enumerate(dfs):
    fig.update_yaxes(row=2, col=i+1, type="log")
    fig.update_yaxes(row=3, col=i+1, type="log")
    fig.update_yaxes(dtick = 10, row = 1, col = i + 1)
    fig.update_xaxes(title_text="ε", row=3, col=i + 1, title_font = {"size": 16})

fig.update_layout(
    template="simple_white",
    legend=dict(orientation="h",yanchor="bottom",y=-0.19,xanchor="left",x=0.0),
    margin={'l': 5, 'r': 5, 't': 20, 'b': 5},
    height=700, width=900
)
fig.write_image("all_over_eps.pdf")
fig.show()


#queries = [
#    "K == 50 & method != \"IndependentSetImprovement\" & method != \"Greedy\" & method != \"Random\" & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)" for _ in datasets
#]
#fig = make_plots(dfs, "epsilon", queries)
#for i, df in enumerate(dfs):
#    fig.update_yaxes(row=2, col=i+1, type="log")
#    fig.update_yaxes(row=3, col=i+1, type="log")
#    fig.update_yaxes(dtick = 10, row = 1, col = i + 1)
#    fig.update_xaxes(title_text="ε", row=3, col=i + 1, title_font = {"size": 16})

#fig.update_layout(
#    template="simple_white",
#    legend=dict(orientation="h",yanchor="bottom",y=-0.19,xanchor="left",x=0.05),
#    margin={'l': 5, 'r': 5, 't': 20, 'b': 5},
#    height=700, width=900
#)
#fig.write_image("all_over_eps.pdf")
#fig.show()

In [41]:
import numpy as np
import pandas as pd
import os
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

# Take the same colors / markers from the previous plots
# Comment-out this code if there are no previous experiments
#colors = {}
#markers = {}
#for n, c, m in zip(natural_sort(dfs[0]["nice_name"].unique()), paired, raw_markers):
#    colors[n] = c
#    markers[n] = m 

datasets = ["stream51", "abc", "examiner"] 
dfs = []

for d in datasets:
    df = read_data(os.path.join("experiments", d, "results", "results.jsonl"))
    df["nice_name"] = df.apply(nice_name,axis=1)
    df["scores.relative_performance"] = df.apply(relative_performance,axis=1)
    df["scores.max_memory"] = df.apply(max_memory,axis=1)
    df["dataset"] = d
    dfs.append(df)

queries = [
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.1 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[0]["sigma"].unique()[4]),
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.1 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[1]["sigma"].unique()[0]),
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.1 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[2]["sigma"].unique()[3])
]

fig = make_plots(dfs, "K", queries, colors, markers)
for i, df in enumerate(dfs):
    #fig.update_yaxes(row=1, col=i+1, type="log")
    fig.update_yaxes(row=2, col=i+1, type="log")
    fig.update_yaxes(row=3, col=i+1, type="log")
    fig.update_yaxes(dtick = 10, row = 1, col = i + 1)

    fig.update_xaxes(dtick = 10, row = 1, col = i + 1)
    fig.update_xaxes(dtick = 10, row = 2, col = i + 1)
    fig.update_xaxes(dtick = 10, row = 3, col = i + 1)
    fig.update_xaxes(title_text="K", row=3, col=i + 1)

fig.update_layout(
    template="simple_white",
    legend=dict(orientation="h",yanchor="bottom",y=-0.1,xanchor="left",x=0.08),
    margin={'l': 5, 'r': 20, 't': 25, 'b': 5},
    height=900, width=1500
)
fig.write_image("stream_all_over_K_0.1.pdf")
fig.show()

queries = [
    "(method != \"Greedy\") & (sigma == {}) & (epsilon == 0.01 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[0]["sigma"].unique()[4]),
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.01 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[1]["sigma"].unique()[0]),
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.01 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[2]["sigma"].unique()[3])
]
fig = make_plots(dfs, "K", queries, colors, markers)
for i, df in enumerate(dfs):
    #fig.update_yaxes(row=1, col=i+1, type="log")
    fig.update_yaxes(row=2, col=i+1, type="log")
    fig.update_yaxes(row=3, col=i+1, type="log")
    fig.update_yaxes(dtick = 10, row = 1, col = i + 1)

    fig.update_xaxes(dtick = 10, row = 1, col = i + 1)
    fig.update_xaxes(dtick = 10, row = 2, col = i + 1)
    fig.update_xaxes(dtick = 10, row = 3, col = i + 1)
    fig.update_xaxes(title_text="K", row=3, col=i + 1)

fig.update_layout(
    template="simple_white",
    legend=dict(orientation="h",yanchor="bottom",y=-0.1,xanchor="left",x=0.08),
    margin={'l': 5, 'r': 20, 't': 25, 'b': 5},
    height=900, width=1500
)
fig.write_image("stream_all_over_K_0.01.pdf")
fig.show()

In [51]:
import numpy as np
import pandas as pd
import os
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

# Take the same colors / markers from the previous plots
# Comment-out this code if there are no previous experiments
#colors = {}
#markers = {}
#for n, c, m in zip(natural_sort(dfs[0]["nice_name"].unique()), paired, raw_markers):
#    colors[n] = c
#    markers[n] = m 

datasets = ["stream51", "abc", "examiner"] 
dfs = []

for d in datasets:
    df = read_data(os.path.join("experiments", d, "results", "results.jsonl"))
    df["nice_name"] = df.apply(nice_name,axis=1)
    df["scores.relative_performance"] = df.apply(relative_performance,axis=1)
    df["scores.max_memory"] = df.apply(max_memory,axis=1)
    df["dataset"] = d
    dfs.append(df)

fig = make_subplots(rows=2, cols=3, subplot_titles=datasets, horizontal_spacing = 0.03, vertical_spacing = 0.07)
for i in fig['layout']['annotations']:
    i['font']['size'] = 20 

for i in range(3):
    for j in range(4):
        fig.update_yaxes(row = i, col = j, title_font = {"size": 18}, linewidth = 2, tickfont = {"size":13})
        fig.update_xaxes(row = i, col = j, title_font = {"size": 18}, linewidth = 2, tickfont = {"size":13})

queries = [
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.1 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[0]["sigma"].unique()[4]),
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.1 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[1]["sigma"].unique()[0]),
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.1 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[2]["sigma"].unique()[3])
]

for i, df in enumerate(dfs):
    dff = df.query(queries[i])
    for name in natural_sort(dff["nice_name"].unique()):
        df_method = dff.loc[ dff["nice_name"] == name]
        df_method = df_method.sort_values(by = ["K"])

        fig.add_trace(
            go.Scatter(x = df_method["K"], y = df_method["scores.relative_performance"], name = name, line = {"width":2}, mode='lines+markers', showlegend = i == 0, 
                marker=dict(color = colors[name], size=7, symbol=markers[name])
            ), row = 1, col = i + 1
        )

queries = [
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.01 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[0]["sigma"].unique()[4]),
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.01 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[1]["sigma"].unique()[0]),
    "(method != \"Greedy\") &(sigma == {}) & (epsilon == 0.01 | epsilon != epsilon) & (T != T | T == 500 | T == 1000 | T == 2500 | T == 5000)".format(dfs[2]["sigma"].unique()[3])
]

for i, df in enumerate(dfs):
    dff = df.query(queries[i])
    for name in natural_sort(dff["nice_name"].unique()):
        df_method = dff.loc[ dff["nice_name"] == name]
        df_method = df_method.sort_values(by = ["K"])
    
        fig.add_trace(
            go.Scatter(x = df_method["K"], y = df_method["scores.relative_performance"], name = name, line = {"width":2}, mode='lines+markers', showlegend = False, 
                marker=dict(color = colors[name], size=7, symbol=markers[name])
            ), row = 2, col = i + 1
        )

fig.update_yaxes(title_text="Relative Performance ε = 0.1", row=1, col=1, title_standoff = 25)
fig.update_yaxes(title_text="Relative Performance ε = 0.01", row=2, col=1, title_standoff = 25)
fig.update_xaxes(dtick = 10, row=1, col=1)
fig.update_xaxes(dtick = 10, row=1, col=2)
fig.update_xaxes(dtick = 10, row=1, col=3)

fig.update_xaxes(dtick = 10, title_text="K", row=2, col=1)
fig.update_xaxes(dtick = 10, title_text="K", row=2, col=2)
fig.update_xaxes(dtick = 10, title_text="K", row=2, col=3)

fig.update_layout(
    template="simple_white",
    legend=dict(orientation="h",yanchor="bottom",y=-0.15,xanchor="left",x=0.03,font={"size": 15}),
    margin={'l': 5, 'r': 20, 't': 25, 'b': 5},
    height=900, width=1100
)
fig.write_image("stream_all_over_K.pdf")
fig.show()
