In [597]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
pd.set_option('display.max_rows', 1000)
%matplotlib inline

In [844]:
# Algorithm and experiment definitions.
# Each algorithm entry carries its plot styling ('color', 'dash' or 'marker',
# 'show_name'), its 'type' and the 'alg_id' prefix used in result file names.
algorithms = {
    'AverageKDTree': dict(
        name='average_kd_tree', color='red', dash='dash', show_name='AvgKD',
        type='full_index', alg_id='average_kd_tree-0.0-1024',
    ),
    'MedianKDTree': dict(
        name='median_kd_tree', color='red', dash='solid', show_name='MedKD',
        type='full_index', alg_id='median_kd_tree-0.0-1024',
    ),
    'CrackingKDTree': dict(
        name='cracking_kd_tree', color='blue', dash='dash', show_name='AKD',
        type='adaptive', alg_id='cracking_kd_tree-0.0-1024',
    ),
    'Quasii': dict(
        name='quasii', color='blue', dash='solid', show_name='Q',
        type='adaptive', alg_id='quasii-0.0-1024',
    ),
    'FullScan': dict(
        name='full_scan_cl', color='black', dash='dot', show_name='FS',
        type='full_index', partition='0', alg_id='full_scan_cl-0.0-0',
    ),
    'ProgressiveIndexCostModel': dict(
        name='progressive_index_cm', color='purple', dash='dash', show_name='APKD',
        type='adaptive', delta='0.2', alg_id='progressive_index_cm-0.2-1024',
    ),
    'ProgressiveIndex': dict(
        name='progressive_index', color='purple', dash='solid', show_name='PKD',
        type='adaptive', delta='0.2', alg_id='progressive_index-0.2-1024',
    ),
}

deltas = [0.1, 0.2, 0.4, 0.8, 1]

# Delta experiment: register one variant of each progressive index per delta.
# These variants define a 'marker' instead of a 'dash'.
for i in deltas:
    algorithms.update({
        f'ProgressiveIndexCostModel_{i}': dict(
            name='progressive_index_cm', color='#7C1A1F', marker='x',
            show_name=f'APKD({i})', type='adaptive', delta=f'{i}',
            alg_id=f'progressive_index_cm-{i}-1024',
        ),
        f'ProgressiveIndex_{i}': dict(
            name='progressive_index', color='#A0393E', marker='x-open',
            show_name=f'PKD({i})', type='adaptive', delta=f'{i}',
            alg_id=f'progressive_index-{i}-1024',
        ),
    })

# Synthetic Experiments
# The result files of the synthetic workloads are named after 30M rows and
# 1000 queries (see 'exp_id'), so 'n_rows' and 'n_queries' carry those values
# rather than the 10M / 3000 used by the real-world workloads.
synthetic_experiments = {}

for i in [2, 4, 8]:
    synthetic_experiments.update({
        f'{key}{i}': {
            'name': label.format(i),
            'n_rows': '30000000',
            'n_queries': '1000',
            'n_cols': f'{i}',
            'sel': '0.01',
            'base_folder': 'synthetic_workloads/results',
            'exp_id': f'{workload}-30000000-1000-{i}-0.01'
        }
        # (experiment key prefix, display label, workload name in the file name)
        for key, label, workload in (
            ('Uniform', 'Unif({})', 'uniform'),
            ('Skewed', 'Skewed({})', 'skewed'),
            ('Sequential', 'Seq ({})', 'sequential'),
            ('Periodic', 'Prdc({})', 'periodic'),
            ('ZoomIn', 'Zoom({})', 'zoom_in'),
            ('SequentialZoomIn', 'SeqZoom({})', 'sequential_zoom_in'),
            ('AlternatingZoomIn', 'AltZoom({})', 'alternating_zoom_in'),
        )
    })

    # Shifting has no result file of its own: read() derives it from the
    # Uniform results with the same number of columns.
    synthetic_experiments[f'Shifting{i}'] = {
        "name": f"Shift({i})",
        'n_cols': f'{i}'
    }

# Real World Experiments
real_world_experiments = {
    'Power': dict(
        name='Power', n_rows='10000000', n_queries='3000', n_cols='', sel='0.0',
        base_folder='real-data-workload/results',
        exp_id='power-10000000-3000-0.0',
    ),
    'Skyserver': dict(
        name='Skyserver', n_rows='10000000', n_queries='3000', n_cols='', sel='0.0',
        base_folder='real-data-workload/results',
        exp_id='skyserver-0-0-0.0',
    ),
    # The mixed workload is stored as query template number 8.
    'Genomics Mixed': dict(
        name='Genomics', n_rows='10000000', n_queries='3000', n_cols='', sel='0.0',
        base_folder='real-data-workload/results',
        exp_id='genomics_query_8-10000000-3000-0.0',
    ),
}

# One experiment per individual genomics query template (0..7).
for i in range(8):
    real_world_experiments[f'Genomics{i}'] = dict(
        name=f'genomics{i}', n_rows='10000000', n_queries='3000', n_cols='', sel='0.0',
        base_folder='real-data-workload/results',
        exp_id=f'genomics_query_{i}-10000000-3000-0.0',
    )

# All experiments, synthetic first, keyed by experiment name.
experiments = dict(synthetic_experiments)
experiments.update(real_world_experiments)

In [845]:
# Input/Output
def read(alg, exp):
    '''Load the per-query measurements of one algorithm on one experiment.

    For regular experiments the CSV file
    "<base_folder>/<alg_id>-<exp_id>.csv" is read and the rows of all
    repetitions are averaged position by position. The file must contain the
    columns 'repetition', 'initialization_time', 'scan_time' and
    'adaptation_time'; this assumes every repetition occupies an equally
    sized, contiguous run of rows (TODO confirm against the benchmark output).

    Experiments whose key starts with 'Shifting' have no file of their own:
    the first 10 queries of the matching Uniform experiment are repeated until
    the workload is as long as the Uniform one (rounded down to full runs).

    Args:
        alg: key into `algorithms`.
        exp: key into `experiments`.

    Returns:
        A DataFrame with one row per query. Missing 'index_search_time',
        'tuples_scanned' and 'number_of_nodes' columns are filled with 0.0,
        and 'query_time' / 'query_time_cumsum' are (re)computed.
    '''
    if exp.startswith('Shifting'):
        n_queries_per_run = 10
        uni = read(alg, 'Uniform' + experiments[exp]['n_cols'])
        # At least one run is kept, even if Uniform has fewer than 10 queries.
        n_runs = max(len(uni) // n_queries_per_run, 1)
        first_run = uni.head(n_queries_per_run)
        # A single concat replaces the row-by-row DataFrame.append loop
        # (DataFrame.append was removed in pandas 2.0).
        df_final = pd.concat([first_run] * n_runs, ignore_index=True)
    else:
        df = pd.read_csv(f"{experiments[exp]['base_folder']}/{algorithms[alg]['alg_id']}-{experiments[exp]['exp_id']}.csv")
        repetitions = df['repetition'].max() + 1
        step = int(len(df.index)/repetitions)
        # Sum the repetitions position by position, then divide to average.
        df_final = df[:step].copy().reset_index()
        for rep in range(1, repetitions):
            df_final += df[step * (rep) : step * (rep + 1)].copy().reset_index()

        df_final = df_final/repetitions

    # Not every algorithm reports these measurements; default them to zero.
    if 'index_search_time' not in df_final:
        df_final['index_search_time'] = 0.0
    if 'tuples_scanned' not in df_final:
        df_final['tuples_scanned'] = 0.0
    if 'number_of_nodes' not in df_final:
        df_final['number_of_nodes'] = 0.0
    df_final['query_time'] = df_final['initialization_time'] + df_final['index_search_time'] + df_final['scan_time'] + df_final['adaptation_time']
    df_final['query_time_cumsum'] = df_final['query_time'].cumsum()
    return df_final

                     
def read_multiple(algs, exp):
    '''Read several algorithms of one experiment.

    Args:
        algs: iterable of keys into `algorithms`.
        exp: key into `experiments`.

    Returns:
        Four parallel lists, in this order: dfs, colors, dashes, names.
        Algorithms without a 'dash' style (the delta-experiment variants only
        define a 'marker') are drawn with a 'solid' line.
    '''
    dfs, colors, dashes, names = [], [], [], []
    for alg in algs:
        dfs.append(read(alg, exp))
        style = algorithms[alg]
        names.append(style['show_name'])
        colors.append(style['color'])
        dashes.append(style.get('dash', 'solid'))

    return dfs, colors, dashes, names

                     
def save_figure(fig, fig_name):
    '''Write `fig` to "figures/<fig_name>", creating the folder if needed.

    The format is whatever `fig.write_image` derives from the file name.
    '''
    import os

    path = f"figures/{fig_name}"
    # A fresh checkout has no output folder yet; create it instead of failing.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    fig.write_image(path)
                     
def save_table(table, table_name):
    '''Write the string `table` to "tables/<table_name>", creating the folder if needed.'''
    import os

    path = f"tables/{table_name}"
    # A fresh checkout has no output folder yet; create it instead of failing.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        f.write(table)

In [846]:
# Helper methods
def get_first_query(df):
    '''Return the 'query_time' of the first row of `df`.'''
    query_times = df['query_time']
    return query_times.iloc[0]

def get_payoff(df, baseline):
    '''Return the position of the first query at which the cumulative time of
    `df` drops below 95% of the cumulative time of `baseline`.

    When that never happens, `len(df)` is returned.
    '''
    gap = df['query_time_cumsum'] - (baseline['query_time_cumsum'] * 0.95)
    return next((pos for pos, value in enumerate(gap) if value < 0), len(df))

def get_convergence(df, df_type=''):
    '''Return the position of the last query with a non-zero 'adaptation_time'.

    Full indexes (`df_type == 'full_index'`) and workloads without any
    adaptation yield 0.
    '''
    if df_type == 'full_index':
        return 0

    last_adapting = 0
    for position, spent in enumerate(df['adaptation_time']):
        if spent != 0.0:
            last_adapting = position
    return last_adapting

def get_robustness(df, df_type=''):
    '''Return the variance of 'query_time' over the queries before convergence,
    looking at no more than the first 50 queries.

    Full indexes (`df_type == 'full_index'`) yield 0.
    '''
    if df_type == 'full_index':
        return 0

    window = min(50, get_convergence(df, df_type))
    adapting_times = df['query_time'][:window]
    return np.var(adapting_times)

def get_total_time(df):
    '''Return the sum of 'query_time' over all rows of `df`.'''
    query_times = df['query_time']
    return query_times.sum()

In [847]:
# Figures

def create_figure():
    '''Create an empty plotly figure with the layout shared by all plots:
    transparent background, font size 22, black axis lines, no bar gap and
    the legend hidden (callers switch it on where needed).
    '''
    # Both axes use the same line styling.
    axis_style = dict(showline=True, linewidth=2, linecolor='black')

    # Optional settings that were tried and are currently disabled:
    # width=1500, legend_orientation="h", legend=dict(x=0.08, y=-.05)
    layout = go.Layout(
        plot_bgcolor='rgba(0,0,0,0)',
        font=dict(size=22),
        yaxis=dict(axis_style),
        xaxis=dict(axis_style),
        bargap=0,
        showlegend=False,
    )
    return go.Figure(layout=layout)

def _delta_exp_figure(metric, yaxis_title):
    '''Plot `metric` of the Progressive KDTree against the delta.

    One line is drawn per Uniform experiment (2, 4 and 8 columns), with one
    point per value in `deltas`.

    Args:
        metric: callable `(df, exp) -> number`, where `df` is the result of
            `read` for one delta and `exp` is the experiment key.
        yaxis_title: label of the y axis.
    '''
    fig = create_figure()

    traces = []
    for n_cols in [2, 4, 8]:
        exp = f'Uniform{n_cols}'
        values = [metric(read(f'ProgressiveIndex_{d}', exp), exp) for d in deltas]
        traces.append(
            go.Scatter(
                name=f'{n_cols} cols',
                x=deltas,
                y=values,
                mode='lines'
            )
        )

    fig.add_traces(data=traces)
    fig.update_layout(showlegend=True, yaxis_title=yaxis_title, xaxis_title='Delta')
    return fig

def delta_exp_first_query():
    '''First query response time of the Progressive KDTree per delta.'''
    return _delta_exp_figure(lambda df, exp: get_first_query(df), 'First Query (seconds)')

def delta_exp_pay_off():
    '''Payoff of the Progressive KDTree against the full scan per delta.'''
    baselines = {}

    def payoff(df, exp):
        # The full-scan baseline does not depend on the delta: read it once
        # per experiment instead of once per (experiment, delta) pair.
        if exp not in baselines:
            baselines[exp] = read('FullScan', exp)
        return get_payoff(df, baselines[exp])

    return _delta_exp_figure(payoff, 'Payoff')

def delta_exp_convergence():
    '''Convergence of the Progressive KDTree per delta.'''
    return _delta_exp_figure(lambda df, exp: get_convergence(df), 'Convergence')

def delta_exp_total_time():
    '''Total response time of the Progressive KDTree per delta.'''
    return _delta_exp_figure(lambda df, exp: get_total_time(df), 'Total Response Time')

def delta_exp_robustness():
    '''Robustness of the Progressive KDTree per delta.'''
    return _delta_exp_figure(lambda df, exp: get_robustness(df), 'Robustness')

def line(exp, algs, attr, limit=2000):
    '''Plot the column `attr` of every algorithm in `algs` as one line each.

    Args:
        exp: key into `experiments`.
        algs: list of keys into `algorithms`.
        attr: column of the result DataFrame to plot; also the y-axis title.
        limit: number of leading queries to plot.
    '''
    dfs, colors, dashes, names = read_multiple(algs, exp)
    fig = create_figure()

    traces = []
    for df, color, dash, name in zip(dfs, colors, dashes, names):
        values = np.array(df[attr][:limit])
        traces.append(
            go.Scatter(
                name=name,
                x=list(range(len(values))),
                y=values,
                marker_color=color,
                mode='lines',
                line=dict(width=4, dash=dash)
            )
        )

    fig.add_traces(data=traces)
    fig.update_layout(showlegend=True, yaxis_title=attr)
    return fig

def workload_selectivity(exp):
    '''Plot the per-query selectivity (in percent) of the workload `exp`.

    The value is derived from the FullScan run as
    (tuples_scanned / scan_overhead) / tuples_scanned * 100.
    NOTE(review): presumably tuples_scanned / scan_overhead is the result size
    of the query - confirm against the benchmark output.
    '''
    fig = create_figure()
    full_scan = read('FullScan', exp)

    scanned = full_scan['tuples_scanned']
    fraction = (scanned / full_scan['scan_overhead']) / scanned
    sel = fraction * 100

    trace = go.Scatter(
        name='selectivity',
        x=list(range(len(sel))),
        y=sel,
        mode='lines',
        line=dict(width=4)
    )
    fig.add_traces(data=trace)
    fig.update_layout(showlegend=True, yaxis_title='Selectivity (%)')
    return fig


def per_query(exp, algs, limit=1000):
    '''Plot the per-query response time of every algorithm in `algs`.

    'query_time' is multiplied by 1000 and labelled as milliseconds; the
    y axis is logarithmic.

    Args:
        exp: key into `experiments`.
        algs: list of keys into `algorithms`.
        limit: number of leading queries to plot.
    '''
    dfs, colors, dashes, names = read_multiple(algs, exp)
    fig = create_figure()

    traces = []
    for df, color, dash, name in zip(dfs, colors, dashes, names):
        per_query_times = np.array(df['query_time'][:limit]) * 1000
        traces.append(
            go.Scatter(
                name=name,
                x=list(range(len(per_query_times))),
                y=per_query_times,
                marker_color=color,
                mode='lines',
                line=dict(width=4, dash=dash)
            )
        )

    fig.add_traces(data=traces)
    fig.update_layout(showlegend=True, yaxis_title='Time (milliseconds)')
    fig.update_layout(legend_orientation="h", legend=dict(x=.25, y=1.2))
    fig.update_yaxes(type="log")
    return fig

def cummulative(exp, algs, limit=5000):
    '''Plot the cumulative response time of every algorithm in `algs`.

    Args:
        exp: key into `experiments`.
        algs: list of keys into `algorithms`.
        limit: number of leading queries to plot.
    '''
    dfs, colors, dashes, names = read_multiple(algs, exp)
    fig = create_figure()

    traces = []
    for df, color, dash, name in zip(dfs, colors, dashes, names):
        cumulative_times = np.array(df['query_time_cumsum'][:limit])
        traces.append(
            go.Scatter(
                name=name,
                x=list(range(len(cumulative_times))),
                y=cumulative_times,
                marker_color=color,
                mode='lines',
                line=dict(width=4, dash=dash)
            )
        )

    fig.add_traces(data=traces)
    fig.update_layout(legend_orientation="h", legend=dict(x=.25, y=1.2))
    fig.update_layout(showlegend=True, yaxis_title='Time (Seconds)')
    return fig

def number_of_nodes(exp, algs, limit=5000):
    '''Plot the 'number_of_nodes' column of every algorithm in `algs` per query.

    Args:
        exp: key into `experiments`.
        algs: list of keys into `algorithms`.
        limit: number of leading queries to plot.
    '''
    dfs, colors, dashes, names = read_multiple(algs, exp)
    fig = create_figure()

    traces = []
    for df, color, dash, name in zip(dfs, colors, dashes, names):
        node_counts = np.array(df['number_of_nodes'][:limit])
        traces.append(
            go.Scatter(
                name=name,
                x=list(range(len(node_counts))),
                y=node_counts,
                marker_color=color,
                mode='lines',
                line=dict(width=4, dash=dash)
            )
        )

    fig.add_traces(data=traces)
    fig.update_layout(legend_orientation="h", legend=dict(x=.25, y=1.2))
    fig.update_layout(showlegend=True, yaxis_title='# Nodes')
    return fig

def tuples_scanned(exp, algs, limit=5000):
    '''Plot the 'tuples_scanned' column of every algorithm in `algs` per query,
    restricted to the first `limit` queries.
    '''
    dfs, colors, dashes, names = read_multiple(algs, exp)
    fig = create_figure()

    def as_trace(df, color, dash, name):
        # One line per algorithm, styled as configured in `algorithms`.
        scanned = np.array(df['tuples_scanned'][:limit])
        return go.Scatter(
            name=name,
            x=list(range(len(scanned))),
            y=scanned,
            marker_color=color,
            mode='lines',
            line=dict(width=4, dash=dash)
        )

    traces = [
        as_trace(df, color, dash, name)
        for df, color, dash, name in zip(dfs, colors, dashes, names)
    ]

    fig.add_traces(data=traces)
    fig.update_layout(legend_orientation="h", legend=dict(x=.25, y=1.2))
    fig.update_layout(showlegend=True, yaxis_title='Tuples Scanned')
    return fig

def break_down(exp, algs, limit=None):
    '''Stacked bar chart of where each algorithm spends its time.

    For every algorithm the initialization, adaptation, index search and scan
    times of the first `limit` queries are summed and stacked in one bar.

    Args:
        exp: key into `experiments`.
        algs: list of keys into `algorithms`.
        limit: number of leading queries to include; None (the default)
            includes all of them.
    '''
    dfs, _, _, names = read_multiple(algs, exp)

    # (legend label, result column), in stacking order.
    components = [
        ('Initialization', 'initialization_time'),
        ('Adaptation', 'adaptation_time'),
        ('Index Search', 'index_search_time'),
        ('Scan', 'scan_time'),
    ]

    fig = go.Figure(data=[
        go.Bar(name=label, x=names, y=np.array([df[column][:limit].sum() for df in dfs]))
        for label, column in components
    ])

    # Change the bar mode
    fig.update_layout(barmode='stack')
    fig.update_layout(legend_orientation="h", legend=dict(x=.25, y=1.2))
    fig.update_layout(yaxis_title='Time (seconds)')
    return fig

def plot(exp):
    '''Save the per-query figures of the experiment `exp`.

    Three PDFs are written through `save_figure`, each comparing one family
    of indexes against the full scan:
    "<exp>-per_query-fi.pdf" (full indexes), "<exp>-per_query-adapt.pdf"
    (adaptive indexes) and "<exp>-per_query-prog.pdf" (progressive indexes).
    '''
    # (file name suffix, algorithms drawn in that figure)
    figure_groups = [
        ('fi', ['AverageKDTree', 'MedianKDTree', 'FullScan']),
        ('adapt', ['CrackingKDTree', 'Quasii', 'FullScan']),
        ('prog', ['ProgressiveIndex', 'ProgressiveIndexCostModel', 'FullScan']),
    ]

    for suffix, group in figure_groups:
        pq = per_query(exp, group)
        save_figure(pq, f'{exp}-per_query-{suffix}.pdf')

In [848]:
# exps = ['Uniform', 'Skewed', 'ZoomIn', 'Sequential', 'Periodic', 'SequentialZoomIn', 'Shifting', 'Power', 'Genomics Mixed', 'Skyserver']

# for exp in exps:
#     plot(exp)

In [849]:
# Latex Tables

def metrics(exps, algs):
    '''Build the summary table of all metrics.

    The result has one column per algorithm (named by its 'show_name') and a
    two-level row index (experiment display name, metric):

                             | Algorithm 1 | Algorithm 2 | ...
        Unif(8)  First Query |    1.11     |    15.00    | ...
                 PayOff      |     ...

    Cell values are preformatted for display:
      * First Query / Time: two decimals.
      * PayOff: query position, or '-' when the baseline is never beaten.
      * Convergence: query position, '-' when it is 0, '*' when the algorithm
        is still adapting at the end of the workload.
      * Robustness: scientific notation, or '-' when it is 0.

    Args:
        exps: list of keys into `experiments`.
        algs: list of keys into `algorithms`.

    Returns:
        A pandas DataFrame; call `.to_latex(...)` on it to export a table.
    '''
    metric_names = ['First Query', 'PayOff', 'Convergence', 'Robustness', 'Time']

    data = {}

    # Two parallel lists forming the (experiment, metric) row index.
    index_exp = []
    index_metric = []

    for exp in exps:
        dfs, _, _, names = read_multiple(algs, exp)

        # Make sure every algorithm has a column to append to.
        for name in names:
            data.setdefault(name, [])

        baseline = read('FullScan', exp)

        index_exp += [experiments[exp]['name']] * len(metric_names)
        index_metric += metric_names

        for df, name, alg in zip(dfs, names, algs):
            column = data[name]
            alg_type = algorithms[alg]['type']

            column.append('%.2f' % get_first_query(df))

            po = get_payoff(df, baseline)
            column.append('-' if po == len(df) else po)

            conv = get_convergence(df, alg_type)
            if conv == 0:
                column.append('-')
            elif conv >= len(df) - 1:
                column.append('*')
            else:
                column.append(conv)

            robust = get_robustness(df, alg_type)
            column.append('-' if robust == 0 else '%.E' % robust)

            column.append('%.2f' % get_total_time(df))

    return pd.DataFrame(data, index=[index_exp, index_metric])

# Analysis of results

## Workloads
In our experiments we used the following workloads:

* Synthetic: synthetically generated data and queries. The data is always uniformly distributed, and the workloads follow the patterns described in Figure \ref{}.
* Power: Real world sensor data from a manufacturing installation \cite{}. The workload is synthetically generated by selecting two random points for each attribute.
* Genomics: Human genetic information from the 1000 Genomes Project \cite{}. The data consists of 10 million genomes, and the workload is randomly selected from 8 query templates designed by a group of bioinformaticians.
* Skyserver: The data represents a map of a large portion of the universe. The workload consists of two-dimensional range queries used on their platform.

## Synthetic workloads description
Here we briefly describe each synthetic workload pattern we created. The patterns were derived from \cite{} and expanded to a multidimensional domain. Figure \ref{} presents a two-dimensional view of each workload, with a color scale representing the order of the queries (blue being the beginning of the workload, and red the end). Unless stated, all queries have the same selectivity. The proposed patterns are:

* Uniform: Uniform distributed multidimensional range queries.
* Skewed: Skewed multidimensional range queries generated using a Normal distribution. The majority of queries will be in the center, while some will tend towards the edges.
* Zoom In: A zoom pattern, where each query is entirely contained inside the previous one. In this workload the query selectivity decreases with time.
* Sequential: In this pattern each query has no intersection with any other query on any of its attributes, which means the queries follow one of the diagonals.
* Periodic: This pattern follows the same concept of Sequential, with all but one attribute having no intersection with the other queries. Another point is that the sequence is repeated multiple times, but on different parts of the data.
* Sequential Zoom In: Follows the same pattern as Sequential, with the addition that in every step there is also a zoom in operation. The query selectivity is the same for every major step, but decreases during zoom in.
* Alternating Zoom In: Follows the same pattern as Zoom In. However, the zoom operation happens at two separate locations, with queries alternating between the two zooms. In this pattern the query selectivity also decreases during the zoom.
* Shifting: Although not in the picture, this pattern simulates a data scientist quickly exploring multiple tables. For example, the data scientist executes ten queries on one table, is not satisfied with the results, and moves on to the second table. The same process happens on the second table, and so on.

This will be a table.
* Synthetic: 30 M rows, 8 columns, 1000 queries
    * Sequential: Has only 2 columns, as if we have a uniform data distribution, and 1% query selectivity, then using 8 columns the per attribute selectivity is around 56% (0.01 ^ (1/8)). While with two attributes the per attribute selectivity is around 10% (0.01 ^ (1/2)).
* Power: 10 M rows, 2 columns, 3000 queries
* Genomics Mixed: 10 M rows, 19 columns, 100 queries (same queries as in the paper Multidimensional Range Queries on Modern Hardware).
* Skyserver: 69 M rows, 2 columns, 100 K queries

All algorithms had 1024 maximum partition size
Progressive Index delta was 0.2




## Algorithms
To evaluate our algorithms, we executed and compared the following algorithms on the benchmarks described above. The abbreviations used in our figures and tables are given in parentheses.

* Average KDTree (AvgKD): Full Index KDTree where each level contains only one dimension, and the pivots are collected using average.
* Median KDTree (MedKD): Full Index KDTree where each level contains only one dimension, and the pivots are collected using median.
* Adaptive KDTree (AKD): Our first proposed algorithm, which aims towards low total response time.
* Progressive KDTree (PKD): Our second proposed algorithm, which aims towards smaller payoff.
* Adaptive Progressive KDTree (APKD): Our third proposed algorithm, which aims towards high robustness.
* Quasii (Q): The state of the art on multidimensional adaptive indexes.


# Impacts of the delta
In this section we study the impacts of the delta on progressive indexes. To analyze this we experimented with the Uniform benchmark, with 30 million rows, 1% query selectivity, 1000 queries, and 2, 4, and 8 columns. On the algorithms side, we executed the Progressive KDTree with different deltas, i.e., 0.1, 0.2, 0.4, 0.8, and 1.0.

Figure \ref{fig:delta_exp_fist_query} shows the difference on first query response time as we increase the delta. With delta equal 0.1, around 1% of the table is reorganized, which is why the Progressive KDTree takes 100 queries to converge. While, if the delta is equal to 1, then the Progressive KDTree behaves exactly as a Full Index, as it will fully reorganize the data.

Payoff Figure \ref{fig:delta_exp_payoff}.

The number of queries to converge is independent of the number of attributes in the table, as shown in Figure \ref{fig:delta_exp_convergence}. This is because the delta determines the percentage of the table that is reorganized by each query.

Finally, the delta does not greatly influence the total response time, as shown in Figure \ref{fig:delta_exp_total_response_time}. Rather, choosing the delta is mainly a trade-off between a costly first query and a low convergence time.

In [850]:
# Delta experiment, first query: build the figure, save it under figures/,
# and display it as the cell output.
a = delta_exp_first_query()
save_figure(a, 'delta_exp_first_query.pdf')
a

In [851]:
# Delta experiment, payoff: build the figure, save it under figures/,
# and display it as the cell output.
a = delta_exp_pay_off()
save_figure(a, 'delta_exp_pay_off.pdf')
a

In [852]:
# Delta experiment, convergence: build the figure, save it under figures/,
# and display it as the cell output.
a = delta_exp_convergence()
save_figure(a, 'delta_exp_convergence.pdf')
a

In [853]:
# Delta experiment, total response time: build the figure, save it under
# figures/, and display it as the cell output.
a = delta_exp_total_time()
save_figure(a, 'delta_exp_total_time.pdf')
a

In [854]:
# Benchmarks used in the comparison: the eight-attribute synthetic workloads,
# Sequential with two attributes, and the three real-world workloads.
exps = ['Uniform8', 'Skewed8', 'ZoomIn8', 'Periodic8', 'SequentialZoomIn8', 'AlternatingZoomIn8', 'Shifting8', 'Sequential2', 'Power', 'Genomics Mixed', 'Skyserver']
# exps = ['Uniform2', 'Uniform4', 'Uniform8']
# `m` holds the metrics table of every algorithm on every benchmark above;
# the analysis cells below slice it per metric.
m = metrics(exps, [
        'AverageKDTree',
        'MedianKDTree',
        'CrackingKDTree',
        'Quasii',
        'ProgressiveIndex',
        'ProgressiveIndexCostModel',
        'FullScan',
])
# save_table(m, 'power')

# Comparison
In this section we compare all mentioned indexes on the following benchmarks: Uniform, Skewed, Zoom In, Periodic, Sequential Zoom In, Alternating Zoom In, and Shifting all with eight attributes; Sequential with two attributes; And the real world benchmarks Power, Genomics, and Skyserver.

We chose to use eight attributes (except for the Sequential benchmark), 30 million tuples, and 1% query selectivity because scanning the data took more than 500 ms (the interactive threshold). Sequential only has two attributes because of the per-attribute selectivity: with eight attributes the per-attribute selectivity is around 56% ($0.56^8 \approx 0.01$), which in turn makes it impossible to have two queries with no intersection.

For the Genomics Benchmark we used only the workload pattern which uses all templates, as they end up using every attribute in the dataset. The original benchmark had workloads which would only touch certain attributes, however these workloads would heavily penalize Quasii, as it is never described how to handle queries on only a subset of attributes of a table.

# First Query Analysis
We start by analyzing the response time for the first query. This is the time to copy the table to a secondary table structure, build the index or adapt with the first query, and then answer the query.

In [856]:
# Select the 'First Query' row of every experiment from the metrics table.
idx = pd.IndexSlice
a = m.loc[idx[:, 'First Query'], :]
# Drop the metric level so rows are labelled by experiment name only.
a = a.reset_index(level=1, drop=True)
# Print the LaTeX source of the table, then display the table itself.
print(a.to_latex())
a

\begin{tabular}{llllllll}
\toprule
{} & AvgKD &  MedKD &   AKD &     Q &   PKD &  APKD &    FS \\
\midrule
Unif(8)    &  7.07 &  11.16 &  1.73 &  2.90 &  0.82 &  0.81 &  0.56 \\
Skewed(8)  &  7.07 &  11.17 &  2.11 &  3.75 &  0.74 &  0.74 &  0.50 \\
Zoom(8)    &  7.25 &  11.35 &  1.96 &  3.50 &  0.77 &  0.77 &  0.52 \\
Prdc(8)    &  7.05 &  11.17 &  4.19 &  5.00 &  0.59 &  0.57 &  0.38 \\
SeqZoom(8) &  7.06 &  11.16 &  1.76 &  3.01 &  0.83 &  0.81 &  0.56 \\
AltZoom(8) &  7.04 &  11.13 &  4.19 &  4.99 &  0.59 &  0.58 &  0.38 \\
Shift(8)   &  7.07 &  11.16 &  1.73 &  2.90 &  0.82 &  0.81 &  0.56 \\
Seq (2)    &  4.72 &   8.83 &  0.48 &  2.47 &  0.18 &  0.18 &  0.14 \\
Power      &  0.83 &   1.53 &  0.16 &  0.19 &  0.09 &  0.09 &  0.05 \\
Genomics   &  2.61 &   2.57 &  1.45 &  2.71 &  0.27 &  0.27 &  0.04 \\
Skyserver  &  6.97 &  14.47 &  0.62 &  1.17 &  0.35 &  0.35 &  0.23 \\
\bottomrule
\end{tabular}



Unnamed: 0,AvgKD,MedKD,AKD,Q,PKD,APKD,FS
Unif(8),7.07,11.16,1.73,2.9,0.82,0.81,0.56
Skewed(8),7.07,11.17,2.11,3.75,0.74,0.74,0.5
Zoom(8),7.25,11.35,1.96,3.5,0.77,0.77,0.52
Prdc(8),7.05,11.17,4.19,5.0,0.59,0.57,0.38
SeqZoom(8),7.06,11.16,1.76,3.01,0.83,0.81,0.56
AltZoom(8),7.04,11.13,4.19,4.99,0.59,0.58,0.38
Shift(8),7.07,11.16,1.73,2.9,0.82,0.81,0.56
Seq (2),4.72,8.83,0.48,2.47,0.18,0.18,0.14
Power,0.83,1.53,0.16,0.19,0.09,0.09,0.05
Genomics,2.61,2.57,1.45,2.71,0.27,0.27,0.04


Table \ref{fig:exp-first-query} shows that full indexes have a slower first query response time, as they have to index the entire dataset, with the Median KDTree being the slower of the two because finding medians is more costly than finding averages. Adaptive indexes are faster than full indexes because they only index the parts of the data relevant to answering the query. Quasii takes longer than the Adaptive KDTree because it creates many more nodes on the first query. For example, on the uniform workload Quasii creates 6298 nodes, while the Adaptive KDTree creates only 155. Finally, the progressive indexes have a first query time similar to a full scan, as they are limited by the amount of data reorganized or the time spent.

For the Genomics Mixed benchmark, the number of attributes (19) is too high for the number of tuples (10 million). If we analyze the number of nodes created on the first query, the Average KDTree and the Median KDTree create 27 and 19 nodes, respectively, while Quasii creates 1390, which explains why Quasii has a higher first query cost. The other point is that the Average KDTree has a higher first query cost than the Median KDTree. This can be explained by the number of nodes created: the Median KDTree creates exactly 19 nodes, one for each attribute, while the Average KDTree creates 27 nodes, which means some of the nodes it creates bring no benefit to the index.

Figure \ref{fig:first-query-break-down} presents a time breakdown for the first query of Genomics Mixed. All indexes spend around 0.5 seconds copying the table to the secondary structure. Quasii then spends around 2.2 seconds adapting for the first query, while both full indexes take only 2 seconds to build.

In [857]:
# Breakdown for 'Genomics Mixed' over the listed algorithms; the result is
# saved through save_figure and displayed as the cell output.
# NOTE(review): the trailing 1 presumably restricts the breakdown to the
# first query -- confirm against break_down's definition.
a = break_down(
    'Genomics Mixed',
    ['AverageKDTree', 'MedianKDTree', 'CrackingKDTree', 'Quasii'],
    1,
)
save_figure(a, 'first_query_genomics_break_down.pdf')
a

# Payoff Analysis
Now we analyze the payoff for each algorithm in different benchmarks. We define the payoff as the query number at which the cumulative query time of the algorithm becomes smaller than that of the full scan. Whenever there is a '-' in the table, it means the algorithm never paid off.

In [859]:
# Restrict `m` to the index columns (full scan excluded), keep the 'PayOff'
# rows, drop index level 1, then print as LaTeX and display.
idx = pd.IndexSlice
a = (
    m[['AvgKD', 'MedKD', 'AKD', 'Q', 'PKD', 'APKD']]
    .loc[idx[:, 'PayOff'], :]
    .reset_index(level=1, drop=True)
)
print(a.to_latex())
a

\begin{tabular}{lllllll}
\toprule
{} & AvgKD & MedKD & AKD &   Q & PKD & APKD \\
\midrule
Unif(8)    &    16 &    26 &   8 &  14 &  41 &   29 \\
Skewed(8)  &    18 &    29 &   6 &  12 &  57 &   51 \\
Zoom(8)    &    31 &    46 &   3 &   7 &  68 &   61 \\
Prdc(8)    &    21 &    34 &  11 &  13 &  41 &   39 \\
SeqZoom(8) &    16 &    26 &   3 &   5 &  53 &   36 \\
AltZoom(8) &    15 &    26 &   9 &  11 &  23 &   32 \\
Shift(8)   &     - &     - &   8 &   - &   - &    - \\
Seq (2)    &     - &     - &   5 &   - &   6 &    3 \\
Power      &    17 &    34 &   8 &  13 &  12 &   28 \\
Genomics   &    28 &    28 &  47 &  71 &  81 &   99 \\
Skyserver  &    31 &    65 &   3 &   5 &  11 &   42 \\
\bottomrule
\end{tabular}



Unnamed: 0,AvgKD,MedKD,AKD,Q,PKD,APKD
Unif(8),16,26,8,14,41,29
Skewed(8),18,29,6,12,57,51
Zoom(8),31,46,3,7,68,61
Prdc(8),21,34,11,13,41,39
SeqZoom(8),16,26,3,5,53,36
AltZoom(8),15,26,9,11,23,32
Shift(8),-,-,8,-,-,-
Seq (2),-,-,5,-,6,3
Power,17,34,8,13,12,28
Genomics,28,28,47,71,81,99


In Table \ref{fig:payoff} we can observe that the Average KDTree has a smaller payoff than the Median KDTree because of its lower index creation cost. Adaptive indexes have a smaller payoff than full indexes because they have a lower cost on the first query and are adapted to the workload. The Adaptive KDTree has a smaller payoff than Quasii because Quasii has a higher adaptation time. As we can see in Figure \ref{fig:payoff-number-of-nodes}, when comparing the number of nodes created on the Uniform(8) benchmark, Quasii creates many more nodes until it stabilizes around query 25, which results in a higher payoff. Finally, progressive indexes have restrictions on the amount of data reorganized or the time spent, which improve convergence and robustness, respectively. However, they end up having higher payoffs.

On the sequential workload, the Average KDTree, Median KDTree, and Quasii never pay off because the number of queries is too small (Sequential has around 10 queries). Figure \ref{fig:payoff-per-query} provides the per-query response time for the entire workload. The Median KDTree is at least two orders of magnitude faster than a full scan. Hence, with more queries it would eventually pay off.

For the shifting workload, no algorithm besides the Adaptive KDTree pays off because the number of queries executed before moving on to the next table is too small.

On the Genomics Mixed benchmark, Figure \ref{fig:payoff-genomics} presents the cumulative response time for the first 30 queries. We can observe that adaptive and progressive indexes take longer to pay off than full indexes because full indexes have, in this case, a low first query cost (as discussed previously) and a low per-query response time, whereas adaptive and progressive indexes have a low first query cost but take longer to achieve a low per-query response time.

In [860]:
# Node-count plot for Quasii and the cracking KD-tree on 'Uniform8'.
# NOTE(review): 50 presumably caps the number of queries plotted -- confirm.
a = number_of_nodes(
    'Uniform8',
    ['Quasii', 'CrackingKDTree'],
    50,
)
save_figure(a, 'payoff-number-of-nodes.pdf')
a

In [861]:
# Per-query plot on 'Sequential2' for the full indexes, Quasii and the full scan.
a = per_query(
    'Sequential2',
    ['AverageKDTree', 'MedianKDTree', 'Quasii', 'FullScan'],
)
save_figure(a, 'payoff-per-query.pdf')
a

In [862]:
# Cumulative plot on 'Genomics Mixed' for every algorithm plus the full scan.
# NOTE(review): 30 presumably limits the plot to the first 30 queries -- confirm.
a = cummulative(
    'Genomics Mixed',
    [
        'AverageKDTree', 'MedianKDTree',
        'CrackingKDTree', 'Quasii',
        'ProgressiveIndex', 'ProgressiveIndexCostModel',
        'FullScan',
    ],
    30,
)
save_figure(a, 'payoff-cummulative.pdf')
a

# Robustness
In this section we analyze the robustness of the adaptive and progressive indexes. We define robustness as the variance over the first 50 queries or until convergence, whichever comes first. We only measure this for adaptive and progressive indexes.

In [864]:
# Keep only the adaptive/progressive columns of `m`, take the 'Robustness'
# rows, drop index level 1, then print as LaTeX and display.
idx = pd.IndexSlice
a = (
    m[['AKD', 'Q', 'PKD', 'APKD']]
    .loc[idx[:, 'Robustness'], :]
    .reset_index(level=1, drop=True)
)
print(a.to_latex())
a

\begin{tabular}{lllll}
\toprule
{} &    AKD &      Q &    PKD &   APKD \\
\midrule
Unif(8)    &  9E-02 &  2E-01 &  3E-02 &  4E-04 \\
Skewed(8)  &  9E-02 &  3E-01 &  3E-02 &  2E-04 \\
Zoom(8)    &  8E-02 &  2E-01 &  3E-02 &  4E-04 \\
Prdc(8)    &  3E-01 &  5E-01 &  1E-02 &  2E-04 \\
SeqZoom(8) &  6E-02 &  2E-01 &  3E-02 &  4E-04 \\
AltZoom(8) &  3E-01 &  5E-01 &  3E-02 &  2E-04 \\
Shift(8)   &  3E-01 &  7E-01 &  7E-03 &  6E-04 \\
Seq (2)    &  2E-02 &  6E-01 &  4E-04 &  1E-04 \\
Power      &  6E-04 &  1E-03 &  3E-04 &  3E-06 \\
Genomics   &  6E-02 &  2E-01 &  1E-02 &  5E-04 \\
Skyserver  &  8E-03 &  4E-02 &  4E-03 &  2E-04 \\
\bottomrule
\end{tabular}



Unnamed: 0,AKD,Q,PKD,APKD
Unif(8),0.09,0.2,0.03,0.0004
Skewed(8),0.09,0.3,0.03,0.0002
Zoom(8),0.08,0.2,0.03,0.0004
Prdc(8),0.3,0.5,0.01,0.0002
SeqZoom(8),0.06,0.2,0.03,0.0004
AltZoom(8),0.3,0.5,0.03,0.0002
Shift(8),0.3,0.7,0.007,0.0006
Seq (2),0.02,0.6,0.0004,0.0001
Power,0.0006,0.001,0.0003,3e-06
Genomics,0.06,0.2,0.01,0.0005


In Table \ref{table:robustness} we can see that both progressive indexes are more robust (i.e., have lower variance) than the adaptive indexes. Figure \ref{fig:robustness-per-query} presents the per-query response time for each index on the Uniform benchmark. The Adaptive Progressive KDTree does not have a large variance until query 18 (when it converges) and is the most robust index, as expected. It is followed by the Progressive KDTree, which is more robust than the adaptive indexes because it limits the number of tuples reorganized, but has no time limit for adaptation. Last come the adaptive indexes, which have no limits on the amount of data reorganized or the time spent; hence, their robustness depends on the workload.

In [865]:
# Per-query plot on 'Uniform8' for the adaptive and progressive indexes.
# NOTE(review): 50 presumably limits the plot to the first 50 queries -- confirm.
a = per_query(
    'Uniform8',
    ['CrackingKDTree', 'Quasii', 'ProgressiveIndex', 'ProgressiveIndexCostModel'],
    50,
)
save_figure(a, 'robustness-per-query.pdf')
a

# Response time
In this section we analyze the total response time for each benchmark. Table \ref{table:total-response-time} presents the results.

In [866]:
# Select the 'Time' rows of `m`, drop index level 1, then print the result
# as LaTeX and display it as the cell output.
idx = pd.IndexSlice
a = m.loc[idx[:, 'Time'], :].reset_index(level=1, drop=True)
print(a.to_latex())
a

\begin{tabular}{llllllll}
\toprule
{} &   AvgKD &    MedKD &     AKD &       Q &     PKD &    APKD &        FS \\
\midrule
Unif(8)    &   94.37 &    98.67 &   63.27 &   83.37 &  108.18 &  104.15 &    532.50 \\
Skewed(8)  &  122.07 &   126.25 &   33.27 &   91.78 &  140.67 &  136.25 &    521.97 \\
Zoom(8)    &   30.87 &    34.52 &    4.99 &    7.07 &   41.20 &   39.78 &    374.38 \\
Prdc(8)    &   63.23 &    67.99 &  124.78 &   52.83 &   72.75 &   71.19 &    501.74 \\
SeqZoom(8) &   17.80 &    21.69 &    2.91 &    5.03 &   32.46 &   25.94 &    279.89 \\
AltZoom(8) &   26.72 &    31.13 &   14.15 &   13.90 &   36.17 &   36.24 &    415.39 \\
Shift(8)   &  781.35 &  1192.64 &  468.86 &  682.76 &  683.03 &  752.57 &    545.31 \\
Seq (2)    &    4.73 &     8.84 &    1.39 &    3.37 &    1.80 &    1.61 &      2.04 \\
Power      &   20.62 &    23.20 &   19.13 &   21.03 &   22.23 &   21.96 &    159.50 \\
Genomics   &    9.21 &     9.46 &    8.34 &   12.43 &   14.87 &   15.56 &     16.44 \\
Skyserv

Unnamed: 0,AvgKD,MedKD,AKD,Q,PKD,APKD,FS
Unif(8),94.37,98.67,63.27,83.37,108.18,104.15,532.5
Skewed(8),122.07,126.25,33.27,91.78,140.67,136.25,521.97
Zoom(8),30.87,34.52,4.99,7.07,41.2,39.78,374.38
Prdc(8),63.23,67.99,124.78,52.83,72.75,71.19,501.74
SeqZoom(8),17.8,21.69,2.91,5.03,32.46,25.94,279.89
AltZoom(8),26.72,31.13,14.15,13.9,36.17,36.24,415.39
Shift(8),781.35,1192.64,468.86,682.76,683.03,752.57,545.31
Seq (2),4.73,8.84,1.39,3.37,1.8,1.61,2.04
Power,20.62,23.2,19.13,21.03,22.23,21.96,159.5
Genomics,9.21,9.46,8.34,12.43,14.87,15.56,16.44


Adaptive indexes usually have the lowest total response time, as they adapt as little as possible to efficiently answer a multidimensional range query. We can see in Table \ref{table:response-time} that in the majority of scenarios the Adaptive KDTree has the lowest total response time. Progressive indexes are usually on par with full indexes, specifically because they prioritize robustness and convergence over total response time.

Figure \ref{fig:total-response-time-break-down} presents a time breakdown of the Periodic benchmark for the Adaptive KDTree and Quasii. We can see that the Adaptive KDTree spends much more time on index search than Quasii; this happens because the Adaptive KDTree does not adapt well to a periodic workload. Figure \ref{fig:total-time-number-of-nodes} shows the number of nodes in the index per query on the Periodic benchmark. We can see that at queries 250 and 500 the Adaptive KDTree has a sudden increase in the number of nodes. This is no coincidence, as these queries are the ones where the periodic sequence resets. By query 750, which is also a reset, the index has already converged.

In [867]:
# Breakdown on 'Periodic8' for the cracking KD-tree and Quasii.
# NOTE(review): -1 presumably selects the whole workload rather than a
# single query -- confirm against break_down's definition.
a = break_down(
    'Periodic8',
    ['CrackingKDTree', 'Quasii'],
    -1,
)
save_figure(a, 'total_response_time_break_down.pdf')
a

In [868]:
# Node-count plot on 'Periodic8' for the cracking KD-tree and Quasii.
a = number_of_nodes(
    'Periodic8',
    ['CrackingKDTree', 'Quasii'],
)
save_figure(a, 'total_response_time_number_of_nodes.pdf')
a

On the shifting benchmark, the only index with a total response time smaller than that of a full scan is the Adaptive KDTree, as it is the only index capable of quickly paying off within such a small window of queries. Quasii's adaptation process is heavier, hence it takes more queries to pay off; full indexes also need more queries because of the cost of building them; and progressive indexes have higher payoffs because their focus is on robustness and convergence.

# Comparison with different number of columns
Here I expect that only the total response time changes the pattern, the rest will probably remain equal.
Hence, we could use a plot with lines to show the pattern.

In [664]:
# Experiment names 'Uniform2', 'Uniform4' and 'Uniform8'.
# The explicit loop is kept so that `i` stays defined in the notebook
# namespace exactly as before.
exps = []
for i in (2, 4, 8):
    exps.append('Uniform' + str(i))

# Metrics for all algorithms across those experiments; the return value of
# metrics() is the cell's displayed output.
metrics(
    exps,
    [
        'AverageKDTree', 'MedianKDTree',
        'CrackingKDTree', 'Quasii',
        'ProgressiveIndex', 'ProgressiveIndexCostModel',
        'FullScan',
    ],
)

Unnamed: 0,Unnamed: 1,AvgKD,MedKD,AKD,Q,PKD,APKD,FS
Uniform(2),First Query,4.72,8.85,0.47,1.61,0.31,0.3,0.26
Uniform(2),PayOff,22,42,4,15,8.0,22.0,-
Uniform(2),Convergence,-,-,*,*,51.0,16.0,-
Uniform(2),Robustness,-,-,6E-03,6E-02,0.003,0.0001,-
Uniform(2),Time,6.27,10.42,5.02,5.71,7.3,6.36,226.73
Uniform(4),First Query,5.38,9.54,0.83,1.58,0.48,0.49,0.35
Uniform(4),PayOff,16,29,7,10,13.0,20.0,-
Uniform(4),Convergence,-,-,*,*,51.0,15.0,-
Uniform(4),Robustness,-,-,2E-02,6E-02,0.01,0.0001,-
Uniform(4),Time,13.17,17.36,12.87,12.11,16.65,15.07,357.72


# Discussion/Conclusions
Here we present a discussion of the results and our conclusions.

# Conclusions
* Progressive Indexes are not always the fastest; however, they are also not the slowest with respect to total response time. Their major benefit is being much more stable than adaptive indexes.
* Adaptive indexes are a double-edged sword: they can be exactly what you need (Quasii on ZoomIn) or be terrible (Quasii on a workload with incomplete queries).
* Both adaptive and progressive indexes are competitive with full indexes, which means they end up having the same total response time. However, they bring a couple of benefits:
    * No need to pre-select which attributes to index.
    * Faster initial queries.
    * Hence, frees the scientist exploring the data of the index management burden.

In [703]:
def first_query(exp, algs):
    """Bar chart of the ``get_first_query`` value for each loaded result.

    Args:
        exp: Experiment identifier, forwarded to ``read_multiple``.
        algs: Algorithm identifiers, forwarded to ``read_multiple``.

    Returns:
        The figure from ``create_figure()`` with one bar trace per loaded
        result and the y axis titled 'Time (seconds)'.
    """
    dfs, colors, names = read_multiple(algs, exp)
    fig = create_figure()
    timings = np.array([get_first_query(frame) for frame in dfs])

    # All bars share the same empty x label; they are told apart by the
    # trace name and color. Each bar is labelled with its value (2 decimals).
    traces = [
        go.Bar(
            name=names[k],
            x=[''],
            y=[timings[k]],
            text=['{:.2f}'.format(timings[k])],
            textposition='auto',
            marker_color=colors[k],
        )
        for k in range(len(dfs))
    ]

    fig.add_traces(data=traces)
    fig.update_layout(yaxis_title='Time (seconds)')
    return fig

def payoff(exp, algs):
    """Bar chart of the ``get_payoff`` value of each result against 'FullScan'.

    Args:
        exp: Experiment identifier, forwarded to ``read`` and ``read_multiple``.
        algs: Algorithm identifiers, forwarded to ``read_multiple``.

    Returns:
        The figure from ``create_figure()`` with one bar trace per loaded
        result and the y axis titled 'Query Number'.
    """
    # The 'FullScan' result of the same experiment is the baseline handed to
    # get_payoff for every algorithm.
    baseline = read('FullScan', exp)
    dfs, colors, names = read_multiple(algs, exp)
    fig = create_figure()

    payoff_values = [get_payoff(frame, baseline) for frame in dfs]

    # All bars share the same empty x label; each is labelled with its value
    # formatted to two decimals.
    traces = [
        go.Bar(
            name=names[k],
            x=[''],
            y=[payoff_values[k]],
            text=['{:.2f}'.format(payoff_values[k])],
            textposition='auto',
            marker_color=colors[k],
        )
        for k in range(len(dfs))
    ]

    fig.add_traces(data=traces)
    fig.update_layout(yaxis_title='Query Number')
    return fig

def convergence(exp, algs):
    """Bar chart of the ``get_convergence`` value for each loaded result.

    Args:
        exp: Experiment identifier, forwarded to ``read_multiple``.
        algs: Algorithm identifiers, forwarded to ``read_multiple``.

    Returns:
        The figure from ``create_figure()`` with one bar trace per loaded
        result and the y axis titled 'Query number'.
    """
    dfs, colors, names = read_multiple(algs, exp)
    fig = create_figure()

    converged_at = [get_convergence(frame) for frame in dfs]

    # All bars share the same empty x label; the raw (unformatted) value is
    # used as the bar's text label.
    traces = [
        go.Bar(
            name=names[k],
            x=[''],
            y=[converged_at[k]],
            text=[converged_at[k]],
            textposition='auto',
            marker_color=colors[k],
        )
        for k in range(len(dfs))
    ]

    fig.add_traces(data=traces)
    fig.update_layout(yaxis_title='Query number')
    return fig

def robustness(exp, algs):
    """Bar chart of the ``get_robustness`` value for each loaded result.

    Args:
        exp: Experiment identifier, forwarded to ``read_multiple``.
        algs: Algorithm identifiers, forwarded to ``read_multiple``.

    Returns:
        The figure from ``create_figure()`` with one bar trace per loaded
        result and the y axis titled 'Time (seconds)'.
    """
    dfs, colors, names = read_multiple(algs, exp)
    fig = create_figure()

    robustness_values = [get_robustness(frame) for frame in dfs]

    # All bars share the same empty x label. Unlike the sibling plots, no
    # `text` label is attached to the bars here.
    traces = [
        go.Bar(
            name=names[k],
            x=[''],
            y=[robustness_values[k]],
            textposition='auto',
            marker_color=colors[k],
        )
        for k in range(len(dfs))
    ]

    fig.add_traces(data=traces)
    fig.update_layout(yaxis_title='Time (seconds)')
    return fig

def power_response_time(exp, algs):
    """Bar chart of the ``get_total_time`` value for each loaded result.

    Args:
        exp: Experiment identifier, forwarded to ``read_multiple``.
        algs: Algorithm identifiers, forwarded to ``read_multiple``.

    Returns:
        The figure from ``create_figure()`` with one bar trace per loaded
        result and the y axis titled 'Time (seconds)'.
    """
    dfs, colors, names = read_multiple(algs, exp)
    fig = create_figure()
    totals = np.array([get_total_time(frame) for frame in dfs])

    # All bars share the same empty x label; each is labelled with its value
    # formatted to two decimals.
    traces = [
        go.Bar(
            name=names[k],
            x=[''],
            y=[totals[k]],
            text=['{:.2f}'.format(totals[k])],
            textposition='auto',
            marker_color=colors[k],
        )
        for k in range(len(dfs))
    ]

    fig.add_traces(data=traces)
    fig.update_layout(yaxis_title='Time (seconds)')
    return fig