In [73]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
pd.set_option('display.max_rows', 1000)
%matplotlib inline

In [100]:
# Maps each algorithm key used in this notebook to the prefix of its result
# CSV file name (the first component of the path built by `read_df`).
files = dict(
    AverageKDTree='average_kd_tree',
    MedianKDTree='median_kd_tree',
    CrackingKDTree='cracking_kd_tree',
    CrackingKDTreePerDimension='cracking_kd_tree_pd',
    Quasii='quasii',
    FullScan='full_scan',
    FullScanCandidate='full_scan_cl',
    ProgressiveIndex='progressive_index',
    ProgressiveIndexAdaptive='progressive_index_adaptive',
)
def read_df(alg_name, delta, partition, exp_name, n_rows, n_queries, sel, results_dir='results'):
    """Load one experiment's result CSV and average its repetitions query by query.

    The file read is
    ``{results_dir}/{alg_name}-{delta}-{partition}-{exp_name}-{n_rows}-{n_queries}-{sel}.csv``.

    The averaging assumes the CSV stores the repetitions as consecutive,
    equally sized groups of rows (repetition 0 first) and that the
    ``repetition`` column is zero-based.  Row ``i`` of every group is summed
    and the sum is divided by the number of repetitions.  Rows beyond
    ``step * repetitions`` (when the row count is not a multiple of the
    repetition count) are ignored.

    Parameters
    ----------
    alg_name, delta, partition, exp_name, n_rows, n_queries, sel
        Components interpolated verbatim into the file name.
    results_dir : str, optional
        Directory that holds the result files.  Defaults to ``'results'``,
        which is the location the notebook has always used.

    Returns
    -------
    pandas.DataFrame
        One row per query.  Every CSV column is averaged over the repetitions
        (this includes ``repetition`` itself and the ``index`` column added by
        ``reset_index``).  ``index_search_time`` is filled with ``0.0`` when
        the CSV lacks it, and two derived columns are added: ``query_time``
        (initialization + index search + scan + adaptation) and
        ``query_time_cumsum`` (its running total).
    """
    path = f"{results_dir}/{alg_name}-{delta}-{partition}-{exp_name}-{n_rows}-{n_queries}-{sel}.csv"
    df = pd.read_csv(path)

    # Cast to a plain int so that `range` and the integer division below do
    # not depend on the NumPy scalar type pandas returns.
    repetitions = int(df['repetition'].max()) + 1
    step = len(df.index) // repetitions

    # reset_index gives every repetition the same 0..step-1 labels, so the
    # in-place addition lines up query i of each repetition.
    df_final = df[:step].copy().reset_index()
    for rep in range(1, repetitions):
        df_final += df[step * rep : step * (rep + 1)].copy().reset_index()

    df_final = df_final / repetitions

    if 'index_search_time' not in df_final:
        df_final['index_search_time'] = 0.0
    df_final['query_time'] = (
        df_final['initialization_time']
        + df_final['index_search_time']
        + df_final['scan_time']
        + df_final['adaptation_time']
    )
    df_final['query_time_cumsum'] = df_final['query_time'].cumsum()
    return df_final

# Cumulative Response Time
Cumulative response time using 2 attributes, 10M rows, and 0.1 selectivity

In [108]:
# Experiment parameters.  They are kept as strings because they are passed
# straight to `read_df`, which interpolates them into the result file name.
experiment = 'genomics_query_8'
n_rows = '10000000.0'
n_queries = '3000'
sel = '0.0'

# Figure that receives one cumulative-time line per algorithm.
fig = go.Figure()

def plot_time(fig, df, name):
    """Add the ``query_time_cumsum`` column of ``df`` to ``fig`` as a line trace labelled ``name``."""
    cumulative_trace = go.Scatter(
        y=df['query_time_cumsum'],
        mode='lines',
        name=name,
    )
    fig.add_trace(cumulative_trace)

# One entry per plotted line, in legend order:
# (key in `files`, delta, partition, legend label).
cumulative_series = [
    ('AverageKDTree', '0.0', '1024', 'Average KDTree'),
    ('MedianKDTree', '0.0', '1024', 'MedianKDTree'),
    ('CrackingKDTree', '0.0', '1024', 'Adaptive KDTree'),
    ('CrackingKDTreePerDimension', '0.0', '1024', 'Adaptive KDTree Per Predicate'),
    ('Quasii', '0.0', '1024', 'Quasii'),
    ('ProgressiveIndex', '0.1', '1024', 'ProgressiveIndex (Delta=0.1)'),
    ('ProgressiveIndex', '0.2', '1024', 'ProgressiveIndex (Delta=0.2)'),
    ('ProgressiveIndex', '0.5', '1024', 'ProgressiveIndex (Delta=0.5)'),
    ('ProgressiveIndexAdaptive', '0.1', '1024', 'ProgressiveIndexAdaptive (Delta=0.1)'),
    ('ProgressiveIndexAdaptive', '0.2', '1024', 'ProgressiveIndexAdaptive (Delta=0.2)'),
    ('ProgressiveIndexAdaptive', '0.5', '1024', 'ProgressiveIndexAdaptive (Delta=0.5)'),
    ('FullScan', '0.0', '0', 'Full Scan'),
    # 'FullScanCandidate' ("Full Scan Candidate List") is currently not plotted.
]

for algorithm, delta, partition, label in cumulative_series:
    timings = read_df(files[algorithm], delta, partition, experiment, n_rows, n_queries, sel)
    plot_time(fig, timings, label)

fig.update_layout(
    title=f'Cumulative response time ({experiment})',
    xaxis_title='Query',
    yaxis_title='Time (seconds)',
)
fig.show()

# First Query Response Time

In [109]:
# experiment = 'power'
# n_rows = '10000000.0'
# n_queries = '3000'
# sel='0.0'

def first_query(values):
    """Show a bar chart of the first query's response time for each algorithm.

    Parameters
    ----------
    values : sequence of [DataFrame, str]
        Pairs of a result frame (must contain a ``query_time`` column) and the
        label to show on the x axis.

    The chart title uses the notebook-level ``experiment`` variable.
    """
    # Unpack with comprehensions instead of `np.array(values)[:, k]`: an array
    # built from (DataFrame, str) pairs is ragged, which NumPy 1.24+ rejects.
    dfs = [pair[0] for pair in values]
    names = [pair[1] for pair in values]

    # Positional access: "first query" means the first row, whatever its label.
    first_query_time = [df['query_time'].iloc[0] for df in dfs]

    fig = go.Figure(data=[
        go.Bar(name='First Query Time', x=names, y=first_query_time)
    ])
    fig.update_layout(title=f'First Query Response time ({experiment})', yaxis_title='Time (seconds)')
    fig.show()

# (key in `files`, delta, partition, bar label), in display order.
first_query_series = [
    ('AverageKDTree', '0.0', '1024', 'Average KDTree'),
    ('MedianKDTree', '0.0', '1024', 'MedianKDTree'),
    ('CrackingKDTree', '0.0', '1024', 'Adaptive KDTree'),
    ('CrackingKDTreePerDimension', '0.0', '1024', 'Adaptive KDTree Per Predicate'),
    # ('Quasii', '0.0', '1024', 'Quasii'),
    ('ProgressiveIndex', '0.1', '1024', 'ProgressiveIndex (Delta=0.1)'),
    ('ProgressiveIndex', '0.2', '1024', 'ProgressiveIndex (Delta=0.2)'),
    ('ProgressiveIndex', '0.5', '1024', 'ProgressiveIndex (Delta=0.5)'),
    ('ProgressiveIndexAdaptive', '0.1', '1024', 'ProgressiveIndexAdaptive (Delta=0.1)'),
    ('ProgressiveIndexAdaptive', '0.2', '1024', 'ProgressiveIndexAdaptive (Delta=0.2)'),
    ('ProgressiveIndexAdaptive', '0.5', '1024', 'ProgressiveIndexAdaptive (Delta=0.5)'),
    ('FullScan', '0.0', '0', 'Full Scan'),
]

first_query([
    [read_df(files[algorithm], delta, partition, experiment, n_rows, n_queries, sel), label]
    for algorithm, delta, partition, label in first_query_series
])

In [110]:
# experiment = 'power'
# n_rows = '10000000.0'
# n_queries = '3000'
# sel='0.0'

def break_down(values):
    """Show a stacked bar chart of where each algorithm spent its total time.

    For every frame the ``initialization_time``, ``adaptation_time``,
    ``index_search_time`` and ``scan_time`` columns are summed over all
    queries; each sum becomes one segment of that algorithm's bar.

    Parameters
    ----------
    values : sequence of [DataFrame, str]
        Pairs of a result frame (with the four columns above) and the label to
        show on the x axis.

    The chart title uses the notebook-level ``experiment`` variable.
    """
    # Unpack with comprehensions instead of `np.array(values)[:, k]`: an array
    # built from (DataFrame, str) pairs is ragged, which NumPy 1.24+ rejects.
    dfs = [pair[0] for pair in values]
    names = [pair[1] for pair in values]

    def total(column):
        # Total time per algorithm for one cost component.
        return [df[column].sum() for df in dfs]

    fig = go.Figure(data=[
        go.Bar(name='Initialization', x=names, y=total('initialization_time')),
        go.Bar(name='Adaptation', x=names, y=total('adaptation_time')),
        go.Bar(name='Index Search', x=names, y=total('index_search_time')),
        go.Bar(name='Scan', x=names, y=total('scan_time')),
    ])

    # Stack the four components so that the bar height is the total time.
    fig.update_layout(barmode='stack',
                      title=f'Time Breakdown ({experiment})',
                      yaxis_title='Time (seconds)')
    fig.show()

# (key in `files`, delta, partition, bar label), in display order.
break_down_series = [
    ('AverageKDTree', '0.0', '1024', 'Average KDTree'),
    ('MedianKDTree', '0.0', '1024', 'MedianKDTree'),
    ('CrackingKDTree', '0.0', '1024', 'Adaptive KDTree'),
    ('CrackingKDTreePerDimension', '0.0', '1024', 'Adaptive KDTree Per Predicate'),
    # ('Quasii', '0.0', '1024', 'Quasii'),
    ('ProgressiveIndex', '0.1', '1024', 'ProgressiveIndex (Delta=0.1)'),
    ('ProgressiveIndex', '0.2', '1024', 'ProgressiveIndex (Delta=0.2)'),
    ('ProgressiveIndex', '0.5', '1024', 'ProgressiveIndex (Delta=0.5)'),
    ('ProgressiveIndexAdaptive', '0.1', '1024', 'ProgressiveIndexAdaptive (Delta=0.1)'),
    ('ProgressiveIndexAdaptive', '0.2', '1024', 'ProgressiveIndexAdaptive (Delta=0.2)'),
    ('ProgressiveIndexAdaptive', '0.5', '1024', 'ProgressiveIndexAdaptive (Delta=0.5)'),
    # ('FullScan', '0.0', '0', 'Full Scan'),
]

break_down([
    [read_df(files[algorithm], delta, partition, experiment, n_rows, n_queries, sel), label]
    for algorithm, delta, partition, label in break_down_series
])

In [118]:
# experiment = 'power'
# n_rows = '10000000.0'
# n_queries = '3000'
# sel='0.0'

def convergence(values, threshold=0.001):
    """Show, per algorithm, the first query whose adaptation time is below ``threshold``.

    Parameters
    ----------
    values : sequence of [DataFrame, str]
        Pairs of a result frame (must contain an ``adaptation_time`` column)
        and the label to show on the x axis.
    threshold : float, optional
        Adaptation time below which a query counts as converged.  Defaults to
        ``0.001``, the value this notebook has always used.

    If no query of a frame falls below the threshold, the notebook-level
    ``n_queries`` is plotted for that algorithm.  The chart title uses the
    notebook-level ``experiment`` variable.
    """
    # Unpack with comprehensions instead of `np.array(values)[:, k]`: an array
    # built from (DataFrame, str) pairs is ragged, which NumPy 1.24+ rejects.
    dfs = [pair[0] for pair in values]
    names = [pair[1] for pair in values]

    # `n_queries` is stored as a string for file-name building; convert it so
    # that every bar value is an int.
    never_converged = int(n_queries)

    convergences = [
        next(
            (position for position, adaptation in enumerate(df['adaptation_time'])
             if adaptation < threshold),
            never_converged,
        )
        for df in dfs
    ]

    fig = go.Figure(data=[
        go.Bar(name='Query Number', x=names, y=convergences),
    ])
    fig.update_layout(title=f'Convergence ({experiment})',
                      yaxis_title='Query number')
    fig.show()

# (key in `files`, delta, partition, bar label), in display order.
convergence_series = [
    # ('AverageKDTree', '0.0', '1024', 'Average KDTree'),
    # ('MedianKDTree', '0.0', '1024', 'MedianKDTree'),
    ('CrackingKDTree', '0.0', '1024', 'Adaptive KDTree'),
    ('CrackingKDTreePerDimension', '0.0', '1024', 'Adaptive KDTree Per Predicate'),
    # ('Quasii', '0.0', '1024', 'Quasii'),
    ('ProgressiveIndex', '0.1', '1024', 'ProgressiveIndex (Delta=0.1)'),
    ('ProgressiveIndex', '0.2', '1024', 'ProgressiveIndex (Delta=0.2)'),
    ('ProgressiveIndex', '0.5', '1024', 'ProgressiveIndex (Delta=0.5)'),
    ('ProgressiveIndexAdaptive', '0.1', '1024', 'ProgressiveIndexAdaptive (Delta=0.1)'),
    ('ProgressiveIndexAdaptive', '0.2', '1024', 'ProgressiveIndexAdaptive (Delta=0.2)'),
    ('ProgressiveIndexAdaptive', '0.5', '1024', 'ProgressiveIndexAdaptive (Delta=0.5)'),
    # ('FullScan', '0.0', '0', 'Full Scan'),
]

convergence([
    [read_df(files[algorithm], delta, partition, experiment, n_rows, n_queries, sel), label]
    for algorithm, delta, partition, label in convergence_series
])

In [113]:
# experiment = 'power'
# n_rows = '10000000.0'
# n_queries = '3000'
# sel='0.0'

def robustness(values, window=30):
    """Show, per algorithm, the variance of ``query_time`` over the first ``window`` queries.

    Parameters
    ----------
    values : sequence of [DataFrame, str]
        Pairs of a result frame (must contain a ``query_time`` column) and the
        label to show on the x axis.
    window : int, optional
        Number of leading queries included in the variance.  Defaults to 30,
        the value this notebook has always used.

    The variance is computed with ``np.var`` and its default arguments.  The
    chart title uses the notebook-level ``experiment`` variable.
    """
    # Unpack with comprehensions instead of `np.array(values)[:, k]`: an array
    # built from (DataFrame, str) pairs is ragged, which NumPy 1.24+ rejects.
    dfs = [pair[0] for pair in values]
    names = [pair[1] for pair in values]

    variances = [np.var(df['query_time'][:window]) for df in dfs]

    fig = go.Figure(data=[
        # The bars are variances, so label the trace accordingly.
        go.Bar(name='Variance', x=names, y=variances),
    ])
    fig.update_layout(title=f'Robustness ({experiment})',
                      yaxis_title=f'Variance of first {window} queries')
    fig.show()

# (key in `files`, delta, partition, bar label), in display order.
robustness_series = [
    # ('AverageKDTree', '0.0', '1024', 'Average KDTree'),
    # ('MedianKDTree', '0.0', '1024', 'MedianKDTree'),
    ('CrackingKDTree', '0.0', '1024', 'Adaptive KDTree'),
    ('CrackingKDTreePerDimension', '0.0', '1024', 'Adaptive KDTree Per Predicate'),
    # ('Quasii', '0.0', '1024', 'Quasii'),
    ('ProgressiveIndex', '0.1', '1024', 'ProgressiveIndex (Delta=0.1)'),
    ('ProgressiveIndex', '0.2', '1024', 'ProgressiveIndex (Delta=0.2)'),
    ('ProgressiveIndex', '0.5', '1024', 'ProgressiveIndex (Delta=0.5)'),
    ('ProgressiveIndexAdaptive', '0.1', '1024', 'ProgressiveIndexAdaptive (Delta=0.1)'),
    ('ProgressiveIndexAdaptive', '0.2', '1024', 'ProgressiveIndexAdaptive (Delta=0.2)'),
    ('ProgressiveIndexAdaptive', '0.5', '1024', 'ProgressiveIndexAdaptive (Delta=0.5)'),
    # ('FullScan', '0.0', '0', 'Full Scan'),
]

robustness([
    [read_df(files[algorithm], delta, partition, experiment, n_rows, n_queries, sel), label]
    for algorithm, delta, partition, label in robustness_series
])

In [114]:
# experiment = 'power'
# n_rows = '10000000.0'
# n_queries = '3000'
# sel='0.0'

def payoff(values, baseline):
    """Show, per algorithm, the first query at which it has paid off against ``baseline``.

    An algorithm has paid off at the first query where its
    ``query_time_cumsum`` is strictly smaller than the baseline's
    ``query_time_cumsum`` for the same row label.

    Parameters
    ----------
    values : sequence of [DataFrame, str]
        Pairs of a result frame (must contain ``query_time_cumsum``) and the
        label to show on the x axis.
    baseline : DataFrame
        Result frame to compare against (must contain ``query_time_cumsum``).

    If an algorithm never drops below the baseline, the notebook-level
    ``n_queries`` is plotted for it.  The chart title uses the notebook-level
    ``experiment`` variable.
    """
    # Unpack with comprehensions instead of `np.array(values)[:, k]`: an array
    # built from (DataFrame, str) pairs is ragged, which NumPy 1.24+ rejects.
    dfs = [pair[0] for pair in values]
    names = [pair[1] for pair in values]

    baseline_cumsum = baseline['query_time_cumsum']
    # `n_queries` is stored as a string for file-name building; convert it so
    # that every bar value is an int.
    never_paid_off = int(n_queries)

    payoffs = [
        next(
            (position for position, gap in enumerate(df['query_time_cumsum'] - baseline_cumsum)
             if gap < 0),
            never_paid_off,
        )
        for df in dfs
    ]

    fig = go.Figure(data=[
        go.Bar(name='Query Number', x=names, y=payoffs),
    ])
    fig.update_layout(title=f'Payoff ({experiment})',
                      yaxis_title='Query Number')
    fig.show()

# (key in `files`, delta, partition, bar label), in display order.
payoff_series = [
    # ('AverageKDTree', '0.0', '1024', 'Average KDTree'),
    # ('MedianKDTree', '0.0', '1024', 'MedianKDTree'),
    ('CrackingKDTree', '0.0', '1024', 'Adaptive KDTree'),
    ('CrackingKDTreePerDimension', '0.0', '1024', 'Adaptive KDTree Per Predicate'),
    # ('Quasii', '0.0', '1024', 'Quasii'),
    ('ProgressiveIndex', '0.1', '1024', 'ProgressiveIndex (Delta=0.1)'),
    ('ProgressiveIndex', '0.2', '1024', 'ProgressiveIndex (Delta=0.2)'),
    ('ProgressiveIndex', '0.5', '1024', 'ProgressiveIndex (Delta=0.5)'),
    ('ProgressiveIndexAdaptive', '0.1', '1024', 'ProgressiveIndexAdaptive (Delta=0.1)'),
    ('ProgressiveIndexAdaptive', '0.2', '1024', 'ProgressiveIndexAdaptive (Delta=0.2)'),
    ('ProgressiveIndexAdaptive', '0.5', '1024', 'ProgressiveIndexAdaptive (Delta=0.5)'),
]

# The full scan is the baseline every other algorithm is compared against.
payoff(
    [
        [read_df(files[algorithm], delta, partition, experiment, n_rows, n_queries, sel), label]
        for algorithm, delta, partition, label in payoff_series
    ],
    read_df(files['FullScan'], '0.0', '0', experiment, n_rows, n_queries, sel),
)

In [116]:
# to calculate selectivity in %
# df = read_df('query0', files['CrackingKDTree'], partition='1024')
# df['scan_overhead_after_adapt']/df['tuples_scanned'] * 100

In [117]:
# Display the averaged result frame of the Adaptive (cracking) KD-Tree run.
read_df(
    alg_name=files['CrackingKDTree'],
    delta='0.0',
    partition='1024',
    exp_name=experiment,
    n_rows=n_rows,
    n_queries=n_queries,
    sel=sel,
)

Unnamed: 0,index,adaptation_time,index_search_time,initialization_time,max_height,memory_footprint,min_height,number_of_nodes,partitions_scanned,partitions_skipped,scan_overhead_after_adapt,scan_overhead_before_adapt,scan_time,tuples_scanned,repetition,query_time,query_time_cumsum
0,640.0,0.073735,0.000023,0.492416,26.0,2304.0,21.0,32.0,14.0,0.0,234.000000,9.999999e+06,0.000015,234.0,0.5,0.566189,0.566189
1,641.0,0.215676,0.000087,0.000000,35.0,29016.0,6.0,403.0,351.0,0.0,33.059265,8.003633e+02,0.006888,412778.0,0.5,0.222652,0.788841
2,642.0,0.873258,0.000116,0.000000,35.0,75816.0,6.0,1053.0,639.0,0.0,128.733749,1.399139e+02,0.145254,8431031.0,0.5,1.018627,1.807468
3,643.0,0.501104,0.000249,0.000000,38.0,127080.0,5.0,1765.0,1453.0,0.0,31.999388,6.253416e+01,0.074560,5068767.0,0.5,0.575912,2.383379
4,644.0,0.377531,0.000164,0.000000,41.0,149904.0,5.0,2082.0,800.0,0.0,63.141308,1.240736e+02,0.038731,2747594.0,0.5,0.416426,2.799805
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,1915.0,0.000616,0.000979,0.000000,76.0,3326472.0,5.0,46201.0,2997.0,0.0,inf,inf,0.003159,34712.0,0.5,0.004755,11.796427
1276,1916.0,0.000896,0.002801,0.000000,76.0,3326472.0,5.0,46201.0,9998.0,0.0,inf,inf,0.010601,108959.0,0.5,0.014297,11.810725
1277,1917.0,0.000130,0.000118,0.000000,76.0,3326472.0,5.0,46201.0,442.0,0.0,inf,inf,0.000416,1275.0,0.5,0.000665,11.811389
1278,1918.0,0.000272,0.000392,0.000000,76.0,3326472.0,5.0,46201.0,1410.0,0.0,inf,inf,0.001404,14579.0,0.5,0.002069,11.813458


In [54]:
# Sanity check: the variance of a constant sample is zero.
np.var([2] * 3)

0.0