In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
pd.set_option('display.max_rows', 1000)
%matplotlib inline

In [3]:
# Display name of each algorithm -> token used for it in result file names
# (the value is what gets passed as `name` to read_df).
files = {'ProgressiveIndex': 'progressive_index'}
def read_df(experiment, name, delta='0.0', partition='0'):
    """Load one result CSV and average its repetitions position by position.

    The file ``results/{experiment}-{name}-{delta}-{partition}.csv`` is cut
    into ``max(repetition) + 1`` consecutive slices of equal length. The
    slices are added row by row and the sum is divided by the number of
    repetitions.

    Args:
        experiment: First component of the result file name.
        name: Second component of the result file name.
        delta: Third component of the result file name.
        partition: Fourth component of the result file name.

    Returns:
        A DataFrame with one row per position inside a repetition. It holds
        the averaged CSV columns, an ``index`` column produced by
        ``reset_index()`` (averaged like the others), ``index_search_time``
        (set to 0.0 when the CSV has no such column), ``query_time`` (sum of
        initialization, index search, scan and adaptation times) and
        ``query_time_cumsum`` (running total of ``query_time``).
    """
    raw = pd.read_csv(f"results/{experiment}-{name}-{delta}-{partition}.csv")
    n_reps = raw['repetition'].max() + 1
    rows_per_rep = int(len(raw.index) / n_reps)

    # reset_index() re-bases every slice at row label 0 so the slices line up
    # when added; because drop is not set, the old labels stay as an 'index'
    # column and take part in the averaging.
    slices = [
        raw[rows_per_rep * rep:rows_per_rep * (rep + 1)].reset_index()
        for rep in range(n_reps)
    ]

    averaged = slices[0]
    for later_slice in slices[1:]:
        averaged = averaged + later_slice
    averaged = averaged / n_reps

    # Some result files carry no index search timing; treat it as zero cost.
    if 'index_search_time' not in averaged.columns:
        averaged['index_search_time'] = 0.0

    averaged['query_time'] = (
        averaged['initialization_time']
        + averaged['index_search_time']
        + averaged['scan_time']
        + averaged['adaptation_time']
    )
    averaged['query_time_cumsum'] = averaged['query_time'].cumsum()
    return averaged

# First Query Cost
Cost of the first query as a function of delta, plotted for each column count (2, 4, 8 and 16 columns).

In [46]:
# First query cost versus delta: start from an empty figure that the loop
# further down fills with one trace per column count.
fig = go.Figure()

def plot_time(fig, df, name,deltas):
    """Add one line trace to ``fig``.

    Args:
        fig: Figure that receives the trace through ``add_trace``.
        df: Container whose entry ``0`` (column ``0`` for a DataFrame) holds
            the y values.
        name: Legend label of the trace.
        deltas: x values of the trace.
    """
    trace = go.Scatter(
        x=deltas,
        y=df[0],
        mode='lines',
        name=name,
    )
    fig.add_trace(trace)

cols = ['2cols','4cols','8cols','16cols']
# Deltas stay strings because they are interpolated verbatim into the result
# file names read by read_df.
deltas = ['0.005','0.01','0.05','0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '1.0']
# Numeric copies for the x axis, so the log scale is fed numbers, not strings.
delta_values = [float(delt) for delt in deltas]

for col in cols:
    # One cost per delta: the first (repetition-averaged) query of each run.
    first_query_costs = []
    for delt in deltas:
        run = read_df(col, files['ProgressiveIndex'], delta=delt, partition='1024')
        first_query = run.iloc[0]
        # NOTE(review): scan_time is left out of this sum although read_df's
        # query_time includes it -- confirm that this is intended.
        first_query_costs.append(
            first_query['adaptation_time']
            + first_query['index_search_time']
            + first_query['initialization_time']
        )
    # plot_time reads column 0 of the frame it is given; a flat list yields
    # exactly that column.
    costs = pd.DataFrame(data=first_query_costs)
    plot_time(fig, costs, f'ProgressiveIndex (Partition=1024, cols={col})', delta_values)

# The title and x label describe what is plotted: first-query cost against
# delta, one line per column count.
fig.update_layout(title='First query cost for different deltas (10M rows, 0.01% selectivity)',
                  xaxis_title='Delta',
                  yaxis_title='Time (seconds)',
                  xaxis_type="log")
fig.show()