In [10]:
import pandas as pd
from parse_results import benchmark_output_as_dataframe

# These functions are hard-coded to the data in run_10M_3 (4 threads, 10M keys).
# Generalizing them should not take much work, but it has not been done yet.

def extract_merge_duration(df, baseline, experiment, key_size_byte, use_disk):
    """Pivot merge durations for one baseline/experiment pair and add relative gains.

    Rows of ``df`` are kept when ``merge_mode`` equals ``baseline`` or
    ``experiment``, ``key_size_bytes`` equals ``key_size_byte`` and ``use_disk``
    equals ``use_disk``. The remaining rows are pivoted with ``ratio`` as the
    index and ``(merge_mode, plr_error)`` as the columns, aggregating with the
    median.

    Args:
        df: DataFrame with at least the columns ``merge_mode``, ``plr_error``,
            ``merge_duration_sec``, ``ratio``, ``key_size_bytes`` and
            ``use_disk``.
        baseline: ``merge_mode`` value of the reference run. The baseline is
            assumed to always be recorded with ``plr_error == 0``.
        experiment: ``merge_mode`` value of the run compared to the baseline.
        key_size_byte: value of ``key_size_bytes`` to select.
        use_disk: value of ``use_disk`` to select.

    Returns:
        The pivoted DataFrame. Its columns are a three-level index
        ``(value, merge_mode, plr_error)`` where ``value`` is
        ``'key_size_bytes'``, ``'merge_duration_sec'`` or the added
        ``'merge_duration_rel'``. The latter is the improvement over the
        baseline in percent, rounded to two decimals:
        ``(baseline - duration) / baseline * 100``. Positive means faster
        than the baseline.

    Raises:
        KeyError: if durations were selected but none of them belongs to
            ``baseline`` with ``plr_error == 0``.
    """
    selected = (
        df['merge_mode'].isin([baseline, experiment])
        & (df['key_size_bytes'] == key_size_byte)
        & (df['use_disk'] == use_disk)
    )
    pivot_input = df.loc[
        selected,
        ['merge_mode', 'plr_error', 'merge_duration_sec', 'ratio', 'key_size_bytes'],
    ]
    # pivot_table sorts both axes itself, so no pre-sorting is needed.
    rdf = pd.pivot_table(
        pivot_input,
        index='ratio',
        columns=['merge_mode', 'plr_error'],
        aggfunc='median',
    )

    # Only the duration columns get a relative counterpart. The pivot also
    # carries 'key_size_bytes' columns, which must not be compared against
    # the baseline duration.
    duration_columns = [col for col in rdf.columns if col[0] == 'merge_duration_sec']
    if not duration_columns:
        # Nothing was selected; return the (empty) pivot unchanged.
        return rdf

    # Assume baseline has always plr_error=0
    baseline_column = ('merge_duration_sec', baseline, 0)
    if baseline_column not in rdf.columns:
        raise KeyError(
            "no baseline column %r in pivoted data (is plr_error 0 for the baseline?)"
            % (baseline_column,)
        )
    baseline_duration = rdf[baseline_column]

    for column in duration_columns:
        relative_column = ('merge_duration_rel',) + tuple(column[1:])
        improvement = (baseline_duration - rdf[column]) / baseline_duration * 100.0
        rdf[relative_column] = improvement.round(2)
    return rdf
        

def print_experiment(df, exp, baseline, experiment, key_size, on_disk):
    """Print one experiment as a Markdown section with two tables.

    Calls ``extract_merge_duration`` with the given selection and prints,
    under a level-4 heading named ``exp``, the absolute merge durations
    (seconds) followed by the durations relative to the baseline (percent).

    Args:
        df: benchmark results, passed through to ``extract_merge_duration``.
        exp: human-readable experiment title used as the section heading.
        baseline: ``merge_mode`` of the reference run.
        experiment: ``merge_mode`` of the run compared to the baseline.
        key_size: key size in bytes to select.
        on_disk: value of ``use_disk`` to select.
    """
    durations = extract_merge_duration(df, baseline, experiment, key_size, on_disk)
    print("#### %s\n" % exp)
    print("##### Absolute number (sec) \n")
    print(durations['merge_duration_sec'].to_markdown())
    print("")

    # Same heading level as the absolute table: both are sibling
    # sub-sections of the experiment heading above.
    print("##### Relative number (%) \n")
    print(durations['merge_duration_rel'].to_markdown())
    print("")

def print_key_size_data(df, key_size):
    """Print the full Markdown report section for one key size.

    Emits a level-2 heading for ``key_size`` and then, first for in-memory
    runs (``use_disk`` False) and then for on-disk runs (``use_disk`` True),
    the single-threaded merge, multi-threaded merge and single-threaded join
    experiments via ``print_experiment``.

    Args:
        df: benchmark results, passed through to ``print_experiment``.
        key_size: key size in bytes; must be an integer for the heading.
    """
    # (section title, baseline merge_mode, experiment merge_mode)
    experiments = (
        ('Single Threaded Merge', 'standard', 'learned'),
        ('Multi-Threaded Merge, 4 threads', 'parallel_standard', 'parallel_learned'),
        ('Single Threaded Join', 'standard_join', 'learned_join'),
    )
    # (storage heading, use_disk flag)
    storage_sections = (
        ("### In Memory\n", False),
        ("### On Disk\n", True),
    )

    print("## %d Byte String Keys\n" % key_size)
    for storage_heading, on_disk in storage_sections:
        print(storage_heading)
        for title, baseline_mode, experiment_mode in experiments:
            print_experiment(df, title, baseline_mode, experiment_mode, key_size, on_disk)
    


In [14]:
# Parse the raw benchmark log of the 10M-key run into a DataFrame.
df = benchmark_output_as_dataframe('run_10M_3.txt')

# One report section per key size (in bytes).
key_sizes = [8, 16, 32]
for key_size in key_sizes:
    print_key_size_data(df, key_size)

# Model size per (number of items, PLR error) for 8-byte keys. Only rows with
# plr_error > 0 are used (baselines are assumed to be recorded with
# plr_error = 0).
model_rows = (df['key_size_bytes'] == 8) & (df['plr_error'] > 0)
model_columns = [
    'iter_1.num_items',
    'plr_error',
    'iter_1.model_size_bytes',
    'iter_1.item_total_size_bytes',
]
model_size = df.loc[model_rows, model_columns].groupby(['iter_1.num_items', 'plr_error'])
model_size_bytes = model_size.median()

# Model size as a percentage of the total item size, rounded to two decimals.
model_to_items = (
    model_size_bytes['iter_1.model_size_bytes']
    / model_size_bytes['iter_1.item_total_size_bytes']
)
model_size_bytes['rel_percent'] = (model_to_items * 100.0).round(2)

print('### Model Size to PLR Error')
print(model_size_bytes[['iter_1.model_size_bytes', 'rel_percent']].to_markdown(tablefmt="pretty"))



## 8 Byte String Keys

### In Memory

#### Single Threaded Merge

##### Absolute number (sec) 

|   ratio |   ('learned', 2) |   ('learned', 10) |   ('learned', 100) |   ('learned', 1000) |   ('standard', 0) |
|--------:|-----------------:|------------------:|-------------------:|--------------------:|------------------:|
|       1 |            0.943 |             0.999 |              1.543 |               7.897 |             0.371 |
|      10 |            2.374 |             2.52  |              3.565 |              14.776 |             2.048 |
|      50 |            6.132 |             6.329 |              7.668 |              20.151 |             9.486 |
|      60 |            7.068 |             7.264 |              8.621 |              21.559 |            11.354 |
|      80 |            8.906 |             9.119 |             10.525 |              23.419 |            15.079 |
|     100 |           10.754 |            10.976 |             12.384 |              25.803 |            1

#### Single Threaded Join

##### Absolute number (sec) 

|   ratio |   ('learned_join', 2) |   ('learned_join', 10) |   ('learned_join', 100) |   ('learned_join', 1000) |   ('standard_join', 0) |
|--------:|----------------------:|-----------------------:|------------------------:|-------------------------:|-----------------------:|
|       1 |                 0.303 |                  0.335 |                   0.644 |                    3.784 |                  0.1   |
|      10 |                 0.377 |                  0.398 |                   0.655 |                    3.572 |                  0.26  |
|      50 |                 0.524 |                  0.576 |                   0.817 |                    3.726 |                  0.97  |
|      60 |                 0.54  |                  0.607 |                   0.842 |                    3.618 |                  1.15  |
|      80 |                 0.56  |                  0.657 |                   0.879 |                    3.7