In [None]:
import plotly.express as px
import pandas as pd

In [None]:
# Reads a file line by line, skipping a fixed number of leading lines
# (9 by default) before the first line that is handed to the consumer.
class InFile(object):
    """Line-oriented, file-like reader that discards a fixed-length preamble.

    On the first read the first ``skip_lines`` lines of the underlying file
    are consumed and thrown away; every later call returns exactly one line
    (including its trailing newline).

    The object supports two consumption styles:

    * iterator protocol (``__iter__`` / ``__next__`` / ``next``), which ends
      with ``StopIteration`` at end of file;
    * ``read()``, which returns one line per call and ``''`` at end of file,
      regardless of the arguments passed.

    The underlying file is closed automatically when end of file is reached
    or when reading fails; ``close()`` can be called to release it earlier.

    Args:
        infile: Path of the text file to open.
        skip_lines: Number of leading lines to discard before the first line
            that is returned. Defaults to 9.
    """

    def __init__(self, infile, skip_lines=9):
        self.infile = open(infile)
        self.skip_lines = skip_lines
        # Becomes True once the preamble has been skipped.
        self.initialized = False

    def __next__(self):
        return self.next()

    def __iter__(self):
        return self

    def read(self, *args, **kwargs):
        """Return the next line, or ``''`` once the file is exhausted.

        Size arguments are accepted for file-like compatibility but ignored:
        one whole line is returned per call.
        """
        try:
            return self.__next__()
        except StopIteration:
            # File-like consumers expect an empty string at EOF, not an exception.
            return ''

    def next(self):
        """Return the next line after the preamble.

        Raises:
            StopIteration: When the file is exhausted or already closed.
        """
        if self.infile.closed:
            raise StopIteration
        try:
            if not self.initialized:
                self.initialized = True
                for _ in range(self.skip_lines):
                    self.infile.readline()
            line: str = self.infile.readline()
        except Exception:
            # Release the handle, but let the real error surface instead of
            # disguising it as a normal end of data.
            self.infile.close()
            raise
        if not line:
            # readline() signals EOF with '' (a blank line is '\n'), so this
            # is the only place where iteration can actually end.
            self.infile.close()
            raise StopIteration
        return line

    def close(self):
        """Close the underlying file; safe to call more than once."""
        self.infile.close()

In [None]:
# Load the results through InFile (which discards the leading lines of the
# file before pandas sees the header) and drop the columns that are not used.
df = pd.read_csv(InFile('grid1.csv')).drop(
    columns=['bytes_per_second', 'items_per_second', 'label', 'error_occurred', 'error_message']
)

# Keep the rows whose name contains 'stddev' in their own frame.
df_std = df.loc[df['name'].str.contains('stddev')]

# Narrow df to the rows whose name contains 'mean'.
# Drop BINARY SEARCH (names containing 'SearchFixture') --- TODO: remove this
df = (
    df.loc[df['name'].str.contains('mean')]
    .loc[lambda frame: ~frame['name'].str.contains('SearchFixture')]
    .reset_index(drop=True)
)
df

In [None]:
# Allow up to 100 characters per displayed cell.
pd.set_option('display.max_colwidth', 100)

# Rows of the stddev frame whose real_time is greater than 10.
df_std_errors = df_std.loc[df_std['real_time'].gt(10)]
df_std_errors

In [None]:
# String example: 'sampled_b_tree_fixture<SIMD_Btree::SIMD_ext::AVX2, 2>/b_plus_256_2/0/10000000_mean'
def extract_ext(s):
    """Extract the SIMD extension name from a benchmark name.

    Looks at the text before the first '/', takes what follows the first '<'
    up to the next ',', and returns the third '::'-separated component with
    surrounding whitespace removed (e.g. 'AVX2').

    Returns:
        The extension as a string, or the integer 0 when the name has no '<'
        or fewer than three '::'-separated components. Note that the fallback
        is an int while the normal result is a str.
    """
    fixture = s.partition('/')[0]
    around_bracket = fixture.split('<')
    if len(around_bracket) < 2:
        return 0
    first_argument = around_bracket[1].partition(',')[0]
    scopes = first_argument.split('::')
    if len(scopes) < 3:
        return 0
    return scopes[2].strip()

def extract_vec_sz(s):
    """Extract the SIMD vector size from a benchmark name.

    Looks at the text before the first '>/', takes what follows the first '<',
    and returns the second ','-separated component with surrounding
    whitespace removed (e.g. '2').

    Returns:
        The vector size as a string, or the integer 0 when the name has no
        '<' or no ',' after it. Note that the fallback is an int while the
        normal result is a str.
    """
    before_close = s.partition('>/')[0]
    around_bracket = before_close.split('<')
    if len(around_bracket) < 2:
        return 0
    arguments = around_bracket[1].split(',')
    if len(arguments) < 2:
        return 0
    return arguments[1].strip()
    
def extract_tree_type(s):
    """Return the benchmark type encoded at the start of a benchmark name.

    Takes the text before the first '/' and before the first '<', removes
    every occurrence of '_fixture', and strips surrounding whitespace.
    """
    fixture = s.partition('/')[0].partition('<')[0]
    return fixture.replace('_fixture', '').strip()
def extract_input_sz(s):
    """Return the input size encoded in the last '/'-separated segment.

    The result is the part of the last segment that precedes its first '_',
    with surrounding whitespace removed; it is returned as a string.
    """
    last_segment = s.rpartition('/')[2]
    return last_segment.partition('_')[0].strip()
def extract_distr(s):
    """Return the second-to-last '/'-separated segment, stripped of whitespace.

    Raises:
        IndexError: If the name contains no '/'.
    """
    tail = s.rsplit('/', 2)
    return tail[-2].strip()

In [None]:
# Split the benchmark name into one column per encoded field, then drop it.
df = (
    df.assign(
        bench_type=lambda frame: frame['name'].apply(extract_tree_type),
        simd_ext=lambda frame: frame['name'].apply(extract_ext),
        simd_vec_sz=lambda frame: frame['name'].apply(extract_vec_sz),
        input_sz=lambda frame: frame['name'].apply(extract_input_sz),
        input_distr=lambda frame: frame['name'].apply(extract_distr),
    )
    .drop(columns=['name'])
)

df

In [None]:
# Join benchmark type and SIMD extension with '_' into a single label column.
df = df.assign(
    bench_unique=df['bench_type'].str.cat(df['simd_ext'], sep='_'),
)
df

In [None]:
# Show the rows with input_sz '1000000' and simd_vec_sz '2' (both compared as strings).
df.loc[
    df['input_sz'].eq('1000000')
    & df['simd_vec_sz'].eq('2')
]

In [None]:
# Display names of the input distributions; the position in the tuple is the
# integer id used as the dictionary key (0, 1, 2, 3).
id2dist = dict(enumerate((
    'Uniform',
    'Exponential',
    'Books',
    'Wiki',
)))

In [None]:
# Plot the benchmark: real_time against SIMD vector size, one line per
# bench_unique value and one facet per (input size, distribution) pair.
def _facet_title(raw_text):
    """Turn an annotation like 'facet_splitter=<size>_<distr id>' into a title."""
    facet_key = raw_text.split('=')[-1].split('_')
    size_in_millions = int(facet_key[0]) / 1000000
    distribution = id2dist[int(facet_key[1])]
    return f"size: {size_in_millions}M - distr: {distribution}"


def _show_tick_labels(axis):
    """Force tick labels to be drawn on the given axis."""
    return axis.update(showticklabels=True)


# One facet key per row: '<input_sz>_<input_distr>'.
df['facet_splitter'] = df['input_sz'].str.cat(df['input_distr'], sep='_')

fig = px.line(
    df,
    x='simd_vec_sz',
    y='real_time',
    color='bench_unique',
    facet_col='facet_splitter',
    facet_col_wrap=4,
    title='Input sz',
    height=1200,
)
# Unlink the y axes so they no longer share one range across facets.
fig.update_yaxes(matches=None)
fig.for_each_yaxis(_show_tick_labels)
fig.for_each_xaxis(_show_tick_labels)
fig.for_each_annotation(lambda a: a.update(text=_facet_title(a.text)))
fig.show()