In [None]:
import os
import tempfile
from datetime import datetime
from datetime import time as dt_tm
from datetime import date as dt_date

import numpy as np
import pandas as pd

import plotly.tools as plotly_tools
import plotly.graph_objs as go
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors
# Imported explicitly: later cells call plotly.subplots.make_subplots and plotly.offline.plot,
# and should not rely on another plotly import having loaded these submodules.
import plotly.offline
import plotly.subplots

# Point matplotlib at a throwaway config directory; keep this ahead of the matplotlib import.
os.environ['MPLCONFIGDIR'] = tempfile.mkdtemp()
import matplotlib.pyplot as plt

from scipy.stats import gaussian_kde

from IPython.display import HTML

In [None]:
import sys
#import matplotlib as mpl
from scipy.special import xlogy, loggamma
from scipy.stats import entropy
# The `scipy.ndimage.filters` namespace is deprecated; the function is exposed by `scipy.ndimage`.
from scipy.ndimage import gaussian_filter1d
#import seaborn as sns
#mpl.use('agg')

#plot_ratio = (1 + np.sqrt(5)) / 2
#markers = ['^', 'v', '<', '>', 'o', 's', 'P', '*']

# Benchmark results, parsed below as tab-separated text: one record per line, no header row.
results_filename = 'results10M_2par.txt'

with open(results_filename, "r") as rfile:
    # Blank lines (for example an extra empty line at the end of the file) are skipped:
    # they would yield a one-field row and make np.vstack fail on mismatched row lengths.
    lines = np.vstack([ np.array(a.rstrip("\n").split("\t")) for a in rfile if a.strip() ])

#lines = lines[(lines[:, 0] == 'sd') | (lines[:, 0] == 'rrr')]

#plt.rcParams.update({'font.size': 22})

figsize = 8

# `lines` is a 2-D array of strings; each field is converted to its numeric type on load.
data = pd.DataFrame()
data['method'] = lines[:, 0]
data['size'] = lines[:, 1].astype(int)
# NOTE(review): field 2 of the results file is not loaded — confirm it is skipped on purpose.
data['density'] = lines[:, 3].astype(float)
data['bits_per_entry'] = lines[:, 4].astype(float)
# Field 5 is normalised by `size` and scaled by 8
# (presumably total bytes -> bits per entry; confirm against the benchmark writer).
data['RAM_per_entry'] = lines[:, 5].astype(float) / data['size'] * 8
data['bits_per_entry_expected'] = lines[:, 6].astype(float)

# Fields 7..16 hold the timings, stored as consecutive (random, sequential) pairs per query type.
timing_columns = [
    'access_time', 'seq_access_time',
    'access_word_time', 'seq_access_word_time',
    'rank_time', 'seq_rank_time',
    'select_time', 'seq_select_time',
    'cond_rank_time', 'seq_cond_rank_time',
]
for column_index, column_name in enumerate(timing_columns, start=7):
    data[column_name] = lines[:, column_index].astype(float)

In [None]:
# Show the distinct method labels found in the results (np.unique returns them sorted).
np.unique(data['method'].values)

In [None]:
# Plotting order of the benchmarked methods; this order also fixes each method's colour and
# legend position. The `rrr*` variants differ only by their numeric suffix
# (presumably a block-size parameter — confirm).
methods = (
    ['stat', 'il', 'sd']
    + ['rrr{}'.format(suffix) for suffix in (15, 31, 63, 127, 255)]
    + ['dyn', 'hyb', 'smart', 'small', 'smallrank']
)

In [None]:
# One colour per method, taken in order from the Vivid palette followed by the Alphabet palette.
colors = {
    method_name: shade
    for method_name, shade in zip(
        methods, plotly.colors.qualitative.Vivid + plotly.colors.qualitative.Alphabet
    )
}

def make_transparent(color, alpha=0.1):
    """Return `color` as a CSS ``rgba(r, g, b, alpha)`` string with the given opacity.

    Parameters
    ----------
    color : str
        Either a hex colour (``'#rrggbb'`` or the short form ``'#rgb'``) or a functional
        ``'rgb(r, g, b)'`` / ``'rgba(r, g, b, a)'`` string with integer channels.
        Whitespace around the commas is optional; an existing alpha component is replaced.
    alpha : float, optional
        Opacity written into the result (default 0.1).

    Returns
    -------
    str
        A string of the form ``'rgba(r, g, b, alpha)'``.

    Raises
    ------
    ValueError
        If `color` is not in one of the supported formats.
    """
    color = color.strip()
    if color.startswith('#'):
        hex_digits = color[1:]
        if len(hex_digits) == 3:
            # Short form: '#abc' is shorthand for '#aabbcc'.
            hex_digits = ''.join(digit * 2 for digit in hex_digits)
        channels = [int(hex_digits[i:i + 2], 16) for i in (0, 2, 4)]
    else:
        open_paren = color.find('(')
        if open_paren == -1 or not color.endswith(')'):
            raise ValueError('unsupported color format: {!r}'.format(color))
        # Split on bare commas so both 'rgb(1,2,3)' and 'rgb(1, 2, 3)' parse; int() ignores
        # surrounding whitespace. Only the first three components are channels: a fourth
        # one (the alpha of an 'rgba(...)' input) is dropped in favour of `alpha`.
        components = color[open_paren + 1:-1].split(',')
        if len(components) < 3:
            raise ValueError('unsupported color format: {!r}'.format(color))
        channels = [int(component) for component in components[:3]]
    return 'rgba({}, {}, {}, {})'.format(*channels, alpha)

In [None]:
# The plotted method list must match the methods present in the results exactly.
assert set(methods) == set(np.unique(data['method'])), \
    'methods list does not match the methods found in the results'

# (column of `data`, subplot title) for every panel, placed row by row in a two-column grid.
queries = [
    ('access_time', 'Bit access time (random)'),
    ('seq_access_time', 'Bit access time (sequential)'),
    ('access_word_time', '64-bit word access time (random)'),
    ('seq_access_word_time', '64-bit word access time (sequential)'),
    ('rank_time', 'Rank query time (random)'),
    ('seq_rank_time', 'Rank query time (sequential)'),
    ('cond_rank_time', 'Rank-if-Access query time (random)'),
    ('seq_cond_rank_time', 'Rank-if-Access query time (sequential)'),
    ('select_time', 'Select query time (random)'),
    ('seq_select_time', 'Select query time (sequential)'),
    ('RAM_per_entry', 'RAM'),
    ('bits_per_entry', 'Serialized size'),
]

# Panels that report sizes rather than timings; they get a different y-axis label.
size_queries = {'RAM_per_entry', 'bits_per_entry'}
# Methods whose traces start hidden; they can be switched on from the legend.
hidden_methods = {'rrr127', 'rrr255', 'hyb', 'dyn'}

fig = plotly.subplots.make_subplots(rows=6, cols=2, print_grid=False,
                                    subplot_titles=[title for _, title in queries],
                                    horizontal_spacing=0.12, vertical_spacing=0.05)

for i, (query, query_name) in enumerate(queries):
    row, col = 1 + i // 2, 1 + i % 2

    for method in methods:
        visible = 'legendonly' if method in hidden_methods else None
        color = colors[method]

        # Mean and sample standard deviation of the metric at each density, in one pass.
        # Groups come back sorted by density, so the traces run left to right.
        stats = (data.loc[data['method'] == method]
                     .groupby('density')[query]
                     .agg(['mean', 'std'])
                     .reset_index())

        # Shared styling of the two thin lines that bound the +/- one-std band. They are kept
        # out of the legend but share the method's legend group, so they toggle with it.
        band_style = dict(line=dict(width=0.5, color=color), fillcolor=make_transparent(color),
                          hoverinfo='none', mode='lines', name=method, legendgroup=method,
                          showlegend=False, visible=visible)

        # Upper edge first; the lower edge then fills up to it (fill='tonexty' shades the
        # area between a trace and the one added just before it).
        fig.add_trace(go.Scatter(x=stats['density'], y=stats['mean'] + stats['std'], **band_style),
                      row=row, col=col)
        fig.add_trace(go.Scatter(x=stats['density'], y=stats['mean'] - stats['std'],
                                 fill='tonexty', **band_style),
                      row=row, col=col)
        # The mean curve; only the first panel contributes legend entries.
        fig.add_trace(go.Scatter(x=stats['density'], y=stats['mean'], mode='lines',
                                 name=method, legendgroup=method, marker_color=color,
                                 showlegend=(i == 0), visible=visible),
                      row=row, col=col)

    # Axis titles depend only on the panel, so they are set once per panel.
    fig.update_xaxes(title_text='Density', row=row, col=col)
    fig.update_yaxes(title_text=('Bits per entry' if query in size_queries else 'Time, sec'),
                     row=row, col=col)

# Axis titles are already set per panel above; only the overall size and title remain.
fig.update_layout(height=2000, title=None)
fig.show()

In [None]:
# Write the figure to an HTML file with plotly.js bundled in, without opening it in a browser.
# NOTE(review): image_height/image_width look relevant only to plotly's optional static-image
# download (the `image` argument, which is not passed here) — confirm they have any effect.
plotly.offline.plot(
    fig,
    filename='bit_vectors_10M.html',
    include_plotlyjs=True,
    auto_open=False,
    image_height=2000,
    image_width=800,
)