In [89]:
#
# Constants
#
# NOTE(review): root_path is an absolute, machine-specific path and is not
# referenced by any cell visible in this part of the notebook - confirm whether
# it is still needed, or make it configurable.
root_path = "/home/jkeiser/simdjson"
# Bar color for the implementation that is highlighted in the charts.
highlight_color = 'firebrick'
# Bar color for every other implementation.
off_color = 'steelblue'


#
# Compilers: comment out ones we don't want to show.
# Keys are matched against each run's `compiler_plus_version`; values are the
# display names used when building the per-run chart titles.
#
compilers = {
    "clang10": "Clang 10",
    "gcc10.2": "g++ 10.2",
}


#
# Implementations: comment out ones we don't want to show.
# Keys are matched against each benchmark's `implementation`; values are the
# labels shown on the x axis. The order of the entries is the order of the bars.
#
json_implementations = {
    "simdjson_ondemand": "simdjson (On Demand)",
    "simdjson_dom": "simdjson (DOM)",
    "yyjson_insitu": "yyjson (insitu)",
#     "yyjson": "yyjson",
    "sajson": "sajson",
    "rapidjson_insitu": "RapidJson (insitu)",
#     "rapidjson": "RapidJson",
    "nlohmann_json": "nlohmann::json",
#     "rapidjson_lossless": "RapidJson (lossless)",
#     "simdjson_ondemand_unordered": "simdjson (On Demand unordered)",
#     "simdjson_ondemand_forward_only": "simdjson (On Demand forward-only)",
}


#
# Highlight simdjson On Demand
#
# Every displayed implementation label starts with the neutral color.
json_implementation_colors = dict.fromkeys(json_implementations.values(), off_color)
# Look the highlighted label up by its key instead of repeating the display
# string, so renaming the label (or commenting the entry out) cannot leave the
# highlight pointing at a label that is no longer plotted.
if "simdjson_ondemand" in json_implementations:
    json_implementation_colors[json_implementations["simdjson_ondemand"]] = highlight_color

#
# Comment out benchmarks we don't want shown.
# Keys are matched against each benchmark's `name`; values are the titles of
# the chart facets (one facet per benchmark).
#
benchmark_names = {
    # "partial_tweets": "Read All Tweets",
    "find_tweet": "Find Tweet",
    # "large_random": "Read Points",
    # "top_tweet": "Top Tweet",
    "kostya": "Read Points (Kostya)",
    # "distinct_user_id": "Tweet User IDs",
}

In [90]:
# Pandas / data setup
%matplotlib notebook
%precision 3

from IPython.display import set_matplotlib_formats
# from pandas.plotting import register_matplotlib_converters
# register_matplotlib_converters()

# import matplotlib

import numpy as np
# import matplotlib.pyplot as plt
# plt.rcParams['figure.figsize'] = [9, 5]

import plotly.express as px

import pandas as pd

In [91]:
# Other Modules
import json
import re

In [92]:
import google_benchmark_json
# Load the runs found under the current directory and keep only those built
# with a compiler that is enabled in `compilers`.
benchmark_runs = list(
    filter(
        lambda run: run.compiler_plus_version in compilers,
        google_benchmark_json.list_runs("."),
    )
)
benchmark_runs

[<google_benchmark_json.GoogleBenchmarkRun at 0x7ff66c931f40>,
 <google_benchmark_json.GoogleBenchmarkRun at 0x7ff66c931580>,
 <google_benchmark_json.GoogleBenchmarkRun at 0x7ff66f2c84c0>,
 <google_benchmark_json.GoogleBenchmarkRun at 0x7ff66cbfed30>]

In [93]:
def _run_label(benchmark_run):
    """Build the label that identifies one benchmark run.

    The label has the form
    ``<host> <compiler> [- <variant> ](simdjson <base_version>[+<commits>])``
    and is later used both as the grouping key and as the chart title.
    """
    # The trailing space keeps the variant separated from "(simdjson ...)".
    variant = f"- {benchmark_run.variant} " if benchmark_run.variant else ""
    commits = (
        f"+{benchmark_run.commits_past_version}"
        if benchmark_run.commits_past_version > 0
        else ""
    )
    return (
        f"{benchmark_run.host} {compilers[benchmark_run.compiler_plus_version]} "
        f"{variant}(simdjson {benchmark_run.base_version}{commits})"
    )


# One flat record per (run, benchmark) pair, restricted to the benchmarks and
# implementations enabled in the configuration cell.
benchmarks = [
    {
        "run": _run_label(benchmark_run),
        "benchmark_name": benchmark_names[benchmark.name],
        "json_implementation": json_implementations[benchmark.implementation],
        "throughput": benchmark.throughput,
    }
    for benchmark_run in benchmark_runs
    for benchmark in benchmark_run.benchmarks
    if benchmark.name in benchmark_names
    if benchmark.implementation in json_implementations
]


In [94]:
def graph_benchmarks(name, benchmarks):
    """Build a grouped bar chart of throughput per JSON implementation.

    Args:
        name: Chart title.
        benchmarks: Records handed to ``px.bar``; the columns
            ``json_implementation``, ``throughput`` and ``benchmark_name``
            are read. Throughput is presumably in bytes per second, since
            ticks and bar labels are scaled by 1e9 to show GB/s - TODO confirm.

    Returns:
        The plotly figure, with one facet per benchmark name. The figure is
        not shown; the caller decides when to display it.
    """
    bytes_per_gigabyte = 1000000000

    fig = px.bar(benchmarks,
                 barmode="group",
                 title=name,
                 x="json_implementation", y="throughput",
                 labels={"throughput": "Throughput (GB/s)"},
                 facet_col="benchmark_name", facet_col_wrap=2,
                 category_orders={
                    "json_implementation": list(json_implementations.values()),
                    "benchmark_name": list(reversed(benchmark_names.values())),
                 }
                )

    # Set y axis to show "n GB/s". The tick range is fixed at 0-6 GB/s.
    axis_gigabytes_per_sec = [0, 1, 2, 3, 4, 5, 6]
    fig.update_xaxes(dict(title=None))
    fig.update_yaxes(dict(
            tickmode='array',
            tickvals=[tick * bytes_per_gigabyte for tick in axis_gigabytes_per_sec],
            ticktext=[f"{tick} GB/s" for tick in axis_gigabytes_per_sec]
        ))

    # Facet titles arrive as "benchmark_name=<title>"; keep only the title.
    # Splitting once and taking the last piece keeps titles that contain "="
    # intact and does not raise on annotations that have no "=" at all.
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=", 1)[-1]))

    # Label each bar with its throughput in GB/s and color it per implementation.
    fig.for_each_trace(lambda a:
        a.update(
            text=[round(throughput / bytes_per_gigabyte, 1) for throughput in a.y],
            marker_color=[json_implementation_colors[impl] for impl in a.x],
        )
    )
    return fig

# Render one chart per run. A plain loop is used instead of a list
# comprehension so the cell does not echo a list of the `None` values
# returned by `.show()`.
for name, group in pd.DataFrame(benchmarks).groupby("run"):
    graph_benchmarks(name, group).show()


[None, None, None, None]