In [4]:
import pandas
import plotly.express
import json

# Load the benchmark results. Each row is read positionally below as
# [encoding, framework, read time, file size].
with open("results.json") as results_file:
    results = json.load(results_file)

# Newer result files are expected to carry the total number of events as a
# bare number after the rows; older files lack it. The trailing number must be
# split off *before* the table is built, because indexing it like a row
# (result[0]) raises a TypeError. Detecting it by type (rather than by a magic
# row count) also keeps files with many rows and no trailing count working.
DEFAULT_NUMBER_OF_EVENTS = 1e7  # fallback for files without the trailing count
has_event_count = (
    isinstance(results, list)
    and len(results) > 0
    and isinstance(results[-1], (int, float))
    and not isinstance(results[-1], bool)  # bool is a subclass of int
)
if has_event_count:
    rows = results[:-1]
    number_of_events = results[-1]
else:
    rows = results
    number_of_events = DEFAULT_NUMBER_OF_EVENTS

dataframe = pandas.DataFrame(
    {
        "Encoding": [row[0] for row in rows],
        "Framework": [row[1] for row in rows],
        "Read time [s]": [row[2] for row in rows],
        "File size [MiB]": [row[3] for row in rows],
    }
)

# Shorten the names of the encodings and frameworks to make the plotly legend
# readable on mobile. Only exact matches are renamed; other values are kept.
short_names_by_column = {
    "Encoding": {
        "numpy/brotli (Q=1)": "brotli (Q=1)",
        "numpy/brotli (Q=6)": "brotli (Q=6)",
        "numpy/brotli (Q=11)": "brotli (Q=11)",
        "numpy (pickle)": "pickle",
        "numpy (UNDR)": "UNDR",
        "eventstream": "es",
    },
    "Framework": {
        "numpy/brotli": "brotli",
    },
}
for column, short_names in short_names_by_column.items():
    for long_name, short_name in short_names.items():
        dataframe.loc[dataframe[column] == long_name, column] = short_name

title = f"Reading the same {round(number_of_events / 1e6)} million events from different file formats."


In [5]:

# Dark-themed scatter plot on linear axes: read time on x, file size on y,
# coloured by framework, with the marker symbol chosen by encoding.
figure = (
    plotly.express.scatter(
        dataframe,
        x="Read time [s]",
        y="File size [MiB]",
        color="Framework",
        symbol="Encoding",
        template="plotly_dark",
        title=title,
    )
    .update_layout(margin={"l": 10, "r": 10, "t": 80, "b": 10})
    .update_traces(marker_size=13)
)
figure.write_json("file_read_benchmark.json")

# Disabled layout options, kept for reference:
# legend=dict(orientation="v", yanchor="top", y=0, xanchor="right", x=0.99, entrywidth=0.8, entrywidthmode="fraction"),
# hovermode='x unified',
# hoverlabel=dict(bgcolor='rgba(255,255,255,0.75)')
#
# Disabled static image export:
# figure.update_layout(height=600, width=900)
# figure.write_image("file_read_benchmark.png")

In [None]:

# Light-themed variant of the linear-axis scatter plot; no title is set here.
figure = plotly.express.scatter(
    dataframe,
    x="Read time [s]",
    y="File size [MiB]",
    color="Framework",
    symbol="Encoding",
    template="plotly_white",
).update_traces(marker_size=13)
figure.write_json("file_read_benchmark_white.json")

# Disabled static image export:
# figure.update_layout(height=400, width=1000, margin=dict(l=10,r=10,b=10,t=10),)
# figure.write_image("file_read_benchmark_white.png")

# Dark-themed scatter plot with both axes on a logarithmic scale.
figure = plotly.express.scatter(
    dataframe,
    x="Read time [s]",
    y="File size [MiB]",
    color="Framework",
    symbol="Encoding",
    template="plotly_dark",
    title=title,
    log_x=True,
    log_y=True,
).update_traces(marker_size=13)
figure.write_json("file_read_benchmark_log.json")

# Disabled static image export:
# figure.update_layout(height=600, width=900)
# figure.write_image("file_read_benchmark_log.png", scale=2)
