# Protocol Benchmarks

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from functools import *
from glob import glob as list_files
from pathlib import Path

In [None]:
# Locate the per-run CSV files. BENCH_RESULTS_DIR takes precedence; the
# relative default is used only when that variable is not set, so any other
# failure surfaces instead of being silently swallowed.
try:
    files = list_files(os.environ['BENCH_RESULTS_DIR'] + '/*/*.csv')
except KeyError:
    files = list_files("../bench-results/*/*/*.csv")

if not files:
    raise ValueError("no benchmark CSV files found; check BENCH_RESULTS_DIR or ../bench-results")

dfs = []
for file in files:
    # The two directory levels above each CSV name the run and the system.
    runId = Path(file).parent.name
    system = Path(file).parent.parent.name
    df = pd.read_csv(file, delimiter=";", dtype={'latency': 'float64', 'send-time': 'float64', 'receive-time': 'float64'})

    # The run directory name is dash-separated: the first three fields form the
    # run configuration, the fourth distinguishes individual runs.
    params = runId.split('-')

    df["run_config"] = f"{params[0]} {params[1]} {params[2]}"
    df["run_id"] = params[3]
    df["run"] = runId
    df["system"] = system
    # Position of each row within its own file.
    df["index"] = range(len(df))
    # Shift both timestamps by the earliest send time of this file so that
    # every run starts at zero and the two columns share one origin.
    first_send = df['send-time'].min()
    df['receive-time'] = df['receive-time'] - first_send
    df['send-time'] = df['send-time'] - first_send
    dfs.append(df)

df = pd.concat(dfs)
# Scale latency and timestamps by 1/1000 and label the result as milliseconds
# (presumably the CSV values are recorded in microseconds -- TODO confirm).
df["latency"] = (df["latency"] / 1000)
df['send-time'] = df['send-time'] / 1000
df['receive-time'] = df['receive-time'] / 1000
df["unit"] = "ms" # fix unit

df

In [None]:
# Average the latency of rows that share system, run_config, index and name
# (e.g. the same position in different runs), then key the result by 'index'.
latency_df = (
    df[['system', 'run_config', 'latency', 'index', 'name']]
    .groupby(['system', 'run_config', 'index', 'name'])
    .mean()
    .reset_index()
    .set_index('index')
)

In [None]:
# Single 7x5 inch axes that all latency curves are drawn onto.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))

def plot_latency(ax, label: str, df: pd.DataFrame, group_size) -> None:
    """Plot the mean latency of consecutive buckets of ``group_size`` index labels.

    A row with index label ``i`` falls into the bucket that starts at
    ``(i // group_size) * group_size``. The per-bucket mean of every column is
    computed and the ``latency`` column is drawn on ``ax`` against the bucket
    start.

    Args:
        ax: Matplotlib axes to draw on.
        label: Legend label of the plotted line.
        df: Frame with numeric index labels and a ``latency`` column.
        group_size: Width of one bucket in index labels; must be positive.

    Raises:
        ValueError: If ``group_size`` is not positive.
    """
    if group_size <= 0:
        raise ValueError(f"group_size must be positive, got {group_size!r}")

    # Bucket by the actual index labels rather than by range(len(df)), so rows
    # whose label is >= len(df) are kept instead of being silently dropped.
    bucket_start = (df.index.to_numpy() // group_size) * group_size
    mean_latency = df.groupby(bucket_start).mean()

    mean_latency.plot(y='latency', ax=ax, label=label)
    


# Draw one latency curve per (system, run_config) pair.
for system in latency_df['system'].unique():
    print(system)
    system_df = latency_df[latency_df['system'] == system]

    # Only visit configurations that exist for this system; a configuration
    # present solely for another system would otherwise give an empty frame.
    for run_config in system_df['run_config'].unique():
        print(run_config)
        run_df = system_df[system_df['run_config'] == run_config]

        # The legend shows the second field of the run configuration; latency
        # is averaged over buckets of 10000 consecutive queries.
        plot_latency(ax, f"{system} {run_config.split(' ')[1]}", run_df[['latency']], 10000)

ax.set_xlabel("number of queries")
ax.set_ylabel("latency [ms]")

# ax.set_ylim([0,None])

ax.legend()

# Save through the figure object so the output does not depend on which
# figure pyplot currently considers active.
fig.savefig(fname="latency.png", format="png")
fig.savefig(fname="latency.pdf", format="pdf")

In [None]:
# Bucket every row by its receive time divided by 1000 and rounded to the
# nearest integer (presumably milliseconds -> whole seconds; TODO confirm).
receive_scaled = df["receive-time"].div(1000)
df["seconds"] = receive_scaled.apply(round)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7,5))

# One [system, type, median throughput] row per (system, run_config) pair.
res_data = []

for system in df['system'].unique():
    sys_df = df[df['system'] == system]

    for run_config in sys_df["run_config"].unique():
        filtered_df = sys_df[sys_df["run_config"] == run_config]
        # The second field of the run configuration is used as its short label.
        config_type = run_config.split(' ')[1]

        # Rows received per 'seconds' bucket, divided by the number of
        # distinct runs of this configuration to get a per-run average.
        run_count = filtered_df['run_id'].nunique()
        throughput = filtered_df.groupby("seconds")["name"].count() / run_count
        throughput.plot(ax=ax, label=f"{system} {config_type}")

        # quantile(0.5) is the median of the per-second values, not the mean.
        # NOTE: total row count / last 'seconds' value would be an alternative
        # overall estimate.
        median_throughput = throughput.quantile(0.5)
        res_data.append([system, config_type, median_throughput])
        print(f"Median Throughput for {system} {run_config}: {median_throughput}")

        # Horizontal marker in the colour of the curve that was just drawn.
        ax.axhline(y=median_throughput, color=ax.get_lines()[-1].get_color())

ax.legend()

ax.set_xlabel('time [s]')
ax.set_ylabel('throughput [ops/s]')

fig.savefig(fname="throughput.pdf", format="pdf")

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7,5))

res_df = pd.DataFrame(data=res_data, columns=['system', 'type', 'throughput'])

etcd_df = res_df[res_df.system == 'etcd'][['type', 'throughput']]
pb_df = res_df[res_df.system == 'pb'][['type', 'throughput']]

# Pair the two systems by their 'type' label instead of by row position: the
# per-system row order is not guaranteed to match, and positional pairing
# would attach pb values to the wrong label. The left merge keeps etcd's
# types in their original order; a type missing for pb becomes NaN (no bar),
# and a type present only for pb is not shown.
comparison_df = etcd_df.rename(columns={'throughput': 'etcd'}).merge(
    pb_df.rename(columns={'throughput': 'pb'}),
    on='type',
    how='left',
)
comparison_df.set_index('type').plot.bar(ax=ax)

ax.set_ylabel('throughput [ops/s]')
ax.set_xlabel('')

fig.savefig(fname="throughput_comp.pdf", format="pdf")

In [None]:
# Largest 'send-time' of every (system, run_config, run_id) group, divided by 1000.
df.groupby(['system', 'run_config', 'run_id'])['send-time'].max().div(1000)