In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

import seaborn as sns

import itertools
from pprint import pprint
from pathlib import Path

In [None]:
# Directory holding the run's output files; every later cell reads from and
# writes plots into it.
# NOTE(review): this is a machine-specific absolute path — edit it (or make it
# configurable) before running the notebook elsewhere.
result_dir = "/home/ubuntu/notebooks/mantis-osdi-2020/debug-200-400-qps-2min-sleep-pid"

In [None]:
# Wrap the result directory in a Path so files can be addressed with the `/`
# operator, and echo it so the rendered notebook records which run was analysed.
dir_name = Path(result_dir)
print("Using directory", dir_name)

## Config Params

In [None]:
# Pretty-print the JSON configuration stored alongside the results.
config_path = dir_name / "config.json"
with config_path.open() as config_file:
    experiment_config = json.load(config_file)
pprint(experiment_config)

## Controllers

In [None]:
# Load status.jsonl (one JSON record per line) into a DataFrame; the cells
# below read the controller and queue columns from it.
# NOTE(review): `convert_dates` names `_1_lg_sent`, the same column requested
# when loading trace.jsonl — confirm status.jsonl actually carries that column.
data_df = pd.read_json(
    dir_name / "status.jsonl", 
    lines=True, 
    convert_dates=["_1_lg_sent"],
)

In [None]:
# Each row of `real_arrival_ts_ns` is a list of arrival timestamps in
# nanoseconds; flatten them all and truncate to whole seconds.
arrival_seconds = (
    np.array(list(itertools.chain.from_iterable(data_df['real_arrival_ts_ns'].tolist()))) / 1e9
).astype(int)

# Count arrivals per second. The index and the values are named *before*
# reset_index() so the resulting columns are always ('seconds', 'qps'):
# pandas < 2.0 would otherwise call the count column 0, pandas >= 2.0 calls it
# 'count', and a rename keyed on 0 silently does nothing on the newer versions.
real_arrival_timestamps = (
    pd.Series(arrival_seconds)
    .value_counts()
    .rename_axis('seconds')
    .rename('qps')
    .reset_index()
    .sort_values('seconds')
)

# Make the time axis relative to the first observed arrival.
real_arrival_timestamps['seconds'] -= real_arrival_timestamps['seconds'].min()


In [None]:
# Controller sample times as whole seconds, relative to the earliest sample.
ctl_time = data_df['current_ts_ns'].div(1e9).astype(int)
ctl_time = ctl_time - ctl_time.min()

# Per-sample summaries of the list-valued queue columns.
dropping_replica_counts = data_df['dropped_queue_sizes'].str.len()
dropped_queue_totals = data_df['dropped_queue_sizes'].apply(sum)
serving_queue_totals = data_df['queue_sizes'].apply(sum)

# Controller columns plus the derived series, added in the order the plotting
# cell expects to find them.
plot_df = data_df[['ctl_from', 'ctl_final_decision', 'ctl_action']].assign(
    num_dropping_replicas=dropping_replica_counts,
    num_serving_replicas=(
        data_df['queue_sizes'].str.len() - 1
        + data_df['fractional_value']
        + dropping_replica_counts
    ),
    ctl_time=ctl_time,
    dropped_queue_sizes=dropped_queue_totals,
    total_queue_sizes=serving_queue_totals + dropped_queue_totals,
)

In [None]:
# Four stacked panels on a shared x axis: arrivals per second, replica counts,
# controller action, and queue sizes.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4, ncols=1, sharex=True, figsize=(12, 8))

# Top panel: observed arrivals per second.
sns.lineplot(data=real_arrival_timestamps.set_index('seconds'), ax=ax1)

# Remaining panels are all indexed by controller time, so index once and
# select the columns each panel shows.
by_ctl_time = plot_df.set_index('ctl_time')
controller_panels = [
    (ax2, ['num_serving_replicas', 'num_dropping_replicas']),
    (ax3, ['ctl_action']),
    (ax4, ["total_queue_sizes", "dropped_queue_sizes"]),
]
for panel_ax, panel_columns in controller_panels:
    sns.lineplot(data=by_ctl_time[panel_columns], ax=panel_ax)

plt.tight_layout()
plt.savefig(dir_name / "actions.png", dpi=100)

## Query Trace

In [None]:
# Load the per-query trace (one JSON record per line).
# NOTE(review): only `_1_lg_sent` is listed in `convert_dates`, yet the
# `_..._time` columns are used as datetimes below — presumably pandas' default
# date-like column detection handles those; confirm against the read_json docs.
trace_path = dir_name / "trace.jsonl"
trace_df = pd.read_json(trace_path, lines=True, convert_dates=["_1_lg_sent"])

# Add the end-to-end latency in milliseconds (done time minus send time) and
# order the rows by query id.
trace_df = trace_df.assign(
    e2e_ms=(trace_df['_4_done_time'] - trace_df['_1_lg_sent']).dt.total_seconds() * 1e3
).sort_values('query_id')

In [None]:
def plot_cdf(arr, title):
    """Plot the percentile curve of ``arr`` on the current pyplot axes.

    The curve shows latency on the x axis against percentile (0 to 100 in
    steps of 0.5) on the y axis. Dotted vertical lines mark the 50th, 90th,
    95th and 99th percentiles, and the legend lists each marked percentile
    with its latency to two decimals.

    Args:
        arr: Array-like of latency values, passed to ``np.percentile``.
        title: Title for the axes.
    """
    percentile_grid = np.arange(0, 100.5, 0.5)
    cdf = pd.DataFrame({
        'percentile': percentile_grid,
        'latency_ms': np.percentile(arr, percentile_grid),
    })
    plt.plot(cdf['latency_ms'], cdf['percentile'])

    plt.title(title)
    plt.xlabel("Latency (ms)")
    plt.ylabel("Percentile")

    # One dotted marker per highlighted percentile; the marker handles and
    # their labels are collected in step so the legend describes only them.
    marker_handles = []
    marker_labels = []
    for p in (50, 90, 95, 99):
        lat = np.percentile(arr, p)
        marker_handles.append(plt.axvline(lat, alpha=0.6, ls=':'))
        marker_labels.append(f"{p}: {lat:.2f}")
    plt.legend(marker_handles, marker_labels)

In [None]:
plt.figure(figsize=(12, 8))

# Timestamps in the order each query passes through them.
ordered_ts = ["_1_lg_sent", "_2_enqueue_time", "_3_dequeue_time", "_4_done_time"]

# First panel is the end-to-end latency; each following panel is the latency
# between two consecutive timestamps.
panels = [("End to end latency (ms) CDF", trace_df['e2e_ms'])]
for start, end in zip(ordered_ts, ordered_ts[1:]):
    stage_ms = (trace_df[end] - trace_df[start]).dt.total_seconds() * 1e3
    # start[3:] / end[3:] strip the "_N_" ordering prefix from the column name
    panels.append((f"{start[3:]}->{end[3:]} CDF (ms)", stage_ms))

for row, (panel_title, latencies_ms) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)
    plot_cdf(latencies_ms, panel_title)

plt.tight_layout()
plt.savefig(dir_name / "latency_cdf.png", dpi=100)