In [None]:
import json
import re
from glob import glob
from os.path import join as pjoin

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
# Default size applied to every figure drawn in this notebook.
plt.rcParams['figure.figsize'] = [15, 8]

# Directory that load_query_data() scans for "run-*.json" result files.
LOGS_DIR="logs"
# Percentile thresholds (as fractions of 1) that load_percentiles() extracts.
PERCENTILES = [0.95, 0.99, 0.999]

def is_light_query(row):
    """Tell whether a result row belongs to a "light" workload.

    A row is light when its 'workload' value begins with either
    'single-groupby' or 'cpu-max'.

    Arguments:
        row: mapping-like object (e.g. a DataFrame row) with a string
            stored under the 'workload' key.

    Returns:
        bool: True for a light workload, False otherwise.
    """
    light_prefixes = ('single-groupby', 'cpu-max')
    return row['workload'].startswith(light_prefixes)

def load_percentiles(path, percentiles=None):
    """Read a whitespace-delimited percentile table and pick out latencies.

    The file must have a header row containing at least the columns
    'Percentile' and 'Value'; lines starting with '#' are ignored. For each
    requested percentile, the 'Value' of the first row whose 'Percentile'
    is >= that threshold is returned.

    Arguments:
        path (str): Path of the percentile table to read.
        percentiles (iterable of float, optional): Thresholds as fractions
            of 1 (e.g. 0.99). Defaults to the notebook-level PERCENTILES.

    Returns:
        dict: Maps 'p<percentile*100>' (e.g. 'p95.0', 'p99.9') to the value
            found for that percentile.

    Raises:
        IndexError: If no row reaches one of the requested percentiles.
    """
    if percentiles is None:
        percentiles = PERCENTILES

    # Hand the path to pandas directly: the `file()` builtin exists only on
    # Python 2 and the handle it returned was never closed. `sep=r'\s+'` is
    # the documented equivalent of the deprecated `delim_whitespace=True`.
    csv = pd.read_csv(path, sep=r'\s+', skipinitialspace=True,
                comment='#')
    ret = {}
    for percentile in percentiles:
        # First row at or above the threshold.
        # NOTE(review): this assumes rows are sorted by ascending
        # 'Percentile' -- confirm against the tool that writes these files.
        val = csv[csv['Percentile'] >= percentile].iloc[0]['Value']
        ret['p{}'.format(percentile*100)] = val
    return ret

def load_query_data():
    """Load every query-benchmark run under LOGS_DIR into one DataFrame.

    Each "run-<name>.json" file is parsed as JSON and merged with the
    percentile values that load_percentiles() extracts from the sibling
    "hdr-<name>.txt" file in the same directory. Paths containing
    'high-cpu' are skipped.

    Returns:
        pandas.DataFrame: One row per run, with the JSON fields plus the
            percentile columns returned by load_percentiles().

    Raises:
        AssertionError: If a matched path cannot be mapped to its
            "hdr-*.txt" counterpart.
    """
    data = []
    for path in sorted(glob(pjoin(LOGS_DIR, "run-*.json"))):
        # skip 'high-cpu' because VM doesn't support it and kudu's support
        # for 'SELECT *' doesn't include the tags. Hence not a fair comparison
        if 'high-cpu' in path:
            continue
        # Close the file deterministically instead of leaking the handle.
        with open(path) as run_file:
            j = json.load(run_file)
        # Derive the histogram path. The separator is captured (either '/'
        # or '\') so paths built by os.path.join match on every platform,
        # and the '.' of the extension is escaped so it matches literally.
        hdr_path, subbed = re.subn(r'([/\\])run-(.+)\.json$',
                                   r'\1hdr-\2.txt', path)
        assert subbed, "bad path: " + path
        j.update(load_percentiles(hdr_path))
        data.append(j)
    return pd.DataFrame(data)

def load_load_data(load_data=None):
    """Load the per-system data-load logs into one DataFrame.

    Each log file is filtered down to the lines that contain a comma (the
    CSV header and data rows), parsed as CSV, has its 'time' column rebased
    so the smallest value becomes 0, and is tagged with a 'system' column.

    Arguments:
        load_data (dict, optional): Maps a system name to the path of its
            log file. Defaults to the notebook-level LOAD_DATA.
            NOTE(review): LOAD_DATA is not defined in the visible part of
            this notebook -- it must be assigned before calling this
            function without an argument.

    Returns:
        pandas.DataFrame: Concatenation of the per-system frames; each
            frame keeps its own row index.

    Raises:
        ValueError: If the mapping is empty (nothing to concatenate).
    """
    if load_data is None:
        load_data = LOAD_DATA

    frames = []
    for system, path in load_data.items():
        # Close the file deterministically instead of leaking the handle.
        with open(path) as log_file:
            raw = log_file.read()
        # The output contains some non-data junk -- filter it out
        csv_lines = [line for line in raw.splitlines() if ',' in line]
        df = pd.read_csv(StringIO("\n".join(csv_lines)))
        df['time'] -= df['time'].min()
        df['system'] = system
        frames.append(df)
    return pd.concat(frames)

In [None]:
# Load every query-benchmark run found under LOGS_DIR; the bare `df` on the
# last line makes the notebook render the resulting frame.
df = load_query_data()
df

In [None]:

def add_value_labels(ax, spacing=5):
    """Add labels to the end of each bar in a bar chart.

    Each bar is annotated with its height: one decimal place when the
    magnitude is below 100, otherwise a thousands-separated integer.
    Bars whose height is NaN or infinite (e.g. a missing workload/system
    combination in pivoted data) are left unlabelled instead of raising.

    Arguments:
        ax (matplotlib.axes.Axes): The matplotlib object containing the axes
            of the plot to annotate.
        spacing (int): The distance between the labels and the bars.
    """
    import math  # local import: only this helper needs it

    # For each bar: Place a label
    for rect in ax.patches:
        # Get X and Y placement of label from rect.
        y_value = rect.get_height()

        # int(nan) / int(inf) raise, and there is nothing sensible to print
        # for such a bar anyway.
        if math.isnan(y_value) or math.isinf(y_value):
            continue

        x_value = rect.get_x() + rect.get_width() / 2

        # One decimal place for small values, otherwise round. Compare the
        # magnitude so a minus sign does not count as a digit.
        if abs(y_value) < 100:
            label = "{:.1f}".format(y_value)
        else:
            label = "{:,}".format(int(y_value))

        # Create annotation
        ax.annotate(
            label,                      # Use `label` as label
            (x_value, y_value),         # Place label at end of the bar
            xytext=(0, spacing),        # Vertically shift label by `spacing`
            textcoords="offset points", # Interpret `xytext` as offset in points
            ha='center',                # Horizontally center label
            va='bottom',                # Anchor the text by its bottom edge
            rotation=90)                # Draw the text vertically

def do_plot(query_data, metric, title=None, ylabel=None):
    """Draw a grouped bar chart of one metric, per workload and system.

    The frame is pivoted so each 'workload' becomes a group on the x axis
    and each 'system' a bar within the group. Every bar is annotated via
    add_value_labels(), and the y axis runs from 0 to 1.2 times the largest
    value of the metric so the labels have room above the bars.

    Arguments:
        query_data (pandas.DataFrame): Rows with 'workload', 'system' and
            `metric` columns.
        metric (str): Name of the column to plot.
        title (str, optional): Chart title.
        ylabel (str, optional): Label for the y axis; left unset when falsy.

    Returns:
        The axes object returned by the pandas plot call.
    """
    by_workload = query_data.pivot(index='workload', columns='system')
    axes = by_workload.plot(y=metric, kind='bar', title=title)

    if ylabel:
        axes.set_ylabel(ylabel)

    add_value_labels(axes)

    # Leave 20% headroom above the tallest bar for the value labels.
    tallest = query_data[metric].max()
    axes.set_ylim(bottom=0, top=tallest * 1.2)
    return axes

In [None]:
# Draw the throughput and latency charts once per client count.
for workers in [8, 16]:
    w_title = "\n({} clients)".format(workers)
    df_filtered = df[df['workers'] == workers]

    # Without any runs for this client count, pivot/plot would fail with an
    # opaque error; say so explicitly and move on.
    if df_filtered.empty:
        print("No runs with {} clients; skipping plots".format(workers))
        continue

    # Classify each row once and reuse the mask for both subsets.
    light_mask = df_filtered.apply(is_light_query, axis=1)
    df_light = df_filtered[light_mask]
    df_heavy = df_filtered[~light_mask]

    do_plot(df_light, 'qps', ylabel='Throughput (QPS)', title='Throughput for light queries' + w_title)
    do_plot(df_heavy, 'qps', ylabel='Throughput (QPS)', title='Throughput for heavy queries' + w_title)
    do_plot(df_light, 'mean_latency', ylabel='Latency (ms)', title='mean latency for light queries' + w_title)
    do_plot(df_light, 'p95.0', ylabel='Latency (ms)', title='p95 for light queries' + w_title)
    do_plot(df_light, 'p99.0', ylabel='Latency (ms)', title='p99 for light queries' + w_title)