In [None]:
import toml
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import sqlalchemy as sa

sns.set_theme()

In [None]:
# Read the PostgreSQL connection settings from the [psql] table of ./db.toml.
config = toml.load("./db.toml")['psql']
# Build the URL through SQLAlchemy instead of an f-string so that special
# characters in the credentials (e.g. "@", ":" or "/") are percent-escaped
# rather than corrupting the URL. It is rendered back to a plain string
# (password included) so `connection_string` keeps its original type.
connection_string = sa.engine.URL.create(
    drivername="postgresql",
    username=str(config['user']),
    password=str(config['password']),
    host=str(config['host']),
    port=config['port'],
    database=str(config['database']),
).render_as_string(hide_password=False)

In [None]:
# Note: despite its name, `conn` holds a SQLAlchemy Engine (the return value of
# create_engine); it is what the pandas queries below receive as `con`.
conn = sa.create_engine(url=connection_string)

In [None]:
def cdf(series: pd.Series) -> pd.DataFrame:
    """Compute the empirical cumulative distribution function of a series.

    Returns a frame with two columns: one named after ``series`` holding its
    values in ascending order followed by the maximum once more, and ``"cdf"``
    holding evenly spaced fractions from 0 to 1 (``len(series) + 1`` points).
    """
    ordered = np.asanyarray(series.sort_values())
    # Repeat the maximum so the final point of the curve sits at cdf == 1.
    values = np.concatenate((ordered, np.ravel(series.max())))
    fractions = np.linspace(0, 1, num=len(series) + 1)
    return pd.DataFrame({series.name: values, "cdf": fractions})

In [None]:
# Presumably the figure resolution; `dpi` is not referenced by any cell
# visible here -- TODO confirm where it is used.
dpi = 150
# (Re-)apply seaborn's default theme.
sns.set_theme()

In [None]:
# Load the complete `runs` table.
df_runs = pd.read_sql_query(
    sql="SELECT * FROM runs",
    con=conn,
)

In [None]:
# Load only error-free measurements of type 'KUBO' from run 4 onwards.
df_measurements = pd.read_sql_query(
    sql="SELECT * FROM measurements WHERE run_id >= 4 AND error IS NULL AND type = 'KUBO'",
    con=conn,
)

In [None]:
# Flatten the per-measurement "metrics" payload into regular columns, drop the
# raw payload column, then attach the matching df_runs row to each measurement
# (measurements.run_id == runs.id).
df = (
    pd.concat(
        [df_measurements, pd.json_normalize(df_measurements['metrics'])],
        axis=1,
    )
    .drop(columns="metrics")
    .merge(df_runs, left_on="run_id", right_on="id")
)

In [None]:
# Independent copy of the rows belonging to run 4 and later; the plotting
# helper further down reads this frame.
data = df.loc[df["run_id"] >= 4].copy()

In [None]:
def get_percentiles(data: pd.DataFrame, percentile: float = 0.5, metric: str = "performanceTimingPageLoad"):
    """Build region x website matrices of a metric quantile and of sample counts.

    Args:
        data: Frame with at least the columns ``"website"``, ``"region"`` and
            ``metric``.
        percentile: Quantile to compute per (website, region) group, in [0, 1].
        metric: Name of the numeric column to aggregate.

    Returns:
        A tuple ``(dat, counts, row_labels, col_labels)`` where

        * ``dat[i, j]`` is the requested quantile of ``metric`` divided by 1000
          for region ``row_labels[i]`` and website ``col_labels[j]``
          (``nan`` when that combination has no rows),
        * ``counts[i, j]`` is the number of rows of ``data`` for that
          combination (0 when there are none),
        * ``row_labels`` / ``col_labels`` are the sorted region / website names.
    """
    grouped = data[["website", "region", metric]].groupby(["website", "region"])
    agg = grouped.quantile(percentile, numeric_only=True).reset_index()

    row_labels = sorted(agg["region"].unique())
    col_labels = sorted(agg["website"].unique())

    # One-off lookups keyed by (region, website) / (website, region) replace the
    # per-cell boolean filtering of the whole frame.
    quantiles = agg.set_index(["region", "website"])[metric].to_dict()
    # Counts are taken from the `data` argument itself (not from a notebook
    # global), so the function also works on any frame passed in.
    sizes = grouped.size().to_dict()

    dat = np.array([
        [quantiles.get((region, website), np.nan) / 1000 for website in col_labels]
        for region in row_labels
    ])
    counts = np.array([
        [sizes.get((website, region), 0) for website in col_labels]
        for region in row_labels
    ])
    return dat, counts, row_labels, col_labels

In [None]:
def plot_metric(title:str, metric: str):
    """Draw stacked p50 / p90 / p99 heatmaps (region x website) for one metric.

    Reads the module-level ``data`` frame and aggregates it with
    ``get_percentiles``. Every cell is annotated with the quantile value
    (three decimals) and, in its lower-right corner, the sample count.

    Args:
        title: Human-readable metric name; each subplot title is prefixed with
            the percentile (e.g. ``"p50 <title>"``).
        metric: Column of ``data`` to aggregate.

    Returns:
        None. The figure is created through pyplot as a side effect.
    """
    percentiles = [0.5, 0.9, 0.99]
    # Loop-invariant plotting helpers.
    cmap = sns.color_palette("rocket_r", as_cmap=True)
    textcolors = ("#212121", "white")
    fmtr = mpl.ticker.StrMethodFormatter("{x:.3f}")

    fig, axes = plt.subplots(len(percentiles), 1, figsize=[15, 20])

    # Iterate over the axes returned by plt.subplots instead of indexing
    # fig.axes, which also accumulates the colorbar axes added below.
    for ax, percentile in zip(axes, percentiles):
        dat, counts, row_labels, col_labels = get_percentiles(data, percentile, metric)

        ax.set_title(f"p{int(round(percentile * 100))} {title}")

        if dat.size == 0:
            # Nothing to draw: imshow cannot render an empty array.
            ax.text(0.5, 0.5, "no data", ha="center", va="center", transform=ax.transAxes)
            continue

        im = ax.imshow(dat, cmap=cmap)

        # Create colorbar
        cbar = ax.figure.colorbar(im, ax=ax)
        cbar.ax.set_ylabel("Time in Seconds", rotation=-90, va="bottom")

        # Show all ticks and label them with the respective list entries.
        ax.set_xticks(np.arange(dat.shape[1]), labels=col_labels)
        ax.set_yticks(np.arange(dat.shape[0]), labels=row_labels)

        # Let the horizontal axes labeling appear on top.
        ax.tick_params(top=True, bottom=False,
                       labeltop=True, labelbottom=False)

        # Rotate the tick labels and set their alignment.
        plt.setp(ax.get_xticklabels(), rotation=-30, ha="right",
                 rotation_mode="anchor")

        # Turn spines off and hide the grid.
        ax.spines[:].set_visible(False)

        ax.set_xticks(np.arange(dat.shape[1]+1)-.5, minor=True)
        ax.set_yticks(np.arange(dat.shape[0]+1)-.5, minor=True)
        ax.grid(False)
        ax.tick_params(which="minor", bottom=False, left=False)

        # Cells above half of the colour range get white text. nanmax is
        # required because missing region/website combinations are NaN, and a
        # plain max() would turn the threshold into NaN (all labels dark).
        threshold = im.norm(np.nanmax(dat))/2.
        for i in range(dat.shape[0]):
            for j in range(dat.shape[1]):
                tc = textcolors[int(im.norm(dat[i, j]) > threshold)]
                # Quantile value in the centre of the cell ...
                ax.text(j, i, fmtr(dat[i, j]), ha="center", va="center", color=tc)
                # ... and the sample count in its lower-right corner.
                ax.text(j + 0.5, i + 0.5, counts[i, j], ha="right", va="bottom", color=tc, fontsize=8)

    # Lay out once, after all subplots and colorbars exist.
    fig.tight_layout()


In [None]:
# Heatmaps of the page-load timing (title typo "Timining" corrected).
plot_metric("Performance Timing Page Load", "performanceTimingPageLoad")

In [None]:
# Heatmaps of the time-to-first-byte metric.
plot_metric(title="Time To First Byte", metric="timeToFirstByte")