# Curiefense performance report
## Set measurement folder

In [None]:
import os
import json
import glob
import statistics
import pathlib

import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt

# Folder used when the RESULTS_DIR environment variable is not set.
DEFAULT_RESULTS_DIR = pathlib.Path("./stats-locust")
# The RESULTS_DIR environment variable, when present, overrides the default.
RESULTS_DIR = pathlib.Path(os.getenv("RESULTS_DIR", DEFAULT_RESULTS_DIR))
print(f"Performance measurements will be read from {RESULTS_DIR}")

## Sample data

### Sample jaeger measurement
Two *spans* are present for each query: one for the `istio-ingressgateway` service and one for the `ratings.bookinfo` service. Spans that belong to the same query share the same `traceID` attribute. The `ratings.bookinfo` span has a non-empty `references` attribute, and its duration is shorter than that of the `istio-ingressgateway` span.

In [None]:
# Load one raw jaeger export as a sample. `read_text()` opens and closes the
# file itself, so no file handle is left dangling in the notebook kernel.
j = json.loads((RESULTS_DIR / "cf-default-config/jaeger-uc-10-0.json").read_text())
# Keep `j` as the last expression of the cell so that the notebook displays it.
j

## Create a pandas dataframe from jaeger, locust & resource usage outputs

In [None]:
def _load_json(path):
    """Parse the JSON document stored at *path*, closing the file afterwards."""
    with path.open() as fh:
        return json.load(fh)


def folder2pd(folder):
    """Summarise every measurement run found in *folder* as a DataFrame.

    A run is identified by a ``locust-*.json`` file whose stem ends with
    ``-<uc>-<reqsize>``. For each run, the matching
    ``jaeger-uc-<uc>-<reqsize>.json`` and ``resources-uc-<uc>-<reqsize>.json``
    files are read from the same folder.

    Args:
        folder: ``pathlib.Path`` of the directory holding the measurement files.

    Returns:
        A ``pandas.DataFrame`` with one row per run and the columns
        ``LocustP50``, ``LocustP95``, ``RPS``, ``Reqsize``, ``JIstioTimeAvg``,
        ``JIstioTimeP50/P75/P90/P99``, ``JIstioTimeMin``, ``JIstioTimeMax``,
        ``CPU_milli`` and ``RAM_Mo``. Runs without any usable jaeger
        measurement are left out. The frame is empty (no columns) when no run
        is usable.

    Raises:
        FileNotFoundError: if the jaeger or resources file of a run is missing.
        statistics.StatisticsError: if the percentiles of a run cannot be
            computed from its jaeger measurements.
    """
    # For these configurations a single span per trace is expected (the
    # request was blocked), so the span duration is used as-is instead of the
    # istio/app difference. The test only depends on the folder name, so it is
    # evaluated once.
    # NOTE(review): "contenfilter" may be a typo for "contentfilter" -- confirm
    # against the actual folder names before changing the literal.
    blocked = "denyall" in folder.name or "contenfilter-and-acl" in folder.name

    res = {}
    for f in folder.glob("locust-*.json"):
        name_parts = f.stem.split("-")
        uc = int(name_parts[-2])
        reqsize = int(name_parts[-1])
        d = {}

        # locust data; percentiles are divided by 1000 (presumably ms -> s)
        j = _load_json(f)
        d["LocustP50"] = j["current_response_time_percentile_50"] / 1000
        d["LocustP95"] = j["current_response_time_percentile_95"] / 1000
        d["RPS"] = j["total_rps"]
        d["Reqsize"] = reqsize

        # jaeger data: trace id -> [istio duration, app duration], durations
        # divided by 1e6 (presumably microseconds -> s)
        j = _load_json(folder / f"jaeger-uc-{uc}-{reqsize}.json")
        trace_data = defaultdict(lambda: [None, None])
        for trace in j["data"]:
            for s in trace["spans"]:
                duration = float(s["duration"]) / 1e6
                if not s["references"]:
                    # istio span: it references no other span
                    trace_data[s["traceID"]][0] = duration
                else:
                    # app span
                    trace_data[s["traceID"]][1] = duration

        trace_difference = []
        for i, a in trace_data.values():
            if blocked:
                # single span: request was blocked, keep whichever is present
                trace_difference.append(a if i is None else i)
                continue
            if i is None or a is None:
                # incomplete trace: the difference cannot be computed
                continue
            trace_difference.append(i - a)

        try:
            d["JIstioTimeAvg"] = statistics.fmean(trace_difference)
        except statistics.StatisticsError as e:
            # No usable trace: the run is skipped. Only the uc=10 / reqsize=0
            # run reports it, together with the data that led to the skip.
            if uc == 10 and reqsize == 0:
                print(f"Warning: skipping {folder=} {uc=} {reqsize=}", e)
                print("TD", trace_difference)
                print(trace_data)
            continue
        try:
            d["JIstioTimeP50"] = statistics.quantiles(trace_difference, n=2)[-1]
        except statistics.StatisticsError as e:
            print(f"Error on {folder=} {uc=} {reqsize=} {trace_difference=}", e)
            raise
        # The last cut point of n quantiles is the (n-1)/n percentile.
        d["JIstioTimeP75"] = statistics.quantiles(trace_difference, n=4)[-1]
        d["JIstioTimeP90"] = statistics.quantiles(trace_difference, n=10)[-1]
        d["JIstioTimeP99"] = statistics.quantiles(trace_difference, n=100)[-1]
        d["JIstioTimeMin"] = min(trace_difference)
        d["JIstioTimeMax"] = max(trace_difference)

        # resources data: the last character of "cpu" and the last two of
        # "ram" are dropped (presumably a unit suffix such as "m" / "Mi")
        j = _load_json(folder / f"resources-uc-{uc}-{reqsize}.json")
        d["CPU_milli"] = int(j["cpu"][:-1])
        d["RAM_Mo"] = int(j["ram"][:-2])
        res[(uc, reqsize)] = d

    return pd.DataFrame(list(res.values()))

In [None]:
# One dataset per sub-folder of RESULTS_DIR, keyed by the folder name.
# Plain files are ignored: they would yield an empty, column-less DataFrame
# that the plotting helpers cannot handle. Sorting makes the order of the
# datasets (and thus the legend order and line colours) reproducible.
folders = sorted(p for p in RESULTS_DIR.glob("*") if p.is_dir())
data = {f.name: folder2pd(f) for f in folders}

data["cf-default-config"]

## Draw QPS vs time percentile
As measured from locust

In [None]:
def qps_vs(col, args, ylim=None, filterstr=""):
    """Plot column *col* against the measured RPS on the current axes.

    One line is drawn per (dataset, request size) pair, labelled
    ``<dataset>-<reqsize>kB``.

    Args:
        col: name of the DataFrame column plotted on the y axis.
        args: mapping of dataset label to DataFrame; each DataFrame needs the
            ``Reqsize`` and ``RPS`` columns as well as *col*.
        ylim: optional y-axis limits, passed to ``plt.ylim`` when truthy.
        filterstr: only lines whose label contains this string are drawn.
            The default empty string keeps every line.
    """
    plotlbls = []
    for datalbl, m in args.items():
        if m.empty:
            # Nothing to draw; an empty frame may not even have the
            # "Reqsize" column.
            continue
        for rs in sorted(m.Reqsize.unique()):
            lbl = f"{datalbl}-{rs}kB"
            if filterstr not in lbl:
                # skip this line
                continue
            t = m[m["Reqsize"] == rs].sort_values(by=["RPS"])
            plt.plot(t["RPS"], t[col])
            plotlbls.append(lbl)
    plt.legend(plotlbls)
    plt.xlabel("Actual RPS")
    # Only the timing columns are expressed in seconds; other columns such as
    # CPU_milli or RAM_Mo already carry their unit in their name.
    unit = " (s)" if col.startswith(("Locust", "JIstioTime")) else ""
    plt.ylabel(f"{col}{unit}")
    if ylim:
        plt.ylim(ylim)
    plt.title(f"RPS vs {col}")

def big_plot(*args, **kargs):
    """Draw one `qps_vs` plot on a new, large figure (20x20 inches, 72 dpi).

    All positional and keyword arguments are forwarded unchanged to `qps_vs`.
    """
    # The new figure becomes the current one, so qps_vs draws onto it.
    plt.figure(dpi=72, figsize=(20, 20))
    qps_vs(*args, **kargs)

In [None]:
# 50th percentile of the locust response time, for every dataset.
qps_vs(col="LocustP50", args=data)

In [None]:
# 95th percentile of the locust response time, on a large figure.
big_plot("LocustP95", args=data)

In [None]:
def plot_all(args, lines=None, ylim=None, filterstr=""):
    """Draw a grid of `qps_vs` plots, one subplot per requested column.

    Args:
        args: mapping of dataset label to DataFrame, forwarded to `qps_vs`.
        lines: list of rows, each row being a list of column names; every
            column gets its own subplot. Defaults to a single row with
            ``LocustP50`` and ``LocustP95``.
        ylim: optional y-axis limits applied to every subplot.
        filterstr: only lines whose label contains this string are drawn.
    """
    if lines is None:
        # Built per call rather than as a shared mutable default argument.
        lines = [["LocustP50", "LocustP95"]]
    h = len(lines)
    w = max(len(cols) for cols in lines)
    fig = plt.figure(figsize=(w * 6, h * 2), dpi=100)
    fig.set_facecolor("white")
    plt.subplots_adjust(top=2)

    for i, cols in enumerate(lines):
        for j, col in enumerate(cols):
            # Subplot indices are 1-based and run row by row.
            fig.add_subplot(h, w, 1 + (i * w) + j)
            # ylim was previously accepted but silently ignored.
            qps_vs(col, args, ylim=ylim, filterstr=filterstr)

In [None]:
# Default grid: locust P50 and P95 response times for every dataset.
plot_all(args=data)

## Draw QPS vs latency introduced by istio
As measured from jaeger
### Influence of the configuration, for a 0kB payload

In [None]:
# One row of subplots per family of metrics: locust, jaeger, resources.
# Only the lines whose label contains "-0kB" are drawn.
plot_all(
    args=data,
    lines=[
        ["LocustP50", "LocustP95"],
        ["JIstioTimeP50", "JIstioTimeP90", "JIstioTimeP99"],
        ["CPU_milli", "RAM_Mo"],
    ],
    filterstr="-0kB",
)

### Effect of the payload size, for the default configuration

In [None]:
# One row of subplots per family of metrics: locust, jaeger, resources.
# Only the lines whose label contains "default-config" are drawn.
plot_all(
    args=data,
    lines=[
        ["LocustP50", "LocustP95"],
        ["JIstioTimeP50", "JIstioTimeP90", "JIstioTimeP99"],
        ["CPU_milli", "RAM_Mo"],
    ],
    filterstr="default-config",
)

In [None]:
# One row of subplots per family of metrics: locust, jaeger, resources.
# Only the lines whose label contains "deny" are drawn.
plot_all(
    args=data,
    lines=[
        ["LocustP50", "LocustP95"],
        ["JIstioTimeP50", "JIstioTimeP90", "JIstioTimeP99"],
        ["CPU_milli", "RAM_Mo"],
    ],
    filterstr="deny",
)