In [16]:
import pandas as pd
import numpy as np
from numpy.random import default_rng
import seaborn as sns
# Activate seaborn's default styling (no arguments, so library defaults are used).
sns.set()
# The bootstrap further down is stochastic. A fixed seed makes the reported
# confidence intervals reproducible under "Restart Kernel -> Run All".
SEED = 42
rng = default_rng(SEED)


# Number of bootstrap resamples drawn per query type.
numberOfSamples = 10000
# Total two-sided tail mass (in percent) cut off when reading an interval from
# the sorted bootstrap results: 1 -> 99% CI, 5 -> 95% CI, 10 -> 90% CI.
CIsmall = 1
CImed = 5
CIlarge = 10
# Directory of the experiment results and the latency CSV stored inside it.
folder = "../../results_all/app_AA-test_influxdb/"
filename = folder + "app_AA-test_influxdb.csv"

In [17]:
# Load the raw latency measurements; the two prints mark the beginning and the
# end of the CSV read in the cell output.
print("start...")
df_latencies = pd.read_csv(filepath_or_buffer=filename)
print("done.")

start...
done.


In [18]:
def _runtimeMatrix(perfRuntimes, instanceRuns, samples):
    """Stack the latencies of each instance run into one row of a (runs, samples) array."""
    matrix = np.empty((len(instanceRuns), samples))
    # Rows are addressed by position, so run ids need not be 1..N or contiguous.
    for row, instanceRun in enumerate(instanceRuns):
        latencies = perfRuntimes.loc[perfRuntimes['run'] == instanceRun,
                                     'latency (ms)'].to_numpy()
        # Checked explicitly: a single-element array would otherwise be
        # broadcast silently over the whole row.
        if latencies.shape[0] != samples:
            raise ValueError(
                f"run {instanceRun} has {latencies.shape[0]} measurements, expected {samples}")
        matrix[row] = latencies
    return matrix


def resample(perfRuntimes1: pd.DataFrame,
            perfRuntimes2: pd.DataFrame,
            instanceRuns: np.ndarray,
            samples: int,
            numberOfSamples: int,
            generator: "np.random.Generator | None" = None) -> list:
    """
        Bootstraps the ratio of median latencies (version 2 / version 1) hierarchically.

        Each bootstrap round first draws, with replacement, as many instance runs
        as there are in instanceRuns, and then draws, with replacement, `samples`
        measurements from every drawn run. The same drawn runs are used for both
        versions, while the measurement indices are drawn independently per version.
        The median of all drawn measurements is computed per version and the ratio
        median2 / median1 is recorded.

        Progress is printed every 1000 rounds.

        Parameters
        ----------
        perfRuntimes1 : measurements of the first version; needs the columns
            'run' and 'latency (ms)'.
        perfRuntimes2 : measurements of the second version; same columns.
        instanceRuns : array of the run identifiers to resample from.
        samples : number of measurements every run holds per version.
        numberOfSamples : number of bootstrap rounds.
        generator : optional numpy random Generator; when omitted, the
            notebook-wide `rng` is used.

        Returns
        -------
        list
            numberOfSamples ratios median2 / median1 (a value > 1 means version 2
            is slower), in the order they were drawn.

        Raises
        ------
        ValueError
            If a run does not hold exactly `samples` measurements for a version.

        """
    # `rng` is only looked up when no generator is passed in.
    gen = rng if generator is None else generator

    instanceRuns = np.asarray(instanceRuns)
    instanceRunsNumber = instanceRuns.shape[0]

    allRuntimes1 = _runtimeMatrix(perfRuntimes1, instanceRuns, samples)
    allRuntimes2 = _runtimeMatrix(perfRuntimes2, instanceRuns, samples)

    ratios = []
    for i in range(numberOfSamples):
        if (i % 1000 == 0):
            print(f"i is {i}")
        # Level 1: row positions of the resampled instance runs, shape (runs,).
        currentInstanceRun = gen.integers(instanceRunsNumber, size=instanceRunsNumber)
        # Level 2: measurement indices, shape (samples, runs).
        currentRuntimes1 = gen.integers(samples, size=(samples, instanceRunsNumber))
        currentRuntimes2 = gen.integers(samples, size=(samples, instanceRunsNumber))

        # The (runs,) row index broadcasts against the (samples, runs) column
        # index, so column j of the selection reads only from resampled run j.
        tmp1 = allRuntimes1[currentInstanceRun, currentRuntimes1]
        tmp2 = allRuntimes2[currentInstanceRun, currentRuntimes2]

        # np.median without an axis takes the median over all drawn values.
        med1 = np.median(tmp1)
        med2 = np.median(tmp2)
        ratios.append(med2/med1)

    return ratios


def _percentChange(ratio):
    """Turn a ratio (1.0 means no change) into a signed percentage."""
    return (ratio - 1) * 100


def _confidenceInterval(sortedRatios, ciPercent):
    """
    Read a two-sided interval, as percent change, from ascending bootstrap ratios.

    ciPercent is the total tail mass in percent; half of it is cut from each end.
    Returns the tuple (lower, upper).
    """
    total = len(sortedRatios)
    # At least one element per tail: with 0 the lower index would become -1
    # and silently pick the maximum instead of the minimum.
    tail = max(int((total * ciPercent) / 100 / 2), 1)
    return (_percentChange(sortedRatios[tail - 1]),
            _percentChange(sortedRatios[total - tail - 1]))


# For each type
# (the loop variable is not called `type`: at notebook scope that would shadow
# the builtin for every later cell; NaN types are skipped)
for queryType in df_latencies['type'].dropna().unique():
    print(f"Running analysis for type {queryType}...")

    # Exact match, so a type that is a prefix of another does not absorb its rows.
    benchmarkMeasurements = df_latencies.loc[df_latencies['type'] == queryType]
    instanceRuns = benchmarkMeasurements['run'].unique()
    numberOfInstanceRuns = len(instanceRuns)

    #Find median perf. change
    perfRuntimes1 = benchmarkMeasurements.loc[benchmarkMeasurements['version'] == "base"]
    perfRuntimes2 = benchmarkMeasurements.loc[benchmarkMeasurements['version'] == "variation"]

    perf1 = perfRuntimes1['latency (ms)'].median()
    perf2 = perfRuntimes2['latency (ms)'].median()
    # Compare both (e.g., 10ms in ver1 and 12ms in ver2 => 12/10 = 1.2 (>1 -> regression)
    perfChange = _percentChange(perf2/perf1)

    print(f"    Found median performance change ({perfChange}).")

    # Run Bootstrapping
    # R stores one median ratio per bootstrap round
    R = resample(perfRuntimes1=perfRuntimes1,
            perfRuntimes2=perfRuntimes2,
            instanceRuns=instanceRuns,
            samples=len(perfRuntimes1) // numberOfInstanceRuns,
            numberOfSamples=numberOfSamples)

    print(f"    Bootstrapping done ({len(R)} elements in R).")

    # Find conf. intervals (the intervals are read by position, so R must be sorted)
    R.sort()

    minSmall, maxSmall = _confidenceInterval(R, CIsmall)
    minMedium, maxMedium = _confidenceInterval(R, CImed)
    minLarge, maxLarge = _confidenceInterval(R, CIlarge)

    print(f"    Found conf. intervals ([{minSmall}, {maxSmall}],[{minMedium}, {maxMedium}],[{minLarge}, {maxLarge}]).")

Running analysis for type group-by queries...
    Found median performance change (0.09106802734279995).
i is 0
i is 1000
i is 2000
i is 3000
i is 4000
i is 5000
i is 6000
i is 7000
i is 8000
i is 9000
    Bootstrapping done (10000 elements in R).
    Found conf. intervals ([-1.2935316574425815, 1.3307707008986203],[-0.9215379831507553, 0.9454296308481824],[-0.7651213336614893, 0.7680668177489025]).
Running analysis for type inserts...
    Found median performance change (-0.817209054571566).
i is 0
i is 1000
i is 2000
i is 3000
i is 4000
i is 5000
i is 6000
i is 7000
i is 8000
i is 9000
    Bootstrapping done (10000 elements in R).
    Found conf. intervals ([-0.5637702047301185, 0.5682462000334843],[-0.43302436793090093, 0.4457082727262529],[-0.3660247858768617, 0.3758903405880698]).
Running analysis for type simple queries...
    Found median performance change (0.4696589666363993).
i is 0
i is 1000
i is 2000
i is 3000
i is 4000
i is 5000
i is 6000
i is 7000
i is 8000
i is 9000
    