In [7]:
import pandas as pd
import numpy as np
from numpy.random import default_rng
import seaborn as sns
import os
from os import path
sns.set()
rng = default_rng()

In [8]:
numberOfSamples = 10000
#filename = "../../results_all/micro_history_full_influxdb.csv"
#outFolderName = "../../results_all/micro_bootstrapping_influx"

filename = "../../results_all/micro_history_opti_influxdb.csv"
outFolderName = "../../results_all/micro_bootstrapping_influx_opti"

debug = False
# iterations of microbenchmarks
numberOfIterations = 5
CIsmall = 1
CImed = 5
CIlarge = 10


In [9]:
#! mkdir -p {outFolderName} # type: ignore

In [10]:
def resample(perfRuntimes1: pd.DataFrame,
            perfRuntimes2: pd.DataFrame,
            instanceRuns: np.ndarray,
            suiteRuns: np.ndarray,
            numberOfIterations: int,
            numberOfSamples: int) -> np.ndarray:
    """
        Resamples performances using hierarchical bootstrapping for building confindence intervals

        Builds a tensor of random indices of a form numberOfSamples * instanceRunsNumber * suiteRunsNumber * numberOfIterations.
        Then uses these indices to choose from a performance runtimes tensor with a form instanceRunsNumber * suiteRunsNumber * numberOfIterations.
        Finally, reshapes resulting tensor to a matrix of form numberOfSamples * ( instanceRunsNumber * suiteRunsNumber * numberOfIterations)
        and calculates performance differences between medians.

        Parameters
        ----------
        perfRuntimes1 : performance runtimes of the first version.
        perfRuntimes2 : performance runtimes of the second version.
        instanceRuns : array of instanceRun numbers
        suiteRuns : array of suiteRun numbers
        numberOfIterations : number of microbenchmark iterations

        Returns
        -------
        np.ndarray
            array of performance differences with a shape numberOfSample * 1.

        """
    instanceRunsNumber = instanceRuns.shape[0]
    suiteRunsNumber = suiteRuns.shape[0]

    allRuntimes1 = np.ndarray((instanceRunsNumber, suiteRunsNumber, numberOfIterations))
    allRuntimes2 = np.ndarray((instanceRunsNumber, suiteRunsNumber, numberOfIterations))


    for instanceRun in instanceRuns:
        for suiteRun in suiteRuns:
            allRuntimes1[instanceRun - 1][suiteRun] = perfRuntimes1.loc[(perfRuntimes1['instanceRun'] == instanceRun)
                                                    & (perfRuntimes1['suiteRun'] == suiteRun),
                                                    'runtime'].to_numpy()
            allRuntimes2[instanceRun - 1][suiteRun] = perfRuntimes2.loc[(perfRuntimes2['instanceRun'] == instanceRun)
                                                    & (perfRuntimes2['suiteRun'] == suiteRun),
                                                    'runtime'].to_numpy()

    currentInstanceRun = rng.choice(instanceRuns, size=(instanceRunsNumber, numberOfSamples)) - 1
    currentSuiteRun = rng.choice(suiteRuns, size=(suiteRunsNumber, instanceRunsNumber, numberOfSamples))
    currentRuntimes1 = rng.integers(numberOfIterations, size=(numberOfIterations, suiteRunsNumber, instanceRunsNumber, numberOfSamples))
    currentRuntimes2 = rng.integers(numberOfIterations, size=(numberOfIterations, suiteRunsNumber, instanceRunsNumber, numberOfSamples))

    tmp1 = allRuntimes1[currentInstanceRun, currentSuiteRun, currentRuntimes1]
    tmp1 = tmp1.reshape((numberOfSamples, suiteRunsNumber * instanceRunsNumber * numberOfIterations))

    tmp2 = allRuntimes2[currentInstanceRun, currentSuiteRun, currentRuntimes2]
    tmp2 = tmp2.reshape((numberOfSamples, suiteRunsNumber * instanceRunsNumber * numberOfIterations))
    # Get median for both lists
    med1 = np.median(tmp1, axis=1)
    med2 = np.median(tmp2, axis=1)

    return med2/med1

In [11]:
df_all = pd.read_csv(filename,sep=";")


# For each microbenchmark
for name in df_all.name.unique():
    print(f"Running analysis for benchmark {name}...")

    benchmarkMeasurements = df_all.loc[(df_all['name'].str.startswith(name, na=False))]

    instanceRuns = benchmarkMeasurements.instanceRun.unique()
    suiteRuns = benchmarkMeasurements.suiteRun.unique()


    if len(benchmarkMeasurements) > 50:
        printName = name.replace("/","-")
        printName = printName.replace("/","-")
        printName = printName.replace("\\","-")
        benchmarkFilename = os.path.join(outFolderName, printName + ".csv")
        if path.exists(benchmarkFilename):
            print("  Already analyzed, skip.")
        else:
            results = []
            # For each commit (number)
            for commitNumber in benchmarkMeasurements.number.unique():
                print(f"  Running analysis for commit {commitNumber}...")

                #Find median perf. change
                perfRuntimes1 = benchmarkMeasurements.loc[(benchmarkMeasurements['number'] == commitNumber)
                                                    & (benchmarkMeasurements['version'] == 1)]
                perfRuntimes2 = benchmarkMeasurements.loc[(benchmarkMeasurements['number'] == commitNumber)
                                                    & (benchmarkMeasurements['version'] == 2)]


                elements1 = perfRuntimes1['runtime'].shape[0]
                elements2 = perfRuntimes2['runtime'].shape[0]
                if (elements1 == 45) & (elements2 == 45):

                    perf1 = perfRuntimes1['runtime'].median()
                    perf2 = perfRuntimes2['runtime'].median()
                    # Compare both (e.g., 10ms in ver1 and 12ms in ver2 => 12/10 = 1.2 (>1 -> regression)
                    perfChange = ((perf2/perf1) - 1) * 100

                    print(f"    Found median performance change ({perfChange}).")

                    # Run Bootstrapping
                    # R stores the 10.000 median values
                    R = resample(perfRuntimes1=perfRuntimes1,
                            perfRuntimes2=perfRuntimes2,
                            instanceRuns=instanceRuns,
                            suiteRuns=suiteRuns,
                            numberOfIterations=numberOfIterations,
                            numberOfSamples=numberOfSamples)

                    print(f"    Bootstrapping done ({len(R)} elements in R).")

                    # Find conf. intervals
                    R.sort()

                    small = int((numberOfSamples * CIsmall) / 100 / 2)
                    if small == 0:
                        small  = 1
                    medium = int((numberOfSamples * CImed) / 100 / 2)
                    large = int((numberOfSamples * CIlarge) / 100 / 2)

                    minSmall = R[small-1]
                    minSmall = (minSmall - 1) * 100
                    maxSmall = R[numberOfSamples-small-1]
                    maxSmall = (maxSmall - 1) * 100

                    minMedium = R[medium-1]
                    minMedium = (minMedium - 1) * 100
                    maxMedium = R[numberOfSamples-medium-1]
                    maxMedium = (maxMedium - 1) * 100

                    minLarge = R[large-1]
                    minLarge = (minLarge - 1) * 100
                    maxLarge = R[numberOfSamples-large-1]
                    maxLarge = (maxLarge - 1) * 100

                    print(f"    Found conf. intervals ([{minSmall}, {maxSmall}],[{minMedium}, {maxMedium}],[{minLarge}, {maxLarge}]).")

                    #Store values
                    results.append({
                                    "name" : name,
                                    "number" : int(commitNumber),
                                    "medianPerfChange" : perfChange,
                                    "minSmall" : minSmall,
                                    "maxSmall" : maxSmall,
                                    "minMedium" :minMedium,
                                    "maxMedium" : maxMedium,
                                    "minLarge" : minLarge,
                                    "maxLarge" : maxLarge,
                                    })
                    print("    Values stored.")
                else:
                    print(f"  Skip {name};{commitNumber}, only ({elements1}, {elements2}) elements.")

            #Store results for this benchmark to file
            # Convert to data frame
            df_results = pd.DataFrame(results)
            if len(results) > 0:
                df_results.sort_values(by=["name","number"], inplace=True)
                df_results.describe()

                #Create folder
                #Save file
                df_results.to_csv(benchmarkFilename, sep=";")

    else:
        print(f"  Skip {name}, only {len(benchmarkMeasurements)} measurments.")

Running analysis for benchmark BenchmarkCreateIterator/tsi1_shards_1-2...
  Running analysis for commit 15...
    Found median performance change (0.0).
    Bootstrapping done (10000 elements in R).
    Found conf. intervals ([-1.3513513513513598, 1.9522776572667988],[-1.0014306151645225, 1.797268152408349],[-0.7930785868781487, 1.6582552271088735]).
    Values stored.
  Running analysis for commit 20...
    Found median performance change (-0.43604651162790775).
    Bootstrapping done (10000 elements in R).
    Found conf. intervals ([-1.161946259985469, 1.171303074670571],[-1.0159651669085723, 0.5818181818181944],[-0.9440813362381895, 0.43699927166787056]).
    Values stored.
  Running analysis for commit 25...
    Found median performance change (-0.07235890014470892).
    Bootstrapping done (10000 elements in R).
    Found conf. intervals ([-0.7913669064748108, 1.0167029774873049],[-0.6474820143884896, 0.7988380537400364],[-0.5050505050504972, 0.6526468455402501]).
    Values store

In [12]:
print("Done.")


Done.
