In [1]:
import pandas as pd
import numpy as np
from numpy.random import default_rng
import seaborn as sns
import os
from os import path
sns.set()
rng = default_rng()

In [2]:
numberOfSamples = 10000
filename = "../../results_all/micro_history_full_influxdb.csv"
outFolderName = "../../results_all/micro_bootstrapping_influx"

#filename = "../../results_all/micro_history_opti_influxdb.csv"
#outFolderName = "../../results_all/micro_bootstrapping_influx_opti"
summaryFileName = "summaryInflux.csv"

debug = True
assertRelevances = False
# iterations of microbenchmarks
numberOfIterations = 5
CIsmall = 1

initialThreshold=6
slidingAvg=3

relevances = {
    "BenchmarkCreateIterator":9.848,
    "BenchmarkDecodeFloatArrayBlock":0.758,
    "BenchmarkIndex_IndexFile_TagValueSeriesIDIterator":2.879,
    "BenchmarkIntegerArrayDecodeAllPackedSimple":0.455,
    "BenchmarkWritePoints_NewSeries_100_Measurements_1_TagKey_1_TagValue":11.818
}

relevances = {
    "BenchmarkCreateIterator":1013.900,
    "BenchmarkDecodeFloatArrayBlock":251.210,
    "BenchmarkIndex_IndexFile_TagValueSeriesIDIterator":852.130,
    "BenchmarkIntegerArrayDecodeAllPackedSimple":116.800,
    "BenchmarkWritePoints_NewSeries_100_Measurements_1_TagKey_1_TagValue":942.970
}

In [3]:
#! mkdir -p {outFolderName} # type: ignore

In [4]:
def resample(perfRuntimes1: pd.DataFrame,
            perfRuntimes2: pd.DataFrame,
            instanceRuns: np.ndarray,
            suiteRuns: np.ndarray,
            numberOfIterations: int,
            numberOfSamples: int) -> np.ndarray:
    """
        Resamples performances using hierarchical bootstrapping for building confindence intervals

        Builds a tensor of random indices of a form numberOfSamples * instanceRunsNumber * suiteRunsNumber * numberOfIterations.
        Then uses these indices to choose from a performance runtimes tensor with a form instanceRunsNumber * suiteRunsNumber * numberOfIterations.
        Finally, reshapes resulting tensor to a matrix of form numberOfSamples * ( instanceRunsNumber * suiteRunsNumber * numberOfIterations)
        and calculates performance differences between medians.

        Parameters
        ----------
        perfRuntimes1 : performance runtimes of the first version.
        perfRuntimes2 : performance runtimes of the second version.
        instanceRuns : array of instanceRun numbers
        suiteRuns : array of suiteRun numbers
        numberOfIterations : number of microbenchmark iterations

        Returns
        -------
        np.ndarray
            array of performance differences with a shape numberOfSample * 1.

        """
    instanceRunsNumber = instanceRuns.shape[0]
    suiteRunsNumber = suiteRuns.shape[0]

    allRuntimes1 = np.ndarray((instanceRunsNumber, suiteRunsNumber, numberOfIterations))
    allRuntimes2 = np.ndarray((instanceRunsNumber, suiteRunsNumber, numberOfIterations))

    #aggregate measurements from all instance and suite runs
    for instanceRun in instanceRuns:
        for suiteRun in suiteRuns:
            allRuntimes1[instanceRun - 1][suiteRun] = perfRuntimes1.loc[(perfRuntimes1['instanceRun'] == instanceRun)
                                                    & (perfRuntimes1['suiteRun'] == suiteRun),
                                                    'runtime'].to_numpy()
            allRuntimes2[instanceRun - 1][suiteRun] = perfRuntimes2.loc[(perfRuntimes2['instanceRun'] == instanceRun)
                                                    & (perfRuntimes2['suiteRun'] == suiteRun),
                                                    'runtime'].to_numpy()
    #Generate Random Arrays
    currentInstanceRun = rng.choice(instanceRuns, size=(instanceRunsNumber, numberOfSamples)) - 1
    currentSuiteRun = rng.choice(suiteRuns, size=(suiteRunsNumber, instanceRunsNumber, numberOfSamples))
    currentRuntimes1 = rng.integers(numberOfIterations, size=(numberOfIterations, suiteRunsNumber, instanceRunsNumber, numberOfSamples))
    currentRuntimes2 = rng.integers(numberOfIterations, size=(numberOfIterations, suiteRunsNumber, instanceRunsNumber, numberOfSamples))

    #Bulk selection
    tmp1 = allRuntimes1[currentInstanceRun, currentSuiteRun, currentRuntimes1]
    tmp1 = tmp1.reshape((numberOfSamples, suiteRunsNumber * instanceRunsNumber * numberOfIterations))

    tmp2 = allRuntimes2[currentInstanceRun, currentSuiteRun, currentRuntimes2]
    tmp2 = tmp2.reshape((numberOfSamples, suiteRunsNumber * instanceRunsNumber * numberOfIterations))
    # Get median for both lists
    med1 = np.median(tmp1, axis=1)
    med2 = np.median(tmp2, axis=1)

    return med2/med1

def bootstrap(perfRuntimes1: pd.DataFrame,
            perfRuntimes2: pd.DataFrame):

    instanceRuns = perfRuntimes1.instanceRun.unique()
    numberOfInstanceRuns = len(instanceRuns)

    suiteRuns = perfRuntimes1.suiteRun.unique()
    numberOfSuiteRuns = len(suiteRuns)

    print(f"    Found {numberOfInstanceRuns} instance runs")
    R = resample(perfRuntimes1=perfRuntimes1,
                 perfRuntimes2=perfRuntimes2,
                 instanceRuns=instanceRuns,
                 suiteRuns=suiteRuns,
                 numberOfIterations=5,
                 numberOfSamples=numberOfSamples)

    print(f"    Bootstrapping done ({len(R)} elements in R).")

    # Find conf. intervals
    R.sort()

    small = int((numberOfSamples * CIsmall) / 100 / 2)
    if small == 0:
        small  = 1

    minSmall = R[small-1]
    minSmall = (minSmall - 1) * 100
    maxSmall = R[numberOfSamples-small-1]
    maxSmall = (maxSmall - 1) * 100
    return minSmall, maxSmall

In [7]:
df_all = pd.read_csv(filename,sep=";")


for commit in df_all.number.unique():
    number = 0
    tmp = df_all.loc[(df_all['number'] == commit)]
    num = len(tmp.name.unique())
    number += num
    print(f"Commit {commit}: {number} microbenchmarks")

Commit 100: 109 microbenchmarks
Commit 105: 109 microbenchmarks
Commit 110: 109 microbenchmarks
Commit 15: 426 microbenchmarks
Commit 20: 426 microbenchmarks
Commit 25: 426 microbenchmarks
Commit 30: 426 microbenchmarks
Commit 35: 426 microbenchmarks
Commit 40: 426 microbenchmarks
Commit 45: 426 microbenchmarks
Commit 50: 426 microbenchmarks
Commit 55: 424 microbenchmarks
Commit 60: 414 microbenchmarks
Commit 65: 414 microbenchmarks
Commit 70: 414 microbenchmarks
Commit 75: 414 microbenchmarks
Commit 80: 414 microbenchmarks
Commit 85: 412 microbenchmarks
Commit 90: 109 microbenchmarks
Commit 95: 109 microbenchmarks


In [6]:

summary = []

# For each microbenchmark
for name in df_all.name.unique():
    print(f"Running analysis for benchmark {name}...")

    benchmarkMeasurements = df_all.loc[(df_all['name'].str.startswith(name, na=False))]
    benchmarkMeasurements.sort_values(by=["number"], inplace=True)
    instanceRuns = benchmarkMeasurements.instanceRun.unique()
    suiteRuns = benchmarkMeasurements.suiteRun.unique()


    if len(benchmarkMeasurements) > 50:
        printName = name.replace("/","-")
        printName = printName.replace("/","-")
        printName = printName.replace("\\","-")
        benchmarkFilename = os.path.join(outFolderName, printName + ".csv")
        if path.exists(benchmarkFilename):
            print("  Already analyzed, skip.")
        else:
            results = []
            lastValues = []
            thresholds = []
            thresholds.append(initialThreshold)
            assert len(thresholds) > 0

            for commitNumber in benchmarkMeasurements.number.unique():
                print(f"  Running analysis for commit {commitNumber}...")

                #Find median perf. change
                perfRuntimes1 = benchmarkMeasurements.loc[(benchmarkMeasurements['number'] == commitNumber)
                                                    & (benchmarkMeasurements['version'] == 1)]
                perfRuntimes2 = benchmarkMeasurements.loc[(benchmarkMeasurements['number'] == commitNumber)
                                                    & (benchmarkMeasurements['version'] == 2)]


                elements1 = perfRuntimes1['runtime'].shape[0]
                elements2 = perfRuntimes2['runtime'].shape[0]
                if (elements1 == 45) & (elements2 == 45):

                    perf1 = perfRuntimes1['runtime'].median()
                    perf2 = perfRuntimes2['runtime'].median()
                    # Compare both (e.g., 10ms in ver1 and 12ms in ver2 => 12/10 = 1.2 (>1 -> regression)
                    change = ((perf2/perf1) - 1) * 100

                    print(f"    Found median performance change ({change}).")

                    # Run Bootstrapping
                    min, max = bootstrap(perfRuntimes1, perfRuntimes2)
                    instability = max - min
                    print(f"    Min: {min} Max: {max} (Width: {instability})")

                    assert max >= change
                    assert change >= min

                    lastValues.append({
                        "commit":int(commitNumber),
                        "min": min,
                        "med": change,
                        "max": max})
                    thresholds.append(instability)

                    if (len(thresholds) > slidingAvg):
                        thresholds.pop(0)

                    #Calculate sliding threshold
                    threshold = 0
                    if (len(thresholds) > 2):
                        sumOfPrevThresholds = 0
                        for val in thresholds:
                            sumOfPrevThresholds += val
                        threshold = (sumOfPrevThresholds / len(thresholds))  * 0.75

                    if threshold < 1:
                        threshold = 1

                    # Jump detection
                    jump = ""
                    if (len(lastValues) > 1):
                        currVal = lastValues[len(lastValues)-1]['med']
                        prevVal = lastValues[len(lastValues)-2]['med']
                        diff = currVal - prevVal
                        #print(f"diff is {diff}. {prevVal} -> {currVal}")
                        if (diff > threshold):
                            jump = "potential up"
                        if ((-1 * diff) > threshold):
                            jump = "potential down"
                        if (jump != ""):
                            # check CIs
                            currMin = lastValues[len(lastValues)-1]['min']
                            currMax = lastValues[len(lastValues)-1]['max']
                            prevMin = lastValues[len(lastValues)-2]['min']
                            prevMax = lastValues[len(lastValues)-2]['max']
                            if (currMin > prevMax):
                                jump = "definite up"
                            if (currMax < prevMin):
                                jump = "definite down"

                    if (jump != ""):
                        print(f"    Found {jump} jump at commit {commitNumber}.")

                    # Trend detection
                    trend = ""
                    #Clear values if there is a definite jump
                    if (jump.startswith("definite")):
                        lastValues = lastValues[-1:]

                    if (len(lastValues) > 2):
                        currVal = lastValues[len(lastValues)-1]['med']
                        sumOfPrevVals = 0
                        for val in lastValues[:-1]:
                            sumOfPrevVals += val['med']
                        diff = currVal - (sumOfPrevVals / (len(lastValues)-1))
                        #print(f"diff is {diff}. {sumOfPrevVals / slidingAvg} -> {currVal}")
                        if (diff > threshold):
                            trend = "potential up"
                        if ((-1 * diff) > threshold):
                            trend = "potential down"
                        if (trend != ""):
                            currMin = lastValues[len(lastValues)-1]['min']
                            currMax = lastValues[len(lastValues)-1]['max']
                            for val in lastValues[:-1]:
                                prevMin = val['min']
                                prevMax = val['max']
                                if (currMin > prevMax):
                                    trend = "definite up"
                                if (currMax < prevMin):
                                    trend = "definite down"


                    if (trend != ""):
                        print(f"    Found {trend} trend at commit {commitNumber}.")

                    # Remove first element
                    if (len(lastValues) > slidingAvg):
                        lastValues.pop(0)

                    #Store values
                    results.append({
                                    "name" : name,
                                    "commit" : int(commitNumber),
                                    "min" : min,
                                    "med" : change,
                                    "max" : max,
                                    "jump": jump,
                                    "trend": trend
                                    })

                    if (jump != "" or trend != ""):
                        relevance = -1
                        for n, r in relevances.items():
                            if name.startswith(n):
                                relevance = r
                        if assertRelevances:
                            assert relevance >= 0
                        summary.append({
                            "name" : name,
                            "commit" : int(commitNumber),
                            "relevance" : relevance,
                            "jump": jump,
                            "trend": trend
                        })
                else:
                    print(f"  Skip {name};{commitNumber}, only ({elements1}, {elements2}) elements.")

            #Store results for this benchmark to file
            # Convert to data frame
            df_results = pd.DataFrame(results)
            if len(results) > 0:
                df_results.sort_values(by=["name","commit"], inplace=True)
                df_results.describe()

                #Create folder
                #Save file
                df_results.to_csv(benchmarkFilename, sep=";")

    else:
        print(f"  Skip {name}, only {len(benchmarkMeasurements)} measurments.")

Running analysis for benchmark BenchmarkAppendUnescaped-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBlockTypeToInfluxQLDataType-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBooleanArrayDecodeAll/1-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBooleanArrayDecodeAll/1000-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBooleanArrayDecodeAll/55-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBooleanArrayDecodeAll/555-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBooleanDecoder_DecodeAll/1-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBooleanDecoder_DecodeAll/1000-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBooleanDecoder_DecodeAll/55-2...
  Already analyzed, skip.
Running analysis for benchmark BenchmarkBooleanDecoder_DecodeAll/555-2...
  Already analyzed, skip.
Running analysis for benchmark

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  benchmarkMeasurements.sort_values(by=["number"], inplace=True)


KeyboardInterrupt: 

In [None]:
df_summary = pd.DataFrame(summary)
if len(summary) > 0:
    df_summary.sort_values(by=["commit"], inplace=True)
    df_summary.describe()

    #Create folder
    #Save file
    df_summary.to_csv(outFolderName + "/" + summaryFileName, sep=";")

print("Done.")
