In [1]:
import pandas as pd
import glob
from pathlib import Path
import numpy as np
from numpy.random import default_rng
rng = default_rng()


folder = "../../results_all/app_bootstrap_influx/"
resultfile = "../../results_aggr/app_bootstrap_influx.csv"

initialInsert=1
initialSimpleQuery=3
initialGroupQuery=2
slidingAvg=10
print(folder)


all_files = glob.glob(folder + "/*")
rows = []

for file in all_files:
    print(f"Paring file {file} ...")

    values = pd.read_csv(file)

    for index, row in values.iterrows():
        rows.append({
            "commit":row["commit"],
            "type": row["type"],
            "min": row["min"],
            "med": row["med"],
            "max": row["max"]
        })

print(f"Parsing complete")

../../results_all/app_bootstrap_influx/
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_1.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_10.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_100.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_101.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_102.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_103.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_104.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_105.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_106.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_107.csv ...
Paring file ../../results_all/app_bootstrap_influx\app_bootstrap_influxdb_108.csv ...
Paring file ../..

In [2]:
print(f"Create dataframe ...")
df_values = pd.DataFrame(rows)
df_values.sort_values(by=["commit"], inplace=True)

print(f"Detect performance changes ...")
rows = []

for type in df_values.type.unique():
    df_measurements = df_values.loc[(df_values['type'].str.startswith(type, na=False))]
    df_measurements.sort_values(by=["commit"], inplace=True)

    thresholds = []
    lastValues = []

    jumpThreshold=-1
    trendThreshold=-1
    if (type == "inserts"):
        thresholds.append(initialInsert)
    elif (type == "simple queries"):
        thresholds.append(initialSimpleQuery)
    elif (type == "group-by queries"):
        thresholds.append(initialGroupQuery)

    assert len(thresholds) > 0

    for index, row in df_measurements.iterrows():
        min = row["min"]
        max = row["max"]
        instability = max - min

        lastValues.append({
            "commit":row["commit"],
            "min": row["min"],
            "med": row["med"],
            "max": row["max"],
        })
        thresholds.append(instability)

        if (len(thresholds) > slidingAvg):
            thresholds.pop(0)

        #Calculate sliding threshold
        threshold = 0
        if (len(thresholds) > 2):
            sumOfPrevThresholds = 0
            for val in thresholds:
                sumOfPrevThresholds += val
            threshold = (sumOfPrevThresholds / len(thresholds))  * 0.75

        if threshold < 1:
            threshold = 1

        # Jump detection
        jump = ""
        if (len(lastValues) > 1):
            currVal = lastValues[len(lastValues)-1]['med']
            prevVal = lastValues[len(lastValues)-2]['med']
            diff = currVal - prevVal
            #print(f"diff is {diff}. {prevVal} -> {currVal}")
            if (diff > threshold):
                jump = "potential up"
            if ((-1 * diff) > threshold):
                jump = "potential down"
            if (jump != ""):
                # check CIs
                currMin = lastValues[len(lastValues)-1]['min']
                currMax = lastValues[len(lastValues)-1]['max']
                prevMin = lastValues[len(lastValues)-2]['min']
                prevMax = lastValues[len(lastValues)-2]['max']
                if (currMin > prevMax):
                    jump = "definite up"
                if (currMax < prevMin):
                    jump = "definite down"

        if (jump != ""):
            print(f"Found {jump} jump at commit {row['commit']} for type {row['type']}.")

        # Trend detection
        trend = ""
        #Clear values if there is a definite jump
        if (jump.startswith("definite")):
            lastValues = lastValues[-1:]

        if (len(lastValues) > 2):
            currVal = lastValues[len(lastValues)-1]['med']
            sumOfPrevVals = 0
            for val in lastValues[:-1]:
                sumOfPrevVals += val['med']
            diff = currVal - (sumOfPrevVals / (len(lastValues)-1))
            #print(f"diff is {diff}. {sumOfPrevVals / slidingAvg} -> {currVal}")
            if (diff > threshold):
                trend = "potential up"
            if ((-1 * diff) > threshold):
                trend = "potential down"
            if (trend != ""):
                currMin = lastValues[len(lastValues)-1]['min']
                currMax = lastValues[len(lastValues)-1]['max']
                for val in lastValues[:-1]:
                    prevMin = val['min']
                    prevMax = val['max']
                    if (currMin > prevMax):
                        trend = "definite up"
                    if (currMax < prevMin):
                        trend = "definite down"


        if (trend != ""):
            print(f"Found {trend} trend at commit {row['commit']} for type {row['type']}.")

        # Remove first element
        if (len(lastValues) > slidingAvg):
            lastValues.pop(0)

        rows.append({
            "commit":row["commit"],
            "type": row["type"],
            "min": row["min"],
            "med": row["med"],
            "max": row["max"],
            "jump": jump,
            "trend": trend
        })

print("Done.")

Create dataframe ...
Detect performance changes ...
Found potential down jump at commit 46 for type inserts.
Found definite up jump at commit 48 for type inserts.
Found potential up jump at commit 51 for type inserts.
Found definite up jump at commit 60 for type inserts.
Found potential down jump at commit 62 for type inserts.
Found definite down trend at commit 63 for type inserts.
Found potential up jump at commit 70 for type inserts.
Found potential down jump at commit 71 for type inserts.
Found definite down trend at commit 87 for type inserts.
Found potential up jump at commit 88 for type inserts.
Found definite up trend at commit 95 for type inserts.
Found potential down jump at commit 111 for type inserts.
Found definite up trend at commit 28 for type simple queries.
Found definite down jump at commit 29 for type simple queries.
Found potential up jump at commit 32 for type simple queries.
Found definite up trend at commit 32 for type simple queries.
Found definite down jump at 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_measurements.sort_values(by=["commit"], inplace=True)


In [3]:
print(f"Save to file ...")
df_result = pd.DataFrame(rows)
df_result.sort_values(by=["commit"], inplace=True)
df_result.to_csv(str(resultfile))
print("Done.")




Save to file ...
Done.
