In [4]:
import pandas as pd
import glob
from pathlib import Path
import numpy as np
from numpy.random import default_rng
rng = default_rng()


folder = "../../results_all/app_bootstrap_vm/"
resultfile = "../../results_aggr/app_bootstrap_vm.csv"

insertJump=3
simpleQueryJump=1
groupQueryJump=1

slidingAvg=10
insertTrend=3
simpleQueryTrend=1
groupQueryTrend=1
print(folder)


all_files = glob.glob(folder + "/*")
rows = []

for file in all_files:
    print(f"Paring file {file}")

    values = pd.read_csv(file)

    for index, row in values.iterrows():
        rows.append({
            "commit":row["commit"],
            "type": row["type"],
            "min": row["min"],
            "med": row["med"],
            "max": row["max"]
        })

print(f"Parsing complete")

../../results_all/app_bootstrap_vm/
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_1.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_10.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_11.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_12.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_13.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_14.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_15.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_16.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_17.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_18.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_19.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_2.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_20.csv
Paring file ../../results_all/app_bootstrap

In [5]:
print(f"Create dataframe ...")
df_values = pd.DataFrame(rows)
df_values.sort_values(by=["commit"], inplace=True)

print(f"Detect performance changes ...")
rows = []

for type in df_values.type.unique():
    df_measurements = df_values.loc[(df_values['type'].str.startswith(type, na=False))]
    df_measurements.sort_values(by=["commit"], inplace=True)

    jumpThreshold=-1
    trendThreshold=-1
    if (type == "inserts"):
        jumpThreshold=insertJump
        trendThreshold=insertTrend
    elif (type == "simple queries"):
        jumpThreshold=simpleQueryJump
        trendThreshold=simpleQueryTrend
    elif (type == "group-by queries"):
        jumpThreshold=groupQueryJump
        trendThreshold=groupQueryTrend

    assert jumpThreshold > 0
    assert  trendThreshold > 0

    lastValues = []

    for index, row in df_measurements.iterrows():
        lastValues.append({
            "commit":row["commit"],
            "min": row["min"],
            "med": row["med"],
            "max": row["max"],
        })

        # Jump detection
        jump = ""
        if (len(lastValues) > 1):
            currVal = lastValues[len(lastValues)-1]['med']
            prevVal = lastValues[len(lastValues)-2]['med']
            diff = currVal - prevVal
            #print(f"diff is {diff}. {prevVal} -> {currVal}")
            if (diff > jumpThreshold):
                jump = "potential up"
            if ((-1 * diff) > jumpThreshold):
                jump = "potential down"
            if (jump != ""):
                # check CIs
                currMin = lastValues[len(lastValues)-1]['min']
                currMax = lastValues[len(lastValues)-1]['max']
                prevMin = lastValues[len(lastValues)-2]['min']
                prevMax = lastValues[len(lastValues)-2]['max']
                if (currMin > prevMax):
                    jump = "definite up"
                if (currMax < prevMin):
                    jump = "definite down"

        if (jump != ""):
            print(f"Found {jump} jump at commit {row['commit']} for type {row['type']}.")

        # Trend detection
        trend = ""
        #Clear values if there is a definite jump
        if (jump.startswith("definite")):
            lastValues = lastValues[-1:]

        if (len(lastValues) > 2):
            currVal = lastValues[len(lastValues)-1]['med']
            sumOfPrevVals = 0
            for val in lastValues[:-1]:
                sumOfPrevVals += val['med']
            diff = currVal - (sumOfPrevVals / (len(lastValues)-1))
            #print(f"diff is {diff}. {sumOfPrevVals / slidingAvg} -> {currVal}")
            if (diff > trendThreshold):
                trend = "potential up"
            if ((-1 * diff) > trendThreshold):
                trend = "potential down"
            if (trend != ""):
                currMin = lastValues[len(lastValues)-1]['min']
                currMax = lastValues[len(lastValues)-1]['max']
                for val in lastValues[:-1]:
                    prevMin = val['min']
                    prevMax = val['max']
                    if (currMin > prevMax):
                        trend = "definite up"
                    if (currMax < prevMin):
                        trend = "definite down"


        if (trend != ""):
            print(f"Found {trend} trend at commit {row['commit']} for type {row['type']}.")

        # Remove first element
        if (len(lastValues) > slidingAvg):
            lastValues.pop(0)

        rows.append({
            "commit":row["commit"],
            "type": row["type"],
            "min": row["min"],
            "med": row["med"],
            "max": row["max"],
            "jump": jump,
            "trend": trend
        })

print("Done.")

Create dataframe ...
Detect performance changes ...
Found potential down jump at commit 29 for type inserts.
Found definite up trend at commit 64 for type inserts.
Found definite up trend at commit 65 for type inserts.
Found definite up trend at commit 66 for type inserts.
Found definite up trend at commit 67 for type inserts.
Found definite up trend at commit 68 for type inserts.
Found potential up jump at commit 9 for type simple queries.
Found potential up trend at commit 9 for type simple queries.
Found potential down jump at commit 10 for type simple queries.
Found potential up trend at commit 10 for type simple queries.
Found definite down jump at commit 11 for type simple queries.
Found potential down trend at commit 13 for type simple queries.
Found definite down trend at commit 14 for type simple queries.
Found definite down trend at commit 16 for type simple queries.
Found definite down trend at commit 17 for type simple queries.
Found potential down jump at commit 59 for typ

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_measurements.sort_values(by=["commit"], inplace=True)


In [6]:
print(f"Save to file ...")
df_result = pd.DataFrame(rows)
df_result.sort_values(by=["commit"], inplace=True)
df_result.to_csv(str(resultfile))
print("Done.")




Save to file ...
Done.
