In [7]:
import pandas as pd
import glob
from pathlib import Path
import numpy as np
from numpy.random import default_rng
rng = default_rng()


folder = "../../results_all/app_bootstrap_vm/"
resultfile = "../../results_aggr/app_bootstrap_vm.csv"
threshold = 2
print(folder)


all_files = glob.glob(folder + "/*")
rows = []

for file in all_files:
    print(f"Paring file {file}")

    values = pd.read_csv(file)

    for index, row in values.iterrows():
        rows.append({
            "commit":row["commit"],
            "type": row["type"],
            "min": row["min"],
            "med": row["med"],
            "max": row["max"]
        })

print(f"Parsing complete")

../../results_all/app_bootstrap_vm/
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_1.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_10.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_11.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_12.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_13.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_14.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_15.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_16.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_17.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_18.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_19.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_2.csv
Paring file ../../results_all/app_bootstrap_vm\app_bootstrap_vm_20.csv
Paring file ../../results_all/app_bootstrap

In [8]:
print(f"Create dataframe ...")
df_values = pd.DataFrame(rows)
df_values.sort_values(by=["commit"], inplace=True)

print(f"Detect performance changes ...")
rows = []

for type in df_values.type.unique():
    df_measurements = df_values.loc[(df_values['type'].str.startswith(type, na=False))]
    df_measurements.sort_values(by=["commit"], inplace=True)

    lastMin = None
    lastMax = None

    for index, row in df_measurements.iterrows():
        if (lastMin == None) :
            lastMin = row["min"]
            lastMax = row["max"]

        min = row["min"]
        max = row["max"]

        change = None
        if ((min > lastMax) & ((min - lastMax) > threshold)):
            change = "Up"
            lastMin = min
            lastMax = max
        else:
            if ((max < lastMin) & ((lastMin - max) > threshold)):
                change = "Down"
                lastMin = min
                lastMax = max

        if (change != None):
            print(f"Found {change} at commit {row['commit']} for type {row['type']}.")

        rows.append({
            "commit":row["commit"],
            "type": row["type"],
            "min": row["min"],
            "med": row["med"],
            "max": row["max"],
            "change": change
        })

print("Done.")

Create dataframe ...
Detect performance changes ...
Found Up at commit 68 for type inserts.
Found Down at commit 12 for type simple queries.
Found Down at commit 13 for type group-by queries.
Done.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_measurements.sort_values(by=["commit"], inplace=True)


In [9]:
print(f"Save to file ...")
df_result = pd.DataFrame(rows)
df_result.sort_values(by=["commit"], inplace=True)
df_result.to_csv(str(resultfile))
print("Done.")




Save to file ...
Done.
