In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Locate the two most recently modified commit directories: index 0 is the new
# run, index 1 is the old run it is compared against.
regression_dir = Path("../../logs/performance/regression")
# Keep directories only: before Python 3.11 a trailing "/" in a glob pattern
# also matched regular files, which would then be mistaken for commit folders.
list_of_commits = [path for path in regression_dir.glob("*/") if path.is_dir()]
if len(list_of_commits) < 2:
    raise FileNotFoundError(
        f"Need at least two commit directories in {regression_dir}, "
        f"found {len(list_of_commits)}"
    )

list_of_commits.sort(key=os.path.getmtime, reverse=True)
list_of_commits = list_of_commits[:2]
list_of_files_new = list((list_of_commits[0] / "all").glob("*.csv"))
list_of_files_old = list((list_of_commits[1] / "all").glob("*.csv"))
print(f"Old commit: {list_of_commits[1]}")
print(f"New commit: {list_of_commits[0]}")

# Fail with a readable message instead of max() raising on an empty sequence.
for commit_dir, csv_files in (
    (list_of_commits[0], list_of_files_new),
    (list_of_commits[1], list_of_files_old),
):
    if not csv_files:
        raise FileNotFoundError(f"No CSV result files found in {commit_dir / 'all'}")

# Within each commit, use the CSV with the greatest os.path.getctime value.
# NOTE(review): directories above are ranked by mtime while files are ranked by
# ctime — confirm that the difference is intentional.
latest_file_new = max(list_of_files_new, key=os.path.getctime)
latest_file_old = max(list_of_files_old, key=os.path.getctime)


# format: timestamp,category,test_name,query,scenario,execution_time,results_consistent,fainder_mode,num_results,ids,num_terms,id_str

df_latest_new = pd.read_csv(latest_file_new)
df_latest_old = pd.read_csv(latest_file_old)


# Filter df_latest_old to only include records with matching fainder_modes from df_latest_new
df_latest_old = df_latest_old[
    df_latest_old["fainder_mode"].isin(df_latest_new["fainder_mode"].unique())
]


# assert num_results with the same test_name, query, scenario, fainder_mode
def compare_num_results(df1, df2):
    """Check that both frames report the same ``num_results`` per test run.

    Every row of ``df1`` is looked up in ``df2`` by the key
    (``test_name``, ``query``, ``scenario``, ``fainder_mode``). When several
    rows of ``df2`` share that key, the first one is used.

    Rows of ``df1`` without a counterpart in ``df2`` are reported on stdout and
    skipped, because there is nothing to compare them against. Differing
    ``num_results`` values are reported on stdout as mismatches.

    Args:
        df1: DataFrame with the key columns and a ``num_results`` column.
        df2: DataFrame with the same columns, used as the reference.

    Returns:
        ``True`` if no mismatch was found, ``False`` otherwise.
    """
    key_columns = ["test_name", "query", "scenario", "fainder_mode"]
    all_match = True

    # name=None yields plain tuples, so each row is unpacked once instead of
    # being re-read through df1.iloc[i] for every column.
    for test_name, query, scenario, fainder_mode, num_results_1 in df1[
        key_columns + ["num_results"]
    ].itertuples(index=False, name=None):
        same_key = (
            (df2["test_name"] == test_name)
            & (df2["query"] == query)
            & (df2["scenario"] == scenario)
            & (df2["fainder_mode"] == fainder_mode)
        )
        candidates = df2.loc[same_key, "num_results"]
        if candidates.empty:
            print(
                f"No matching row in second frame: {test_name}, {query}, {scenario}, {fainder_mode}"
            )
            continue

        num_results_2 = candidates.iloc[0]
        if num_results_1 != num_results_2:
            print(
                f"num_results mismatch: {test_name}, {query}, {scenario}, {fainder_mode}: {num_results_1} vs {num_results_2}"
            )
            all_match = False

    return all_match


# Report num_results differences between the new and the old run on stdout.
compare_num_results(df_latest_new, df_latest_old)

# Dump both frames (new first, then old) for a quick visual inspection.
for loaded_frame in (df_latest_new, df_latest_old):
    print(loaded_frame)

In [None]:
# For each (category, scenario) pair, save a bar chart of the mean execution
# time per fainder_mode (x-axis), with the old and the new run side by side.

# Create the output directory, including its parent, if it doesn't exist
os.makedirs("figures/regression", exist_ok=True)

scenarios = df_latest_new["scenario"].unique()
categories = df_latest_new["category"].unique()
fainder_modes = df_latest_new["fainder_mode"].unique()

for category in categories:
    for scenario in scenarios:
        df_latest_new_temp = df_latest_new[
            (df_latest_new["category"] == category)
            & (df_latest_new["scenario"] == scenario)
        ]
        df_latest_old_temp = df_latest_old[
            (df_latest_old["category"] == category)
            & (df_latest_old["scenario"] == scenario)
        ]

        mean_execution_times_new = df_latest_new_temp.groupby("fainder_mode")[
            "execution_time"
        ].mean()
        # Align the old means with the fainder modes of the new run. A mode the
        # old run lacks for this pair becomes NaN, so the bar call neither fails
        # on a length mismatch nor pairs an old bar with the wrong mode.
        mean_execution_times_old = (
            df_latest_old_temp.groupby("fainder_mode")["execution_time"]
            .mean()
            .reindex(mean_execution_times_new.index)
        )

        x = np.arange(len(mean_execution_times_new.index))
        width = 0.35

        # Create individual figure
        fig, ax = plt.subplots()
        ax.bar(x - width / 2, mean_execution_times_old.values, width, label="Old")
        ax.bar(x + width / 2, mean_execution_times_new.values, width, label="New")

        ax.set_title(f"Execution time for {category} in {scenario}")
        ax.set_xlabel("Fainder mode")
        ax.set_ylabel("Execution time (s)")
        ax.set_xticks(x)
        ax.set_xticklabels(mean_execution_times_new.index, rotation=45)
        ax.legend()

        # Save individual figure, then close it so the loop does not pile up
        # open figures.
        fig.savefig(
            f"figures/regression/execution_time_{category}_{scenario}.png",
            bbox_inches="tight",
        )
        plt.close(fig)

In [None]:
# Combined figure with a log-scaled y-axis: one subplot per (category, scenario)
# pair showing the mean execution time per fainder_mode for the old and new run.
plt.figure(figsize=(30, 20))
num_plots = len(categories) * len(scenarios)
rows = (num_plots + 3) // 4  # Ceiling division to determine number of rows
cols = min(4, num_plots)  # Maximum 4 columns
for idx, (category, scenario) in enumerate(
    [(c, s) for c in categories for s in scenarios]
):
    plt.subplot(rows, cols, idx + 1)

    df_latest_new_temp = df_latest_new[
        (df_latest_new["category"] == category)
        & (df_latest_new["scenario"] == scenario)
    ]
    df_latest_old_temp = df_latest_old[
        (df_latest_old["category"] == category)
        & (df_latest_old["scenario"] == scenario)
    ]

    mean_execution_times_new = df_latest_new_temp.groupby("fainder_mode")[
        "execution_time"
    ].mean()
    # Align the old means with the fainder modes of the new run. A mode missing
    # from the old run becomes NaN instead of causing a length mismatch in
    # plt.bar or shifting old bars under the wrong tick label.
    mean_execution_times_old = (
        df_latest_old_temp.groupby("fainder_mode")["execution_time"]
        .mean()
        .reindex(mean_execution_times_new.index)
    )

    x = np.arange(len(mean_execution_times_new.index))
    width = 0.35

    plt.bar(x - width / 2, mean_execution_times_old.values, width, label="Old")
    plt.bar(x + width / 2, mean_execution_times_new.values, width, label="New")

    # Set log scale for y-axis
    plt.yscale("log")

    # Only add legend and labels for the first plot in each row
    if idx % cols == 0:
        plt.legend(loc="upper right")
        plt.xlabel("Fainder mode", fontsize=8)
        plt.ylabel("Execution time (s)", fontsize=8)
    plt.xticks(x, mean_execution_times_new.index, fontsize=6)

    plt.title(f"{category}\n{scenario}", fontsize=8)

    plt.yticks(fontsize=6)

plt.tight_layout()
plt.savefig("figures/regression/all_execution_times.png")

In [None]:
# Combined figure with a linear y-axis: one subplot per (category, scenario)
# pair showing the mean execution time per fainder_mode for the old and new run.
plt.figure(figsize=(30, 20))
num_plots = len(categories) * len(scenarios)
rows = (num_plots + 3) // 4  # Ceiling division to determine number of rows
cols = min(4, num_plots)  # Maximum 4 columns
for idx, (category, scenario) in enumerate(
    [(c, s) for c in categories for s in scenarios]
):
    plt.subplot(rows, cols, idx + 1)

    df_latest_new_temp = df_latest_new[
        (df_latest_new["category"] == category)
        & (df_latest_new["scenario"] == scenario)
    ]
    df_latest_old_temp = df_latest_old[
        (df_latest_old["category"] == category)
        & (df_latest_old["scenario"] == scenario)
    ]

    mean_execution_times_new = df_latest_new_temp.groupby("fainder_mode")[
        "execution_time"
    ].mean()
    # Align the old means with the fainder modes of the new run. A mode missing
    # from the old run becomes NaN instead of causing a length mismatch in
    # plt.bar or shifting old bars under the wrong tick label.
    mean_execution_times_old = (
        df_latest_old_temp.groupby("fainder_mode")["execution_time"]
        .mean()
        .reindex(mean_execution_times_new.index)
    )

    x = np.arange(len(mean_execution_times_new.index))
    width = 0.35

    plt.bar(x - width / 2, mean_execution_times_old.values, width, label="Old")
    plt.bar(x + width / 2, mean_execution_times_new.values, width, label="New")

    # Only add legend and labels for the first plot in each row
    if idx % cols == 0:
        plt.legend(loc="upper right")
        plt.xlabel("Fainder mode", fontsize=8)
        plt.ylabel("Execution time (s)", fontsize=8)
    plt.xticks(x, mean_execution_times_new.index, fontsize=6)

    plt.title(f"{category}\n{scenario}", fontsize=8)

    plt.yticks(fontsize=6)

plt.tight_layout()
plt.savefig("figures/regression/all_execution_times_without_log.png")

In [None]:
# Combined figure with a linear y-axis: one subplot per (category, fainder_mode)
# pair showing the mean execution time per scenario for the old and new run.
plt.figure(figsize=(30, 20))
num_per_column = 2  # number of subplots placed side by side in one row
num_plots = len(categories) * len(fainder_modes)
rows = (
    num_plots + num_per_column - 1
) // num_per_column  # Ceiling division to determine number of rows
cols = min(num_per_column, num_plots)  # At most num_per_column columns
for idx, (category, fainder_mode) in enumerate(
    [(c, f) for c in categories for f in fainder_modes]
):
    plt.subplot(rows, cols, idx + 1)

    df_latest_new_temp = df_latest_new[
        (df_latest_new["category"] == category)
        & (df_latest_new["fainder_mode"] == fainder_mode)
    ]
    df_latest_old_temp = df_latest_old[
        (df_latest_old["category"] == category)
        & (df_latest_old["fainder_mode"] == fainder_mode)
    ]

    mean_execution_times_new = df_latest_new_temp.groupby("scenario")[
        "execution_time"
    ].mean()
    # Align the old means with the scenarios of the new run. A scenario missing
    # from the old run becomes NaN instead of causing a length mismatch in
    # plt.bar or shifting old bars under the wrong tick label.
    mean_execution_times_old = (
        df_latest_old_temp.groupby("scenario")["execution_time"]
        .mean()
        .reindex(mean_execution_times_new.index)
    )

    x = np.arange(len(mean_execution_times_new.index))
    width = 0.35

    plt.bar(x - width / 2, mean_execution_times_old.values, width, label="Old")
    plt.bar(x + width / 2, mean_execution_times_new.values, width, label="New")

    # Only add legend and labels for the first plot in each row
    if idx % num_per_column == 0:
        plt.legend(loc="upper right")
        plt.xlabel("Scenario", fontsize=8)
        plt.ylabel("Execution time (s)", fontsize=8)
    plt.xticks(x, mean_execution_times_new.index, fontsize=6)

    plt.title(f"{category}\n{fainder_mode}", fontsize=8)

    plt.yticks(fontsize=6)

plt.tight_layout()
plt.savefig("figures/regression/all_execution_times_without_log_by_fainder_mode.png")

In [None]:
# Combined figure with a log-scaled y-axis: one subplot per
# (category, fainder_mode) pair showing the mean execution time per scenario
# for the old and new run.
plt.figure(figsize=(30, 20))
num_per_column = 2  # number of subplots placed side by side in one row
num_plots = len(categories) * len(fainder_modes)
rows = (
    num_plots + num_per_column - 1
) // num_per_column  # Ceiling division to determine number of rows
cols = min(num_per_column, num_plots)  # At most num_per_column columns
for idx, (category, fainder_mode) in enumerate(
    [(c, f) for c in categories for f in fainder_modes]
):
    plt.subplot(rows, cols, idx + 1)

    df_latest_new_temp = df_latest_new[
        (df_latest_new["category"] == category)
        & (df_latest_new["fainder_mode"] == fainder_mode)
    ]
    df_latest_old_temp = df_latest_old[
        (df_latest_old["category"] == category)
        & (df_latest_old["fainder_mode"] == fainder_mode)
    ]

    mean_execution_times_new = df_latest_new_temp.groupby("scenario")[
        "execution_time"
    ].mean()
    # Align the old means with the scenarios of the new run. A scenario missing
    # from the old run becomes NaN instead of causing a length mismatch in
    # plt.bar or shifting old bars under the wrong tick label.
    mean_execution_times_old = (
        df_latest_old_temp.groupby("scenario")["execution_time"]
        .mean()
        .reindex(mean_execution_times_new.index)
    )

    x = np.arange(len(mean_execution_times_new.index))
    width = 0.35

    plt.bar(x - width / 2, mean_execution_times_old.values, width, label="Old")
    plt.bar(x + width / 2, mean_execution_times_new.values, width, label="New")

    # Set log scale for y-axis
    plt.yscale("log")

    # Only add legend and labels for the first plot in each row
    if idx % num_per_column == 0:
        plt.legend(loc="upper right")
        plt.xlabel("Scenario", fontsize=8)
        plt.ylabel("Execution time (s)", fontsize=8)
    plt.xticks(x, mean_execution_times_new.index, fontsize=6)

    plt.title(f"{category}\n{fainder_mode}", fontsize=8)

    plt.yticks(fontsize=6)

plt.tight_layout()
plt.savefig("figures/regression/all_execution_times_by_fainder_mode.png")

In [None]:
# For each (category, scenario) pair, draw one figure of execution time (y-axis)
# against the row position within the filtered results (x-axis), with one line
# per fainder_mode for both the old and the new run.
# NOTE(review): an x position refers to the same query in both runs only if the
# two CSVs list their queries in the same order — confirm.


for category in df_latest_new["category"].unique():
    for scenario in df_latest_new["scenario"].unique():
        df_latest_new_temp = df_latest_new[
            (df_latest_new["category"] == category)
            & (df_latest_new["scenario"] == scenario)
        ]
        # Restrict the old run to the same scenario as the new run; filtering by
        # category alone would put the measurements of every scenario on the
        # "Old" line.
        df_latest_old_temp = df_latest_old[
            (df_latest_old["category"] == category)
            & (df_latest_old["scenario"] == scenario)
        ]

        # Create individual figure
        plt.figure()

        for fainder_mode in df_latest_new_temp["fainder_mode"].unique():
            # reset_index gives each subset positions 0..n-1 to plot against.
            df_latest_new_temp_fainder_mode = df_latest_new_temp[
                df_latest_new_temp["fainder_mode"] == fainder_mode
            ].reset_index(drop=True)
            df_latest_old_temp_fainder_mode = df_latest_old_temp[
                df_latest_old_temp["fainder_mode"] == fainder_mode
            ].reset_index(drop=True)

            plt.plot(
                df_latest_new_temp_fainder_mode.index,
                df_latest_new_temp_fainder_mode["execution_time"],
                label=f"New {fainder_mode}",
            )
            plt.plot(
                df_latest_old_temp_fainder_mode.index,
                df_latest_old_temp_fainder_mode["execution_time"],
                label=f"Old {fainder_mode}",
            )
        plt.title(f"Execution time for {category} in {scenario}")
        plt.xlabel("Query")
        plt.ylabel("Execution time (s)")
        plt.legend()

        # Save individual figure
        plt.savefig(
            f"figures/regression/execution_time_{category}_{scenario}_query.png",
            bbox_inches="tight",
        )

In [None]:
# Heatmap of the mean execution time difference (new - old): one row per
# (category, scenario) pair and one column per fainder_mode.

# Mean execution time per (category, scenario, fainder_mode) for both runs
group_keys = ["category", "scenario", "fainder_mode"]
df_latest_new_grouped = (
    df_latest_new.groupby(group_keys)["execution_time"].mean().reset_index()
)
df_latest_old_grouped = (
    df_latest_old.groupby(group_keys)["execution_time"].mean().reset_index()
)

# Restrict both sides to the fainder modes they share, then to the categories
# they share (new side first, old side second, as before).
for shared_column in ("fainder_mode", "category"):
    df_latest_new_grouped = df_latest_new_grouped[
        df_latest_new_grouped[shared_column].isin(
            df_latest_old_grouped[shared_column].unique()
        )
    ]
    df_latest_old_grouped = df_latest_old_grouped[
        df_latest_old_grouped[shared_column].isin(
            df_latest_new_grouped[shared_column].unique()
        )
    ]

# Pair up old and new means per key and compute the difference
heatmap_data = pd.merge(
    df_latest_new_grouped,
    df_latest_old_grouped,
    on=group_keys,
    suffixes=("_new", "_old"),
)
heatmap_data["execution_time_diff"] = (
    heatmap_data["execution_time_new"] - heatmap_data["execution_time_old"]
)
# Create a pivot table for the heatmap
heatmap_data_pivot = heatmap_data.pivot(
    index=["category", "scenario"],
    columns="fainder_mode",
    values="execution_time_diff",
)

# Centre the diverging colormap on zero so that its neutral colour means
# "no change" and the two hues separate slowdowns from speedups. With
# auto-scaled limits the neutral colour sits at the midpoint of the data range.
max_abs_diff = heatmap_data_pivot.abs().max().max()
if pd.isna(max_abs_diff) or max_abs_diff == 0:
    color_limits = {}  # nothing to centre on; keep matplotlib's defaults
else:
    color_limits = {"vmin": -max_abs_diff, "vmax": max_abs_diff}

# Create a heatmap (a single figure; no empty placeholder figure is opened)
plt.figure(figsize=(20, 10))
plt.imshow(
    heatmap_data_pivot,
    cmap="coolwarm",
    aspect="auto",
    interpolation="nearest",
    **color_limits,
)
plt.colorbar(label="Execution time difference (s)")
plt.title("Execution time difference (new - old) for each category and scenario")
plt.xlabel("Fainder mode")
plt.ylabel("Category and scenario")
plt.xticks(
    range(len(heatmap_data_pivot.columns)),
    list(heatmap_data_pivot.columns),
    rotation=45,
)
plt.yticks(
    range(len(heatmap_data_pivot.index)),
    [f"{cat}\n{scen}" for cat, scen in heatmap_data_pivot.index],
)
plt.tight_layout()
plt.savefig("figures/regression/execution_time_diff_heatmap.png", bbox_inches="tight")