In [None]:
import os
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt

# Make sure the output directory for the figures saved below exists;
# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs('../figures/hua', exist_ok=True)

In [None]:
# Load the per-run statistics and collect the distinct run identifiers.
single_stats = pd.read_csv("../results/hua_stats.csv")
runs = single_stats["run"].unique()
# The message must be an f-string; without the prefix the placeholder
# "{len(runs)}" is printed verbatim instead of the number of runs.
print(f"Results from {len(runs)} runs per configuration")

In [None]:
# Keep only the rows whose upper bound `tu` is at least 36, i.e. the runs
# over the complete range (0, 36), then print a summary of what is left.
single_stats = single_stats.loc[single_stats["tu"].ge(36)]
single_stats.info()

In [None]:
# Per configuration (eps, m, tl, tu): mean Hausdorff distance, mean duration
# and the number of rows that went into the average.
mean_stats = (
    single_stats.groupby(["eps", "m", "tl", "tu"])
    .agg(
        hausdorff_mean=("hausdorff", "mean"),
        duration_mean=("duration", "mean"),
        count=("run", "count"),
    )
    .reset_index()
)
mean_stats

In [None]:
# Line chart of the mean Hausdorff distance against m, one curve per eps.
plt.figure(figsize=(12, 8))
sns.set_theme(style="whitegrid")
sns.set_palette("colorblind")
sns.set_context("paper", font_scale=1.5)
sns.set_style("whitegrid")


def plot_hausdorff(eps):
    """Add the mean-Hausdorff-vs-m curve for one eps value to the current plot.

    Selects the rows of the module-level ``mean_stats`` frame whose ``eps``
    column equals ``eps`` and plots their ``hausdorff_mean`` against ``m``
    with circle markers. The curve is labelled ``eps=<value>`` for the legend.
    """
    rows_for_eps = mean_stats.loc[mean_stats["eps"] == eps]
    plt.plot(
        rows_for_eps["m"],
        rows_for_eps["hausdorff_mean"],
        label=f"eps={eps}",
        marker="o",
    )


# One curve for every distinct eps present in the filtered per-run statistics.
for eps in single_stats["eps"].unique():
    plot_hausdorff(eps)

# Decorate the axes: one tick per distinct m, then labels, title and legend.
plt.xticks(mean_stats["m"].unique())
plt.gca().set(
    xlabel="m",
    ylabel="Hausdorff distance",
    title="Hausdorff distance vs m",
)
plt.legend()
plt.savefig("../figures/hua/hausdorff_vs_m.svg")
plt.show()

In [None]:
# Distribution of the per-run Hausdorff distance for every m, one box per eps.
plt.figure(figsize=(12, 8))
sns.set_theme(style="whitegrid")
sns.set_palette("colorblind")
sns.set_context("paper", font_scale=1.5)
sns.set_style("whitegrid")
# boxplot returns the axes it drew on, so labels and title are set directly
# on that object instead of going through the pyplot helpers.
sns.boxplot(
    x="m",
    y="hausdorff",
    hue="eps",
    data=single_stats,
    palette="colorblind",
).set(
    xlabel="m",
    ylabel="Hausdorff distance",
    title=f"Hausdorff distance vs m over {len(runs)} runs",
)
plt.legend(title="eps")
plt.savefig("../figures/hua/boxplot_hausdorff_vs_m.svg")
plt.show()

In [None]:
# Read the range-query distortion results.
range_query = pd.read_csv("../results/hua_query_distortion.csv")
# Only the results for the complete range (0, 36) are of interest here:
# keep the rows whose upper bound `tu` is at least 36 and summarise them.
range_query = range_query.loc[range_query["tu"].ge(36)]
range_query.info()

In [None]:
# First aggregation level: within every run and configuration
# (run, eps, m, tl, tu), average both distortion measures and count the rows.
mean_distortion_per_run = (
    range_query.groupby(["run", "eps", "m", "tl", "tu"])
    .agg(
        psi_distortion_mean=("psi_distortion", "mean"),
        dai_distortion_mean=("dai_distortion", "mean"),
        count=("run", "count"),
    )
    .reset_index()
)
mean_distortion_per_run

In [None]:
# Second aggregation level: average the per-run means over all runs of a
# configuration (eps, m, tl, tu); `count` is the number of per-run rows used.
mean_distortion = (
    mean_distortion_per_run.groupby(["eps", "m", "tl", "tu"])
    .agg(
        psi_distortion_mean=("psi_distortion_mean", "mean"),
        dai_distortion_mean=("dai_distortion_mean", "mean"),
        count=("count", "count"),
    )
    .reset_index()
)
mean_distortion

In [None]:
def surface_plot_3d(query, query_label):
    """Plot one column of ``mean_distortion`` as a 3D surface over eps and m.

    The figure is written to ``../figures/hua/{query}_vs_eps_m.svg`` and then
    shown.

    Args:
        query: Name of the ``mean_distortion`` column drawn on the z-axis;
            also used to build the output file name.
        query_label: Text used for the z-axis label and in the title.
    """
    m_values = mean_distortion["m"].unique()
    eps_values = mean_distortion["eps"].unique()
    # With the default "xy" indexing, meshgrid returns arrays of shape
    # (len(m_values), len(eps_values)): rows follow m, columns follow eps.
    E, M = np.meshgrid(eps_values, m_values)
    # Z has to be laid out exactly like E and M. Start from NaN so that an
    # (m, eps) combination without data leaves a gap instead of raising.
    Z = np.full(E.shape, np.nan)
    for row, m in enumerate(m_values):
        for col, eps in enumerate(eps_values):
            matches = mean_distortion.loc[
                (mean_distortion["m"] == m) & (mean_distortion["eps"] == eps),
                query,
            ]
            if not matches.empty:
                # If several rows match, the first one is used.
                Z[row, col] = matches.iloc[0]
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111, projection="3d")
    ax.plot_surface(E, M, Z, cmap="viridis") # type: ignore
    ax.set_xlabel(r"$\varepsilon$")
    ax.set_ylabel("m")
    ax.set_zlabel(query_label) # type: ignore
    ax.set_title(f"{query_label} vs eps and m")

    fig.savefig(f"../figures/hua/{query}_vs_eps_m.svg")
    plt.show()

In [None]:
# Surface of the "psi_distortion_mean" column over eps and m; the figure is
# saved as ../figures/hua/psi_distortion_mean_vs_eps_m.svg.
surface_plot_3d("psi_distortion_mean", r"$PSI$-distortion")

In [None]:
# Surface of the "dai_distortion_mean" column over eps and m; the figure is
# saved as ../figures/hua/dai_distortion_mean_vs_eps_m.svg.
surface_plot_3d("dai_distortion_mean", r"$DAI$-distortion")

In [None]:
# Mean runtime as a 3D surface over m (x-axis) and eps (y-axis).
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection="3d")

# X and Y have one row per eps value and one column per m value.
X, Y = np.meshgrid(mean_stats["m"].unique(), mean_stats["eps"].unique())
# Walk the grid row by row (eps outer, m inner) and take the first
# duration_mean of the matching rows, or NaN when no row matches.
Z = np.array(
    [
        next(
            iter(
                mean_stats.loc[
                    (mean_stats["m"] == m) & (mean_stats["eps"] == eps),
                    "duration_mean",
                ]
            ),
            np.nan,
        )
        for eps in Y[:, 0]
        for m in X[0, :]
    ]
).reshape(X.shape)

surf = ax.plot_surface(X, Y, Z, edgecolor="k", alpha=0.8)

ax.set(
    xlabel="m",
    ylabel="eps",
    zlabel="Runtime (s)",
    title="Runtime vs m and eps",
)
ax.invert_xaxis()

fig.savefig("../figures/hua/runtime_surface_plot.svg")
plt.show()