In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [None]:
def filter_df(df, filters):
    """Return a copy of ``df`` restricted to the rows matching every filter.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. It is copied first and never modified.
    filters : dict
        Maps a column name to the value(s) to keep. A ``list``, ``tuple``,
        ``set`` or ``frozenset`` keeps rows whose column value is any of its
        members; any other value is compared with ``==``.

    Returns
    -------
    pandas.DataFrame
        New frame holding only the rows that satisfy all filters, with their
        original index labels. An empty ``filters`` yields a plain copy.

    Raises
    ------
    KeyError
        If a filter names a column that is not present in ``df``.
    """
    df_filtered = df.copy()
    for col, val in filters.items():
        if isinstance(val, (list, tuple, set, frozenset)):
            # Membership test. Only ``list`` used to take this path, so a
            # tuple or set of accepted values was compared with ``==`` instead.
            mask = df_filtered[col].isin(list(val))
        else:
            mask = df_filtered[col] == val
        df_filtered = df_filtered[mask]
    return df_filtered

In [None]:
# Per-run statistics for DPAPT. Derived columns:
#   t_span  = tu - tl + 1
#   hd_norm = hausdorff / sqrt(2 * t_span)
#   algo    = constant tag used to tell the algorithms apart after concatenation
dpapt_stats = (
    pd.read_csv('../results/dpapt/cluster_extended/stats.csv')
    .assign(
        t_span=lambda frame: frame['tu'] - frame['tl'] + 1,
        hd_norm=lambda frame: frame['hausdorff'] / np.sqrt(2 * frame['t_span']),
        algo='DPAPT',
    )
)

# Same derived columns for HUA. Its file uses 'ep' where the DPAPT file uses
# 'eps', so the values are copied to 'eps' and the old column is removed.
hua_stats = (
    pd.read_csv('../results/hua/medium/stats.csv')
    .assign(
        t_span=lambda frame: frame['tu'] - frame['tl'] + 1,
        hd_norm=lambda frame: frame['hausdorff'] / np.sqrt(2 * frame['t_span']),
        eps=lambda frame: frame['ep'],
    )
    .drop(columns=['ep'])
    .assign(algo='HUA')
)


In [None]:
# Keep a single configuration per algorithm: eps == 1.0 and a value of 15 in
# 'n_clusters' (DPAPT) / 'm' (HUA) -- presumably the matching parameter of
# each method; confirm against the result files.
dpapt_filtered = filter_df(dpapt_stats, dict(n_clusters=15, eps=1.0))
hua_filtered = filter_df(hua_stats, dict(m=15, eps=1.0))

In [None]:
# Stack both algorithms into one long frame with a fresh 0..n-1 index; the
# 'algo' column identifies where each row came from.
combined = pd.concat((dpapt_filtered, hua_filtered), ignore_index=True)

In [None]:
# Box plot of hd_norm for every time span, with one box per algorithm.
fig, ax = plt.subplots()
sns.boxplot(
    data=combined,
    x='t_span',
    y='hd_norm',
    hue='algo',
    ax=ax,
)
# Label the figure explicitly so it can be read on its own; otherwise the raw
# column names ('t_span', 'hd_norm', 'algo') are used as axis/legend text.
ax.set_xlabel("Time Span")
ax.set_ylabel("Normalized Hausdorff Distance")
ax.set_title("DPAPT vs. HUA: Normalized Hausdorff Distance by Time Span")
legend = ax.get_legend()
if legend is not None:  # no legend is drawn when there is nothing to plot
    legend.set_title("Method")
plt.show()

In [None]:
# Individual (per-row) Hausdorff distances for DPAPT. Derived columns:
#   t_span  = tu - tl + 1
#   hd_norm = individual_hausdorff / sqrt(2 * t_span)
#   algo    = constant tag used to tell the algorithms apart
dpapt_dist = (
    pd.read_csv('../results/dpapt/cluster_extended/indiv_hd.csv')
    .assign(
        t_span=lambda frame: frame['tu'] - frame['tl'] + 1,
        hd_norm=lambda frame: (
            frame['individual_hausdorff'] / np.sqrt(2 * frame['t_span'])
        ),
        algo='DPAPT',
    )
)

# Same derived columns for HUA; 'ep' is copied to 'eps' (the name the DPAPT
# frame is filtered on) and the old column is removed.
hua_dist = (
    pd.read_csv('../results/hua/medium/indiv_hd.csv')
    .assign(
        t_span=lambda frame: frame['tu'] - frame['tl'] + 1,
        hd_norm=lambda frame: (
            frame['individual_hausdorff'] / np.sqrt(2 * frame['t_span'])
        ),
        eps=lambda frame: frame['ep'],
    )
    .drop(columns=['ep'])
    .assign(algo='HUA')
)

In [None]:
# Same configuration as for the aggregate statistics (15 in 'n_clusters' / 'm',
# eps == 1.0), now applied to the individual-distance frames.
# NOTE: this rebinds dpapt_filtered, hua_filtered and combined.
dpapt_filtered = filter_df(dpapt_dist, dict(n_clusters=15, eps=1.0))
hua_filtered = filter_df(hua_dist, dict(m=15, eps=1.0))
combined = pd.concat((dpapt_filtered, hua_filtered), ignore_index=True)

In [None]:

# Overlay one KDE of hd_norm per (algorithm, time span). Colour encodes the
# time span; line style encodes the algorithm (solid = DPAPT, dashed = HUA).

# Collect the time spans from BOTH frames so that a span present in only one
# algorithm still gets a colour and a curve.
t_spans = sorted(
    pd.concat([dpapt_filtered['t_span'], hua_filtered['t_span']]).unique()
)
palette = sns.color_palette("colorblind", n_colors=len(t_spans))
tspan_colors = dict(zip(t_spans, palette))

fig, ax = plt.subplots()
for algo_name, algo_frame, line_style in (
    ("DPAPT", dpapt_filtered, "-"),
    ("HUA", hua_filtered, "--"),
):
    for t in t_spans:
        data_t = algo_frame[algo_frame["t_span"] == t]
        if data_t.empty:
            # This algorithm has no rows for this span: nothing to estimate.
            continue
        sns.kdeplot(
            data=data_t,
            x="hd_norm",
            label=f"{algo_name} (t={t})",
            color=tspan_colors[t],
            linestyle=line_style,
            clip=(0, None),  # the distance is non-negative; cut the KDE at 0
            common_norm=False,
            ax=ax,
        )

# The x axis shows hd_norm, i.e. the individual Hausdorff distance divided by
# sqrt(2 * t_span), so the label says "Normalized".
ax.set_xlabel("Normalized Individual Hausdorff Distance")
ax.set_ylabel("Density")
ax.set_title("Comparison of DPAPT and HUA by Time Span")
ax.legend(title="Method (Time Span)")
ax.grid(True)
fig.savefig("../figures/dpapt/dpapt_hua_comparison.pdf", bbox_inches='tight')
plt.show()