In [None]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np

In [None]:
# Display labels for the strategy values, shared by both result tables.
# Outer keys are column names; inner dicts map a stored value to its label.
_strategy_value_labels = {
    'mono_casting': {'depth': 'Depth', 'prob': 'Weighted'},
    'bdry_cleaning': {'absorb': 'Absorb', 'none': 'Default', 'kde': 'KDE'},
    'leveling': {'unique': 'Default', 'mean_shift': 'Mean Shift'},
}
# Display names for the strategy columns themselves.
_strategy_column_labels = {
    'mono_casting': 'P(t|H)',
    'bdry_cleaning': 'B Clean-up',
    'leveling': 'Level',
}

# First results table: relabel the strategy values, then rename the columns
# ('tid' becomes 'track_id' so both tables use the same track key name).
t_res = (
    pd.read_feather('./monocasting_results.feather')
    .replace(_strategy_value_labels)
    .rename(columns={'tid': 'track_id', **_strategy_column_labels})
)

# Second results table: same relabelling, plus labels for the 'prf' values and
# capitalised names for the metric columns.
b_res = (
    pd.read_feather('./monocasting_bmeasure.feather')
    .replace({
        **_strategy_value_labels,
        'prf': {'f': "F-score", 'p': "Prec", 'r': "Recall"},
    })
    .rename(columns={
        **_strategy_column_labels,
        'b': 'B', 'hr': 'HR', 'poa': 'POA', 'poa-m': 'POA-m', 'b-m': 'B-m',
    })
)

## Boundary clean-up effects story:
Cleaning up boundaries improves precision, with KDE slightly better than Absorb in terms of precision.
In terms of F-measure, though, all strategies are neck and neck, and applying no boundary clean-up does slightly better.

In [None]:
def plot_hr_prf(b_res, window='0.5', level='Default', pt='Depth', ax=None, figsize=(7.5, 7)):
    """Violin plots of the 'HR' column for every (clean-up strategy, P/R/F) pair.

    Rows of ``b_res`` are kept where 'window', 'Level' and 'P(t|H)' equal the
    given values, then pivoted to one row per 'track_id' and one column per
    ('B Clean-up', 'prf') pair. Each violin is annotated with its column mean.

    Parameters
    ----------
    b_res : pandas.DataFrame
        Must provide the columns 'window', 'Level', 'P(t|H)', 'B Clean-up',
        'prf', 'track_id' and 'HR'.
    window, level, pt :
        Values matched (with ``==``) against the 'window', 'Level' and
        'P(t|H)' columns respectively.
    ax : matplotlib Axes, optional
        Axes to draw on. A new figure is created when omitted.
    figsize : tuple, optional
        Size of the figure created when ``ax`` is None; ignored otherwise.

    Returns
    -------
    The Axes returned by ``sns.violinplot``.

    Raises
    ------
    KeyError
        If any of the nine expected "<strategy>: <prf>" columns is missing
        after the pivot.
    """
    # where() blanks non-matching rows and dropna() removes them. Note that
    # dropna() also removes matching rows that have a NaN in any column.
    selected = b_res.where(
        (b_res['window'] == window) & (b_res['Level'] == level) & (b_res['P(t|H)'] == pt)
    ).dropna().reset_index(drop=True)

    hr_by_track = selected.pivot_table(
        index=['track_id'],
        columns=['B Clean-up', 'prf'],
        values='HR'
    )

    # Flatten the two-level column index into "<strategy>: <prf>" labels.
    hr_by_track.columns = [
        ': '.join(part for part in col if part)
        for col in hr_by_track.columns.values
    ]
    col_order = [
        'Default: F-score', 'Default: Prec', 'Default: Recall',
        'Absorb: F-score', 'Absorb: Prec', 'Absorb: Recall',
        'KDE: F-score', 'KDE: Prec', 'KDE: Recall',
    ]
    hr_by_track = hr_by_track[col_order]

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    # One colour per clean-up strategy: i // 3 gives each consecutive group of
    # three columns (F-score, Prec, Recall of one strategy) the same colour.
    colors = ['#4477AA', '#EE6677', '#228833']  # blue, red, green
    palette = [colors[i // 3] for i in range(len(col_order))]
    ax = sns.violinplot(data=hr_by_track, ax=ax, palette=palette)

    # Annotate each violin with its mean. Heights are fixed and staggered
    # within each group of three so neighbouring boxes do not overlap.
    text_y = [0.75, 0.85, 0.95] * 3
    for i, mean in enumerate(hr_by_track.mean()):
        ax.text(
            i, text_y[i], f'μ={mean:.3f}',
            ha='center', va='bottom', fontsize=9,
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8)
        )

    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.set(
        title=f'Hit-Rate ({window} sec) for different Boundary Clean-up Strategy',
        ylabel=f'Hit Rate ({window} sec)',
        xlabel='Boundary Clean-up Strategy'
    )
    # Lay out the figure that owns ``ax`` rather than whichever figure is current.
    ax.figure.tight_layout()
    return ax

In [None]:
# One panel per hit-rate window, stacked vertically with a shared x-axis.
fig, axs = plt.subplots(3, 1, figsize=(6, 10), sharex=True)
for panel, window in zip(axs, ['0.5', '1.5', '3.0']):
    plot_hr_prf(b_res, window=window, level='Default', pt='Depth', ax=panel)
fig.savefig("cleanup_effects_on_bhr.pdf", bbox_inches='tight')
    

## Let's look at POA metrics and B-measure

Weighted + Mean Shift wins.

In [None]:
def plot_bmeasure_violin(b_result, window='0.5', prf='F-score', metric='POA', figsize=(7.5, 7), ax=None):
    """Violin plots of one metric column for all twelve strategy combinations.

    Rows of ``b_result`` are kept where 'window' and 'prf' equal the given
    values, then pivoted to one row per 'track_id' and one column per
    ('P(t|H)', 'B Clean-up', 'Level') combination. Each violin is annotated
    with its column mean.

    Parameters
    ----------
    b_result : pandas.DataFrame
        Must provide the columns 'window', 'prf', 'track_id', 'P(t|H)',
        'B Clean-up', 'Level' and the column named by ``metric``.
    window, prf :
        Values matched (with ``==``) against the 'window' and 'prf' columns.
    metric : str
        Name of the column whose values are plotted.
    figsize : tuple, optional
        Size of the figure created when ``ax`` is None; ignored otherwise.
    ax : matplotlib Axes, optional
        Axes to draw on. A new figure is created when omitted.

    Returns
    -------
    The Axes that was drawn on.

    Raises
    ------
    KeyError
        If any of the twelve expected combination columns is missing after
        the pivot.
    """
    # where() blanks non-matching rows and dropna() removes them. Note that
    # dropna() also removes matching rows that have a NaN in any column.
    selected = b_result.where(
        (b_result['window'] == window) & (b_result['prf'] == prf)
    ).dropna().reset_index(drop=True)

    scores = selected.pivot_table(
        index=['track_id'],
        columns=['P(t|H)', 'B Clean-up', 'Level'],
        values=metric
    )

    # Flatten the three-level column index into "A+B+C" labels, leaving out
    # the 'Default' parts so only the non-default choices are named.
    scores.columns = [
        '+'.join(part for part in col if part not in ('', 'Default'))
        for col in scores.columns.values
    ]
    col_order = [
        'Depth', 'Depth+Absorb', 'Depth+KDE',
        'Depth+Mean Shift', 'Depth+Absorb+Mean Shift', 'Depth+KDE+Mean Shift',
        'Weighted', 'Weighted+Absorb', 'Weighted+KDE',
        'Weighted+Mean Shift', 'Weighted+Absorb+Mean Shift', 'Weighted+KDE+Mean Shift',
    ]
    scores = scores[col_order]

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    sns.violinplot(data=scores, ax=ax)

    # Annotate each violin with its mean. Heights are fixed and staggered
    # within each group of three so neighbouring boxes do not overlap.
    text_y = [0.72, 0.82, 0.92] * 4
    for i, mean in enumerate(scores.mean()):
        ax.text(
            i, text_y[i], f'μ={mean:.3f}',
            ha='center', va='bottom', fontsize=9,
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8)
        )

    ax.set(
        ylim=(0, 1),
        title=f'{metric} {prf} ({window} sec) for Different Strategy Combinations',
        ylabel=f'{metric} {prf} ({window} sec)',
        xlabel='Strategy Combo'
    )
    # Rotate the labels of ``ax`` itself. plt.xticks() would act on the
    # current Axes, which need not be the one passed in.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    # Lay out the figure that owns ``ax`` rather than whichever figure is current.
    ax.figure.tight_layout()
    return ax

In [None]:
# POA F-score for every strategy combination, one panel per window.
fig, axs = plt.subplots(3, 1, figsize=(6, 10), sharex=True, sharey=True)
for panel, window in zip(axs, ['0.5', '1.5', '3.0']):
    plot_bmeasure_violin(b_res, window=window, prf="F-score", metric="POA", ax=panel)
fig.savefig("combo_effect_on_poa.pdf", bbox_inches='tight')


In [None]:
# B F-score for every strategy combination, one panel per window.
fig, axs = plt.subplots(3, 1, figsize=(6, 10), sharex=True, sharey=True)
for panel, window in zip(axs, ['0.5', '1.5', '3.0']):
    plot_bmeasure_violin(b_res, window=window, prf="F-score", metric="B", ax=panel)
fig.savefig("combo_effects_on_bmeasure.pdf", bbox_inches='tight')


In [None]:
# B precision for every strategy combination, one panel per window.
# Not saved: the disabled save below reuses the F-score figure's filename.
fig, axs = plt.subplots(3, 1, figsize=(6, 10), sharex=True, sharey=True)
for panel, window in zip(axs, ['0.5', '1.5', '3.0']):
    plot_bmeasure_violin(b_res, window=window, prf="Prec", metric="B", ax=panel)
# fig.savefig("combo_effects_on_bmeasure.pdf", bbox_inches='tight')


In [None]:
# B recall for every strategy combination, one panel per window.
# Not saved: the disabled save below reuses the F-score figure's filename.
fig, axs = plt.subplots(3, 1, figsize=(6, 10), sharex=True, sharey=True)
for panel, window in zip(axs, ['0.5', '1.5', '3.0']):
    plot_bmeasure_violin(b_res, window=window, prf="Recall", metric="B", ax=panel)
# fig.savefig("combo_effects_on_bmeasure.pdf", bbox_inches='tight')


## Scatter plots to show effects of mono casting on B-measure

In [None]:
# Keep only the F-score rows evaluated at the 0.5 s window.
is_f_at_half_sec = (b_res['prf'] == 'F-score') & (b_res['window'] == '0.5')
filtered_result = b_res.where(is_f_at_half_sec).dropna().reset_index(drop=True)

# One row per (track, clean-up, level); one 'B' column per P(t|H) choice.
full_combo_b = filtered_result.pivot_table(
    values='B',
    columns=['P(t|H)'],
    index=['track_id', 'B Clean-up', 'Level'],
)

# Depth (x) against Weighted (y), faceted by level (columns) and clean-up (rows).
facet_options = dict(col="Level", row="B Clean-up", height=3.2, aspect=1)
marker_options = dict(alpha=0.3, s=15, rasterized=True)
g = sns.relplot(
    data=full_combo_b, x="Depth", y="Weighted",
    **facet_options, **marker_options
)

# The dashed diagonal marks equal scores for the two variants.
for panel in g.axes.flat:
    panel.grid(True)
    panel.plot([0, 1], [0, 1], "r--")

plt.suptitle("Choice of P(t|H)'s Effect on B-measure (0.5 sec)", fontsize=16)
plt.tight_layout()
plt.savefig("prom_func_effects_b05.pdf", bbox_inches='tight')

In [None]:
# Keep only the F-score rows evaluated at the 0.5 s window.
filtered_result = b_res.where(
    (b_res['window'] == '0.5') & (b_res['prf'] == 'F-score')
).dropna().reset_index(drop=True)

# One row per (track, P(t|H), clean-up); one 'POA' column per level strategy.
full_combo_b = filtered_result.pivot_table(
    index=['track_id', 'P(t|H)', 'B Clean-up'],
    columns=['Level'],
    values='POA'
)

# Default levels (x) against Mean Shift levels (y), faceted by P(t|H)
# (columns) and clean-up strategy (rows).
g = sns.relplot(
    data=full_combo_b, x="Default", y="Mean Shift",
    col="P(t|H)", row="B Clean-up",
    alpha=0.3, s=15,
    height=3.2, aspect=1,
    rasterized=True
)

# The dashed diagonal marks equal scores for the two variants.
for ax in g.axes.flat:
    ax.plot([0, 1], [0, 1], "r--")
    ax.grid(True)

plt.suptitle("Level Quantization's Effect on POA (0.5 sec)", fontsize=16)
plt.tight_layout()
# bbox_inches='tight' matches how the other figures in this notebook are saved.
plt.savefig("level_quant_poa05.pdf", bbox_inches='tight')

In [None]:
## Boundary cleaning has very little effect

# Keep only the F-score rows at the 0.5 s window, so the data matches the
# "(0.5 sec)" titles and "b05" filenames below. Without this filter the pivot's
# default mean aggregation averages B over every window and over
# precision, recall and F-score.
b_f_at_half_sec = b_res.where(
    (b_res['window'] == '0.5') & (b_res['prf'] == 'F-score')
).dropna().reset_index(drop=True)

# One row per (track, P(t|H), level); one 'B' column per clean-up strategy.
reduced_bclean = b_f_at_half_sec.pivot_table(
    index=['track_id', 'P(t|H)', 'Level'],
    columns=['B Clean-up'],
    values='B'
).reset_index()

# KDE is always on the y-axis; it is compared first with no clean-up
# ('Default') and then with 'Absorb'.
comparisons = [
    ("Default", "KDE Clean-up Effect on B-measure (0.5 sec)", "kde_vs_default_b05.pdf"),
    ("Absorb", "Absorption vs. KDE Effect on B-measure (0.5 sec)", "kde_vs_absorb_b05.pdf"),
]
for x_strategy, title, out_path in comparisons:
    g = sns.relplot(
        data=reduced_bclean, x=x_strategy, y="KDE",
        col="P(t|H)", row="Level",
        alpha=0.3, s=15,
        height=3.2, aspect=1,
        rasterized=True
    )
    # The dashed diagonal marks equal scores for the two strategies.
    for ax in g.axes.flat:
        ax.plot([0, 1], [0, 1], "r--")
        ax.grid(True)

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    # bbox_inches='tight' matches how the other figures in this notebook are saved.
    plt.savefig(out_path, bbox_inches='tight')

## Comparing T-measure and B-measure on the same tracks' estimates

In [None]:
# Per-track scores for two configurations, combined into one table each:
#   "depth": Depth casting, Default clean-up, Default levels
#   "combo": Weighted casting, KDE clean-up, Mean Shift levels

t_score_cols = ['t_reduced_f', 't_full_f']
b_score_cols = ['B', 'POA', 'HR']

# Row selectors on the T-measure table.
t_is_depth = (
    (t_res['P(t|H)'] == 'Depth')
    & (t_res['Level'] == 'Default')
    & (t_res['B Clean-up'] == 'Default')
)
t_is_combo = (
    (t_res['P(t|H)'] == 'Weighted')
    & (t_res['Level'] == 'Mean Shift')
    & (t_res['B Clean-up'] == 'KDE')
)

# Row selectors on the boundary-metric table; only F-score rows are kept.
b_is_depth = (
    (b_res['P(t|H)'] == 'Depth')
    & (b_res['Level'] == 'Default')
    & (b_res['B Clean-up'] == 'Default')
    & (b_res['prf'] == 'F-score')
)
b_is_combo = (
    (b_res['P(t|H)'] == 'Weighted')
    & (b_res['Level'] == 'Mean Shift')
    & (b_res['B Clean-up'] == 'KDE')
    & (b_res['prf'] == 'F-score')
)

# One row per track holding the two T-measure columns.
t_by_depth = t_res.where(t_is_depth).dropna().pivot_table(
    index='track_id', values=t_score_cols
)
t_by_combo = t_res.where(t_is_combo).dropna().pivot_table(
    index='track_id', values=t_score_cols
)

# One row per track; columns are (metric, window) pairs.
b_by_depth = b_res.where(b_is_depth).dropna().pivot_table(
    index='track_id', columns='window', values=b_score_cols
)
b_by_combo = b_res.where(b_is_combo).dropna().pivot_table(
    index='track_id', columns='window', values=b_score_cols
)

# Side-by-side join on track_id; a track missing from one table gets NaNs.
combined_by_depth = pd.concat([t_by_depth, b_by_depth], axis=1, join='outer')
combined_by_combo = pd.concat([t_by_combo, b_by_combo], axis=1, join='outer')


In [None]:
def plot_b_vs_t(combined_df, figsize=(5.5, 8)):
    """Scatter the 'B' columns against the two T-measure columns on a 3x2 grid.

    Rows of the grid are the windows '0.5', '1.5' and '3.0', read from the
    ('B', window) columns of ``combined_df``. Grid columns are 't_reduced_f'
    and 't_full_f'. Every panel gets a dashed y = x line, a grid and an equal
    aspect ratio.

    Parameters
    ----------
    combined_df : table passed to ``sns.scatterplot``; must provide the
        columns named above.
    figsize : size of the created figure.

    Returns
    -------
    The created figure.
    """
    t_axis_labels = {'t_reduced_f': 'T-measure (reduced)', 't_full_f': 'T-measure (full)'}
    windows = ['0.5', '1.5', '3.0']

    fig, axs = plt.subplots(3, 2, figsize=figsize, sharex=True, sharey=True)
    for row_idx, window in enumerate(windows):
        for col_idx, (t_column, t_label) in enumerate(t_axis_labels.items()):
            panel = axs[row_idx][col_idx]
            sns.scatterplot(
                combined_df, x=t_column, y=('B', window),
                ax=panel,
                s=12, alpha=0.5, rasterized=True
            )
            # Reference diagonal: points on it have equal scores on both axes.
            panel.plot([0, 1], [0, 1], 'r--')
            panel.grid(True)
            panel.set(
                aspect='equal',
                ylabel=f'B-measure ({window} sec)',
                xlabel=t_label
            )

    fig.tight_layout()
    return fig

# Depth-only configuration: draw the grid, add the title, save as PDF.
fig = plot_b_vs_t(combined_by_depth)
fig.suptitle(
    "T-measure VS B-measure with\nBaseline Monotonic Casting"
)
fig.tight_layout()  # run the layout again now that a suptitle is present
fig.savefig(
    'b_vs_t_baseline_monocasting.pdf',
    bbox_inches='tight',
)

In [None]:
# Weighted + KDE + Mean Shift configuration: draw the grid, add the title, save as PDF.
fig = plot_b_vs_t(combined_by_combo)
fig.suptitle(
    "T-measure VS B-measure with Best Monotonic Casting Combo\nWeighted Prominence + KDE Cleaning + Mean Shift Leveling"
)
fig.tight_layout()  # run the layout again now that a suptitle is present
fig.savefig(
    'b_vs_t_best_monocasting.pdf',
    bbox_inches='tight',
)

## Comparing T-measure and B-measure on SALAMI reference annotations

In [None]:
# Load the two inter-annotator result tables (presumably T-measure and
# boundary-metric scores between SALAMI reference annotations, going by the
# filenames and the section heading).
t_ref_res = pd.read_feather("mir_eval_slm_inter_anno_results.feather")
b_ref_res = pd.read_feather("bmeasure_slm_inter_anno_results.feather")

# Pivot 'score' to one row per track, then keep only the 'f' / 'b' columns,
# which leaves one column per window.
filtered_b_ref_res = b_ref_res.pivot_table(
    values='score',
    index='track_id',
    columns=['prf', 'metric', 'window'],
)['f']['b']

# NOTE(review): concat aligns rows on the index. This assumes t_ref_res is
# indexed by track_id like the pivot above. Confirm against the feather file.
# NOTE(review): the window keys below are numbers (0.5, 1.5, 3), so this
# assumes 'window' is stored as a numeric column here. Confirm.
combined_df = pd.concat([t_ref_res, filtered_b_ref_res], axis=1).rename(
    columns={
        'T-Measure reduced': 'T-measure (reduced)',
        'T-Measure full': 'T-measure (full)',
        0.5: 'B-measure (0.5 sec)',
        1.5: 'B-measure (1.5 sec)',
        3: 'B-measure (3.0 sec)',
    }
)
combined_df

In [None]:
def plot_b_vs_t_slm_ref(combined_df, figsize=(5.5, 8)):
    """Scatter the B-measure columns against the T-measure columns on a 3x2 grid.

    Rows of the grid are the columns 'B-measure (0.5 sec)', 'B-measure (1.5 sec)'
    and 'B-measure (3.0 sec)'. Grid columns are 'T-measure (reduced)' and
    'T-measure (full)'. Every panel gets a dashed y = x line, a grid and an
    equal aspect ratio. Axis labels are left to seaborn.

    Parameters
    ----------
    combined_df : table passed to ``sns.scatterplot``; must provide the
        columns named above.
    figsize : size of the created figure.

    Returns
    -------
    The created figure.
    """
    t_columns = ('T-measure (reduced)', 'T-measure (full)')
    b_columns = tuple(f"B-measure ({w} sec)" for w in ('0.5', '1.5', '3.0'))

    fig, axs = plt.subplots(3, 2, figsize=figsize, sharex=True, sharey=True)
    for row_idx, b_column in enumerate(b_columns):
        for col_idx, t_column in enumerate(t_columns):
            panel = axs[row_idx][col_idx]
            sns.scatterplot(
                combined_df, x=t_column, y=b_column,
                ax=panel,
                s=12, alpha=0.5, rasterized=True
            )
            # Reference diagonal: points on it have equal scores on both axes.
            panel.plot([0, 1], [0, 1], 'r--')
            panel.grid(True)
            panel.set(aspect='equal')

    fig.tight_layout()
    return fig

# Reference-annotation comparison: draw the grid, add the title, save as PDF.
fig = plot_b_vs_t_slm_ref(combined_df)
fig.suptitle(
    "T-measure vs B-measure\nbetween 2 reference annotations on SALAMI tracks"
)
fig.tight_layout()  # run the layout again now that a suptitle is present
fig.savefig(
    'b_vs_t_slm_ref.pdf',
    bbox_inches='tight',
)