In [None]:
%load_ext autoreload
%autoreload 2

import bnl
import random

from bnl.metrics import bmeasure

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
## Now it's time to implement the B-measure
# Let's start with the boundary hit rate for a processed boundary-contour object.
# Load the SALAMI dataset from its metadata manifest.
slm_ds = bnl.data.Dataset('~/data/salami/metadata.csv')

In [None]:
# Pick one track at random to inspect.
# NOTE: the RNG is not seeded in the visible cells, so a different track is
# chosen on every run; call random.seed(...) first for a reproducible pick.
tid = random.choice(slm_ds.track_ids)
track = slm_ds[tid]

In [None]:
# Take the first reference annotation and align the 'mu1gamma9' estimate to it.
ref = list(track.refs.values())[0]
est = track.ests['mu1gamma9'].align(ref)

# Estimate: 'prob' contour -> 'kde' cleaning -> 'mean_shift' levelling.
est_bc = (
    est.contour('prob')
    .clean('kde', bw=1)
    .level('mean_shift', bw=0.15)
)
# Reference: 'depth' contour with the default levelling.
ref_bc = ref.contour('depth').level()

# Show the estimate first, then the reference.
for boundary_contour in (est_bc, ref_bc):
    boundary_contour.plot().show()

## Let's look at how B-measure performs on SALAMI tracks that have two annotations

In [None]:
# Collect every track in the dataset that has exactly two reference annotations.
slm_2anno_tracks = [t for t in slm_ds if len(t.refs) == 2]

In [None]:
from bnl.exp import bmeasure_between_slm_refs
# Try the experiment on a single track first (slm_ds[2]; presumably track id 2 — confirm),
# then render the resulting score table with bnl's own visualisation helper.
score_df = bmeasure_between_slm_refs(slm_ds[2])
bnl.viz.bmeasure_df(score_df)

In [None]:
from pqdm.processes import pqdm
from bnl.exp import bmeasure_between_slm_refs
import bnl
import pandas as pd

# Score every SALAMI track in parallel (8 worker processes).
slm_ds = bnl.data.Dataset(manifest_path="~/data/salami/metadata.csv")
list_of_dfs = pqdm(slm_ds, bmeasure_between_slm_refs, n_jobs=8)

# Keep only real result frames. Anything else in the pqdm output (e.g. an
# exception object for a track that failed) is dropped, so report how many
# tracks survived instead of discarding the rest silently.
valid_dfs = [x for x in list_of_dfs if isinstance(x, pd.DataFrame)]
print(f"valid tracks: {len(valid_dfs)} / {len(list_of_dfs)}")

# Stack the per-track frames and cache the result for the plotting cells.
exp_results = pd.concat(valid_dfs, ignore_index=True)
exp_results.to_feather("bmeasure_slm_inter_anno_results.feather")

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

exp_results = pd.read_feather("bmeasure_slm_inter_anno_results.feather")

# Keep only the F-scores of the b / hr / poa metrics. A boolean mask is used
# rather than where().dropna() so that integer columns are not upcast to float
# and rows are not lost because of NaNs in unrelated columns.
keep = exp_results.metric.isin(["b", "hr", "poa"]) & exp_results.prf.isin(["f"])

# Go wide: one row per track, one column per (window, metric) pair.
res_df = exp_results[keep].pivot_table(
    index=['track_id'], columns=['window', 'metric'], values='score'
)

# Flatten the (window, metric) column MultiIndex into "<window>_<metric>" names.
new_columns = ['_'.join([str(f) for f in col]) for col in res_df.columns.values]
res_df.columns = new_columns

# Display the table (the original cell ended in the undefined name `res_d`).
res_df

In [None]:
fig, ax = plt.subplots(figsize=(6,4))

# Colour the violins in runs of three: columns are named "<window>_<metric>",
# so with three metrics per window each window gets its own colour. The modulo
# keeps the lookup valid if there are ever more than nine columns.
colors = ['#4477AA', '#EE6677', '#228833']  # blue, red, green
palette = [colors[(i // 3) % len(colors)] for i in range(len(res_df.columns))]
sns.violinplot(data=res_df, palette=palette, ax=ax)

# Rewrite each "<window>_<metric>" tick label as "METRIC (<window> sec)".
new_tick_labels = []
for label in ax.get_xticklabels():
    parts = label.get_text().split("_")
    new_tick_labels.append(f"{parts[1].upper()} ({parts[0]} sec)")

# Annotate every violin with its mean. The text sits at one of three fixed
# heights, cycled by position within each run of three, so neighbouring
# labels are staggered.
text_y = [0.4, 0.3, 0.2]
means = res_df.mean()
for i, mean in enumerate(means):
    ax.text(
        i, text_y[i % len(text_y)], f'μ={mean:.3f}',
        ha='center', va='bottom', fontsize=9,
        bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8)
    )

# Pin the tick positions before replacing their labels.
ax.set_xticks(ax.get_xticks())
ax.set_xticklabels(new_tick_labels, rotation=45, ha='right')
ax.set_title("B-Measure and Components between SALAMI Annotations", fontsize=14)
ax.set(ylabel="F-score")
plt.tight_layout()
fig.savefig("bmeasure_violin.pdf", bbox_inches='tight')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Wide table: one row per (track_id, prf), one column per (metric, window).
plotting_df = exp_results.pivot_table(
    index=["track_id", "prf"], columns=["metric", "window"], values="score"
)

# Flatten the column MultiIndex into "<metric>_<window>" names.
new_columns = ["_".join([str(f) for f in col if f]) for col in plotting_df.columns.values]
plotting_df.columns = new_columns

sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (4, 4)  # same for every figure, so set once

# `prf` is an index level of plotting_df, not a column, so rows have to be
# selected through the index (plotting_df['prf'] raises KeyError).
prf_level = plotting_df.index.get_level_values('prf')
prf_names = {'f': 'measure', 'p': 'precision', 'r': 'recall'}

# One violin plot per score type: F-measure, precision, recall.
for prf, text in prf_names.items():
    df = plotting_df[prf_level == prf]
    sns.violinplot(data=df)
    plt.xticks(rotation=45, ha='right')
    plt.title(f'Dist. of B-{text} with different windows', fontsize=14)
    plt.ylabel(f'B-{text}')
    plt.xlabel('Metric')
    plt.gca().set_ylim(-0.1, 1.1)
    plt.tight_layout()
    plt.show()
    # plt.savefig(f'B_violin_{prf}.pdf', bbox_inches='tight')
    # plt.close()

In [None]:
## I want to see scattering between b and b-x
# `prf` is an index level of plotting_df (not a column), so the F-score rows
# are selected through the index; `plotting_df.prf` raises AttributeError.
f_rows = plotting_df.index.get_level_values('prf') == 'f'
sns.jointplot(data=plotting_df[f_rows], x="b_0.5", y="b-x_0.5")

## Now it's time to see what T-measure does on the same set and compare

In [None]:
import pandas as pd
# Reload the cached inter-annotator B-measure results written earlier in this notebook.
slm_ref_bmeasure = pd.read_feather('bmeasure_slm_inter_anno_results.feather')

In [None]:
from pqdm.processes import pqdm
from bnl.exp import mir_eval_between_slm_refs
import bnl
import pandas as pd

# Use bnl.data.Dataset, the same access path as every other cell in this notebook.
slm_ds = bnl.data.Dataset(manifest_path="~/data/salami/metadata.csv")

# Run the mir_eval comparison on every track in parallel (6 worker processes).
output_series = pqdm(slm_ds, mir_eval_between_slm_refs, n_jobs=6)

# Keep only the items that are Series; anything else in the pqdm output is skipped.
valid_s = [x for x in output_series if isinstance(x, pd.Series)]

# One row per track, indexed by track_id, cached to disk.
# NOTE(review): older pandas releases refuse to write a non-default index to
# feather — confirm the installed version supports it.
mir_eval_results = pd.DataFrame(valid_s).set_index('track_id')
mir_eval_results.to_feather("mir_eval_slm_inter_anno_results.feather")


In [None]:
# Quick look at the first rows of both result tables before reshaping them.
print(mir_eval_results.head())
print(slm_ref_bmeasure.head())

In [None]:
# Restrict to the F-score rows; the now-constant `prf` column is dropped.
f_scores = slm_ref_bmeasure.loc[slm_ref_bmeasure.prf == 'f'].reset_index(drop=True)
f_scores = f_scores.drop(columns=['prf'])

# Wide layout: one row per track, one column per (metric, window) pair.
slm_ref_bmeasure_wide = pd.pivot_table(
    f_scores, index='track_id', columns=['metric', 'window'], values='score'
)
slm_ref_bmeasure_wide

In [None]:
# (Disabled) One-off export: flattens the column names, adds the two T-measure
# columns from mir_eval_results, and writes './slm_inter_ref_scores_wide.feather'.
# That file is read back in later in this notebook, so re-enable these lines
# if it has to be regenerated.
# slm_ref_bmeasure_wide.columns
# new_columns = ['_'.join(map(str, col)).strip() for col in slm_ref_bmeasure_wide.columns.values]
# slm_ref_bmeasure_wide.columns = new_columns
# slm_ref_bmeasure_wide["t_full"] = mir_eval_results["T-Measure full"]
# slm_ref_bmeasure_wide['t_red'] = mir_eval_results['T-Measure reduced']
# slm_ref_bmeasure_wide.to_feather('./slm_inter_ref_scores_wide.feather')

## Let's load the dataframe and make some plots

In [None]:
# Load the wide per-track score table from disk.
slm_inter_ref_scores = pd.read_feather('./slm_inter_ref_scores_wide.feather')

In [None]:
# Drop the 1.5 s window columns. Reassigning (instead of inplace=True) together
# with errors='ignore' keeps this cell safe to re-run once the columns are gone.
slm_inter_ref_scores = slm_inter_ref_scores.drop(
    columns=['b_1.5', 'hr_1.5', 'poa_1.5', 'poa-x_1.5', 'b-x_1.5'],
    errors='ignore',
)

In [None]:
import seaborn as sns

def joint_scores(df, x_col, y_col):
    """Draw a seaborn joint plot of two columns of ``df`` against each other.

    The joint axes additionally get a dashed red line from (0, 0) to (1, 1)
    and a grid, and both axes are labelled with the column names.

    Args:
        df: table passed to ``sns.jointplot`` as ``data``.
        x_col: name of the column plotted on the x axis.
        y_col: name of the column plotted on the y axis.

    Returns:
        The object returned by ``sns.jointplot``, so callers can relabel it.
    """
    marker_style = dict(s=15, alpha=0.4)
    grid = sns.jointplot(data=df, x=x_col, y=y_col, height=4, **marker_style)

    joint_ax = grid.ax_joint
    joint_ax.plot([0, 1], [0, 1], "r--")  # y = x reference line
    joint_ax.grid(True)

    grid.set_axis_labels(x_col, y_col, fontsize=14)
    return grid


In [None]:
import matplotlib.pyplot as plt

# Violin plot of the columns of slm_inter_ref_scores.
# NOTE: assigning to plt.rcParams changes the default figure size for every
# later pyplot figure in this kernel, not just this one.
plt.rcParams["figure.figsize"] = (5, 4) 
g = sns.violinplot(data=slm_inter_ref_scores)
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation=45)


In [None]:
## The two T-measures (reduced and full) are correlated with each other
g = joint_scores(slm_inter_ref_scores, "t_full", "t_red")

In [None]:
# Hit rate against the reduced T-measure, one joint plot per window.
for window in (0.5, 3):
    g = joint_scores(df=slm_inter_ref_scores, x_col=f'hr_{window:.1f}', y_col='t_red')
    g.set_axis_labels(
        xlabel=f"Hit Rate (window={window})",
        ylabel="Reduced T-Measure",
        fontsize=12,
    )

In [None]:
# Expanded B-measure against the reduced T-measure, one joint plot per window.
for window in (0.5, 3):
    g = joint_scores(df=slm_inter_ref_scores, x_col=f'b-x_{window:.1f}', y_col='t_red')
    g.set_axis_labels(
        xlabel=f"Expanded B-Measure (window={window})",
        ylabel="Reduced T-Measure",
        fontsize=12,
    )

In [None]:
# Hit rate against the full T-measure, one joint plot per window.
# The x axis shows the hr_* columns, so it is labelled "Hit Rate"
# (it was previously mislabelled "Expanded B-Measure").
for window in [0.5, 3]:
    g = joint_scores(slm_inter_ref_scores, f'hr_{window:.1f}', 't_full')
    g.set_axis_labels(f"Hit Rate (window={window})", "Full T-Measure", fontsize=12);

In [None]:
# Expanded B-measure against the reduced T-measure for every window.
# The 1.5 s columns are dropped from slm_inter_ref_scores earlier in the
# notebook, so only plot windows whose column is still present; asking for a
# missing column would raise a KeyError.
for window in [0.5, 1.5, 3]:
    x_col = f'b-x_{window:.1f}'
    if x_col not in slm_inter_ref_scores.columns:
        continue
    g = joint_scores(slm_inter_ref_scores, x_col, 't_red')
    g.set_axis_labels(f"Expanded B-Measure (window={window})", "Reduced T-Measure", fontsize=12);

In [None]:
# Expanded B-measure against the full T-measure, one joint plot per window.
for window in (0.5, 3):
    g = joint_scores(df=slm_inter_ref_scores, x_col=f'b-x_{window:.1f}', y_col='t_full')
    g.set_axis_labels(
        xlabel=f"Expanded B-Measure (window={window})",
        ylabel="Full T-Measure",
        fontsize=12,
    )

In [None]:
# Hit rate against the full T-measure, one joint plot per window.
for window in (0.5, 3):
    g = joint_scores(df=slm_inter_ref_scores, x_col=f'hr_{window:.1f}', y_col='t_full')
    g.set_axis_labels(
        xlabel=f"Hit Rate (window={window})",
        ylabel="Full T-Measure",
        fontsize=12,
    )

In [None]:
# B-measure against the full T-measure, one joint plot per window.
for window in (0.5, 3):
    g = joint_scores(df=slm_inter_ref_scores, x_col=f'b_{window:.1f}', y_col='t_full')
    g.set_axis_labels(
        xlabel=f"B-Measure (window={window})",
        ylabel="Full T-Measure",
        fontsize=12,
    )

In [None]:
## Hit rate and the expanded PO-measure (poa-x) are still measuring different things.
# The score table names its expanded PO columns 'poa-x_<window>' (see the
# 'poa-x_1.5' column dropped earlier), so the stale 'po_exp_3.0' name is replaced.
g = joint_scores(slm_inter_ref_scores, 'hr_3.0', 'poa-x_3.0')
g.set_axis_labels("Boundary Hit-Rate (window=3.0)", "Expanded PO-measure (window=3.0)", fontsize=12)

In [None]:
## Hit rate and Reduced T are still correlated
g = joint_scores(df=slm_inter_ref_scores, x_col='hr_3.0', y_col='t_red')
g.set_axis_labels(
    xlabel="Boundary Hit-Rate (window=3.0)",
    ylabel="Reduced T-measure",
    fontsize=12,
)

In [None]:
## Hit rate and Reduced T are still correlated
g = joint_scores(df=slm_inter_ref_scores, x_col='hr_0.5', y_col='t_red')
g.set_axis_labels(
    xlabel="Boundary Hit-Rate (window=0.5)",
    ylabel="Reduced T-measure",
    fontsize=12,
)

## Preparing to run some experiments

In [None]:
%load_ext autoreload
%autoreload 2

from pqdm.processes import pqdm
from bnl.exp import bmeasure_mono_casting_effects as bmce
from bnl.exp import tmeasure_mono_casting_effects as tmce
import bnl

import pandas as pd

# Re-create the dataset handle from the same SALAMI manifest used earlier in this notebook.
slm_ds = bnl.data.Dataset(manifest_path="~/data/salami/metadata.csv")

In [None]:
# Run the B-measure mono-casting experiment on one track.
out = bmce(slm_ds[2])

# Show only the F-score rows whose window equals the string "0.5".
is_f_at_half_sec = (out.prf == 'f') & (out.window == "0.5")
out.where(is_f_at_half_sec).dropna().reset_index(drop=True)

In [None]:
# Same single-track check for the T-measure mono-casting experiment.
tmce(slm_ds[2])

## Run mono casting pipeline effects on B-measure over all SLM tracks.

In [None]:
from pqdm.processes import pqdm
from bnl.exp import bmeasure_mono_casting_effects as bmce
import bnl
import pandas as pd
# Re-create the dataset handle from the SALAMI manifest for the full run below.
slm_ds = bnl.data.Dataset(manifest_path="~/data/salami/metadata.csv")

In [None]:
# Single-track run with overwrite=True.
# NOTE(review): presumably this forces recomputation of a cached result — confirm in bnl.exp.
bmce(slm_ds[2], overwrite=True)

In [None]:
# Run the B-measure mono-casting experiment on every track (8 worker processes).
list_of_dfs = pqdm(slm_ds, bmce, n_jobs=8)

# isinstance rather than an exact type match, so DataFrame subclasses are kept
# too; non-DataFrame items in the pqdm output are skipped.
valid_dfs = [df for df in list_of_dfs if isinstance(df, pd.DataFrame)]
print("total valid tracks:", len(valid_dfs))

# Stack the per-track frames, cache them to disk, and display the result.
all_results_df = pd.concat(valid_dfs, ignore_index=True)
all_results_df.to_feather("monocasting_bmeasure.feather")
all_results_df

In [None]:
# Number of rows whose 'b' value is exactly 0, then the same as a fraction of all rows.
zero_b_count = (all_results_df['b'] == 0).sum()
print(zero_b_count)
print(zero_b_count / len(all_results_df))

In [None]:
## Look at num_est_bs between bdry_cleaning strategies

# Only the rows produced with the "prob" mono-casting setting.
prob_rows = all_results_df[all_results_df.mono_casting == "prob"]

# One row per track, one column per bdry_cleaning value, holding num_est_bs.
num_bdry_df = prob_rows.pivot_table(
    index=["track_id"], columns="bdry_cleaning", values="num_est_bs"
)
# Collapse tracks whose rows are identical across all columns.
num_bdry_df = num_bdry_df.drop_duplicates()

In [None]:
import seaborn as sns

# Joint plot of the 'absorb' column against the 'kde' column of num_bdry_df.
g = sns.jointplot(data=num_bdry_df, x='absorb', y='kde')
joint_ax = g.ax_joint
joint_ax.set(aspect='equal')
# Dashed red reference line from (0, 0) to (160, 160).
joint_ax.plot([0, 160], [0, 160], 'r--')