In [None]:
import pandas as pd
import fiftyone as fo

import seaborn as sns

# Set seaborn's global theme: dark-grid style with the "mako" colour palette.
sns.set_theme(style='darkgrid', palette='mako')

## Label distribution in the ROV dataset

In [None]:
def make_df(dataset, splits):
    """Count how many annotations of each label exist per split.

    Each split is selected with ``dataset.match_tags(split)`` and its labels
    are tallied with ``count_values("ground_truth.detections.label")``.

    Args:
        dataset (fiftyone dataset): Fiftyone dataset.
        splits (list): List of split tags in the fiftyone dataset,
            e.g. ['train', 'test']. May be empty.

    Returns:
        pandas DataFrame: One row per (split, label) pair with ``label``,
        ``count`` and ``split`` columns and a unique 0..n-1 index. The frame
        is empty (but still has the three columns) when nothing was counted.
    """
    # Collect plain tuples and build the frame once instead of concatenating
    # one frame per split. This
    #   * works when `splits` is empty (pd.concat([]) raises ValueError),
    #   * yields a unique index rather than repeating 0..k for every split,
    #   * avoids concatenating an empty per-split frame, whose object-typed
    #     columns can upcast `count`.
    records = []
    for split in splits:
        view = dataset.match_tags(split)
        label_counts = view.count_values("ground_truth.detections.label")
        records.extend((label, count, split) for label, count in label_counts.items())

    return pd.DataFrame(records, columns=["label", "count", "split"])

In [None]:
# Load the FiftyOne dataset stored under the name "rov".
rov_dataset = fo.load_dataset("rov")

In [None]:
# Tally annotations per label for the train, val and test splits, then preview
# the first rows of the resulting table.
label_count = make_df(rov_dataset, ["train", "val", "test"])
label_count.head()

# Optional: uncomment the next line to save the label counts as CSV.
# label_count.to_csv("../results/label_count_split.csv", index=False)

In [None]:
# Sort rows by annotation count, largest first.
sorted_df = label_count.sort_values(by='count', ascending=False)

# One line per split showing the number of annotations for each label.
label_dist = sns.lineplot(data=sorted_df, x="label", y="count", hue="split", palette="mako")
label_dist.set(title="Annotations per label and split", xlabel="Label", ylabel="Annotation count")

# Rotate the existing tick labels in place. Calling set_xticklabels() without
# first fixing the tick positions is discouraged by matplotlib (it pins the
# label texts independently of the tick locations), so only the text
# properties are changed here.
for tick_label in label_dist.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_horizontalalignment('right')

## AUV model evaluation

In [None]:
# Load the AUV evaluation results table from the artifacts directory
# (path is relative to the notebook's working directory).
auv_results = pd.read_csv('../artifacts/results/auv_results.csv')

In [None]:
# Show the column names available in the results table.
auv_results.columns

In [None]:
# Scatter of mAP50 against the AUV training sample count with a linear fit.
sns.lmplot(
    data=auv_results,
    x='auv_train_sample_count',
    y='metrics/mAP50(B)',
    fit_reg=True,
);

In [None]:
# Same scatter, fitted with a second-order polynomial (order=2) and with the
# confidence band switched off (ci=None).
sns.lmplot(
    data=auv_results,
    x='auv_train_sample_count',
    y='metrics/mAP50(B)',
    fit_reg=True,
    order=2,
    ci=None,
);

In [None]:
# Keep only the rows with more than 50 AUV training samples and refit the
# linear trend on that subset. Note: despite its name, `exclude` holds the
# rows that are kept, not the ones dropped.
exclude = auv_results.loc[auv_results['auv_train_sample_count'].gt(50)]
sns.lmplot(
    data=exclude,
    x='auv_train_sample_count',
    y='metrics/mAP50(B)',
    fit_reg=True,
);