# Evaluation of STAE Experiments

This notebook connects to MLflow, downloads all experiment runs, and produces LaTeX tables and visualizations.

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the local `model` package) are reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
from collections import defaultdict
from pathlib import Path
from pprint import pprint

import dagshub
import matplotlib.pyplot as plt
import pandas as pd
from mlflow.client import MlflowClient

from model.common import MRL_DRIVER_NAMES_MAPPING
from model.fonts import set_cmu_typewriter_font
from model.latex import get_caption, pivot_table_to_latex, pivotize_drivers
from model.mlflow import download_all_runs, download_predictions, load_predictions
from model.plot import plot_results

In [None]:
# DagsHub repository this notebook is initialised against (with MLflow enabled).
REPO_NAME = 'driver-stae'
USER_NAME = 'matejfric'
dagshub.init(REPO_NAME, USER_NAME, mlflow=True)  # type: ignore
client = MlflowClient()

# Do not truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

# Plot appearance: the style sheet is applied first, the font settings afterwards,
# so the font overrides are not reset by the style sheet.
plt.style.use('seaborn-v0_8-whitegrid')
font = set_cmu_typewriter_font()
plt.rcParams['font.size'] = 34

## Get all experiment runs

In [None]:
# Fetch the experiments returned by the tracking server and print their names.
experiments = client.search_experiments()
experiment_names = [exp.name for exp in experiments]
pprint(experiment_names)

In [None]:
# Download the runs of the experiments listed above into `runs_df`.
runs_df = download_all_runs(experiments=experiments, client=client)

# Report the number of runs and preview the first rows.
print('Total runs:', len(runs_df))
runs_df.head(10)

In [None]:
# Raw `source_type` values -> display labels; handed to `pivotize_drivers`
# and `load_predictions` further down.
source_type_map = dict(
    depth='MDE',
    source_depth='Depth',
    images='RGB',
    rgb='RGB',
    masks='Mask',
    rgbd='RGBD',
    rgbdm='RGBDM',
)
# Display label -> matplotlib "tab:" palette colour used for that label's curve.
source_type_color_map = {
    label: f'tab:{color}'
    for label, color in (
        ('Depth', 'red'),
        ('RGB', 'green'),
        ('Mask', 'blue'),
        ('MDE', 'orange'),
        ('RGBD', 'purple'),
        ('RGBDM', 'cyan'),
    )
}
# Display label -> matplotlib line style used for that label's curve.
source_type_linestyle_map = dict(
    MDE='-',
    Mask='--',
    Depth='-.',
    RGB='-',
    RGBD='--',
    RGBDM='-.',
)
# Driver identifier -> display name. Starts from the MRL driver names, adds the
# 'all' / 'mean' aggregates, and maps the ids 1-5 (given either as str or as
# int) to their string form.
driver_name_mapping = dict(MRL_DRIVER_NAMES_MAPPING)
driver_name_mapping.update(all='All', mean='Mean')
driver_name_mapping.update({str(n): str(n) for n in range(1, 6)})
driver_name_mapping.update({n: str(n) for n in range(1, 6)})

# Root folder for the generated tables and figures, with one sub-folder per dataset.
OUTPUT_DIR = Path('outputs')
OUTPUT_DIR.mkdir(exist_ok=True)
for dataset_dir in (OUTPUT_DIR / 'mrl', OUTPUT_DIR / 'dmd'):
    dataset_dir.mkdir(exist_ok=True)

## Filtering

In [None]:
# Cast the columns used in the numeric filters below to numeric dtypes
# (presumably they arrive as strings from the tracking server — confirm).
integer_columns = ['image_size', 'batch_size', 'early_stopping', 'max_epochs']
runs_df = runs_df.astype(
    {**dict.fromkeys(integer_columns, int), 'lambda_regularization': float}
)

### MRL

In [None]:
# MRL selection: image_size 64, max_epochs 100, and every run whose
# 'tag.Dataset' is not 'dmd'.
df_selection = runs_df[
    runs_df['image_size'].eq(64)
    & runs_df['max_epochs'].eq(100)
    & runs_df['tag.Dataset'].ne('dmd')
]

In [None]:
# Keep only the best run (highest ROC AUC) for each driver / source type /
# image size combination.
# Runs without a logged ROC AUC are excluded before grouping: a group that
# consists solely of NaNs makes `idxmax` return NaN (or raise, depending on the
# pandas version), which would break the `.loc` lookup below.
scored_runs = df_selection[df_selection['metric.roc_auc'].notna()]
idx = scored_runs.groupby(['driver', 'source_type', 'image_size'])[
    'metric.roc_auc'
].idxmax()
df_mrl_stae = df_selection.loc[idx]
# Show the columns relevant for inspecting the selected runs.
df_mrl_stae[
    [
        'driver',
        'source_type',
        'metric.roc_auc',
        'metric.pr_auc',
        'early_stopping',
        'patience',
        'best_metric',
    ]
]

In [None]:
# Column order of the MRL results table.
mrl_table_columns = [
    'Image Type',
    'Driver 1',
    'Driver 2',
    'Driver 4',
    'Driver 5',
    'Driver 6',
    'Mean',
    'All',
]
# Pivot the best MRL runs into a table, then fix the column order.
df_mrl_stae_pivot = pivotize_drivers(
    df_mrl_stae,
    driver_name_mapping=driver_name_mapping,
    source_type_map=source_type_map,
)
df_mrl_stae_pivot = df_mrl_stae_pivot.loc[:, mrl_table_columns]
df_mrl_stae_pivot.head()

In [None]:
# Export the MRL table to LaTeX; the 'All' column is left out of the export.
pivot_table_to_latex(
    df_mrl_stae_pivot.drop(columns='All'),
    label='tab:stae-mrl-pivot',
    caption=get_caption('STAE', 'MRL'),
    path=OUTPUT_DIR.joinpath('mrl', 'stae_mrl_pivot.tex'),
)

### DMD

In [None]:
# DMD selection: image_size 64, max_epochs 100, tagged 'dmd', excluding runs
# whose λ is (within tolerance) 1e-5 and runs of the invalid 'source_depth'
# source type.
# `.copy()` makes the selection an independent frame, so the relabelling below
# is a well-defined write instead of an assignment into a slice of `runs_df`
# (which triggers pandas' SettingWithCopyWarning).
df_selection = runs_df[
    (runs_df['image_size'] == 64)
    & (runs_df['max_epochs'] == 100)
    & (runs_df['tag.Dataset'] == 'dmd')
    & ((runs_df['lambda_regularization'] - 1e-5).abs() > 1e-6)  # != 1e-5
    & (runs_df['source_type'] != 'source_depth')
].copy()
# The original 'source_depth' runs are invalid (filtered out above); the
# 'source_depth_ir_masked' runs take their place under the 'source_depth' name.
df_selection.loc[
    df_selection['source_type'] == 'source_depth_ir_masked', 'source_type'
] = 'source_depth'

In [None]:
# Keep only the best run (highest ROC AUC) for each driver / source type /
# image size / λ combination.
# Runs without a logged ROC AUC are excluded before grouping: a group that
# consists solely of NaNs makes `idxmax` return NaN (or raise, depending on the
# pandas version), which would break the `.loc` lookup below.
scored_runs = df_selection[df_selection['metric.roc_auc'].notna()]
idx = scored_runs.groupby(
    ['driver', 'source_type', 'image_size', 'lambda_regularization']
)['metric.roc_auc'].idxmax()
df_dmd_stae = df_selection.loc[idx]
# Show the columns relevant for inspecting the selected runs.
df_dmd_stae[
    [
        'driver',
        'source_type',
        'metric.roc_auc',
        'metric.pr_auc',
        'early_stopping',
        'patience',
        'best_metric',
        'lambda_regularization',
    ]
]

In [None]:
# Give the runs with λ = 1e-6 their own source-type label so they are kept
# apart from the other runs.
# The comparison uses a tolerance (like the λ != 1e-5 filter in the DMD
# selection) rather than exact float equality.
# NOTE(review): every source type with λ = 1e-6 receives the 'Mask' label —
# presumably only mask runs were trained with this λ; confirm.
is_lambda_1e6 = (df_dmd_stae['lambda_regularization'] - 1e-6).abs() < 1e-9
df_dmd_stae.loc[is_lambda_1e6, 'source_type'] = 'Mask (λ=1e-6)'

In [None]:
# Pivot the best DMD runs into a table and preview it.
df_dmd_stae_pivot = pivotize_drivers(
    df_dmd_stae,
    driver_name_mapping=driver_name_mapping,
    source_type_map=source_type_map,
)
df_dmd_stae_pivot.head()

In [None]:
# Export the DMD table to LaTeX; the 'All' column is left out of the export.
pivot_table_to_latex(
    df_dmd_stae_pivot.drop(columns='All'),
    label='tab:stae-dmd-pivot',
    caption=get_caption('STAE', 'DMD'),
    path=OUTPUT_DIR.joinpath('dmd', 'stae_dmd_pivot.tex'),
)

## Download predictions

In [None]:
# Download the predictions of the selected MRL runs, then load them.
df_mrl_stae = download_predictions(df=df_mrl_stae, client=client)
data_mrl_stae = load_predictions(df_mrl_stae, source_type_map=source_type_map)

In [None]:
# Download the predictions of the selected DMD runs, then load them. The label
# map is extended with a short name for the relabelled λ = 1e-6 runs.
df_dmd_stae = download_predictions(df=df_dmd_stae, client=client)
dmd_source_type_map = {**source_type_map, 'Mask (λ=1e-6)': 'Maskλ'}
data_dmd_stae = load_predictions(df_dmd_stae, source_type_map=dmd_source_type_map)

## Visualizations

In [None]:
# Use a smaller font for the figures than the size configured during setup.
plt.rcParams['font.size'] = 17

# Keyword arguments shared by the `plot_results` calls that follow.
plot_kwargs = {
    'source_type_color_map': source_type_color_map,
    'source_type_linestyle_map': source_type_linestyle_map,
    'driver_name_mapping': driver_name_mapping,
    'fig_height_multiplier': 5,
    'fig_width_multiplier': 3.6,
    'n_rows': 1,
    'linewidth': 2,
    'legend_outside': True,
}

In [None]:
# Fix the driver order of the MRL data; drivers not listed in `ordering` are
# dropped, and a listed driver missing from the data raises KeyError.
ordering = ['geordi', 'jakub', 'michal', 'poli', 'dans']
data_mrl_stae = dict((name, data_mrl_stae[name]) for name in ordering)

In [None]:
# 'roc' plot of the MRL predictions, written to `save_path`.
plot_results(
    'roc',
    data_mrl_stae,
    save_path=OUTPUT_DIR.joinpath('mrl', 'roc_auc.pdf'),
    **plot_kwargs,
)

In [None]:
# 'pr' plot of the MRL predictions, written to `save_path`.
plot_results(
    'pr',
    data_mrl_stae,
    save_path=OUTPUT_DIR.joinpath('mrl', 'pr_auc.pdf'),
    **plot_kwargs,
)

In [None]:
# Plot settings for DMD: same as for MRL, plus a colour and a line style for
# the additional 'Maskλ' label.
plot_kwargs = {
    'source_type_color_map': {**source_type_color_map, 'Maskλ': 'tab:green'},
    'source_type_linestyle_map': {**source_type_linestyle_map, 'Maskλ': '--'},
    'driver_name_mapping': driver_name_mapping,
    'fig_height_multiplier': 5,
    'fig_width_multiplier': 3.6,
    'n_rows': 1,
    'linewidth': 2,
    'legend_outside': True,
}

In [None]:
# 'roc' plot of the DMD predictions, written to `save_path`.
plot_results(
    'roc',
    data_dmd_stae,
    save_path=OUTPUT_DIR.joinpath('dmd', 'roc_auc.pdf'),
    **plot_kwargs,
)

In [None]:
# 'pr' plot of the DMD predictions, written to `save_path`.
plot_results(
    'pr',
    data_dmd_stae,
    save_path=OUTPUT_DIR.joinpath('dmd', 'pr_auc.pdf'),
    **plot_kwargs,
)

## Recalculate MSE and MAE metrics for MRL

In [None]:
import copy

from model.eval import compute_best_roc_auc

# Re-scored results, keyed as redata[driver][source_type].
redata = defaultdict(dict)
# Work on a private copy so the loaded MRL predictions stay untouched.
data = copy.deepcopy(data_mrl_stae)

# Forwarded as `iqr` to `compute_best_roc_auc`. (0.00, 1.00) is used here;
# (0.00, 0.95) is the alternative setting.
# NOTE(review): presumably the quantile range of the errors that is kept —
# confirm in `model.eval`.
iqr = (0.00, 1.00)

drivers = list(data)
# Source types are taken from the first driver; every driver is assumed to
# provide the same ones (a missing one raises KeyError in the loop below).
source_types = list(data[drivers[0]])
pprint(source_types)
pprint(drivers)

# Re-score every driver / source type with the 'mae' metric and store the
# original entry merged with the recomputed values.
for driver in drivers:
    for source_type in source_types:
        x = copy.deepcopy(data[driver][source_type])
        res = compute_best_roc_auc(
            x['y_true'],
            x['errors'],
            iqr=iqr,
            metric='mae',
        )
        x.update(res)
        redata[driver][source_type] = x

In [None]:
# 'roc' plot of the MAE-based re-scoring, written to `save_path`.
# NOTE(review): `plot_kwargs` is whichever definition ran last — with cells run
# top to bottom that is the DMD variant containing the extra 'Maskλ' entries.
plot_results(
    'roc',
    redata,
    save_path=OUTPUT_DIR.joinpath('mrl', 'roc_auc_mae.pdf'),
    **plot_kwargs,
)

In [None]:
# 'pr' plot of the MAE-based re-scoring, written to `save_path`.
plot_results(
    'pr',
    redata,
    save_path=OUTPUT_DIR.joinpath('mrl', 'pr_auc_mae.pdf'),
    **plot_kwargs,
)

In [None]:
# Re-score every driver / source type with the 'mse' metric. Each entry of
# `redata` is replaced by a fresh copy of the loaded data merged with the
# recomputed values, overwriting the previous re-scoring.
for driver in drivers:
    for source_type in source_types:
        x = copy.deepcopy(data[driver][source_type])
        res = compute_best_roc_auc(
            x['y_true'],
            x['errors'],
            iqr=iqr,
            metric='mse',
        )
        x.update(res)
        redata[driver][source_type] = x

In [None]:
# 'roc' plot of the MSE-based re-scoring, written to `save_path`.
# NOTE(review): `plot_kwargs` is whichever definition ran last — with cells run
# top to bottom that is the DMD variant containing the extra 'Maskλ' entries.
plot_results(
    'roc',
    redata,
    save_path=OUTPUT_DIR.joinpath('mrl', 'roc_auc_mse.pdf'),
    **plot_kwargs,
)

In [None]:
# 'pr' plot of the MSE-based re-scoring, written to `save_path`.
plot_results(
    'pr',
    redata,
    save_path=OUTPUT_DIR.joinpath('mrl', 'pr_auc_mse.pdf'),
    **plot_kwargs,
)