## Notebook appendix F: *Validating training objective effects through controlled anchor model analysis*

This notebook creates the correlation distribution boxplot for Appendix F, when model sets are restricted to the anchor models. In particular, we compute the correlation of representational similarities measured on all pairs of datasets, where we only consider representational similarities of model pairs that contain an anchor model and models from a different training objective. In the notation of the main paper, we consider the anchor models: OpenCLIP RN50, OpenCLIP ViT-L, SimCLR RN50, DINOv2 ViT-L, ResNet-50, and ViT-L. Each anchor model is contained in a single-element model set, e.g., $\Phi_{\text{OpenCLIP RN50}}$. Then we compute the distributions, e.g., $\mathbf{R}(\Phi_{\text{OpenCLIP RN50}}, \Phi_{\text{SSL}})$.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# import starbars
from scipy.stats import ranksums
# from statsmodels.stats.multitest import multipletests

from constants import (
    BASE_PATH_RESULTS,
    anchor_name_mapping,
    cat_color_mapping,
    cat_name_mapping,
    ds_list_sim_file,
    exclude_models,
    exclude_models_w_mae,
    fontsizes,
    fontsizes_cols,
    model_config_file,
    model_size_order,
    sim_metric_name_mapping
)
from helper import (
    load_all_datasetnames_n_info,
    load_model_configs_and_allowed_models,
    pp_storing_path,
    save_or_show
)

#### Global variables

In [2]:
# Dataset names and their accompanying info
ds_list, ds_info = load_all_datasetnames_n_info(ds_list_sim_file, verbose=False)

# Similarity metrics to analyse (disabled alternative: 'cka_kernel_rbf_unbiased_sigma_0.4')
sim_metrics = ['cka_kernel_rbf_unbiased_sigma_0.2', 'cka_kernel_linear_unbiased']
# The same metrics under the names given by sim_metric_name_mapping
sim_metrics_mapped = [sim_metric_name_mapping[metric] for metric in sim_metrics]

# Aggregated correlation files that can be analysed
available_data = [
    'agg_pearsonr_all_ds.csv',
    'agg_pearsonr_all_ds_with_rsa.csv',
    'agg_spearmanr_all_ds.csv',
    'agg_spearmanr_all_ds_with_rsa.csv',
    'agg_pearsonr_all_ds_rbf02_n_linear.csv',
    'agg_pearsonr_all_ds_wo_mae_rbf02_n_linear.csv'
]

# Pick the second-to-last file ('agg_pearsonr_all_ds_rbf02_n_linear.csv') and make sure it exists
curr_data = available_data[-2]
print(curr_data)
agg_data_path = BASE_PATH_RESULTS / f'aggregated/r_coeff_dist/with_anchor_models/{curr_data}'
assert agg_data_path.exists(), f'Path does not exist: {agg_data_path}. Aggregated correlation coefficients across all dataset pairs not found, please run aggregate_consistencies_for_model_set_pairs.ipynb first.'

# The figure version decides which font-size table is used
version = 'arxiv'
if version == 'arxiv':
    curr_fontsizes = fontsizes
else:
    curr_fontsizes = fontsizes_cols

# Output location: one sub-folder per aggregated file (file name without its extension).
# Alternative location kept from the original cell:
#   BASE_PATH_RESULTS / 'plots' / 'final' / version / 'app_F_distr_corr_anchor_models' / folder
SAVE = True
folder = curr_data.split(".")[0]
storing_path = pp_storing_path(
    BASE_PATH_RESULTS / 'plots' / 'experiment_anchor_models_rbf02' / folder,
    SAVE)
storing_path

agg_pearsonr_all_ds_rbf02_n_linear.csv



PosixPath('/home/space/diverse_priors/results_rebuttal/plots/experiment_anchor_models_rbf02/agg_pearsonr_all_ds_rbf02_n_linear')

In [3]:
# Identifiers of the six anchor models
anchors = [
    'OpenCLIP_RN50_openai', 'simclr-rn50', 'resnet50',
    'OpenCLIP_ViT-L-14_openai', 'dinov2-vit-large-p14', 'vit_large_patch16_224',
]

# Names from anchor_name_mapping: every mapped value, and the values for `anchors` in list order
anchor_nm_val_list = [*anchor_name_mapping.values()]
anchor_nm_val_list_v2 = [anchor_name_mapping[anchor_id] for anchor_id in anchors]

# Column names used when indexing the aggregated data frame
anchor_col, sim_metric_col = 'Anchor Model', 'Similarity metric'
comp_cat_col, comp_cat_orig_col = 'Comparison category', 'Comparison category (orig. name)'
comp_val_col, r_col = 'Comparison values', 'r coeff'

#### Load model configurations and allowed models

In [4]:
# Use exclude_models_w_mae whenever the aggregated file name contains 'mae'
if 'mae' in curr_data:
    curr_excl_models = exclude_models_w_mae
else:
    curr_excl_models = exclude_models

model_configs, allowed_models = load_model_configs_and_allowed_models(
    path=model_config_file, exclude_models=curr_excl_models, exclude_alignment=True
)

Nr. models original=64


#### Load aggregated data

In [5]:
# Read the aggregated coefficients, then narrow them down step by step:
# anchor rows only -> anchor ids replaced via anchor_name_mapping -> selected metrics -> known datasets.
r_df = (
    pd.read_csv(agg_data_path)
    .loc[lambda frame: frame[anchor_col].isin(anchors)]
    .assign(**{anchor_col: lambda frame: frame[anchor_col].map(anchor_name_mapping)})
    .loc[lambda frame: frame[sim_metric_col].isin(sim_metrics_mapped)]
    .loc[lambda frame: frame['DS 1'].isin(ds_list) & frame['DS 2'].isin(ds_list)]
    .copy()
    .reset_index(drop=True)
)

In [6]:
# Replace every comparison value by its entry in cat_name_mapping.
# NOTE(review): the column is overwritten in place, so re-running this cell without re-running
# the loading cell applies the mapping to already-mapped values; assuming cat_name_mapping is a
# plain dict, values that are not among its keys would then become NaN — confirm.
r_df[comp_val_col] = r_df[comp_val_col].map(cat_name_mapping)

#### Helper functions

In [7]:
def get_pairs(df, strata_col):
    """Return all pairs of (strata value, similarity metric) configurations sharing a metric.

    The configurations are the distinct combinations of ``strata_col`` and
    ``sim_metric_col`` occurring in ``df``, in sorted order; the strata value
    'All' is skipped. Each configuration is paired with every later one that
    has the same similarity metric.
    """
    configs = df[[strata_col, sim_metric_col]].value_counts().sort_index().index.tolist()
    configs = [(stratum, metric) for stratum, metric in configs if stratum != 'All']

    pairs = []
    for position, first in enumerate(configs):
        for second in configs[position + 1:]:
            if first[1] == second[1]:
                pairs.append((first, second))
    return pairs


def get_config_data(df, config, strata_col):
    """Select the rows of ``df`` belonging to one (strata value, similarity metric) configuration."""
    matches_stratum = df[strata_col] == config[0]
    matches_metric = df[sim_metric_col] == config[1]
    return df[matches_stratum & matches_metric]


def get_pvals_pairs(pairs, df, strata_col, alpha=0.05):
    """Compute a Wilcoxon rank-sum p-value for every pair of configurations.

    For each pair, the ``r_col`` values of both configurations are extracted
    and re-indexed from zero. Positions that are NaN in either series are
    dropped from both, the two series must then have equal length, and
    ``scipy.stats.ranksums`` is applied.

    ``alpha`` is accepted for interface compatibility but is not used.
    Returns the p-values as a list in the order of ``pairs``.
    """

    def _r_values(config):
        return get_config_data(df, config, strata_col)[r_col].reset_index(drop=True)

    p_values = []
    for first_config, second_config in pairs:
        first = _r_values(first_config)
        second = _r_values(second_config)
        # Positional labels (after the index reset) that are missing in either series
        nan_labels = [*np.where(first.isna())[0], *np.where(second.isna())[0]]
        first = first.drop(labels=nan_labels)
        second = second.drop(labels=nan_labels)
        assert len(first) == len(second)
        p_values.append(ranksums(first, second).pvalue)
    return p_values


def add_sign_bars(anchor_pairs, anchor_corr_pvals, axes, strata_col, alpha=0.01):
    """Annotate each anchor panel with bars for the significant configuration pairs.

    For every anchor in ``anchors`` (panel ``axes[i]`` for the i-th anchor) the
    pairs whose corrected p-value is below ``alpha`` are passed to
    ``starbars.draw_annotation``; afterwards the panel's legend is removed.
    ``strata_col`` is accepted for interface compatibility but is not used.
    """
    # Imported locally: starbars is only needed when significance bars are requested
    import starbars

    for panel_idx, anchor in enumerate(anchors):
        pairs = anchor_pairs[anchor]
        corrected = anchor_corr_pvals[anchor]
        significant = []
        for (config1, config2), pval in zip(pairs, corrected):
            if pval < alpha:
                significant.append((config1, config2, pval))
        panel = axes[panel_idx]
        starbars.draw_annotation(significant, ax=panel)
        panel.get_legend().remove()


def correct_anchor_pvalues(anchor_pairs, anchor_pvals):
    """Jointly correct the p-values of all anchors for multiple testing (Bonferroni).

    Parameters
    ----------
    anchor_pairs : dict
        Maps each anchor in ``anchors`` to its list of compared configuration pairs.
    anchor_pvals : dict
        Maps each anchor in ``anchors`` to the uncorrected p-values, one per pair.

    Returns
    -------
    dict
        Maps each anchor in ``anchors`` to an array of corrected p-values, aligned
        with ``anchor_pairs[anchor]``.

    Raises
    ------
    ValueError
        If an anchor has a different number of pairs and p-values.
    """
    # Imported locally: statsmodels is only needed when significance bars are requested
    from statsmodels.stats.multitest import multipletests

    # Collect the p-values explicitly in the order of `anchors`, so that the slicing below is
    # aligned by construction and does not depend on the insertion order of `anchor_pvals`.
    per_anchor = []
    for anchor in anchors:
        pvals = np.asarray(anchor_pvals[anchor], dtype=float)
        if len(pvals) != len(anchor_pairs[anchor]):
            raise ValueError(
                f'Anchor {anchor}: {len(anchor_pairs[anchor])} pairs but {len(pvals)} p-values.'
            )
        per_anchor.append(pvals)

    all_pvals = np.concatenate(per_anchor) if per_anchor else np.array([], dtype=float)
    if all_pvals.size == 0:
        # Nothing to correct; keep the return shape (one empty array per anchor).
        return {anchor: np.array([], dtype=float) for anchor in anchors}

    # Alternatives tried in the original code: method='hs', method='fdr_bh'
    corrected_all_pvals = multipletests(all_pvals, method='bonferroni')[1]

    anchor_corr_pvals = {}
    idx = 0
    for anchor, pvals in zip(anchors, per_anchor):
        nr_pvals = len(pvals)
        anchor_corr_pvals[anchor] = corrected_all_pvals[idx:(idx + nr_pvals)]
        idx += nr_pvals
    return anchor_corr_pvals


def plot_box_per_sim_metric(subset, strata_name, strata_col, strata_col_orig, add_significances):
    """Draw a 2x3 grid of boxplots, one panel per anchor model.

    Each panel shows the ``r_col`` values of one anchor, grouped by similarity
    metric on the x-axis and coloured by ``strata_col``.

    Parameters
    ----------
    subset : pandas.DataFrame
        Rows to plot; accessed through ``anchor_col``, ``sim_metric_col``,
        ``r_col`` and ``strata_col``.
    strata_name : str
        Name of the comparison category. Only the value 'Model size' has an
        effect: it fixes the hue order to ``model_size_order``.
    strata_col : str
        Column of ``subset`` used as hue.
    strata_col_orig : str
        Column of ``model_configs`` from which each anchor's own category is
        read for the panel title.
    add_significances : bool
        If true, pairwise rank-sum p-values are computed per anchor, corrected
        with ``correct_anchor_pvalues`` and drawn with ``add_sign_bars``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure, with a shared legend placed below the panels.
    """
    n_rows = 2
    n_cols = 3

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 3.55, n_rows * 2.6), sharey=True, sharex=False)
    axes = axes.flatten()

    handles, labels = [], []
    anchor_pairs = {}
    anchor_pvals = {}
    for i, anchor in enumerate(anchors):
        anchor_data = subset[subset[anchor_col] == anchor_name_mapping[anchor]]

        # The pairwise statistics are only consumed by the significance bars, so skip them
        # (and their length assertion) when no bars are requested.
        if add_significances:
            anchor_pairs[anchor] = get_pairs(anchor_data, strata_col)
            anchor_pvals[anchor] = get_pvals_pairs(anchor_pairs[anchor], anchor_data, strata_col, alpha=0.01)

        sns.boxplot(
            data=anchor_data,
            x=sim_metric_col,
            y=r_col,
            hue=strata_col,
            hue_order=model_size_order if strata_name == 'Model size' else None,
            palette=cat_color_mapping,
            ax=axes[i]
        )
        # Title: anchor name plus the anchor's own category, coloured like that category
        strata_type = cat_name_mapping[model_configs.loc[anchor][strata_col_orig]]
        axes[i].set_title(f'{anchor_name_mapping[anchor]} ({strata_type})',
                          color=cat_color_mapping[strata_type], fontsize=curr_fontsizes['title'])
        axes[i].set_xlabel("")

        axes[i].legend(loc='best')
        if i == 0:
            # The shared figure legend reuses the entries of the first panel
            handles, labels = axes[i].get_legend_handles_labels()
        if i % n_cols == 0:
            # Only the left-most panel of each row carries the y-label
            y_lbl = "Correlation coefficient" if version == 'arxiv' else "Correlation coeff."
            axes[i].set_ylabel(y_lbl, fontsize=curr_fontsizes['label'])
        else:
            axes[i].set_ylabel("")
        axes[i].tick_params('both', labelsize=curr_fontsizes['ticks'])
        # Dotted separators halfway between neighbouring x-tick groups
        for mid in axes[i].get_xticks()[:-1]:
            axes[i].axvline(mid + 0.5, ls=':', c='black', alpha=0.5, lw=1.5)

    # NOTE(review): fig.tight_layout() below recomputes the subplot spacing, so these values
    # presumably do not survive into the final figure — confirm before removing.
    fig.subplots_adjust(hspace=-.2, wspace=-.15)

    if add_significances:
        anchor_corr_pvals = correct_anchor_pvalues(anchor_pairs, anchor_pvals)
        add_sign_bars(anchor_pairs, anchor_corr_pvals, axes, strata_col, alpha=0.01)
    else:
        for ax in axes:
            # Panels that were never drawn on have no legend to remove
            panel_legend = ax.get_legend()
            if panel_legend is not None:
                panel_legend.remove()

    # One legend column per distinct hue value
    legend_cols = subset[strata_col].nunique()
    fig.legend(handles, labels,
               # bbox_to_anchor=(1.15, 0.60),
               bbox_to_anchor=(0.51, -0.05),
               loc='center',
               borderaxespad=0.,
               fontsize=curr_fontsizes['legend'],
               frameon=False,
               ncols=legend_cols
               )

    fig.tight_layout()  # Make room for the legend
    return fig

#### Plot boxplot per comparison category

In [8]:
# Rename the 'Dataset size' comparison category to 'Training data'
r_df.loc[r_df[comp_cat_col] == 'Dataset size', comp_cat_col] = 'Training data'
add_significances = False
# Iterate over [True, False] instead to also create the variant that keeps the 'All' values
for include_all in [False]:
    subset = r_df if include_all else r_df[r_df[comp_val_col] != 'All']
    # One figure per comparison category
    for strata_cat, df in subset.groupby(comp_cat_col):
        strata_cat_orig = df[comp_cat_orig_col].unique()[0]
        fig = plot_box_per_sim_metric(
            df, strata_cat, comp_val_col, strata_cat_orig, add_significances
        )
        save_or_show(
            fig,
            storing_path / f'dist_r_coeffs_{strata_cat.replace(" ", "_")}_only_box_per_sim_metric{"_include_all" if include_all else ""}.pdf',
            SAVE,
        )

stored img at /home/space/diverse_priors/results_rebuttal/plots/experiment_anchor_models_rbf02/agg_pearsonr_all_ds_rbf02_n_linear/dist_r_coeffs_Architecture_only_box_per_sim_metric.pdf.
stored img at /home/space/diverse_priors/results_rebuttal/plots/experiment_anchor_models_rbf02/agg_pearsonr_all_ds_rbf02_n_linear/dist_r_coeffs_Model_size_only_box_per_sim_metric.pdf.
stored img at /home/space/diverse_priors/results_rebuttal/plots/experiment_anchor_models_rbf02/agg_pearsonr_all_ds_rbf02_n_linear/dist_r_coeffs_Training_data_only_box_per_sim_metric.pdf.
stored img at /home/space/diverse_priors/results_rebuttal/plots/experiment_anchor_models_rbf02/agg_pearsonr_all_ds_rbf02_n_linear/dist_r_coeffs_Training_objective_only_box_per_sim_metric.pdf.
