## Notebook to aggregate the downstream task performances for each model
After running single-model evaluation with `../scripts/single_model_evaluation.py` for all models and datasets, we can gather the linear probe results and store the aggregated information in a single CSV file.

In [None]:
from itertools import product

import pandas as pd

from sim_consistency.analysis.utils import retrieve_performance
from constants import (
    BASE_PATH_RESULTS,
    ds_list_perf_file,
    exclude_models,
    model_config_file
)
from helper import (
    load_all_datasetnames_n_info,
    load_model_configs_and_allowed_models,
    pp_storing_path
)

#### Global variables

In [None]:
# Dataset names to aggregate over, together with the accompanying dataset
# info, both loaded from `ds_list_perf_file`.
ds_list_perf, ds_info = load_all_datasetnames_n_info(ds_list_perf_file, verbose=True)

# Root directory of the downstream-task linear probe results (single models).
results_root = BASE_PATH_RESULTS / 'linear_probe/single_model'

# Directory that receives the aggregated CSV.
# NOTE(review): the positional `True` presumably tells the helper to create
# the directory if it is missing -- confirm against `helper.pp_storing_path`.
storing_path = pp_storing_path(BASE_PATH_RESULTS / 'aggregated/single_model_performance', True)

#### Load config

In [None]:
# Load the model configurations from `model_config_file`; `allowed_models`
# is the collection of model ids iterated over in the aggregation cell below.
# NOTE(review): `exclude_models` presumably lists model ids to drop, and
# `exclude_alignment=True` presumably filters out alignment-specific models --
# confirm both against `helper.load_model_configs_and_allowed_models`.
model_configs, allowed_models = load_model_configs_and_allowed_models(
    path=model_config_file,
    exclude_models=exclude_models,
    exclude_alignment=True,
)

### Aggregate downstream task performance for all combinations of (ds_list_perf, allowed_models)

In [None]:
# One record per (dataset, model) pair. The value stored under 'TestAcc' is
# whatever `retrieve_performance` returns for the 'test_lp_acc1' metric column
# (presumably the top-1 test accuracy of the linear probe -- confirm in
# `sim_consistency.analysis.utils`).
res = [
    {
        'DS': ds,
        'Model': mid,
        'TestAcc': retrieve_performance(
            model_id=mid,
            dataset_id=ds,
            metric_column='test_lp_acc1',
            results_root=results_root,
            regularization="weight_decay",
            allow_db_results=False,
        ),
    }
    for ds, mid in product(ds_list_perf, allowed_models)
]

# Name the columns explicitly: without this, an empty `res` (no datasets or no
# allowed models) yields a frame with no columns, and the CSV written from it
# would lack the header downstream readers expect.
perf_res = pd.DataFrame(res, columns=['DS', 'Model', 'TestAcc'])

In [None]:
# Write the aggregated table as a single CSV inside `storing_path`.
fn = storing_path / 'all_ds.csv'
print(f"Storing aggregated results at {fn}")
# index=False: the row index carries no information, so keep it out of the file.
perf_res.to_csv(fn, index=False)