In [None]:
import pandas as pd 
import wandb
import tqdm
import os
# wandb API client used to query the runs.
api = wandb.Api()

# A project is addressed as "<entity>/<project-name>"; both parts come from the environment.
project_path = f"{os.environ['WANDB_ENTITY']}/{os.environ['WANDB_RUN_PROJECT']}"
runs = api.runs(project_path)

# One flat record per run: name, then hyperparameters, then summary metrics, then sweep id.
rows = []
for run in tqdm.tqdm(runs):
    # .summary holds the output keys/values for metrics like accuracy;
    # ._json_dict is used to omit large files.
    summary = run.summary._json_dict

    # .config holds the hyperparameters; special keys starting with "_" are skipped.
    config = dict(
        (key, value)
        for key, value in run.config.items()
        if not key.startswith('_')
    )

    record = {"name": run.name}
    record.update(config)
    record.update(summary)
    # Only runs that belong to a sweep get a "sweep_id" entry.
    if run.sweep:
        record["sweep_id"] = run.sweep.id

    rows.append(record)

runs_df = pd.DataFrame.from_records(rows)

100%|██████████| 9161/9161 [01:31<00:00, 99.65it/s] 


In [50]:
# Keep runs whose dataset is 'cub200_cropped' and that have a non-null 'best_model' value.
on_cropped_dataset = runs_df['dataset'] == 'cub200_cropped'
has_best_model = runs_df['best_model'].notna()
cropped = runs_df[on_cropped_dataset & has_best_model]

# Independent copies per activation function, so later edits do not touch `cropped`.
cropped_cos = cropped.loc[cropped['activation_function'].eq('cosine')].copy()
cropped_l2 = cropped.loc[cropped['activation_function'].eq('l2')].copy()

# Number of runs (non-null names) per activation function / backbone combination.
cropped.groupby(['activation_function', 'backbone'])[['name']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,name
activation_function,backbone,Unnamed: 2_level_1
cosine,densenet161,130
cosine,resnet50,180
cosine,resnet50[pretraining=inaturalist],222
cosine,vgg19,163
l2,densenet161,103
l2,resnet50,158
l2,resnet50[pretraining=inaturalist],187
l2,vgg19,151


In [None]:
def merge_one_to_one(df1: pd.DataFrame, df2: pd.DataFrame, suffixes=("_x", "_y"), acc_col: str = 'best[prototypes_embedded]/eval/accuracy', backbone_col: str = 'backbone') -> pd.DataFrame:
    """Greedily pair rows of ``df1`` with rows of ``df2`` that share a backbone.

    Rows of ``df1`` are visited in descending ``acc_col`` order. Each one is
    paired with the not-yet-used row of ``df2`` that has the same
    ``backbone_col`` value and the smallest absolute difference in ``acc_col``.
    Every row of ``df2`` is used at most once. Rows of ``df1`` for which no
    candidate with a comparable (non-NaN) accuracy remains are left out of the
    result.

    Args:
        df1: Left frame; must contain ``acc_col`` and ``backbone_col``.
        df2: Right frame; must contain ``acc_col`` and ``backbone_col``.
            Its index labels are ignored, so duplicate labels are safe.
        suffixes: Two strings appended to the column names coming from
            ``df1`` and ``df2`` respectively.
        acc_col: Column whose values are compared to find the closest match.
        backbone_col: Column that must be equal for two rows to be paired.

    Returns:
        A DataFrame with one row per matched pair: all non-backbone columns of
        the left row with ``suffixes[0]``, all non-backbone columns of the
        right row with ``suffixes[1]``, and a single unsuffixed
        ``backbone_col`` column. Neither input frame is modified.
    """
    # sort_values returns a new frame, so the inputs are never mutated.
    left = df1.sort_values(acc_col, ascending=False)
    # A fresh positional index guarantees unique labels: with duplicate labels
    # .loc would return several rows and .drop would discard several candidates.
    right = df2.reset_index(drop=True).sort_values(acc_col, ascending=False)

    left_suffix, right_suffix = suffixes[0], suffixes[1]
    matched_pairs = []

    for _, left_row in left.iterrows():
        backbone = left_row[backbone_col]

        # Remaining (unused) right rows with the same backbone.
        candidates = right[right[backbone_col] == backbone]

        # Absolute accuracy gap to every candidate; NaN gaps cannot be ranked.
        acc_diffs = (candidates[acc_col] - left_row[acc_col]).abs().dropna()
        if acc_diffs.empty:
            # No candidate left, or the accuracy is missing on one side.
            continue

        best_match_idx = acc_diffs.idxmin()
        right_row = candidates.loc[best_match_idx]

        pair = {f'{k}{left_suffix}': v for k, v in left_row.to_dict().items() if k != backbone_col}
        pair.update({f'{k}{right_suffix}': v for k, v in right_row.to_dict().items() if k != backbone_col})
        pair[backbone_col] = backbone
        matched_pairs.append(pair)

        # Each right row may be used only once.
        right = right.drop(best_match_idx)

    return pd.DataFrame(matched_pairs)

# Pair every cosine run with an l2 run on the same backbone, then order the
# pairs from the highest to the lowest cosine accuracy.
cropped_coxXl2_df = (
    merge_one_to_one(cropped_cos, cropped_l2, suffixes=('_cos', '_l2'), backbone_col='backbone')
    .sort_values('best[prototypes_embedded]/eval/accuracy_cos', ascending=False)
)

# Show the two accuracies of each pair side by side.
cropped_coxXl2_df.loc[:, [
    'best[prototypes_embedded]/eval/accuracy_cos',
    'best[prototypes_embedded]/eval/accuracy_l2',
]]

In [None]:
# Order the pairs by l2 accuracy (best first) and drop the pairs whose backbone
# is exactly 'resnet50'; other backbone strings are kept.
selected_df = (
    cropped_coxXl2_df
    .sort_values('best[prototypes_embedded]/eval/accuracy_l2', ascending=False)
    .loc[lambda pairs: pairs['backbone'] != 'resnet50']
)
selected_df

In [69]:
# Distinct cosine / l2 run names per backbone among the selected pairs.
selected_df[['backbone', 'name_cos', 'name_l2']].groupby('backbone').nunique()

Unnamed: 0_level_0,name_cos,name_l2
backbone,Unnamed: 1_level_1,Unnamed: 2_level_1
densenet161,103,103
resnet50[pretraining=inaturalist],187,187
vgg19,151,151


In [77]:
# Number of pairs kept for each backbone.
PAIRS_PER_BACKBONE = 40

# For every backbone keep the pairs with the highest cosine accuracy. The
# per-backbone slices are collected first and concatenated once, instead of
# growing a frame from an empty pd.DataFrame() inside the loop.
top_pairs_per_backbone = [
    backbone_pairs
    .sort_values('best[prototypes_embedded]/eval/accuracy_cos', ascending=False)
    .head(PAIRS_PER_BACKBONE)
    for _, backbone_pairs in selected_df.groupby('backbone')
]

# pd.concat rejects an empty list; fall back to an empty frame that still has
# the expected columns so the summary below keeps working.
new_df = pd.concat(top_pairs_per_backbone) if top_pairs_per_backbone else selected_df.head(0)

# Sanity check: distinct cosine / l2 run names per backbone after the cut.
new_df.groupby('backbone')[['name_cos', 'name_l2']].nunique()

Unnamed: 0_level_0,name_cos,name_l2
backbone,Unnamed: 1_level_1,Unnamed: 2_level_1
densenet161,40,40
resnet50[pretraining=inaturalist],40,40
vgg19,40,40


In [None]:
# Show the identifying columns and both accuracies of the chosen pairs.
new_df.loc[:, [
    'backbone',
    'name_cos',
    'name_l2',
    'best[prototypes_embedded]/eval/accuracy_cos',
    'best[prototypes_embedded]/eval/accuracy_l2',
]]

In [76]:
# Write the selected pairs to disk. The target directory is created first so
# the export also works on a fresh checkout where "user_study/" is missing.
output_path = "user_study/model-pairs.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
new_df.to_csv(output_path)