In [None]:
import json
from itertools import combinations
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
# Sub-folder names that are appended to the experiment roots, one per few-shot setting.
fewshot_folders = [
    'no_fewshot',
    'fewshot_1',
    'fewshot_10',
    'fewshot_100',
]
# Sub-folder names of the feature-combination variants (used for combined-model runs only).
feat_combiner = [
    'feat_comb_concat',
    'feat_comb_concat_pca',
]

In [None]:
# Root directory of all experiment results; single-model and combined-model
# runs are stored in separate sub-directories below it.
base_path_experiment = Path("/home/space/diverse_priors/results")
single_model_exps = base_path_experiment.joinpath('single_models')
combined_model_exps = base_path_experiment.joinpath('combined_models')

In [None]:
def _result_file_to_row(path):
    """Read one JSON result file and flatten it into a single row dict.

    The entries of the file's ``"metrics"`` object are lifted to the top level
    of the row; the remaining top-level keys of the file are copied as they are
    (and win over a metric of the same name). Three derived columns are added:

    * ``model_fullname`` -- the entries of ``model_ids`` joined with ``"__"``
    * ``mode`` -- ``'single_feature'`` if the path contains ``'single_models'``,
      otherwise ``'combined_features'``
    * ``combiner`` -- ``'no'`` for single-model paths, ``'Concat + PCA'`` if the
      path contains ``'feat_comb_concat_pca'``, otherwise ``'Concat'``

    Raises:
        KeyError: if the file has no ``model_ids`` entry.
    """
    # Context manager so the file handle is closed right after parsing.
    with open(path, encoding='utf-8') as handle:
        data = json.load(handle)

    row = {}
    try:
        # A missing "metrics" key is treated like an explicit null: report it
        # and keep the row instead of aborting the whole load.
        row.update(data.get("metrics"))
    except TypeError:
        print('No metric values available for file ', path)
    row.update(data)
    row.pop("metrics", None)

    row['model_fullname'] = "__".join(row['model_ids'])

    # Experiment type and combiner are encoded in the directory names only.
    path_str = str(path)
    if 'single_models' in path_str:
        row['mode'] = 'single_feature'
        row['combiner'] = 'no'
    else:
        row['mode'] = 'combined_features'
        row['combiner'] = 'Concat + PCA' if 'feat_comb_concat_pca' in path_str else 'Concat'
    return row


def load_data(exp_path):
    """Load every ``*.json`` result file found anywhere below ``exp_path``.

    Args:
        exp_path: Directory to search recursively (``pathlib.Path`` or ``str``).

    Returns:
        A tuple ``(rows, fieldnames)``: ``rows`` is a list with one flattened
        dict per result file (sorted by file path so the order is reproducible),
        ``fieldnames`` is the set of all keys that occur in any row.
    """
    rows = []
    fieldnames = set()
    # rglob() already searches recursively, so no '**/' prefix is needed.
    for res_file in sorted(Path(exp_path).rglob('*.json')):
        row = _result_file_to_row(res_file)
        fieldnames.update(row)
        rows.append(row)
    return rows, fieldnames


In [None]:
# Accumulators for the result rows of every experiment folder and for the
# union of all field names seen in them.
all_rows = []
all_fieldnames = set()

for fewshot in fewshot_folders:
    # Per few-shot setting: the single-feature folder first, then one folder
    # per feature combiner (same load order as the folder lists define).
    exp_paths = [single_model_exps / fewshot]
    for feat_comb in feat_combiner:
        exp_paths.append(combined_model_exps / fewshot / feat_comb)

    for exp_path in exp_paths:
        rows, fieldnames = load_data(exp_path)
        all_rows.extend(rows)
        all_fieldnames |= fieldnames

In [None]:
# One DataFrame row per loaded result file; keys missing in a file become NaN.
df = pd.DataFrame(all_rows)

In [None]:
# Show the assembled results table for a quick visual sanity check.
df

In [None]:
# Drop the stand-alone vit_b_16 runs; .copy() detaches the result from the
# unfiltered frame so that later column assignments do not warn.
keep_rows = df['model_fullname'] != 'vit_b_16'
df = df[keep_rows].copy()

In [None]:
# Raw model identifiers (as they occur in `model_fullname`) -> labels shown in the plots.
# NOTE(review): the identifier 'DreamSim_open_clip_vitb32' is labelled '...-VIT-B16'
# although its name says b32. The same label is used in the `order` list, so if this
# is a typo both places have to be changed together.
name_mapping = {
    'dinov2-vit-large-p14': 'DINOv2-VIT-L14',
    'dino-vit-base-p16': 'DINOv1-VIT-B16',
    'DreamSim_open_clip_vitb32': 'DreamSim-CLIP-VIT-B16',
    'OpenCLIP_ViT-L-14_laion400m_e32': 'CLIP-VIT-L14',
}


def rename_string(x):
    """Return `x` with every raw model identifier replaced by its display label.

    The replacements are applied one after another in the order of
    `name_mapping`; parts of `x` that match no identifier are left untouched.
    """
    renamed = x
    for raw_name in name_mapping:
        renamed = renamed.replace(raw_name, name_mapping[raw_name])
    return renamed

# Display label per row: raw ids are mapped to their plot labels and the '__'
# separator between the models of a combined run becomes ' & '.
# regex=False: '__' is meant literally, independent of the pandas default.
df['model_renamed'] = (
    df['model_fullname']
    .map(rename_string)
    .str.replace('__', ' & ', regex=False)
)

# Combined-feature runs additionally get the combiner name appended in
# parentheses so that the combiner variants of one model set stay distinct.
# Vectorised concatenation instead of a row-wise apply: it is also well
# defined when no combined-feature rows are present.
is_combined = df['mode'] == 'combined_features'
df.loc[is_combined, 'model_renamed'] = (
    df.loc[is_combined, 'model_renamed'] + ' (' + df.loc[is_combined, 'combiner'] + ')'
)

In [None]:
# Number of result rows per display label (quick check that every model/combination is present).
df['model_renamed'].value_counts()

In [None]:
# Columns that identify one aggregation group.
grouping_cols = [
    'fewshot_k',
    'dataset',
    'model_renamed',
]
# Metric columns that are aggregated within each group.
metrics_col = [
    'lp_acc1',
    'lp_acc5',
    'lp_mean_per_class_recall',
]

In [None]:
# Mean and standard deviation of every metric per group. The result has
# two-level columns (metric, statistic); reset_index() turns the group keys
# back into ordinary columns.
grouped_df = (
    df.groupby(grouping_cols)[metrics_col]
    .agg(['mean', 'std'])
    .sort_index()
    .reset_index()
)

In [None]:
# Inspect the column index produced by the aggregation above.
grouped_df.columns

In [None]:
def _build_plot_order(single_models, combiners):
    """Return the display labels in plotting order.

    The list starts with the single models. Then, for every combiner, it holds
    all 2-model, 3-model, ... combinations of the single models (in the order
    produced by `itertools.combinations`), each formatted as
    ``'<model> & <model> ... (<combiner>)'``.
    """
    labels = list(single_models)
    for combiner in combiners:
        for n_models in range(2, len(single_models) + 1):
            labels.extend(
                f"{' & '.join(combo)} ({combiner})"
                for combo in combinations(single_models, n_models)
            )
    return labels


# Row/column order used by all plots. Generated instead of hand-written so
# that the labels cannot drift apart through copy-and-paste mistakes.
order = _build_plot_order(
    single_models=[
        'DINOv2-VIT-L14',
        'DINOv1-VIT-B16',
        'CLIP-VIT-L14',
        'DreamSim-CLIP-VIT-B16',
    ],
    combiners=['Concat', 'Concat + PCA'],
)

In [None]:
# Leave the 'wds/vtab/pcam' dataset out of all plots below.
not_pcam = grouped_df['dataset'] != 'wds/vtab/pcam'
grouped_df = grouped_df[not_pcam].copy()

In [None]:
# Heatmaps of the per-dataset mean metrics: one PDF page per few-shot setting,
# each page holding one dataset x model panel per metric.
with PdfPages('results.pdf') as pdf:
    for fewshot_mode, curr_data in grouped_df.groupby('fewshot_k'):
        fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(20, 30), sharex=True)

        for i, metric_col in enumerate(['lp_acc1', 'lp_acc5', 'lp_mean_per_class_recall']):
            # Rows are datasets, columns are model labels, cells hold the mean of the metric.
            metric_table = pd.pivot(curr_data,
                                    columns='model_renamed',
                                    index='dataset',
                                    values=(metric_col, 'mean'))
            # Fixed column order; raises KeyError if a label listed in `order` has no data.
            metric_table = metric_table.loc[:, order]
            sns.heatmap(
                metric_table,
                annot=True,
                fmt='.3f',
                cmap='coolwarm',
                ax=axs[i],
            )
            axs[i].set_title(f"Metric {metric_col} with fewshot {'no' if fewshot_mode==-1 else fewshot_mode}")
        fig.tight_layout()
        pdf.savefig(fig, bbox_inches='tight')
        # plt.show() does not take a figure argument; it shows the open figures.
        plt.show()
        # Release the figure so that the loop does not accumulate open figures.
        plt.close(fig)

In [None]:
# Box plots of the per-dataset mean metrics: one PDF page per few-shot setting,
# one panel per metric, one box per model label (spread over the datasets).
with PdfPages('results_boxplots.pdf') as pdf:
    for fewshot_mode, curr_data in grouped_df.groupby('fewshot_k'):
        fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(10, 20), sharex=True)

        for i, metric_col in enumerate(['lp_acc1', 'lp_acc5', 'lp_mean_per_class_recall']):
            sns.boxplot(
                # Passed by keyword: older seaborn releases do not accept the
                # data frame as first positional argument.
                data=curr_data,
                y='model_renamed',
                x=(metric_col, 'mean'),
                ax=axs[i],
                order=order,
            )
            axs[i].set_title(f"Metric {metric_col} with fewshot {'no' if fewshot_mode==-1 else fewshot_mode}")
        pdf.savefig(fig, bbox_inches='tight')
        # plt.show() instead of fig.show(): the latter only works with GUI
        # backends and merely emits a warning on non-GUI ones.
        plt.show()
        # Release the figure so that the loop does not accumulate open figures.
        plt.close(fig)

In [None]:
# Median over the datasets of the per-dataset mean metrics, for every
# (few-shot setting, model label) pair.
mean_cols = [
    (metric, 'mean')
    for metric in ('lp_acc1', 'lp_acc5', 'lp_mean_per_class_recall')
]
tmp = grouped_df.groupby(['fewshot_k', 'model_renamed'])[mean_cols].median()
tmp

In [None]:
# Heatmap of the across-dataset medians: one PDF page per few-shot setting,
# rows are model labels (in `order`), columns are the metrics.
with PdfPages('results_median_ds.pdf') as pdf:
    for fewshot_mode, curr_data in tmp.groupby('fewshot_k'):
        # Drop the few-shot index level so that rows are indexed by model label only.
        curr_data = curr_data.droplevel(0)
        fig, ax = plt.subplots(figsize=(10, 10))
        sns.heatmap(curr_data.loc[order, :], annot=True, fmt='.3f', cmap='coolwarm', ax=ax)
        if fewshot_mode == -1:
            title = "No fewshot"
        else:
            title = f"Fewshot {fewshot_mode}"
        ax.set_title(title)
        pdf.savefig(fig, bbox_inches='tight')
        plt.show()