In [None]:
import json
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

In [None]:
# Notebook-wide pandas display settings: do not wrap wide frames and do not
# truncate the number of columns or the width of individual cells.
for _option, _value in (
    ('display.expand_frame_repr', False),
    ('display.max_columns', None),
    ('display.max_colwidth', None),
):
    pd.set_option(_option, _value)

# Root directory that is scanned for experiment sub-directories.
OT_LOGS_DIR = Path("/lustre/groups/eml/projects/sroschmann/ot_logs")

In [None]:
def parse_params(params_path):
    """Read a plain-text parameter file into a flat ``{key: value}`` dict.

    Each non-empty line is split once on ``":"`` if it contains one, otherwise
    once on ``"="``; lines with neither delimiter are ignored. Keys and values
    are whitespace-stripped and always kept as strings.

    Args:
        params_path: ``pathlib.Path`` of the parameter file.

    Returns:
        dict mapping parameter names to their string values. Empty if the file
        does not exist; if reading fails part-way, a warning is printed and
        whatever was parsed so far is returned.
    """
    if not params_path.exists():
        return {}

    parsed = {}
    try:
        with open(params_path, "r") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                # ":" is tried first, so "=" only acts as the delimiter on
                # lines that contain no colon at all.
                for delimiter in (":", "="):
                    if delimiter in entry:
                        name, value = entry.split(delimiter, 1)
                        parsed[name.strip()] = value.strip()
                        break
    except Exception as e:
        print(f"Warning: Could not read {params_path}: {e}")

    return parsed

In [None]:
def load_and_flatten_metrics(json_path, prefix):
    """Load the ``epoch_best`` entry of a metrics JSON file as prefixed columns.

    Args:
        json_path: ``pathlib.Path`` of the JSON file.
        prefix: String prepended (with an underscore) to every metric name.

    Returns:
        dict ``{f"{prefix}_{metric}": value}`` built from the file's
        ``epoch_best`` mapping. Empty if the file is missing, cannot be parsed,
        or ``epoch_best`` is not a dictionary (a warning is printed in the
        latter two cases).
    """
    flattened = {}
    if json_path.exists():
        try:
            with open(json_path, "r") as handle:
                best = json.load(handle).get("epoch_best", {})

            if not isinstance(best, dict):
                raise ValueError("epoch_best is not a dictionary")

            flattened.update(
                {f"{prefix}_{name}": score for name, score in best.items()}
            )
        except Exception as e:
            print(f"Warning: Issue parsing {json_path}: {e}")

    return flattened

In [None]:
def build_results_dataframe(base_dir, cutoff_timestamp="2026-01-15_21-36-57"):
    """Collect parameters and best-epoch metrics of every experiment directory.

    Args:
        base_dir: ``pathlib.Path`` whose sub-directories are individual
            experiments.
        cutoff_timestamp: Experiments whose directory name starts with a
            timestamp that sorts before this value are skipped. The comparison
            is purely lexicographic on the first 19 characters of the name.
            Assumes names begin with ``YYYY-MM-DD_HH-MM-SS`` (the format of the
            default value) - TODO confirm against the logging code.

    Returns:
        ``pandas.DataFrame`` with one row per retained experiment: the columns
        ``experiment_id`` and ``full_path``, every key found in ``params.txt``
        (as strings), and the ``coco_*`` / ``imagenet_*`` metric columns.
        Runs whose params contain ``debugging`` equal to ``"True"`` are dropped.
    """
    data_rows = []
    print(f"Scanning directory: {base_dir} ...")

    # Path.iterdir() yields entries in arbitrary order; sorting keeps the row
    # order (and therefore tie-breaking in later sorts) reproducible.
    for exp_dir in sorted(base_dir.iterdir()):
        if not exp_dir.is_dir():
            continue

        if exp_dir.name[:19] < cutoff_timestamp:
            continue

        params = parse_params(exp_dir / "params.txt")

        if params.get("debugging") == "True":
            continue

        # Parameters are merged after the identifying columns, so a parameter
        # with the same name would override them (same as a dict.update).
        row = {"experiment_id": exp_dir.name, "full_path": str(exp_dir), **params}

        for result_dir, prefix in (("COCO", "coco"), ("imagenetv1", "imagenet")):
            metrics_path = exp_dir / "results" / result_dir / "alignment_probing.json"
            row.update(load_and_flatten_metrics(metrics_path, prefix=prefix))

        data_rows.append(row)

    return pd.DataFrame(data_rows)

In [None]:
def extract_model_and_dataset(path_str):
    """Split a stringified list of embedding paths into model and dataset names.

    The input may look like a Python list literal; brackets and quotes are
    removed and commas act as separators. For every path with at least two
    components, the second-to-last component is taken as the model name and
    the last component as the dataset file name.

    Args:
        path_str: String holding one or more paths. Non-string input yields
            ``(None, None)``.

    Returns:
        Tuple ``(model, datasets)``: the model name of the first usable path
        and all dataset names joined with ``" + "``; ``(None, None)`` if no
        usable path was found.
    """
    if not isinstance(path_str, str):
        return None, None

    # Remove list/quote punctuation and turn commas into whitespace in one pass.
    punctuation = str.maketrans({"[": None, "]": None, "'": None, '"': None, ",": " "})
    segments = (
        token.strip("/").split("/")
        for token in path_str.translate(punctuation).split()
    )
    pairs = [(seg[-2], seg[-1]) for seg in segments if len(seg) >= 2]

    if not pairs:
        return None, None

    return pairs[0][0], " + ".join(dataset for _, dataset in pairs)

In [None]:

def _merge_model_columns(frame, modality):
    """Collapse the per-split ``*_{modality}_model`` columns into ``{modality}_model``.

    Does nothing unless both ``supervised_{modality}_model`` and
    ``unsupervised_{modality}_model`` exist. Rows where the two differ are
    reported with a warning; the supervised value is then kept and the
    supervised / unsupervised / val model columns are dropped.

    Args:
        frame: Results DataFrame (not modified in place).
        modality: ``"image"`` or ``"text"``.

    Returns:
        A new DataFrame with the merged column, or ``frame`` itself when the
        required columns are missing.
    """
    sup_col = f"supervised_{modality}_model"
    unsup_col = f"unsupervised_{modality}_model"
    if sup_col not in frame.columns or unsup_col not in frame.columns:
        return frame

    mismatches = frame[frame[sup_col] != frame[unsup_col]]
    if not mismatches.empty:
        print(f"\n[WARNING] Found {len(mismatches)} experiments where {sup_col} != {unsup_col}")
        print(mismatches[["experiment_id", sup_col, unsup_col]])

    merged = frame.assign(**{f"{modality}_model": frame[sup_col]})
    redundant = [
        c for c in (sup_col, unsup_col, f"val_{modality}_model") if c in merged.columns
    ]
    return merged.drop(columns=redundant)


df = build_results_dataframe(OT_LOGS_DIR)

if df.empty:
    print("No experiments found.")
else:
    # Hyper-parameter columns to keep (only those actually present survive).
    param_columns = [
        "seed", "linear-type", "width-factor", "logit_scale", "logit_bias",
        "semisupervised", "supervised", "n_supervised_pairs", "batch-size-supervised",
        "n_unsupervised_image", "n_unsupervised_text", "anchor_lam_x",
        "anchor_lam_y", "alpha_semisupervised_sail", "alpha_semisupervised_ot", "alpha_supervised_sail", "structure",
        "epsilon_sinkhorn_shared", "n_iters_sinkhorn_shared",
        "epsilon_sinkhorn_anchor", "n_iters_sinkhorn_anchor",
        "unsupervised_index_mode",
    ]

    # Identifier plus headline metrics, shown first in the final table.
    key_metrics = [
        "experiment_id", "coco_T2I R@1", "coco_T2I R@5",
        "coco_I2T R@1", "coco_I2T R@5", "imagenet_top1", "imagenet_top5",
    ]

    # Every embedding-path column is replaced by a "<split>_<modality>_model"
    # column and a "<split>_<modality>" dataset column.
    all_embedding_cols = [
        "supervised_image_embedding", "supervised_text_embedding",
        "unsupervised_image_embedding", "unsupervised_text_embedding",
        "val_image_embedding", "val_text_embedding"
    ]

    dataset_cols = []

    for col in all_embedding_cols:
        if col not in df.columns:
            continue

        base_name = col.replace("_embedding", "")
        extracted = df[col].apply(extract_model_and_dataset)

        df[f"{base_name}_model"] = extracted.apply(lambda pair: pair[0])
        df[base_name] = extracted.apply(lambda pair: pair[1])
        dataset_cols.append(base_name)

        df = df.drop(columns=[col])

    # Warn about supervised/unsupervised model disagreements, then keep a
    # single model column per modality.
    df = _merge_model_columns(df, "image")
    df = _merge_model_columns(df, "text")

    dataset_cols.sort()
    model_cols = ["image_model", "text_model"]

    final_cols = [
        c for c in key_metrics + model_cols + dataset_cols + param_columns
        if c in df.columns
    ]
    df = df[final_cols]

    # Sort by main metric
    if "coco_T2I R@1" in df.columns:
        df = df.sort_values(by="coco_T2I R@1", ascending=False)

    print(f"\nLoaded {len(df)} experiments.")

### Number of pairs

In [None]:
# SOTAlign sweep over the number of supervised pairs: models, datasets,
# unpaired-pool size and loss weights are pinned so only `n_supervised_pairs` varies.
df_n_supervised_pairs = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["unsupervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "cc3m_concat.h5")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]
df_n_supervised_pairs = df_n_supervised_pairs.sort_values(by="coco_T2I R@1", ascending=False)
df_n_supervised_pairs[["coco_T2I R@1", "coco_I2T R@1", "coco_T2I R@5", "coco_I2T R@5", "imagenet_top1", "imagenet_top5", "image_model", "text_model", "n_supervised_pairs"]]

In [None]:
# Supervised baseline runs on CC3M for every number of supervised pairs.
df_cc3m_siglip_supervised = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["supervised"].astype(str) == "True")
    & (df["alpha_supervised_sail"] == "1.0")
]
df_cc3m_siglip_supervised = df_cc3m_siglip_supervised.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_siglip_supervised[["experiment_id", "coco_T2I R@1", "coco_I2T R@1", "coco_T2I R@5", "coco_I2T R@5", "imagenet_top1", "imagenet_top5", "image_model", "text_model", "n_supervised_pairs"]]

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Use a large font for every figure drawn from here on.
plt.rcParams.update({'font.size': 20})

float_cols = ['coco_T2I R@1', 'coco_I2T R@1', 'coco_T2I R@5', 'coco_I2T R@5', 'imagenet_top1', 'imagenet_top5']
int_cols = ['n_supervised_pairs']

# Cast both the SOTAlign and the baseline selection: metrics become floats and
# pair counts nullable integers; entries that cannot be parsed become missing.
for _frame in (df_n_supervised_pairs, df_cc3m_siglip_supervised):
    for col in float_cols:
        _frame[col] = pd.to_numeric(_frame[col], errors='coerce')
    for col in int_cols:
        _frame[col] = pd.to_numeric(_frame[col], errors='coerce').astype('Int64')

# Order by number of pairs and keep only runs with at most 50k pairs.
_sotalign_by_pairs = df_n_supervised_pairs.sort_values(by='n_supervised_pairs')
_baseline_by_pairs = df_cc3m_siglip_supervised.sort_values(by='n_supervised_pairs')

df_sotalign_sorted = _sotalign_by_pairs[_sotalign_by_pairs['n_supervised_pairs'] <= 50000]
df_baseline_sorted = _baseline_by_pairs[_baseline_by_pairs['n_supervised_pairs'] <= 50000]

def plot_metric_comparison(metric_col, title, ylabel, filename):
    """Plot one metric against the number of supervised pairs for both methods.

    Reads the module-level frames ``df_sotalign_sorted`` and
    ``df_baseline_sorted``, draws them on a log-scaled x axis, saves the
    figure to ``filename`` and shows it.

    Args:
        metric_col: Column name of the metric to plot on the y axis.
        title: Currently unused (the figure is drawn without a title).
        ylabel: Label of the y axis.
        filename: Path the figure is written to.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    curves = (
        (df_sotalign_sorted, dict(marker='o', linestyle='-', color='b', label='SOTAlign (Ours)')),
        (df_baseline_sorted, dict(marker='^', linestyle='--', color='r', label='Supervised SigLIP (SAIL)')),
    )
    for frame, style in curves:
        ax.plot(frame['n_supervised_pairs'], frame[metric_col], linewidth=2, **style)

    ax.set_xscale('log')
    ax.set_xlabel('Number of pairs')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, which="both", ls="-", alpha=0.5)

    fig.tight_layout()
    fig.savefig(filename)
    plt.show()
    plt.close(fig)

# One SOTAlign-vs-baseline figure per headline metric:
# (metric column, title, y-axis label, output file).
for _metric, _title, _ylabel, _png in (
    ('coco_T2I R@1', 'COCO Text-to-Image Retrieval R@1', 'T2I R@1', 'plot_coco_t2i_r1.png'),
    ('coco_I2T R@1', 'COCO Image-to-Text Retrieval R@1', 'I2T R@1', 'plot_coco_i2t_r1.png'),
    ('imagenet_top1', 'ImageNet Top 1 Accuracy', 'Top-1 Accuracy', 'plot_imagenet_top1.png'),
):
    plot_metric_comparison(_metric, _title, _ylabel, _png)

### Number of unpaired samples

In [None]:
# SOTAlign sweep over the number of unpaired samples: everything except
# `n_unsupervised_image` / `n_unsupervised_text` is pinned (10k supervised pairs).
df_n_unsupervised_samples = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["unsupervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]
df_n_unsupervised_samples = df_n_unsupervised_samples.sort_values(by="coco_T2I R@1", ascending=False)
df_n_unsupervised_samples[["experiment_id", "coco_T2I R@1", "coco_I2T R@1", "coco_T2I R@5", "coco_I2T R@5", "imagenet_top1", "imagenet_top5", "image_model", "text_model", "n_unsupervised_image", "n_unsupervised_text"]]

In [None]:
# Show the merged results table for manual inspection.
df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import io
import numpy as np

# Set the global matplotlib font size to 20 for the figures drawn below.
plt.rcParams.update({'font.size': 20})

# ---------------------------------------------------------
# 1. Data Preparation
# ---------------------------------------------------------

# Hard-coded SOTAlign results for different numbers of unpaired samples.
# The rows are assembled into the same tab-separated text that is then parsed
# with pandas, so column dtypes are inferred by the CSV reader.
_unsup_table = (
    ("experiment_id", "coco_T2I R@1", "coco_I2T R@1", "coco_T2I R@5", "coco_I2T R@5",
     "imagenet_top1", "imagenet_top5", "image_model", "text_model",
     "n_unsupervised_image", "n_unsupervised_text"),
    ("17", "0.22612", "0.2928", "0.45540", "0.5424",
     "40.064", "70.828", "dinov2-large", "NV-Embed-v2", "100000", "100000"),
    ("4", "0.22260", "0.2868", "0.44948", "0.5232",
     "39.026", "68.772", "dinov2-large", "NV-Embed-v2", "1000000", "1000000"),
    ("2", "0.19952", "0.2562", "0.42232", "0.4930",
     "34.858", "65.064", "dinov2-large", "NV-Embed-v2", "10000", "10000"),
)
data_unsupervised = "\n" + "".join("\t".join(cells) + "\n" for cells in _unsup_table)
df_unsup = pd.read_csv(io.StringIO(data_unsupervised), sep='\t')

# Hard-coded baseline point, plotted at 0 unpaired samples.
baseline_data = {
    column: [value]
    for column, value in (
        ("coco_T2I R@1", 0.18924),
        ("coco_I2T R@1", 0.2352),
        ("imagenet_top1", 32.714),
        ("n_unsupervised_image", 0),
    )
}
df_baseline = pd.DataFrame(baseline_data)

# Stack baseline and SOTAlign rows into one series ordered by sample count.
cols_to_use = ["n_unsupervised_image", "coco_T2I R@1", "coco_I2T R@1", "imagenet_top1"]
df_combined = (
    pd.concat([df_baseline[cols_to_use], df_unsup[cols_to_use]], ignore_index=True)
    .sort_values(by="n_unsupervised_image")
)

# ---------------------------------------------------------
# 2. Plotting Logic
# ---------------------------------------------------------

def plot_equal_spacing(metric_col, title, ylabel, filename):
    """Plot a metric over the rows of ``df_combined`` at equally spaced x positions.

    The x axis uses the row positions 0, 1, 2, ... and labels each tick with
    the abbreviated sample count of that row. The first row is drawn as the
    baseline marker, all remaining rows as SOTAlign markers. The figure is
    saved to ``filename`` and shown.

    Args:
        metric_col: Column of the module-level ``df_combined`` to plot.
        title: Currently unused (the figure is drawn without a title).
        ylabel: Label of the y axis.
        filename: Path the figure is written to.
    """
    def format_label(n):
        # 0 -> "0", millions -> "<k>M", thousands -> "<k>k", anything else verbatim.
        if n == 0:
            return "0"
        for threshold, suffix in ((1000000, "M"), (1000, "k")):
            if n >= threshold:
                return f"{int(n/threshold)}{suffix}"
        return str(n)

    fig, ax = plt.subplots(figsize=(8, 6))

    positions = np.arange(len(df_combined))
    scores = df_combined[metric_col].values
    tick_text = [format_label(n) for n in df_combined["n_unsupervised_image"]]

    # Connecting line through all points; it carries no legend entry.
    ax.plot(positions, scores, color='blue', linestyle='-', linewidth=2, zorder=1)

    # SOTAlign markers are added before the baseline marker so that they are
    # listed first in the legend.
    ax.scatter(positions[1:], scores[1:], color='blue', marker='o', s=100, zorder=2, label='SOTAlign (Ours)')
    ax.scatter(positions[0], scores[0], color='red', marker='^', s=150, zorder=2, label='Supervised SigLIP (SAIL)')

    ax.set_xticks(positions)
    ax.set_xticklabels(tick_text)
    ax.set_xlabel('Number of unsupervised samples')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, which="both", ls="-", alpha=0.5)

    fig.tight_layout()
    fig.savefig(filename)
    plt.show()
    plt.close(fig)

# ---------------------------------------------------------
# 3. Generate Plots
# ---------------------------------------------------------

# One equally spaced figure per metric:
# (metric column, title, y-axis label, output file).
for _metric, _title, _ylabel, _png in (
    ('coco_T2I R@1', 'COCO Text-to-Image Retrieval R@1', 'T2I R@1', 'equal_coco_t2i.png'),
    ('coco_I2T R@1', 'COCO Image-to-Text Retrieval R@1', 'I2T R@1', 'equal_coco_i2t.png'),
    ('imagenet_top1', 'ImageNet Top 1 Accuracy', 'Top-1 Accuracy', 'equal_imagenet.png'),
):
    plot_equal_spacing(_metric, _title, _ylabel, _png)

In [None]:
# Nested mapping {n_unsupervised_image: {"ours": {...metrics...}}}.
# If several runs share the same sample count, the last one iterated wins.
results = {}

for _, run in df_n_unsupervised_samples.iterrows():
    # The count may be stored as a string or float, hence the int() cast.
    n_samples = int(run["n_unsupervised_image"])

    ours = {
        # COCO recalls are scaled by 100 (fraction -> percentage).
        "t2i_r1": run["coco_T2I R@1"] * 100,
        "i2t_r1": run["coco_I2T R@1"] * 100,
        # The ImageNet accuracy is stored unscaled.
        "cls_acc": run["imagenet_top1"],
    }
    results[n_samples] = {"ours": ours}

In [None]:
# Show the collected results dictionary.
results

In [None]:
# Use SigLIP semisupervised baseline as reference for 0 unsupervised samples:
# same setup as the SOTAlign runs, but with the OT loss weight set to "0.0".
df_cc3m_siglip_semisupervised = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["unsupervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0")
]

df_cc3m_siglip_semisupervised = df_cc3m_siglip_semisupervised.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_siglip_semisupervised[["experiment_id", "coco_T2I R@1", "coco_I2T R@1", "coco_T2I R@5", "coco_I2T R@5", "imagenet_top1", "imagenet_top5", "image_model", "text_model", "unsupervised_text", "unsupervised_image"]]

In [None]:
# Discard baseline runs that lack a COCO T2I R@1 or an ImageNet top-1 score.
df_cc3m_siglip_semisupervised = df_cc3m_siglip_semisupervised.dropna(
    subset=["coco_T2I R@1", "imagenet_top1"]
)

# Register the first remaining row as the entry for 0 unsupervised samples.
if not df_cc3m_siglip_semisupervised.empty:
    row_zero = df_cc3m_siglip_semisupervised.iloc[0]

    # Both "*_r1" entries are read from the R@1 columns and scaled by 100
    # (fraction -> percentage); the ImageNet accuracy is stored unscaled.
    _zero_entry = {
        key: row_zero[column] * 100
        for key, column in (("t2i_r1", "coco_T2I R@1"), ("i2t_r1", "coco_I2T R@1"))
    }
    _zero_entry["cls_acc"] = row_zero["imagenet_top1"]

    # The "ours" key is reused so the entry has the same shape as the others.
    results[0] = {"ours": _zero_entry}

In [None]:
# Show the results dictionary again, now including the entry for key 0 if one was added.
results

In [None]:
from plotting.plot_unpaired_samples_vs_performance import plot_num_unpaired

In [None]:
# One figure per metric stored in `results`:
# (metric key, title, y-axis label, output file).
_unpaired_plots = (
    ("t2i_r1", "COCO T2I Retrieval", "R@1", "t2i_r1_num_unpaired_samples.png"),
    ("i2t_r1", "COCO I2T Retrieval", "R@1", "i2t_r1_num_unpaired_samples.png"),
    ("cls_acc", "ImageNet Classification", "Accuracy (%)", "cls_acc_num_unpaired_samples.png"),
)
p1, p2, p3 = [
    plot_num_unpaired(
        results,
        baseline=None,
        metric_key=_key,
        title=_title,
        ylabel=_ylabel,
        filename=_file,
    )
    for _key, _title, _ylabel, _file in _unpaired_plots
]

### Raw vs synthetic captions

In [None]:
# Compare raw and shortSV CC3M captions on either side (supervised / unsupervised
# text); everything else is pinned to the default SOTAlign setup.
df_cc3m_raw_synthetic = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & df["supervised_text"].isin(["cc3m_raw_caption.h5", "cc3m_shortSV_captions.h5"])
    & df["unsupervised_text"].isin(["cc3m_raw_caption.h5", "cc3m_shortSV_captions.h5"])
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]
df_cc3m_raw_synthetic = df_cc3m_raw_synthetic.sort_values(by="coco_T2I R@1", ascending=False)
# Columns deliberately left out of this view: "coco_T2I R@5", "coco_I2T R@5", "imagenet_top5"
df_cc3m_raw_synthetic[["coco_T2I R@1", "coco_I2T R@1", "imagenet_top1", "image_model", "text_model", "supervised_text", "unsupervised_text"]]

### CC3M anchors and CC12M unsupervised

In [None]:
# CC3M supervised pairs with unpaired data from either CC3M or CC12M.
df_cc3m_cc12m = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & df["unsupervised_text"].isin(["cc3m_raw_caption.h5", "cc12m_raw_caption.h5"])
    & (df["supervised_image"] == "cc3m_concat.h5")
    & df["unsupervised_image"].isin(["cc3m_concat.h5", "cc12m_concat.h5"])
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]
df_cc3m_cc12m = df_cc3m_cc12m.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_cc12m[["imagenet_top1", "coco_T2I R@1", "coco_I2T R@1", "image_model", "text_model", "unsupervised_image", "unsupervised_text"]]

### Unsupervised ImageNet images

In [None]:
# CC3M supervised pairs with ImageNet-1k images as the unpaired image pool
# (unpaired text from CC3M or CC12M).
df_cc3m_imagenet = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & df["unsupervised_text"].isin(["cc3m_raw_caption.h5", "cc12m_raw_caption.h5"])
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "imagenet1k_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]
df_cc3m_imagenet = df_cc3m_imagenet.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_imagenet[["imagenet_top1", "coco_T2I R@1", "coco_I2T R@1", "image_model", "text_model", "unsupervised_image", "unsupervised_text"]]

### Unsupervised COCO images and text

In [None]:
# CC3M supervised pairs with COCO images as the unpaired image pool
# (unpaired text from CC3M or CC12M).
df_cc3m_coco = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & df["unsupervised_text"].isin(["cc3m_raw_caption.h5", "cc12m_raw_caption.h5"])
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "coco_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]
df_cc3m_coco = df_cc3m_coco.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_coco[["imagenet_top1", "coco_T2I R@1", "coco_I2T R@1", "image_model", "text_model", "unsupervised_image", "unsupervised_text"]]

In [None]:
# CC3M supervised pairs with the five COCO caption files as the unpaired text pool.
# NOTE: this rebinds `df_cc3m_coco`, replacing the selection of the previous cell;
# the unpaired image dataset is not constrained here.
df_cc3m_coco = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["unsupervised_text"] == "coco_raw_caption_idx=0.h5 + coco_raw_caption_idx=1.h5 + coco_raw_caption_idx=2.h5 + coco_raw_caption_idx=3.h5 + coco_raw_caption_idx=4.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]
df_cc3m_coco = df_cc3m_coco.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_coco[["imagenet_top1", "coco_T2I R@1", "coco_I2T R@1", "image_model", "text_model", "unsupervised_image", "unsupervised_text"]]

### Supervised SigLIP baseline

In [None]:
# Supervised baseline on CC3M restricted to 10k supervised pairs.
# NOTE: this rebinds `df_cc3m_siglip_supervised`, which an earlier cell defined
# without the `n_supervised_pairs` restriction.
df_cc3m_siglip_supervised = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["supervised"].astype(str) == "True")
    & (df["alpha_supervised_sail"] == "1.0")
]
df_cc3m_siglip_supervised = df_cc3m_siglip_supervised.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_siglip_supervised[["experiment_id", "coco_T2I R@1", "coco_I2T R@1", "coco_T2I R@5", "coco_I2T R@5", "imagenet_top1", "imagenet_top5", "image_model", "text_model", "unsupervised_text", "unsupervised_image"]]

### Semisupervised SigLIP baseline

In [None]:
# Semisupervised baseline: the default SOTAlign setup with the OT loss weight "0.0".
# NOTE: this rebinds `df_cc3m_siglip_semisupervised` from the unfiltered `df`,
# so rows dropped from it by an earlier cell reappear here.
df_cc3m_siglip_semisupervised = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["unsupervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0")
]

df_cc3m_siglip_semisupervised = df_cc3m_siglip_semisupervised.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_siglip_semisupervised[["experiment_id", "coco_T2I R@1", "coco_I2T R@1", "coco_T2I R@5", "coco_I2T R@5", "imagenet_top1", "imagenet_top5", "image_model", "text_model", "unsupervised_text", "unsupervised_image"]]

### STRUCTURE baseline

In [None]:
# Turn the `structure` flag into a real boolean column. The values come from
# params.txt as strings, and `astype(bool)` would map the non-empty string
# "False" to True; comparing the text with "True" avoids that, maps missing
# values to False, and stays correct when this cell is re-run on the
# already-converted column.
df["structure"] = df["structure"].astype(str) == "True"
df_cc3m_structure = df[
    (df["image_model"] == "dinov2-large")
    & (df["text_model"] == "NV-Embed-v2")
    & (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["unsupervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # Same string-flag handling as for `structure` above.
    & (df["semisupervised"].astype(str) == "True")
    & df["structure"]
]

df_cc3m_structure = df_cc3m_structure.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_structure[["experiment_id", "coco_T2I R@1", "coco_I2T R@1", "imagenet_top1", "image_model", "text_model", "unsupervised_text", "unsupervised_image", "structure"]]

### Model ablation

In [None]:
# Leftover model filter. As code this fragment was a syntax error (dangling
# `&`, stray indentation) and stopped "Run All", so it is kept as a comment.
# Add these two conditions to a selection to pin it to the default model pair:
#   (df["image_model"] == "dinov2-large") &
#   (df["text_model"] == "NV-Embed-v2") &

In [None]:
# Model ablation: the default SOTAlign setup without any restriction on the
# image or text model.
df_cc3m_models = df[
    (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["unsupervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]

df_cc3m_models = df_cc3m_models.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_models[["imagenet_top1", "coco_T2I R@1", "coco_I2T R@1", "image_model", "text_model", "unsupervised_text", "unsupervised_image"]]

In [None]:
# Model ablation (same selection as the preceding cell, which this cell
# repeats): the default SOTAlign setup without any restriction on the image
# or text model.
df_cc3m_models = df[
    (df["supervised_text"] == "cc3m_raw_caption.h5")
    & (df["unsupervised_text"] == "cc3m_raw_caption.h5")
    & (df["supervised_image"] == "cc3m_concat.h5")
    & (df["unsupervised_image"] == "cc3m_concat.h5")
    & (df["n_supervised_pairs"] == "10000")
    & (df["n_unsupervised_image"] == "1000000")
    & (df["n_unsupervised_text"] == "1000000")
    # params.txt values are loaded as strings, so the flag is compared with
    # "True": a raw string column used as a mask is truthy even for "False".
    & (df["semisupervised"].astype(str) == "True")
    & (df["alpha_semisupervised_sail"] == "1.0")
    & (df["alpha_semisupervised_ot"] == "0.0001")
]

df_cc3m_models = df_cc3m_models.sort_values(by="coco_T2I R@1", ascending=False)
df_cc3m_models[["imagenet_top1", "coco_T2I R@1", "coco_I2T R@1", "image_model", "text_model", "unsupervised_text", "unsupervised_image"]]