In [1]:
import json
import os
import subprocess
# Restrict CUDA to device index 4. The evaluation commands launched later run
# as child processes and inherit this environment variable. Adjust the index
# to match the machine the notebook runs on.
os.environ["CUDA_VISIBLE_DEVICES"] = "4"
import huggingface_hub
# Authenticate with the Hugging Face Hub. The token is taken from the HF_TOKEN
# environment variable so it never has to be pasted into (and saved with) the
# notebook. When the variable is unset or empty, token=None makes login() fall
# back to its interactive prompt.
huggingface_hub.login(token=os.environ.get("HF_TOKEN") or None)  # get your own HF token

# Function to load and display JSON results
def load_and_print_results(file_path):
    """Print every JSON record stored in a line-delimited results file.

    Each non-blank line of ``file_path`` is parsed as an independent JSON
    document and printed. A line that is not valid JSON is reported and
    skipped, so one bad record does not hide the others.

    Args:
        file_path: Path to the results file (one JSON document per line).

    Returns:
        None. All output goes to stdout.
    """
    # isfile rather than exists: a directory at this path cannot be opened
    # for reading and is treated the same as a missing results file.
    if not os.path.isfile(file_path):
        print(f"No results found at {file_path}")
        return

    # Decode explicitly as UTF-8 instead of relying on the locale default.
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            # Blank lines (for example a trailing newline) are not records;
            # skip them instead of reporting a decode error.
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON: {e}")
            else:
                print(data)

# Function to run a command and print output
def run_command(command):
    """Run a shell command to completion and echo what it produced.

    The command is executed through the shell (so redirections inside the
    string work). Captured stdout is printed, followed by captured stderr
    when it is non-empty. If the process exits with a non-zero status, that
    status is reported as well; without this, a command that redirects its
    own output away would fail without leaving any trace.

    Args:
        command: Full command line, passed to the shell as one string.

    Returns:
        None. Everything is reported via stdout.
    """
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    print(result.stdout)
    if result.stderr:
        print(result.stderr)
    # Surface failures explicitly: the captured streams may be empty even
    # though the command did not succeed.
    if result.returncode != 0:
        print(f"Command exited with non-zero status {result.returncode}: {command}")


  from .autonotebook import tqdm as notebook_tqdm


# Zero-shot cross-modal retrieval

In [2]:
# Build the clip_benchmark CLI call for the zero-shot retrieval task.
# The command discards its own stdout/stderr; the file named by --output is
# read back in the following cell.
cli_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt",
    "--dataset 'pathmmu_retrieval'",
    "--task 'zeroshot_retrieval'",
    "--batch_size 128",
    "--num_workers 16",
    "--seed 42",
    "--recall_k 1 10 50",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo'",
    "--output './results/benchmark_mm_retrieval.json' > /dev/null 2>&1",
]
command = " ".join(cli_parts)
run_command(command)





In [3]:
# Print the records written by the retrieval evaluation above.
load_and_print_results(file_path='./results/benchmark_mm_retrieval.json')

{'dataset': 'pathmmu_retrieval', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'zeroshot_retrieval', 'metrics': {'image_retrieval_recall@1': 0.03087213821709156, 'text_retrieval_recall@1': 0.040648315101861954, 'image_retrieval_recall@10': 0.15731926262378693, 'text_retrieval_recall@10': 0.17661435902118683, 'image_retrieval_recall@50': 0.3395935297012329, 'text_retrieval_recall@50': 0.34435296058654785}, 'language': 'en'}
{'dataset': 'pathmmu_retrieval', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/pytorch_model.bin', 'task': 'zeroshot_retrieval', 'metrics': {'image_retrieval_recall@1': 0.019938256591558456, 'text_retrieval_recall@1': 0.022896835580468178, 'image_retrieval_recall@10': 0.10869564861059189, 'text_retrieval_recall@10': 0.1201440691947937, 'image_retrieval_recall@50': 0.2658862769603729, 'text_retrieval_recall@50': 0.27720606327056885}, 'language': 'en'}


# Zero-shot classification

In [4]:
# Build the clip_benchmark CLI call for zero-shot classification on three
# datasets. The command discards its own stdout/stderr; the file named by
# --output is read back in the following cell.
cli_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt",
    "--dataset 'skin' 'pannuke' 'unitopatho'",
    "--task 'zeroshot_classification'",
    "--batch_size 256",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo'",
    "--output './results/zeroshot_cls.json' > /dev/null 2>&1",
]
command = " ".join(cli_parts)
run_command(command)





In [5]:
# Print the records written by the zero-shot classification run above.
load_and_print_results(file_path='./results/zeroshot_cls.json')

{'dataset': 'skin', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'zeroshot_classification', 'metrics': {'balanced_acc': 0.5878613953752625}, 'language': 'en'}
{'dataset': 'pannuke', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'zeroshot_classification', 'metrics': {'balanced_acc': 0.8280807628343463}, 'language': 'en'}
{'dataset': 'unitopatho', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'zeroshot_classification', 'metrics': {'balanced_acc': 0.368094020538031}, 'language': 'en'}
{'dataset': 'skin', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/pytorch_model.bin', 'task': 'zeroshot_classification', 'metrics': {'balanced_acc': 0.21905265325279444}, 'language': 'en'}
{'dataset': 'pannuke', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/pytorch_model.bin', 'task': 'zeroshot_classification', 'metrics': {'balanced_acc': 0.5

# Zero-shot Image2Image Retrieval

In [6]:
# image-to-image retrieval
# Build the clip_benchmark CLI call for the image_retrieval task.
# The command discards its own stdout/stderr; the file named by --output is
# read back in the following cell.
cli_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt",
    "--dataset 'unitopatho_retrieval'",
    "--task 'image_retrieval'",
    "--batch_size 128",
    "--num_workers 8",
    "--seed 41",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo'",
    "--output './results/image_retrieval.json' > /dev/null 2>&1",
]
command = " ".join(cli_parts)
run_command(command)





In [7]:
# Print the records written by the image-to-image retrieval run above.
load_and_print_results(file_path='./results/image_retrieval.json')

{'dataset': 'unitopatho_retrieval', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'image_retrieval', 'metrics': {'acc_top1': 0.8968120813369751, 'acc_top3': 0.9610947966575623, 'acc_top5': 0.9767197966575623, 'mMv_top5': 0.8673447966575623}, 'language': 'en'}
{'dataset': 'unitopatho_retrieval', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/pytorch_model.bin', 'task': 'image_retrieval', 'metrics': {'acc_top1': 0.833787739276886, 'acc_top3': 0.9383389353752136, 'acc_top5': 0.9625629186630249, 'mMv_top5': 0.8200503587722778}, 'language': 'en'}


# 10-shot classification

It will take a long time to extract features and run supervised classification.

In [8]:
# Few-shot linear probe commands
# Both values are interpolated into the command below and into the results
# path that the following cell reads.
seed, k_shot = 123, 10

# Build the clip_benchmark CLI call for the k-shot linear probe.
# The command discards its own stdout/stderr.
cli_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt",
    "--dataset  'skin' 'unitopatho' 'pannuke'",
    "--task 'linear_probe'",
    "--batch_size 256",
    "--num_workers 8",
    f"--fewshot_k {k_shot}",
    f"--seed {seed}",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo'",
    f"--output './results/benchmark_fs_{k_shot}shot_seed{seed}.json' > /dev/null 2>&1",
]
command = " ".join(cli_parts)

run_command(command)





In [9]:
# Print the few-shot results; the path is rebuilt from k_shot and seed set in the previous cell.
load_and_print_results(file_path=f'./results/benchmark_fs_{k_shot}shot_seed{seed}.json')

{'dataset': 'skin', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.898394069992517}, 'language': 'en'}
{'dataset': 'unitopatho', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.5591443143634388}, 'language': 'en'}
{'dataset': 'pannuke', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.8010187540834307}, 'language': 'en'}
{'dataset': 'skin', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/pytorch_model.bin', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.8813451499159908}, 'language': 'en'}
{'dataset': 'unitopatho', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/pytorch_model.bin', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.5176345673715952}, 'language': 'en'}
{'dataset': 'pann

# Linear probe classification

It will take a long time to extract features and run supervised classification.

In [10]:
# Linear probe commands
# Interpolated into the command below and into the results path that the
# following cell reads.
seed = 123

# Remove the local features/ directory before launching
# (presumably a feature cache left by earlier runs; confirm).
os.system("rm -rf features/")

# Build the clip_benchmark CLI call for the linear probe with --fewshot_k -1.
# The command discards its own stdout/stderr.
cli_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt",
    "--dataset 'skin' 'unitopatho' 'pannuke'",
    "--task 'linear_probe'",
    "--batch_size 256",
    "--num_workers 8",
    "--fewshot_k -1",
    f"--seed {seed}",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo'",
    "--ms_aug",
    f"--output './results/linear_probe_{seed}.json' > /dev/null 2>&1 ",
]
command = " ".join(cli_parts)

run_command(command)





In [11]:

# Print the linear-probe results; the path is rebuilt from seed set in the previous cell.
load_and_print_results(file_path=f'./results/linear_probe_{seed}.json')

{'dataset': 'skin', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.9582715967801188}, 'language': 'en'}
{'dataset': 'unitopatho', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.7796039693199256}, 'language': 'en'}
{'dataset': 'pannuke', 'model': 'musk_large_patch16_384', 'pretrained': 'hf_hub:xiangjx/musk', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.9604754845671069}, 'language': 'en'}
{'dataset': 'skin', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/pytorch_model.bin', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.9447268567940842}, 'language': 'en'}
{'dataset': 'unitopatho', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/pytorch_model.bin', 'task': 'linear_probe', 'metrics': {'balanced_acc': 0.7383896813889964}, 'language': 'en'}
{'dataset': 'pan