In [1]:
import json
import os
import subprocess
# Restrict CUDA to a single device for this kernel. The variable is set here,
# before any command is launched, so processes started later without an
# explicit environment see the same value.
CUDA_DEVICE_INDEX = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_DEVICE_INDEX

# Function to load and display JSON results
def load_and_print_results(file_path):
    """Print every JSON record stored in a JSON Lines results file.

    The file is read line by line; each non-blank line is decoded as an
    independent JSON document and printed. Blank or whitespace-only lines
    (for example a trailing newline at the end of the file) are skipped
    instead of being reported as decode errors.

    Args:
        file_path: Path of the results file to read.

    Returns:
        None. Decoded records, decode errors and the "not found" notice are
        all written to standard output.
    """
    if not os.path.exists(file_path):
        print(f"No results found at {file_path}")
        return

    # Explicit encoding so decoding does not depend on the platform locale.
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            # A blank line carries no record; json.loads would reject it.
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                # Keep going: one malformed line must not hide the others.
                print(f"Error decoding JSON: {e}")
            else:
                print(data)

# Function to run a command and print output
def run_command(command):
    """Run a shell command to completion and print what it wrote.

    The command string is executed through the shell. Its standard output is
    printed first, followed by its standard error if any was produced. A
    non-zero exit status is reported explicitly so that a failed run is not
    mistaken for a successful one.

    Args:
        command: Command line to execute, as a single string.

    Returns:
        None.
    """
    # Output is captured, so nothing is shown until the command has finished.
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    print(result.stdout)
    if result.stderr:
        print(result.stderr)
    if result.returncode != 0:
        print(f"Command failed with exit code {result.returncode}")


# Zero-shot cross-modal retrieval

In [2]:
# Evaluate the models listed in models.txt on the 'zeroshot_retrieval' task.
# Every fragment carries its own trailing space, so they are joined without a
# separator.
retrieval_fragments = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'pathmmu_retrieval' ",
    "--task 'zeroshot_retrieval' ",
    "--batch_size 512 ",
    "--num_workers 16 ",
    "--seed 42 ",
    "--recall_k 1 10 50 ",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo' ",
    "--output './results/benchmark_mm_retrieval.json'",
]
command = "".join(retrieval_fragments)
run_command(command)


Models: [['conch', '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt'], ['musk_large_patch16_384', '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth']]
Datasets: ['pathmmu_retrieval']
Languages: ['en']
Running 'zeroshot_retrieval' on 'pathmmu_retrieval' with the model '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt' on language 'en'
Dataset size: 7774
Dataset split: test
Dump results to: ./results/benchmark_mm_retrieval.json
Running 'zeroshot_retrieval' on 'pathmmu_retrieval' with the model '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth' on language 'en'
Load ckpt from /mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth
Dataset size: 7774
Dataset split: test
Dump results to: ./results/benchmark_mm_retrieval.json


0it [00:00, ?it/s]
1it [00:08,  8.84s/it]
2it [00:10,  4.57s/it]
3it [00:12,  3.21s/it]
4it [00:13,  2.61s/it]
5it [00:15,  2.23s/it]
6it [00:16,  2.01s/it]
7it [00:18,  1.87s/it]
8it [00:20,  

In [3]:
# Display the records stored at the path passed as --output to the retrieval run.
load_and_print_results(file_path='./results/benchmark_mm_retrieval.json')

{'dataset': 'pathmmu_retrieval', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt', 'task': 'zeroshot_retrieval', 'metrics': {'image_retrieval_recall@1': 0.019938256591558456, 'text_retrieval_recall@1': 0.022253666073083878, 'image_retrieval_recall@10': 0.10779521614313126, 'text_retrieval_recall@10': 0.12464625388383865, 'image_retrieval_recall@50': 0.2635708749294281, 'text_retrieval_recall@50': 0.27450475096702576}, 'language': 'en'}
{'dataset': 'pathmmu_retrieval', 'model': 'musk_large_patch16_384', 'pretrained': '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth', 'task': 'zeroshot_retrieval', 'metrics': {'image_retrieval_recall@1': 0.03074350394308567, 'text_retrieval_recall@1': 0.040648315101861954, 'image_retrieval_recall@10': 0.1574479043483734, 'text_retrieval_recall@10': 0.17661435902118683, 'image_retrieval_recall@50': 0.34062257409095764, 'text_retrieval_recall@50': 0.34422433376312256}, 'language': 'en'}


# Zero-shot classification

In [4]:
# Evaluate the models listed in models.txt on the 'zeroshot_classification'
# task for three datasets. Every fragment carries its own trailing space, so
# they are joined without a separator.
classification_fragments = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'skin' 'pannuke' 'unitopatho' ",
    "--task 'zeroshot_classification' ",
    "--batch_size 256 ",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo' ",
    "--output './results/zeroshot_cls.json'",
]
command = "".join(classification_fragments)
run_command(command)


Models: [['conch', '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt'], ['musk_large_patch16_384', '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth']]
Datasets: ['skin', 'pannuke', 'unitopatho']
Languages: ['en']
Running 'zeroshot_classification' on 'skin' with the model '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt' on language 'en'
Dataset size: 27439
Dataset split: test
Dataset classes: ['nontumor_skin_necrosis_necrosis', 'nontumor_skin_muscle_skeletal', 'nontumor_skin_sweatglands_sweatglands', 'nontumor_skin_vessel_vessel', 'nontumor_skin_elastosis_elastosis', 'nontumor_skin_chondraltissue_chondraltissue', 'nontumor_skin_hairfollicle_hairfollicle', 'nontumor_skin_epidermis_epidermis', 'nontumor_skin_nerves_nerves', 'nontumor_skin_subcutis_subcutis', 'nontumor_skin_dermis_dermis', 'nontumor_skin_sebaceousglands_sebaceousglands', 'tumor_skin_epithelial_sqcc', 'tumor_skin_melanoma_melanoma', 'tumor_skin_epithelial_bcc', 'tumor_skin_naevus_naevus

In [5]:
# Display the records stored at the path passed as --output to the
# zero-shot classification run.
load_and_print_results(file_path='./results/zeroshot_cls.json')

{'dataset': 'skin', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt', 'task': 'zeroshot_classification', 'metrics': {'acc1': 0.10536098254309559, 'acc5': 0.38569189839279855, 'mean_per_class_recall': 0.2216787058303029}, 'language': 'en'}
{'dataset': 'pannuke', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt', 'task': 'zeroshot_classification', 'metrics': {'acc1': 0.6928702010968921, 'acc5': nan, 'mean_per_class_recall': 0.6468658593258796}, 'language': 'en'}
{'dataset': 'unitopatho', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt', 'task': 'zeroshot_classification', 'metrics': {'acc1': 0.394079200307574, 'acc5': 0.9615532487504805, 'mean_per_class_recall': 0.26908394326509305}, 'language': 'en'}
{'dataset': 'skin', 'model': 'musk_large_patch16_384', 'pretrained': '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth', 'task': 'zeroshot_classificat

# Zero-shot Image2Image Retrieval

In [6]:
# Image-to-image retrieval: evaluate the models listed in models.txt on the
# 'image_retrieval' task. Every fragment carries its own trailing space, so
# they are joined without a separator.
image_retrieval_fragments = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'unitopatho_retrieval' ",
    "--task 'image_retrieval' ",
    "--batch_size 512 ",
    "--num_workers 16 ",
    "--seed 41 ",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo' ",
    "--output './results/image_retrieval.json'",
]
command = "".join(image_retrieval_fragments)
run_command(command)


Models: [['conch', '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt'], ['musk_large_patch16_384', '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth']]
Datasets: ['unitopatho_retrieval']
Languages: ['en']
Running 'image_retrieval' on 'unitopatho_retrieval' with the model '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt' on language 'en'
Dataset size: 8669
Dataset split: test
Dataset classes: ['HP', 'NORM', 'TA.HG', 'TA.LG', 'TVA.HG', 'TVA.LG']
Dataset number of classes: 6
Dump results to: ./results/image_retrieval.json
Running 'image_retrieval' on 'unitopatho_retrieval' with the model '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth' on language 'en'
Load ckpt from /mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth
Dataset size: 8669
Dataset split: test
Dataset classes: ['HP', 'NORM', 'TA.HG', 'TA.LG', 'TVA.HG', 'TVA.LG']
Dataset number of classes: 6
Dump results to: ./results/image_retrieval.json


0it [0

In [7]:
# Display the records stored at the path passed as --output to the
# image retrieval run.
load_and_print_results(file_path='./results/image_retrieval.json')

{'dataset': 'unitopatho_retrieval', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt', 'task': 'image_retrieval', 'metrics': {'acc_top1': 0.8372361063957214, 'acc_top3': 0.936209499835968, 'acc_top5': 0.9622793793678284, 'mMv_top5': 0.8255854249000549}, 'language': 'en'}
{'dataset': 'unitopatho_retrieval', 'model': 'musk_large_patch16_384', 'pretrained': '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth', 'task': 'image_retrieval', 'metrics': {'acc_top1': 0.896527886390686, 'acc_top3': 0.9649325013160706, 'acc_top5': 0.9795824289321899, 'mMv_top5': 0.8764563202857971}, 'language': 'en'}


# Few-shot classification

This step takes a long time because it extracts features and then runs supervised classification.

In [8]:
# Few-shot linear probe run. The shot count and the seed are passed on the
# command line and are also embedded in the output file name.
seed, k_shot = 287, 10

# Every fragment carries its own trailing space, so they are joined without a
# separator. Only the fragments that interpolate a value are f-strings.
fewshot_fragments = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'skin' 'pannuke' 'unitopatho' ",
    "--task 'linear_probe' ",
    "--batch_size 512 ",
    "--num_workers 16 ",
    f"--fewshot_k {k_shot} ",
    f"--seed {seed} ",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo' ",
    f"--output './results/benchmark_fs_{k_shot}shot_seed{seed}.json'",
]
command = "".join(fewshot_fragments)

run_command(command)


Models: [['conch', '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt'], ['musk_large_patch16_384', '/mnt/radonc-li01/private/xiangjx/code/musk_v2/MUSK/musk/models/musk.pth']]
Datasets: ['skin', 'pannuke', 'unitopatho']
Languages: ['en']
Running 'linear_probe' on 'skin' with the model '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt' on language 'en'
Dataset size: 27439
Dataset split: test
Dataset classes: ['nontumor_skin_necrosis_necrosis', 'nontumor_skin_muscle_skeletal', 'nontumor_skin_sweatglands_sweatglands', 'nontumor_skin_vessel_vessel', 'nontumor_skin_elastosis_elastosis', 'nontumor_skin_chondraltissue_chondraltissue', 'nontumor_skin_hairfollicle_hairfollicle', 'nontumor_skin_epidermis_epidermis', 'nontumor_skin_nerves_nerves', 'nontumor_skin_subcutis_subcutis', 'nontumor_skin_dermis_dermis', 'nontumor_skin_sebaceousglands_sebaceousglands', 'tumor_skin_epithelial_sqcc', 'tumor_skin_melanoma_melanoma', 'tumor_skin_epithelial_bcc', 'tumor_skin_naevus_naevus']
Dataset 

In [9]:
# Display the few-shot records; the path is rebuilt from the current values of
# k_shot and seed, so both must still hold the values used for the run.
load_and_print_results(file_path=f'./results/benchmark_fs_{k_shot}shot_seed{seed}.json')

{'dataset': 'skin', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt', 'task': 'linear_probe', 'metrics': {'lp_acc1': 0.8759065563613835, 'lp_acc5': 0.993658661029921, 'lp_mean_per_class_recall': 0.8614277026163812, 'lr': 0.1, 'epochs': 10, 'seed': 287, 'fewshot_k': 10}, 'language': 'en'}
{'dataset': 'pannuke', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt', 'task': 'linear_probe', 'metrics': {'lp_acc1': 0.6983546617915904, 'lp_acc5': nan, 'lp_mean_per_class_recall': 0.7230529509854843, 'lr': 0.1, 'epochs': 10, 'seed': 287, 'fewshot_k': 10}, 'language': 'en'}
{'dataset': 'unitopatho', 'model': 'conch', 'pretrained': '/mnt/radonc-li01/private/xiangjx/models/conch/conch.pt', 'task': 'linear_probe', 'metrics': {'lp_acc1': 0.2710495963091119, 'lp_acc5': 0.9615532487504805, 'lp_mean_per_class_recall': 0.4062938790622688, 'lr': 0.1, 'epochs': 10, 'seed': 287, 'fewshot_k': 10}, 'language': 'en'}
{'dataset': 'skin

# Linear probe classification

This step takes a long time because it extracts features and then runs supervised classification.

In [None]:
# Linear probe run followed by a printout of its results. The seed is passed
# on the command line and is also embedded in the output file name.
seed = 123

# Every fragment carries its own trailing space, so they are joined without a
# separator. Only the fragments that interpolate a value are f-strings.
# NOTE(review): --fewshot_k -1 presumably means "use all training samples" —
# confirm against the clip_benchmark CLI.
linear_probe_fragments = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'skin' 'pannuke' 'unitopatho' ",
    "--task 'linear_probe' ",
    "--batch_size 512 ",
    "--num_workers 8 ",
    "--fewshot_k -1 ",
    f"--seed {seed} ",
    "--dataset_root '/mnt/radonc-li01/private/xiangjx/data/downstreams_demo' ",
    f"--output './results/linear_probe_{seed}.json'",
]
command = "".join(linear_probe_fragments)

run_command(command)

load_and_print_results(file_path=f'./results/linear_probe_{seed}.json')