## Initialization

In [1]:
# --- Kaggle Bootstrap Script ---
%cd /kaggle/working
%rm -rf MedCLIP
!git clone --depth 1 --branch results-reproduction https://github.com/lamlethanh777/MedCLIP.git

%pwd
%cd MedCLIP

# Install dependencies
!pip install -r requirements.txt --quiet

# # Show current commit for reproducibility
!git rev-parse HEAD

/kaggle/working
Cloning into 'MedCLIP'...
remote: Enumerating objects: 31, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 31 (delta 1), reused 16 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (31/31), 87.64 KiB | 5.84 MiB/s, done.
Resolving deltas: 100% (1/1), done.
/kaggle/working/MedCLIP
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.5/90.5 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m108.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 

## Pulling new changes from local

In [2]:
!git pull

Already up to date.


## Evaluation

### Library setup

In [3]:
import pdb, os
import argparse
import random

import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

from medclip.modeling_medclip import MedCLIPModel, PromptClassifier, MedCLIPVisionModel, MedCLIPVisionModelViT
from medclip.dataset import ImageTextContrastiveDataset, ZeroShotImageDataset
from medclip.dataset import ImageTextContrastiveCollator, ZeroShotImageCollator
from medclip.losses import ImageTextContrastiveLoss
from medclip.trainer import Trainer
from medclip.evaluator import Evaluator
from medclip import constants
from medclip.prompts import generate_class_prompts, generate_chexpert_class_prompts, generate_covid_class_prompts, generate_rsna_class_prompts
from medclip import utils

2025-10-25 10:57:37.033183: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761389857.210690      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761389857.260238      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
# set random seed
utils.set_random_seed(42)

# set cuda devices
os.environ['CUDA_VISIBLE_DEVICES']='0'
device = "cuda:0" if torch.cuda.is_available() else "cpu"

## Preprocess for Open-I, COVID, RSNA

In [5]:
!python /kaggle/working/MedCLIP/preprocess/preprocess_covid.py

COVID-19 X-ray Dataset Preprocessing

Configuration:
  COVID samples: 3000
  Normal samples: 3122
  Total samples: 6122

----------------------------------------------------------------------
Processing COVID images...
----------------------------------------------------------------------
✓ Copied 3000 COVID images

----------------------------------------------------------------------
Processing Normal images...
----------------------------------------------------------------------
✓ Copied 3122 Normal images

----------------------------------------------------------------------
Creating metadata CSV...
----------------------------------------------------------------------

Metadata CSV created at: local_data/covid-test-meta.csv
Total entries: 6122

SUMMARY
COVID images copied: 3000
Normal images copied: 3122
Total images: 6122
Destination folder: data/COVID/test
Metadata CSV: local_data/covid-test-meta.csv

✓ Preprocessing completed successfully!


### Config for each dataset

In [6]:
def setup_dataset_config(dataset_name):
    """Configure dataset-specific settings"""
    configs = {
        'chexpert': {
            'datalist': ['chexpert-5x200'],
            'class_names': constants.CHEXPERT_COMPETITION_TASKS,
            'prompt_generator': generate_chexpert_class_prompts,
            'mode': 'multiclass', # 5x200 does not have multilabel
            'n_prompts': 10,
        },
        'covid': {
            'datalist': ['covid-test'],
            'class_names': constants.COVID_TASKS,
            'prompt_generator': generate_covid_class_prompts,
            'mode': 'binary',  # COVID vs Normal
            'n_prompts': 10,
        },
        'rsna': {
            'datalist': ['rsna-balanced-test'],
            'class_names': constants.RSNA_TASKS,
            'prompt_generator': generate_rsna_class_prompts,
            'mode': 'binary',  # Pneumonia vs Normal
            'n_prompts': 10,
        },
        'mimic': {
            'datalist': ['mimic-5x200'],
            'class_names': constants.CHEXPERT_TASKS,
            'prompt_generator': generate_chexpert_class_prompts,
            'mode': 'multiclass', # 5x200 does not have multilabel
            'n_prompts': 10,
        },
        'openi': {
            'datalist': ['openi'],
            'class_names': constants.CHEXPERT_TASKS,
            'prompt_generator': generate_chexpert_class_prompts,
            'mode': 'multiclass', # 5x200 does not have multilabel
            'n_prompts': 10,
        },
    }
    return configs[dataset_name]

In [7]:
def setup_model(model_type='vit', pretrained=True):
    """Setup MedCLIP model with specified vision backbone"""
    if model_type.lower() == 'vit':
        vision_cls = MedCLIPVisionModelViT
        print("Using Vision Transformer (ViT) backbone")
    elif model_type.lower() == 'resnet':
        vision_cls = MedCLIPVisionModel
        print("Using ResNet backbone")
    else:
        raise ValueError(f"Unknown model type: {model_type}. Choose 'vit' or 'resnet'")
    
    model = MedCLIPModel(vision_cls=vision_cls)
    if pretrained:
        model.from_pretrained()
        print("Loaded pretrained weights")
    model.cuda()
    return model

In [8]:
def run_zero_shot_evaluation(dataset_name='chexpert', model_type='vit', batch_size=256):
    """
    Run zero-shot evaluation on specified dataset
    
    Args:
        dataset_name: 'chexpert', 'covid', 'rsna', or 'mimic'
        model_type: 'vit' or 'resnet'
        batch_size: evaluation batch size
    """
    print(f"\n{'='*60}")
    print(f"Zero-Shot Evaluation: {dataset_name.upper()} dataset")
    print(f"Model: MedCLIP-{model_type.upper()}")
    print(f"{'='*60}\n")
    
    # Get dataset configuration
    config = setup_dataset_config(dataset_name)
    
    # Generate class prompts
    print(f"Generating {config['n_prompts']} prompts per class...")
    cls_prompts = config['prompt_generator'](n=config['n_prompts'])
    print(f"Classes: {config['class_names']}")

    # Setup dataset
    print(f"\nLoading dataset from: {config['datalist']}")
    
    eval_dataset = ZeroShotImageDataset(
        datalist=config['datalist'],
        class_names=config['class_names']
    )
    print(f"Dataset size: {len(eval_dataset)} images")
    
    # Setup collator
    eval_collate_fn = ZeroShotImageCollator(
        cls_prompts=cls_prompts,
        mode=config['mode']
    )
    
    # Setup dataloader
    eval_dataloader = DataLoader(
        eval_dataset,
        batch_size=batch_size,
        collate_fn=eval_collate_fn,
        shuffle=False,
        pin_memory=True,
        num_workers=4,
    )

    # Setup model
    model = setup_model(model_type=model_type, pretrained=True)
    medclip_clf = PromptClassifier(model)
    
    # Setup evaluator
    evaluator = Evaluator(
        medclip_clf=medclip_clf,
        eval_dataloader=eval_dataloader,
        mode=config['mode'],
    )
    
    # Run evaluation
    print("\nStarting evaluation...")
    results = evaluator.evaluate()
    print("\nDone!")
    
    return results


In [9]:
dataset = 'covid' # ['chexpert', 'covid', 'rsna', 'mimic', 'openi']
model_type = 'vit' # 'vit' or 'resnet'
batch_size = 256

results = run_zero_shot_evaluation(
    dataset_name=dataset,
    model_type=model_type,
    batch_size=batch_size
)


Zero-Shot Evaluation: COVID dataset
Model: MedCLIP-VIT

Generating 10 prompts per class...
sample 10 num of prompts for COVID from total 12
Classes: ['Normal', 'COVID']

Loading dataset from: ['covid-test']
load data from ./local_data/covid-test-meta.csv
Dataset size: 6122 images




config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Using Vision Transformer (ViT) backbone


config.json: 0.00B [00:00, ?B/s]



model.safetensors:   0%|          | 0.00/113M [00:00<?, ?B/s]

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



 Download pretrained model from: https://storage.googleapis.com/pytrial/medclip-vit-pretrained.zip
load model weight from: ./pretrained/medclip-vit
Loaded pretrained weights

Starting evaluation...


Evaluation: 100%|██████████| 24/24 [00:21<00:00,  1.13it/s]


Done!



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
print("="*60)
print(f"\n{dataset.upper()}:")
for metric, value in results.items():
    print(f"  {metric}: {value}")


COVID:
  pred: [[0.43817186]
 [0.8885017 ]
 [0.17448856]
 ...
 [0.8839516 ]
 [0.16402759]
 [0.1808735 ]]
  labels: [0 1 0 ... 1 0 0]
  auc: 0.8906058082425796
  acc: 0.4900359359686377
  precision: 0.24501796798431885
  recall: 0.5
  f1-score: 0.32887524665643497
