## Initialization

In [1]:
# --- Kaggle Bootstrap Script ---
%cd /kaggle/working
%rm -rf MedCLIP
!git clone --depth 1 https://github.com/lamlethanh777/MedCLIP.git

%pwd
%cd MedCLIP

# Install dependencies
!pip install -r requirements.txt --quiet

# # Show current commit for reproducibility
!git rev-parse HEAD

/kaggle/working
Cloning into 'MedCLIP'...
remote: Enumerating objects: 39, done.[K
remote: Counting objects: 100% (39/39), done.[K
remote: Compressing objects: 100% (36/36), done.[K
remote: Total 39 (delta 3), reused 23 (delta 2), pack-reused 0 (from 0)[K
Receiving objects: 100% (39/39), 109.56 KiB | 6.44 MiB/s, done.
Resolving deltas: 100% (3/3), done.
/kaggle/working/MedCLIP
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.5/90.5 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m82.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 

## Pulling the latest changes from the remote repository

In [2]:
# Update the checkout created by the bootstrap cell; this relies on the working
# directory still being the MedCLIP clone (set there via %cd MedCLIP).
!git pull

Already up to date.


## Convert Open-I dataset to compatible form

In [3]:
# Run the repository's Indiana (Open-I) preprocessing script from the repo root.
# Per its captured log it keeps frontal views, merges projections with reports,
# extracts labels and splits train/validation with a 0.2 validation ratio.
# NOTE(review): --quiet is passed yet the script still logs every stage; confirm
# what the flag actually suppresses.
!python preprocess_indiana.py --quiet

Loading Indiana dataset...
Loaded 7466 projections
Loaded 3851 reports
Filtered to 3818 frontal views
Merging projections and reports...
Merged dataset size: 3818
Found 3818 images that exist on disk
After filtering reports: 3794 samples
Extracting labels...
Splitting data with validation ratio: 0.2
Train set: 3036 samples
Validation set: 758 samples

Training Set Label Distribution:
--------------------------------------------------
No Finding                    : 2252 (74.18%)
Enlarged Cardiomediastinum    :    0 ( 0.00%)
Cardiomegaly                  :  263 ( 8.66%)
Lung Lesion                   :    0 ( 0.00%)
Lung Opacity                  :  351 (11.56%)
Edema                         :   35 ( 1.15%)
Consolidation                 :   23 ( 0.76%)
Pneumonia                     :   33 ( 1.09%)
Atelectasis                   :  257 ( 8.47%)
Pneumothorax                  :   26 ( 0.86%)
Pleural Effusion              :  117 ( 3.85%)
Pleural Other                 

## Training MedCLIP with Open-I

In [None]:
# Launch the Open-I pretraining script from the repository root.
# NOTE(review): the demo below loads a checkpoint from
# ./data/MedCLIP/checkpoints/vision_text_pretrain/25000 - presumably written by
# this run; confirm the output directory and step count before relying on it.
!python examples/run_indiana_pretrain.py

## Prompt Classification Demo

### Import required modules

In [None]:
from medclip import MedCLIPModel, MedCLIPVisionModelViT
from medclip import MedCLIPProcessor
from medclip import PromptClassifier

### Initialize models

In [None]:
# init models
# `processor` turns PIL images into model inputs; `clf` wraps the MedCLIP model
# for prompt-based classification with ensemble=True.
processor = MedCLIPProcessor()
model = MedCLIPModel(vision_cls=MedCLIPVisionModelViT, checkpoint='./data/MedCLIP/checkpoints/vision_text_pretrain/25000')
clf = PromptClassifier(model, ensemble=True)
clf.cuda()

# Every later cell only runs inference, so switch off training-time behaviour
# (e.g. dropout); otherwise repeated runs can yield different scores.
# NOTE(review): this treats PromptClassifier as a torch.nn.Module, which the
# .cuda() call above suggests - confirm.
# The trailing ';' keeps the full module repr out of the cell output.
clf.eval();

### Prepare input image and prompts

In [None]:
# Load the bundled example radiograph and run it through the processor
from PIL import Image
image = Image.open('./example_data/view1_frontal.jpg')
inputs = processor(images=image, return_tensors="pt")

# Build the class prompts (n=10 requested from the generator) and attach them
# to the same input mapping that is passed to the classifier
from medclip.prompts import generate_chexpert_class_prompts, process_class_prompts

raw_class_prompts = generate_chexpert_class_prompts(n=10)
cls_prompts = process_class_prompts(raw_class_prompts)
inputs['prompt_inputs'] = cls_prompts

### Run classification

In [None]:
# Classify the example image: `inputs` holds the processor output plus the
# 'prompt_inputs' entry attached when the prompts were prepared.
output = clf(**inputs)
# Show the raw result; the evaluation cells later read its 'logits' entry.
print(output)

## Model Evaluation on CheXpert Validation Set

### Load and prepare CheXpert validation data

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Dataset root and the validation index file beneath it
chexpert_root = Path('/kaggle/input/chexpert-v10-small')  # Update this path as needed
valid_csv = chexpert_root.joinpath('archive', 'valid.csv')

# Read the complete validation index
df_valid = pd.read_csv(valid_csv)

# Keep frontal views only and cap the evaluation subset at the first 100 rows
is_frontal = df_valid['Frontal/Lateral'].eq('Frontal')
df_valid_frontal = df_valid.loc[is_frontal].head(100)

# Report the sizes and preview the selected subset
print(f"Total validation samples: {len(df_valid)}")
print(f"Selected frontal samples: {len(df_valid_frontal)}")
print("\nFirst few samples:")
print(df_valid_frontal.head())

### Prepare CheXpert class labels

In [None]:
# Label columns scored in the evaluation. 'No Finding' and 'Support Devices'
# are deliberately left out so only clinical findings are evaluated.
# The order matters: later cells address prediction/label columns by position.
chexpert_classes = [
    'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion',
    'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis',
    'Pneumothorax', 'Pleural Effusion', 'Pleural Other', 'Fracture',
]

# Numbered (1-based) listing of the classes
print(f"Evaluating on {len(chexpert_classes)} clinical classes:")
print("\n".join(f"{rank}. {name}" for rank, name in enumerate(chexpert_classes, 1)))

### Run zero-shot classification on validation samples

In [None]:
from tqdm import tqdm
import torch
from PIL import Image

# Prepare prompts for all CheXpert classes
from medclip.prompts import generate_chexpert_class_prompts, process_class_prompts

# Generate prompts for classification
cls_prompts = process_class_prompts(generate_chexpert_class_prompts(n=10))

# The metrics cell addresses prediction columns by their position in
# chexpert_classes, so the prompt classes must be the same names in the same
# order. NOTE(review): assumes cls_prompts is a mapping keyed by class name and
# that the classifier emits one score column per key, in key order - confirm
# against PromptClassifier.
prompt_class_names = list(cls_prompts) if isinstance(cls_prompts, dict) else None
if prompt_class_names is not None and prompt_class_names != list(chexpert_classes):
    print(
        "WARNING: prompt classes differ from chexpert_classes; prediction "
        "columns may not line up with the label columns."
        f"\n  prompt classes: {prompt_class_names}"
        f"\n  label classes:  {list(chexpert_classes)}"
    )

# Store predictions and ground truth (row k of one always matches row k of the other)
all_predictions = []
all_labels = []
valid_samples = 0
missing_images = 0  # rows whose image file was not found on disk
failed_images = 0   # rows that raised while being processed

print("Running zero-shot classification on validation samples...")
print(f"Processing {len(df_valid_frontal)} images...\n")

# Process each image
for _, row in tqdm(df_valid_frontal.iterrows(), total=len(df_valid_frontal)):
    try:
        # Resolve the image path; count (instead of silently dropping) missing files
        img_path = chexpert_root / row['Path']
        if not img_path.exists():
            missing_images += 1
            continue

        # Build the ground-truth vector BEFORE anything is stored, so a failure
        # here cannot leave a prediction row without its label row.
        # Convert: 1.0 -> positive, 0.0 -> negative, NaN/-1.0 -> uncertain (treat as negative)
        labels = []
        for cls in chexpert_classes:
            label_val = row[cls]
            if pd.isna(label_val) or label_val == -1.0:
                labels.append(0)
            else:
                labels.append(int(label_val))

        # Load image; the context manager closes the file handle once converted
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        # Prepare inputs
        inputs = processor(images=image, return_tensors="pt")
        inputs['prompt_inputs'] = cls_prompts

        # Move tensor entries to GPU (the prompt mapping is not a tensor and is left as is)
        for key in inputs:
            if isinstance(inputs[key], torch.Tensor):
                inputs[key] = inputs[key].cuda()

        # Get predictions without tracking gradients
        with torch.no_grad():
            output = clf(**inputs)

        # Raw per-class scores taken from the 'logits' entry (not probabilities)
        predictions = output['logits'].cpu().numpy()[0]  # Shape: [num_classes]
    except Exception as e:
        print(f"Error processing {row['Path']}: {e}")
        failed_images += 1
        continue

    # Commit both rows together so the two lists can never get out of step
    all_predictions.append(predictions)
    all_labels.append(labels)
    valid_samples += 1

print(f"\nSuccessfully processed {valid_samples} samples")
if missing_images or failed_images:
    print(f"Skipped {missing_images} missing image file(s) and {failed_images} image(s) that raised errors")

# Convert to numpy arrays
all_predictions = np.array(all_predictions)  # Shape: [num_samples, num_classes]
all_labels = np.array(all_labels)  # Shape: [num_samples, num_classes]

print(f"Predictions shape: {all_predictions.shape}")
print(f"Labels shape: {all_labels.shape}")

# Fail loudly here rather than letting the metrics cell compute misleading
# numbers (or die with an opaque IndexError) on malformed arrays.
if valid_samples == 0:
    raise RuntimeError(
        "No validation image could be processed; check chexpert_root and the 'Path' column."
    )
if all_predictions.ndim != 2 or all_predictions.shape[1] != len(chexpert_classes):
    raise ValueError(
        f"Predictions have shape {all_predictions.shape} but {len(chexpert_classes)} label "
        "classes are evaluated; the class prompts must cover exactly the classes in "
        "chexpert_classes, in the same order."
    )

### Calculate evaluation metrics (AUC-ROC)

In [None]:
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score
from sklearn.metrics import classification_report

# Per-class results, index-aligned with chexpert_classes; NaN marks a class
# that could not be scored
auc_scores = []
ap_scores = []  # Average Precision

banner = "=" * 70
print(banner)
print("Zero-Shot Classification Results on CheXpert Validation Set")
print(banner)
print(f"\nNumber of samples: {valid_samples}")
print(f"Number of classes: {len(chexpert_classes)}\n")

for col, cls_name in enumerate(chexpert_classes):
    y_true = all_labels[:, col]
    y_scores = all_predictions[:, col]

    # Ranking metrics are undefined unless both positives and negatives occur
    if np.unique(y_true).size <= 1:
        print(f"{cls_name:30s} | Skipped (only one class present)")
        auc_scores.append(np.nan)
        ap_scores.append(np.nan)
        continue

    auc = roc_auc_score(y_true, y_scores)
    ap = average_precision_score(y_true, y_scores)
    auc_scores.append(auc)
    ap_scores.append(ap)

    # Accuracy at a fixed cut-off of 0.5.
    # NOTE(review): the scores come from the model's 'logits' entry, so 0.5 is
    # not a calibrated probability threshold - confirm the intended cut-off.
    y_pred = (y_scores > 0.5).astype(int)
    acc = accuracy_score(y_true, y_pred)

    print(f"{cls_name:30s} | AUC: {auc:.4f} | AP: {ap:.4f} | Acc: {acc:.4f}")

# Average over the classes that were scored (NaN entries are ignored)
mean_auc = np.nanmean(auc_scores)
mean_ap = np.nanmean(ap_scores)

print("\n" + banner)
print(f"Mean AUC-ROC:  {mean_auc:.4f}")
print(f"Mean AP:       {mean_ap:.4f}")
print(banner)

### Visualize results

In [None]:
import matplotlib.pyplot as plt

# One horizontal bar per class that received an AUC score
fig, ax = plt.subplots(figsize=(12, 6))

# Classes skipped during scoring carry NaN and are left out of the chart
valid_indices = ~np.isnan(auc_scores)
scored = [
    (name, value)
    for name, value, keep in zip(chexpert_classes, auc_scores, valid_indices)
    if keep
]
valid_classes = [name for name, _ in scored]
valid_auc_scores = [value for _, value in scored]

row_positions = range(len(valid_classes))
bars = ax.barh(row_positions, valid_auc_scores, color='steelblue')
ax.set_yticks(row_positions)
ax.set_yticklabels(valid_classes, fontsize=10)
ax.set_xlabel('AUC-ROC Score', fontsize=12)
ax.set_title('Zero-Shot Classification Performance on CheXpert Validation Set', fontsize=14, fontweight='bold')

# Dashed reference line at the mean AUC
ax.axvline(x=mean_auc, color='red', linestyle='--', linewidth=2, label=f'Mean AUC: {mean_auc:.4f}')
ax.legend()
ax.set_xlim(0, 1)
ax.grid(axis='x', alpha=0.3)

# Write each score just to the right of the end of its bar
for row_pos, score in enumerate(valid_auc_scores):
    ax.text(score + 0.01, row_pos, f'{score:.3f}', va='center', fontsize=9)

plt.tight_layout()
plt.show()

### Show sample predictions with images

In [None]:
# Display some sample predictions
num_samples_to_show = 4
SAMPLE_SEED = 42  # fixed seed so a full re-run shows the same samples

# The classification loop skips rows whose image file is missing, so row k of
# all_predictions is not necessarily row k of df_valid_frontal. Rebuild the
# list of rows that can have been processed and refuse to plot when the counts
# disagree (e.g. a row raised during classification), because an image would
# otherwise be shown next to another image's scores.
image_exists = [(chexpert_root / rel_path).exists() for rel_path in df_valid_frontal['Path']]
processed_rows = df_valid_frontal.loc[image_exists]
if len(processed_rows) != valid_samples:
    raise ValueError(
        f"{valid_samples} samples were classified but {len(processed_rows)} validation "
        "images exist on disk; predictions cannot be matched to images reliably."
    )

# Seeded draw of distinct sample positions
rng = np.random.default_rng(SAMPLE_SEED)
sample_indices = rng.choice(valid_samples, size=min(num_samples_to_show, valid_samples), replace=False)

fig, axes = plt.subplots(2, 2, figsize=(14, 12))
axes = axes.flatten()

# Hide panels that stay empty when fewer samples than panels are available
for unused_ax in axes[len(sample_indices):]:
    unused_ax.axis('off')

for idx, sample_idx in enumerate(sample_indices):
    ax = axes[idx]

    # Get image path from the row that produced this prediction
    row = processed_rows.iloc[sample_idx]
    img_path = chexpert_root / row['Path']

    # Load and display image; the context manager closes the file handle
    with Image.open(img_path) as img_file:
        image = img_file.convert('RGB')
    ax.imshow(image)  # RGB data, so no colormap is involved
    ax.axis('off')

    # Get predictions and ground truth for this sample
    predictions = all_predictions[sample_idx]
    labels = all_labels[sample_idx]

    # Find top predicted classes and ground truth positives
    top_indices = np.argsort(predictions)[-3:][::-1]  # Top 3 predictions, highest first
    positive_indices = np.where(labels == 1)[0]  # Ground truth positives

    # Build title text
    title_text = f"Sample {sample_idx + 1}\n\n"
    title_text += "Top 3 Predictions:\n"
    for i in top_indices:
        title_text += f"  • {chexpert_classes[i]}: {predictions[i]:.3f}\n"

    if len(positive_indices) > 0:
        title_text += "\nGround Truth (Positive):\n"
        for i in positive_indices:
            title_text += f"  • {chexpert_classes[i]} (score: {predictions[i]:.3f})\n"
    else:
        title_text += "\nGround Truth: No findings"

    ax.set_title(title_text, fontsize=9, ha='left', loc='left')

plt.tight_layout()
plt.show()