In [1]:
!pip install transformers==4.40.0

Collecting transformers==4.40.0
  Downloading transformers-4.40.0-py3-none-any.whl.metadata (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.6/137.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.20,>=0.19 (from transformers==4.40.0)
  Downloading tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.40.0-py3-none-any.whl (9.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m67.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m86.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.21.1
    Uninstalling tokenizers-0.21.1:
      Success

In [2]:
import os
import warnings
warnings.filterwarnings("ignore")

from transformers import logging
logging.set_verbosity_error()

import torch
import pandas as pd
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm

import sys
sys.path.append('/kaggle/input/chexagent-files/CheXagent')
from model_chexagent.chexagent import CheXagent

# ───────────────────────────────────────────────────
# 1) Load ground truth CSV
# ───────────────────────────────────────────────────
gt_df = pd.read_csv("/kaggle/input/chest-x-ray-ground-truth-labels/test_df/test_df.csv")
gt_df["ImageName"] = gt_df["ImageName"].apply(os.path.basename)
diseases = [col for col in gt_df.columns if col != "ImageName"]

# ───────────────────────────────────────────────────
# 2) Initialize CheXagent model
# ───────────────────────────────────────────────────
chexagent = CheXagent()

# ───────────────────────────────────────────────────
# 3) Batch inference & per-batch CSV + accuracy
# ───────────────────────────────────────────────────
batch_size = 100
num_batches = 10  # only do 10 batches

for batch_idx in range(num_batches):
    start = batch_idx * batch_size
    end   = start + batch_size
    batch_df = gt_df.iloc[start:end]
    if batch_df.empty:
        break  # no more images

    preds = []
    for _, row in tqdm(batch_df.iterrows(),
                       total=len(batch_df),
                       desc=f"Batch {batch_idx+1}/{num_batches}"):
        img_file = row["ImageName"]
        img_path = os.path.join(
            "/kaggle/input/chest-x-ray-images/test_data",
            img_file
        )
        row_pred = {"ImageName": img_file}
        for disease in diseases:
            response = chexagent.binary_disease_classification([img_path], disease)
            txt = response.strip().lower()
            # 1 if CheXagent says yes/present/true or mentions the disease name
            flag = int(
                txt.startswith(("yes", "present", "true")) or
                (disease.lower() in txt)
            )
            row_pred[disease] = flag
        preds.append(row_pred)

    # Build DataFrame & save CSV
    pred_df = pd.DataFrame(preds)
    out_csv = f"predictions_batch_{batch_idx+1}.csv"
    pred_df.to_csv(out_csv, index=False)
    print(f"Saved predictions for batch {batch_idx+1} → {out_csv}")

    # Compute per-batch accuracy
    merged = batch_df.merge(pred_df, on="ImageName", suffixes=("_gt","_pred"))
    # Flatten all labels to compute overall accuracy for the batch
    y_true = merged[[d + "_gt" for d in diseases]].values.flatten()
    y_pred = merged[[d + "_pred" for d in diseases]].values.flatten()
    acc = accuracy_score(y_true, y_pred)
    print(f"Batch {batch_idx+1} accuracy: {acc:.3f}\n")


tokenizer_config.json:   0%|          | 0.00/21.0k [00:00<?, ?B/s]

tokenization_chexagent.py:   0%|          | 0.00/26.3k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/999k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.85k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/769 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

configuration_chexagent.py:   0%|          | 0.00/9.35k [00:00<?, ?B/s]

modeling_chexagent.py:   0%|          | 0.00/53.7k [00:00<?, ?B/s]

modeling_visual.py:   0%|          | 0.00/8.24k [00:00<?, ?B/s]

2025-05-14 12:44:43.730237: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747226683.917778      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747226683.972764      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors.index.json:   0%|          | 0.00/75.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/2.60G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.61G [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/368 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/711 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/409 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

Batch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 1 → predictions_batch_1.csv
Batch 1 accuracy: 0.738



Batch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 2 → predictions_batch_2.csv
Batch 2 accuracy: 0.744



Batch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 3 → predictions_batch_3.csv
Batch 3 accuracy: 0.728



Batch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 4 → predictions_batch_4.csv
Batch 4 accuracy: 0.735



Batch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 5 → predictions_batch_5.csv
Batch 5 accuracy: 0.732



Batch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 6 → predictions_batch_6.csv
Batch 6 accuracy: 0.759



Batch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 7 → predictions_batch_7.csv
Batch 7 accuracy: 0.742



Batch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 8 → predictions_batch_8.csv
Batch 8 accuracy: 0.751



Batch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 9 → predictions_batch_9.csv
Batch 9 accuracy: 0.765



Batch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

Saved predictions for batch 10 → predictions_batch_10.csv
Batch 10 accuracy: 0.735

