In [None]:
!pip install transformers==4.40.0


In [None]:
# Install dependencies
!pip install -q torch torchvision pydicom opencv-python Pillow accelerate

#Imports
import os, glob
import torch
import pydicom
import cv2
from PIL import Image
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForCausalLM



In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Hub id of the checkpoint used for both tokenizer and model.
model_name = "StanfordAIMI/CheXagent-2-3b"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Pick the weight dtype up front: bfloat16 when a CUDA GPU is available,
# float32 otherwise.
if torch.cuda.is_available():
    model_dtype = torch.bfloat16
else:
    model_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    low_cpu_mem_usage=True,
    torch_dtype=model_dtype,
)
model.eval()  # inference only

# Question asked about every image further down in the notebook.
question = "Does this chest X-ray show a pneumothorax?"


In [None]:
# Sanity check: report every parameter that is still on the "meta" device.
meta_param_names = [
    param_name
    for param_name, tensor in model.named_parameters()
    if tensor.device.type == "meta"
]
for name in meta_param_names:
    print(f"{name} is on the meta device")

In [None]:
# Mount Google Drive so the project files are reachable from this Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Set paths
from pathlib import Path

# Project folder on the mounted Drive; every other location is derived from it
# so that editing ROOT moves the inputs and outputs together.
ROOT = Path('/content/drive/MyDrive/MLProjects/foundation-models-radiology')
print("exists:", ROOT.exists())

# Input DICOMs -- use the exact folder name as on Drive.
DICOM_DIR = ROOT / 'PTXHeadtoHeadSmall'

# Output folder for the converted JPEGs. ROOT itself must already exist:
# mkdir is called without parents=True, so a wrong ROOT fails here.
JPEG_DIR = ROOT / 'cxr_jpegs'
JPEG_DIR.mkdir(exist_ok=True)


In [None]:
# Convert DICOMs to JPEGs
def dicom_to_jpeg(dicom_path, jpeg_path):
    """Read one DICOM file and save it as an 8-bit, histogram-equalized JPEG.

    The pixel data is min-max scaled to 0-255 and cast to uint8. Images whose
    PhotometricInterpretation is MONOCHROME1 (lowest value displayed as white)
    are inverted so that every output JPEG has the same polarity.

    Args:
        dicom_path: Path (str or Path) of the DICOM file to read.
        jpeg_path: Destination path (str or Path) handed to ``cv2.imwrite``.

    Raises:
        OSError: if ``cv2.imwrite`` reports that the file was not written.
    """
    ds = pydicom.dcmread(str(dicom_path))
    img = ds.pixel_array
    img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')

    # MONOCHROME1 stores inverted grayscale; flip it so bright pixels mean the
    # same thing in every JPEG passed to the model.
    photometric = str(getattr(ds, "PhotometricInterpretation", "")).strip().upper()
    if photometric == "MONOCHROME1":
        img = 255 - img

    img = cv2.equalizeHist(img)  # optional contrast enhancement

    # cv2.imwrite signals failure by returning False rather than raising.
    if not cv2.imwrite(str(jpeg_path), img):
        raise OSError(f"Could not write JPEG to {jpeg_path}")

# Batch convert
# Find DICOMs recursively (case-insensitive .dcm); sorted so the processing
# order does not depend on the filesystem.
dicom_paths = sorted(DICOM_DIR.rglob('*.[dD][cC][mM]'))
print("count:", len(dicom_paths))
print("samples:", dicom_paths[:5])

jpeg_paths = []
for dcm_path in dicom_paths:
    # Build the JPEG name from the path relative to DICOM_DIR, not just the
    # stem: files with the same name in different subfolders would otherwise
    # overwrite each other. Files directly in DICOM_DIR keep "<stem>.jpg".
    rel_no_ext = dcm_path.relative_to(DICOM_DIR).with_suffix("")
    jpg_path = JPEG_DIR / ("_".join(rel_no_ext.parts) + ".jpg")
    dicom_to_jpeg(dcm_path, jpg_path)
    jpeg_paths.append(str(jpg_path))


In [None]:
# CheXagent-2-3b is already loaded in the model-loading cell near the top of the notebook.


In [None]:
# Run inference on each image
def ask_chexagent(image_path, question):
    """Ask the loaded CheXagent model one question about one image.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        image_path: Path of the image file; passed to the tokenizer's
            ``from_list_format`` as the ``"image"`` entry.
        question: Question text placed after the image in the prompt.

    Returns:
        The decoded generated text with surrounding whitespace stripped.
    """
    query = tokenizer.from_list_format([
        {"image": image_path},
        {"text": question}
    ])
    conversation = [
        {"from": "system", "value": "You are a helpful assistant."},
        {"from": "human", "value": query}
    ]
    # Send the prompt to the device the model was placed on. The notebook never
    # defines a standalone `device` variable, so referencing one raised NameError.
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Greedy decoding (no sampling, single beam), at most 128 new tokens.
    output = model.generate(
        input_ids,
        do_sample=False,
        num_beams=1,
        temperature=1.0,
        top_p=1.0,
        use_cache=True,
        max_new_tokens=128
    )[0]

    # Keep only the generated part: skip the prompt tokens and drop the final
    # token (presumably the end-of-sequence marker -- TODO confirm this is also
    # right when generation stops at max_new_tokens).
    response = tokenizer.decode(output[input_ids.size(1):-1])
    return response.strip()

In [None]:
# Query the model once per converted JPEG, echoing each answer as it arrives,
# and keep the (path, answer) pairs in `results`.
results = []
for path in jpeg_paths:
    answer = ask_chexagent(path, question)
    print(f" {Path(path).name} → {answer}")
    results += [(path, answer)]
