In [None]:
import os
import sys

def safe_pip_install(package):
    """Install ``package`` with pip, reporting (not raising) on failure.

    In Colab (detected by ``google.colab`` being present in ``sys.modules``)
    the install goes through the IPython shell escape; everywhere else it runs
    ``<sys.executable> -m pip install`` in a subprocess.

    Args:
        package: A pip requirement string, e.g. ``"transformers==4.40.0"``.

    Returns:
        bool: ``True`` when the install command reported success, ``False``
        when it failed. Failures are also printed. Existing callers that
        ignore the return value are unaffected.
    """
    import shlex
    import subprocess

    try:
        # Detect Colab
        in_colab = 'google.colab' in sys.modules

        if in_colab:
            print(f"Installing {package} in Colab...")
            shell = get_ipython()
            # Quote the requirement so specifiers such as "torch>=2.0" are not
            # interpreted by the shell as an output redirection.
            shell.system(f"pip install {shlex.quote(package)}")
            # system() does not raise on a non-zero exit status; IPython
            # records the status in user_ns['_exit_code'] instead.
            exit_code = shell.user_ns.get('_exit_code', 0)
            if exit_code:
                raise RuntimeError(f"pip exited with status {exit_code}")
        else:
            print(f"Installing {package} in standard environment...")
            # Use the running interpreter's pip so the package lands in the
            # environment this code is executing in.
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        return True
    except Exception as e:
        print(f"Failed to install {package}: {e}")
        return False

In [None]:
import os
import torch

def detect_environment():
    """Classify the current runtime as ``'runpod'``, ``'colab'`` or ``'unknown'``.

    Detection rules:
      * RunPod: ``RUNPOD_POD_ID`` is set or ``/workspace`` exists, and CUDA is
        available.
      * Colab: ``COLAB_GPU`` is set or the IPython shell's repr mentions
        ``google.colab``. This applies with or without a GPU, so a Colab GPU
        runtime is no longer reported as ``'unknown'``.

    RunPod is checked first, so it wins if both sets of markers are present.

    Returns:
        str: ``'runpod'``, ``'colab'`` or ``'unknown'``.
    """
    # Check for GPU
    has_gpu = torch.cuda.is_available()

    # get_ipython() only exists inside an IPython kernel; outside one, treat
    # the shell marker as absent instead of raising NameError.
    try:
        shell_repr = str(get_ipython())
    except NameError:
        shell_repr = ''

    # Check for Colab-specific environment
    is_colab = 'COLAB_GPU' in os.environ or 'google.colab' in shell_repr

    # Check for RunPod-specific environment
    is_runpod = 'RUNPOD_POD_ID' in os.environ or os.path.exists('/workspace')

    if is_runpod and has_gpu:
        return 'runpod'
    if is_colab:
        return 'colab'
    return 'unknown'

# Check the environment and install the packages this notebook imports.
env = detect_environment()

# One requirement list shared by every supported platform, so the branches
# cannot drift apart and nothing is installed twice.
# NOTE: torch is already imported in this cell, so reinstalling it here does
# not change the module loaded in the running kernel.
REQUIRED_PACKAGES = [
    "transformers==4.40.0",
    "torch",
    "torchvision",
    "pydicom",
    "opencv-python",
    "Pillow",
    "accelerate",
]

if env == 'runpod':
    print("Running on RunPod with GPU — using full model.")
elif env == 'colab':
    print("Running on Colab with CPU — using lightweight fallback.")
else:
    print("Unknown environment — defaulting to safe config.")

# Nothing is installed on an unrecognised platform (unchanged behaviour).
if env in ('runpod', 'colab'):
    for package in REQUIRED_PACKAGES:
        safe_pip_install(package)


In [None]:
# Imports (the dependencies themselves are installed by the environment-setup cell above)
import os, glob
import torch
import pydicom
import cv2
from PIL import Image
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForCausalLM



In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face checkpoint to load; its custom code is enabled via trust_remote_code.
model_name = "StanfordAIMI/CheXagent-2-3b"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Weights are placed by device_map="auto"; the dtype is bfloat16 when CUDA is
# available and float32 otherwise.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    low_cpu_mem_usage=True,
    torch_dtype=(torch.float32 if not torch.cuda.is_available() else torch.bfloat16),
)
# Switch to inference mode (no dropout etc.).
model.eval()

# Preferred torch device for this session.
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")

# Question put to the model for every image.
question = "Does this chest X-ray show a pneumothorax?"


In [None]:
# Sanity check: list every parameter whose tensor is still on the "meta"
# device after loading.
for name, param in model.named_parameters():
    if param.device.type != "meta":
        continue
    print(f"{name} is on the meta device")

In [None]:
from pathlib import Path

# Resolve the project root for the detected platform.
env = detect_environment()
if env == 'runpod':
    # Data is expected to have been synced onto the pod beforehand, e.g.:
    #   rclone sync gdrive:/MyDrive/MLProjects/foundation-models-radiology /workspace/MLProjects/foundation-models-radiology
    ROOT = Path('/workspace/MLProjects/foundation-models-radiology')
elif env == 'colab':
    from google.colab import drive
    drive.mount('/content/drive')
    # Once mounted, the Drive folders can be referenced like local paths.
    ROOT = Path('/content/drive/MyDrive/MLProjects/foundation-models-radiology')
else:
    # Raise a regular exception rather than calling sys.exit(): this needs no
    # `sys` import from another cell and surfaces as an ordinary cell error.
    raise RuntimeError("Error: No platform recognised")

# Report (and validate) the root before creating anything beneath it, so a
# missing root is not hidden behind an opaque mkdir failure.
print("exists:", ROOT.exists())
if not ROOT.exists():
    raise FileNotFoundError(f"Project root not found: {ROOT}")

DICOM_DIR = ROOT / 'PTXHeadtoHeadSmall'   # use the exact folder name as on Drive
JPEG_DIR = ROOT / 'cxr_jpegs'
JPEG_DIR.mkdir(exist_ok=True)

if not DICOM_DIR.is_dir():
    print(f"Warning: DICOM folder not found: {DICOM_DIR}")


In [None]:
#Set paths

#ROOT = Path("/content/drive/MyDrive")  # adjust if needed
#DICOM_DIR = ROOT / "PTXHeadtoHeadSmall"

# RunPod doesn't support drive.mount(). Instead, access the files directly or
# use rclone to sync your Google Drive into the pod's local filesystem.
#GPU update:
#ROOT = Path('/workspace/MLProjects/foundation-models-radiology')
#JPEG_DIR = ROOT / 'cxr_jpegs'

In [None]:
# Convert DICOMs to JPEGs
def dicom_to_jpeg(dicom_path, jpeg_path):
    """Convert one DICOM file to an 8-bit, histogram-equalised JPEG.

    Args:
        dicom_path: Path of the DICOM file to read.
        jpeg_path: Destination path for the JPEG.

    Raises:
        OSError: If OpenCV reports that the image could not be written.

    NOTE(review): assumes a single-frame grayscale image, since
    ``cv2.equalizeHist`` expects 8-bit single-channel input — confirm for
    your dataset.
    """
    ds = pydicom.dcmread(str(dicom_path))
    img = ds.pixel_array

    # Stretch the stored range to 0-255 and drop to 8 bits.
    img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')

    # MONOCHROME1 stores the minimum value as white; flip it so every output
    # JPEG uses the same polarity (minimum = black).
    if getattr(ds, 'PhotometricInterpretation', None) == 'MONOCHROME1':
        img = 255 - img

    img = cv2.equalizeHist(img)  # optional contrast enhancement

    # cv2.imwrite signals failure by returning False rather than raising.
    if not cv2.imwrite(str(jpeg_path), img):
        raise OSError(f"Could not write JPEG to {jpeg_path}")

# Batch convert: find DICOMs recursively (case-insensitive .dcm extension).
# Sorted so the processing order, and therefore the results order, is
# reproducible across runs and filesystems.
dicom_paths = sorted(DICOM_DIR.rglob('*.[dD][cC][mM]'))
print("count:", len(dicom_paths))
print("samples:", dicom_paths[:5])

jpeg_paths = []
for dcm_path in dicom_paths:
    # Build the output name from the path relative to DICOM_DIR so that
    # same-named files in different subfolders do not overwrite each other.
    # Files directly inside DICOM_DIR keep the plain "<stem>.jpg" name.
    rel_stem = dcm_path.relative_to(DICOM_DIR).with_suffix('')
    jpg_path = JPEG_DIR / ("_".join(rel_stem.parts) + ".jpg")
    dicom_to_jpeg(dcm_path, jpg_path)
    jpeg_paths.append(str(jpg_path))


In [None]:
from PIL import Image
def ask_chexagent(image_path, question):
    """Ask the loaded CheXagent model a question about one image.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        image_path: Path to the image file; passed to the tokenizer as a string.
        question: The text question to ask about the image.

    Returns:
        str: The model's answer with the prompt removed and surrounding
        whitespace stripped, or an ``" Error processing ..."`` message if
        anything raised along the way.
    """
    try:
        query = tokenizer.from_list_format([
            {"image": str(image_path)},
            {"text": question}
        ])
        conversation = [
            {"from": "system", "value": "You are a helpful assistant."},
            {"from": "human",  "value": query}
        ]
        # Returns a tensor, not a dict
        # NOTE(review): assumed to have shape (1, prompt_len) — confirm.
        input_ids = tokenizer.apply_chat_template(
            conversation, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)

        # A single, unpadded prompt: every position is a real token. Deriving
        # the mask from pad_token_id could mask prompt tokens that happen to
        # share the pad id.
        attention_mask = torch.ones_like(input_ids)

        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            do_sample=False,
            num_beams=1,
            temperature=1.0,
            top_p=1.0,
            use_cache=True,
            max_new_tokens=512
        )[0]

        # generate() returns prompt + continuation; decode only the newly
        # generated tokens so the answer does not echo the prompt.
        prompt_len = input_ids.size(1)
        response = tokenizer.decode(output[prompt_len:], skip_special_tokens=True)
        return response.strip()

    except Exception as e:
        return f" Error processing {image_path}: {e}"


In [None]:
# Run on all images and collect responses
import csv

# Query the model for every converted image, echoing each answer as it arrives.
results = []
for path in jpeg_paths:
    answer = ask_chexagent(path, question)
    results.append((path, answer))
    print(f" {Path(path).name} → {answer}")

# newline="" is what the csv module requires to avoid blank rows on Windows;
# an explicit UTF-8 encoding keeps non-ASCII text in answers writable
# regardless of the platform's default encoding.
with open("chexagent_results.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Image", "Answer"])
    writer.writerows(results)
