In [1]:
from transformers import AutoModelForCausalLM, AutoProcessor
from pathlib import Path
import torch
from tqdm import tqdm
import pandas as pd
import glob


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
local_model_path = Path("maira-2")

model = AutoModelForCausalLM.from_pretrained(local_model_path, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(local_model_path, trust_remote_code=True)

device = torch.device("cuda")
model = model.eval()
model = model.to(device)


Loading checkpoint shards: 100%|██████████████████| 6/6 [00:06<00:00,  1.13s/it]


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

# PTX

In [None]:
root_dir = Path('/well/papiez/users/hri611/python/foundation-models-radiology')

jpg_paths = glob.glob(str(root_dir / 'PTX Head to Head Study Data' / '**/*.jpg'), recursive=True)

In [None]:
predictions = []
grounded_predictions = []

for image_path in tqdm(jpg_paths):
    img = Image.open(image_path)

    # non-grounded report
    processed_inputs = processor.format_and_preprocess_reporting_input(
        current_frontal=img,
        current_lateral=None,
        prior_frontal=None,  # Our example has no prior
        indication=None,
        technique='PA view of the chest',
        comparison=None,
        prior_report=None,  # Our example has no prior
        return_tensors="pt",
        get_grounding=False,  # For this example we generate a non-grounded report
    )

    processed_inputs = processed_inputs.to(device)
    with torch.no_grad():
        output_decoding = model.generate(
            **processed_inputs,
            max_new_tokens=300,  # Set to 450 for grounded reporting
            use_cache=True,
        )
    prompt_length = processed_inputs["input_ids"].shape[-1]
    decoded_text = processor.decode(output_decoding[0][prompt_length:], skip_special_tokens=True)
    decoded_text = decoded_text.lstrip()  # Findings generation completions have a single leading space
    prediction = processor.convert_output_to_plaintext_or_grounded_sequence(decoded_text)

    # grounded report
    processed_inputs = processor.format_and_preprocess_phrase_grounding_input(
        frontal_image= img,
        phrase='Pneumothorax',
        return_tensors="pt",
    )

    processed_inputs = processed_inputs.to(device)
    with torch.no_grad():
        output_decoding = model.generate(
            **processed_inputs,
            max_new_tokens=150,
            use_cache=True,
        )
    prompt_length = processed_inputs["input_ids"].shape[-1]
    decoded_text = processor.decode(output_decoding[0][prompt_length:], skip_special_tokens=True)

    try: # some errors for images because of decoded_text output
        grounded_prediction = processor.convert_output_to_plaintext_or_grounded_sequence(decoded_text)

        # adjust bbox coordinates for image size
        for i in range(len(grounded_prediction)):
            if grounded_prediction[i][1] is None:
                continue
            else:
                coords = grounded_prediction[i][1][0]
                adjusted_coords = processor.adjust_box_for_original_image_size(coords, width = img.size[0], height = img.size[1])
                grounded_prediction[i][1][0] = adjusted_coords

    except Exception as e:
        print(f"Error processing image {image_path}: {e}")
        grounded_prediction = None

    predictions.append(prediction)
    grounded_predictions.append(grounded_prediction)

maira_results_df = pd.DataFrame()
maira_results_df['image_path'] = jpg_paths
maira_results_df['prediction'] = predictions
maira_results_df['grounded_prediction'] = grounded_predictions

maira_results_df['image_path'] = maira_results_df['image_path'].apply(lambda x: x.replace('/well/papiez/users/hri611/python/foundation-models-radiology/PTX Head to Head Study Data/', ''))

maira_results_df.to_csv('ptx_maira_scores.csv', index=False)


 11%|████████▏                                                                  | 45/413 [07:31<1:18:20, 12.77s/it]

Error processing image /well/papiez/users/hri611/python/foundation-models-radiology/PTX Head to Head Study Data/641c8fb92052b371c8ae07739b3e3165/8085bd0484a67696e4ea94210be0af57/0d485bfdf8efe60bce38fa2e0512f1bc/7674a6468f2b10c10dba634e1d5a3524.jpg: 


 28%|█████████████████████▎                                                      | 116/413 [18:06<41:09,  8.32s/it]

Error processing image /well/papiez/users/hri611/python/foundation-models-radiology/PTX Head to Head Study Data/11739350140b85317fa7e1583f1c8b05/5c24a3bca2a5c1e00df087ace2743b37/e9224f78ba18d6eb80121bd49b5df0c6/7eb5a9238462b0910dac0a400301c7e1.jpg: 


 73%|███████████████████████████████████████████████████████▏                    | 300/413 [48:29<24:02, 12.76s/it]

Error processing image /well/papiez/users/hri611/python/foundation-models-radiology/PTX Head to Head Study Data/2aa4e51b9bbd537ee166b9b02716ab21/085e8ec77cac84257529ba7796ce552b/5cafb839b2b0a2c8c9fcc9e64e2857a2/c6a0076ae7bdc9184285740b16573c96.jpg: 


100%|██████████████████████████████████████████████████████████████████████████| 413/413 [1:06:49<00:00,  9.71s/it]


# FTX

In [None]:
root_dir = Path('/well/papiez/users/hri611/python/foundation-models-radiology')

dicom_paths = glob.glob(str(root_dir / 'H2H Study - OUH Cases.nosync' / '**/*.jpg'), recursive=True)


In [None]:
predictions = []
grounded_predictions = []

for image_path in tqdm(jpg_paths):
    img = Image.open(image_path)

    # non-grounded report
    processed_inputs = processor.format_and_preprocess_reporting_input(
        current_frontal=img,
        current_lateral=None,
        prior_frontal=None,  # Our example has no prior
        indication=None,
        technique='X-ray',
        comparison=None,
        prior_report=None,  # Our example has no prior
        return_tensors="pt",
        get_grounding=False,  # For this example we generate a non-grounded report
    )

    processed_inputs = processed_inputs.to(device)
    with torch.no_grad():
        output_decoding = model.generate(
            **processed_inputs,
            max_new_tokens=300,  # Set to 450 for grounded reporting
            use_cache=True,
        )
    prompt_length = processed_inputs["input_ids"].shape[-1]
    decoded_text = processor.decode(output_decoding[0][prompt_length:], skip_special_tokens=True)
    decoded_text = decoded_text.lstrip()  # Findings generation completions have a single leading space
    prediction = processor.convert_output_to_plaintext_or_grounded_sequence(decoded_text)

    # grounded report
    processed_inputs = processor.format_and_preprocess_phrase_grounding_input(
        frontal_image= img,
        phrase='Fracture',
        return_tensors="pt",
    )

    processed_inputs = processed_inputs.to(device)
    with torch.no_grad():
        output_decoding = model.generate(
            **processed_inputs,
            max_new_tokens=150,
            use_cache=True,
        )
    prompt_length = processed_inputs["input_ids"].shape[-1]
    decoded_text = processor.decode(output_decoding[0][prompt_length:], skip_special_tokens=True)

    try: # some errors for images because of decoded_text output
        grounded_prediction = processor.convert_output_to_plaintext_or_grounded_sequence(decoded_text)

        # adjust bbox coordinates for image size
        for i in range(len(grounded_prediction)):
            if grounded_prediction[i][1] is None:
                continue
            else:
                coords = grounded_prediction[i][1][0]
                adjusted_coords = processor.adjust_box_for_original_image_size(coords, width = img.size[0], height = img.size[1])
                grounded_prediction[i][1][0] = adjusted_coords

    except Exception as e:
        print(f"Error processing image {image_path}: {e}")
        grounded_prediction = None

    predictions.append(prediction)
    grounded_predictions.append(grounded_prediction)

maira_results_df = pd.DataFrame()
maira_results_df['image_path'] = jpg_paths
maira_results_df['prediction'] = predictions
maira_results_df['grounded_prediction'] = grounded_predictions

maira_results_df['image_path'] = maira_results_df['image_path'].apply(lambda x: x.replace('/well/papiez/users/hri611/python/foundation-models-radiology/PTX Head to Head Study Data/', ''))

maira_results_df.to_csv('ftx_maira_scores.csv', index=False)
