# Assessment of Large Vision-Language Models for Radiological Image Analysis

Code accompanying this paper:

*Strotzer QD, Nieberle F, Kupke LS, Napodano G, Muertz A, Meiler S, Einspieler I, Rennert J, Strotzer M, Wiesinger I, Wendl C, Stroszczynski C, Hamer O, Schicho A (2024). Toward Foundation Models in Radiology? Quantitative Assessment of GPT-4V’s Multimodal and Multianatomic Region Capabilities. Radiology (in press)*

## Prerequisites

In [None]:
from pathlib import Path
import os
import glob
import numpy as np
import cv2
from matplotlib import pyplot as plt
from tqdm import tqdm
import pandas as pd
import base64
import openai
import re

In [None]:
# Folder containing the raw input images (searched recursively) and the folder
# that receives the preprocessed images and, later, the model outputs.
SOURCE_PATH = Path("PATH/TO/RAW/IMAGES")
RESULTS_PATH = Path("PATH/TO/RESULTS/FOLDER")

# With recursive=True the "**" pattern also matches directories (including
# SOURCE_PATH itself). Those cannot be loaded as images, so keep regular files only.
files = [
    path
    for path in glob.glob(os.path.join(SOURCE_PATH, "**"), recursive=True)
    if os.path.isfile(path)
]
print(len(files))

## Preprocess Images

### Crop

In [None]:
def crop_to_aspect_ratio(img, max_aspect_ratio=1.33):
    """
    Center-crops the longer side of an image so that long side / short side does not
    exceed `max_aspect_ratio` (default 1.33, i.e. roughly 4:3).

    Images whose aspect ratio is already at or below the limit are returned unchanged.
    Both landscape and portrait images are handled; only the longer side is cropped.

    Args:
    -----
    img : numpy.ndarray
        The input image array. Can be a 2D grayscale image or a 3D image with multiple channels
        (height is axis 0, width is axis 1).
    max_aspect_ratio : float, optional
        Largest allowed ratio of the longer to the shorter side. Expected to be >= 1.

    Returns:
    -----
    numpy.ndarray
        The cropped image (a slice of the input array), or the original image if no cropping is needed.
    """

    height, width = img.shape[:2]
    long_side = max(width, height)
    short_side = min(width, height)

    if long_side / short_side <= max_aspect_ratio:
        return img

    # Length of the long side after cropping, and the offset that keeps the crop centered.
    new_long_side = int(short_side * max_aspect_ratio)
    start = int(np.ceil((long_side - new_long_side) / 2))
    stop = start + new_long_side

    # Slicing only the first two axes works for 2D and multi-channel arrays alike.
    if width > height:
        return img[:, start:stop]
    return img[start:stop, :]

### Resize

In [None]:
def resize_image(img, short_side=768):
    """
    Resize an image so that its shorter side is `short_side` pixels (default 768)
    while preserving the aspect ratio.

    The image is always rescaled, so inputs whose shorter side is below `short_side`
    are enlarged. The longer side is scaled by the same factor and truncated to an integer.

    Args:
    -----
    img : numpy.ndarray
        The input image as a NumPy array (height is axis 0, width is axis 1).
    short_side : int, optional
        Target length in pixels of the shorter image side.

    Returns:
    -----
    numpy.ndarray
        The resized image with the new dimensions.
    """

    height, width = img.shape[:2]

    scale_ratio = short_side / min(width, height)

    if width < height:
        new_width = short_side
        new_height = int(height * scale_ratio)
    else:
        new_height = short_side
        new_width = int(width * scale_ratio)

    # cv2.resize expects the target size as (width, height).
    resized_img = cv2.resize(
        img,
        (new_width, new_height),
        interpolation=cv2.INTER_AREA,
    )

    return resized_img

### Normalize

In [None]:
def z_score_normalize_and_rescale(image):
    """
    Perform Z-score normalization on a grayscale image and rescale it to 0-255.

    The image is converted to float32, standardized to zero mean and unit standard
    deviation, min-max rescaled to the range 0-255 and cast to uint8 (fractional
    values are truncated by the cast).

    A constant image (all pixels equal) has zero standard deviation and zero value
    range; it is returned as an all-zero uint8 image instead of dividing by zero.

    Args:
    -----
    image : numpy.ndarray
        A 2D numpy array representing a grayscale image.

    Returns:
    -----
    numpy.ndarray
        A uint8 numpy array of the rescaled normalized image, same shape as the input.
    """

    image = image.astype(np.float32)

    # Guard against division by zero (std == 0 and max == min) for flat images.
    if image.max() == image.min():
        return np.zeros(image.shape, dtype=np.uint8)

    mean = np.mean(image)
    std = np.std(image)

    normalized_image = (image - mean) / std

    min_val = normalized_image.min()
    max_val = normalized_image.max()
    rescaled_image = 255 * (normalized_image - min_val) / (max_val - min_val)

    rescaled_image = rescaled_image.astype(np.uint8)

    return rescaled_image

In [None]:
def img_stats_and_show(img):
    """
    Helper function for testing.

    Prints the type, shape, min, max, mean and standard deviation of an image,
    then displays the image in grayscale followed by a histogram of its pixel values.

    Args:
    -----
    img : numpy.ndarray
        The image to inspect.
    """

    print("type", type(img))
    print("shape", img.shape)
    print("min", np.min(img))
    print("max", np.max(img))
    print("mean", np.mean(img))
    print("std", np.std(img))

    plt.imshow(img, cmap="Greys_r")
    plt.show()

    # Flatten first: plt.hist treats a 2D array as one dataset per column, which
    # would draw a separate histogram for every image column instead of one
    # histogram over all pixel values.
    plt.hist(np.ravel(img), bins='auto')
    plt.show()

### Process all Files

In [None]:
# cv2.imwrite does not create missing folders, so make sure the target exists.
os.makedirs(RESULTS_PATH, exist_ok=True)

for file in tqdm(files):
    # Load as single-channel grayscale (cv2.IMREAD_GRAYSCALE == 0).
    img = cv2.imread(str(file), cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None instead of raising when a path cannot be decoded
    # as an image (e.g. a directory or a non-image file); skip such entries.
    if img is None:
        print("Skipping unreadable file", file)
        continue

    cropped_img = crop_to_aspect_ratio(img)

    resized_img = resize_image(cropped_img)

    normalized_img = z_score_normalize_and_rescale(resized_img)

    # Output is named after the input file's stem; inputs that share a stem
    # overwrite each other.
    out_path = os.path.join(RESULTS_PATH, f"{Path(file).stem}.png")

    # cv2.imwrite reports failure through its return value rather than an exception.
    if not cv2.imwrite(out_path, normalized_img):
        print("Could not write", out_path)

## API
- https://platform.openai.com/docs/guides/vision

### Prerequisites

In [None]:
# Prefer the OPENAI_API_KEY environment variable so the secret does not have to be
# stored in the notebook; the placeholder below is used only when it is not set.
API_KEY = os.environ.get("OPENAI_API_KEY", "PLACE_YOUR_API_KEY_HERE")

CLIENT = openai.OpenAI(api_key=API_KEY)

# System message sent with every request.
SYSTEM_PROMPT = "PLACE_SYSTEM_PROMPT_HERE"

# Label used as the file-name prefix when checking for existing reports and
# when collecting the reports into the results table.
MODEL_NAME = "MODEL_NAME"

# Preprocessed PNG images, searched recursively below RESULTS_PATH.
files = glob.glob(os.path.join(RESULTS_PATH, "**", "*.png"), recursive=True)
print(len(files), "Image files found in", RESULTS_PATH)

# Folder that receives one text report per image.
os.makedirs(os.path.join(RESULTS_PATH, "model_output"), exist_ok=True)

def encode_image(image_path):
    """
    Reads a file in binary mode and returns its content as base64 text,
    ready to be embedded in the prompt.

    Args:
    -----
    image_path : str
        Location of the image file on disk.

    Returns:
    -----
    str
        The file's bytes, base64-encoded and decoded to a UTF-8 string.
    """

    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()

    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

In [None]:
def create_GPT_response(
    img_path,
    model="gpt-4-vision-preview",
    max_tokens="SET_NUMBER_OF_MAX_TOKENS_HERE",
    temperature="SET_TEMPERATURE_HERE",
):
    """
    Sends one image to the chat completions endpoint and returns the model's response.

    The request consists of the system message SYSTEM_PROMPT and a user message that
    contains only the base64-encoded PNG image (requested with "high" detail); any
    textual instruction therefore has to be part of SYSTEM_PROMPT.

    Args:
    -----
    img_path : str
        The file path of the image to be encoded and sent to the GPT model.
    model : str, optional
        Name of the model to query. Defaults to "gpt-4-vision-preview".
    max_tokens : int, optional
        Maximum number of tokens for the response. The default is a placeholder
        string that must be replaced with a number (here or by the caller).
    temperature : float, optional
        Controls the randomness of the output. The default is a placeholder
        string that must be replaced with a number (here or by the caller).

    Returns:
    -----
    The response object returned by CLIENT.chat.completions.create
    (callers in this file read `.choices[0].message.content` and `.usage` from it).

    Notes:
    -----
        - Consider upgrading to the newer "gpt-4o" model as the gpt-4-vision-preview will soon be deprecated.
    """
    base64_image = encode_image(img_path)

    response = CLIENT.chat.completions.create(
        model=model,  # default "gpt-4-vision-preview" is soon to be deprecated. Newer model: "gpt-4o"
        messages=[
            {
                "role": "system",
                "content": SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            # Image is passed inline as a base64 data URL.
                            "url": f"data:image/png;base64,{base64_image}",
                            "detail": "high",
                        },
                    },
                ],
            },
        ],
        stream=False,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    return response

In [None]:
# Query the model once per image and store the answer plus token usage as a text file.
for file in tqdm(files):
    # Use the same MODEL_NAME-based file name for the existence check and for
    # writing, so finished images are skipped on re-runs and the results table
    # (which strips the MODEL_NAME prefix) can find the reports.
    report_path = os.path.join(
        RESULTS_PATH, "model_output", f"{MODEL_NAME}_{Path(file).stem}.txt"
    )

    if os.path.isfile(report_path):
        print("Report already exists for", file)
        continue

    try:
        resp = create_GPT_response(file)
    # The 1.x openai package (which provides openai.OpenAI) exposes its exception
    # classes at the top level; there is no openai.error module.
    except openai.BadRequestError as e:
        print(file, e)
        continue

    # NOTE(review): guards against a response without text content, which would
    # otherwise break the string formatting below - confirm this can occur.
    content = resp.choices[0].message.content or ""
    usage = resp.usage

    # Report format: model text followed by a ";"-terminated token-usage footer.
    final_string = (
        f"{content};\n"
        f"prompt_tokens: {usage.prompt_tokens};\n"
        f"response_tokens: {usage.completion_tokens};\n"
        f"total_tokens: {usage.total_tokens};"
    )

    with open(report_path, "w") as textfile:
        textfile.write(final_string)

### Create Results Table

In [None]:
# Load the case table; its first column becomes the index holding the case IDs.
res_table = pd.read_excel(
    os.path.join(
        SOURCE_PATH, "EXCEL_TABLE_CONTAINING_IDs.xlsx"
    ),
    header=0,
    index_col=0,
)

txtfiles = glob.glob(os.path.join(RESULTS_PATH, "model_output", "*.txt"))

# NOTE(review): case IDs derived from file names are strings; this assumes the
# index of the Excel table holds strings as well - confirm.
case_ids = set(res_table.index)

file_prefix = MODEL_NAME + "_"
token_fields = ("prompt_tokens", "response_tokens", "total_tokens")

for file in txtfiles:
    stem = Path(file).stem

    # Strip only a leading "<MODEL_NAME>_"; the rest of the stem is the case ID.
    case_id = stem[len(file_prefix):] if stem.startswith(file_prefix) else stem

    if case_id not in case_ids:
        continue

    data = Path(file).read_text()
    res_table.loc[case_id, "Output_text"] = data

    for field in token_fields:
        matches = re.findall(rf"{field}\s*:\s*(\d+);", data)

        if not matches:
            print("No", field, "entry found in", file)
            continue

        # The token-usage footer is appended after the model text, so the last
        # match is the footer even if the model text contains a similar pattern.
        res_table.loc[case_id, field] = int(matches[-1])


res_table.to_excel(
    os.path.join(
        RESULTS_PATH, "OUTPUT_TABLE_NAME.xlsx"
    )
)