## Download files and images


In [None]:
!unzip /content/images_gpt4_evaluation_vindr.zip -d /content/images_validation_vindr

In [None]:
!pip install torchmetrics

In [None]:
import json

In [None]:
# Path to the formatted evaluation results JSON; the same file is read again
# further down to collect the label vocabulary and to compute the metrics.
llava_result = '/content/eval_all_batches_vindr_formatted.json'

In [None]:
# Load every evaluation record into the notebook-level `data` list; later
# cells read it directly (`data[0]`, `index_image`).
with open(llava_result, 'r') as f:
  data = json.load(f)

In [None]:
# data[0]

In [None]:
import json
import os

def extract_unique_labels(file):
    """
    Extract the unique labels found in a single JSON results file.

    Args:
        file (str): Path to a JSON file holding a list of records. Each record
            that carries a non-empty "labels" entry contributes its items;
            records without one (or with an empty/null one) are skipped.

    Returns:
        list: A sorted list of the unique labels across all records.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(file, "r", encoding="utf-8") as f:
        records = json.load(f)

    unique_labels = set()
    for record in records:
        # A null "labels" value would make set.update() raise, so skip it.
        if "labels" in record and record["labels"]:
            unique_labels.update(record["labels"])
    return sorted(unique_labels)

In [None]:
# Build the label vocabulary from the results file; it is passed to the
# metric functions below as the set of abnormality names to look for.
json_file = "/content/eval_all_batches_vindr_formatted.json"
abnormality_list = extract_unique_labels(json_file)

In [None]:
len(abnormality_list)

In [None]:
# Lower-case the vocabulary; the metric code also lower-cases predicted and
# ground-truth labels before comparing them.
abnormality_list = [i.lower() for i in abnormality_list]

In [None]:
abnormality_list

## Calculate mAP and IoU

In [None]:
from sklearn.metrics import precision_recall_fscore_support
import numpy as np
import re
from torchvision.ops import box_iou
import torch
import os
import re
from collections import OrderedDict
import json
from sklearn.metrics import precision_score, recall_score, f1_score
from collections import defaultdict

In [None]:
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two corner-format boxes.

    Indices 0/1 of each box are treated as the minimum corner and indices 2/3
    as the maximum corner. The result is 0.0 when the union area is not
    positive.
    """
    # Corners of the overlapping rectangle (may be inverted if disjoint).
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # Clamp each side at zero so disjoint boxes yield an empty intersection.
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap = overlap_w * overlap_h

    area_a = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_b = (box2[2] - box2[0]) * (box2[3] - box2[1])
    combined = area_a + area_b - overlap

    if combined > 0:
        return overlap / combined
    return 0.0

# Extract bounding boxes from output
def extract_bboxes(output, abnormality_list):
    """
    Extract abnormalities and their bounding boxes from the `output` field.

    Each known abnormality name found in the text is paired with the next
    bracketed coordinate list that follows it on the same line. Names are
    matched case-insensitively and only as whole words, so a short label such
    as "ild" is not found inside "mild". Coordinate lists that do not parse
    into exactly four numbers are ignored.

    Args:
        output (str): Text output containing abnormalities and bounding boxes.
        abnormality_list (list): List of valid abnormalities to match.

    Returns:
        dict: Dictionary mapping lower-cased abnormalities to predicted
        bounding boxes (lists of four floats). When the same abnormality
        appears several times, the last parsed box wins.
    """
    # Longest labels first so a label that is a prefix of another one cannot
    # shadow it in the alternation; empty labels would match everywhere.
    labels = sorted(
        {label for label in abnormality_list if label},
        key=lambda label: (-len(label), label),
    )
    if not labels or not output:
        return {}

    alternatives = "|".join(map(re.escape, labels))
    # Lookarounds instead of \b so labels containing punctuation still work.
    pattern = rf"(?<!\w)({alternatives})(?!\w).*?\[([\d\.,\s]+)\]"

    anatomy_to_bbox = {}
    for abnormality, bbox_coords in re.findall(pattern, output, flags=re.IGNORECASE):
        try:
            bbox = [float(value) for value in bbox_coords.split(",")]
        except ValueError:
            # e.g. "[0.1, , 0.3]" or "[1.2.3]": not a usable coordinate list.
            continue
        if len(bbox) != 4:
            # Downstream IoU code indexes four corners; skip anything else.
            continue
        anatomy_to_bbox[abnormality.lower()] = bbox
    return anatomy_to_bbox

def calculate_metrics_for_image(predictions, ground_truth, iou_threshold=0.5):
    """
    Score one image's predicted boxes against its ground-truth boxes.

    Labels are compared case-insensitively. A prediction whose label exists in
    the ground truth is a TP when its IoU reaches the threshold and an FP
    otherwise; a prediction with an unknown label is an FP. Every ground-truth
    label without a TP counts as one FN.

    Args:
        predictions (dict): Predicted abnormalities and bounding boxes.
        ground_truth (dict): Ground truth abnormalities and bounding boxes.
        iou_threshold (float): IoU threshold for determining TP.

    Returns:
        dict: Per-class counters {"TP", "FP", "FN", "IoU"} keyed by label.
        list: IoUs of every prediction whose label was present in the
        ground truth (both TP and FP).
    """
    def _blank():
        # Fresh counter record for a label seen for the first time.
        return {"TP": 0, "FP": 0, "FN": 0, "IoU": []}

    preds = {name.lower(): box for name, box in predictions.items()}
    truths = {name.lower(): box for name, box in ground_truth.items()}

    per_label = {}
    overlaps = []
    hit_labels = set()

    for name, box in preds.items():
        if name not in truths:
            # Label absent from the ground truth: plain false positive.
            per_label.setdefault(name, _blank())["FP"] += 1
            continue

        overlap = calculate_iou(box, truths[name])
        overlaps.append(overlap)

        entry = per_label.setdefault(name, _blank())
        if overlap >= iou_threshold:
            entry["TP"] += 1
            hit_labels.add(name)
        else:
            entry["FP"] += 1
        entry["IoU"].append(overlap)

    for name in truths:
        if name in hit_labels:
            continue
        per_label.setdefault(name, _blank())["FN"] += 1

    return per_label, overlaps


def calculate_global_metrics(data, abnormality_list, iou_threshold=0.5):
    """
    Aggregate detection metrics over every record in `data`.

    Args:
        data (list): Records providing an "output" string plus parallel
            "labels" and "boxes" lists.
        abnormality_list (list): List of valid abnormalities to look for in
            each record's "output".
        iou_threshold (float): IoU threshold for determining TP.

    Returns:
        dict: With the keys
            "mAP": unweighted mean over classes of precision TP / (TP + FP)
                at the single IoU threshold (0 when there are no classes),
            "mAR": pooled recall, total TP / (total TP + total FN),
            "Mean IoU": mean IoU over all label-matched prediction/ground-truth
                pairs,
            "Per-Class Metrics": per-label counters extended with "AP", "AR"
                and "Average IoU".
    """
    total_tp, total_fn = 0, 0
    total_ious = []
    per_class_metrics = defaultdict(lambda: {"TP": 0, "FP": 0, "FN": 0, "IoU": []})

    for item in data:
        predictions = extract_bboxes(item["output"], abnormality_list)

        # NOTE: keyed by label, so when a record lists several boxes with the
        # same label only the last one is kept for matching.
        ground_truth = {label.lower(): box for label, box in zip(item["labels"], item["boxes"])}

        image_metrics, matched_ious = calculate_metrics_for_image(predictions, ground_truth, iou_threshold)
        total_ious.extend(matched_ious)

        for anatomy, metrics in image_metrics.items():
            class_totals = per_class_metrics[anatomy]
            class_totals["TP"] += metrics["TP"]
            class_totals["FP"] += metrics["FP"]
            class_totals["FN"] += metrics["FN"]
            class_totals["IoU"].extend(metrics["IoU"])

            total_tp += metrics["TP"]
            total_fn += metrics["FN"]

    precision_sum = 0
    for metrics in per_class_metrics.values():
        tp, fp, fn = metrics["TP"], metrics["FP"], metrics["FN"]
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        average_iou = sum(metrics["IoU"]) / len(metrics["IoU"]) if metrics["IoU"] else 0

        precision_sum += precision
        metrics["AP"] = precision
        metrics["AR"] = recall
        metrics["Average IoU"] = average_iou

    # Guard the whole division: with no classes the mean precision is 0.
    mAP = precision_sum / len(per_class_metrics) if per_class_metrics else 0
    mAR = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0
    mean_iou = sum(total_ious) / len(total_ious) if total_ious else 0

    return {
        "mAP": mAP,
        "mAR": mAR,
        "Mean IoU": mean_iou,
        "Per-Class Metrics": per_class_metrics,
    }

def process_json_file(json_file_path, abnormality_list, iou_threshold=0.5):
    """
    Load evaluation records from a JSON file and score them.

    Args:
        json_file_path (str): Path to the JSON file.
        abnormality_list (list): List of valid abnormalities.
        iou_threshold (float): IoU threshold for determining matches.

    Returns:
        dict: Metrics for the entire dataset, as produced by
        `calculate_global_metrics`.
    """
    with open(json_file_path, "r") as handle:
        records = json.load(handle)

    dataset_metrics = calculate_global_metrics(records, abnormality_list, iou_threshold)
    return dataset_metrics
# Score the formatted evaluation file using the default IoU threshold of
# `process_json_file` (0.5); `results` is displayed in the next cell.
json_file_path = "/content/eval_all_batches_vindr_formatted.json"

results = process_json_file(json_file_path, abnormality_list)



In [None]:
results

## Visualize the output

In [None]:
data[0]


In [None]:
def index_image(image_path, records=None):
  """
  Return the first record whose 'img_path' equals `image_path`.

  Args:
      image_path (str): Value to look for in each record's 'img_path' field.
      records (iterable, optional): Records to search. Defaults to the
          notebook-level `data` list when omitted.

  Returns:
      dict or None: The first matching record, or None when nothing matches.
  """
  if records is None:
    records = data
  return next((record for record in records if record['img_path'] == image_path), None)

In [None]:
image = index_image('/cluster/dataset/medinfmk/ARGON/VinDr-CXR/test_jpg/5280b2f1ea5c084e2bd8f2fe9447ab84.jpg')
image

In [None]:
image = index_image('/cluster/dataset/medinfmk/ARGON/VinDr-CXR/test_jpg/1ca5675a7a3042ccb0334a42d14d145f.jpg')
image

In [None]:
image = index_image('/cluster/dataset/medinfmk/ARGON/VinDr-CXR/test_jpg/7a74f4e463b72f018838f26cf7aabdf2.jpg')
image

In [None]:
image = index_image('/cluster/dataset/medinfmk/ARGON/VinDr-CXR/test_jpg/552e53af89d7ae4c482cb935ac1542b7.jpg')
image

In [None]:
image = index_image('/cluster/dataset/medinfmk/ARGON/VinDr-CXR/test_jpg/b8756da75ca0ff37e9124e4480fa5c51.jpg')
image

In [None]:
image = index_image('/cluster/dataset/medinfmk/ARGON/VinDr-CXR/test_jpg/f5de7e513d2c4085e8b7fd6b5e5db57a.jpg')
image

In [None]:
# One hand-picked prediction / ground-truth box pair per abnormality, each
# tied to one of the sample images.
# pleural thickening
pleural_thickening = {'image_path': '/content/images_validation_vindr/5280b2f1ea5c084e2bd8f2fe9447ab84.jpg', 'predictions': [0.22, 0.14, 0.36, 0.18], 'ground_truth': [0.222, 0.109, 0.317, 0.207]}
no_finding = {'image_path': '/content/images_validation_vindr/1ca5675a7a3042ccb0334a42d14d145f.jpg', 'predictions': [0, 0, 0, 0], 'ground_truth': [0, 0, 0, 0]}
aortic_enlargement = {'image_path': '/content/images_validation_vindr/7a74f4e463b72f018838f26cf7aabdf2.jpg', 'predictions': [0.48, 0.22, 0.6, 0.36], 'ground_truth': [0.34, 0.59, 0.76, 0.74]}
pulmonary_fibrosis = {'image_path': '/content/images_validation_vindr/552e53af89d7ae4c482cb935ac1542b7.jpg', 'predictions': [0.14, 0.3, 0.32, 0.52], 'ground_truth': [0.105, 0.225, 0.278, 0.454]}
cardiomegaly = {'image_path': '/content/images_validation_vindr/b8756da75ca0ff37e9124e4480fa5c51.jpg', 'predictions': [0.42, 0.48, 0.76, 0.61], 'ground_truth': [0.347, 0.534, 0.756, 0.733]}
# NOTE(review): the name says "capacity", but this ground-truth box equals a
# 'Lung Opacity' ground-truth box listed in `sample` - confirm the intended name.
# No trailing comma here: it would turn this dict into a 1-tuple.
lung_capacity = {'image_path': '/content/images_validation_vindr/f5de7e513d2c4085e8b7fd6b5e5db57a.jpg', 'predictions': [0, 0, 0, 0], 'ground_truth': [0.111, 0.501, 0.17, 0.568]}

In [None]:
{'output': ' The analysis reveals a pleural thickening located at the coordinates [0.14, 0.79, 0.18, 0.84]; a pleural effusion located at the coordinates [0.14, 0.79, 0.18, 0.84]; a lung opacity located at the coordinates [0.14, 0.3, 0.32, 0.52]; a pulmonary fibrosis located at the coordinates [0.14, 0.3, 0.32, 0.52].',
 'instr': 'Are there identifiable abnormalities on this Chest X-ray? If so, where are they located?',
 'answer': 'Notably, I can identify a pleural thickening located at the coordinates [0.11, 0.24, 0.27, 0.29] and [0.43, 0.2, 0.58, 0.3]; a pulmonary fibrosis located at the coordinates [0.1, 0.23, 0.28, 0.45] and [0.41, 0.2, 0.58, 0.45].',
 'img_path': '/cluster/dataset/medinfmk/ARGON/VinDr-CXR/test_jpg/552e53af89d7ae4c482cb935ac1542b7.jpg',
 'img': '/cluster/dataset/medinfmk/ARGON/VinDr-CXR/test_jpg/552e53af89d7ae4c482cb935ac1542b7.jpg',
 'labels': ['Pleural thickening',
  'Pleural thickening',
  'Pulmonary fibrosis',
  'Pulmonary fibrosis'],
 'boxes': [[0.112, 0.241, 0.269, 0.292],
  [0.429, 0.203, 0.576, 0.297],
  [0.105, 0.225, 0.278, 0.454],
  [0.41, 0.198, 0.576, 0.451]]}

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

# Predicted and ground-truth boxes for the six sample images drawn by
# `plot_images_with_boxes`. Boxes are normalized [x_min, y_min, x_max, y_max].
sample = {
    "Sample_01": {
        "image_path": "/content/images_validation_vindr/5280b2f1ea5c084e2bd8f2fe9447ab84.jpg",
        "predictions": {
            "labels": ["Pleural thickening", "Pleural thickening", "Aortic enlargement", "Lung opacity", "Pulmonary fibrosis"],
            "boxes": [[0.22, 0.14, 0.36, 0.18], [0.5, 0.14, 0.61, 0.17], [0.41, 0.26, 0.5, 0.38], [0.22, 0.2, 0.36, 0.41], [0.22, 0.2, 0.36, 0.41]],
        },
        "ground_truth": {
            "labels": ["Pulmonary fibrosis", "Pulmonary fibrosis", "Atelectasis", "Pleural thickening", "Pleural thickening", "Pulmonary fibrosis"],
            "boxes": [[0.22, 0.316, 0.276, 0.382], [0.26, 0.193, 0.343, 0.305], [0.247, 0.109, 0.4, 0.313], [0.222, 0.109, 0.317, 0.207], [0.514, 0.101, 0.627, 0.154], [0.508, 0.141, 0.595, 0.293]],
        },
    },
    "Sample_02": {
        "image_path": "/content/images_validation_vindr/1ca5675a7a3042ccb0334a42d14d145f.jpg",
        "predictions": {
            "labels": [],
            "boxes": [],
        },
        "ground_truth": {
            "labels": [],
            "boxes": [],
        },
    },
    "Sample_03": {
        "image_path": "/content/images_validation_vindr/7a74f4e463b72f018838f26cf7aabdf2.jpg",
        "predictions": {
            "labels": ["Aortic enlargement", "Cardiomegaly"],
            "boxes": [[0.48, 0.22, 0.6, 0.36], [0.34, 0.59, 0.76, 0.74]],
        },
        "ground_truth": {
            "labels": ["Aortic enlargement", "Nodule/Mass"],
            "boxes": [[0.34, 0.59, 0.76, 0.74], [0.103, 0.588, 0.13, 0.626]],
        },
    },
    "Sample_04": {
        "image_path": "/content/images_validation_vindr/552e53af89d7ae4c482cb935ac1542b7.jpg",
        "predictions": {
            "labels": ["Pleural thickening", "Pleural effusion", "Lung opacity", "Pulmonary fibrosis"],
            "boxes": [[0.14, 0.79, 0.18, 0.84], [0.14, 0.79, 0.18, 0.84], [0.14, 0.3, 0.32, 0.52], [0.14, 0.3, 0.32, 0.52]],
        },
        # Ground truth taken from the raw evaluation record of image 552e...
        # (labels 'Pleural thickening' x2 and 'Pulmonary fibrosis' x2).
        "ground_truth": {
            "labels": ["Pleural thickening", "Pleural thickening", "Pulmonary fibrosis", "Pulmonary fibrosis"],
            "boxes": [[0.112, 0.241, 0.269, 0.292], [0.429, 0.203, 0.576, 0.297], [0.105, 0.225, 0.278, 0.454], [0.41, 0.198, 0.576, 0.451]],
        },
    },
    "Sample_05": {
        "image_path": "/content/images_validation_vindr/b8756da75ca0ff37e9124e4480fa5c51.jpg",
        "predictions": {
            "labels": ["Aortic enlargement", "Aortic enlargement"],
            "boxes": [[0.42, 0.48, 0.76, 0.61], [0.54, 0.28, 0.64, 0.39]],
        },
        "ground_truth": {
            "labels": ["Cardiomegaly", "Aortic enlargement", "Calcification", "Pleural effusion", "ILD", "ILD"],
            "boxes": [[0.347, 0.534, 0.756, 0.733], [0.465, 0.314, 0.652, 0.512], [0.552, 0.317, 0.632, 0.413], [0.172, 0.646, 0.201, 0.695], [0.236, 0.429, 0.41, 0.632], [0.374, 0.229, 0.465, 0.371]],
        },
    },
    "Sample_06": {
        "image_path": "/content/images_validation_vindr/f5de7e513d2c4085e8b7fd6b5e5db57a.jpg",
        "predictions": {
            "labels": ["Aortic enlargement", "Aortic enlargement"],
            "boxes": [[0.39, 0.24, 0.48, 0.36], [0.28, 0.59, 0.6, 0.72]],
        },
        # Ground truth of image f5de...: its 'Lung Opacity' box is the one the
        # per-abnormality summary dicts associate with this image.
        "ground_truth": {
            "labels": ["Cardiomegaly", "Aortic enlargement", "ILD", "Lung Opacity"],
            "boxes": [[0.23, 0.55, 0.608, 0.775], [0.356, 0.249, 0.491, 0.448], [0.091, 0.275, 0.252, 0.655], [0.111, 0.501, 0.17, 0.568]],
        },
    },
}


In [None]:
def scale_bbox(box, img_width, img_height):
    """
    Convert a normalized corner-format box into pixel [x, y, width, height].

    Args:
        box (list): Bounding box coordinates [x_min, y_min, x_max, y_max].
        img_width (int): Width of the image.
        img_height (int): Height of the image.

    Returns:
        list: [x_min * img_width, y_min * img_height, box width in pixels,
        box height in pixels].
    """
    x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]

    left = x_min * img_width
    top = y_min * img_height
    width = (x_max - x_min) * img_width
    height = (y_max - y_min) * img_height

    return [left, top, width, height]

def _draw_labeled_boxes(ax, annotations, img_width, img_height, color):
    """Draw every (label, box) pair of `annotations` on `ax` in `color`."""
    for i, (label, box) in enumerate(zip(annotations["labels"], annotations["boxes"])):
        x, y, width, height = scale_bbox(box, img_width, img_height)
        ax.add_patch(
            patches.Rectangle(
                (x, y), width, height,
                linewidth=2, edgecolor=color, facecolor="none"
            )
        )
        # Each successive caption is shifted 10 px further up so captions of
        # boxes that start at the same corner do not sit on top of each other.
        ax.text(
            x, y - 10 - i * 10,
            label, color=color, fontsize=8,
            bbox=dict(facecolor="white", alpha=0.5, pad=0.4, edgecolor="none")
        )


def plot_images_with_boxes(data, num_cols=3, output_path="vindr_sample_images.pdf"):
    """
    Plot images with predicted (red) and ground truth (green) bounding boxes.

    The figure is saved to `output_path` and then shown. Images whose file is
    missing are reported and left as a blank cell.

    Args:
        data (dict): Maps a sample name to a dict with "image_path",
            "predictions" and "ground_truth"; the latter two each hold
            parallel "labels" and "boxes" lists.
        num_cols (int): Number of images per row.
        output_path (str): File the figure is written to.

    Returns:
        None
    """
    num_images = len(data)
    if num_images == 0:
        # plt.subplots cannot build a grid with zero rows.
        print("No images to plot.")
        return

    num_rows = (num_images + num_cols - 1) // num_cols
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so
    # flatten() always works.
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 5 * num_rows), squeeze=False)
    axes = axes.flatten()

    for idx, (key, value) in enumerate(data.items()):
        ax = axes[idx]
        image_path = value["image_path"]

        try:
            img = Image.open(image_path).convert("RGB")
        except FileNotFoundError:
            print(f"Image not found: {image_path}")
            # Blank the cell instead of leaving an empty framed axis behind.
            ax.axis("off")
            continue
        img_width, img_height = img.size

        ax.imshow(img, cmap="gray")
        ax.set_title(key.replace("_", " ").title())
        ax.axis("off")

        _draw_labeled_boxes(ax, value["predictions"], img_width, img_height, "red")
        _draw_labeled_boxes(ax, value["ground_truth"], img_width, img_height, "green")

    # Hide the unused trailing cells of the grid.
    for ax in axes[num_images:]:
        ax.axis("off")

    plt.tight_layout()
    plt.savefig(output_path)
    plt.show()


In [None]:
plot_images_with_boxes(sample)

In [None]:
# Per-abnormality scores shown as the 'YOLOv5-Abnormal' series in the
# comparison plot (x-axis "Average Precision"); 'average' is the summary value.
# This table has a 'Clavicle fracture' entry that `yolo` and `llava_results`
# lack; the plot only uses keys common to all three tables.
yolo_abnormal = {
    'average': 0.329,
    'Aortic enlargement': 0.813,
    'Atelectasis': 0.412,
    'Calcification': 0.149,
    'Cardiomegaly': 0.844,
    'Clavicle fracture': 0.0469,
    'Consolidation': 0.371,
    'Edema': 0.0,
    'Emphysema': 0.0958,
    'Enlarged PA': 0.41,
    'ILD': 0.56,
    'Infiltration': 0.519,
    'Lung Opacity': 0.41,
    'Lung cavity': 0.0114,
    'Lung cyst': 0.209,
    'Mediastinal shift': 0.541,
    'Nodule/Mass': 0.123,
    'Other lesion': 0.16,
    'Pleural effusion': 0.503,
    'Pleural thickening': 0.256,
    'Pneumothorax': 0.41,
    'Pulmonary fibrosis': 0.315,
    'Rib fracture': 0.0847
}

In [None]:
# Per-abnormality scores shown as the 'YOLOv5' series in the comparison plot
# (x-axis "Average Precision"); 'average' is the summary value.
yolo = {
    'average': 0.0965,
    'Aortic enlargement': 0.489,
    'Atelectasis': 0.0237,
    'Calcification': 0.00727,
    'Cardiomegaly': 0.62,
    'Consolidation': 0.087,
    'Edema': 0.0,
    'Emphysema': 0.0,
    'Enlarged PA': 0.0167,
    'ILD': 0.139,
    'Infiltration': 0.166,
    'Lung Opacity': 0.0407,
    'Lung cavity': 0.0,
    'Lung cyst': 0.0,
    'Mediastinal shift': 0.0152,
    'Nodule/Mass': 0.00644,
    'Other lesion': 0.00159,
    'Pleural effusion': 0.173,
    'Pleural thickening': 0.0334,
    'Pneumothorax': 0.0891,
    'Pulmonary fibrosis': 0.118,
    'Rib fracture': 0.0
}


In [None]:
# Per-abnormality scores shown as the 'LLaVA-Detect' series in the comparison
# plot (x-axis "Average Precision"); 'average' is the summary value. The plot
# orders its rows by these values.
llava_results = {
    'average': 0.0083,
    'Aortic enlargement': 0.016,
    'Atelectasis': 0.0,
    'Calcification': 0.0,
    'Cardiomegaly': 0.142,
    'Consolidation': 0.00,
    'Edema': 0.0,
    'Emphysema': 0.0,
    'Enlarged PA': 0.00,
    'ILD': 0.0,
    'Infiltration': 0.0,
    'Lung Opacity': 0.0,
    'Lung cavity': 0.0,
    'Lung cyst': 0.0,
    'Mediastinal shift': 0.0,
    'Nodule/Mass': 0.0,
    'Other lesion': 0.0,
    'Pleural effusion': 0.0,
    'Pleural thickening': 0.0,
    'Pneumothorax': 0.0,
    'Pulmonary fibrosis': 0.018,
    'Rib fracture': 0.0
}

In [None]:
import matplotlib.pyplot as plt
import numpy as np


# Compare the three score tables on the abnormalities they all report.
common_regions = set(llava_results.keys()) & set(yolo.keys()) & set(yolo_abnormal.keys())
common_regions.discard("average")

# Order rows by the LLaVA score. Many scores tie (e.g. at 0.0) and a set has no
# stable iteration order for strings across runs, so break ties by name to keep
# the row order reproducible. The summary row always goes last (top of plot).
sorted_regions = sorted(common_regions, key=lambda x: (llava_results[x], x))
sorted_regions.append("average")

# NOTE: despite the `_ious` suffix these lists hold the table values that are
# plotted on the "Average Precision" axis.
llava_results_ious = [llava_results[region] for region in sorted_regions]
yolo_ious = [yolo[region] for region in sorted_regions]
yolo_abnormal_ious = [yolo_abnormal[region] for region in sorted_regions]

plt.figure(figsize=(8, 6))
y_positions = np.arange(len(sorted_regions))

plt.plot(llava_results_ious, y_positions, '*', color='#fa7f6f', label='LLaVA-Detect')
plt.plot(yolo_ious, y_positions, '^', color='#2f7fc1', label='YOLOv5')
plt.plot(yolo_abnormal_ious, y_positions, 's', color='#ffbe7a', label='YOLOv5-Abnormal')

plt.yticks(y_positions, [region.lower() for region in sorted_regions])
plt.xlabel("Average Precision", fontsize=12)
plt.ylabel("Abnormality", fontsize=12)
plt.legend(loc="lower right", fontsize=10)
plt.grid(True, linestyle='--', alpha=0.7)
plt.xlim(0.0, 1.0)

# Annotate the "average" row (last entry) with its numeric value per series.
plt.text(llava_results_ious[-1], y_positions[-1], f"{llava_results_ious[-1]:.3f}", color='#fa7f6f', ha='left', va='bottom', fontsize=8)
plt.text(yolo_ious[-1], y_positions[-1], f"{yolo_ious[-1]:.3f}", color='#2f7fc1', ha='left', va='bottom', fontsize=8)
plt.text(yolo_abnormal_ious[-1], y_positions[-1], f"{yolo_abnormal_ious[-1]:.3f}", color='#ffbe7a', ha='left', va='bottom', fontsize=8)

plt.tight_layout()
plt.savefig('vindr_abnormality_detection_precision.pdf', format='pdf', bbox_inches='tight')
plt.show()
