## Weighted Box Fusion

- Use this to fuse overlapping bounding boxes (e.g. the same finding annotated by several radiologists) into a single box
- https://www.kaggle.com/c/vinbigdata-chest-xray-abnormalities-detection/discussion/208468
- Input: the .csv annotation file to be processed
- Output: a .csv file in which overlapping boxes have been fused

In [None]:
import numpy as np
import pandas as pd

from tqdm import tqdm
from ensemble_boxes import weighted_boxes_fusion

%matplotlib inline
from matplotlib import pyplot as plt

In [None]:
# Thresholds forwarded to weighted_boxes_fusion as `iou_thr` and
# `skip_box_thr` respectively.
IOU_THRESH = 0.5
SKIP_THRESH = 0.0001

# Annotation CSV that is read as input, and the CSV the fused boxes are
# written to.
CSV_TRAIN = "./train.csv"
CSV_TRAIN_OUTNAME = "./train_512_wbf.csv"

In [None]:
# Load the raw annotations and preview the first rows.
df = pd.read_csv(CSV_TRAIN)
df.head(n=5)

In [None]:
# Class names, looked up by integer class_id when the fused boxes are
# written out (position in the list == class_id).
obj_list = [
    "Aortic enlargement",  # 0
    "Atelectasis",         # 1
    "Calcification",       # 2
    "Cardiomegaly",        # 3
    "Consolidation",       # 4
    "ILD",                 # 5
    "Infiltration",        # 6
    "Lung Opacity",        # 7
    "Nodule/Mass",         # 8
    "Other lesion",        # 9
    "Pleural effusion",    # 10
    "Pleural thickening",  # 11
    "Pneumothorax",        # 12
    "Pulmonary fibrosis",  # 13
]

In [None]:
# Fuse every image's per-radiologist boxes with Weighted Box Fusion and write
# the fused boxes to CSV_TRAIN_OUTNAME.
#
# Reads : df (annotation rows), obj_list, IOU_THRESH, SKIP_THRESH
# Writes: results (DataFrame of fused boxes), CSV_TRAIN_OUTNAME on disk

# Box-corner columns, addressed by name so the cell does not depend on the
# column order of the input CSV.
COORD_COLS = ["x_min", "y_min", "x_max", "y_max"]

# Column order of the output CSV; fixed explicitly so that the header is
# written even when no box survives.
RESULT_COLS = [
    "image_id",
    "class_name",
    "class_id",
    "rad_id",
    *COORD_COLS,
    "width",
    "height",
]

# class_id 14 has no entry in obj_list, so those rows are dropped.
# NOTE(review): presumably this is the dataset's "No finding" label, which
# carries no box - confirm against the dataset description.
df = df[df["class_id"] != 14]

results = []
image_ids = df["image_id"].unique()

# One pass over the frame instead of re-filtering `df` for every image.
# sort=False keeps images in order of first appearance, the same order
# `unique()` produces.
grouped = df.groupby("image_id", sort=False)

for image_id, data in tqdm(grouped, total=grouped.ngroups):

    # Image size is taken from the first annotation row of the image.
    width = data["width"].iloc[0]
    height = data["height"].iloc[0]

    # WBF expects the coordinates in 0-1 range. The normalised values live in
    # a separate float array so the (possibly integer) source columns are
    # never overwritten.
    coords = data[COORD_COLS].to_numpy(dtype=float)
    max_value = coords.max()
    if not max_value > 0:
        # All-zero or NaN coordinates cannot be normalised; skip the image
        # rather than feed NaN boxes to WBF.
        continue
    coords = coords / max_value

    # Group the boxes by radiologist: WBF takes one list of boxes per
    # "model", and each radiologist plays the role of one model here.
    annotations = {}
    for rad_id, class_id, box in zip(
        data["rad_id"], data["class_id"], coords.tolist()
    ):
        per_rad = annotations.setdefault(
            rad_id,
            {"boxes_list": [], "scores_list": [], "labels_list": []},
        )
        per_rad["boxes_list"].append(box)
        per_rad["scores_list"].append(1.0)
        per_rad["labels_list"].append(class_id)

    # We consider all of the radiologists as equal.
    weights = [1.0] * len(annotations)

    # Calculate WBF
    boxes, scores, labels = weighted_boxes_fusion(
        [per_rad["boxes_list"] for per_rad in annotations.values()],
        [per_rad["scores_list"] for per_rad in annotations.values()],
        [per_rad["labels_list"] for per_rad in annotations.values()],
        weights=weights,
        iou_thr=IOU_THRESH,
        skip_box_thr=SKIP_THRESH,
    )

    for box, label in zip(boxes, labels):
        class_id = int(label)
        # Round to the nearest pixel: plain int() truncates, so a coordinate
        # that comes back as 299.99997 after the normalise/denormalise round
        # trip would become 299 instead of 300.
        x_min, y_min, x_max, y_max = (
            int(round(float(value) * max_value)) for value in box
        )
        results.append(
            {
                "image_id": image_id,
                "class_name": obj_list[class_id],
                "class_id": class_id,
                "rad_id": 'wbf',
                "x_min": x_min,
                "y_min": y_min,
                "x_max": x_max,
                "y_max": y_max,
                "width": width,
                "height": height,
            }
        )

results = pd.DataFrame(results, columns=RESULT_COLS)
results.to_csv(CSV_TRAIN_OUTNAME, index=False)

In [None]:
# Report the box count before and after fusion. `df` here is the frame as
# left by the fusion cell, and `results` holds the fused boxes.
print(
    f"Number of original boxes : {len(df)}",
    f"Number of boxes (after removing overlappings): {len(results)}",
    sep="\n",
)

In [None]:
# Read the written file back as a sanity check and preview it.
# Note that this rebinds `df` to the fused annotations.
df = pd.read_csv(CSV_TRAIN_OUTNAME)
df.head(n=5)