## Non-maximum suppression

- Use this to remove overlapping bounding boxes
- Remove boxes with IoU > 0.5, whether or not they share the same class
- Input: the .csv annotation file to be processed
- Output: .csv file with non-overlapping boxes

In [1]:
import numpy as np
import pandas as pd

from tqdm import tqdm
from commons import box_nms_numpy

%matplotlib inline
from matplotlib import pyplot as plt

In [2]:
# IoU threshold handed to the NMS routine.
IOU_THRESH = 0.5

# Input annotation file, and the file the NMS-filtered annotations are written to.
CSV_TRAIN = './train.csv'
CSV_TRAIN_OUTNAME = './train_512_nms.csv'

In [3]:
# Read the raw annotation table and preview its first five rows
# (five is the default row count of DataFrame.head).
df = pd.read_csv(CSV_TRAIN)
df.head()

Unnamed: 0,image_id,class_name,class_id,rad_id,x_min,y_min,x_max,y_max,width,height
0,50a418190bc3fb1ef1633bf9678929b3,No finding,14,R11,,,,,2332,2580
1,21a10246a5ec7af151081d0cd6d65dc9,No finding,14,R7,,,,,2954,3159
2,9a5094b2563a1ef3ff50dc5c7ff71345,Cardiomegaly,3,R10,691.0,1375.0,1653.0,1831.0,2080,2336
3,051132a778e61a86eb147c7c6f564dfe,Aortic enlargement,0,R10,1264.0,743.0,1611.0,1019.0,2304,2880
4,063319de25ce7edb9b1c6b8881290140,No finding,14,R10,,,,,2540,3072


In [4]:
# Rows with class_id 14 are dropped before NMS; in the preview of the input
# they are the "No finding" rows, which have no box coordinates.
NO_FINDING_CLASS_ID = 14
df = df[df["class_id"] != NO_FINDING_CLASS_ID]

# Column order of the output CSV. Passing it explicitly to the DataFrame
# constructor keeps the header even when no box survives, so the written file
# can always be read back with pd.read_csv.
OUTPUT_COLUMNS = [
    "image_id", "class_name", "class_id", "rad_id",
    "x_min", "y_min", "x_max", "y_max",
    "width", "height",
]

records = []

# Split the table once instead of re-scanning it with a boolean mask for
# every image. sort=False yields images in order of first appearance, which
# is the same order df["image_id"].unique() would give.
boxes_by_image = df.groupby("image_id", sort=False)

for image_id, data in tqdm(boxes_by_image, total=boxes_by_image.ngroups):

    # Image size is read from the first annotation row of this image.
    width = data["width"].iloc[0]
    height = data["height"].iloc[0]

    # Boxes as [x_min, y_min, x_max, y_max]; labels as [class_id, rad_id, class_name].
    boxes = [
        list(coords) for coords in zip(
            data["x_min"], data["y_min"], data["x_max"], data["y_max"])
    ]
    labels = [
        list(meta) for meta in zip(
            data["class_id"], data["rad_id"], data["class_name"])
    ]
    # Every annotation gets the same score of 1.0.
    conf_scores = [1.0] * len(boxes)

    keep_boxes, keep_conf, keep_labels = box_nms_numpy(
        boxes, conf_scores, labels, threshold=IOU_THRESH)

    # One output record per surviving box, paired with its label triple.
    for box, label in zip(keep_boxes, keep_labels):
        records.append({
            "image_id": image_id,
            "class_name": str(label[2]),
            "class_id": int(label[0]),
            "rad_id": str(label[1]),
            "x_min": box[0],
            "y_min": box[1],
            "x_max": box[2],
            "y_max": box[3],
            "width": width,
            "height": height
        })

results = pd.DataFrame(records, columns=OUTPUT_COLUMNS)
results.to_csv(CSV_TRAIN_OUTNAME, index=False)

100%|██████████| 4394/4394 [00:14<00:00, 302.77it/s]


In [5]:
# `df` at this point is the frame fed into NMS (class_id 14 rows already
# removed by the previous cell); `results` is what NMS kept.
n_before = len(df)
n_after = len(results)

print(f"Number of original boxes : {n_before}")
print(f"Number of boxes (after removing overlappings): {n_after}")

Number of original boxes : 36096
Number of boxes (after removing overlappings): 19427


In [6]:
# Read the written file back as a sanity check and preview its first five rows.
# NOTE(review): this rebinds `df` to the NMS output, so re-running the earlier
# cells afterwards would operate on the filtered data — run top to bottom.
df = pd.read_csv(CSV_TRAIN_OUTNAME)
df.head()

Unnamed: 0,image_id,class_name,class_id,rad_id,x_min,y_min,x_max,y_max,width,height
0,9a5094b2563a1ef3ff50dc5c7ff71345,Aortic enlargement,0,R9,1052.0,715.0,1299.0,966.0,2080,2336
1,9a5094b2563a1ef3ff50dc5c7ff71345,Cardiomegaly,3,R8,689.0,1313.0,1666.0,1763.0,2080,2336
2,9a5094b2563a1ef3ff50dc5c7ff71345,Pleural thickening,11,R9,1789.0,1729.0,1875.0,1992.0,2080,2336
3,051132a778e61a86eb147c7c6f564dfe,Cardiomegaly,3,R10,952.0,1317.0,2033.0,1723.0,2304,2880
4,051132a778e61a86eb147c7c6f564dfe,Aortic enlargement,0,R9,1275.0,729.0,1612.0,1066.0,2304,2880
