# Credits
* The code for converting from DICOM to JPG is courtesy of [raddar](https://www.kaggle.com/raddar/vinbigdata-competition-jpg-data-3x-downsampled)
* The code for visualization, consolidation of the different radiologists' bboxes, and COCO conversion is courtesy of [sreevishnudamodaran](https://www.kaggle.com/sreevishnudamodaran/vinbigdata-fusing-bboxes-coco-dataset) (note: you can choose a different technique to combine overlapping bboxes from different radiologists; I chose weighted boxes fusion because it gave better results based on his visualizations)

In [None]:
!pip install ensemble-boxes

In [None]:
%matplotlib inline

import os
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
sns.set(rc={"font.size":9,"axes.titlesize":15,"axes.labelsize":9,
            "axes.titlepad":11, "axes.labelpad":9, "legend.fontsize":7,
            "legend.title_fontsize":7, 'axes.grid' : False})
import cv2
import json
import pandas as pd
import glob
import os.path as osp
from path import Path
import datetime
import numpy as np
from tqdm.auto import tqdm
import random
import shutil
from sklearn.model_selection import train_test_split

from ensemble_boxes import *
import warnings
from collections import Counter
from tqdm import tqdm

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

# Dicom to JPG
The downscale factor is a hyperparameter. (Note: this cell is very slow, so run it only once and then either upload the resulting data to Google Drive or keep it in the Kaggle working directory.)

In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True, downscale_factor = 3):
    """Load a DICOM image and return it as a downscaled 8-bit array.

    Args:
        path: Location of the DICOM file to read.
        voi_lut: When True, the pixel data is passed through ``apply_voi_lut``.
        fix_monochrome: When True, images whose ``PhotometricInterpretation``
            is ``"MONOCHROME1"`` are inverted.
        downscale_factor: Each image dimension is divided by this value
            (truncated to an integer) before resizing.

    Returns:
        ``numpy.uint8`` array scaled to the 0-255 range and resized with
        ``cv2.resize``.
    """
    # `dcmread` is the current name of the reader; `read_file` is its deprecated alias.
    dicom = pydicom.dcmread(path)

    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to [0, 1]. A constant image has a zero peak after the
    # shift, so skip the division there instead of producing NaNs.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255.0).astype(np.uint8)

    new_shape = tuple([int(x / downscale_factor) for x in data.shape])
    # cv2.resize expects the target size as (width, height).
    data = cv2.resize(data, (new_shape[1], new_shape[0]))

    return data

ftrain = os.listdir('../input/vinbigdata-chest-xray-abnormalities-detection/train')
ftest = os.listdir('../input/vinbigdata-chest-xray-abnormalities-detection/test')

# exist_ok=True lets this cell be re-run without failing on directories
# that a previous run already created.
os.makedirs('./train', exist_ok=True)
os.makedirs('./test', exist_ok=True)

# Convert every training DICOM to a JPEG with the same base name.
for fname in tqdm(ftrain):
    img = read_xray('../input/vinbigdata-chest-xray-abnormalities-detection/train/' + fname)
    cv2.imwrite('./train/' + fname.replace('.dicom', '.jpg'), img)

# Same conversion for the test DICOMs.
for fname in tqdm(ftest):
    img = read_xray('../input/vinbigdata-chest-xray-abnormalities-detection/test/' + fname)
    cv2.imwrite('./test/' + fname.replace('.dicom', '.jpg'), img)
    
#! cp ../input/vinbigdata-chest-xray-abnormalities-detection/train.csv ./

# Read the Dataset into a Dataframe

In [None]:
# Load the annotation table from the working directory.
# NOTE(review): the `cp` that would copy train.csv into ./ is commented out in
# the DICOM conversion cell, so ./train.csv must already exist — confirm.
train_annotations = pd.read_csv("./train.csv")
# Preview the first five rows (shown as the cell output in a notebook).
train_annotations.head(5)

In [None]:
# Drop the class_id 14 rows (presumably the "no finding" class — the counts
# printed afterwards are labelled "with abnormalities"; confirm against the
# dataset description).
# `.copy()` makes the filtered frame independent, so the column assignment
# below writes to it directly instead of to a slice of the original frame
# (which triggers pandas' SettingWithCopyWarning).
train_annotations = train_annotations[train_annotations.class_id != 14].copy()
# Location of the converted JPEG for each annotation row.
train_annotations['image_path'] = train_annotations['image_id'].map(
    lambda x: os.path.join('./train', str(x) + '.jpg'))
train_annotations.head(5)

In [None]:
# Distinct image files that still have at least one annotation row,
# and the total number of remaining annotation rows.
imagepaths = train_annotations['image_path'].unique()
anno_count = len(train_annotations)

print("Number of Images with abnormalities:", len(imagepaths))
print("Number of Annotations with abnormalities:", anno_count)

# Helper Functions

In [None]:
def plot_img(img, size=(18, 18), is_rgb=True, title="", cmap='gray'):
    """Show a single image in a new matplotlib figure.

    Args:
        img: Image array handed to ``plt.imshow``.
        size: Figure size passed as ``figsize``.
        is_rgb: Not used by the function body; accepted so existing calls keep working.
        title: Figure-level title.
        cmap: Colormap forwarded to ``plt.imshow``.
    """
    figure = plt.figure(figsize=size)
    plt.imshow(img, cmap=cmap)
    figure.suptitle(title)
    plt.show()

def plot_imgs(imgs, cols=2, size=10, is_rgb=True, title="", cmap='gray', img_size=None):
    """Lay out several images on a grid in one matplotlib figure.

    The figure is created but not shown; the caller decides when to call
    ``plt.show()`` / ``plt.savefig()``.

    Args:
        imgs: Sequence of image arrays.
        cols: Number of grid columns.
        size: Edge length given to each grid cell when sizing the figure.
        is_rgb: Not used by the function body; accepted so existing calls keep working.
        title: Figure-level title.
        cmap: Colormap forwarded to ``plt.imshow``.
        img_size: Optional size passed to ``cv2.resize`` for every image
            before it is drawn.
    """
    # Ceiling division: `len(imgs)//cols + 1` reserved a blank extra row
    # whenever the image count was an exact multiple of `cols`. Keep at least
    # one row so an empty input still yields a valid figure size.
    rows = max(1, -(-len(imgs) // cols))
    fig = plt.figure(figsize=(cols*size, rows*size))
    for i, img in enumerate(imgs):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        fig.add_subplot(rows, cols, i+1)
        plt.imshow(img, cmap=cmap)
    plt.suptitle(title)
    
def draw_bbox(image, box, label, color, thickness=None):
    """Return a copy of `image` with one labelled bounding box drawn on it.

    The box interior is lightly tinted, the outline is drawn in `color`, and
    the upper-cased label is written in white on a darkened strip directly
    above the box's top-left corner. The input image is not modified.

    Args:
        image: Image array to draw on.
        box: Sequence starting with ``x_min, y_min, x_max, y_max``.
        label: Text to print above the box (upper-cased before drawing).
        color: Colour handed to ``cv2.rectangle`` for fill and outline.
        thickness: Outline thickness. When omitted, the notebook-level
            ``thickness`` variable is used if it exists, otherwise 3.

    Returns:
        The annotated copy of `image`.
    """
    if thickness is None:
        # The function originally read a global `thickness` that is only set
        # in a later cell; keep honouring it, but no longer fail without it.
        thickness = globals().get('thickness', 3)

    alpha = 0.1       # opacity of the filled box tint
    alpha_box = 0.4   # opacity of the dark strip behind the label

    # Plain Python ints keep the OpenCV drawing calls happy whatever numeric
    # type the caller used for the coordinates.
    x_min, y_min, x_max, y_max = (int(v) for v in box[:4])
    caption = label.upper()

    overlay_bbox = image.copy()
    overlay_text = image.copy()
    output = image.copy()

    text_width, text_height = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)[0]

    # Tinted fill of the box area.
    cv2.rectangle(overlay_bbox, (x_min, y_min), (x_max, y_max), color, -1)
    cv2.addWeighted(overlay_bbox, alpha, output, 1 - alpha, 0, output)

    # Dark strip that the label text sits on.
    cv2.rectangle(overlay_text, (x_min, y_min - 7 - text_height),
                  (x_min + text_width + 2, y_min), (0, 0, 0), -1)
    cv2.addWeighted(overlay_text, alpha_box, output, 1 - alpha_box, 0, output)

    # Box outline and label text.
    cv2.rectangle(output, (x_min, y_min), (x_max, y_max), color, thickness)
    cv2.putText(output, caption, (x_min, y_min - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
    return output

# Class Definitions

In [None]:
# Class names in class_id order; position in this list is used as the index
# when looking up a name or colour for a class_id.
viz_labels = [
    "Aortic_enlargement",
    "Atelectasis",
    "Calcification",
    "Cardiomegaly",
    "Consolidation",
    "ILD",
    "Infiltration",
    "Lung_Opacity",
    "Nodule/Mass",
    "Other_lesion",
    "Pleural_effusion",
    "Pleural_thickening",
    "Pneumothorax",
    "Pulmonary_fibrosis",
]

# Same names preceded by a placeholder entry at index 0.
labels = ["__ignore__"] + viz_labels

# Visualize Original Boxes

In [None]:
# One fixed colour per class_id (index = class_id). The palette is hard-coded
# rather than drawn at random so colours stay the same between runs.
label2color = [[59, 238, 119], [222, 21, 229], [94, 49, 164], [206, 221, 133], [117, 75, 3],
                 [210, 224, 119], [211, 176, 166], [63, 7, 197], [102, 65, 77], [194, 134, 175],
                 [209, 219, 50], [255, 44, 47], [89, 125, 149], [110, 27, 100]]

thickness = 3
imgs = []

# Draw every annotation of the images referenced by the first six rows.
for img_id, path in zip(train_annotations['image_id'][:6], train_annotations['image_path'][:6]):

    # Select this image's rows once and reuse them for boxes and labels.
    img_rows = train_annotations[train_annotations['image_id'] == img_id]
    boxes = img_rows[['x_min', 'y_min', 'x_max', 'y_max']].to_numpy()
    # Keep the labels 1-D: `.values.squeeze()` on a one-row selection gave a
    # 0-d array, which cannot be iterated by the zip() below.
    img_labels = img_rows['class_id'].to_numpy()

    img = cv2.imread(path)

    for label_id, box in zip(img_labels, boxes):
        color = label2color[label_id]
        img = draw_bbox(img, list(np.int_(box)), viz_labels[label_id], color)
    imgs.append(img)

plot_imgs(imgs, size=9, cmap=None)
plt.show()

# Create COCO Dataset and Consolidate the Similar Annotations for Different Radiologists w/ Weighted Boxes Fusion (WBF)

Note: here we consolidate the annotations made by different radiologists for the same image. Another option is not to consolidate them and instead replicate the entry once for each distinct annotation.

## Visualize WBF

In [None]:
iou_thr = 0.5
skip_box_thr = 0.0001
viz_images = []
sigma = 0.1  # not passed to the fusion call in this cell

box_columns = ['x_min', 'y_min', 'x_max', 'y_max']

# Show three sample images before and after fusing same-class boxes.
for path in tqdm(imagepaths[5:8]):
    img_array  = cv2.imread(path)
    image_basename = Path(path).stem
    print(f"(\'{image_basename}\', \'{path}\')")
    img_annotations = train_annotations[train_annotations.image_id==image_basename]

    boxes_viz = img_annotations[box_columns].to_numpy().tolist()
    labels_viz = img_annotations['class_id'].to_numpy().tolist()

    print("Bboxes before WBF:\n", boxes_viz)
    print("Labels before WBF:\n", labels_viz)

    ## Visualize Original Bboxes
    img_before = img_array.copy()
    for box, label in zip(boxes_viz, labels_viz):
        color = label2color[int(label)]
        img_before = draw_bbox(img_before, list(np.int_(box)), viz_labels[label], color)
    viz_images.append(img_before)

    # x coordinates are scaled by the image width (shape[1]) and y coordinates
    # by the height (shape[0]). The earlier (shape[0], shape[1], ...) order had
    # them swapped, so the clip to [0, 1] could truncate valid boxes on
    # non-square images.
    img_height, img_width = img_array.shape[:2]
    scale = (img_width, img_height, img_width, img_height)

    boxes_list = []
    scores_list = []
    labels_list = []
    weights = []

    boxes_single = []
    labels_single = []

    cls_ids = img_annotations['class_id'].unique().tolist()
    count_dict = Counter(img_annotations['class_id'].tolist())
    print(count_dict)

    for cid in cls_ids:
        class_boxes = img_annotations[img_annotations.class_id==cid][box_columns].to_numpy()
        ## Performing Fusing operation only for multiple bboxes with the same label
        if count_dict[cid]==1:
            labels_single.append(cid)
            boxes_single.append(class_boxes[0].tolist())
        else:
            labels_list.append([cid] * len(class_boxes))
            ## Normalizing Bbox by Image Width and Height
            bbox = np.clip(class_boxes / scale, 0, 1)
            boxes_list.append(bbox.tolist())
            # Every box gets score 1 and every class group weight 1.
            scores_list.append(np.ones(len(class_boxes)).tolist())
            weights.append(1)

    # Perform WBF
    boxes, scores, box_labels= weighted_boxes_fusion(boxes_list, scores_list, labels_list, weights=weights,
                                                     iou_thr=iou_thr, skip_box_thr=skip_box_thr)

    # Back to pixel coordinates, then add the untouched single-box classes.
    boxes = boxes * scale
    boxes = boxes.round(1).tolist()
    box_labels = box_labels.astype(int).tolist()

    boxes.extend(boxes_single)
    box_labels.extend(labels_single)

    print("Bboxes after WBF:\n", boxes)
    print("Labels after WBF:\n", box_labels)

    ## Visualize Bboxes after operation
    img_after = img_array.copy()
    for box, label in zip(boxes, box_labels):
        color = label2color[int(label)]
        img_after = draw_bbox(img_after, list(np.int_(box)), viz_labels[label], color)
    viz_images.append(img_after)
    print()

# Images were appended as (before, after) pairs, so with two columns the
# left column shows the originals and the right column the fused boxes.
plot_imgs(viz_images, cmap=None)
plt.figtext(0.3, 0.9,"Original Bboxes", va="top", ha="center", size=25)
plt.figtext(0.73, 0.9,"WBF", va="top", ha="center", size=25)
plt.savefig('wbf.png', bbox_inches='tight')
plt.show()

## Building COCO DATASET

### Train and Val Split

In [None]:
# Fixed seed so the in-place shuffle, and therefore the split, is repeatable.
random.seed(42)
random.shuffle(imagepaths)

# First 75% of the shuffled paths go to training, the remainder to validation.
train_len = round(0.75 * len(imagepaths))
train_paths, val_paths = imagepaths[:train_len], imagepaths[train_len:]

train_count = len(train_paths)
val_count = len(val_paths)
print("Split Counts\nTrain Images:\t\t{0}\nVal Images:\t\t{1}".format(train_count, val_count))

### Defining Structure

In [None]:
now = datetime.datetime.now()

# Skeleton of the annotation file: metadata is filled in here, while the
# images / annotations / categories lists start empty and are populated later.
data = {
    'info': {
        'description': None,
        'url': None,
        'version': None,
        'year': now.year,
        'contributor': None,
        'date_created': now.strftime('%Y-%m-%d %H:%M:%S.%f'),
    },
    'licenses': [
        {'url': None, 'id': 0, 'name': None},
    ],
    # each entry: license, url, file_name, height, width, date_captured, id
    'images': [],
    'type': 'instances',
    # each entry: segmentation, area, iscrowd, image_id, bbox, category_id, id
    'annotations': [],
    # each entry: supercategory, id, name
    'categories': [],
}

In [None]:
# Register every real class as a category. Counting from -1 puts the
# placeholder first entry at id -1 (skipped) and the real classes at 0, 1, ...
class_name_to_id = {}
for class_id, class_name in enumerate(labels, start=-1):
    if class_id == -1:
        assert class_name == '__ignore__'
        continue
    class_name_to_id[class_name] = class_id
    data['categories'].append({
        'supercategory': None,
        'id': class_id,
        'name': class_name,
    })

> ### Creating Output Directories

In [None]:
train_output_dir = "./vinbigdata_coco_chest_xray/train_images"
val_output_dir = "./vinbigdata_coco_chest_xray/val_images"

# Create each image folder if it is missing; the message is printed only
# when the folder was actually created.
for out_dir, created_msg in (
    (train_output_dir, 'Coco Train Image Directory:'),
    (val_output_dir, 'Coco Val Image Directory:'),
):
    if osp.exists(out_dir):
        continue
    os.makedirs(out_dir)
    print(created_msg, out_dir)

### Doing the COCO Conversion

In [None]:
# Ignore all warnings of category UserWarning from here on.
# NOTE(review): presumably meant to quieten warnings emitted during the box
# fusion in the conversion loop — confirm which warnings this hides.
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
## Setting the output annotations json file path
train_out_file = './vinbigdata_coco_chest_xray/train_annotations.json'

data_train = data.copy()
data_train['images'] = []
data_train['annotations'] = []

In [None]:
iou_thr = 0.5
skip_box_thr = 0.0001
viz_images = []

# The verification cell reads `data_val` and counts the files in val_images,
# so the validation split gets the same treatment as the training split.
val_out_file = './vinbigdata_coco_chest_xray/val_annotations.json'
data_val = data.copy()
data_val['images'] = []
data_val['annotations'] = []


def _fuse_image_boxes(img_annotations, img_width, img_height, iou_thr, skip_box_thr):
    """Fuse one image's same-class boxes with weighted boxes fusion.

    A class that occurs once keeps its box unchanged; classes with several
    boxes are normalised to [0, 1], fused, and scaled back to pixels.

    Args:
        img_annotations: Annotation rows of a single image.
        img_width: Image width in pixels (divisor for x coordinates).
        img_height: Image height in pixels (divisor for y coordinates).
        iou_thr: Forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: Forwarded to ``weighted_boxes_fusion``.

    Returns:
        ``(boxes, labels)`` where each box is ``[x_min, y_min, x_max, y_max]``
        in pixels; fused boxes come first, single-box classes after them.
    """
    box_cols = ['x_min', 'y_min', 'x_max', 'y_max']
    # x is divided by the width and y by the height. The earlier
    # (shape[0], shape[1], ...) order swapped them, letting the clip to
    # [0, 1] truncate valid boxes on non-square images.
    scale = np.array([img_width, img_height, img_width, img_height], dtype=float)

    boxes_list, scores_list, labels_list, weights = [], [], [], []
    boxes_single, labels_single = [], []

    for cid in img_annotations['class_id'].unique().tolist():
        class_boxes = img_annotations[img_annotations.class_id == cid][box_cols].to_numpy()
        ## Performing Fusing operation only for multiple bboxes with the same label
        if len(class_boxes) == 1:
            labels_single.append(cid)
            boxes_single.append(class_boxes[0].tolist())
            continue
        labels_list.append([cid] * len(class_boxes))
        boxes_list.append(np.clip(class_boxes / scale, 0, 1).tolist())
        # Every box gets score 1 and every class group weight 1.
        scores_list.append(np.ones(len(class_boxes)).tolist())
        weights.append(1)

    ## Perform WBF
    boxes, scores, box_labels = weighted_boxes_fusion(boxes_list=boxes_list, scores_list=scores_list,
                                                      labels_list=labels_list, weights=weights,
                                                      iou_thr=iou_thr, skip_box_thr=skip_box_thr)

    boxes = (boxes * scale).round(1).tolist()
    box_labels = box_labels.astype(int).tolist()
    boxes.extend(boxes_single)
    box_labels.extend(labels_single)
    return boxes, box_labels


def _draw_boxes(img_array, boxes, box_labels):
    """Return a copy of `img_array` with every (box, label) pair drawn on it."""
    canvas = img_array.copy()
    for box, label in zip(boxes, box_labels):
        color = label2color[int(label)]
        canvas = draw_bbox(canvas, list(np.int_(box)), viz_labels[label], color)
    return canvas


def _convert_split_to_coco(paths, coco, output_dir, image_subdir, viz_sink=None, viz_every=500):
    """Copy a split's images and append its image/annotation records to `coco`.

    Args:
        paths: Image paths belonging to the split.
        coco: Annotation dict whose 'images' and 'annotations' lists are extended.
        output_dir: Folder the image files are copied into.
        image_subdir: Folder name used as prefix of each record's ``file_name``.
        viz_sink: Optional list; for every `viz_every`-th image a before and
            an after rendering are appended to it.
        viz_every: Sampling interval for the renderings.

    Raises:
        FileNotFoundError: If an image cannot be read by ``cv2.imread``.
    """
    for i, path in tqdm(enumerate(paths), total=len(paths)):
        img_array = cv2.imread(path)
        if img_array is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        image_basename = Path(path).stem
        img_height, img_width = img_array.shape[:2]

        ## Copy Image
        shutil.copy2(path, output_dir)

        ## Add Images to annotation
        coco['images'].append(dict(
            license=0,
            url=None,
            file_name=os.path.join(image_subdir, image_basename+'.jpg'),
            height=img_height,
            width=img_width,
            date_captured=None,
            id=i
        ))

        img_annotations = train_annotations[train_annotations.image_id==image_basename]
        sample_viz = viz_sink is not None and i % viz_every == 0

        ## Visualize Original Bboxes every `viz_every`-th image
        if sample_viz:
            boxes_viz = img_annotations[['x_min', 'y_min', 'x_max', 'y_max']].to_numpy().tolist()
            labels_viz = img_annotations['class_id'].to_numpy().tolist()
            viz_sink.append(_draw_boxes(img_array, boxes_viz, labels_viz))

        boxes, box_labels = _fuse_image_boxes(img_annotations, img_width, img_height,
                                              iou_thr, skip_box_thr)

        # The bbox is stored as [x, y, width, height]; ids run consecutively
        # over the whole split.
        for box, label in zip(boxes, box_labels):
            x_min, y_min, x_max, y_max = (box[0], box[1], box[2], box[3])
            area = round((x_max-x_min)*(y_max-y_min),1)
            bbox =[
                    round(x_min, 1),
                    round(y_min, 1),
                    round((x_max-x_min), 1),
                    round((y_max-y_min), 1)
                    ]

            coco['annotations'].append(dict( id=len(coco['annotations']), image_id=i,
                                             category_id=int(label), area=area, bbox=bbox,
                                             iscrowd=0))

        ## Visualize Bboxes after operation every `viz_every`-th image
        if sample_viz:
            viz_sink.append(_draw_boxes(img_array, boxes, box_labels))


# NOTE(review): box coordinates are used as-is on the JPEGs read from disk;
# confirm that ./train.csv is expressed in the same (downscaled) pixel grid.
_convert_split_to_coco(train_paths, data_train, train_output_dir, 'train_images',
                       viz_sink=viz_images)

plot_imgs(viz_images, cmap=None)
plt.figtext(0.3, 0.9,"Original Bboxes", va="top", ha="center", size=25)
plt.figtext(0.73, 0.9,"WBF", va="top", ha="center", size=25)
plt.show()

with open(train_out_file, 'w') as f:
    json.dump(data_train, f, indent=4)

# Validation split: same conversion, no sample renderings.
_convert_split_to_coco(val_paths, data_val, val_output_dir, 'val_images')

with open(val_out_file, 'w') as f:
    json.dump(data_val, f, indent=4)

### Verify Annotations

In [None]:
# Report how many image and box records each annotation dict holds.
print(f"Number of Images in the Train Annotations File: {len(data_train['images'])}")
print(f"Number of Bboxes in the Train Annotations File: {len(data_train['annotations'])}")

print(f"Number of Images in the Val Annotations File: {len(data_val['images'])}")
print(f"Number of Bboxes in the Val Annotations File: {len(data_val['annotations'])}")

# Should output
# Number of Images in the Train Annotations File: 3296
# Number of Bboxes in the Train Annotations File: 17815
# Number of Images in the Val Annotations File: 1098
# Number of Bboxes in the Val Annotations File: 5880

In [None]:
!find ./vinbigdata_coco_chest_xray/val_images -type f | wc -l
# 1098

In [None]:
!find ./vinbigdata_coco_chest_xray/train_images -type f | wc -l
# 3296