<h1 style="text-align: center; font-family: Verdana; font-size: 32px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; font-variant: small-caps; letter-spacing: 3px; color: #468282; background-color: #ffffff;">VinBigData Chest X-ray Abnormalities Detection</h1>
<h2 style="text-align: center; font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: underline; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;">Utility To Convert Annotations From CSV to Pascal VOC XML</h2>
<h5 style="text-align: center; font-family: Verdana; font-size: 12px; font-style: normal; font-weight: bold; text-decoration: None; text-transform: none; letter-spacing: 1px; color: black; background-color: #ffffff;">CREATED BY: DARIEN SCHETTLER</h5>


<h2 style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: navy; background-color: #ffffff;">TABLE OF CONTENTS</h2>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#imports">0&nbsp;&nbsp;&nbsp;&nbsp;IMPORTS</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#setup">1&nbsp;&nbsp;&nbsp;&nbsp;NOTEBOOK SETUP</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#background_information">2&nbsp;&nbsp;&nbsp;&nbsp;BACKGROUND INFORMATION</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#helper_functions">3&nbsp;&nbsp;&nbsp;&nbsp;HELPER FUNCTIONS</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#convert_to_pascal_voc">4&nbsp;&nbsp;&nbsp;&nbsp;CONVERTING TRAINING DATA TO PASCAL-VOC XMLS</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#reading_from_pascal_voc">5&nbsp;&nbsp;&nbsp;&nbsp;READING FROM PASCAL-VOC XMLS</a></h3>


<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: navy;" id="imports">0&nbsp;&nbsp;IMPORTS</a>

In [None]:
# PIP Installs
!pip install -q pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg

# Machine Learning and Data Science Imports
import tensorflow_probability as tfp
import tensorflow_datasets as tfds
import tensorflow_addons as tfa
import tensorflow_hub as hub
from skimage import exposure
import pandas as pd; pd.options.mode.chained_assignment = None
import numpy as np
import scipy

# Built In Imports
import xml.etree.ElementTree as ET
from datetime import datetime
from glob import glob
import warnings
import IPython
import urllib
import zipfile
import pickle
import shutil
import string
import math
import tqdm
import time
import os
import gc
import re

# Visualization Imports
FIG_FONT = dict(family="Helvetica, Arial", size=14, color="#7f7f7f")
from matplotlib.colors import ListedColormap
import matplotlib.patches as patches
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import plotly.express as px
from lxml import etree
import seaborn as sns
from PIL import Image
import plotly
import PIL
import cv2

# Other Imports
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tqdm.notebook import tqdm
import pydicom

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: navy; background-color: #ffffff;" id="setup">1&nbsp;&nbsp;NOTEBOOK SETUP</a>

In [None]:
# Root of the competition data plus the folders/files derived from it
DATA_DIR = "/kaggle/input/vinbigdata-chest-xray-abnormalities-detection"
TRAIN_DIR = os.path.join(DATA_DIR, "train")
TEST_DIR = os.path.join(DATA_DIR, "test")

# Destination folder for the generated Pascal VOC files
# (exist_ok=True makes this a no-op when the folder is already there)
XML_OUTPUT_DIR = "/kaggle/working/train_xml_files"
os.makedirs(XML_OUTPUT_DIR, exist_ok=True)

# Full paths of every entry inside the train/test dicom folders
TRAIN_DICOM_PATHS = [os.path.join(TRAIN_DIR, entry) for entry in os.listdir(TRAIN_DIR)]
TEST_DICOM_PATHS = [os.path.join(TEST_DIR, entry) for entry in os.listdir(TEST_DIR)]
print(f"\n... The number of training files is {len(TRAIN_DICOM_PATHS)} ...")
print(f"... The number of testing files is {len(TEST_DICOM_PATHS)} ...")

# Locations of the annotation csv and the sample submission csv
TRAIN_CSV = os.path.join(DATA_DIR, "train.csv")
SS_CSV = os.path.join(DATA_DIR, "sample_submission.csv")

# Load both csv files into dataframes
train_df = pd.read_csv(TRAIN_CSV)
ss_df = pd.read_csv(SS_CSV)

# Preview the first few rows of each dataframe
print("\n\nTRAIN DATAFRAME\n\n")
display(train_df.head(3))

print("\n\nSAMPLE SUBMISSION DATAFRAME\n\n")
display(ss_df.head(3))

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: navy; background-color: #ffffff;" id="background_information">2&nbsp;&nbsp;BACKGROUND INFORMATION</a>

* Pascal VOC provides standardized image datasets for object detection.
* Pascal VOC annotations are stored as XML files, unlike COCO, which uses a JSON file.
* In Pascal VOC we create one annotation file for each image in the dataset.
* A bounding box in Pascal VOC is formatted as: **(xmin, ymin, xmax, ymax)**

**Here is an example**
![Example of Pascal VOC Annotation](https://miro.medium.com/max/1130/1*J84PBv70HWVW_tJ2zQwd4g.png)

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: navy; background-color: #ffffff;" id="helper_functions">3&nbsp;&nbsp;HELPER FUNCTIONS</a>

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """ Convert dicom file to an 8-bit numpy array 
    
    Args:
        path (str): Path to the dicom file to be converted
        voi_lut (bool): Whether or not to apply the VOI LUT transform
        fix_monochrome (bool): Whether or not to invert images whose
            PhotometricInterpretation is "MONOCHROME1"
        
    Returns:
        np.uint8 numpy array of the pixel data, min-max scaled to [0, 255].
        A constant image (max == min) is returned as all zeros.
        
    """
    # Use the pydicom library to read the dicom file
    # (dcmread is the non-deprecated name of read_file)
    dicom = pydicom.dcmread(path)
    
    # VOI LUT (if available by DICOM device) is used to 
    # transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
        
    # The XRAY may look inverted
    #   - If we want to fix this we can
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    
    # Shift so the minimum is zero, then scale to [0, 1].
    # Skip the division for constant images to avoid 0/0 -> NaN.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    
    # Stretch to the 8-bit range and return
    data = (data * 255).astype(np.uint8)
    return data

def pprint_elem_tree(tree):
    """ Pretty Print ElementTree Object (XML)
    
    Args:
        tree (ET.Element, str or bytes): Either an ElementTree element or
            XML that has already been serialized to text/bytes.
            
    Returns:
        None; the indented XML is printed to stdout.
    """
    # Only serialize real element objects; str/bytes input is already XML text
    if not isinstance(tree, (str, bytes)):
        tree = ET.tostring(tree)
    
    # Round-trip through lxml, which does the pretty-printing
    print(etree.tostring(etree.fromstring(tree), pretty_print=True).decode())

    
def plot_image(img, title="", figsize=(8,8), cmap=None):
    """ Function to plot an image to save a bit of time 
    
    Args:
        img: Image data accepted by `plt.imshow`
        title (str, optional): Bold title shown above the image
        figsize (tuple, optional): Figure size passed to `plt.figure`
        cmap (optional): Colormap passed to `plt.imshow`; any falsy
            value falls back to matplotlib's default
            
    Returns:
        None; the figure is shown with the axes hidden.
    """
    plt.figure(figsize=figsize)
    
    # A falsy cmap (None, "") means "use the matplotlib default"
    plt.imshow(img, cmap=cmap if cmap else None)
        
    plt.title(title, fontweight="bold")
    plt.axis(False)
    plt.show()
    

def draw_bboxes(img, tl, br, rgb, label="", label_location="tl", opacity=0.1, line_thickness=0):
    """ Draw one tinted (optionally outlined and labelled) bounding box on an image
    
    The area between `tl` and `br` is alpha-blended with a solid `rgb`
    fill directly inside `img`, so the array passed in is modified.
    
    Args:
        img (np.ndarray): 3-channel image to draw on. It is blended with
            a uint8 fill, so it is presumably expected to be uint8.
        tl (sequence of 2 ints): (x, y) of the top-left corner of the box
        br (sequence of 2 ints): (x, y) of the bottom-right corner of the box
        rgb (tuple): Colour used for the fill, the outline and the text
        label (str or int, optional): Text to draw next to the box. 
            Strings are upper-cased with spaces replaced by underscores; 
            integers are rendered as "CLASS_XX". An empty string, None 
            or False draws no label.
        label_location (str, optional): Corner the label is anchored to;
            one of "tl", "tr", "bl", "br"
        opacity (float, optional): Weight of the colour fill in the blend
        line_thickness (int, optional): Outline thickness in pixels;
            0 draws no outline
        
    Returns:
        The image with the box (and label) drawn on it
    """
    # Use plain Python ints for corner points; some OpenCV builds reject
    # numpy scalar types inside point tuples
    tl = tuple(int(v) for v in tl)
    br = tuple(int(v) for v in br)
    
    # Blend the fill over whatever part of the box lies inside the image.
    # Sizing the fill from the sliced region keeps both shapes equal even 
    # when the box runs past the image edge; an empty region is skipped.
    region = img[tl[1]:br[1], tl[0]:br[0], :]
    if region.size > 0:
        rect = np.uint8(np.ones(region.shape)*rgb)
        img[tl[1]:br[1], tl[0]:br[0], :] = cv2.addWeighted(region, 1-opacity, rect, opacity, 1.0)

    if line_thickness>0:
        img = cv2.rectangle(img, tl, br, rgb, line_thickness)
    
    # An integer label of 0 is a valid class and must still be drawn, 
    # so test for "no label" explicitly instead of relying on truthiness
    if isinstance(label, str):
        has_label = label != ""
    else:
        has_label = label is not None and label is not False
        
    if has_label:
        # DEFAULTS
        FONT = cv2.FONT_HERSHEY_SIMPLEX
        FONT_SCALE = 1.666
        FONT_THICKNESS = 3
        FONT_LINE_TYPE = cv2.LINE_AA
        
        if isinstance(label, str):
            LABEL = label.upper().replace(" ", "_")
        else:
            LABEL = f"CLASS_{label:02}"
        
        text_width, text_height = cv2.getTextSize(LABEL, FONT, FONT_SCALE, FONT_THICKNESS)[0]
        
        # Corner of the box the text is anchored to ...
        label_origin = {"tl":tl, "br":br, "tr":(br[0],tl[1]), "bl":(tl[0],br[1])}[label_location]
        # ... and the shift applied from that corner to place the text
        label_offset = {
            "tl":(0, -10), "br":(-text_width, text_height+10), 
            "tr":(-text_width, -10), "bl":(0, text_height+10)
        }[label_location]
        text_org = (int(label_origin[0]+label_offset[0]), int(label_origin[1]+label_offset[1]))
        img = cv2.putText(img, LABEL, text_org, 
                          FONT, FONT_SCALE, rgb, FONT_THICKNESS, FONT_LINE_TYPE)
    
    return img


def get_annotated_image(image_id, annots, plot=False, plot_size=(18,25), plot_title="", resize_to=None):
    """ Load an image from its dicom file and draw its annotations on it
    
    Args:
        image_id (str): Key used to look up the annotations when `annots`
            is not a list
        annots (list or mapping): Either the list of annotation dicts for
            the image, or a mapping from image_id to such a list. Each 
            dict is read for "img_path", "class_id" and "bbox".
        plot (bool, optional): Whether to display the annotated image
        plot_size (tuple, optional): Figure size used when plotting
        plot_title (str, optional): Title used when plotting
        resize_to (tuple, optional): If given, the image is resized to 
            this size before the boxes are drawn. The box coordinates 
            are used as-is, so they must already be expressed in the 
            coordinate space of the image they are drawn on.
    
    Returns:
        RGB image with every annotation whose class_id is not 14 drawn on it
    """
    # One 0-255 integer colour per class taken from the "Spectral" palette
    pal = [
        tuple(int(channel) for channel in np.array(colour)*(255,255,255)) 
        for colour in sns.color_palette("Spectral", 14)
    ]

    # Accept either the annotation list itself or a lookup keyed by image id
    image_annots = annots if type(annots) == list else annots[image_id]

    # Every annotation of an image carries the same path, so use the first
    gray = dicom2array(image_annots[0]["img_path"])
    img = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
    
    if resize_to is not None:
        img = cv2.resize(img, resize_to, interpolation=cv2.INTER_LANCZOS4)
        
    for ann in image_annots:
        class_id = ann["class_id"]
        # Class 14 is never drawn
        if class_id == 14:
            continue
        box = ann["bbox"]
        img = draw_bboxes(
            img, box[:2], box[-2:], 
            rgb=pal[class_id], 
            label=int_2_str[class_id], 
            opacity=0.08, line_thickness=4,
        )
        
    if plot:
        plot_image(img, title=plot_title, figsize=plot_size)

    return img

# Build the lookup tables between class ids and class names
#   - int_2_str     : class id   -> class name (name of the first row with that id)
#   - str_2_int     : class name -> class id
#   - str_2_int_xml : lower-cased, underscore-separated class name -> class id
int_2_str = {
    class_idx: train_df.loc[train_df["class_id"] == class_idx, "class_name"].iloc[0] 
    for class_idx in range(15)
}
str_2_int = {name: class_idx for class_idx, name in int_2_str.items()}
str_2_int_xml = {name.lower().replace(" ", "_"): class_idx for name, class_idx in str_2_int.items()}

print("\n... Dictionary Mapping Class Integer to Class String Representation [int_2_str]...\n")
display(int_2_str)

print("\n... Dictionary Mapping Class String to Class Integer Representation [str_2_int]...\n")
display(str_2_int)

# The class name is now recoverable through int_2_str, so the column is removed
print("\n... Head of Train Dataframe After Dropping The Class Name Column...\n")
train_df.drop(columns=["class_name"], inplace=True)
display(train_df.head(5))

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: navy; background-color: #ffffff;" id="convert_to_pascal_voc">4&nbsp;&nbsp;CONVERTING TRAINING DATA INTO PASCAL-VOC XMLS</a>

<h3 style="font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;">4.1 MAIN FUNCTION</h3>

---

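`create_xml_file` writes one Pascal VOC XML file per image. It reads the image width and height from the DICOM file, rescales every bounding box to a 1280×1280 image, and saves the result as `<image_id>.xml` in the output directory. Rows without a bounding box (NaN coordinates) produce no `<object>` entry.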

In [None]:
def create_xml_file(df, image_id, train_dir, out_dir, display=False):
    """ Converts an images annotations to Pascal VOC (XML) and writes to disk
    
    Bounding boxes are rescaled from the original dicom resolution to a
    1280x1280 image, and the <size> element reports that rescaled size.
    
    Args:
        df (pd.DataFrame): Annotation dataframe holding the columns
            image_id, class_id, rad_id, x_min, y_min, x_max and y_max.
            Only the rows matching `image_id` are used.
        image_id (str): The image_id that we want to generate an XML file for
        train_dir (str): Path to the directory containing training
            dicom image files.
        out_dir (str): Path to the directory to save the XML file
        display (bool, optional): Whether to pretty-print the XML
            file prior to writing it to file.
    
    Returns:
        None;   It writes the Pascal VOC converted information to file
                as an XML file in the directed output directory.
    """
    def _get_image_size(path):
        """ Get the image shape from a path to a dicom image """
        # Only the header is needed for Rows/Columns, so skip the pixel data
        meta = pydicom.dcmread(path, stop_before_pixels=True)
        width = meta.Columns
        height = meta.Rows
        return str(int(width)), str(int(height)), "1"
    
    def resize(old_img_w, old_img_h, new_img_w=1280, new_img_h=1280):
        """ Return the target size and the per-axis scale factors to reach it """
        w_ratio = new_img_w/int(old_img_w)
        h_ratio = new_img_h/int(old_img_h)
        return new_img_w, new_img_h, w_ratio, h_ratio
        
    
    def _create_object_subtree(annotation, obj, w_ratio, h_ratio):
        """ Create the sub-tree related to a given object and update the root """
        # Rows without a bounding box (NaN coordinates) add no <object>
        if not pd.isna(obj.x_min):
            _object = ET.SubElement(annotation, "object")
            ET.SubElement(_object, "name").text = int_2_str[int(obj.class_id)].lower().replace(" ", "_")
            ET.SubElement(_object, "radiologist").text = obj.rad_id
            ET.SubElement(_object, "pose").text = "Unspecified"
            ET.SubElement(_object, "truncated").text = "0"
            ET.SubElement(_object, "difficult").text = "0"

            # Coordinates are scaled to the resized image and truncated to ints
            _bndbox = ET.SubElement(_object, "bndbox")
            ET.SubElement(_bndbox, "xmin").text = str(int(obj.x_min*w_ratio))
            ET.SubElement(_bndbox, "ymin").text = str(int(obj.y_min*h_ratio))
            ET.SubElement(_bndbox, "xmax").text = str(int(obj.x_max*w_ratio))
            ET.SubElement(_bndbox, "ymax").text = str(int(obj.y_max*h_ratio))
        return annotation
    
    # Select the rows of the requested image from the dataframe that was
    # passed in; columns are addressed by name so their order is irrelevant
    objects = df.loc[
        df["image_id"] == image_id, 
        ["class_id", "rad_id", "x_min", "y_min", "x_max", "y_max"]
    ]
    annotation = ET.Element('annotation')
    dicom_path = os.path.join(train_dir, image_id+".dicom")
    img_w, img_h, img_d = _get_image_size(dicom_path)
    img_w, img_h, w_ratio, h_ratio = resize(img_w, img_h)
    
    # ##### Beginning of XML #####
    
    # File Sub-Elements
    ET.SubElement(annotation, "folder").text="train"
    ET.SubElement(annotation, "filename").text=image_id+".dicom"
    ET.SubElement(annotation, "path").text=dicom_path

    # SRC Sub-Element
    _src = ET.SubElement(annotation, "src")
    ET.SubElement(_src, "database").text="train"
    
    # Size Sub-Element
    _size = ET.SubElement(annotation, "size")
    ET.SubElement(_size, "width").text=str(img_w)
    ET.SubElement(_size, "height").text=str(img_h)
    ET.SubElement(_size, "depth").text=str(img_d)
    
    # Segmented Sub-Element
    ET.SubElement(annotation, "segmented").text="0"
    
    # Loop over every object and create the annotation for that bounding box
    for obj in objects.itertuples(index=False):
        annotation = _create_object_subtree(annotation, obj, w_ratio, h_ratio)
    
    # Display if requested
    if display:
        pprint_elem_tree(annotation)
        
    # Save to output directory
    ET.ElementTree(annotation).write(os.path.join(out_dir, image_id+'.xml'))
    
# Image used to demonstrate the conversion, and the path to its dicom file
DEMO_IMG_ID = "9a5094b2563a1ef3ff50dc5c7ff71345"
DEMO_PATH = os.path.join(TRAIN_DIR, f"{DEMO_IMG_ID}.dicom")

# Convert the demo image, pretty-printing the XML before it is written to /tmp
create_xml_file(train_df, DEMO_IMG_ID, TRAIN_DIR, out_dir="/tmp", display=True)

# List every XML file that now sits in /tmp
print("Look into the tmp folder to see that the xml was created...")
for file in filter(lambda name: name.endswith(".xml"), os.listdir("/tmp")):
    print("\t– /tmp/"+file)

<h3 style="font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;">4.2  REDUCE TRAINING DATA</h3>

---

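Within each image, bounding boxes of the same class that overlap (IoU > 0) are replaced by their common intersection, and boxes that overlap no other box of the same class are discarded. Images whose rows have class 14 (no bounding box) are kept as a single row each.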

In [None]:
def calc_iou(bbox_1, bbox_2):
    """ Compute the intersection-over-union of two axis-aligned boxes
    
    Args:
        bbox_1 (sequence): First box as (xmin, ymin, xmax, ymax)
        bbox_2 (sequence): Second box as (xmin, ymin, xmax, ymax)
        
    Returns:
        float: Intersection area divided by union area. Returns 0.0 when 
        the boxes do not overlap or when the union has no area (e.g. two 
        coincident zero-size boxes).
    """
    # determine the coordinates of the intersection rectangle
    x_left = max(bbox_1[0], bbox_2[0])
    y_top = max(bbox_1[1], bbox_2[1])
    x_right = min(bbox_1[2], bbox_2[2])
    y_bottom = min(bbox_1[3], bbox_2[3])

    # Check if bboxes overlap at all (if not return 0)
    if x_right < x_left or y_bottom < y_top:
        return 0.0
    
    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    
    # compute the area of both AABBs
    bbox_1_area = (bbox_1[2] - bbox_1[0]) * (bbox_1[3] - bbox_1[1])
    bbox_2_area = (bbox_2[2] - bbox_2[0]) * (bbox_2[3] - bbox_2[1])

    # The union is the sum of both areas minus the part counted twice.
    # Degenerate boxes can make it zero, which must not be divided by.
    union_area = bbox_1_area + bbox_2_area - intersection_area
    if union_area <= 0:
        return 0.0
    
    return intersection_area / float(union_area)

def redux_bboxes(annots):
    """ Replace overlapping same-class boxes of one image by their intersection
    
    For every box, all other boxes of the same class with a non-zero IoU
    are collected and the region common to all of them is kept once per 
    class. Boxes that overlap no other box of their class are dropped.
    
    Args:
        annots (mapping): Annotations of a single image with the keys 
            "class_id", "rad_id" and "bbox", each holding a list of equal 
            length. Boxes are (xmin, ymin, xmax, ymax).
            
    Returns:
        The same `annots` object with "bbox", "rad_id" and "class_id" 
        replaced by the reduced lists. The rad_id kept for a box is that 
        of the first annotation that produced it.
    """
    def _get_inner_box(bboxes):
        """ Region shared by all boxes, or None when they have no common area """
        xmin = max([box[0] for box in bboxes])
        ymin = max([box[1] for box in bboxes])
        xmax = min([box[2] for box in bboxes])
        ymax = min([box[3] for box in bboxes])
        if (xmax<=xmin) or (ymax<=ymin):
            return None
        else:
            return [xmin, ymin, xmax, ymax]
        
    class_ids = list(annots["class_id"])
    rad_ids = list(annots["rad_id"])
    bboxes = list(annots["bbox"])
    
    new_bboxes = []
    new_class_ids = []
    new_rad_ids = []
    
    # (class_id, box) pairs already emitted. The class is part of the key so
    # a box of one class never suppresses an identical box of another class.
    seen = set()
    
    for i, (class_id, rad_id, bbox) in enumerate(zip(class_ids, rad_ids, bboxes)):
        # The box itself plus every other same-class box it overlaps
        intersecting_boxes = [bbox] + [
            other_bbox
            for j, (other_class_id, other_bbox) in enumerate(zip(class_ids, bboxes))
            if j != i and other_class_id == class_id and calc_iou(bbox, other_bbox) > 0.
        ]
        
        # A box nobody else agrees with is dropped
        if len(intersecting_boxes) < 2:
            continue
        
        inner_box = _get_inner_box(intersecting_boxes)
        if inner_box is None:
            continue
        
        key = (class_id, tuple(inner_box))
        if key in seen:
            continue
        seen.add(key)
        
        new_bboxes.append(inner_box)
        new_class_ids.append(class_id)
        new_rad_ids.append(rad_id)

    annots["bbox"] = new_bboxes
    annots["rad_id"] = new_rad_ids
    annots["class_id"] = new_class_ids
    
    return annots

# Start from every row that is not class 14 
# (the class 14 rows are added back at the end of this cell)
gt_df = train_df.loc[train_df["class_id"] != 14]

# Fold the four coordinate columns into one list-valued "bbox" column, 
# gather all annotations of an image into a single row of lists, 
# then reduce the overlapping boxes of every image
gt_df["bbox"] = gt_df[["x_min","y_min","x_max","y_max"]].values.tolist()
gt_df = gt_df.drop(columns=["x_min","y_min","x_max","y_max"])
gt_df = (
    gt_df
    .groupby(["image_id"])
    .agg({col: list for col in gt_df.columns if col != "image_id"})
    .reset_index()
)
gt_df = gt_df.apply(redux_bboxes, axis=1)

# Go back to one row per box; images left without any box explode to NaN 
# and are removed by dropna
gt_df = gt_df.apply(pd.Series.explode).reset_index(drop=True).dropna()

# Unpack the bbox lists into the original coordinate columns
gt_df["x_min"] = gt_df["bbox"].map(lambda box: box[0])
gt_df["y_min"] = gt_df["bbox"].map(lambda box: box[1])
gt_df["x_max"] = gt_df["bbox"].map(lambda box: box[2])
gt_df["y_max"] = gt_df["bbox"].map(lambda box: box[3])
gt_df = gt_df.drop(columns=["bbox"])

# Append the class 14 rows, keeping a single row per image
gt_df = pd.concat([
    gt_df, 
    train_df[train_df["class_id"] == 14].drop_duplicates(subset=["image_id"]),
]).reset_index(drop=True)

gt_df

<h3 style="font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;">4.3  EXECUTE OVER TRAINING DATA</h3>

---

Generate one XML file in `XML_OUTPUT_DIR` for every unique image in the reduced dataframe `gt_df`.

In [None]:
# Write one XML file per image. tqdm takes the total from len() of the 
# array, so the unique ids only need to be computed once.
for unique_id in tqdm(gt_df.image_id.unique()):
    create_xml_file(gt_df, unique_id, TRAIN_DIR, out_dir=XML_OUTPUT_DIR)

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: navy; background-color: #ffffff;" id="reading_from_pascal_voc">5&nbsp;&nbsp;READING TRAINING DATA FROM PASCAL-VOC XMLS</a>

In [None]:
def read_xml_file(xml_path):
    """ Return a dictionary containing information from XML file 
    
    Args:
        xml_path (str): Path to the XML file to be read
    
    Returns:
        dictionary with a single key, the image id (the <filename> text
        without its extension), mapped to a list holding one dict per 
        <object> element. Each dict has the keys:
            img_path (str): Text of the <path> element
            img_id (str): The image id
            class_id (int): Class id looked up from the <name> text
            rad_id (str): Text of the <radiologist> element
            bbox (np.ndarray): int32 array [xmin, ymin, xmax, ymax]
        The list is empty when the file contains no <object> element.
    """
    
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Initalize the dictionary
    # Strip the extension whatever its length instead of assuming ".dicom"
    image_id = os.path.splitext(root.find("filename").text)[0]
    image_path = root.find("path").text
    annotations = {image_id:[]}
    
    for obj in root.findall("object"):
        bbox = obj.find("bndbox")
        annotations[image_id].append(dict(
            img_path=image_path,
            img_id=image_id,
            class_id=str_2_int_xml[obj.find("name").text],
            rad_id=obj.find("radiologist").text,
            bbox=np.array([int(bbox.find("xmin").text), 
                           int(bbox.find("ymin").text), 
                           int(bbox.find("xmax").text), 
                           int(bbox.find("ymax").text)], dtype=np.int32)
        ))
    return annotations
    
# DEMO_PATH = os.path.join("/tmp", DEMO_IMG_ID+".xml")
# demo_annotations = read_xml_file(DEMO_PATH)
# ann = get_annotated_image(DEMO_IMG_ID, demo_annotations, plot=True, plot_size=(20,25), plot_title=f"Image With Bounding Boxes From XML – {DEMO_IMG_ID}", resize_to=(1024,1024))

# TO BE CLEANED UP LATER