# Preprocessing Dataset for Object Detection

This notebook illustrates preprocessing the training data, accomplishing two key things:  

- Converting the annotation files from .xml to .txt format
- Supplying annotation text files in this format: `class x y width height`
- NOTE: `x` and `y` are the coordinates of the bounding-box center, and all four values (`x`, `y`, `width`, `height`) are given as proportions of the width and height, respectively, of the entire image

In [5]:
import untangle
import os

In [7]:
# Directory holding the annotation XML files that the conversion below reads.
DATA_PATH = './data/annotations'

In [120]:
def convert_dimensions(bounding_box, image_width, image_height):
    """Express a bounding box as center/size fractions of the full image.

    Args:
        bounding_box: object whose ``xmin``, ``xmax``, ``ymin`` and ``ymax``
            attributes each expose the coordinate as text through ``.cdata``.
        image_width: width of the whole image, in the same units as the box.
        image_height: height of the whole image, in the same units as the box.

    Returns:
        Tuple ``(x_center, y_center, width, height)`` where the x/width values
        are divided by ``image_width`` and the y/height values by
        ``image_height``.
    """
    x_lo, x_hi, y_lo, y_hi = (
        float(getattr(bounding_box, edge).cdata)
        for edge in ("xmin", "xmax", "ymin", "ymax")
    )

    box_w = x_hi - x_lo
    box_h = y_hi - y_lo

    # Box center = lower edge plus half the extent, scaled to the image size.
    center_x = (x_lo + (box_w / 2.0)) / image_width
    center_y = (y_lo + (box_h / 2.0)) / image_height

    return center_x, center_y, box_w / image_width, box_h / image_height

In [125]:
def convert_multiple_objects(obj_list, image_width, image_height):
    """Flatten several annotated objects into a single list of label fields.

    Each object contributes five consecutive entries, in input order: its
    class name (``obj.name.cdata``) followed by the four values returned by
    ``convert_dimensions`` for its ``bndbox``.

    Args:
        obj_list: iterable of parsed object elements.
        image_width: width of the whole image.
        image_height: height of the whole image.

    Returns:
        One flat list of length ``5 * len(obj_list)``.
    """
    flat_labels = []
    for item in obj_list:
        flat_labels.append(item.name.cdata)
        flat_labels.extend(
            convert_dimensions(item.bndbox, image_width, image_height)
        )
    return flat_labels

In [126]:
def transform_object(obj, image_width, image_height):
    """Convert the XML object representation into the flat label format.

    Args:
        obj: either a single parsed object element or a list of them.
        image_width: width of the whole image.
        image_height: height of the whole image.

    Returns:
        A flat list with five entries per object: class name followed by the
        four values produced by ``convert_dimensions``.
    """
    # A lone element is handled as a one-item list so both cases share the
    # same conversion path and yield the same [name, x, y, w, h] layout.
    elements = obj if isinstance(obj, list) else [obj]
    return convert_multiple_objects(elements, image_width, image_height)

In [127]:
def convert_annotation_format(path_to_data):
    """Convert annotation XML files into labels in `class x y width height` format.

    Object dimensions are given as proportions of the entire image.

    Args:
        path_to_data: directory whose entries are each parsed as an
            annotation XML file.

    Returns:
        A ``(results, errors)`` tuple. ``results`` holds one entry per file
        that was processed successfully (the list returned by
        ``transform_object``). ``errors`` holds one ``[filename, exception]``
        pair per file that could not be processed.
    """
    results = []
    errors = []
    # os.listdir gives no ordering guarantee; sorting keeps the order of
    # `results` reproducible, since entries are not tagged with a filename.
    for fname in sorted(os.listdir(path_to_data)):
        try:
            # Build the path from the argument, not the notebook-level constant.
            file_path = os.path.join(path_to_data, fname)
            doc = untangle.parse(file_path)

            # get the overall dimensions of the image
            image_width = float(doc.annotation.size.width.cdata)
            image_height = float(doc.annotation.size.height.cdata)

            # parse object label and dimensions
            results.append(
                transform_object(doc.annotation.object, image_width, image_height)
            )

        except Exception as err:
            # Best-effort: one unreadable or malformed file must not abort the
            # run; record it so the caller can inspect the failure.
            errors.append([fname, err])

    return results, errors

In [128]:
# Convert every annotation file found under DATA_PATH. `errors` collects a
# [filename, exception] pair for each file that could not be processed.
processed, errors = convert_annotation_format(DATA_PATH)