In [141]:
import json
from collections import OrderedDict
import os
import xml.etree.ElementTree as ET
import re
import shutil
from sklearn.model_selection import train_test_split

# Split images (training-validation)

In [138]:
# Split the annotated samples into training and validation subsets
root_path = '../../'
data_src_path = root_path + 'data/images/version_1/'

# Keep only genuine '.xml' files and strip just the final extension, so a name
# that merely contains ".xml" elsewhere is left intact. Sorting makes the input
# order independent of the filesystem, so the seeded split below is repeatable.
files = sorted(os.path.splitext(f)[0]
               for f in os.listdir(data_src_path + 'annotations/')
               if f.endswith('.xml'))

# Fixed seed: re-running the notebook reproduces the same 90/10 split
train_files, valid_files = train_test_split(files, train_size=0.9, random_state=42)
split_files = {'training/': train_files,
               'validation/': valid_files}

# Copy each sample's annotation and image to ./data/<split>/<annotations|images>/.
# This is best-effort: a failed copy (e.g. an annotation without a matching .jpg)
# is recorded in `errors` as [exception, sample name] instead of aborting the run.
errors = []
for split in split_files:
    for file in split_files[split]:
        for dtype, ext in [('annotations/', '.xml'), ('images/', '.jpg')]:
            src = data_src_path + dtype + file + ext
            dst = './data/' + split + dtype + file + ext
            try:
                # shutil.copy does not create missing parent directories
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                shutil.copy(src, dst)
            except OSError as err:
                errors.append([err, file])

# Surface failures so they are not silently lost
if errors:
    print("{} file(s) could not be copied; inspect `errors` for details".format(len(errors)))

# Pascal VOC to COCO

Define function

In [149]:
def pascal_to_coco(src_dir, output):
    """Convert a directory of Pascal VOC XML annotations into one COCO JSON file.

    Every file in ``src_dir`` whose name ends with ``.xml`` is parsed in sorted
    file-name order and becomes one COCO image record; image ids are assigned
    0, 1, 2, ... in that order. Each ``<object>`` whose ``<name>`` is one of the
    known categories ("stove/oven" -> 1, "chair" -> 2) becomes one annotation
    record with a running id starting at 0. Objects of any other category are
    skipped.

    Parameters
    ----------
    src_dir : str
        Directory containing the Pascal VOC ``.xml`` files.
    output : str
        Path of the JSON file to write; it is opened in ``'w'`` mode.

    Returns
    -------
    None
        The result is written to ``output``.

    Raises
    ------
    AttributeError
        If a required element (``size``, ``width``, ``height``, ``filename``,
        ``bndbox`` or one of its coordinates) is missing from an XML file.
    ValueError
        If an image dimension or box coordinate is not an integer string.
    """
    # Dataset-level metadata
    info_dict = {'description': "kitchens with stoves/ovens and chairs",
                 'url': "http://happy-walrus.com",
                 'version': "1.0",
                 'year': 2019,
                 # COCO names this field "contributor"
                 'contributor': "Happy Walrus team",
                 'date_created': "2019/10/14"}

    # Add further licenses to this list as required
    licenses = [
        {"url": "test_0", "id": 0, "name": "no license specified"},
        {"url": "test_1", "id": 1, "name": "no license specified"},
    ]

    # Categories: `objects` is the single source of truth for both the
    # category list and the filter applied to annotated objects below.
    objects = {1: "stove/oven", 2: "chair"}
    inv_objects = {name: category_id for category_id, name in objects.items()}
    categories = [{"supercategory": "NULL", "id": category_id, "name": name}
                  for category_id, name in objects.items()]

    # Images and annotations
    images = []
    annotations = []

    xml_files = [f for f in sorted(os.listdir(src_dir)) if f.endswith(".xml")]
    for image_id, xml_file in enumerate(xml_files):
        root = ET.parse(os.path.join(src_dir, xml_file)).getroot()
        size = root.find('size')

        images.append({"id": image_id,
                       "license": 0,
                       "coco_url": "NULL",
                       # COCO names this field "flickr_url"
                       "flickr_url": "NULL",
                       "width": int(size.find('width').text),
                       "height": int(size.find('height').text),
                       "file_name": root.find('filename').text,
                       "date_captured": "NULL"})

        for obj in root.findall('object'):
            name = obj.find('name').text

            # Skip anything that is not a category of interest
            if name not in inv_objects:
                continue

            bndbox = obj.find('bndbox')

            # NOTE(review): some converters subtract 1 from xmin/ymin,
            # presumably because Pascal VOC pixel coordinates are conventionally
            # 1-based while COCO's are 0-based. Coordinates are kept as-is here;
            # confirm which convention the annotation tool used before changing.
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)

            width = xmax - xmin
            height = ymax - ymin

            # len(annotations) is the running annotation id (0, 1, 2, ...);
            # segmentation is left empty because only boxes are available.
            annotations.append({"id": len(annotations),
                                "category_id": inv_objects[name],
                                "iscrowd": 0,
                                "segmentation": [],
                                "image_id": image_id,
                                "area": width * height,
                                "bbox": [xmin, ymin, width, height]})

    # Assemble the top-level sections in a fixed order
    all_items = OrderedDict()
    all_items['info'] = info_dict
    all_items['licenses'] = licenses
    all_items['images'] = images
    all_items['annotations'] = annotations
    all_items['categories'] = categories

    # Serialize before opening the file so a serialization error cannot leave
    # a truncated output behind.
    all_items_json = json.dumps(all_items)

    with open(output, 'w') as output_file:
        output_file.write(all_items_json)

Translate annotations

In [152]:
# Convert each split's Pascal VOC annotations into a COCO JSON file
# (validation first, then training), as (source directory, output file) pairs.
conversion_jobs = [
    ('./data/validation/annotations', './data/validation/coco_annotations.json'),
    ('./data/training/annotations', './data/training/coco_annotations.json'),
]
for annotations_dir, coco_json in conversion_jobs:
    pascal_to_coco(annotations_dir, coco_json)