## Bounding boxes

This notebook handles the extraction of bounding boxes from the original XML data to something more lightweight, for our selected synsets. Bounding boxes which have been defined to overlap regions outside of the image are clipped to the bounds of the image.

In [2]:
import os, os.path
import xml.etree.ElementTree as ET

images_path = 'data/images/bboxfiltered/'
bbox_annotations_path = 'data/bbox/Annotation/'

synsets = [f for f in os.listdir(images_path)
                if not os.path.isfile(os.path.join(images_path, f))]

i = 0
for synset in synsets:
    
    dir_path = bbox_annotations_path + synset + "/"
    files = [f for f in os.listdir(dir_path)
                 if os.path.isfile(os.path.join(dir_path, f))]
    
    output_path = 'data/bbox/tsv/' + synset + ".tsv"
    with open(output_path, "w") as f:
        
        print("Processing synset " + str(i) + ": " + synset)
    
        for file in files:
            correction_performed = False

            # Traverse XML tree and get the relevant values
            tree = ET.parse(dir_path + str(file))
            root = tree.getroot()
            
            filename = [child for child in root if child.tag == "filename"][0].text
            size = [child for child in root if child.tag == "size"][0]
            
            width = int([child for child in size if child.tag == "width"][0].text)
            height = int([child for child in size if child.tag == "height"][0].text)
            
            obj = [child for child in root if child.tag == "object"][0]
            # AFAIK there is only one bounding box label per image
            bndbox = [child for child in obj if child.tag == "bndbox"][0]
            xmin = int([child for child in bndbox if child.tag == "xmin"][0].text)
            ymin = int([child for child in bndbox if child.tag == "ymin"][0].text)
            xmax = int([child for child in bndbox if child.tag == "xmax"][0].text)
            ymax = int([child for child in bndbox if child.tag == "ymax"][0].text)
            
            # This is to correct for out-of-bounds bounding boxes
            
            if xmax > width:
                xmax = min(xmax, width)
                correction_performed = True    
            if ymax > height:
                ymax = min(ymax, height)
                correction_performed = True
            if xmin < 0:
                xmin = max(xmin, 0)
                correction_performed = True
            if ymin < 0:
                ymin = max(ymin, 0)
                correction_performed = True
            
            if correction_performed:
                print("  WARN: corrected oob bbox.")
            
            output_str = "".join([filename + ".JPEG", 
                                   "\t", str(width),
                                   "\t", str(height),
                                   "\t", str(xmin), 
                                   "\t", str(ymin), 
                                   "\t", str(xmax), 
                                   "\t", str(ymax), 
                                   "\n"])
            f.write(output_str)
    
    i+=1
    
print("All done.")

Processing synset 0: n03692522
Processing synset 1: n04179913
Processing synset 2: n09217230
Processing synset 3: n03314780
Processing synset 4: n04325704
  WARN: corrected oob bbox.
Processing synset 5: n03982430
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
Processing synset 6: n02077923
Processing synset 7: n02321529
Processing synset 8: n04317175
Processing synset 9: n04194289
Processing synset 10: n06277135
Processing synset 11: n02443484
Processing synset 12: n01950731
Processing synset 13: n01774384
  WARN: corrected oob bbox.
Processing synset 14: n02395406
Processing synset 15: n03179701
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
Processing synset 16: n04613696
Processing synset 17: n04256520
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
  WARN: corrected oob bbox.
  WARN: correc

In [4]:
# Output as json instead

import os
import json

INPUT_DIR = "data/bbox/tsv/"
OUTPUT_DIR = "data/bbox/json/"

files = [f for f in os.listdir(INPUT_DIR) if os.path.isfile(os.path.join(INPUT_DIR, f))]

for f in files:
    print("Processing: " + f)
    filepath = os.path.join(INPUT_DIR, f)
    dic = {}
    with open(filepath) as fp:
        data = fp.readlines()
        data = [x.strip() for x in data]
        for item in data:
            ls = item.split('\t')
            key = ls[0]
            value = list(map(int, ls[1:]))
            dic[key] = value
        
    with open(os.path.join(OUTPUT_DIR, f.split('.')[0] + ".json"), 'w') as fp:
        json.dump(dic, fp)
        
print("All done.")

Processing: n01983481.tsv
Processing: n07711569.tsv
Processing: n03327234.tsv
Processing: n09421951.tsv
Processing: n03733131.tsv
Processing: n04330267.tsv
Processing: n04070727.tsv
Processing: n03530642.tsv
Processing: n01632458.tsv
Processing: n03992509.tsv
Processing: n04025508.tsv
Processing: n03180011.tsv
Processing: n02280649.tsv
Processing: n04317175.tsv
Processing: n02281787.tsv
Processing: n03532672.tsv
Processing: n04311004.tsv
Processing: n04405762.tsv
Processing: n09428293.tsv
Processing: n07873807.tsv
Processing: n03207743.tsv
Processing: n02990373.tsv
Processing: n04133789.tsv
Processing: n03692522.tsv
Processing: n04525038.tsv
Processing: n00017222.tsv
Processing: n03874599.tsv
Processing: n04465501.tsv
Processing: n03636649.tsv
Processing: n02950826.tsv
Processing: n04479046.tsv
Processing: n04608329.tsv
Processing: n03250847.tsv
Processing: n06277135.tsv
Processing: n02948072.tsv
Processing: n03447721.tsv
Processing: n03637318.tsv
Processing: n04201297.tsv
Processing: 

In [5]:
# Confirm the one-bounding box assumption

import os, os.path
import xml.etree.ElementTree as ET

bbox_annotations_path = 'data/bbox/Annotation/'

synsets = [f for f in os.listdir(bbox_annotations_path)
                if not os.path.isfile(os.path.join(bbox_annotations_path, f))]

i = 0
mbbox_count = 0
for synset in synsets:
    
    dir_path = bbox_annotations_path + synset + "/"
    files = [f for f in os.listdir(dir_path)
                 if os.path.isfile(os.path.join(dir_path, f))]
        
    print("Processing synset " + str(i) + ": " + synset)
    
    for file in files:
        correction_performed = False

        # Traverse XML tree and get the relevant values
        tree = ET.parse(dir_path + str(file))
        root = tree.getroot()

        filename = [child for child in root if child.tag == "filename"][0].text
        size = [child for child in root if child.tag == "size"][0]

        width = int([child for child in size if child.tag == "width"][0].text)
        height = int([child for child in size if child.tag == "height"][0].text)

        obj = [child for child in root if child.tag == "object"][0]
        # AFAIK there is only one bounding box label per image
        bndboxes = [child for child in obj if child.tag == "bndbox"]
        if len(bndboxes) > 1:
            print(" WARN: More than one bounding box")
            mbbox_count += 1
    
    i+=1
    
print(str(mbbox_count))

Processing synset 0: n07867324
Processing synset 1: n02773037
Processing synset 2: n12305089
Processing synset 3: n03692522
Processing synset 4: n03291819
Processing synset 5: n11709674
Processing synset 6: n02129991
Processing synset 7: n04497801
Processing synset 8: n04179913
Processing synset 9: n04489008
Processing synset 10: n02339376
Processing synset 11: n02097474
Processing synset 12: n04379964
Processing synset 13: n00467719
Processing synset 14: n04127521
Processing synset 15: n07844042
Processing synset 16: n03743016
Processing synset 17: n02997391
Processing synset 18: n07574504
Processing synset 19: n07891726
Processing synset 20: n02932693
Processing synset 21: n07904395
Processing synset 22: n07859583
Processing synset 23: n07688265
Processing synset 24: n13136316
Processing synset 25: n01487506
Processing synset 26: n04490091
Processing synset 27: n03148324
Processing synset 28: n04047401
Processing synset 29: n02493793
Processing synset 30: n03110669
Processing synset 