In [1]:
import glob
import xml.etree.ElementTree as ET
import numpy as np
# Root of the annotation XML tree. The trailing slash matters: the glob patterns
# further down are built by plain string concatenation onto this value.
annotations_folder = 'dataset/ILSVRC/Annotations/CLS-LOC/'
# Text file read line by line below; the first whitespace-separated token of each
# line is used as the key and the remainder of the line as its description.
synset_file = 'dataset/LOC_synset_mapping.txt'

In [2]:
# Read the synset mapping file into {first_token: description}.
# The context manager guarantees the handle is closed even if reading fails.
with open(synset_file, 'r') as f:
    synset_lines = f.readlines()

synset_dict = {}
for line in synset_lines:
    # Split only once, on the first run of whitespace, so the description is
    # kept verbatim. (Stripping the key with str.replace would also delete any
    # later occurrence of the same text inside the description.)
    parts = line.split(None, 1)
    if not parts:
        # Blank or whitespace-only line (e.g. a trailing newline at EOF).
        continue
    key = parts[0]
    synset_dict[key] = parts[1].strip() if len(parts) > 1 else ''

In [3]:
# Keys of the synset mapping, in insertion (file) order.
image_classes = list(synset_dict)

In [4]:
def get_annotations(xml_file):
    """Extract image size, bounding boxes, filename and name from one annotation XML.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        A tuple ``(width, height, depth, bboxes, filename, name)`` where the
        three size values are ints taken from the ``size`` element, ``bboxes``
        is a list of ``[xmin, ymin, xmax, ymax]`` int lists (one per ``object``
        element), ``filename`` is the text of the ``filename`` element and
        ``name`` is the ``name`` text of the first ``object`` element.

    Raises:
        IndexError: If the document contains no ``object`` element.
    """
    root = ET.parse(xml_file).getroot()

    # Collect the raw size strings first; they are converted to int at the end.
    raw_dims = [root.find('size').find(tag).text
                for tag in ('width', 'height', 'depth')]

    object_nodes = root.findall('object')
    # Only the first object's name is reported.
    name = object_nodes[0].find('name').text

    bboxes = [
        [int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        for box in (node.find('bndbox') for node in object_nodes)
    ]

    filename = root.find('filename').text
    width, height, depth = (int(value) for value in raw_dims)
    return width, height, depth, bboxes, filename, name

In [5]:
def get_class_xml_files(img_class, dataset='train'):
    """Return the XML annotation paths found for one class in one split.

    The glob pattern is ``annotations_folder + dataset + '/' + img_class + '/*.xml'``;
    the result is whatever ``glob.glob`` returns for it.
    """
    pattern = ''.join((annotations_folder, dataset, '/', img_class, '/', '*.xml'))
    return glob.glob(pattern)

In [6]:
def create_dict(img_class, annotations_dict, dataset='train'):
    """Fill ``annotations_dict[img_class]`` with one entry per annotation file.

    Mutates ``annotations_dict`` in place and returns nothing. For every XML
    file of ``img_class`` in ``dataset`` an entry keyed by the annotation's
    filename is written, holding ``width``, ``height``, ``depth`` and
    ``bounding_boxes``. The class key is only created once at least one XML
    file has been parsed.
    """
    for xml_path in get_class_xml_files(img_class, dataset):
        # The object name returned by the parser is not needed here.
        width, height, depth, bboxes, filename, _ = get_annotations(xml_path)
        per_class = annotations_dict.setdefault(img_class, {})
        per_class[filename] = {
            'width': width,
            'height': height,
            'depth': depth,
            'bounding_boxes': bboxes,
        }

In [7]:
# Build the nested {class: {filename: info}} dictionary for the training split,
# one class at a time, printing a progress marker after each class.
dataset = 'train'
annotations_dict = dict()
for img_class in image_classes:
    create_dict(img_class, annotations_dict, dataset=dataset)
    print(f'{img_class} done', end='; ')

n01440764 done; n01443537 done; n01484850 done; n01491361 done; n01494475 done; n01496331 done; n01498041 done; n01514668 done; n01514859 done; n01518878 done; n01530575 done; n01531178 done; n01532829 done; n01534433 done; n01537544 done; n01558993 done; n01560419 done; n01580077 done; n01582220 done; n01592084 done; n01601694 done; n01608432 done; n01614925 done; n01616318 done; n01622779 done; n01629819 done; n01630670 done; n01631663 done; n01632458 done; n01632777 done; n01641577 done; n01644373 done; n01644900 done; n01664065 done; n01665541 done; n01667114 done; n01667778 done; n01669191 done; n01675722 done; n01677366 done; n01682714 done; n01685808 done; n01687978 done; n01688243 done; n01689811 done; n01692333 done; n01693334 done; n01694178 done; n01695060 done; n01697457 done; n01698640 done; n01704323 done; n01728572 done; n01728920 done; n01729322 done; n01729977 done; n01734418 done; n01735189 done; n01737021 done; n01739381 done; n01740131 done; n01742172 done; n0174440

In [8]:
# np.save stores a dict as a pickled 0-d object array, so reading it back
# requires np.load(..., allow_pickle=True).item().
np.save(file='annotations_train.npy', arr=annotations_dict)

In [9]:
def get_val_annotations():
    """Build and return the annotation dictionary for the ``val`` split.

    Every ``*.xml`` file directly under ``annotations_folder + 'val/'`` is
    parsed. Entries are grouped by the name of the first object in each file
    and keyed by the annotation's filename; each entry holds ``width``,
    ``height``, ``depth`` and ``bounding_boxes``.
    """
    val_annotations = {}
    for xml_path in glob.glob(annotations_folder + 'val/*.xml'):
        width, height, depth, bboxes, filename, img_class = get_annotations(xml_path)
        per_class = val_annotations.setdefault(img_class, {})
        per_class[filename] = {
            'width': width,
            'height': height,
            'depth': depth,
            'bounding_boxes': bboxes,
        }
    return val_annotations

In [10]:
# Parse every validation annotation XML into a {class: {filename: info}} dictionary.
annotations_val_dict = get_val_annotations()

In [11]:
# np.save stores a dict as a pickled 0-d object array, so reading it back
# requires np.load(..., allow_pickle=True).item().
np.save(file='annotations_val.npy', arr=annotations_val_dict)