In [47]:
"""
Changing PASCAL VOC like dataset to TFRecord data.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import hashlib
import io
import os
import glob
from random import shuffle

from lxml import etree
import PIL.Image
import tensorflow as tf

from object_detection.utils import dataset_util
from object_detection.utils import label_map_util

SETS = ['training', 'validation']
TYPES = ['sim', 'site']

type_idx = 1
data_type = TYPES[type_idx]
root_dir = os.getcwd()
data_dir = os.path.join(root_dir, data_type)
relative_annotations_dir = '_data_labeled'
output_path = root_dir + '/TFRecord'
label_map_path = os.path.join(root_dir,'tl_label_map.pbtxt')  # Path to label map proto


def dict_to_tf_example(data, dataset_directory, label_map_dict, set_name, ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

    Returns:
    example: The converted tf.Example.

    Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    img_path = os.path.join(data_type + '_data', set_name, data['filename'])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example

In [48]:
def main(_):
    sets = ['training', 'validation']
    
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)

    for s in sets:
        writer = tf.python_io.TFRecordWriter(os.path.join(output_path, s+'.record'))
        print('Reading from traffic light {} {} dataset.'.format(data_type, s))
        annotations_dir = os.path.join(data_dir, data_type + relative_annotations_dir, s)
        annotations_path_list = glob.glob(annotations_dir + '/*.xml')
        shuffle(annotations_path_list)
        print('Number of dataset: {}'.format(len(annotations_path_list)))
        for i, path in enumerate(annotations_path_list):
            if i % 10 == 0:
                print('On image {} of {}'.format(i, len(annotations_path_list)))
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, data_dir, label_map_dict, s)
            writer.write(tf_example.SerializeToString())
        writer.close()
        print('{} {} dataset conversion done.'.format(data_type, s))

In [49]:
tf.app.run()

Reading from traffic light site training dataset.
Number of dataset: 384
On image 0 of 384
On image 10 of 384
On image 20 of 384
On image 30 of 384
On image 40 of 384
On image 50 of 384
On image 60 of 384
On image 70 of 384
On image 80 of 384
On image 90 of 384
On image 100 of 384
On image 110 of 384
On image 120 of 384
On image 130 of 384
On image 140 of 384
On image 150 of 384
On image 160 of 384
On image 170 of 384
On image 180 of 384
On image 190 of 384
On image 200 of 384
On image 210 of 384
On image 220 of 384
On image 230 of 384
On image 240 of 384
On image 250 of 384
On image 260 of 384
On image 270 of 384
On image 280 of 384
On image 290 of 384
On image 300 of 384
On image 310 of 384
On image 320 of 384
On image 330 of 384
On image 340 of 384
On image 350 of 384
On image 360 of 384
On image 370 of 384
On image 380 of 384
site training dataset conversion done.
Reading from traffic light site validation dataset.
Number of dataset: 45
On image 0 of 45
On image 10 of 45
On image 2

SystemExit: 