In [1]:
import os
import xml.etree.ElementTree as ET
from glob import glob

import tensorflow as tf
from tqdm import tqdm

In [2]:
import dataset_util

def create_tf_example(data, image_string):
    """Assemble a ``tf.train.Example`` describing one annotated image.

    Args:
        data: Mapping that provides the keys 'height', 'width', 'filename',
            'image_format', 'xmins', 'xmaxs', 'ymins', 'ymaxs',
            'classes_text' and 'classes'. The box/class entries are lists
            holding one element per bounding box.
        image_string: Encoded image bytes, stored under 'image/encoded'.

    Returns:
        The populated ``tf.train.Example``.
    """
    # Pull every field out of `data` up front, so a missing key surfaces
    # before any feature object is built.
    scalar_keys = ('height', 'width', 'filename', 'image_format')
    per_box_keys = ('xmins', 'xmaxs', 'ymins', 'ymaxs', 'classes_text', 'classes')
    img_h, img_w, img_name, img_fmt = [data[key] for key in scalar_keys]
    lefts, rights, tops, bottoms, label_names, label_ids = [
        data[key] for key in per_box_keys
    ]

    feature_map = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        # The file name is used both as the name and as the source id.
        'image/filename': dataset_util.bytes_feature(img_name),
        'image/source_id': dataset_util.bytes_feature(img_name),
        'image/encoded': dataset_util.bytes_feature(image_string),
        'image/format': dataset_util.bytes_feature(img_fmt),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text': dataset_util.bytes_list_feature(label_names),
        'image/object/class/label': dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))


In [3]:
# Collect the paths of all .jpg files in the Chihuahua image directory.
img_paths = glob('images/images/n02085620-Chihuahua/*.jpg')       

In [4]:
# Show the first matched image path as a sanity check.
img_paths[0]

'images/images/n02085620-Chihuahua\\n02085620_10074.jpg'

## Get Annotation

Annotation 얻기. 각 파일에서 annotation을 얻어야함

In [5]:
# Every entry two levels below annotation/Annotation
# (presumably one sub-directory per breed -- TODO confirm).
ann_paths = glob("annotation/Annotation/*/*")

# Take the first match as a sample file to inspect in the next cells.
ann_path = ann_paths[0]

In [6]:
# Load the sample annotation file as text.
with open(ann_path, 'r') as f:
    anno = f.read()

In [7]:
# Dump the raw annotation text to see which tags are available.
print(anno)

<annotation>
	<folder>02085620</folder>
	<filename>n02085620_10074</filename>
	<source>
		<database>ImageNet database</database>
	</source>
	<size>
		<width>333</width>
		<height>500</height>
		<depth>3</depth>
	</size>
	<segment>0</segment>
	<object>
		<name>Chihuahua</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>25</xmin>
			<ymin>10</ymin>
			<xmax>276</xmax>
			<ymax>498</ymax>
		</bndbox>
	</object>
</annotation>


In [8]:
# Quick extraction of the <filename> value: splitting on 'filename>' leaves
# 'n02085620_10074</' as element 1, and [:-2] trims the trailing '</'.
name = anno.split('filename>')[1][:-2]
name

'n02085620_10074'

In [9]:
def _required_text(node, tag, path):
    """Return the stripped text of ``tag`` under ``node``; raise ValueError if absent or empty."""
    text = node.findtext(tag)
    if text is None or not text.strip():
        raise ValueError('missing <%s> in annotation file %r' % (tag, path))
    return text.strip()


def get_annotation(path):
    """Parse one XML annotation file into the dict used by ``create_tf_example``.

    Args:
        path: Path to an annotation file containing ``<filename>``,
            ``<size>`` (``<width>``/``<height>``) and zero or more
            ``<object>`` elements, each with a ``<bndbox>``.

    Returns:
        dict with the keys:
            'width', 'height'  -- image size in pixels (int)
            'filename'         -- ``<filename>`` text as UTF-8 bytes
            'image_format'     -- always b'jpeg'
            'xmins', 'xmaxs'   -- box x coordinates divided by the image width
            'ymins', 'ymaxs'   -- box y coordinates divided by the image height
            'classes_text'     -- b'Dog' for every box
            'classes'          -- 1 for every box
        The six list entries hold one element per ``<object>``; they are
        empty when the file has no ``<object>``.

    Raises:
        xml.etree.ElementTree.ParseError: the file is not well-formed XML.
        ValueError: a required tag is missing or empty, a value is not
            numeric, or the image size is not positive.
    """
    root = ET.parse(path).getroot()

    width = int(_required_text(root, 'size/width', path))
    height = int(_required_text(root, 'size/height', path))
    if width <= 0 or height <= 0:
        raise ValueError('non-positive image size %dx%d in annotation file %r'
                         % (width, height, path))

    annotation = {}
    annotation['width'] = width
    annotation['height'] = height

    annotation['filename'] = _required_text(root, 'filename', path).encode('utf-8')  # decode with .decode()
    # The encoded image bytes are not part of the annotation; the caller
    # passes them to create_tf_example separately.
    annotation['image_format'] = b'jpeg'

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    for obj in root.findall('object'):
        # x coordinates are normalized by the width, y coordinates by the height.
        xmins.append(float(_required_text(obj, 'bndbox/xmin', path)) / width)
        xmaxs.append(float(_required_text(obj, 'bndbox/xmax', path)) / width)
        ymins.append(float(_required_text(obj, 'bndbox/ymin', path)) / height)
        ymaxs.append(float(_required_text(obj, 'bndbox/ymax', path)) / height)

    annotation['xmins'] = xmins
    annotation['xmaxs'] = xmaxs
    annotation['ymins'] = ymins
    annotation['ymaxs'] = ymaxs
    # Single-class setup: every box is labelled 'Dog' (id 1); the per-object
    # <name> tag is intentionally not used.
    annotation['classes_text'] = [b'Dog'] * len(xmins)
    annotation['classes'] = [1] * len(xmins)

    return annotation

In [11]:
# Destination TFRecord file (uncompressed). To write a GZIP-compressed file
# instead, pass
#   options=tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.GZIP)
# to the writer and use a name such as 'trainset.tfrecords.gzip'.
output_path = 'trainset.tfrecords'

# NOTE(review): never filled in this cell; kept only in case a later cell reads it.
annotations = {}

# The `with` block closes the writer even if an image or annotation fails to
# load part-way through, so the records already written are not left in an
# unclosed file.
with tf.python_io.TFRecordWriter(output_path) as writer:
    for path in tqdm(ann_paths):
        # The image path is derived from the annotation path by swapping the
        # root directory and appending '.jpg'.
        img_path = path.replace('annotation/Annotation', 'images/images') + '.jpg'
        with tf.gfile.Open(img_path, "rb") as binfile:
            image_string = binfile.read()

        annotation = get_annotation(path)
        tf_example = create_tf_example(annotation, image_string)
        writer.write(tf_example.SerializeToString())

100%|███████████████████████████████████████████████████████████████████████████| 20580/20580 [01:55<00:00, 177.84it/s]
