In [9]:
# Converting the German Traffic Sign Detection Benchmark dataset to TFRecord for object_detection using tensorflow API


import hashlib
import io
import logging
import os

import PIL.Image
import tensorflow as tf

import sys
sys.path.append('..')

from object_detection.utils import dataset_util
from object_detection.utils import label_map_util

import pandas as pd




In [None]:
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw gtsdb dataset.')
flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
flags.DEFINE_string('label_map_path', 'data/gtsdb_label_map.pbtxt',
                    'Path to label map proto')
FLAGS = flags.FLAGS


In [None]:
import argparse

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--data_dir", required=True, help="Root directory to raw gtsdb dataset")
ap.add_argument("-o", "--output_dir", required=True, help="Path to directory to output TFRecords")
ap.add_argument("-l", "--label_map_path", required = True, help="path to label_map.pbtxt")
args = vars(ap.parse_args())

In [2]:
args = {}
args["data_dir"] = "C:\\Users\\noaim\\Downloads\\FullIJCNN2013\\FullIJCNN2013"
args["output_dir"] = "C:\\Users\\noaim\\Downloads\\FullIJCNN2013\\FullIJCNN2013"
args["label_map_path"] = "C:\\Users\\noaim\\Downloads\\FullIJCNN2013\\FullIJCNN2013\\label_map.pbtxt"

## Converting ppm files to jpg for using tensorflow tf record

In [32]:
from PIL import Image 
import os
import shutil

data_dir = args["data_dir"]
#train_img_dir = os.path.join(data_dir, 'TrainIJCNN2013')
img_dir = os.path.join(data_dir)
jpg_img_dir = os.path.join(data_dir, 'jpg_FullIJCNN2013')


if os.path.exists(jpg_img_dir):
    shutil.rmtree(jpg_img_dir)
os.makedirs(jpg_img_dir) 

for img_name in os.listdir(img_dir):
#for img_name in ["00000.ppm","00001.ppm","00002.ppm"]:
    if img_name[-3:] == "ppm":
        img_path = os.path.join(img_dir, img_name)
        img = Image.open(img_path)
        jpg_path = os.path.join(jpg_img_dir, img_name[:-3]+'jpg')
        #print(png_path)
        img.save(jpg_path)
        #img.show()

## Function to get dictionary of class id and class name

In [3]:
def get_label_dict(label_path):
    label_map_dict = {}
    with open(label_path, 'r', encoding="utf-8") as f:
        lines = f.readlines()
        for line in lines:
            if not line.split():
                continue
            line = line.strip()
            number, name = line.split(' ', 1)
            label_map_dict[int(number)+1] = name
    return label_map_dict

## Creating TF Records

In [4]:
def df_to_tf_example(data, label_map_dict, image_subdirectory):
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width, height = image.size

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    
    for obj in data['object']:

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        class_name = label_map_dict[obj['class']]
        classes_text.append(class_name.encode('utf8'))
        classes.append(obj['class'])
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return example


In [5]:
## function to create tf record

def create_tf_record(output_filename, label_map_dict, gt_path, image_dir, examples):

    writer = tf.python_io.TFRecordWriter(output_filename)

    # Read ground truth csv
    df = pd.read_csv(gt_path, delimiter=';', names=('file', 'xMin', 'yMin', 'xMax', 'yMax', 'classId'))
    df['file'] = df['file'].str.replace('ppm', 'jpg')

    for idx, example in enumerate(examples):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples))

        data = {
            'filename': example,
            'object': []
        }
        objects = df[df['file'] == example]
        for _, obj in objects.iterrows():
            class_id = obj['classId'] + 1
            data['object'].append({
                'bndbox': {
                    'xmin': obj['xMin'],
                    'ymin': obj['yMin'],
                    'xmax': obj['xMax'],
                    'ymax': obj['yMax']
                },
                'class': class_id
            })
        #print(data)
        tf_example = df_to_tf_example(data, label_map_dict, image_dir)
        writer.write(tf_example.SerializeToString())

    writer.close()

## Main Function

In [6]:
def main(_):
    #data_dir = FLAGS.data_dir
    data_dir = args["data_dir"]

    #label_map_dict = label_map_util.get_label_map_id_dict(FLAGS.label_map_path)

    label_map_dict = get_label_dict("C:\\Users\\noaim\\Downloads\\FullIJCNN2013\\FullIJCNN2013\\gtsdb_labels.txt")

    logging.info('Reading from GTSDB dataset.')
    image_dir = os.path.join(data_dir, 'jpg_FullIJCNN2013') 
    examples_gt_path = os.path.join(data_dir, 'gt.txt')
    examples_list = ['%05d.jpg' % x for x in range(900)]

    num_train = 600
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    train_output_path = os.path.join(args["output_dir"], 'gtsdb_train.record')
    val_output_path = os.path.join(args["output_dir"], 'gtsdb_val.record')
    create_tf_record(train_output_path, label_map_dict, examples_gt_path,
                     image_dir, train_examples)
    
    create_tf_record(val_output_path, label_map_dict, examples_gt_path,
                     image_dir, val_examples)

if __name__ == '__main__':
    tf.app.run()

I0706 02:52:35.305810  6560 <ipython-input-6-7ca262d29d3d>:9] Reading from GTSDB dataset.
I0706 02:52:35.305810  6560 <ipython-input-6-7ca262d29d3d>:18] 600 training and 300 validation examples.
I0706 02:52:35.323721  6560 <ipython-input-5-b421cf493828>:13] On image 0 of 600
I0706 02:52:35.623880  6560 <ipython-input-5-b421cf493828>:13] On image 100 of 600
I0706 02:52:35.874686  6560 <ipython-input-5-b421cf493828>:13] On image 200 of 600
I0706 02:52:36.169329  6560 <ipython-input-5-b421cf493828>:13] On image 300 of 600
I0706 02:52:36.424351  6560 <ipython-input-5-b421cf493828>:13] On image 400 of 600
I0706 02:52:36.667667  6560 <ipython-input-5-b421cf493828>:13] On image 500 of 600
I0706 02:52:37.263319  6560 <ipython-input-5-b421cf493828>:13] On image 0 of 300
I0706 02:52:37.541632  6560 <ipython-input-5-b421cf493828>:13] On image 100 of 300
I0706 02:52:37.789393  6560 <ipython-input-5-b421cf493828>:13] On image 200 of 300


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [11]:
def get_num_classes(pbtxt_fname):
    from object_detection.utils import label_map_util
    label_map = label_map_util.load_labelmap(pbtxt_fname)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return len(category_index.keys())

In [12]:
c = get_num_classes("C:\\Users\\noaim\\Downloads\\FullIJCNN2013\\FullIJCNN2013\\label_map.pbtxt")

In [13]:
c

43