# Generating a dataset for TensorFlow object detection

## Reading in class mapping

In [10]:
import os

# Resolve the project root (parent of the working directory) and the
# UEC-FOOD dataset folder beneath it.
cwd = os.getcwd()
root_path = os.path.join(cwd, os.pardir)
dataset_path = os.path.join(root_path, 'uec-food', 'UECFOOD23')

# Echo both locations so a wrong working directory is spotted early.
for location_line in ('Root: {}'.format(root_path),
                      'Data: {}'.format(dataset_path)):
    print(location_line)


Root: /home/vencintgamer_gmail_com/food_detection/src/..
Data: /home/vencintgamer_gmail_com/food_detection/src/../uec-food/UECFOOD23


In [11]:
import os
import pandas as pd

# category.txt is a tab-separated table with (at least) `id` and `name` columns.
category_file = os.path.join(dataset_path, "category.txt")
category_df = pd.read_csv(category_file, sep='\t')

# Map numeric class id -> whitespace-trimmed class name.
id_to_classes = {
    int(class_id): class_name.strip()
    for class_id, class_name in zip(category_df['id'], category_df['name'])
}

# Same information as a list of [name, id] pairs.
classes = [[class_name, class_id] for class_id, class_name in id_to_classes.items()]

print('Class map size: {}'.format(len(classes)))

Class map size: 23


## Setting up a data-agnostic object to store image info

In [12]:
class ImageInfo:
    """Plain record describing one labelled bounding box inside an image file."""

    # Attribute names, in the order used by both the constructor and the repr.
    _FIELDS = ('filepath', 'xmin', 'ymin', 'xmax', 'ymax', 'class_id')

    def __init__(self, filepath, xmin, ymin, xmax, ymax, class_id):
        values = (filepath, xmin, ymin, xmax, ymax, class_id)
        for field, value in zip(self._FIELDS, values):
            setattr(self, field, value)

    def __repr__(self):
        template = "filepath: {}, xmin: {}, ymin: {}, xmax: {}, ymax:{}, class_id: {}"
        return template.format(*(getattr(self, field) for field in self._FIELDS))


# Quick smoke test of the constructor and the textual representation.
img = ImageInfo(filepath=10, xmin=10, ymin=10, xmax=10, ymax=10, class_id=10)
print(img)

filepath: 10, xmin: 10, ymin: 10, xmax: 10, ymax:10, class_id: 10


## Read in bounding box and label data

In [13]:
# Image id (as a string) -> list of ImageInfo, one entry per bounding box.
img_dict = {}
annotations = []

# os.walk lists the dataset root first; everything after it is treated as a
# category folder whose name is the numeric class id.
walked_dirs = [entry[0] for entry in os.walk(dataset_path) if os.path.isdir(entry[0])]
category_folders = walked_dirs[1:]

for folder in category_folders:
    folder_name = os.path.basename(folder)
    bbox_df = pd.read_csv(os.path.join(folder, "bb_info.txt"), sep=' ')
    for _, row in bbox_df.iterrows():
        filepath = os.path.join(folder_name, '{}'.format(row['img'])) + '.jpg'
        info = ImageInfo(filepath, row['x1'], row['y1'], row['x2'], row['y2'],
                         int(folder_name))
        key = str(row['img'])
        # Boxes that share an image id are grouped under the same key.
        img_dict.setdefault(key, []).append(info)

## Shuffle dataset and split into train and validation sets

In [14]:
import numpy as np
import math

# Fraction of images assigned to the training set; the rest is validation.
train_split = 0.8
# Fixed seed so that Restart & Run All reproduces the same train/val split
# (an unseeded shuffle moves images between splits on every re-run).
split_seed = 42

data_size = len(img_dict)
# A local generator keeps the split reproducible without touching numpy's
# global random state.
rand_idx = np.random.RandomState(split_seed).permutation(data_size)

key_list = [*img_dict.keys()]
np_key = np.asarray(key_list)[rand_idx]

# Index of the first validation sample (training set is rounded up).
split_at = math.ceil(train_split * data_size)
np_train_key = np_key[:split_at]
np_val_key = np_key[split_at:]

# sanity check
print('All: {}'.format(np_key.shape))
print('Train: {}'.format(np_train_key.shape))
print('Val: {}'.format(np_val_key.shape))

All: (4183,)
Train: (3347,)
Val: (836,)


## Serialize the in-memory dataset to TFRecords

In [15]:
%matplotlib inline
import tensorflow as tf

from PIL import Image
import io
import matplotlib.pyplot as plt
import numpy as np

import sys
sys.path.append('..')
from object_detection.utils import dataset_util

def create_tf_example(group, filename):
    """Build a ``tf.train.Example`` for one image and all of its bounding boxes.

    Args:
        group: Non-empty list of ``ImageInfo`` objects. The image bytes are
            read from ``dataset_path`` joined with the first entry's
            ``filepath``; every entry contributes one box and one label.
        filename: String stored (UTF-8 encoded) as both ``image/filename``
            and ``image/source_id``.

    Returns:
        A ``tf.train.Example`` holding the encoded JPEG, its size, the box
        coordinates divided by the image width/height, and the class
        names/ids.

    Note:
        Boxes whose ``xmax``/``ymax`` reach or exceed the image size are
        clamped to ``width - 1`` / ``height - 1``. The clamping is applied to
        local values only; the ``ImageInfo`` objects in ``group`` are left
        untouched.
    """
    image_path = os.path.join(dataset_path, '{}'.format(group[0].filepath))
    with tf.gfile.GFile(image_path, 'rb') as fd:
        encoded_jpg = fd.read()

    # Open the encoded bytes with PIL to obtain the image dimensions.
    image = Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size

    filename = filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    # Named class_ids so the module-level `classes` list is not shadowed.
    class_ids = []

    for item in group:
        # Keep the far edges inside the image without mutating `item`.
        xmax = min(item.xmax, width - 1)
        ymax = min(item.ymax, height - 1)

        xmins.append(item.xmin / width)
        xmaxs.append(xmax / width)
        ymins.append(item.ymin / height)
        ymaxs.append(ymax / height)

        class_id = int(item.class_id)
        classes_text.append(id_to_classes[class_id].encode('utf8'))
        class_ids.append(class_id)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(class_ids),
    }))
    return tf_example

In [16]:
import contextlib2
from object_detection.dataset_tools import tf_record_creation_util
from progressbar import ProgressBar
import math

def writeToTFRecordShards(num_per_shard, label, img_dict, keys_arr):
    """Serialize the selected images into sequential TFRecord shard files.

    Args:
        num_per_shard: Maximum number of examples written to one shard.
        label: ``%``-style filename template taking two integers, the shard
            index and the total number of shards.
        img_dict: Mapping from image key to the list of ``ImageInfo`` objects
            for that image.
        keys_arr: Sequence of keys of ``img_dict`` to serialize, in order.

    Examples are written in order: item ``i`` goes to shard
    ``i // num_per_shard``. The active writer is always closed, even when
    building or writing an example raises.
    """
    num_shards = math.ceil(len(keys_arr) / num_per_shard)
    print("Num shards: {}".format(num_shards))
    pbar = ProgressBar()

    # Index of the shard currently being written.
    curr_idx = 0
    writer = tf.python_io.TFRecordWriter(label % (curr_idx, num_shards))
    try:
        for index, key in enumerate(pbar(keys_arr)):
            shard_idx = index // num_per_shard
            # sequential sharding due to sequential serializing of tfrecords
            if shard_idx != curr_idx:
                writer.close()  # close previous writer
                curr_idx = shard_idx
                print("Changing to {} at index {}".format(curr_idx, index))
                writer = tf.python_io.TFRecordWriter(label % (curr_idx, num_shards))
            tf_example = create_tf_example(img_dict[key], key)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the last shard even if an example failed to serialize.
        writer.close()

In [17]:
# Maximum number of examples stored in a single TFRecord shard.
num_per_shard = 1000

# Filename templates; the two %04d slots receive the shard index and the
# total shard count.
record_dir = os.path.join(root_path, 'data')
train_tf_filebase = os.path.join(record_dir, 'train_dataset-%04d-%04d.record')
val_tf_filebase = os.path.join(record_dir, 'val_dataset-%04d-%04d.record')

conversion_jobs = (
    ("Start conversion of train dataset...", train_tf_filebase, np_train_key),
    ("Start conversion of validation dataset...", val_tf_filebase, np_val_key),
)
for banner, filebase, split_keys in conversion_jobs:
    print(banner)
    writeToTFRecordShards(num_per_shard, filebase, img_dict, split_keys)

  5% (170 of 3347) |#                    | Elapsed Time: 0:00:00 ETA:   0:00:02

Start conversion of train dataset...
Num shards: 4


 38% (1272 of 3347) |#######             | Elapsed Time: 0:00:01 ETA:   0:00:01

Changing to 1 at index 1000


 68% (2288 of 3347) |#############       | Elapsed Time: 0:00:01 ETA:   0:00:00

Changing to 2 at index 2000


 96% (3220 of 3347) |################### | Elapsed Time: 0:00:02 ETA:   0:00:00

Changing to 3 at index 3000


100% (3347 of 3347) |####################| Elapsed Time: 0:00:02 Time:  0:00:02
 27% (233 of 836) |######                | Elapsed Time: 0:00:00 ETA:   0:00:00

Start conversion of validation dataset...
Num shards: 1


100% (836 of 836) |######################| Elapsed Time: 0:00:00 Time:  0:00:00


## Generate label map

In [18]:
from progressbar import ProgressBar

label_fname = os.path.join(root_path, 'data', 'labelmap.pbtxt')
end = '\n'
s = ' '
pbar = ProgressBar()

# Open the file once in write mode so that re-running this cell regenerates
# the label map instead of appending a duplicate set of items to it.
with open(label_fname, 'w') as f:
    for class_id, name in pbar(id_to_classes.items()):
        # Skip dummy
        if class_id == 0:
            continue
        out = ''
        out += 'item' + s + '{' + end
        out += s*2 + 'name:' + ' ' + '\'' + name + '\'' + end
        out += s*2 + 'id:' + ' ' + str(class_id) + end
        out += '}' + end*2
        f.write(out)
    

100% (23 of 23) |########################| Elapsed Time: 0:00:00 Time:  0:00:00


[filepath: 1/19.jpg, xmin: 352, ymin: 135, xmax: 499, ymax:326, class_id: 1,
 filepath: 21/19.jpg, xmin: 181, ymin: 183, xmax: 305, ymax:300, class_id: 21]