Splitting the data into train and test sets

In [None]:
import pandas as pd
import numpy as np

# Read the ';'-separated annotation file (it has no header row) and name its columns.
data = pd.read_csv('./data/full/gt.txt', sep=";", header=None)
data.columns = ["img", "x1", "y1", "x2", "y2", "id"]

# Every row gets the same constant width / height columns.
data = data.assign(width=1360, height=800)

# Positional split: rows 0..851 go to the training set, the remainder to the test set.
xtrain, xtest = data.iloc[:852], data.iloc[852:]

# Persist both subsets without the DataFrame index column.
xtrain.to_csv('./data/full_png/train.csv', index=False)
xtest.to_csv('./data/full_png/test.csv', index=False)

Modify the class labels as required

In [None]:
#for red sign classification, drop the images with no red signs
import pandas as pd
import numpy as np

# Load the training annotations.
xtrain = pd.read_csv('./data/train.csv')

# Discard, in one pass, every row whose class id is 6, 12 or anywhere in 32..42.
xtrain = xtrain[~xtrain.id.isin([6, 12] + list(range(32, 43)))]

# Save the filtered annotations without the DataFrame index column.
xtrain.to_csv('./data/train_new2.csv', index=False)

In [None]:
# For red sign detection, drop the images with no red signs.
# Additionally, change the class of the remaining images to 1, since they all belong to the same single class of red signs.

Record file generation

In [None]:
import os
import hashlib
import io
import random
import shutil
import configparser
import pylab as plt
import tensorflow as tf
from tqdm import tqdm
from lxml import etree
from PIL import Image, ImageDraw, ImageFont
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
from collections import namedtuple, OrderedDict
import pandas as pd
from shutil import copyfile

def create_tf_record(examples_list, output_filename):
    """Serialize a collection of examples into a single TFRecord file.

    Args:
        examples_list: iterable of objects exposing ``SerializeToString()``
            (csv_record passes the tf.train.Example protos it builds).
        output_filename: path of the TFRecord file to write.
    """
    # Use the writer as a context manager so the file is flushed and closed
    # even when serializing or writing one of the examples raises.
    with tf.python_io.TFRecordWriter(output_filename) as writer:
        for tf_example in examples_list:
            writer.write(tf_example.SerializeToString())
    print("Successful created record files")
    
def dict_to_tf_example(img_path,
                       group,
                       gt_label,
                       ignore_difficult_instances=False):
    """Build a tf.train.Example for one image and its annotated boxes.

    Box coordinates are divided by the size of the image read from
    ``img_path``, so they are stored as fractions of the width / height.

    Args:
        img_path: path of the image file whose raw bytes are embedded.
        group: record with ``img`` (the image file name) and ``object``
            (a DataFrame with ``x1``, ``y1``, ``x2``, ``y2`` and ``id``
            columns, one row per box).
        gt_label: mapping from integer class id to class name.
        ignore_difficult_instances: accepted but not used by this function.

    Returns:
        The populated tf.train.Example.
    """
    with tf.gfile.GFile(img_path, 'rb') as image_file:
        encoded_image = image_file.read()

    # The image is decoded only to learn its pixel dimensions; the raw bytes
    # read above are what end up in the example.
    width, height = Image.open(io.BytesIO(encoded_image)).size

    # Same name as group.img with its last three characters replaced by 'png'.
    filename = (group.img[:-3] + 'png')
    filename_bytes = filename.encode('utf8')

    # Materialize the annotation rows once, then derive each feature list.
    rows = [row for _, row in group.object.iterrows()]
    xmin = [row['x1'] / width for row in rows]
    xmax = [row['x2'] / width for row in rows]
    ymin = [row['y1'] / height for row in rows]
    ymax = [row['y2'] / height for row in rows]
    classes = [int(row['id']) for row in rows]
    # Direct indexing: an id missing from gt_label raises KeyError.
    classes_text = [gt_label[class_id].encode('utf8') for class_id in classes]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename_bytes),
        'image/source_id': dataset_util.bytes_feature(filename_bytes),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(b'png'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

def split(df, group):
    """Partition ``df`` by the ``group`` column(s).

    Returns a list of ``data(img, object)`` namedtuples, one per group key:
    ``img`` is the key and ``object`` is the sub-DataFrame for that key.
    """
    Record = namedtuple('data', ['img', 'object'])
    grouped = df.groupby(group)

    records = []
    for key in grouped.groups:
        records.append(Record(key, grouped.get_group(key)))
    return records

def csv_record(csv, gt_label, out='out.record'):
    """Convert an annotation CSV into a TFRecord file inside ``data_dir``.

    Args:
        csv: path of the annotation CSV; rows are grouped by their ``img`` column.
        gt_label: class id -> class name mapping passed on to dict_to_tf_example.
        out: file name of the record, joined onto the module-level ``data_dir``.
    """
    annotations = pd.read_csv(csv)

    def _png_path(img_name):
        # Images are looked up under train_img_dir, with the last three
        # characters of the joined path replaced by "png".
        joined = os.path.join(train_img_dir, img_name)
        return joined[:-3] + "png"

    # One tf.train.Example per distinct image name in the CSV.
    tf_examples = [
        dict_to_tf_example(_png_path(entry.img), entry, gt_label)
        for entry in split(annotations, 'img')
    ]

    create_tf_record(tf_examples, os.path.join(data_dir, out))
    print(out + ' has been successful created')
    
def main():
    """Generate train.record and test.record from the train / test annotation CSVs."""
    # Class id -> sign name.
    # NOTE(review): there is no entry for ids 0 or 12 (nor for anything above 31);
    # dict_to_tf_example indexes this mapping directly, so such ids would raise
    # KeyError -- confirm the CSVs never contain them.
    gt_label = {
        1: 'speed limit 30',
        2: 'speed limit 50',
        3: 'speed limit 60',
        4: 'speed limit 70',
        5: 'speed limit 80',
        6: 'speed limit 20',
        7: 'speed limit 100',
        8: 'speed limit 120',
        9: 'no overtaking',
        10: 'no overtaking (trucks)',
        11: 'priority at next intersection',
        13: 'give way',
        14: 'stop',
        15: 'no traffic both ways',
        16: 'no trucks',
        17: 'no entry',
        18: 'danger',
        19: 'bend left',
        20: 'bend right',
        21: 'bend',
        22: 'uneven road',
        23: 'slippery road',
        24: 'road narrows',
        25: 'construction',
        26: 'traffic signal',
        27: 'pedestrian crossing',
        28: 'school crossing',
        29: 'cycles crossing',
        30: 'snow',
        31: 'animals',
    }

    # Training set first, then the test set.
    for csv_path, record_name in ((train_gt_path, 'train.record'),
                                  (test_gt_path, 'test.record')):
        csv_record(csv_path, gt_label, record_name)
   
# Directory holding the annotation CSVs; csv_record also writes the records here.
data_dir = './data/class_data'
# Directory csv_record joins image file names onto.
train_img_dir = './images'
# Annotation CSVs consumed by main().
train_gt_path, test_gt_path = (
    os.path.join(data_dir, csv_name) for csv_name in ('train.csv', 'test.csv')
)


if __name__ == '__main__':
    main()