In [1]:
import os
import pandas as pd

# Separating Datasets

This section entails separating the dataset into training and test datasets

In [2]:
base_path = '../../' # file is in the scripts/preprocessing folder
source = 'annotations/lisa-traffic-light-dataset.csv'
source = os.path.join(base_path, source)

In [3]:
df = pd.read_csv(source)
df.head()

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,height,width
0,dayTest/daySequence1--00000.jpg,stop,706,478,718,500,1280,960
1,dayTest/daySequence1--00001.jpg,stop,705,475,720,497,1280,960
2,dayTest/daySequence1--00002.jpg,stop,707,476,719,494,1280,960
3,dayTest/daySequence1--00005.jpg,stop,708,474,720,492,1280,960
4,dayTest/daySequence1--00006.jpg,stop,707,470,722,492,1280,960


In [4]:
# Cleaning the filename to include only the file name and not the path
df['filename'] = df['filename'].apply(lambda x: x[8:])

In [5]:
# Creating a dataframe with 90%
# values of original dataframe
train = df.sample(frac = 0.9).reset_index(drop=True)
  
# Creating dataframe with 
# rest of the 10% values
test = df.drop(train.index).reset_index(drop=True)

In [6]:
train_dest = os.path.join(base_path, 'annotations/train.csv')
train.to_csv(train_dest, index=False)

test_dest = os.path.join(base_path, 'annotations/test.csv')
test.to_csv(test_dest, index=False)

# Separating the Images

Using the separated datasets, move the respective images into their relevant folders.

In [7]:
train.head()

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,height,width
0,daySequence1--02603.jpg,go,883,0,946,95,1280,960
1,daySequence1--01080.jpg,go,607,373,625,403,1280,960
2,daySequence1--01093.jpg,go,743,425,761,455,1280,960
3,daySequence1--02197.jpg,stop,538,129,580,187,1280,960
4,daySequence1--02843.jpg,go,534,47,594,147,1280,960


In [3]:
train = pd.read_csv(os.path.join(base_path, 'annotations/train.csv'))
test = pd.read_csv(os.path.join(base_path, 'annotations/test.csv'))

In [9]:
train.head()

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,height,width
0,daySequence1--02603.jpg,go,883,0,946,95,1280,960
1,daySequence1--01080.jpg,go,607,373,625,403,1280,960
2,daySequence1--01093.jpg,go,743,425,761,455,1280,960
3,daySequence1--02197.jpg,stop,538,129,580,187,1280,960
4,daySequence1--02843.jpg,go,534,47,594,147,1280,960


In [10]:
test.head()

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,height,width
0,daySequence1--03717.jpg,go,453,446,468,466,1280,960
1,daySequence1--03718.jpg,go,453,442,468,467,1280,960
2,daySequence1--03719.jpg,go,453,442,468,467,1280,960
3,daySequence1--03720.jpg,go,453,442,468,467,1280,960
4,daySequence1--03721.jpg,go,453,442,468,467,1280,960


In [11]:
from shutil import copyfile

In [12]:
train_img_dest = os.path.join(base_path, 'images/train/')
for filename in train['filename']:
    src = os.path.join(base_path, 'images/frames/', filename)
    dst = os.path.join(train_img_dest, filename)
    copyfile(src, dst)

In [13]:
test_img_dest = os.path.join(base_path, 'images/test/')
for filename in test['filename']:
    src = os.path.join(base_path, 'images/frames/', filename)
    dst = os.path.join(test_img_dest, filename)
    copyfile(src, dst)

# Creating Label Map
The label map maps all the classes to integer values. This will be saved to a .pbtxt file for easy use in Tensorflow.

In [15]:
train['class'].unique()



In [34]:
label_map = ''

for i, item in enumerate(train['class'].unique()):
    text = 'item {\n\t' + 'id: ' + str(i+1) + \
                 '\n\tname: ' + '\'' + str(item) + '\'' + \
                 '\n}\n\n'
    label_map += text

print(label_map)

item {
	id: 1
	name: 'go'
}

item {
	id: 2
	name: 'stop'
}

item {
	id: 3
}

item {
	id: 4
}

item {
	id: 5
	name: 'stopLeft'
}




In [35]:
# Writing to .pbtxt file
label_dest = os.path.join(base_path, 'annotations/label_map.pbtxt')
with open(label_dest, "w") as text_file:
    print(label_map, file=text_file)

# Create TFRecord Files

In [1]:
import os
import glob
import pandas as pd
import io
import xml.etree.ElementTree as ET
import argparse

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import tensorflow.compat.v1 as tf
from PIL import Image
from object_detection.utils import dataset_util, label_map_util
from collections import namedtuple

In [36]:
base_path = '../../' # file is in the scripts/preprocessing folder
labels_path = os.path.join(base_path, 'annotations/label_map.pbtxt')
label_map = label_map_util.load_labelmap(labels_path)
label_map_dict = label_map_util.get_label_map_dict(label_map)

In [37]:
def class_text_to_int(row_label):
    return label_map_dict[row_label]


def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]


def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


In [55]:
train_tfrecord_path = os.path.join(base_path, 'annotations/train.record')
test_tfrecord_path = os.path.join(base_path, 'annotations/test.record')

train_img_path = os.path.join(base_path, 'images/train/')
test_img_path = os.path.join(base_path, 'images/test/')

train_csv_path = os.path.join(base_path, 'annotations/train.csv')
test_csv_path = os.path.join(base_path, 'annotations/test.csv')

tfrecord_paths = [train_tfrecord_path, test_tfrecord_path]
image_paths = [train_img_path, test_img_path]
csv_paths = [train_csv_path, test_csv_path]

In [56]:
for output_path, image_dir, csv_path in zip(tfrecord_paths, image_paths, csv_paths):
    writer = tf.python_io.TFRecordWriter(output_path)
    path = os.path.join(image_dir)
    examples = pd.read_csv(csv_path)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())
    writer.close()
    print('Successfully created the TFRecord file: {}'.format(output_path))

Successfully created the TFRecord file: ../../annotations/train.record
Successfully created the TFRecord file: ../../annotations/test.record
