### PreProcessing the Dataset for Tensorflow Object Detection API

This script takes in the TFRecord files of the dataset and creates tf.Examples that describe the Regions of Interest (ROIs) of the image that contain an object.



### Imports and File Locations

In [3]:
# Directory whose entries are listed and opened as Waymo record files in main().
# The value below is a placeholder and must be set before running.
DIRNAME = "path/to/file/dir/"
# Base path handed to open_sharded_output_tfrecords() for the sharded output TFRecords.
OUTPUT_PATH='/home/tedbo123/data/waymo/eval/tf_recs/4_25_eval'
# Number of output shards; examples are distributed over them round-robin in main().
NUM_SHARDS=10

**Set-Up**<br>
Download and set up -> https://github.com/gdlg/simple-waymo-open-dataset-reader <br>
Download and set up -> https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md <br>



In [4]:

import os
import numpy as np
import math
import io
from io import BytesIO
import sys
import random
from PIL import Image
import pickle
import pandas as pd
import tensorflow as tf
import contextlib2
from simple_waymo_open_dataset_reader import WaymoDataFileReader
from simple_waymo_open_dataset_reader import dataset_pb2, label_pb2
from simple_waymo_open_dataset_reader import utils


from models.research.object_detection.utils import dataset_util
from models.research.object_detection.dataset_tools import tf_record_creation_util

ModuleNotFoundError: No module named 'simple_waymo_open_dataset_reader'

**Creating TF Examples**
This follows the guide at https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md

There is one Example per image, and each image can have 0 or more ROIs. Each example contains the following:

- **Filename**: Name of the jpg file
- **Height**: Height of image
- **Width**: Width of image
- **Encoded Bytes**: Raw JPG Bytes
- **Type of Encoding**: JPG or PNG encoding
- **X_mins**: List of X_mins for object boxes, 1 per object
- **X_maxs**: List of X_maxs for object boxes, 1 per object
- **Y_mins**: List of Y_mins for object boxes, 1 per object
- **Y_maxs**: List of Y_maxs for object boxes, 1 per object
- **Class Label**: Class number
- **Class Name**: Class name


![Example Image](Images/vis8.png "A frame from the set with Bounding Boxes")

Using this image (generated by our model in Part 4!) our dictionary might look something like this: <br>
  filename: "sample_image_1.png" <br>
  height: 1280 <br>
  width: 900 <br>
  X_mins=[30,45,100,...] <br>
  X_maxs=[800,450,110,...] <br>
  Y_mins=[400,245,730,...] <br>
  Y_maxs=[600,845,790,...] <br>
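  labels: [2,2,2,...] <br>


The labels are all VEHICLES, so they all have class 2: the script stores the Waymo label type plus one (TYPE_UNKNOWN=1, TYPE_VEHICLE=2, TYPE_PEDESTRIAN=3, TYPE_SIGN=4, TYPE_CYCLIST=5). If there were pedestrians (3) or signs (4) we would see the labels change.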


In [None]:
def create_tf_example(img, frame, img_num, frame_num, file_num,
                      jpg_dir="/home/tedbo123/data/waymo/eval/jpgs/"):
    """Build one tf.train.Example for a single camera image of a frame.

    The image bytes in ``img.image`` are stored unchanged in the example, and
    every label in ``frame.camera_labels`` whose camera name matches
    ``img.name`` contributes one bounding box. As a side effect the decoded
    image is also saved as a JPEG file in ``jpg_dir``.

    Args:
        img: Camera image record; ``img.image`` holds the encoded image bytes
            and ``img.name`` identifies the camera.
        frame: Frame record that owns ``img``; its ``camera_labels`` are
            scanned for boxes belonging to the same camera.
        img_num: Index of the image within its frame (used in the file name).
        frame_num: Index of the frame within its file (used in the file name).
        file_num: Index of the source file (used in the file name).
        jpg_dir: Directory the decoded image is written to. Created if it
            does not exist. Defaults to the path the script always used.

    Returns:
        A ``tf.train.Example`` with the image, its size, its file name and the
        box coordinates normalised to the image size, plus per-box class ids
        and class names.
    """
    encoded_image_data = img.image
    image_format = b'jpeg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes = []
    classes_text = []
    # Index 0 is a placeholder so that ``label.type + 1`` indexes the list
    # directly; the stored class ids are therefore shifted by one as well.
    class_names = ["null", b"TYPE_UNKNOWN", b"TYPE_VEHICLE", b"TYPE_PEDESTRIAN", b'TYPE_SIGN', b"TYPE_CYCLIST"]

    # Decode the image only to learn its size (and to dump it to disk below).
    im = Image.open(BytesIO(encoded_image_data))
    width, height = im.size

    for camera_label in frame.camera_labels:
        # Each frame carries labels for every camera; keep only this camera's.
        if camera_label.name != img.name:
            continue
        for label in camera_label.labels:
            box = label.box
            # The box is centre-based: ``length`` is used as the extent along
            # x and ``width`` as the extent along y.
            half_x = 0.5 * box.length
            half_y = 0.5 * box.width
            # Normalise x by the image width and y by the image height.
            # (Previously ymin was divided by width and xmax by height.)
            xmins.append((box.center_x - half_x) / width)
            xmaxs.append((box.center_x + half_x) / width)
            ymins.append((box.center_y - half_y) / height)
            ymaxs.append((box.center_y + half_y) / height)
            classes.append(label.type + 1)
            classes_text.append(class_names[label.type + 1])

    img_name = f"{file_num}_{frame_num}_{img_num}.jpeg"
    # Make sure the dump directory exists so the save below cannot fail on a
    # fresh machine.
    os.makedirs(jpg_dir, exist_ok=True)
    im.save(os.path.join(jpg_dir, img_name))
    f_name = bytes(img_name, 'utf-8')

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(f_name),
        'image/source_id': dataset_util.bytes_feature(f_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

### Loop Through The Whole Dataset

In [None]:
# Open a .tfrecord
def main(_):
    """Convert every record file in DIRNAME into sharded TFRecords.

    Each camera image of each frame of each input file becomes one
    tf.train.Example (built by create_tf_example) and is written to one of
    NUM_SHARDS output files based at OUTPUT_PATH, in round-robin order.

    Args:
        _: Unused positional argument supplied by the app runner.
    """
    num_shards = NUM_SHARDS
    output_filebase = OUTPUT_PATH

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filebase, num_shards)

        # os.listdir() returns entries in arbitrary order. Sort them so the
        # file_num embedded in the image names is reproducible between runs,
        # and skip sub-directories, which cannot be opened as record files.
        input_names = sorted(
            name for name in os.listdir(DIRNAME)
            if os.path.isfile(os.path.join(DIRNAME, name)))

        written = 0  # running count of examples, drives the shard choice
        for file_num, name in enumerate(input_names):
            # Get a file from the list
            tf_file = os.path.join(DIRNAME, name)
            print(tf_file)
            datafile = WaymoDataFileReader(tf_file)
            for frame_num, frame in enumerate(datafile):
                # Loop through each frame in the file
                for img_num, camera_image in enumerate(frame.images):
                    # Loop through each image in the frame
                    tf_example = create_tf_example(
                        camera_image, frame, img_num, frame_num, file_num)
                    output_shard_index = written % num_shards
                    output_tfrecords[output_shard_index].write(
                        tf_example.SerializeToString())
                    written += 1

        

# Script entry point. Called without arguments, tf.compat.v1.app.run() is
# expected to look up main() in this module and call it with the remaining
# command-line arguments (which main ignores) — confirm against the TF docs.
if __name__ == '__main__':
    tf.compat.v1.app.run()

**That's It!** <br>

Now you have everything you need to use the Waymo Dataset on the Tensorflow Object Detection API!