# Utilities for creating TFRecords of TF examples for the Open Images dataset

In [6]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import contextlib2
import pandas as pd
import tensorflow as tf

import oid_tfrecord_creation
import tf_record_creation_util
import label_map_util

    --input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \
    --input_image_label_annotations_csv=/path/to/input/annotations-label.csv \
    --input_images_directory=/path/to/input/image_pixels_directory \
    --input_label_map=/path/to/input/labels_bbox_545.labelmap \
    --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord

CSVs with bounding box annotations and image metadata (including the image URLs)
can be downloaded from the Open Images GitHub repository:
https://github.com/openimages/dataset
This script will include every image found in the input_images_directory in the
output TFRecord, even if the image has no corresponding bounding box annotations
in the input_box_annotations_csv. If input_image_label_annotations_csv is
specified, it will add image-level labels as well. Note that the information of
whether a label is positively or negatively verified is NOT added to tfrecord.
"""

In [None]:
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
  """Populates a TF Example message with image annotations from a data frame.

  Rows whose LabelName is not a key of label_map are dropped. Of the remaining
  rows, those with a non-null YMin are treated as bounding box annotations and
  those with a null YMin as image-level labels.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image. It must contain at least one row (the image id is read from the
      first row) and the columns ImageID, LabelName, YMin, XMin, YMax and XMax.
      The columns IsGroupOf, IsOccluded, IsTruncated, IsDepiction and
      ConfidenceImageLabel are optional; the matching features are only
      written when the column exists.
    label_map: String to integer label map.
    encoded_image: The encoded image string

  Returns:
    The populated TF Example. An example is always returned; when no label of
    the image is present in label_map, its object and label lists are empty.
  """
  fields = standard_fields.TfExampleFields

  known_labels = annotations_data_frame[
      annotations_data_frame.LabelName.isin(label_map)]
  # A missing YMin marks an image-level label rather than a bounding box.
  has_box = ~known_labels.YMin.isnull()
  boxes = known_labels[has_box]
  image_labels = known_labels[~has_box]
  image_id = annotations_data_frame.ImageID.iloc[0]

  # `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
  # NOTE(review): under Python 3 the bytes features may need text encoded to
  # bytes first -- confirm what dataset_util accepts.
  feature_map = {
      fields.object_bbox_ymin:
          dataset_util.float_list_feature(boxes.YMin.values),
      fields.object_bbox_xmin:
          dataset_util.float_list_feature(boxes.XMin.values),
      fields.object_bbox_ymax:
          dataset_util.float_list_feature(boxes.YMax.values),
      fields.object_bbox_xmax:
          dataset_util.float_list_feature(boxes.XMax.values),
      fields.object_class_text:
          dataset_util.bytes_list_feature(boxes.LabelName.values),
      fields.object_class_label:
          dataset_util.int64_list_feature(
              boxes.LabelName.map(lambda x: label_map[x]).values),
      fields.filename:
          dataset_util.bytes_feature('{}.jpg'.format(image_id)),
      fields.source_id:
          dataset_util.bytes_feature(image_id),
      fields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  # Optional per-box boolean attributes, stored as 0/1 integers.
  if 'IsGroupOf' in known_labels.columns:
    feature_map[fields.object_group_of] = dataset_util.int64_list_feature(
        boxes.IsGroupOf.values.astype(int))
  if 'IsOccluded' in known_labels.columns:
    feature_map[fields.object_occluded] = dataset_util.int64_list_feature(
        boxes.IsOccluded.values.astype(int))
  if 'IsTruncated' in known_labels.columns:
    feature_map[fields.object_truncated] = dataset_util.int64_list_feature(
        boxes.IsTruncated.values.astype(int))
  if 'IsDepiction' in known_labels.columns:
    feature_map[fields.object_depiction] = dataset_util.int64_list_feature(
        boxes.IsDepiction.values.astype(int))

  # Image-level labels are only written when the annotations carry the
  # ConfidenceImageLabel column.
  if 'ConfidenceImageLabel' in image_labels.columns:
    feature_map[fields.image_class_label] = dataset_util.int64_list_feature(
        image_labels.LabelName.map(lambda x: label_map[x]).values)
    feature_map[fields.image_class_text] = dataset_util.bytes_list_feature(
        image_labels.LabelName.values)
  return tf.train.Example(features=tf.train.Features(feature=feature_map))
