In [1]:
"""Example usage:
  python object_detection/dataset_tools/create_oid_tf_record.py \
    --input_annotations_csv=/path/to/input/annotations-human-bbox.csv \
    --input_images_directory=/path/to/input/image_pixels_directory \
    --input_label_map=/path/to/input/labels_bbox_545.labelmap \
    --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
CSVs with bounding box annotations and image metadata (including the image URLs)
can be downloaded from the Open Images GitHub repository:
https://github.com/openimages/dataset
This script will include every image found in the input_images_directory in the
output TFRecord, even if the image has no corresponding bounding box annotations
in the input_annotations_csv.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import contextlib2
import pandas as pd
import tensorflow as tf

from object_detection.dataset_tools import oid_tfrecord_creation
from object_detection.utils import label_map_util

In [65]:
input_annotations_csv='/root/2017_07/validation/annotations-human-bbox.csv'
input_images_directory='/root/raw_images_validation'
input_label_map='../object_detection/data/oid_bbox_trainable_label_map.pbtxt'
!mkdir test_tfrecords
output_tf_record_path_prefix='test_tfrecords/test.tfrecord'
num_shards=100

mkdir: cannot create directory ‘test_tfrecords’: File exists


In [66]:
tf.logging.set_verbosity(tf.logging.INFO)

required_flags = [
  'input_annotations_csv', 'input_images_directory', 'input_label_map',
  'output_tf_record_path_prefix'
]

label_map = label_map_util.get_label_map_dict(input_label_map)
all_annotations = pd.read_csv(input_annotations_csv)
all_images = tf.gfile.Glob(
  os.path.join(input_images_directory, '*.jpg'))
all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
all_annotations = pd.concat([all_annotations, all_image_ids])

tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

INFO:tensorflow:Found 41620 images...


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


## v4, train, dataset

In [67]:
input_annotations_csv='/root/2018_04/train/annotations-human-bbox.csv'
input_images_directory='/root/raw_images_train'
input_label_map='../object_detection/data/oid_v4_label_map.pbtxt'
# !mkdir test_tfrecords
# output_tf_record_path_prefix='test_tfrecords/test.tfrecord'
num_shards=100

In [68]:
tf.logging.set_verbosity(tf.logging.INFO)


all_annotations_v4_train = pd.read_csv(input_annotations_csv)
all_images = tf.gfile.Glob(
  os.path.join(input_images_directory, '*.jpg'))
all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
# all_annotations = pd.concat([all_annotations, all_image_ids])

tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

INFO:tensorflow:Found 1743042 images...


In [69]:
label_map_600 = label_map_util.get_label_map_dict(input_label_map)
label_map_600_info = label_map_util.load_labelmap(input_label_map)

In [70]:
label_map_600_info

item {
  name: "/m/011k07"
  id: 1
  display_name: "Tortoise"
}
item {
  name: "/m/011q46kg"
  id: 2
  display_name: "Container"
}
item {
  name: "/m/012074"
  id: 3
  display_name: "Magpie"
}
item {
  name: "/m/0120dh"
  id: 4
  display_name: "Sea turtle"
}
item {
  name: "/m/01226z"
  id: 5
  display_name: "Football"
}
item {
  name: "/m/012n7d"
  id: 6
  display_name: "Ambulance"
}
item {
  name: "/m/012w5l"
  id: 7
  display_name: "Ladder"
}
item {
  name: "/m/012xff"
  id: 8
  display_name: "Toothbrush"
}
item {
  name: "/m/012ysf"
  id: 9
  display_name: "Syringe"
}
item {
  name: "/m/0130jx"
  id: 10
  display_name: "Sink"
}
item {
  name: "/m/0138tl"
  id: 11
  display_name: "Toy"
}
item {
  name: "/m/013y1f"
  id: 12
  display_name: "Organ"
}
item {
  name: "/m/01432t"
  id: 13
  display_name: "Cassette deck"
}
item {
  name: "/m/014j1m"
  id: 14
  display_name: "Apple"
}
item {
  name: "/m/014sv8"
  id: 15
  display_name: "Human eye"
}
item {
  name: "/m/014trl"
  id: 16
  di

## two new classes
 "/m/015qbp" parking meter, "/m/0djtd" coconut

## v2 label map
545 classese

In [71]:
input_label_map='../object_detection/data/oid_bbox_trainable_label_map.pbtxt'
label_map_v2 = label_map_util.get_label_map_dict(input_label_map)
label_map_v2_info = label_map_util.load_labelmap(input_label_map)

In [72]:
label_map_v2_add2 = label_map_v2.copy()
label_map_v2_add2["/m/015qbp"] = label_map_600["/m/015qbp"]
label_map_v2_add2["/m/0djtd"] = label_map_600["/m/0djtd"]
print(len(label_map_v2_add2))

547


## v4, train, example

In [73]:
all_annotations_v4_train.head(3)

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
0,000002b66c9c498e,xclick,/m/01g317,1,0.0125,0.195312,0.148438,0.5875,0,1,0,0,0
1,000002b66c9c498e,xclick,/m/01g317,1,0.025,0.276563,0.714063,0.948438,0,1,0,0,0
2,000002b66c9c498e,xclick,/m/01g317,1,0.151562,0.310937,0.198437,0.590625,1,0,0,0,0


## drop non 547 classes

In [74]:
# data = data.set_index("Area")
# data = data.drop("Ireland", axis=0). # Delete all rows with label "Ireland"

SyntaxError: invalid syntax (<ipython-input-74-7a0e73d9ef33>, line 2)

In [81]:
all_annotations_547 = all_annotations_v4_train.copy()

In [82]:
aa = all_annotations_v4_train.groupby("LabelName").size()

In [83]:
print(type(aa.to_dict())) 

<type 'dict'>


In [84]:
all_annotations_v4_dict = aa.to_dict()

In [99]:
all_annotations_547 = all_annotations_547.set_index("LabelName")
for key in all_annotations_v4_dict.keys():
    if key not in label_map_v2_add2:
        all_annotations_547.drop(key, axis=0, inplace=True)

In [100]:
all_annotations_547.head(3)

Unnamed: 0_level_0,ImageID,Source,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
LabelName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
/m/01g317,000002b66c9c498e,xclick,1,0.0125,0.195312,0.148438,0.5875,0,1,0,0,0
/m/01g317,000002b66c9c498e,xclick,1,0.025,0.276563,0.714063,0.948438,0,1,0,0,0
/m/01g317,000002b66c9c498e,xclick,1,0.151562,0.310937,0.198437,0.590625,1,0,0,0,0


In [101]:
all_annotations_547.groupby('LabelName').size()

LabelName
/m/011k07       1998
/m/012074        145
/m/0120dh       1132
/m/01226z       5097
/m/012n7d        447
/m/012w5l        994
/m/012xff        219
/m/012ysf        127
/m/0130jx       1648
/m/0138tl      70963
/m/013y1f        398
/m/014j1m       3898
/m/014sv8      77233
/m/014trl       2394
/m/014y4n       6951
/m/0152hh        770
/m/01599        9565
/m/015h_t       3157
/m/015p6       47921
/m/015qbp        209
/m/015qff       7426
/m/015wgc        447
/m/015x4r       1194
/m/015x5n        688
/m/0162_1        338
/m/0167gd       6442
/m/016m2d       2661
/m/0174k2        655
/m/0174n1       1198
/m/0176mf        422
               ...  
/m/0k0pj       60142
/m/0k1tl        1705
/m/0k4j       248075
/m/0k5j         1898
/m/0k65p       75307
/m/0kmg4        1587
/m/0kpqd         844
/m/0kpt_         166
/m/0ky7b          92
/m/0l14j_        362
/m/0l3ms         326
/m/0l515        1157
/m/0ll1f78      1856
/m/0llzx         453
/m/0lt4_         123
/m/0m53l         485
/m/

In [102]:
aa = all_annotations_547.query('LabelName =="/m/0h8nsvg"', inplace = False) 

In [103]:
aa

Unnamed: 0_level_0,ImageID,Source,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
LabelName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


In [104]:
all_annotations_547=all_annotations_547.reset_index()

In [105]:
all_annotations_547.to_csv(path_or_buf = "/root/all_annotations_547.csv", index=False)
