In [1]:
"""Example usage:
  python object_detection/dataset_tools/create_oid_tf_record.py \
    --input_annotations_csv=/path/to/input/annotations-human-bbox.csv \
    --input_images_directory=/path/to/input/image_pixels_directory \
    --input_label_map=/path/to/input/labels_bbox_545.labelmap \
    --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
CSVs with bounding box annotations and image metadata (including the image URLs)
can be downloaded from the Open Images GitHub repository:
https://github.com/openimages/dataset
This script will include every image found in the input_images_directory in the
output TFRecord, even if the image has no corresponding bounding box annotations
in the input_annotations_csv.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import contextlib2
import pandas as pd
import tensorflow as tf

from object_detection.dataset_tools import oid_tfrecord_creation
from object_detection.utils import label_map_util

## v4, test, dataset

In [3]:
# Inputs for the Open Images v4 test-split inspection.
num_shards = 100
input_label_map = '../object_detection/data/oid_v4_label_map.pbtxt'
input_images_directory = '/root/raw_images_test'
input_annotations_csv = '/root/2018_04/test/annotations-human-bbox.csv'
# TFRecord output is currently disabled:
# !mkdir test_tfrecords
# output_tf_record_path_prefix='test_tfrecords/test.tfrecord'

In [4]:
tf.logging.set_verbosity(tf.logging.INFO)

# Annotation table read from the CSV configured in `input_annotations_csv`.
# NOTE(review): the variable is suffixed `_train`, but the path configured in
# this notebook points at a test-split file -- confirm which split is intended.
all_annotations_v4_train = pd.read_csv(input_annotations_csv)

# Every JPEG in the image directory; an ImageID is the file name without its
# extension.
jpg_pattern = os.path.join(input_images_directory, '*.jpg')
all_images = tf.gfile.Glob(jpg_pattern)
all_image_ids = pd.DataFrame({
    'ImageID': [
        os.path.splitext(os.path.basename(image_path))[0]
        for image_path in all_images
    ]
})
# all_annotations = pd.concat([all_annotations, all_image_ids])

tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

INFO:tensorflow:Found 125436 images...


In [5]:
# Load the configured label map twice through the object_detection helpers:
# once via get_label_map_dict and once via load_labelmap.
# NOTE(review): neither result is used in the visible cells of this notebook.
label_map_600 = label_map_util.get_label_map_dict(input_label_map)
label_map_600_info = label_map_util.load_labelmap(input_label_map)

In [None]:
# look up e9ec2617897763cc

In [8]:
# Peek at the first row to check the column layout of the annotation table.
all_annotations_v4_train.head(1)

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
0,000026e7ee790996,freeform,/m/07j7r,1,0.071905,0.145346,0.206591,0.391306,0,1,1,0,0


In [10]:
# Collect every v4 annotation row that belongs to image e9ec2617897763cc.
df = all_annotations_v4_train
is_target_image = df['ImageID'].isin(['e9ec2617897763cc'])
ImageID_e9ec2617897763cc = df[is_target_image]



In [11]:
# Display the annotation rows selected for image e9ec2617897763cc.
ImageID_e9ec2617897763cc

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
572398,e9ec2617897763cc,freeform,/m/01d40f,1,0.0,0.320974,0.211437,0.412263,1,1,0,0,0
572399,e9ec2617897763cc,freeform,/m/01d40f,1,0.082951,0.93755,0.362112,1.0,1,1,0,0,0
572400,e9ec2617897763cc,freeform,/m/01g317,1,1.2e-05,0.99999,0.0,0.999935,0,1,1,0,0
572401,e9ec2617897763cc,freeform,/m/02p0tk3,1,0.0,0.999364,0.0,0.999422,0,1,1,0,0
572402,e9ec2617897763cc,freeform,/m/03bt1vf,1,0.0,0.333469,0.048539,0.50691,1,1,0,0,0
572403,e9ec2617897763cc,freeform,/m/03bt1vf,1,0.084445,0.941722,0.089972,0.998966,1,1,0,0,0
572404,e9ec2617897763cc,freeform,/m/03bt1vf,1,0.735973,0.881022,0.015074,0.145388,1,0,0,0,0
572405,e9ec2617897763cc,freeform,/m/03q69,1,0.0,0.209958,0.051782,0.272198,0,1,0,0,0
572406,e9ec2617897763cc,freeform,/m/03q69,1,0.0,1.0,0.0,0.411395,0,0,1,0,0
572407,e9ec2617897763cc,freeform,/m/09j2d,1,0.0,1.0,7e-06,1.0,0,1,1,0,0


In [None]:
# v5 annotation

In [17]:
# Download the Open Images v5 test bounding-box annotation CSV into the home
# directory (the file is overwritten on every run because of -O).
! wget -O ~/test-annotations-bbox.csv https://storage.googleapis.com/openimages/v5/test-annotations-bbox.csv

--2019-10-04 21:08:21--  https://storage.googleapis.com/openimages/v5/test-annotations-bbox.csv
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.11.80, 2607:f8b0:4007:80d::2010
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.11.80|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 77484237 (74M) [text/csv]
Saving to: ‘/root/test-annotations-bbox.csv’


2019-10-04 21:08:22 (118 MB/s) - ‘/root/test-annotations-bbox.csv’ saved [77484237/77484237]



In [18]:
# Inputs for the Open Images v5 test-split inspection. Only the annotation CSV
# differs from the v4 configuration; the label map is still the v4 one.
num_shards = 100
input_label_map = '../object_detection/data/oid_v4_label_map.pbtxt'
input_images_directory = '/root/raw_images_test'
input_annotations_csv = '/root/test-annotations-bbox.csv'
# TFRecord output is currently disabled:
# !mkdir test_tfrecords
# output_tf_record_path_prefix='test_tfrecords/test.tfrecord'

In [19]:
tf.logging.set_verbosity(tf.logging.INFO)

# Annotation table read from the CSV configured in `input_annotations_csv`.
# NOTE(review): the variable is suffixed `_train`, but the path configured in
# this notebook points at a test-split file -- confirm which split is intended.
all_annotations_v5_train = pd.read_csv(input_annotations_csv)

# Every JPEG in the image directory; an ImageID is the file name without its
# extension.
jpg_pattern = os.path.join(input_images_directory, '*.jpg')
all_images = tf.gfile.Glob(jpg_pattern)
all_image_ids = pd.DataFrame({
    'ImageID': [
        os.path.splitext(os.path.basename(image_path))[0]
        for image_path in all_images
    ]
})
# all_annotations = pd.concat([all_annotations, all_image_ids])

tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

INFO:tensorflow:Found 125436 images...


In [20]:
# Collect every v5 annotation row that belongs to image e9ec2617897763cc.
df = all_annotations_v5_train
is_target_image = df['ImageID'].isin(['e9ec2617897763cc'])
ImageID_e9ec2617897763cc = df[is_target_image]

In [21]:
# Display the annotation rows selected for image e9ec2617897763cc.
ImageID_e9ec2617897763cc

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
856243,e9ec2617897763cc,xclick,/m/0dzct,1,0.0,0.013304,0.039764,0.098675,1,1,0,0,0
856244,e9ec2617897763cc,xclick,/m/0dzct,1,0.035477,0.075388,0.007364,0.047128,1,0,0,0,0
856245,e9ec2617897763cc,xclick,/m/0dzct,1,0.04878,0.177384,0.079529,0.200295,1,0,0,0,0
856246,e9ec2617897763cc,xclick,/m/0dzct,1,0.104213,0.168514,0.025037,0.069219,1,0,0,0,0
856247,e9ec2617897763cc,xclick,/m/0dzct,1,0.184035,0.239468,0.035346,0.073638,1,0,0,0,0
856248,e9ec2617897763cc,xclick,/m/0dzct,1,0.323725,0.385809,0.0,0.039764,1,1,0,0,0
856249,e9ec2617897763cc,xclick,/m/0dzct,1,0.348115,0.441242,0.038292,0.113402,1,0,0,0,0
856250,e9ec2617897763cc,xclick,/m/0dzct,1,0.423503,0.643015,0.136966,0.332842,1,0,0,0,0
856251,e9ec2617897763cc,xclick,/m/0dzct,1,0.481153,0.611973,0.072165,0.103093,1,0,0,0,0
856252,e9ec2617897763cc,xclick,/m/0dzct,1,0.560976,0.62306,0.014728,0.036819,1,0,0,0,0


In [26]:
# Number of annotation rows per image in the v5 annotation table.
all_annotations_v5_train.groupby("ImageID").size()

ImageID
000026e7ee790996     3
000062a39995e348     2
0000c64e1253d68f     5
000132c20b84269b    39
0002ab0af02e4a77     6
0002cc8afaf1b611     1
0003d84e0165d630    17
000411001ff7dd4f     4
00045d609ca3f4eb    17
00068d5450f0358b     1
0006acf221a3e1d1     2
000794c94c6d86ea    32
0007ef7b46ad6bb8     2
00094d5e8b3cb038    28
000a546e910f0a6b     9
000aa0b1c8fd5ddf    20
000b14e4ee4a2b2b    21
000b6e5bfa3e2a34    19
000c5171b38d4bb0     2
000c5f3f0b58ce18     2
000ccf1d00f7f1cd     1
000cf5859025877f     2
000d5efd9500e718     7
000dbcc283d87f67    13
000e4e7ed48c932d    11
000e80d5874dd0fd     4
00108e3a60a46128    17
0010a3d096cd57b2     2
0012a7cf33c6f427     1
0012aacda256f0fb     1
                    ..
ffe8d89197cb7ad0    11
ffe919441411c1f1    12
ffe9a0db54e36f37     7
ffea1b30f38a85ec    13
ffea9e311adf9953     1
ffeaf3fd809336cf    30
ffec3aa222dfc2d1     4
ffecfc3edf922417     2
ffede35b67aab7d2    38
ffee883088882976     1
ffef15d9927076c0    10
ffef1bcd2503ccf5     3
ffe

In [27]:
# Number of annotation rows per image in the v4 annotation table.
all_annotations_v4_train.groupby("ImageID").size()

ImageID
000026e7ee790996     3
000062a39995e348     2
0000c64e1253d68f     5
000132c20b84269b    31
0002ab0af02e4a77     6
0002cc8afaf1b611     1
0003d84e0165d630     5
000411001ff7dd4f     4
00045d609ca3f4eb    10
00068d5450f0358b     1
0006acf221a3e1d1     2
000794c94c6d86ea    17
0007ef7b46ad6bb8     2
00094d5e8b3cb038    22
000a546e910f0a6b     5
000aa0b1c8fd5ddf     6
000b14e4ee4a2b2b    21
000b6e5bfa3e2a34    13
000c5171b38d4bb0     1
000c5f3f0b58ce18     2
000ccf1d00f7f1cd     1
000cf5859025877f     2
000d5efd9500e718     7
000dbcc283d87f67     9
000e4e7ed48c932d    11
000e80d5874dd0fd     4
00108e3a60a46128    17
0010a3d096cd57b2     2
0012a7cf33c6f427     1
0012aacda256f0fb     1
                    ..
ffe8d89197cb7ad0     3
ffe919441411c1f1    10
ffe9a0db54e36f37     7
ffea1b30f38a85ec     1
ffea9e311adf9953     1
ffeaf3fd809336cf    29
ffec3aa222dfc2d1     4
ffecfc3edf922417     2
ffede35b67aab7d2    22
ffee883088882976     1
ffef15d9927076c0     9
ffef1bcd2503ccf5     3
ffe

In [None]:
## look up distribution

In [6]:
# Independent copy of the v4 annotation table; the sampling cells further down
# operate on this copy.
# NOTE(review): the name suggests a 547-label subset, but no filtering is
# applied here -- confirm whether one was intended.
all_annotations_547 = all_annotations_v4_train.copy()

In [7]:
# Number of annotation rows per class (LabelName).
all_annotations_v4_train.groupby("LabelName").size()

LabelName
/m/011k07       178
/m/012074        39
/m/0120dh       155
/m/01226z       220
/m/012n7d        67
/m/012w5l        65
/m/012xff        20
/m/012ysf        19
/m/0130jx       119
/m/0138tl      1205
/m/013y1f        63
/m/01432t        32
/m/014j1m       334
/m/014sv8     13034
/m/014trl       151
/m/014y4n       699
/m/0152hh        30
/m/01599        332
/m/015h_t       481
/m/015p6       2751
/m/015qbp         3
/m/015qff       131
/m/015wgc        78
/m/015x4r       259
/m/015x5n        67
/m/0162_1        29
/m/0167gd       188
/m/016m2d        91
/m/0174k2       121
/m/0174n1       158
              ...  
/m/0k1tl        149
/m/0k4j       28737
/m/0k5j         556
/m/0k65p      12505
/m/0km7z         23
/m/0kmg4        125
/m/0kpqd        126
/m/0kpt_         29
/m/0ky7b          3
/m/0l14j_        31
/m/0l3ms         48
/m/0l515        240
/m/0ll1f78      266
/m/0llzx         74
/m/0lt4_         16
/m/0m53l         83
/m/0mcx2         61
/m/0mkg          79
/m/0mw_6  

In [121]:
# Same per-class row counts, as a plain dict so the display is not elided.
all_annotations_v4_train.groupby("LabelName").size().to_dict()

{'/m/011k07': 1998,
 '/m/012074': 145,
 '/m/0120dh': 1132,
 '/m/01226z': 5097,
 '/m/012n7d': 447,
 '/m/012w5l': 994,
 '/m/012xff': 219,
 '/m/012ysf': 127,
 '/m/0130jx': 1648,
 '/m/0138tl': 70963,
 '/m/013y1f': 398,
 '/m/01432t': 74,
 '/m/014j1m': 3898,
 '/m/014sv8': 77233,
 '/m/014trl': 2394,
 '/m/014y4n': 6951,
 '/m/0152hh': 770,
 '/m/01599': 9565,
 '/m/015h_t': 3157,
 '/m/015p6': 47921,
 '/m/015qbp': 209,
 '/m/015qff': 7426,
 '/m/015wgc': 447,
 '/m/015x4r': 1194,
 '/m/015x5n': 688,
 '/m/0162_1': 338,
 '/m/0167gd': 6442,
 '/m/016m2d': 2661,
 '/m/0174k2': 655,
 '/m/0174n1': 1198,
 '/m/0175cv': 143,
 '/m/0176mf': 422,
 '/m/017ftj': 23996,
 '/m/018j2': 264,
 '/m/018p4k': 2755,
 '/m/018xm': 6845,
 '/m/01940j': 1216,
 '/m/0199g': 40161,
 '/m/019dx1': 2086,
 '/m/019h78': 280,
 '/m/019jd': 79113,
 '/m/019w40': 2594,
 '/m/01_5g': 617,
 '/m/01_bhs': 24991,
 '/m/01b638': 3132,
 '/m/01b7fy': 1255,
 '/m/01b9xk': 482,
 '/m/01bfm9': 16981,
 '/m/01bjv': 11927,
 '/m/01bl7v': 87555,
 '/m/01bms0': 85,


In [9]:
# Annotation rows of the single class /m/015x4r.
aa = all_annotations_547.query('LabelName =="/m/015x4r"')

In [15]:
# Per-image row counts for the class selected in `aa`, as a dict.
aa.groupby('ImageID').size().to_dict()

{'000f220c9a9533cc': 2,
 '006df68c88745213': 3,
 '008838299bd147a4': 1,
 '009bfd2738d8543d': 2,
 '009e483fe67bcf00': 3,
 '00af886180e9eaec': 2,
 '00b3999cddded683': 2,
 '00dd63a6da037b22': 2,
 '0148ed083755215c': 5,
 '01c97ef479e40619': 1,
 '01d72e99379eadf9': 1,
 '01dc72d4f485375e': 1,
 '02231dba7f5b2866': 2,
 '02ae856e14dbff15': 3,
 '02e5a63815b6e479': 2,
 '0308fabcf9363553': 1,
 '0353b5f2eb7bd145': 7,
 '0394e9f22a816c33': 2,
 '03964d6b1a341877': 1,
 '03c510697e4506b5': 3,
 '045b70d9b74821a4': 1,
 '04662f5ed5ed57f0': 5,
 '04d128efffc684fe': 2,
 '0543b7b11c8eaa94': 2,
 '055b4d28607ffb84': 3,
 '057d2471d6724d66': 1,
 '05915ebe952bace4': 1,
 '06091846135f8f9c': 1,
 '06510b7db01b5473': 3,
 '066054cae75ea053': 4,
 '067d9be41a41aaf5': 2,
 '06bcd93f0cf2f003': 6,
 '071790f0da8bda4a': 12,
 '072258536c0a01bf': 1,
 '073003db0ab478e4': 1,
 '0733828232f0d999': 3,
 '07aba0947486a4da': 5,
 '07dcf6f487bb14a2': 1,
 '0833cdc20d9277d4': 4,
 '0906830f03194534': 7,
 '093b3a277c0c4e4d': 3,
 '0946c0abf2117

In [16]:
# All annotation rows of one example image, across every class it contains.
all_annotations_547.query('ImageID =="000f220c9a9533cc"')

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
18662,000f220c9a9533cc,xclick,/m/015x4r,1,0.075781,0.824219,0.463542,0.996875,0,1,1,0,0
18663,000f220c9a9533cc,xclick,/m/015x4r,1,0.417188,0.664062,0.894792,0.998958,0,0,0,0,0
18664,000f220c9a9533cc,xclick,/m/01g317,1,0.357812,0.75625,0.182292,0.690625,0,0,0,0,0
18665,000f220c9a9533cc,xclick,/m/027pcv,1,0.092969,0.267188,0.505208,0.866667,1,0,0,0,0
18666,000f220c9a9533cc,xclick,/m/027pcv,1,0.095312,0.409375,0.836458,0.998958,1,1,1,0,0
18667,000f220c9a9533cc,xclick,/m/027pcv,1,0.232031,0.414844,0.451042,0.8125,1,0,0,0,0
18668,000f220c9a9533cc,xclick,/m/027pcv,1,0.348437,0.83125,0.586458,0.998958,1,1,1,0,0


## Retain at most 300 images per class
1a) For each class, select the annotations with that LabelName

1b) Group them by ImageID and keep the first 300 images

1c) Collect the selected ImageIDs into a set()

2a) Select every annotation of the collected ImageIDs into a new DataFrame


In [17]:
# Map each class (LabelName) to its number of annotation rows; the keys drive
# the per-class sampling loop.
label_row_counts_v4 = all_annotations_v4_train.groupby("LabelName").size()
LabelName_v4_dict = label_row_counts_v4.to_dict()

In [76]:
# Start with an empty set of selected ImageIDs.
ImageID_per300_set = set()

In [78]:
# Upper bound on the number of images kept for each class.
MAX_IMAGES_PER_LABEL = 300

# Build the selection in a fresh set so that re-running this cell always yields
# the same result instead of accumulating on top of a previous run.
selected_image_ids = set()
for LabelName in LabelName_v4_dict.keys():
    # A boolean mask replaces the string-concatenated query(): there is no
    # quoting to get wrong and no expression to re-parse for every class.
    LabelName_subset = all_annotations_547[
        all_annotations_547['LabelName'] == LabelName]
    # Distinct images that contain this class, in ascending ImageID order (the
    # order groupby('ImageID') produced before), capped at the per-class limit.
    image_ids_for_label = sorted(
        LabelName_subset['ImageID'].dropna().unique())[:MAX_IMAGES_PER_LABEL]
    selected_image_ids.update(image_ids_for_label)

# Union over all classes: an image picked for one class is kept once, and all of
# its annotations (for every class) are retained by the later isin() selection.
ImageID_per300_set = selected_image_ids
    
    

In [74]:
# Display the selected ImageIDs.
# NOTE(review): this cell's execution counter (74) is lower than the sampling
# loop's (78), so the recorded output comes from an earlier kernel state.
ImageID_per300_set

{'0001f65de725a864',
 '000698b6a00772ac',
 '0008c55ba81d387e',
 '00164cfbc5281399',
 '0016c2695043b4c6',
 '001837ea6af349a1',
 '001a5d3e351466f4',
 '001e1cf3abbee3cd',
 '00249e489bb887ad',
 '002532414f7e1a35'}

In [80]:
# Number of distinct images selected across all classes.
len(ImageID_per300_set)

123814

In [90]:
# ImageID_per300_annotation = all_annotations_547.query('LabelName ==""', inplace = False) 
# ImageID_per300_annotation.shape

(0, 13)

In [99]:
# Select by boolean mask with isin() -- noticeably faster than query() here.
# Pattern: df[df['A'].isin([3, 6])]
df = all_annotations_547
in_selected_images = df['ImageID'].isin(list(ImageID_per300_set))
# Every annotation row (of any class) that belongs to a selected image.
ImageID_per300_annotation = df[in_selected_images]


In [95]:
# Show the selected ImageIDs as a list.
# NOTE(review): this prints the entire collection; consider slicing it before
# sharing the notebook to keep the stored output small.
list(ImageID_per300_set)

['00b8735d5bcdfa22',
 '2692389a6650d9b2',
 'e96d7e45ab6b7cc1',
 '00b02dc4719d8ed8',
 '0ffc777bbe764eeb',
 '03a7f698f977cac4',
 '01d35d6966cba3a3',
 '0e1daca97cd44baa',
 '003e3ce72a55c426',
 '31859a08d0df0d59',
 '006df640ce32c5e2',
 '1c73155a7886664d',
 '1186a1ccc9923678',
 '078677e01a860b68',
 '33a4f78672f617d5',
 '003e50d325ce60fe',
 'a4548b52eec33412',
 '03de064b5af0e374',
 '02842e8a2a7cc464',
 '13271c1eeaebbce2',
 '024f2bf9dc4a0919',
 '0d907185820b1a6b',
 '00ef71f8bbd3c94e',
 '0b370c25f8df8450',
 '036fb73db5b61c31',
 '83f60f2993300ad3',
 '003132babefacc2b',
 '16ee1ec4a888f312',
 '06ce6fbf4c89c485',
 '02004dca244ce346',
 'f67d501adca0a6c7',
 'a2c191f79596e44e',
 '3aedf8fcceb797bf',
 '06710118a60cddb6',
 '0001c8c65851276f',
 '01b9c755004d938b',
 '00be4dbc71a5400a',
 '010cc8130803ef77',
 '17d195cd54198979',
 '131a1e2221d77e85',
 '28503aaf83123819',
 'e1aaf9a7820a18fd',
 'e3bfb71bef1818af',
 '06a8544025cd5ec1',
 '0419aa5b3be2ff3a',
 '549ed02f6b89f94b',
 '52beaf5957e71a3f',
 '013ea4c0fb7

In [100]:
# Annotation rows per image in the sampled subset.
ImageID_per300_annotation.groupby('ImageID').size()

ImageID
000002b66c9c498e    13
000002b97e5471a0    11
000002c707c9895e     1
0000048549557964    12
000004f4400f6ec5    25
0000071d71a0a6f6     9
000013ba71c12506     4
000018acd19b4ad3     3
00001bc2c4027449     3
00001bcc92282a38     3
0000201cd362f303    13
000020780ccee28d     4
000023aa04ab09ed     6
0000253ea4ecbf19     5
000025ea48cab6fc    10
0000271195f2c007    10
0000286a5c6a3eb5     3
00002b368e91b947    23
00002f4ff380c64c    11
0000313e5dccf13b    12
000032046c3f8371     1
00003223e04e2e66     3
0000333f08ced1cd     1
000033469fb48bc1    24
0000339d0372e7e6    11
0000375a83c19042     4
000037c2dd414b46    46
00003bfccf5f36c2     4
00003d63a4839019     2
00003e2837c7b728     5
                    ..
ff5b62d4ae063036     5
ff5c4ec654ff637e     7
ff5d5aef490c1ce1     7
ff64bd069a687687     1
ff696d012754d26c     1
ff6dd2c703480249     1
ff75bcf28d75de46     6
ff76ced3840997de    13
ff76fab7620aceb6     1
ff810bcadac4c145     2
ff88c5411b72e52c     1
ff8ac28dbdbdb760     1
ff8

In [118]:
# Persist the sampled annotations; the DataFrame index is not written.
ImageID_per300_annotation.to_csv(
    '/root/ImageID_per300_annotation_v4.csv',
    index=False,
)

In [117]:
# Number of distinct images in the sampled subset (one group per ImageID).
ImageID_per300_annotation.groupby("ImageID").ngroups

123814

In [119]:
# (rows, columns) of the sampled annotation table.
ImageID_per300_annotation.shape

(581550, 13)

In [116]:
# Annotation rows per image in the sampled subset.
# NOTE(review): same expression as an earlier cell; one of the two can go.
ImageID_per300_annotation.groupby("ImageID").size()

ImageID
000002b66c9c498e    13
000002b97e5471a0    11
000002c707c9895e     1
0000048549557964    12
000004f4400f6ec5    25
0000071d71a0a6f6     9
000013ba71c12506     4
000018acd19b4ad3     3
00001bc2c4027449     3
00001bcc92282a38     3
0000201cd362f303    13
000020780ccee28d     4
000023aa04ab09ed     6
0000253ea4ecbf19     5
000025ea48cab6fc    10
0000271195f2c007    10
0000286a5c6a3eb5     3
00002b368e91b947    23
00002f4ff380c64c    11
0000313e5dccf13b    12
000032046c3f8371     1
00003223e04e2e66     3
0000333f08ced1cd     1
000033469fb48bc1    24
0000339d0372e7e6    11
0000375a83c19042     4
000037c2dd414b46    46
00003bfccf5f36c2     4
00003d63a4839019     2
00003e2837c7b728     5
                    ..
ff5b62d4ae063036     5
ff5c4ec654ff637e     7
ff5d5aef490c1ce1     7
ff64bd069a687687     1
ff696d012754d26c     1
ff6dd2c703480249     1
ff75bcf28d75de46     6
ff76ced3840997de    13
ff76fab7620aceb6     1
ff810bcadac4c145     2
ff88c5411b72e52c     1
ff8ac28dbdbdb760     1
ff8

In [105]:
# Rows whose class has fewer than 50 annotation rows in the sampled subset.
ImageID_per300_annotation.groupby('LabelName').filter(
    lambda label_rows: len(label_rows) < 50
)

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
31862,001e61db0f5130d9,xclick,/m/07dd4,1,0.201875,0.775000,0.278333,0.727500,0,0,0,0,0
58531,004dec94c5de631f,activemil,/m/03qhv5,1,0.248750,0.673750,0.278333,0.723333,-1,-1,-1,-1,-1
69742,0065a51ab60e59fe,xclick,/m/07dd4,1,0.457500,0.830000,0.461679,0.693431,1,0,0,0,0
71398,0069743a230bbf8f,xclick,/m/0ct4f,1,0.317500,0.684375,0.056667,0.881667,0,0,0,0,0
78195,00799b2b084b6542,activemil,/m/07dd4,1,0.221250,0.500000,0.478424,0.755159,-1,-1,-1,-1,-1
85794,008d54057dc2b157,activemil,/m/07dd4,1,0.059375,0.306875,0.074977,0.560976,-1,-1,-1,-1,-1
89810,00980a5f319aec67,activemil,/m/03qhv5,1,0.015000,0.980625,0.000000,0.847093,-1,-1,-1,-1,-1
91257,009b230bae16361c,xclick,/m/07dd4,1,0.409375,0.601250,0.306667,0.440833,0,0,0,0,0
95703,00a6a3f60c5bfb36,xclick,/m/04z4wx,1,0.425625,0.999375,0.000000,0.750833,0,0,0,0,0
125817,00fce96bdd7a130c,xclick,/m/0j496,1,0.092188,0.206250,0.746875,0.915625,0,0,0,0,0


In [107]:
# Boolean flag per class: True where the class has fewer than 50 rows.
ImageID_per300_annotation.groupby('LabelName').size() <50

LabelName
/m/011k07     False
/m/012074     False
/m/0120dh     False
/m/01226z     False
/m/012n7d     False
/m/012w5l     False
/m/012xff     False
/m/012ysf     False
/m/0130jx     False
/m/0138tl     False
/m/013y1f     False
/m/01432t     False
/m/014j1m     False
/m/014sv8     False
/m/014trl     False
/m/014y4n     False
/m/0152hh     False
/m/01599      False
/m/015h_t     False
/m/015p6      False
/m/015qbp     False
/m/015qff     False
/m/015wgc     False
/m/015x4r     False
/m/015x5n     False
/m/0162_1     False
/m/0167gd     False
/m/016m2d     False
/m/0174k2     False
/m/0174n1     False
              ...  
/m/0k5j       False
/m/0k65p      False
/m/0km7z      False
/m/0kmg4      False
/m/0kpqd      False
/m/0kpt_      False
/m/0ky7b      False
/m/0l14j_     False
/m/0l3ms      False
/m/0l515      False
/m/0ll1f78    False
/m/0llzx      False
/m/0lt4_      False
/m/0m53l      False
/m/0mcx2      False
/m/0mkg       False
/m/0mw_6      False
/m/0n28_      False
/m/0nl46  

In [109]:
# Classes with fewer than 25 annotation rows in the sampled subset. The
# per-class counts are computed once and reused for both the mask and the
# selection instead of running the same groupby twice.
label_counts_per300 = ImageID_per300_annotation.groupby('LabelName').size()
label_counts_per300[label_counts_per300 < 25]

LabelName
/m/02ddwp     21
/m/02mqfb      7
/m/04f5ws     21
/m/04lvq_     11
/m/05w9t9     10
/m/07dd4      20
/m/080n7g      4
/m/08ks85     20
/m/0ct4f       8
/m/0h8jyh6    14
/m/0h8nsvg    20
/m/0h8ntjv    14
/m/0xzly      10
dtype: int64

In [111]:
# parking meter
parking_meter_rows = all_annotations_547.query('LabelName =="/m/0djtd"')
# Annotation rows per image for that class.
parking_meter_rows.groupby('ImageID').size()


ImageID
0017747766a4779f     2
001e4a514b1089b3    20
001e654d76be48c2     1
00216b18d4aeb3b5     1
0023fb12b6a05283     1
0024034f4d61b7d6     1
0048c9e02c09977d     1
0058138f2e406891     1
005ad6a4f29022ff     2
0072738ee0368160     1
008cd496a4412169     1
009808de9640bbf6     4
00b9bffc26b09800     2
00cad86848fd2543     1
00d0791962f713ab     2
0102e26fe682d626     2
0105d7f551e92f36     1
011e32368f5dd394     2
012817a22796b0de     2
0136adf7a926ba2b     1
0136ae6b64db798a     1
014c41c22256c708     1
016495d044ac8c4f     3
018b63a2010d0dd8     1
018cac9e5922fb9a     5
018e0d851c225849     2
018ed94728dca31a     1
01aa6ff5cc8265ba     1
01ce457ea768530e     2
01daa5dce678ca39     1
                    ..
dd6d1b94e0c2e8ac     1
ddb59b3c4029f8a4     1
de42c287a307399e     1
de7bd44ef71402a1     4
df7b71745dcb06d0     1
dfa94ed82f278f84     1
e0b90ab4f532243e     2
e3327cb27ca06221     1
e41bbedb30572f45     2
e435af0e6c54cef6     1
e4e5c97ec5b92119     1
e6d20e4621f8a58d     3
e94

## Conclusion
Use the 545+2 label set, because the 600-label set contains classes with fewer than 25 samples. Could so few samples lead to a larger error?

In [122]:
# Annotation rows per class in the sampled subset, as a dict.
ImageID_per300_annotation.groupby("LabelName").size().to_dict()

{'/m/011k07': 719,
 '/m/012074': 145,
 '/m/0120dh': 449,
 '/m/01226z': 414,
 '/m/012n7d': 402,
 '/m/012w5l': 387,
 '/m/012xff': 219,
 '/m/012ysf': 127,
 '/m/0130jx': 781,
 '/m/0138tl': 2230,
 '/m/013y1f': 325,
 '/m/01432t': 74,
 '/m/014j1m': 1403,
 '/m/014sv8': 2194,
 '/m/014trl': 988,
 '/m/014y4n': 1470,
 '/m/0152hh': 457,
 '/m/01599': 736,
 '/m/015h_t': 377,
 '/m/015p6': 1177,
 '/m/015qbp': 209,
 '/m/015qff': 1298,
 '/m/015wgc': 447,
 '/m/015x4r': 968,
 '/m/015x5n': 688,
 '/m/0162_1': 338,
 '/m/0167gd': 576,
 '/m/016m2d': 537,
 '/m/0174k2': 611,
 '/m/0174n1': 500,
 '/m/0175cv': 143,
 '/m/0176mf': 362,
 '/m/017ftj': 929,
 '/m/018j2': 264,
 '/m/018p4k': 459,
 '/m/018xm': 1203,
 '/m/01940j': 558,
 '/m/0199g': 1300,
 '/m/019dx1': 654,
 '/m/019h78': 280,
 '/m/019jd': 1445,
 '/m/019w40': 460,
 '/m/01_5g': 392,
 '/m/01_bhs': 2573,
 '/m/01b638': 691,
 '/m/01b7fy': 365,
 '/m/01b9xk': 407,
 '/m/01bfm9': 1149,
 '/m/01bjv': 533,
 '/m/01bl7v': 1804,
 '/m/01bms0': 85,
 '/m/01bqk0': 1939,
 '/m/01bt

In [None]:
# Show every v4 annotation row of image 4fff89d787cb2e8e.
df = all_annotations_v4_train
is_target_image = df['ImageID'].isin(['4fff89d787cb2e8e'])
df[is_target_image]