In [1]:
import os
import json
import cv2
import numpy as np
import pandas as pd

In [2]:
root_path = '../VOC2012_coco/annotations'

# your annotations.json of data must in the 'food_100/annotations/' path
annot_path = os.path.join(root_path, 'annotations.json')

for path in [annot_path]:
    with open(path, 'r') as file:
        json_dict = json.load(open(path, 'r'))
    images = json_dict['images']
    annots = json_dict['annotations']
    print('images', len(images))
    print('annotations', len(annots))

images 5717
annotations 15774


In [3]:
json_dict['annotations']

[{'segmentation': None,
  'area': None,
  'iscrowd': 0,
  'image_id': 2008000008,
  'bbox': [53, 87, 418, 333],
  'category_id': 4,
  'id': 0,
  'ignore': 0},
 {'segmentation': None,
  'area': None,
  'iscrowd': 0,
  'image_id': 2008000008,
  'bbox': [158, 44, 131, 123],
  'category_id': 10,
  'id': 1,
  'ignore': 0},
 {'segmentation': None,
  'area': None,
  'iscrowd': 0,
  'image_id': 2008000015,
  'bbox': [270, 1, 108, 175],
  'category_id': 16,
  'id': 2,
  'ignore': 0},
 {'segmentation': None,
  'area': None,
  'iscrowd': 0,
  'image_id': 2008000015,
  'bbox': [57, 1, 107, 149],
  'category_id': 16,
  'id': 3,
  'ignore': 0},
 {'segmentation': None,
  'area': None,
  'iscrowd': 0,
  'image_id': 2008000019,
  'bbox': [139, 2, 233, 195],
  'category_id': 2,
  'id': 4,
  'ignore': 0},
 {'segmentation': None,
  'area': None,
  'iscrowd': 0,
  'image_id': 2008000019,
  'bbox': [165, 66, 153, 170],
  'category_id': 2,
  'id': 5,
  'ignore': 0},
 {'segmentation': None,
  'area': None,
  

In [4]:
for annot in json_dict['annotations']:
    annot['iscrowd'] = 0
    annot['ignore'] = 0
    annot['area'] = annot['bbox'][2] * annot['bbox'][3] 

In [5]:
data_index = sorted([meta['id'] for meta in json_dict['images']])
size = len(data_index)
train_size = int(size * 0.7)
test_size = int(size * 0.15)
val_size = int(size * 0.15)

np.random.seed(0)

train_index = np.random.choice(data_index, train_size, replace=False)

test_index = np.setdiff1d(data_index, train_index)

val_index = np.random.choice(test_index, val_size, replace=False)

test_index = np.setdiff1d(test_index, val_index)

In [6]:
print('total number images')
print('data: ', len(data_index))
print('train', len(train_index))
print('test', len(test_index))
print('val', len(val_index))

total number images
data:  5717
train 4001
test 859
val 857


In [7]:
train = {"info": json_dict['info'],
             "licenses": json_dict['licenses'],
             "images": [meta for meta in json_dict['images'] if meta['id'] in train_index],
             "annotations": [meta for meta in json_dict['annotations'] if meta['image_id'] in train_index],
             "categories":json_dict['categories']}
test = {"info": json_dict['info'],
             "licenses": json_dict['licenses'],
             "images": [meta for meta in json_dict['images'] if meta['id'] in test_index],
             "annotations": [meta for meta in json_dict['annotations'] if meta['image_id'] in test_index],
             "categories":json_dict['categories']}
val = {"info": json_dict['info'],
             "licenses": json_dict['licenses'],
             "images": [meta for meta in json_dict['images'] if meta['id'] in val_index],
             "annotations": [meta for meta in json_dict['annotations'] if meta['image_id'] in val_index],
             "categories":json_dict['categories']}

In [8]:
class_count = pd.DataFrame()
class_count['train_count'] = pd.DataFrame([instance['category_id'] for instance in train['annotations']]).value_counts().sort_index()
class_count['val_count'] = pd.DataFrame([instance['category_id'] for instance in val['annotations']]).value_counts().sort_index()
class_count['test_count'] = pd.DataFrame([instance['category_id'] for instance in test['annotations']]).value_counts().sort_index()
class_count = class_count.reset_index(drop=True)
class_count.index = [i for i in range(1,21)]
class_count

Unnamed: 0,train_count,val_count,test_count
1,361,107,124
2,872,155,164
3,520,112,136
4,324,74,72
5,272,62,43
6,438,95,76
7,1057,202,198
8,298,110,101
9,364,83,61
10,266,69,64


In [9]:
print('total number annotations')
print('data', len(json_dict['annotations']))
print('train', len(train['annotations']))
print('test', len(test['annotations']))
print('val', len(val['annotations']))

total number annotations
data 15774
train 11198
test 2294
val 2282


In [10]:
# for name, meta in zip(['train_voc.json', 'test_voc.json', 'val_voc.json'], 
#                       [train, test, val]):
#     with open(os.path.join(root_path, name), 'w') as outfile:
#          json.dump(meta, outfile)

In [11]:
# your annotations.json of data must in the 'food_100/annotations/' path
annot_path = os.path.join(root_path, 'val_voc.json')

for path in [annot_path]:
    with open(path, 'r') as file:
        json_dict = json.load(open(path, 'r'))
    images = json_dict['images']
    annots = json_dict['annotations']
    print('images', len(images))
    print('annotations', len(annots))

images 857
annotations 2282


In [12]:
class_name = ['__background__']
for dct in json_dict['categories']:
    class_name.append('%d %s'%(dct['id'], dct['name']))

In [13]:
class_name

['__background__',
 '0 bird',
 '1 car',
 '2 dog',
 '3 aeroplane',
 '4 horse',
 '5 cat',
 '6 chair',
 '7 sheep',
 '8 boat',
 '9 sofa',
 '10 person',
 '11 bus',
 '12 train',
 '13 bicycle',
 '14 tvmonitor',
 '15 motorbike',
 '16 bottle',
 '17 cow',
 '18 diningtable',
 '19 pottedplant']