In [1]:
import pandas as pd, numpy as np
import json
from pathlib import Path

In [2]:
# Locations of the dataset directory and of the annotation JSON inside it.
DATA_ROOT = Path("./data")
ANNOTATION_ROOT = DATA_ROOT.joinpath('annotations.json')

In [3]:
# Load the whole annotation file into a plain dict.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the result
# does not depend on the platform's default text encoding.
with open(ANNOTATION_ROOT, 'r', encoding='utf-8') as f:
    # Parse directly from the file handle instead of read() + loads().
    dataset = json.load(f)

In [4]:
# Pull the three top-level sections of the annotation dict into their own names.
categories, anns, imgs = (
    dataset[section] for section in ('categories', 'annotations', 'images')
)

In [5]:
# Inspect one category record to see which fields it carries.
categories[10]

{'supercategory': 'Can', 'id': 10, 'name': 'Food Can'}

In [6]:
# Inspect the first image record (dimensions, file name, source URLs).
imgs[0]

{'id': 0,
 'width': 1537,
 'height': 2049,
 'file_name': 'batch_1/000006.jpg',
 'license': None,
 'flickr_url': 'https://farm66.staticflickr.com/65535/33978196618_e30a59e0a8_o.png',
 'coco_url': None,
 'date_captured': None,
 'flickr_640_url': 'https://farm66.staticflickr.com/65535/33978196618_632623b4fc_z.jpg'}

In [7]:
# Inspect the first annotation record; the segmentation polygon makes this output long.
anns[0]

{'id': 1,
 'image_id': 0,
 'category_id': 6,
 'segmentation': [[561.0,
   1238.0,
   568.0,
   1201.0,
   567.0,
   1175.0,
   549.0,
   1127.0,
   538.0,
   1089.0,
   519.0,
   1043.0,
   517.0,
   1005.0,
   523.0,
   964.0,
   529.0,
   945.0,
   520.0,
   896.0,
   525.0,
   862.0,
   536.0,
   821.0,
   554.0,
   769.0,
   577.0,
   727.0,
   595.0,
   678.0,
   596.0,
   585.0,
   588.0,
   346.0,
   581.0,
   328.0,
   569.0,
   306.0,
   570.0,
   276.0,
   576.0,
   224.0,
   560.0,
   205.0,
   564.0,
   170.0,
   578.0,
   154.0,
   608.0,
   136.0,
   649.0,
   127.0,
   688.0,
   127.0,
   726.0,
   129.0,
   759.0,
   141.0,
   784.0,
   153.0,
   792.0,
   177.0,
   788.0,
   193.0,
   782.0,
   209.0,
   792.0,
   238.0,
   802.0,
   271.0,
   802.0,
   294.0,
   791.0,
   319.0,
   789.0,
   360.0,
   794.0,
   395.0,
   810.0,
   529.0,
   819.0,
   609.0,
   841.0,
   675.0,
   882.0,
   728.0,
   916.0,
   781.0,
   928.0,
   802.0,
   938.0,
   834.0,
   940.0,
  

In [8]:
def _flatten_annotation(ann):
    """Return a flat, DataFrame-ready copy of one annotation dict.

    The input dict is not modified. In the returned row:
      * 'segmentation' and 'iscrowd' are removed (ignored if absent),
      * 'bbox' is expanded into 'x', 'y', 'width', 'height'
        (taken from positions 0-3 of the list),
      * 'id', 'image_id', 'category_id' are renamed to
        'ann_id', 'img_id', 'cat_id'.
    Any other key (e.g. 'area') is kept as-is.

    Raises:
        KeyError: if 'bbox', 'id', 'image_id' or 'category_id' is missing.
        IndexError: if 'bbox' has fewer than four entries.
    """
    row = dict(ann)  # shallow copy so the raw annotation stays untouched
    # These fields are discarded anyway, so a record lacking them is not an error.
    row.pop('segmentation', None)
    row.pop("iscrowd", None)
    box = row.pop("bbox")
    row["x"] = box[0]
    row["y"] = box[1]
    row["width"] = box[2]
    row["height"] = box[3]
    # Prefix the identifiers so they stay unambiguous after merging with other tables.
    row["ann_id"] = row.pop("id")
    row["img_id"] = row.pop("image_id")
    row["cat_id"] = row.pop("category_id")
    return row


# Build the frame once from a list of flat records; `df_anns` is only ever a DataFrame.
df_anns = pd.DataFrame([_flatten_annotation(ann) for ann in anns])
df_anns.shape

(4784, 8)

In [9]:
# Preview the flattened annotation table.
df_anns.head()

Unnamed: 0,area,x,y,width,height,ann_id,img_id,cat_id
0,403954.0,517.0,127.0,447.0,1322.0,1,0,6
1,1071259.5,1.0,457.0,1429.0,1519.0,2,1,18
2,99583.5,531.0,292.0,1006.0,672.0,3,1,14
3,73832.5,632.0,987.0,500.0,374.0,4,2,5
4,915.0,632.0,989.0,44.0,51.0,5,2,7


In [10]:
# Re-check the image record layout before building the image table.
imgs[0]

{'id': 0,
 'width': 1537,
 'height': 2049,
 'file_name': 'batch_1/000006.jpg',
 'license': None,
 'flickr_url': 'https://farm66.staticflickr.com/65535/33978196618_e30a59e0a8_o.png',
 'coco_url': None,
 'date_captured': None,
 'flickr_640_url': 'https://farm66.staticflickr.com/65535/33978196618_632623b4fc_z.jpg'}

In [11]:
# Image table: keep only the id, size and file name of each image, and prefix
# the columns with "img_" (the annotation table has its own width/height).
df_imgs = (
    pd.DataFrame.from_records(imgs, columns=["id", "width", "height", "file_name"])
    .rename(
        columns={
            "id": "img_id",
            "width": "img_width",
            "height": "img_height",
            "file_name": "img_file",
        }
    )
)
df_imgs.shape

(1500, 4)

In [12]:
# Preview the image table.
df_imgs.head()

Unnamed: 0,img_id,img_width,img_height,img_file
0,0,1537,2049,batch_1/000006.jpg
1,1,1537,2049,batch_1/000008.jpg
2,2,1537,2049,batch_1/000010.jpg
3,3,2049,1537,batch_1/000019.jpg
4,4,1537,2049,batch_1/000026.jpg


In [13]:
# Inspect the first category record before building the category table.
categories[0]

{'supercategory': 'Aluminium foil', 'id': 0, 'name': 'Aluminium foil'}

In [14]:
# Category table: one row per category, with id/name prefixed by "cat_".
df_cats = pd.DataFrame(categories).rename(
    columns={"id": "cat_id", "name": "cat_name"}
)
df_cats.shape

(60, 3)

In [15]:
# Preview the category table.
df_cats.head()

Unnamed: 0,supercategory,cat_id,cat_name
0,Aluminium foil,0,Aluminium foil
1,Battery,1,Battery
2,Blister pack,2,Aluminium blister pack
3,Blister pack,3,Carded blister pack
4,Bottle,4,Other plastic bottle


In [16]:
# Re-display the annotation table; img_id and cat_id are the keys it shares with df_imgs and df_cats.
df_anns.head()

Unnamed: 0,area,x,y,width,height,ann_id,img_id,cat_id
0,403954.0,517.0,127.0,447.0,1322.0,1,0,6
1,1071259.5,1.0,457.0,1429.0,1519.0,2,1,18
2,99583.5,531.0,292.0,1006.0,672.0,3,1,14
3,73832.5,632.0,987.0,500.0,374.0,4,2,5
4,915.0,632.0,989.0,44.0,51.0,5,2,7


In [17]:
# Attach image and category metadata to every annotation, then put the
# columns in a fixed order and sort rows by image and annotation id.
#
# validate="many_to_one" makes pandas raise a MergeError if img_id / cat_id is
# duplicated in the lookup table, which would otherwise silently multiply
# annotation rows.
df = (
    df_anns
    .merge(df_imgs, on="img_id", validate="many_to_one")
    .merge(df_cats, on="cat_id", validate="many_to_one")
    [['img_id', 'img_width', 'img_height', 'img_file', 'cat_id', 'cat_name',
      'supercategory', 'ann_id', 'x', 'y', 'width', 'height', 'area']]
    .sort_values(["img_id", "ann_id"])
    .reset_index(drop=True)
)
# Both merges are inner joins, so an annotation that points at an unknown
# image or category would simply vanish; fail loudly instead of losing rows.
assert len(df) == len(df_anns), "merge dropped annotations with unmatched img_id/cat_id"
df.shape

(4784, 13)

In [18]:
# Preview the merged annotation/image/category table.
df.head()

Unnamed: 0,img_id,img_width,img_height,img_file,cat_id,cat_name,supercategory,ann_id,x,y,width,height,area
0,0,1537,2049,batch_1/000006.jpg,6,Glass bottle,Bottle,1,517.0,127.0,447.0,1322.0,403954.0
1,1,1537,2049,batch_1/000008.jpg,18,Meal carton,Carton,2,1.0,457.0,1429.0,1519.0,1071259.5
2,1,1537,2049,batch_1/000008.jpg,14,Other carton,Carton,3,531.0,292.0,1006.0,672.0,99583.5
3,2,1537,2049,batch_1/000010.jpg,5,Clear plastic bottle,Bottle,4,632.0,987.0,500.0,374.0,73832.5
4,2,1537,2049,batch_1/000010.jpg,7,Plastic bottle cap,Bottle cap,5,632.0,989.0,44.0,51.0,915.0


In [19]:
# Check dtypes and non-null counts of the merged table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4784 entries, 0 to 4783
Data columns (total 13 columns):
img_id           4784 non-null int64
img_width        4784 non-null int64
img_height       4784 non-null int64
img_file         4784 non-null object
cat_id           4784 non-null int64
cat_name         4784 non-null object
supercategory    4784 non-null object
ann_id           4784 non-null int64
x                4784 non-null float64
y                4784 non-null float64
width            4784 non-null float64
height           4784 non-null float64
area             4784 non-null float64
dtypes: float64(5), int64(5), object(3)
memory usage: 486.0+ KB


In [20]:
# Save the merged table to meta_df.csv in the working directory, without writing the index as a column.
df.to_csv("meta_df.csv", index=False)

In [25]:
# Sorted array of the distinct category ids that have at least one annotation.
df["cat_id"].sort_values().unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59], dtype=int64)