# Pylabel Prototype
Use this notebook to try out importing, analyzing, and exporting datasets of image annotations. 

In [1]:
#from io import StringIO
import json
import pandas as pd

#These are the valid columns in the pylabel annotations table.              
schema = ['id','img_folder','img_filename','img_path','img_id','img_width','img_height','img_depth','ann_segmented','ann_bbox_xmin','ann_bbox_ymin','ann_bbox_xmax','ann_bbox_ymax','ann_bbox_width','ann_bbox_height','ann_area','ann_segmentation','ann_iscrowd','ann_pose','ann_truncated','ann_difficult','cat_id','cat_name','cat_supercategory','split']


## Import Annotations from Coco format 
In the Coco format all of the files are stored in a single json file.

In [2]:
#Download the sample coco file so it can be read and imported
#!wget https://raw.githubusercontent.com/pylabelalpha/notebook/main/test.json
!wget https://raw.githubusercontent.com/pylabelalpha/notebook/main/coco_instances_val2017.json

coco_annnotations = "coco_instances_val2017.json"
#coco_annnotations = "test.json"

--2021-09-20 01:43:33--  https://raw.githubusercontent.com/pylabelalpha/notebook/main/coco_instances_val2017.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19987840 (19M) [text/plain]
Saving to: ‘coco_instances_val2017.json’


2021-09-20 01:43:33 (153 MB/s) - ‘coco_instances_val2017.json’ saved [19987840/19987840]



In [19]:
def ImportCoco(path):
    with open(path) as cocojson:
        annotations_json = json.load(cocojson)

    #Store the 3 sections of the json as seperate json arrays
    images = pd.json_normalize(annotations_json["images"])
    images.columns = 'img_' + images.columns

    annotations = pd.json_normalize(annotations_json["annotations"])
    annotations.columns = 'ann_' + annotations.columns

    categories = pd.json_normalize(annotations_json["categories"])
    categories.columns = 'cat_' + categories.columns

    df = annotations
    df[['ann_bbox_xmin','ann_bbox_ymax','ann_bbox_width','ann_bbox_height']] = pd.DataFrame(df.ann_bbox.tolist(), index= df.index)
    df.insert(8, 'ann_bbox_xmax', df['ann_bbox_xmin'] + df['ann_bbox_width'] )
    df.insert(10, 'ann_bbox_ymin', df['ann_bbox_ymax'] - df['ann_bbox_height'] )
    
    #debug print(df.info())

    #Join the annotions with the information about the image to add the image columns to the dataframe
    df = pd.merge(images, df, left_on='img_id', right_on='ann_image_id', how='left')
    df = pd.merge(df, categories, left_on='ann_category_id', right_on='cat_id', how='left')
    
    #Rename columns if needed from the coco column name to the pylabel column name 
    df.rename(columns={"img_file_name": "img_filename"}, inplace=True)

    #Drop columns that are not in the schema
    df = df[df.columns.intersection(schema)]

    #Add missing columns that are in the schema but not part of the table
    df[list(set(schema) - set(df.columns))] = ""

    #Reorder columns
    df = df[schema]

    return df

df = ImportCoco(coco_annnotations)

#This dataframe has all of the annotations
df

Unnamed: 0,id,img_folder,img_filename,img_path,img_id,img_width,img_height,img_depth,ann_segmented,ann_bbox_xmin,ann_bbox_ymin,ann_bbox_xmax,ann_bbox_ymax,ann_bbox_width,ann_bbox_height,ann_area,ann_segmentation,ann_iscrowd,ann_pose,ann_truncated,ann_difficult,cat_id,cat_name,cat_supercategory,split
0,,,000000397133.jpg,,397133,640,427,,,217.62,182.79,256.61,240.54,38.99,57.75,1481.38065,"[[224.24, 297.18, 228.29, 297.18, 234.91, 298....",0.0,,,,44.0,bottle,kitchen,
1,,,000000397133.jpg,,397133,640,427,,,1.00,53.48,347.63,240.24,346.63,186.76,54085.62170,"[[292.37, 425.1, 340.6, 373.86, 347.63, 256.31...",0.0,,,,67.0,dining table,furniture,
2,,,000000397133.jpg,,397133,640,427,,,388.66,-207.70,498.07,69.92,109.41,277.62,17376.91885,"[[446.71, 70.66, 466.07, 72.89, 471.28, 78.85,...",0.0,,,,1.0,person,person,
3,,,000000397133.jpg,,397133,640,427,,,135.57,220.64,157.89,249.43,22.32,28.79,123.19340,"[[136.18, 253.44, 153.89, 277.3, 157.89, 278.2...",0.0,,,,49.0,knife,kitchen,
4,,,000000397133.jpg,,397133,640,427,,,31.28,303.17,99.40,344.00,68.12,40.83,2136.46615,"[[37.61, 381.77, 31.28, 360.25, 40.15, 352.65,...",0.0,,,,51.0,bowl,kitchen,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36824,,,000000015335.jpg,,15335,640,480,,,160.39,-18.01,238.38,66.68,77.99,84.69,5697.13675,"[[168.32, 151.37, 165.88, 141.01, 161.61, 133....",0.0,,,,1.0,person,person,
36825,,,000000015335.jpg,,15335,640,480,,,599.96,365.80,640.00,422.90,40.04,57.10,2162.51280,"[[640.0, 425.34, 634.94, 424.12, 623.96, 422.9...",0.0,,,,47.0,cup,kitchen,
36826,,,000000015335.jpg,,15335,640,480,,,2.47,289.79,47.35,306.54,44.88,16.75,380.79115,"[[26.58, 318.6, 47.35, 323.29, 46.01, 315.92, ...",0.0,,,,77.0,cell phone,electronic,
36827,,,000000015335.jpg,,15335,640,480,,,362.44,74.74,386.67,102.44,24.23,27.70,453.39980,"[[362.44, 128.22, 364.36, 119.75, 366.28, 115....",0.0,,,,1.0,person,person,


In [12]:
df["cat_name"].value_counts()

person        11004
car            1932
chair          1791
book           1161
bottle         1025
              ...  
toothbrush       57
microwave        55
scissors         36
hair drier       11
toaster           9
Name: cat_name, Length: 80, dtype: int64

In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36829 entries, 0 to 36828
Data columns (total 25 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 36829 non-null  object 
 1   img_folder         36829 non-null  object 
 2   img_filename       36829 non-null  object 
 3   img_path           36829 non-null  object 
 4   img_id             36829 non-null  int64  
 5   img_width          36829 non-null  int64  
 6   img_height         36829 non-null  int64  
 7   img_depth          36829 non-null  object 
 8   ann_segmented      36829 non-null  object 
 9   ann_bbox_xmin      36781 non-null  float64
 10  ann_bbox_ymin      36781 non-null  float64
 11  ann_bbox_xmax      36781 non-null  float64
 12  ann_bbox_ymax      36781 non-null  float64
 13  ann_bbox_width     36781 non-null  float64
 14  ann_bbox_height    36781 non-null  float64
 15  ann_area           36781 non-null  float64
 16  ann_segmentation   363

In [23]:
#df[df['cat_id'].isna()]


person        11004
vehicle        4082
kitchen        3687
furniture      3434
food           2835
animal         2700
indoor         2000
sports         1991
accessory      1881
electronic     1323
outdoor        1286
appliance       558
Name: cat_supercategory, dtype: int64