In [1]:
# Render matplotlib figures directly in the notebook output.
%matplotlib inline
# (Re)load the autoreload extension and set it to mode 2 so edited modules
# are re-imported before each cell runs.
# NOTE: keep comments on their own lines; text after a line magic is passed to it as an argument.
%reload_ext autoreload
%autoreload 2

In [2]:
from fastai.conv_learner import *
from fastai.dataset import *

from pathlib import Path
import json
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects

In [3]:
p_name = "Object_recognition_basics"

# Data setup

In [4]:
# Root directory of the PASCAL VOC data, relative to the notebook's working directory.
PATH = Path('data/pascal')
# Fail early with a readable message if the dataset is not where the notebook expects it
# (comparing str(PATH) to a literal only re-checks the line above and breaks on Windows).
assert PATH.is_dir(), 'dataset directory not found: %s' % PATH
list(PATH.iterdir())

[PosixPath('data/pascal/VOCdevkit'),
 PosixPath('data/pascal/pascal_train2007.json'),
 PosixPath('data/pascal/pascal_test2007.json'),
 PosixPath('data/pascal/pascal_val2012.json'),
 PosixPath('data/pascal/pascal_val2007.json'),
 PosixPath('data/pascal/pascal_train2012.json'),
 PosixPath('data/pascal/models'),
 PosixPath('data/pascal/tmp')]

### Annotations for the bounding boxes

In [5]:
# Load the PASCAL VOC 2007 training annotations; the `with` block closes the
# file handle as soon as the JSON has been parsed.
with (PATH / 'pascal_train2007.json').open() as f:
    trn_json = json.load(f)
# Verify that the sections this notebook reads are actually present.
assert {'images', 'annotations', 'categories'} <= trn_json.keys(), \
    'annotation file is missing one of: images, annotations, categories'
trn_json.keys()

dict_keys(['images', 'type', 'annotations', 'categories'])

###### It is better to use CONSTANTS for the keys: they give us auto-complete and stop us from mistyping them

In [6]:
# Top-level section names of the annotation JSON.
IMAGES = 'images'
ANNOTATIONS = 'annotations'
CATEGORIES = 'categories'

In [7]:
trn_json[IMAGES][:2]

[{'file_name': '000012.jpg', 'height': 333, 'id': 12, 'width': 500},
 {'file_name': '000017.jpg', 'height': 364, 'id': 17, 'width': 480}]

In [8]:
trn_json[ANNOTATIONS][:1]

[{'area': 34104,
  'bbox': [155, 96, 196, 174],
  'category_id': 7,
  'id': 1,
  'ignore': 0,
  'image_id': 12,
  'iscrowd': 0,
  'segmentation': [[155, 96, 155, 270, 351, 270, 351, 96]]}]

In [9]:
trn_json[CATEGORIES][:2]

[{'id': 1, 'name': 'aeroplane', 'supercategory': 'none'},
 {'id': 2, 'name': 'bicycle', 'supercategory': 'none'}]

### Let's create some dict of key data

In [10]:
# Field names used inside the image / annotation / category records.
FILE_NAME = 'file_name'
ID = 'id'
IMG_ID = 'image_id'
CAT_ID = 'category_id'
BBOX = 'bbox'

In [15]:
# Lookup table: category id -> class name.
cats = dict((entry[ID], entry['name']) for entry in trn_json[CATEGORIES])

In [16]:
cats

{1: 'aeroplane',
 2: 'bicycle',
 3: 'bird',
 4: 'boat',
 5: 'bottle',
 6: 'bus',
 7: 'car',
 8: 'cat',
 9: 'chair',
 10: 'cow',
 11: 'diningtable',
 12: 'dog',
 13: 'horse',
 14: 'motorbike',
 15: 'person',
 16: 'pottedplant',
 17: 'sheep',
 18: 'sofa',
 19: 'train',
 20: 'tvmonitor'}

In [18]:
# Lookup table: image id -> image file name.
trn_fnames = dict((image[ID], image[FILE_NAME]) for image in trn_json[IMAGES])

In [27]:
# Image ids in the same order as they appear in the annotation file.
trn_ids = list(map(lambda image: image[ID], trn_json[IMAGES]))

In [28]:
trn_ids[:2]

[12, 17]

In [30]:
' '.join(trn_fnames[key] for key in trn_ids[:5])

'000012.jpg 000017.jpg 000023.jpg 000026.jpg 000032.jpg'

In [49]:
JPEGS_PATH = 'VOCdevkit/VOC2007/JPEGImages'

In [50]:
IMG_PATH = PATH / JPEGS_PATH
list(IMG_PATH.iterdir())[:5]

[PosixPath('data/pascal/VOCdevkit/VOC2007/JPEGImages/006948.jpg'),
 PosixPath('data/pascal/VOCdevkit/VOC2007/JPEGImages/005796.jpg'),
 PosixPath('data/pascal/VOCdevkit/VOC2007/JPEGImages/007006.jpg'),
 PosixPath('data/pascal/VOCdevkit/VOC2007/JPEGImages/004693.jpg'),
 PosixPath('data/pascal/VOCdevkit/VOC2007/JPEGImages/002279.jpg')]

In [31]:
# Keep the first image record around as a running example for the cells below,
# and display its file name and id.
img0_d = trn_json[IMAGES][0]
img0_d[FILE_NAME], img0_d[ID]

('000012.jpg', 12)

A `defaultdict` is useful any time you want to have a default dictionary entry for new keys. Here we create a dict from image IDs to a list of annotations (tuple of bounding box and class id).

#### We convert VOC's [x, y, width, height] boxes into [top, left, bottom, right] (top-left/bottom-right corners), switching the x/y order to be consistent with numpy's row/column indexing.

In [32]:
trn_json[ANNOTATIONS][0]

{'area': 34104,
 'bbox': [155, 96, 196, 174],
 'category_id': 7,
 'id': 1,
 'ignore': 0,
 'image_id': 12,
 'iscrowd': 0,
 'segmentation': [[155, 96, 155, 270, 351, 270, 351, 96]]}

In [36]:
# Map image id -> list of (bounding box, category id) annotations.
# `defaultdict(list)` creates the empty list on first access and, unlike a
# lambda factory, keeps the dict picklable.
trn_anno = collections.defaultdict(list)
for a in trn_json[ANNOTATIONS]:
    if a['ignore']:
        continue  # skip annotations flagged as "ignore" in the dataset
    # VOC stores [x, y, width, height]; we keep inclusive pixel corners as
    # [top, left, bottom, right], hence the "- 1" on the far edges.
    col, row, width, height = a[BBOX]
    bb = np.array([row, col, row + height - 1, col + width - 1])
    trn_anno[a[IMG_ID]].append((bb, a[CAT_ID]))

In [39]:
trn_anno[img0_d[ID]]

[(array([ 96, 155, 269, 350]), 7)]

In [40]:
cats[17]

'sheep'

In [42]:
trn_anno[17]

[(array([ 61, 184, 198, 278]), 15), (array([ 77,  89, 335, 402]), 13)]

In [44]:
def bb_2_hw(bb_hw):
    """
    Convert a [top, left, bottom, right] box back to VOC's [x, y, width, height].

    Some libs take VOC format bounding boxes, so this lets us convert back when required.
    It is the inverse of the conversion used when building `trn_anno`: the corners are
    inclusive pixel coordinates, so width and height need the "+ 1" back.

    Parameters:
        bb_hw: sequence or array of four numbers, [top, left, bottom, right].

    Returns:
        np.ndarray of [x, y, width, height].
    """
    # Read from the argument itself, not from any notebook-level variable.
    top, left, bottom, right = bb_hw[0], bb_hw[1], bb_hw[2], bb_hw[3]
    return np.array([left, top, right - left + 1, bottom - top + 1])
    

In [51]:
im = open_image(IMG_PATH / img0_d[FILE_NAME])

In [57]:
def show_img(img, figsize=None, ax=None):
    """
    Display an image without axis ticks and return the axes it was drawn on.

    Parameters:
        img: image data accepted by `ax.imshow`.
        figsize: figure size passed to `plt.subplots`; only used when a new
            figure has to be created.
        ax: existing axes to draw on. A new figure/axes pair is created only
            when this is None.

    Returns:
        The axes the image was drawn on, so callers can add boxes or text on top.
    """
    # Test identity, not truthiness: an object that happens to be falsy (or an
    # array of axes, whose truth value is ambiguous) must not be replaced.
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    ax.imshow(img)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    return ax

In [62]:
def draw_outline(img, line_w):
    """Give `img` a black stroke of width `line_w`, then draw it normally on top."""
    black_stroke = patheffects.Stroke(linewidth=line_w, foreground='black')
    img.set_path_effects([black_stroke, patheffects.Normal()])

SyntaxError: unexpected EOF while parsing (<ipython-input-62-f656187df674>, line 1)

In [None]:
show_img(im)