In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [3]:
from fastai.conv_learner import *
from fastai.dataset import *

from pathlib import Path
import json
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects

## Object detection

We want to do Object detection with bounding boxes using the Pascal VOC dataset

to download the 2007 VOC dataset:

wget http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar

In [4]:
# we're using pathlib from python 3 to manage paths
# PATH gives us object oriented access to directories and files
PATH = Path('data/pascal')

# iterate through all the elements in the directory specified by PATH
list(PATH.iterdir())

[PosixPath('data/pascal/VOCdevkit'),
 PosixPath('data/pascal/pascal_train2007.json'),
 PosixPath('data/pascal/pascal_test2007.json'),
 PosixPath('data/pascal/pascal_val2012.json'),
 PosixPath('data/pascal/VOCtrainval_06-Nov-2007.tar'),
 PosixPath('data/pascal/pascal_val2007.json'),
 PosixPath('data/pascal/pascal_train2012.json'),
 PosixPath('data/pascal/models'),
 PosixPath('data/pascal/src'),
 PosixPath('data/pascal/tmp')]

As well as the images, there are also annotations - bounding boxes showing where each object is. These were hand labeled. The original version were in XML, which is a little hard to work with nowadays, so we uses the more recent JSON version which you can download from this link:

wget https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip

You can see here how pathlib includes the ability to open files (amongst many other capabilities

## Data Definitions

IMAGES_BASIC_INFO is a python list containing one or more IMAGE_BASIC_INFO.

Contains some basic information about all the images.

*********

IMAGE_BASIC_INFO is a python dictionary

Contains the following basic info about one particular image:

'file_name': A string, the image's name

'width': A number, the image's width

'height': A number, the image's height

'id': A number, the image's id

**********

IMAGES_ANNOTATIONS is a python list containing one or more IMAGE_ANNOTATION.

Contains info about all the bounding boxes inside the images.

*********

IMAGE_ANNOTATION is a python dictionary

Contains the following information about one particular annotation in an image:

'area': A number, the area of the annotation

'bbox': A list containing [x, y, width, height], 
        x (column) and y (row), 
        are the xy coordinates of the top left corner,
        width and height are the width and height of the annotation.
        
'category_id': A number, used to find the corresponding IMAGE_CATEGORY

'id': A number

'ignore': A number, if 0, we don't ignore, if 1, we ignore

'image_id': A number, used to find the corresponding IMAGE_BASIC_INFO

'iscrowd': A number

'segmentation': A list

********************

IMAGES_CATEGORIES is a python list containing one or more IMAGE_CATEGORY.

Contains the data describing all the different image categories that can be detected.

**************

IMAGE_CATEGORY is a python dictionary

Contains the following information about one particular image category

'id': A number

'name': A string, the name of the category

'supercategory': A string


In [None]:
# the path towards the JSON annotations
json_annotations = PATH/'pascal_train2007.json'

# the JSON annotations turned into a python dictionary
trn_j = json.load(json_annotations.open())

# displays the keys inside this dict
# there should be 'images', 'type', 'annotations', 'categories'
trn_j.keys()

In [None]:
IMAGES, ANNOTATIONS, CATEGORIES = ['images', 'annotations', 'categories']

# the 'images' key contains a IMAGES_BASIC_INFO data structure
# let's see a sample of that
IMAGES_BASIC_INFO = trn_j[IMAGES]

#IMAGES_BASIC_INFO_SAMPLE = trn_j[IMAGES][:5]

In [None]:
# the 'annotations' key contains a IMAGES_ANNOTATIONS data structure
# let's see a sample of that
IMAGES_ANNOTATIONS = trn_j[ANNOTATIONS]

#IMAGES_ANNOTATIONS_SAMPLE = trn_j[ANNOTATIONS][:2]

In [None]:
# the 'categories' key contains a IMAGES_CATEGORIES data structure
# let's see a sample of that
IMAGES_CATEGORIES = trn_j[CATEGORIES]

#IMAGES_CATEGORIES_SAMPLE = trn_j[CATEGORIES][:2]

In [None]:
FILE_NAME, ID, IMG_ID, CAT_ID, BBOX = 'file_name', 'id', 'image_id', 'category_id', 'bbox'

# let's create a category_id => category_name dictionary from the IMAGES_CATEGORIES data
cats = dict((o[ID], o['name']) for o in IMAGES_CATEGORIES)

# let's then create a image_id => image_name 
# dictionary from the IMAGES_BASIC_INFO data structure
trn_fns = dict((o[ID], o[FILE_NAME]) for o in IMAGES_BASIC_INFO)
trn_j[IMAGES]
# let's also make a list of all the images id's
trn_ids = [o[ID] for o in IMAGES_BASIC_INFO]

# Okay so now let's look at the images

In [None]:
# the path leading to our beloved images
JPEGS = 'VOCdevkit/VOC2007/JPEGImages'
IMG_PATH = PATH/JPEGS

In [1]:
"""
INPUTS:

-

OUTPUT:

- A python Dictionary

Creates a dictionary where 
each key is an image id, 
and 
each key's value is a list of annotations for that image.
each annotation is a
([ymin, xmin, ymax, xmax], category_id) tuple.
"""
def create_key_annotation_dict():
    # Usually, a Python dictionary throws a KeyError 
    # if you try to get an item with a key that is not currently 
    # in the dictionary. The defaultdict in contrast will simply create 
    # any items that you try to access (provided of course they do not exist yet). 
    # To create such a "default" item, it calls the function object that you pass in the constructor.
    # inour case, the default item's value is an empty list
    trn_anno = collections.defaultdict(lambda:[])
    
    # iterate through the IMAGES_ANNOTATIONS data structure...
    for o in IMAGES_ANNOTATIONS:
        # should we include that annotation ?
        it_should_be_included = not o['ignore']
        
        # if so, then...
        if it_should_be_included:
            # append the bounding box and the id 
            bounding_box = o[BBOX]
            
            # we convert VOC's (x, y, height, width) format to (xmin, ymin, xmax, ymax)
            # we also swap the order from (xmin, ymin, xmax, ymax) to (ymin, xmin, ymax, xmax)
            # to be more in tune with numpy's orderings
            x = bounding_box[0]
            y = bounding_box[1]
            width = bounding_box[2]
            height = bounding_box[3]
            
            xmin = x
            ymin = y
            xmax = width + x - 1
            ymax = height + y - 1
            
            rearranged_bounding_box = np.array([ymin, xmin, ymax, xmax])
            
            # the id of the image where the annotation is  and category id
            img_id = o[IMG_ID]
            cat_id = o[CAT_ID]
            
            # the full annotation data
            annotation_data = (rearranged_bounding_box, cat_id)
            
            # add that annotation to the basket of annotations for the particular
            # image
            trn_anno[IMG_ID].append(annotation_data)
    
    return trn_anno

trn_anno = create_key_annotation_dict()

NameError: name 'collections' is not defined

In [None]:
len(trn_anno)