In [1]:
import os
import random
import xml.etree.ElementTree as ET

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import skimage
import skimage.io
import skimage.transform

%matplotlib inline

In [None]:
# Side length, in pixels, of the square images that read_jpeg produces.
image_size = 224
# Per-channel values subtracted from every image in read_jpeg. They are applied
# after read_jpeg reverses the channel axis, so they line up with the reversed
# channel order. Shaped (1, 1, 3) to broadcast over [height, width, depth].
pixel_means = np.array([102.9801, 115.9465, 122.7717]).reshape(1, 1, 3)

In [7]:
# PASCAL VOC data import.
# DO NOT EDIT HERE! Modify in its own file.

# Dataset locations. Each is a plain string ending in "/" so that relative
# paths can be appended directly.
home_dir = "{}/".format(os.path.expanduser('~'))
datasets_dir = "{}projects/datasets/".format(home_dir)
voc_2012_dir = "{}VOC2012/".format(datasets_dir)

# Class name -> integer label. Labels run from 1 to 20; 0 is not assigned to
# any class in this table.
classes_to_index = {
    "aeroplane": 1,
    "bicycle": 2,
    "boat": 3,
    "bottle": 4,
    "bus": 5,
    "car": 6,
    "cat": 7,
    "chair": 8,
    "cow": 9,
    "diningtable": 10,
    "dog": 11,
    "horse": 12,
    "motorbike": 13,
    "person": 14,
    "pottedplant": 15,
    "sheep": 16,
    "train": 17,
    "tvmonitor": 18,
    "sofa": 19,
    "bird": 20,
}

# Reverse lookup indexed by label. Slot 0 keeps the integer placeholder 0
# because no class maps to it.
indexes_to_classes = [0] * (len(classes_to_index) + 1)
for key, value in classes_to_index.items():
    indexes_to_classes[value] = key

class bounding_box:
    """One labelled box: a class name plus its x and y extents.

    The constructor takes, in order, class_name, xmin, xmax, ymin, ymax and
    stores each value unchanged on an attribute of the same name.
    """

    def __init__(self, class_name, xmin, xmax, ymin, ymax):
        self.class_name = class_name
        # Horizontal extent first, then vertical extent.
        self.xmin, self.xmax = xmin, xmax
        self.ymin, self.ymax = ymin, ymax

# Training split: one entry per line of train.txt, surrounding whitespace removed.
with open(voc_2012_dir + "ImageSets/Main/train.txt", "r") as train_list:
    train_filenames = [entry.strip() for entry in train_list]

# Validation split: one entry per line of val.txt, surrounding whitespace removed.
with open(voc_2012_dir + "ImageSets/Main/val.txt", "r") as val_list:
    val_filenames = [entry.strip() for entry in val_list]
        
def _parse_box_coordinate(box_element, tag):
    """Return the integer value of the `tag` child of a <bndbox> element."""
    # int() rejects decimal text such as "195.0", so go through float and
    # round to the nearest whole number. Plain integer text is unaffected.
    return int(round(float(box_element.find(tag).text)))


# Read an annotation file, return a list of bounding boxes
def get_bounding_boxes(filename):
    """Parse the annotation XML for `filename` into bounding_box objects.

    Args:
        filename: annotation name without directory or extension. The file
            read is voc_2012_dir + "Annotations/<filename>.xml".

    Returns:
        A list with one bounding_box per <object> element directly under the
        document root, in document order. The list is empty when there are
        none. Coordinates are ints.

    Raises:
        Whatever ET.parse raises for a missing or malformed file.
        AttributeError if an <object> lacks <name>, <bndbox> or a coordinate tag.
        ValueError if a coordinate is not numeric text.
    """
    xmldoc = ET.parse(voc_2012_dir + "Annotations/{}.xml".format(filename))
    bounding_boxes = []
    for thing in xmldoc.findall('object'):
        name = thing.find('name').text
        box = thing.find('bndbox')
        xmin = _parse_box_coordinate(box, 'xmin')
        xmax = _parse_box_coordinate(box, 'xmax')
        ymin = _parse_box_coordinate(box, 'ymin')
        ymax = _parse_box_coordinate(box, 'ymax')
        bounding_boxes.append(bounding_box(name, xmin, xmax, ymin, ymax))
    return bounding_boxes

def get_rois(filename):
    """Return the regions of interest and class labels annotated for `filename`.

    Returns:
        A (rois, classes) pair of equal-length lists. Each roi is the tuple
        (xmin, ymin, ymax - ymin, xmax - xmin), so the vertical span comes
        before the horizontal span. Each class is the integer looked up in
        classes_to_index for the box's class name.

    Raises:
        KeyError if a box's class name is missing from classes_to_index.
    """
    annotated = get_bounding_boxes(filename)
    rois = [
        (item.xmin, item.ymin, item.ymax - item.ymin, item.xmax - item.xmin)
        for item in annotated
    ]
    classes = [classes_to_index[item.class_name] for item in annotated]
    return rois, classes

# In-memory cache of read_jpeg results, keyed by the filename handed to
# read_jpeg_memoized. It stops accepting new entries once it holds
# num_to_memoize images; nothing is ever evicted.
memoized_images = {}
num_to_memoize = 5000

# Returns the array read_jpeg produces for `filename`, reusing a cached copy when one exists
def read_jpeg_memoized(filename):
    """Return read_jpeg(filename), served from memoized_images when possible.

    Args:
        filename: image name, passed through unchanged to read_jpeg.

    Returns:
        The value read_jpeg returns for `filename`. A cached image is returned
        as the stored object itself, not a copy, so mutating it in place also
        changes what later calls receive.

    Raises:
        Whatever read_jpeg raises. Errors are no longer swallowed by a bare
        `except`.
    """
    # Explicit membership test instead of try/bare-except, which would also
    # have hidden unrelated errors such as KeyboardInterrupt.
    if filename in memoized_images:
        return memoized_images[filename]
    image = read_jpeg(filename)
    # Only remember the image while the cache is below its size cap.
    if len(memoized_images) < num_to_memoize:
        memoized_images[filename] = image
    return image

# Returns an [image_size, image_size, 3] float image with the channel axis reversed
# (BGR when the file decodes as RGB) and pixel_means subtracted
def read_jpeg(filename):
    """Load a JPEG and preprocess it.

    The image at voc_2012_dir + "JPEGImages/<filename>.jpg" is resized to
    image_size x image_size, its channel axis is reversed, values are scaled by
    255 and pixel_means is subtracted. The result is therefore not a
    displayable RGB picture.

    Args:
        filename: image name without directory or extension.

    Returns:
        A float array of shape [image_size, image_size, 3].

    Raises:
        Whatever skimage.io.imread raises for a missing or unreadable file.
    """
    img = skimage.io.imread(voc_2012_dir + "JPEGImages/{}.jpg".format(filename))
    if img.ndim == 2:
        # Single-channel image: replicate it into three channels. Without this
        # the channel indexing below would select columns instead of channels.
        img = np.stack([img, img, img], axis=-1)
    # TODO maybe use the resize function below?
    # NOTE(review): skimage's resize is documented to return floats scaled to
    # [0, 1] for integer input, which is what the * 255 below undoes - confirm
    # against the installed version.
    img = skimage.transform.resize(img, (image_size, image_size))
    # Reverse the channel axis, then move back to a 0-255 scale and subtract
    # the per-channel means.
    img_swapped = img[..., [2, 1, 0]]
    img_swapped = (img_swapped * 255) - pixel_means
    return img_swapped

# Plots an image from either filename or numpy array. Optionally draws bounding boxes with class name labels
def show_image(image, bounding_boxes=None):
    """Display an image and, optionally, labelled bounding boxes on top of it.

    Args:
        image: either an image name (str, without directory or extension) or an
            array that matplotlib's imshow can draw. Arrays are shown as given.
        bounding_boxes: optional iterable of objects exposing xmin, xmax, ymin,
            ymax and class_name. None (the default) or an empty iterable draws
            no boxes.

    Returns:
        None. The figure is created through pyplot as a side effect.
    """
    if isinstance(image, str):
        # Load the untouched JPEG instead of going through read_jpeg. That
        # helper resizes to image_size and subtracts pixel_means, so its output
        # neither displays as a normal picture nor matches boxes given in the
        # original image's pixel coordinates (as annotation boxes presumably are).
        image = skimage.io.imread(voc_2012_dir + "JPEGImages/{}.jpg".format(image))
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    ax.imshow(image)
    for box in bounding_boxes or ():
        outline = patches.Rectangle(
            (box.xmin, box.ymin),
            box.xmax - box.xmin,
            box.ymax - box.ymin,
            fill=False, edgecolor="red", linewidth=3
        )
        ax.add_patch(outline)
        # Label is offset slightly inside the box's top-left corner.
        ax.text(box.xmin + 10, box.ymin + 30, box.class_name, fontsize=16, color="red")
    
# Check if a bounding box has a size and shape similar to the desired dimension
def check_bounding_box(bounding_box, desired_dimension):
    """Decide whether a box is large enough and close enough to square.

    Two conditions must both hold:
      * (desired_dimension - shorter side) / desired_dimension is below 0.5;
      * |width - height| / width is below 0.5.

    Args:
        bounding_box: object exposing xmin, xmax, ymin and ymax.
        desired_dimension: target side length the box is compared against.

    Returns:
        True when both conditions hold, otherwise False.
    """
    box_w = bounding_box.xmax - bounding_box.xmin
    box_h = bounding_box.ymax - bounding_box.ymin
    short_side = min(box_w, box_h)

    size_shortfall = (desired_dimension - short_side) / desired_dimension
    if not (size_shortfall < .5):
        return False

    # The aspect test is only evaluated once the size test has passed.
    aspect_gap = abs(box_w - box_h) / box_w
    return True if aspect_gap < .5 else False
            
# Gets the des_dim x des_dimension scaled version of the top leftmost square of the given bounding box region of the image
def crop_resize_image(image, bounding_box, desired_dimension):
    """Crop the top-left square of a box out of `image` and resize it.

    The square starts at (xmin, ymin) and its side is the shorter of the box's
    width and height. It is then resized to desired_dimension x
    desired_dimension with skimage.transform.resize. The previous
    implementation called cv2, which this notebook never imports.

    Args:
        image: numpy array indexed as [row, column, ...].
        bounding_box: object exposing xmin, xmax, ymin and ymax.
        desired_dimension: side length of the returned square.

    Returns:
        An array whose first two dimensions are desired_dimension, with the
        same dtype as `image`.

    Raises:
        ValueError: if the box has no positive extent or the crop falls
            entirely outside the image.
    """
    x = bounding_box.xmin
    y = bounding_box.ymin
    width = bounding_box.xmax - bounding_box.xmin
    height = bounding_box.ymax - bounding_box.ymin
    shorter_dimension = min(width, height)
    if shorter_dimension <= 0:
        raise ValueError("bounding box must have positive width and height")

    cropped_image = image[y:y+shorter_dimension, x:x+shorter_dimension]
    if cropped_image.size == 0:
        raise ValueError("bounding box lies outside the image")

    # preserve_range keeps the input's value scale instead of rescaling
    # integer images to [0, 1].
    resized_image = skimage.transform.resize(
        cropped_image, (desired_dimension, desired_dimension), preserve_range=True)
    # resize returns floats; round before casting back to an integer dtype so
    # values are not truncated.
    if np.issubdtype(cropped_image.dtype, np.integer):
        resized_image = np.rint(resized_image)
    return resized_image.astype(cropped_image.dtype)


In [6]:
def _iter_samples_forever(filenames):
    """Endlessly yield (image, rois) pairs for the names in `filenames`.

    The list is shuffled in place before every pass, so the caller's list
    order changes as a side effect. Raises ValueError on an empty list, which
    would otherwise spin forever without yielding anything.
    """
    if not filenames:
        raise ValueError("cannot draw samples from an empty filename list")
    while True:
        random.shuffle(filenames)
        for filename in filenames:
            read_image = read_jpeg_memoized(filename)
            rois = get_rois(filename)
            yield (read_image, rois)


def get_train_sample():
    """Generator over the training split that never terminates.

    Yields:
        (image, rois) tuples, where image comes from read_jpeg_memoized and
        rois is whatever get_rois returns for the same filename. Each pass
        visits every entry of train_filenames once, in a freshly shuffled
        order; train_filenames itself is shuffled in place.

    Raises:
        ValueError: on the first next() if train_filenames is empty.
    """
    for sample in _iter_samples_forever(train_filenames):
        yield sample


def get_val_sample():
    """Generator over the validation split that never terminates.

    Yields:
        (image, rois) tuples, where image comes from read_jpeg_memoized and
        rois is whatever get_rois returns for the same filename. Each pass
        visits every entry of val_filenames once, in a freshly shuffled
        order; val_filenames itself is shuffled in place.

    Raises:
        ValueError: on the first next() if val_filenames is empty.
    """
    for sample in _iter_samples_forever(val_filenames):
        yield sample

In [10]:
# Number of entries read from the training split list (ImageSets/Main/train.txt).
len(train_filenames)

5717

In [9]:
# Number of entries read from the validation split list (ImageSets/Main/val.txt).
len(val_filenames)

5823