# 05_generate_npz

In [9]:
import os

import numpy as np
from PIL import Image

In [10]:
def process_data(img_size, boxes):
    """Convert corner-format boxes to scaled centre format and pad them.

    Every entry of ``boxes`` is reshaped into rows of
    ``(class, x_min, y_min, x_max, y_max)`` and rewritten as
    ``(x_center, y_center, width, height, class)``, where the centre and the
    extent are divided by ``img_size``. Entries with fewer rows than the
    largest entry are extended with all-zero rows so that every entry ends up
    with the same number of rows.

    :param img_size: divisor applied to the (x, y) centre and the (w, h) extent
    :param boxes: iterable of arrays holding the raw box values of each image
    :return: array stacking the converted, zero-padded boxes of every image
    """
    converted = []
    for raw in boxes:
        rows = raw.reshape((-1, 5))
        top_left, bottom_right = rows[:, 1:3], rows[:, 3:5]
        centre = 0.5 * (bottom_right + top_left) / img_size
        extent = (bottom_right - top_left) / img_size
        # The class id moves from the first column to the last one.
        converted.append(np.concatenate((centre, extent, rows[:, 0:1]), axis=1))

    # Largest number of boxes found on a single image (0 when there are none).
    most_rows = max((entry.shape[0] for entry in converted), default=0)

    padded = []
    for entry in converted:
        missing = most_rows - entry.shape[0]
        if missing > 0:
            filler = np.zeros((missing, 5), dtype=np.float32)
            entry = np.vstack((entry, filler))
        padded.append(entry)

    return np.array(padded)

In [11]:
def create_label_dict(class_path):
    """Build a mapping from class name to zero-based class id.

    The id of a class is the index of its line in the file.

    :param class_path: path of a text file holding one class name per line
    :return: dict mapping each class name to its line index
    """
    print('\n-> creating dictinary for labels...\n')
    with open(class_path) as f:
        # Strip only the line terminator: slicing off the last character
        # would truncate the final name when the file has no trailing newline.
        class_names = [line.rstrip('\n') for line in f]
    return {name: index for index, name in enumerate(class_names)}

In [26]:
def txt2data(txt_path, expected_size=(224, 224)):
    """Read the annotation file and group the bounding boxes by image.

    The first line of the file is treated as a header and skipped. Every other
    non-blank line is split on single spaces into
    ``path category_id x_min y_min x_max y_max``. The category id is shifted
    down by one (YOLO requires ids starting at 0, not 1) and the four box
    boundaries are truncated to ``int``. Boxes that share an image path are
    collected into a single entry, in order of first appearance.

    :param txt_path: path of the annotation text file
    :param expected_size: size every referenced image must have, compared with
        PIL's ``Image.size``; pass ``None`` to skip opening the images
    :return: list of entries such as
        ``['Volumes/JS/UECFOOD100_JS/1/1.jpg', [0, 0, 143, 370, 486]]``:
        the image path followed by one ``[class, x_min, y_min, x_max, y_max]``
        list per box
    :raises ValueError: if a line holds fewer than four box boundaries
    :raises AssertionError: if an image does not have ``expected_size``
    """
    print('\n-> converting txt info to data...\n')

    boxes_by_image = {}
    with open(txt_path, 'r') as f:
        next(f, None)  # skip header
        for raw_line in f:
            # Strip only the terminator; slicing off the last character would
            # corrupt the final line of a file without a trailing newline.
            line = raw_line.rstrip('\n')
            if not line:
                continue
            print(raw_line)

            path, *fields = line.split(' ')
            if len(fields) < 5:
                raise ValueError(
                    'malformed annotation line: {!r}'.format(line))

            if expected_size is not None:
                # Close the file handle as soon as the size has been read.
                with Image.open(path) as img:
                    actual_size = img.size
                assert actual_size == tuple(expected_size), (
                    '{} has size {}, expected {}'.format(
                        path, actual_size, tuple(expected_size)))

            category, coords = fields[0], fields[1:]
            box = [int(category) - 1]  # category ids must start at 0
            box.extend(int(float(value)) for value in coords[:4])
            box.extend(coords[4:])  # any extra fields are kept untouched
            boxes_by_image.setdefault(path, []).append(box)

    return [[path] + boxes for path, boxes in boxes_by_image.items()]

In [27]:
def load_images(image_data, expected_size=(224, 224)):
    """Load every image referenced by ``image_data`` as a ``uint8`` array.

    Only the first element of each entry (the image path) is used; the box
    lists that follow it are ignored here.

    :param image_data: entries acquired from txt2data(), each starting with
        the image path
    :param expected_size: size every image must have, compared with PIL's
        ``Image.size``; pass ``None`` to skip the check. The default matches
        the (224, 224) size that txt2data() verifies.
    :return: list with one ``uint8`` array per image, in ``image_data`` order
    :raises AssertionError: if an image does not have ``expected_size``
    """
    print('\n -> Reading imgs and saving to array images...\n')
    images = []
    for entry in image_data:
        path = entry[0]
        # The pixel data must be copied out before the file is closed.
        with Image.open(path) as img:
            if expected_size is not None:
                assert img.size == tuple(expected_size), (
                    '{} has size {}, expected {}'.format(
                        path, img.size, tuple(expected_size)))
            images.append(np.array(img, dtype=np.uint8))
    return images

In [28]:
def images2npv(images, image_data, shuffle=False, out_path='FoodDataset_npv'):
    """Save the images and their boxes together in one ``.npz`` archive.

    The archive holds two arrays: ``image`` (all images stacked as ``uint8``)
    and ``boxes`` (the box lists of each image, i.e. every element of an
    ``image_data`` entry after the leading path). When the images do not all
    have the same number of boxes, ``boxes`` is stored as a 1-D object array
    with one box array per image, which has to be read back with
    ``np.load(..., allow_pickle=True)``.

    :param images: sequence of equally sized image arrays
    :param image_data: entries acquired from txt2data(), aligned with ``images``
    :param shuffle: if True, reorder images and boxes with a fixed-seed
        permutation (data has already been shuffled during preprocessing)
    :param out_path: destination file; ``np.savez`` appends ``.npz`` when the
        name does not already end with it
    :return: None
    """
    print('\n -> converting image info to npv file...\n')
    images = np.array(images, dtype=np.uint8)
    num_images = images.shape[0]
    per_image = [np.array(image_data[i][1:]) for i in range(num_images)]

    if len({entry.shape for entry in per_image}) > 1:
        # Differing box counts cannot form a rectangular array; build the
        # object array explicitly instead of relying on ragged construction.
        boxes = np.empty(num_images, dtype=object)
        for i, entry in enumerate(per_image):
            boxes[i] = entry
    else:
        boxes = np.array(per_image)

    # shuffle dataset
    if shuffle:
        # A private generator yields the same order as seeding the global one
        # with 13, without disturbing the global random state.
        indices = np.arange(num_images)
        np.random.RandomState(13).shuffle(indices)
        images, boxes = images[indices], boxes[indices]

    print('dataset contains {} images'.format(num_images))
    np.savez(out_path, image=images, boxes=boxes)
    saved_as = str(out_path)
    if not saved_as.endswith('.npz'):
        saved_as += '.npz'
    print('npz file has been generated and saved as {}'.format(saved_as))

In [29]:
def get_detector_mask(boxes, anchors, image_size=(416, 416)):
    """Run preprocess_true_boxes() on the boxes of every image.

    :param boxes: per-image box arrays, each passed on as ``true_boxes``
    :param anchors: anchors passed on unchanged to preprocess_true_boxes()
    :param image_size: ``(height, width)`` passed on to
        preprocess_true_boxes(); defaults to the previously hard-coded
        (416, 416)
    :return: tuple ``(detectors_mask, matching_true_boxes)`` with the
        per-image results stacked into two arrays
    """
    detectors_mask = []
    matching_true_boxes = []
    for box in boxes:
        mask, matched = preprocess_true_boxes(box, anchors, image_size)
        detectors_mask.append(mask)
        matching_true_boxes.append(matched)

    return np.array(detectors_mask), np.array(matching_true_boxes)

In [30]:
def preprocess_true_boxes(true_boxes, anchors, image_size):
    """Find detector in YOLO where ground truth box should appear

    Parameters
    ----------
    true_boxes : array
        List of ground truth boxes in form of relative x, y, w, h, class.
        Relative coordinates are in the range [0, 1] indicating a percentage
        of the original image dimensions.
    anchors : array
        List of anchors in form of w, h.
        Anchors are assumed to be in the range [0, conv_size] where conv_size
        is the spatial dimension of the final convolutional features.
    image_size : array-like
        List of image dimensions in form of h, w in pixels.

    Returns
    -------
    detectors_mask : array
        0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1]
        that should be compared with a matching ground truth box.
    matching_true_boxes: array
        Array of shape [conv_height, conv_width, num_anchors, num_box_params]
        with the corresponding ground truth box adjusted for comparison with
        predicted parameters at training time.

    Raises
    ------
    IndexError
        If a box that overlaps an anchor has its centre outside the
        convolutional grid.
    """
    height, width = image_size
    anchors = np.asarray(anchors, dtype=np.float64)
    num_anchors = len(anchors)

    # Downsampling factor of 5x 2-stride max_pools == 32.
    assert height % 32 == 0,    'Image sizes in YOLO_v2 must be multiples of 32.'
    assert width % 32 == 0,     'Image sizes in YOLO_v2 must be multiples of 32.'
    conv_height = height // 32
    conv_width = width // 32
    num_box_params = true_boxes.shape[1]
    detectors_mask = np.zeros(
        (conv_height, conv_width, num_anchors, 1), dtype=np.float32)
    matching_true_boxes = np.zeros(
        (conv_height, conv_width, num_anchors, num_box_params),
        dtype=np.float32)

    grid_scale = np.array([conv_width, conv_height, conv_width, conv_height])

    for box in true_boxes:
        # Take the class as a scalar: a length-1 slice mixed with scalars
        # makes the adjusted box below an inhomogeneous sequence.
        box_class = box[4]
        # scale box to convolutional feature spatial dimensions
        box = box[0:4] * grid_scale
        i = int(np.floor(box[1]))  # grid row of the box centre
        j = int(np.floor(box[0]))  # grid column of the box centre

        # Box shifted to the origin; identical for every anchor.
        box_maxes = box[2:4] / 2.
        box_mins = -box_maxes
        box_area = box[2] * box[3]

        best_iou = 0
        best_anchor = 0
        for k, anchor in enumerate(anchors):
            # Find IOU between box shifted to origin and anchor box.
            anchor_maxes = anchor / 2.
            anchor_mins = -anchor_maxes

            intersect_mins = np.maximum(box_mins, anchor_mins)
            intersect_maxes = np.minimum(box_maxes, anchor_maxes)
            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
            intersect_area = intersect_wh[0] * intersect_wh[1]
            anchor_area = anchor[0] * anchor[1]
            iou = intersect_area / (box_area + anchor_area - intersect_area)
            if iou > best_iou:
                best_iou = iou
                best_anchor = k

        # Boxes without any overlap (e.g. all-zero padding rows) are skipped.
        if best_iou > 0:
            # Check against the real grid size; a negative index would
            # otherwise silently wrap around to the opposite border.
            if not (0 <= i < conv_height and 0 <= j < conv_width):
                raise IndexError(
                    'box centre falls in cell ({}, {}) outside the {}x{} '
                    'grid'.format(i, j, conv_height, conv_width))
            detectors_mask[i, j, best_anchor] = 1
            adjusted_box = np.array(
                [
                    box[0] - j, box[1] - i,
                    np.log(box[2] / anchors[best_anchor][0]),
                    np.log(box[3] / anchors[best_anchor][1]), box_class
                ],
                dtype=np.float32)
            matching_true_boxes[i, j, best_anchor] = adjusted_box
    return detectors_mask, matching_true_boxes

In [32]:
# Read anchors_10.txt: one anchor per line, written as comma-separated values
# (w, h according to preprocess_true_boxes()).
anchors = []
with open(os.path.join("Dataset", "generated_anchors_mobilenet", "anchors_10.txt"), 'r') as anchor_file:
    for line in anchor_file:
        line = line.strip()
        if not line:
            # A blank or trailing empty line would otherwise fail in float('').
            continue
        # float() ignores surrounding whitespace, so "a, b" and "a,b" both parse.
        anchors.append([float(value) for value in line.split(',')])
anchors = np.array(anchors)
print('-> anchors acquired\n')
print(anchors)

txt_path = os.path.join("Dataset", "train_food.txt")
img_size = np.array([224, 224])

# Generate dictionary with labels and ids (not necessary)
label_dict = create_label_dict(os.path.join("Dataset", "classes.txt"))
print(label_dict)

# Convert txt info to data, load the referenced images and write the archive
image_data = txt2data(txt_path)
images = load_images(image_data)
images2npv(images, image_data)

print('Done!')

SyntaxError: unexpected character after line continuation character (<ipython-input-32-b0b594c8d06e>, line 3)