In [None]:
# Directory (relative path) that holds the Darknet YOLOv3 resource files.
YOLOV3_RESOURCES_DIR = '../resources/yolov3'
# Darknet weights file handed to the Darknet -> Keras converter.
YOLOV3_WEIGHTS_FILE_PATH = f'{YOLOV3_RESOURCES_DIR}/yolov3-openimages.weights'
# Darknet network config (.cfg) handed to the config parser and the converter.
YOLOV3_CFG_FILE_PATH = f'{YOLOV3_RESOURCES_DIR}/yolov3-openimages.cfg'

In [None]:
import configparser
import io
from collections import defaultdict

def unique_config_sections(config_file):
    """Return the config text with every section header made unique.

    Darknet ``.cfg`` files repeat section names (e.g. ``[convolutional]``),
    which ``configparser`` rejects as duplicates. Every header ``[name]`` is
    rewritten to ``[name_N]`` where ``N`` counts, starting at 0, how often
    ``name`` has already been seen. Anything following the closing bracket
    (trailing whitespace, a comment, the line ending) is kept unchanged.

    Args:
        config_file: Path of the Darknet config file to read.

    Returns:
        An ``io.StringIO`` rewound to position 0 that holds the rewritten
        config text.
    """
    section_counters = defaultdict(int)
    output_stream = io.StringIO()
    with open(config_file) as fin:
        for line in fin:
            closing = line.find(']')
            if line.startswith('[') and closing != -1:
                # Take the name strictly between the brackets so trailing
                # text on the header line cannot leak into the section name.
                section = line[1:closing]
                unique_section = '{}_{}'.format(section, section_counters[section])
                section_counters[section] += 1
                line = '[' + unique_section + ']' + line[closing + 1:]
            output_stream.write(line)
    output_stream.seek(0)
    return output_stream

def parse_darknet_config(path_to_config_file):
    """Parse a Darknet config file and print each section with its options.

    The file is first passed through ``unique_config_sections`` so repeated
    section names become unique, then loaded with ``configparser``.

    Args:
        path_to_config_file: Path of the Darknet ``.cfg`` file.
    """
    parser = configparser.ConfigParser()
    parser.read_file(unique_config_sections(path_to_config_file))

    for section_name in parser.sections():
        print('Parsing section {}'.format(section_name))
        options = dict(parser[section_name])
        print(options)



In [None]:
# Print every section of the YOLOv3 OpenImages config together with its options.
parse_darknet_config(YOLOV3_CFG_FILE_PATH)

In [None]:
import numpy as np
import os
from keras import backend as K
from keras.layers import (Conv2D, GlobalAveragePooling2D, Input, Lambda,
                          MaxPooling2D, UpSampling2D, Add)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.merge import concatenate
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from keras.utils.vis_utils import plot_model as plot


def darknet_yolov3_to_keras(config_path, weights_path, output_path, *, fully_convolutional, plot_model = False, path_to_graph_output = None):
    """Convert a Darknet YOLOv3 config/weights pair into a saved Keras model.

    The config is parsed section by section; every layer section is turned
    into the matching Keras layer and its weights are consumed, in file
    order, from the Darknet weights file. The outputs of the ``[yolo]``
    sections become the outputs of the resulting model.

    Args:
        config_path: Path to the Darknet ``.cfg`` file.
        weights_path: Path to the Darknet ``.weights`` file.
        output_path: Destination passed to ``model.save``.
        fully_convolutional: If true the input height/width are left as
            ``None``; otherwise they are read from the ``[net]`` section.
        plot_model: If true, also write a PNG plot of the model graph.
        path_to_graph_output: Plot path without the ``.png`` suffix.
            Defaults to ``output_path`` without its extension.

    Raises:
        ValueError: On an unsupported section type or activation, on an
            ``[avgpool]`` section with parameters, or when the config
            contains no ``[yolo]`` section.
    """
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    # The context manager guarantees the file is closed even when the
    # conversion fails half-way through.
    with open(weights_path, 'rb') as weights_file:
        header = np.ndarray(shape=(3, ), dtype='int32', buffer=weights_file.read(12))
        major, minor, revision = (int(value) for value in header)
        # Darknet stores `seen` as a 64-bit counter from format version 0.2
        # on and as a 32-bit int before that.
        if major * 10 + minor >= 2 and major < 1000 and minor < 1000:
            seen = int(np.ndarray(shape=(1, ), dtype='int64', buffer=weights_file.read(8))[0])
        else:
            seen = int(np.ndarray(shape=(1, ), dtype='int32', buffer=weights_file.read(4))[0])

        print(f'Weights Header: major,minor,revision,seen={major},{minor},{revision},{seen}.')
        # TODO: Check transpose flag when implementing fully connected layers.
        # transpose = (major > 1000) or (minor > 1000)

        print('Parsing Darknet config.')
        unique_config_file = unique_config_sections(config_path)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        print('Creating Keras model.')
        if fully_convolutional:
            print('Fully convolutional.')
            image_height, image_width = None, None
        else:
            image_height = int(cfg_parser['net_0']['height'])
            image_width = int(cfg_parser['net_0']['width'])
        prev_layer = Input(shape=(image_height, image_width, 3))
        all_layers = [prev_layer]
        yolo_heads = []

        def resolve_layer(index):
            # Negative indices are relative to the current layer and map
            # directly onto all_layers. Darknet's absolute (non-negative)
            # indices do not count the network input stored at
            # all_layers[0], so they are shifted by one.
            return all_layers[index if index < 0 else index + 1]

        weight_decay = float(cfg_parser['net_0']['decay']) if 'net_0' in cfg_parser.sections() else 5e-4
        count = 0  # number of 32-bit weights consumed so far
        for section in cfg_parser.sections():
            print('Parsing section {}'.format(section))
            if section.startswith('convolutional'):
                filters = int(cfg_parser[section]['filters'])
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                pad = int(cfg_parser[section]['pad'])
                activation = cfg_parser[section]['activation']
                # `batch_normalize=0` must count as disabled, so test the
                # value instead of the mere presence of the key.
                batch_normalize = cfg_parser[section].getint('batch_normalize', fallback=0) != 0

                if activation not in ('leaky', 'linear'):
                    raise ValueError(
                        'Unknown activation function `{}` in section {}'.format(
                            activation, section))

                # padding='same' is equivalent to Darknet pad=1
                # NOTE(review): for stride > 1 Darknet pads size // 2 on every
                # side, while Keras 'same' may pad only bottom/right. Other
                # converters use ZeroPadding2D(((1, 0), (1, 0))) + 'valid'
                # here - confirm against Darknet reference outputs.
                padding = 'same' if pad == 1 else 'valid'

                # Setting weights.
                # Darknet serializes convolutional weights as:
                # [bias/beta, [gamma, mean, variance], conv_weights]
                prev_layer_shape = K.int_shape(prev_layer)

                # TODO: This assumes channel last dim_ordering.
                weights_shape = (size, size, prev_layer_shape[-1], filters)
                darknet_w_shape = (filters, weights_shape[2], size, size)
                weights_size = int(np.prod(weights_shape))

                print('conv2d', 'bn'
                      if batch_normalize else '  ', activation, weights_shape)

                conv_bias = np.ndarray(
                    shape=(filters, ),
                    dtype='float32',
                    buffer=weights_file.read(filters * 4))
                count += filters

                if batch_normalize:
                    bn_weights = np.ndarray(
                        shape=(3, filters),
                        dtype='float32',
                        buffer=weights_file.read(filters * 12))
                    count += 3 * filters

                    # TODO: Keras BatchNormalization mistakenly refers to var
                    # as std.
                    bn_weight_list = [
                        bn_weights[0],  # scale gamma
                        conv_bias,  # shift beta
                        bn_weights[1],  # running mean
                        bn_weights[2]  # running var
                    ]

                conv_weights = np.ndarray(
                    shape=darknet_w_shape,
                    dtype='float32',
                    buffer=weights_file.read(weights_size * 4))
                count += weights_size

                # DarkNet conv_weights are serialized Caffe-style:
                # (out_dim, in_dim, height, width)
                # We would like to set these to Tensorflow order:
                # (height, width, in_dim, out_dim)
                conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])

                # With batch norm the bias is folded into beta, so the
                # convolution itself carries no bias term.
                conv_weights = [conv_weights] if batch_normalize else [
                    conv_weights, conv_bias
                ]

                # Create Conv2D layer; a leaky activation is appended below
                # as its own layer.
                conv_layer = (Conv2D(
                    filters, (size, size),
                    strides=(stride, stride),
                    kernel_regularizer=l2(weight_decay),
                    use_bias=not batch_normalize,
                    weights=conv_weights,
                    activation=None,
                    padding=padding))(prev_layer)

                if batch_normalize:
                    conv_layer = (BatchNormalization(
                        weights=bn_weight_list))(conv_layer)
                prev_layer = conv_layer

                if activation == 'leaky':
                    prev_layer = LeakyReLU(alpha=0.1)(prev_layer)
                all_layers.append(prev_layer)

            elif section.startswith('maxpool'):
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                all_layers.append(
                    MaxPooling2D(
                        padding='same',
                        pool_size=(size, size),
                        strides=(stride, stride))(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('avgpool'):
                if cfg_parser.items(section) != []:
                    raise ValueError('{} with params unsupported.'.format(section))
                all_layers.append(GlobalAveragePooling2D()(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('route'):
                ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
                layers = [resolve_layer(i) for i in ids]
                if len(layers) > 1:
                    print('Concatenating route layers:', layers)
                    concatenate_layer = concatenate(layers)
                    all_layers.append(concatenate_layer)
                    prev_layer = concatenate_layer
                else:
                    skip_layer = layers[0]  # only one layer to route
                    all_layers.append(skip_layer)
                    prev_layer = skip_layer

            elif section.startswith('upsample'):
                # configparser yields strings; UpSampling2D needs integers.
                stride = int(cfg_parser[section]['stride'])
                prev_layer = all_layers[-1]
                all_layers.append(
                    UpSampling2D(size=(stride, stride), interpolation='nearest')(prev_layer)
                )
                prev_layer = all_layers[-1]

            elif section.startswith('shortcut'):
                from_layer = resolve_layer(int(cfg_parser[section]['from']))
                prev_layer = all_layers[-1]

                all_layers.append(
                    Add()([from_layer, prev_layer])
                )
                prev_layer = all_layers[-1]

            elif section.startswith('yolo'):
                prev_layer = all_layers[-1]
                # Identity layer that only gives each detection head a stable name.
                yolo_layer = Lambda(lambda x: x, name = f'yolo_{len(yolo_heads)}')(prev_layer)
                all_layers.append(yolo_layer)
                yolo_heads += [yolo_layer]
                # Anchors are listed as a flat "w,h,w,h,..." sequence; accept
                # any number of pairs rather than exactly nine.
                anchors = np.array(
                    [int(value.strip()) for value in cfg_parser[section]['anchors'].split(',')]
                ).reshape((-1, 2))
                print(anchors)
                prev_layer = all_layers[-1]

            elif (section.startswith('net') or section.startswith('cost')
                  or section.startswith('softmax')):
                pass  # Configs not currently handled during model definition.

            else:
                raise ValueError(
                    'Unsupported section header type: {}'.format(section))

        # Whatever is left in the file was not consumed by any layer.
        remaining_weights = len(weights_file.read()) // 4

    if not yolo_heads:
        raise ValueError('Config {} defines no [yolo] section.'.format(config_path))

    # Create and save model.
    model = Model(inputs=all_layers[0], outputs=yolo_heads)
    model.summary()  # prints by itself and returns None

    if remaining_weights > 0:
        print(f'Warning: {remaining_weights} unused weights')

    model.save(f'{output_path}')
    print(f'Saved Keras model to {output_path}')
    # Check to see if all weights have been read.
    print(f'Read {count} of {count + remaining_weights} from Darknet weights.')

    if plot_model:
        if path_to_graph_output is None:
            path_to_graph_output = output_root
        plot(model, to_file=f'{path_to_graph_output}.png', show_shapes=True)
        print(f'Saved model plot to {path_to_graph_output}.png')


In [None]:
# Convert the Darknet YOLOv3 OpenImages model to Keras and plot its graph.
# The converter defined in this notebook is `darknet_yolov3_to_keras`;
# the previous `darknet_yolov2_to_keras` name is not defined here.
darknet_yolov3_to_keras(
    YOLOV3_CFG_FILE_PATH,
    YOLOV3_WEIGHTS_FILE_PATH,
    './out/yolov3openimages_3heads_v3.h5',
    fully_convolutional=True,
    plot_model=True,
    path_to_graph_output='./out/yolov3openimages_3heads_v3_graph'
)

In [None]:
from PIL import Image, ImageDraw
import aiosqlite
from data.openimages.constants import BoxableImagesConstants
import asyncio
import os
import numpy as np
import sqlite3
from utils.np_array_db_converters import adapt_array, convert_array
import PIL
from keras.models import load_model
import keras.backend as K
import math
from models.yolov3.utils import load_classes
from utils.non_max_suppression import non_max_suppression

# Register adapt_array as the sqlite adapter for np.ndarray parameters
# (presumably serializes the array to a compressed binary form - see
# utils.np_array_db_converters to confirm).
aiosqlite.register_adapter(np.ndarray, adapt_array)
# Register convert_array as the converter for columns declared as "BLOB";
# it takes effect on connections opened with detect_types=sqlite3.PARSE_DECLTYPES.
aiosqlite.register_converter("BLOB", convert_array)

# Column lists: SELECT_FIELDS_IMAGE builds the image SELECT statement and names
# the fields of each returned row. SELECT_FIELDS_BOX is not used in the visible cells.
SELECT_FIELDS_BOX = ['id', 'x_min', 'x_max', 'y_min', 'y_max', 'label_id', 'image_id']
SELECT_FIELDS_IMAGE = ['id', 'image_bytes']

# NOTE(review): not referenced in the visible cells; presumably the size of
# the finest YOLOv3 output grid for 608x608 inputs - confirm or remove.
OUT_GRID_WIDTH = 76
OUT_GRID_HEIGHT = 76

# Anchor (width, height) pairs indexed as ANCHORS[head_idx][anchor_idx];
# draw_boxes multiplies them by exp() of the predicted width/height features.
ANCHORS = np.array([
    [[116,90],  [156,198],  [373,326]],
    [[30,61],   [62,45],    [59,119]],
    [[10,13],   [16,30],    [33,23]]
])

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar.

    The expression is evaluated in a form that never exponentiates a large
    positive number, so very negative inputs return ``0.0`` instead of
    raising ``OverflowError`` from ``math.exp``.

    Args:
        x: A real scalar (Python or NumPy number).

    Returns:
        The sigmoid of ``x`` as a float in ``[0, 1]``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x use the algebraically equal e**x / (1 + e**x); here
    # e**x underflows harmlessly towards 0 instead of overflowing.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    The global maximum is subtracted before exponentiating, which keeps the
    exponentials bounded without changing the result.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normaliser = exponentials.sum()
    return exponentials / normaliser

def parse_sql_result_to_object(sql_data_from_db, select_fields_arr):
    """Turn one SQL result row into a dict keyed by the selected field names.

    Args:
        sql_data_from_db: Iterable with the values of a single row.
        select_fields_arr: Field names, in the same order as the row values.

    Returns:
        A dict mapping ``select_fields_arr[i]`` to the i-th row value.
    """
    return {
        select_fields_arr[position]: column_value
        for position, column_value in enumerate(sql_data_from_db)
    }

def drawrect(drawcontext, box_coords, color="red", width=3):
    """Draw a rectangle outline that is ``width`` pixels thick.

    The outline is built from ``width`` nested one-pixel rectangles, each one
    pixel larger than the previous on every side, so all four edges and the
    corners get the same thickness. (Previously the horizontal extent grew
    while the vertical extent shrank, which produced notched corners and
    inverted rectangles for small boxes.)

    Args:
        drawcontext: Object with a ``rectangle(xy, outline=...)`` method,
            e.g. a ``PIL.ImageDraw.ImageDraw``.
        box_coords: ``(x1, y1, x2, y2)`` corners of the innermost rectangle.
        color: Outline colour passed through to ``rectangle``.
        width: Number of nested rectangles to draw.
    """
    x1, y1, x2, y2 = box_coords
    for grow in range(width):
        drawcontext.rectangle(
            ((x1 - grow, y1 - grow), (x2 + grow, y2 + grow)),
            outline=color)

async def get_images_from_db(path_to_db, table_name_for_images, image_ids = [1]):
    """Fetch the image rows with the given ids from the sqlite database.

    Args:
        path_to_db: Path of the sqlite database file.
        table_name_for_images: Name of the table to select from.
        image_ids: Ids to look up; bound as query parameters.

    Returns:
        A list with one dict per fetched row, keyed by ``SELECT_FIELDS_IMAGE``.
    """
    # One "?" placeholder per requested id.
    placeholders = ', '.join('?' for _ in image_ids)
    image_ids_placeholder = f'''({placeholders})'''

    async with aiosqlite.connect(path_to_db, detect_types=sqlite3.PARSE_DECLTYPES) as db_conn:
        cursor = await db_conn.cursor()

        await cursor.execute(f'''
            SELECT {', '.join(SELECT_FIELDS_IMAGE)}
            FROM {table_name_for_images}
            WHERE id IN {image_ids_placeholder}
        ''', image_ids)

        fetched_rows = await cursor.fetchall()

        return [
            parse_sql_result_to_object(row, SELECT_FIELDS_IMAGE)
            for row in fetched_rows
        ]

def _sigmoid_array(values):
    """Element-wise logistic sigmoid; inputs are clipped so np.exp cannot overflow."""
    clipped = np.clip(np.asarray(values, dtype=np.float64), -500.0, 500.0)
    return 1.0 / (1.0 + np.exp(-clipped))


def draw_boxes(draw, img_bytes, predicted_reshaped, anchor_start_idx, class_idx_to_class_name, score_threshold=0.3, nms_threshold=0.6):
    """Decode one YOLO head, suppress overlapping boxes and draw the rest.

    Args:
        draw: Drawing context handed to ``drawrect`` and used for ``text``.
        img_bytes: Image array; ``shape[0]`` is used as the image height and
            ``shape[1]`` as the image width.
        predicted_reshaped: Head output indexed as
            ``[0][grid_row][grid_col][anchor]``. Each box vector holds
            x, y, w, h, objectness and then the per-class logits.
        anchor_start_idx: Index of this head's anchor set in ``ANCHORS``.
        class_idx_to_class_name: Lookup from class index to class name.
        score_threshold: Minimum objectness * class probability for a class
            to count as detected.
        nms_threshold: Third argument forwarded to ``non_max_suppression``
            (presumably the overlap threshold - confirm against the helper).
    """
    # Axis 1 of the head output runs over grid rows (y), axis 2 over grid
    # columns (x); the image array is laid out the same way. Keeping the
    # axes apart matters as soon as the input is not square.
    grid_height = predicted_reshaped.shape[1]
    grid_width = predicted_reshaped.shape[2]
    image_height, image_width = img_bytes.shape[0], img_bytes.shape[1]
    head_anchors = ANCHORS[anchor_start_idx]

    box_candidates = []
    box_scores = []
    for row_idx, grid_row in enumerate(predicted_reshaped[0]):
        for col_idx, cell in enumerate(grid_row):
            for anchor_idx, box in enumerate(cell):
                prob_obj = _sigmoid_array(box[4])
                # Class probabilities are at most 1, so no class can pass
                # the threshold when the objectness alone does not.
                if prob_obj <= score_threshold:
                    continue

                prob_chosen_class = prob_obj * _sigmoid_array(box[5:])
                detected_classes_idx = np.where(prob_chosen_class > score_threshold)[0]
                if len(detected_classes_idx) == 0:
                    continue

                # The centre is predicted as an offset inside its grid cell.
                center_x_offset, center_y_offset = _sigmoid_array(box[0:2])
                box_center_x = (col_idx + center_x_offset) / grid_width * image_width
                box_center_y = (row_idx + center_y_offset) / grid_height * image_height

                # Width/height are predicted as log-scale factors of the anchor.
                anchor_width, anchor_height = head_anchors[anchor_idx]
                half_width = np.exp(box[2]) * anchor_width / 2
                half_height = np.exp(box[3]) * anchor_height / 2

                box_candidates.append([
                    box_center_x - half_width,
                    box_center_y - half_height,
                    box_center_x + half_width,
                    box_center_y + half_height,
                    detected_classes_idx
                ])
                box_scores.append(np.max(prob_chosen_class))

    if not box_candidates:
        return  # nothing detected on this head

    _, chosen_box_indices = non_max_suppression(box_candidates, box_scores, nms_threshold)

    for box_idx in chosen_box_indices:
        x1, y1, x2, y2, detected_classes_indices = box_candidates[box_idx]
        drawrect(draw, [x1, y1, x2, y2])
        print(f'detected_classes_indices={detected_classes_indices}')

        detected_class_names = [class_idx_to_class_name[idx] for idx in detected_classes_indices]
        draw.text((((x1 + x2) // 2) - 5, ((y1+y2) //2 ) - 5), ','.join(detected_class_names), fill="red")




def draw_box(img, box, draw=None):
    """Draw a box given in relative coordinates, plus a marker at its centre.

    Args:
        img: Image to draw on; ``img.size`` is read as ``(width, height)``.
        box: Mapping with ``x_min``, ``x_max``, ``y_min`` and ``y_max``,
            expressed as fractions of the image width/height.
        draw: Optional drawing context. When omitted, one is created for
            ``img`` (the function used to rely on a global ``draw`` that
            this notebook never defines).
    """
    if draw is None:
        draw = ImageDraw.Draw(img)

    print(box)
    image_width, image_height = img.size

    x_min_scaled = box['x_min'] * image_width
    x_max_scaled = box['x_max'] * image_width
    y_min_scaled = box['y_min'] * image_height
    y_max_scaled = box['y_max'] * image_height

    x_middle_scaled = (box['x_min'] + box['x_max']) / 2. * image_width
    y_middle_scaled = (box['y_min'] + box['y_max']) / 2. * image_height

    # Closed polyline around the box.
    draw.line([(x_min_scaled, y_min_scaled), (x_max_scaled, y_min_scaled), (x_max_scaled, y_max_scaled), (x_min_scaled, y_max_scaled), (x_min_scaled, y_min_scaled)], fill = 128)

    # Centre marker: the centre pixel, the row above it and the column to its
    # right (same pixels as before; the top-right pixel is drawn only once).
    marker_fill = (255, 255, 255, 255)
    marker_offsets = ((0, 0), (-1, -1), (0, -1), (1, -1), (1, 0), (1, 1))
    for offset_x, offset_y in marker_offsets:
        draw.point((x_middle_scaled + offset_x, y_middle_scaled + offset_y), fill=marker_fill)

async def test():
    """Run the converted YOLOv3 model on a few stored images and show the detections.

    Loads the selected images from the sqlite database, runs the saved Keras
    model on each of them, decodes every output head with ``draw_boxes`` and
    displays the annotated image.
    """
    table_name_for_images = BoxableImagesConstants.TABLE_NAME_VAL_BOXABLE_IMAGES
    curr_path_to_db = os.path.join('..', 'db/boxable-images-608-608-subset-50.data')

    image_ids_to_test = [
        30,31,32,33,34,35
    ]

    imgs_db = (await get_images_from_db(curr_path_to_db, table_name_for_images, image_ids_to_test))

    model_path = './out/yolov3openimages_3heads_v3.h5'
    yolov3fully_conv = load_model(model_path, compile=False)

    class_idx_to_class_name = load_classes('../resources/yolov3/openimages.names')

    for img_db in imgs_db:
        img_bytes = img_db['image_bytes']
        # Add the batch dimension the model expects.
        arr = np.expand_dims(img_bytes, axis=0)

        img = PIL.Image.fromarray(img_bytes)
        draw = ImageDraw.Draw(img)
        # Pixel values are scaled to [0, 1] before prediction.
        predicted = yolov3fully_conv.predict(arr/255.)

        # One entry per model output; idx also selects the anchor set in draw_boxes.
        for idx, yolo_predicted in enumerate(predicted):
            np_arr_predicted = np.array(yolo_predicted)
            # Split the channel axis into 3 anchors x (box + objectness + class scores).
            np_arr_predicted = np_arr_predicted.reshape((1, np_arr_predicted.shape[1],np_arr_predicted.shape[2], 3, -1))
            print(np_arr_predicted.shape)

            draw_boxes(draw, img_bytes, np_arr_predicted, idx, class_idx_to_class_name)

        img.show()

In [None]:
# Run the YOLOv3 detection demo (top-level `await`, as supported in notebook cells).
await test()

In [None]:
async def test_converted_model():
    """Run a fully convolutional YOLOv2 model on image 14 and draw its detections.

    Loads './yolov2_fully_convolutional.h5', fetches image 14 and its boxes
    from the database, predicts, reshapes the output to (1, 14, 14, 5, -1),
    keeps boxes whose objectness * best class probability exceeds 0.6, applies
    non-max suppression and shows the annotated image.

    NOTE(review): `expit`, `get_boxes_from_db`, `non_max_suppression_fast` and
    `get_coco_class` are not defined or imported in the visible cells of this
    notebook, so this function raises NameError unless they come from
    elsewhere - confirm.
    NOTE(review): the `ANCHORS` array defined earlier in this notebook has
    shape (3, 3, 2), while this code indexes `ANCHORS[anchor_idx][0]` with
    anchor_idx running over 5 anchors; it looks written for a flat list of
    five YOLOv2 anchors - confirm which ANCHORS is intended.
    """
    model_path_fully_convolutional = './yolov2_fully_convolutional.h5'
    # NOTE(review): model_path is assigned but never used below.
    model_path = './yolov2_fully.h5'

    yolov2fully_conv = load_model(model_path_fully_convolutional)
    path_to_db = BoxableImagesConstants.PATH_TO_DB_YOLO_V2    
    table_name_for_boxes = BoxableImagesConstants.TABLE_NAME_VAL_IMAGE_BOXES
    table_name_for_images = BoxableImagesConstants.TABLE_NAME_VAL_BOXABLE_IMAGES
    curr_path_to_db = os.path.join('..', path_to_db)

    
    img_db = (await get_images_from_db(curr_path_to_db, table_name_for_images, [14]))[0]
    # NOTE(review): boxes_for_img is fetched but never used below.
    boxes_for_img = (await get_boxes_from_db(curr_path_to_db, table_name_for_boxes, [14]))[0]
    
    img_bytes = img_db['image_bytes']
    
    # Add a leading batch axis before prediction.
    arr = np.expand_dims(img_bytes, axis = 0)
    
    print(arr.shape)
    
    predicted = np.array(yolov2fully_conv.predict(arr))
    
    print(predicted.shape)
    
    # Grid size derived from the image shape with a factor of 32.
    # NOTE(review): shape[0] is used for the width and shape[1] for the
    # height; this only agrees with a (height, width, ...) layout for
    # square images - confirm.
    grid_width = img_bytes.shape[0] // 32
    grid_height = img_bytes.shape[1] // 32
    print(f'''
        grid_width = {grid_width},
        grid_height = {grid_height}
    ''')
    
    # NOTE(review): the 14x14 grid and 5 anchors are hard-coded here rather
    # than derived from grid_width / grid_height above.
    predicted_reshaped = np.reshape(predicted, (1, 14, 14, 5, -1))
    
    img = PIL.Image.fromarray(img_bytes)
    
    draw = ImageDraw.Draw(img)
    
    box_candidates = []
    # The outer loop walks axis 1 of the prediction and the inner loop axis 2;
    # the inner index (named row_idx) is the one used for the x coordinate.
    for col_idx, cell_grid in enumerate(predicted_reshaped[0]):
#         print(f'cell_grid.shape: {cell_grid.shape}')
        for row_idx, cell in enumerate(cell_grid):
#             print(f'cell.shape: {cell.shape}')
            for anchor_idx, box in enumerate(cell):
                # Box vector layout as used here: [x, y, w, h, objectness, class scores...].
                prob_obj = expit(box[4])
                prob_class = expit(np.max(box[5:]))
                class_idx = np.argmax(box[5:])
                
                prob_chosen_class = prob_obj * prob_class
                if prob_chosen_class > 0.6:
                    box_center_x_feat = expit(box[0])
                    box_center_y_feat = expit(box[1])
                    
                    # Centre as a fraction of the grid extent.
                    box_center_x = (row_idx + box_center_x_feat) / grid_width
                    box_center_y = (col_idx + box_center_y_feat) / grid_height
                    
                    width_feat = box[2]
                    height_feat = box[3]
                    
                    # Anchor sizes are divided by the grid size here, i.e.
                    # treated as being expressed in grid cells.
                    grid_cell_width = (np.exp(width_feat) * (ANCHORS[anchor_idx][0] / grid_width))
                    grid_cell_height = (np.exp(height_feat) * (ANCHORS[anchor_idx][1] / grid_height))

#                     width = (np.exp(width_feat) * ANCHORS[anchor_idx][0])
#                     height = (np.exp(height_feat) * ANCHORS[anchor_idx][1])
                    
                    # Scale the relative values by the image shape.
                    box_center_x = box_center_x * img_bytes.shape[0]
                    box_center_y = box_center_y * img_bytes.shape[1]
                                        
                    width = grid_cell_width * (img_bytes.shape[0])
                    width_center = width / 2
                    height = grid_cell_height * (img_bytes.shape[1])
                    height_center = height / 2
                    
                    # Candidate stored as [x1, y1, x2, y2, class_idx].
                    box_candidates += [[
                        box_center_x - width_center,
                        box_center_y - height_center,
                        box_center_x + width_center,
                        box_center_y + height_center,
                        class_idx
                    ]]
                    
                    
#                     print(f'box.shape: {box.shape}')
#                     print(f'prob_obj = {prob_obj}')
#                     print(f'''
#                         box_center_x - width / 2 = {box_center_x - width / 2},
#                         box_center_y - height / 2 = {box_center_y - height / 2},
#                         box_center_x + width / 2 = {box_center_x + width / 2},
#                         box_center_y + height / 2 = {box_center_y + height / 2},
#                     ''')
#                     print(f'''
#                         prob_class = {prob_class}
#                     ''')
                    
        
    picked_boxes = non_max_suppression_fast(np.array(box_candidates))
    
    for box in picked_boxes:
        x1, y1, x2, y2, class_idx = box
#         draw_bounding_box(draw, x1, y1, x2, y2)
        
        drawrect(draw, [x1, y1, x2, y2])
        draw.text((x1 + 5, y1 + 5), get_coco_class(class_idx), fill="red")
        print(f'class detected: {get_coco_class(class_idx)}')


    print(predicted_reshaped.shape)
    
    img.show()

    
# Run the YOLOv2 demo defined above (top-level `await`, as supported in notebook cells).
await test_converted_model()