In [5]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets
from tensorflow.contrib.slim.nets import vgg 

import json
import numpy as np
import cv2
import warnings
warnings.filterwarnings("ignore")

In [11]:
cell_size = 7
box_per_cell = 2
img_size = 224
classes = {'circle':0, 'triangle':1,  'rectangle':2}
VGG_MEAN = [123.68, 116.78, 103.94]

In [3]:
def load():
    labels = json.load(open('../data/yolo/train/labels.json'))
    N = len(labels)
    X = np.zeros((N, img_size, img_size, 3), dtype='uint8')
    y = np.zeros((N,cell_size, cell_size, 8))
    for idx, label in enumerate(labels):
        img = cv2.imread("../data/yolo/train/{}.png".format(idx))
        X[idx] = img
        for box in label['boxes']:
            x1, y1 = box['x1'], box['y1']
            x2, y2 = box['x2'], box['y2']
            cl = [0]*len(classes)
            cl[classes[box['class']]] = 1
            
            x_center, y_center, w, h = (x1+x2)/2.0, (y1+y2)/2.0, x2-x1, y2-y1
            x_idx, y_idx = int(x_center/img_size*cell_size), int(y_center/img_size*cell_size)
            y[idx, y_idx, x_idx] = 1, x_center, y_center, w, h, *cl
    
    return X, y

In [4]:
X, y = load()

In [34]:
def compute_iou(boxes1, boxes2, scope='iou'):
    """calculate ious
    Args:
      boxes1: 5-D tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4]  ====> (x_center, y_center, w, h)
      boxes2: 5-D tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4] ===> (x_center, y_center, w, h)
    Return:
      iou: 4-D tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    """
    with tf.variable_scope(scope):
        # transform (x_center, y_center, w, h) to (x1, y1, x2, y2)
        boxes1_t = tf.stack([boxes1[..., 0] - boxes1[..., 2] / 2.0,
                             boxes1[..., 1] - boxes1[..., 3] / 2.0,
                             boxes1[..., 0] + boxes1[..., 2] / 2.0,
                             boxes1[..., 1] + boxes1[..., 3] / 2.0],
                            axis=-1)

        boxes2_t = tf.stack([boxes2[..., 0] - boxes2[..., 2] / 2.0,
                             boxes2[..., 1] - boxes2[..., 3] / 2.0,
                             boxes2[..., 0] + boxes2[..., 2] / 2.0,
                             boxes2[..., 1] + boxes2[..., 3] / 2.0],
                            axis=-1)

        # calculate the left up point & right down point
        lu = tf.maximum(boxes1_t[..., :2], boxes2_t[..., :2])
        rd = tf.minimum(boxes1_t[..., 2:], boxes2_t[..., 2:])

        # intersection
        intersection = tf.maximum(0.0, rd - lu)
        inter_square = intersection[..., 0] * intersection[..., 1]

        # calculate the boxs1 square and boxs2 square
        square1 = boxes1[..., 2] * boxes1[..., 3]
        square2 = boxes2[..., 2] * boxes2[..., 3]

        union_square = tf.maximum(square1 + square2 - inter_square, 1e-10)

    return tf.clip_by_value(inter_square / union_square, 0.0, 1.0)    

In [8]:
def vgg16(inputs, is_training):
    with tf.variable_scope("vgg_16"):
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            net = slim.conv2d(net, 1027, [1, 1], scope='fc6')
            net = slim.dropout(net, 0.5, is_training=is_training,
                             scope='dropout6')
            net = slim.conv2d(net, 13, [1, 1], scope='fc7')
    return net

In [39]:
def loss_layer(predicts, labels, scope='loss_layer'):
    with tf.variable_scope(scope):
        predict_object = predicts[..., :box_per_cell]
        predict_box = tf.reshape(predicts[...,box_per_cell:4*box_per_cell], (-1, cell_size, cell_size, box_per_cell, 4))
        predict_class = predicts[...,5:]
        
        true_object = labels[..., 0]
        true_box = tf.reshape(labels[..., 1:5], (-1, cell_size, cell_size, 1, 4))
        true_box = tf.tile(true_box, (1, 1, 1, box_per_cell, 1))/img_size
        true_class = labels[..., 5:]
        
        predict_iou = compute_iou(predict_box, true_box)
        
        object_mask = tf.reduce_max(predict_iou, 3, keepdims=True)
        
        object_mask = tf.cast((predict_iou>=object_mask), tf.float32)*
        

In [40]:
graph = tf.Graph()
with graph.as_default():    
    images = tf.placeholder("float", [None, img_size, img_size, 3], name="input")
    labels = tf.placeholder('float', [None, cell_size, cell_size, 8], name='label')
    is_training = tf.placeholder(tf.bool)

    logits = vgg16(images, is_training)
    loss = loss_layer(logits, labels)
    
    variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc6', 'vgg_16/fc7'])
    init_fn = tf.contrib.framework.assign_from_checkpoint_fn('../data/yolo/vgg_16.ckpt', variables_to_restore)

    fc_variables = tf.contrib.framework.get_variables('vgg_16/fc')
    fc_init = tf.variables_initializer(fc_variables)

(?, 7, 7, 1)


In [17]:
with tf.Session(graph=graph) as sess:
    init_fn(sess)
    sess.run(fc_init)
    print(net.get_shape())

INFO:tensorflow:Restoring parameters from ../data/yolo/vgg_16.ckpt


NameError: name 'net' is not defined