In [46]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets
from tensorflow.contrib.slim.nets import vgg 

import json
import numpy as np
import cv2

In [65]:
# Side length (pixels) of the square network input, and the number of grid
# cells along each side of the label/prediction grid.
img_size = 224
cell_size = 7

# Shape-name -> class-index mapping used to build the one-hot class vector.
classes = dict(circle=0, triangle=1, rectangle=2)

# Presumably the per-channel (R, G, B) ImageNet means used for VGG
# preprocessing -- TODO confirm; not referenced by the cells visible here.
VGG_MEAN = [123.68, 116.78, 103.94]

In [66]:
def load(data_dir='../data/yolo/train'):
    """Load the training images and build the per-cell target grid.

    Reads ``labels.json`` from ``data_dir`` and, for every entry ``idx`` in
    it, the image ``{idx}.png`` from the same directory.

    Args:
        data_dir: Directory holding ``labels.json`` and the numbered PNG
            images. Defaults to the path the notebook originally hard-coded.

    Returns:
        A tuple ``(X, y)``:
          * ``X`` -- ``uint8`` array of shape ``(N, img_size, img_size, 3)``
            holding the images exactly as returned by ``cv2.imread``.
          * ``y`` -- float array of shape
            ``(N, cell_size, cell_size, 5 + len(classes))``. The cell that
            contains a box center stores
            ``[1, x_center, y_center, w, h, *one_hot_class]`` (coordinates in
            pixels); all other cells stay zero. If several boxes fall in the
            same cell, the last one wins.

    Raises:
        FileNotFoundError: If an image listed in ``labels.json`` cannot be
            read by OpenCV.
    """
    # Use a context manager so the label file handle is closed deterministically.
    with open('{}/labels.json'.format(data_dir)) as label_file:
        labels = json.load(label_file)

    N = len(labels)
    n_classes = len(classes)
    X = np.zeros((N, img_size, img_size, 3), dtype='uint8')
    # 1 objectness flag + 4 box values + one-hot class vector.
    y = np.zeros((N, cell_size, cell_size, 5 + n_classes))

    for idx, label in enumerate(labels):
        img_path = '{}/{}.png'.format(data_dir, idx)
        # NOTE(review): cv2.imread yields BGR channel order; if VGG_MEAN is in
        # RGB order the channels need swapping before mean subtraction -- confirm.
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('could not read image: {}'.format(img_path))
        X[idx] = img

        for box in label['boxes']:
            x1, y1 = box['x1'], box['y1']
            x2, y2 = box['x2'], box['y2']
            cl = [0] * n_classes
            cl[classes[box['class']]] = 1

            x_center, y_center = (x1 + x2) / 2.0, (y1 + y2) / 2.0
            w, h = x2 - x1, y2 - y1

            # Clamp so a center on (or past) the image border still maps to a
            # valid cell instead of raising IndexError or wrapping around via
            # a negative index.
            x_idx = min(max(int(x_center / img_size * cell_size), 0), cell_size - 1)
            y_idx = min(max(int(y_center / img_size * cell_size), 0), cell_size - 1)
            y[idx, y_idx, x_idx] = [1, x_center, y_center, w, h] + cl

    return X, y

In [8]:
# Read the whole training set into memory: X holds the images, y the target grid.
# NOTE(review): this cell's execution count (8) is older than the cell defining
# load() (66) -- re-run after a kernel restart to make sure X and y come from
# the current definition.
X, y = load()

In [67]:
def vgg16(inputs, is_training, num_outputs=13, fc6_depth=1027, dropout_keep_prob=0.5):
    """Build a VGG-16 style convolutional body with a 1x1-convolution head.

    The five conv/pool stages are created under the ``vgg_16`` variable scope
    with the scope names ``conv1``..``conv5`` / ``pool1``..``pool5``. The head
    (``fc6``, ``dropout6``, ``fc7``) uses 1x1 convolutions, so the spatial
    grid produced by ``pool5`` is kept.

    Args:
        inputs: Image batch tensor fed to the first convolution.
        is_training: Python bool or boolean tensor; enables dropout when true.
        num_outputs: Depth of the final ``fc7`` layer (default 13, the value
            originally hard-coded).
        fc6_depth: Depth of the hidden ``fc6`` layer.
            NOTE(review): the default 1027 is kept from the original code; it
            looks like it may have been meant to be 1024 -- confirm.
        dropout_keep_prob: Keep probability passed to ``slim.dropout``.

    Returns:
        The output tensor of ``fc7``. For 224x224 inputs and the defaults the
        notebook reports its shape as ``(?, 7, 7, 13)``.
    """
    # (number of stacked 3x3 convolutions, output depth) for stages 1..5.
    conv_stages = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]

    with tf.variable_scope("vgg_16"):
        # Apply slim's default layer arguments for VGG to every layer below.
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net = inputs
            for stage, (repeats, depth) in enumerate(conv_stages, start=1):
                net = slim.repeat(net, repeats, slim.conv2d, depth, [3, 3],
                                  scope='conv{}'.format(stage))
                net = slim.max_pool2d(net, [2, 2], scope='pool{}'.format(stage))

            # Head: 1x1 convolutions applied independently at every grid cell.
            net = slim.conv2d(net, fc6_depth, [1, 1], scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, num_outputs, [1, 1], scope='fc7')
    return net

In [68]:
graph = tf.Graph()
with graph.as_default():
    # Graph inputs: a batch of square 3-channel images and the train/eval switch.
    image = tf.placeholder(tf.float32,
                           shape=[None, img_size, img_size, 3],
                           name="input")
    is_training = tf.placeholder(tf.bool)

    net = vgg16(image, is_training)

    # Everything except the two head layers is loaded from the checkpoint file.
    variables_to_restore = tf.contrib.framework.get_variables_to_restore(
        exclude=['vgg_16/fc6', 'vgg_16/fc7'])
    init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
        '../data/yolo/vgg_16.ckpt',
        variables_to_restore)

    # The head layers (scopes starting with 'vgg_16/fc') are excluded from the
    # restore above, so they get their own initializer op.
    fc_variables = tf.contrib.framework.get_variables('vgg_16/fc')
    fc_init = tf.variables_initializer(fc_variables)

In [69]:
# Smoke test: restore the checkpointed weights, initialize the new head
# layers, and print the static output shape of the network.
with tf.Session(graph=graph) as sess:
    # Load the checkpointed variables into this session.
    init_fn(sess)
    # Initialize the fc6/fc7 variables that were excluded from the restore.
    sess.run(fc_init)
    print(net.get_shape())

INFO:tensorflow:Restoring parameters from ../data/yolo/vgg_16.ckpt
(?, 7, 7, 13)
