In [2]:
%cd ../DenseCap_tf/

/home/alex/DenseCap/DenseCap_tf


In [3]:
import tensorflow as tf
import numpy as np

import keras
import keras.backend as K
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Model
from keras.layers import Convolution2D, Lambda, Merge
import MakeAnchors, ReshapeBoxFeatures, ApplyBoxTransform

In [3]:
# load vgg16 model
base_model = VGG16(weights='imagenet')
model = Model(input=base_model.input, output=base_model.get_layer('block5_conv3').output)

In [4]:
# load random image
img_path = '/home/alex/FasterRCNN/py-faster-rcnn/data/demo/000456.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

In [5]:
# Create RPN with  Keras api
rpn = model(x)

In [6]:
rpn

<tf.Tensor 'Relu_27:0' shape=(1, 14, 14, 512) dtype=float32>

In [7]:
# RPN Conv layer parameters
rpn_num_filters = 256
rpn_filter_size = 3
std = 0.01

# Add an extra conv layer and a ReLU
rpn = Convolution2D(rpn_num_filters, rpn_filter_size, rpn_filter_size, activation='relu',
                      border_mode='same', init='normal')(rpn)

In [8]:
rpn

<tf.Tensor 'Relu_28:0' shape=(1, 14, 14, 256) dtype=float32>

In [9]:
# Set up anchor sizes
anchors = np.array([[45, 90], [90, 45], [64, 64],
                   [90, 180], [180, 90], [128, 128],
                   [181, 362], [362, 181], [256, 256],
                    [362, 724], [724, 362], [512, 512]]).T

num_anchors = anchors.shape[1]

In [10]:
# Branch to produce box coordinates for each anchor
# This branch will return {boxes, {anchors, transforms}}
# Box branch Conv layer parameters
box_input_shape = 14, 14, 256
box_num_filters = 4 * num_anchors
box_filter_size = 1

box_branch = Convolution2D(box_num_filters, box_filter_size, box_filter_size, 
                             border_mode='valid', init='normal')(rpn)

In [11]:
box_branch

<tf.Tensor 'add_30:0' shape=(1, 14, 14, 48) dtype=float32>

In [12]:
# Branch to make anchors
make_anchors_branch = Lambda(MakeAnchors.make_anchors)(box_branch)
make_anchors_branch = ReshapeBoxFeatures.ReshapeBoxFeatures(num_anchors)(make_anchors_branch)

In [13]:
# Intermediate layer, just to reshape a tensor for ReshapeBoxFeatures layer
def tensor_reshape(tensor):
    tensor_shape = tensor.get_shape().as_list()
    return K.reshape(tensor,
                     [tensor_shape[0], tensor_shape[3], tensor_shape[1], tensor_shape[2]])

In [14]:
# Branch that creates offsets for boxes, cannot understand how it works
reshape_boxes_branch = Lambda(tensor_reshape)(box_branch)
reshape_boxes_branch = ReshapeBoxFeatures.ReshapeBoxFeatures(num_anchors)(reshape_boxes_branch)

In [15]:
# Merge make_anchors_branch and reshape_boxes_branch together into box_branch
box_branch = Merge(
    mode=lambda x: tf.stack(x)
                         )([make_anchors_branch, reshape_boxes_branch])

In [16]:
box_branch

<tf.Tensor 'stack:0' shape=(2, 2352, 4) dtype=float32>

In [17]:
# Apply adjustments to bounding boxes for bounding box regression
box_branch = ApplyBoxTransform.ApplyBoxTransform()(box_branch)

In [18]:
box_branch

<tf.Tensor 'Reshape_32:0' shape=(2352, 4) dtype=float32>

In [19]:
# Branch to produce box / not box scores for each anchor
box_num_filters = 2 * num_anchors
box_filter_size = 1

rpn_branch = Convolution2D(box_num_filters, box_filter_size, box_filter_size, 
                             border_mode='valid', init='normal')(rpn)
rpn_branch = Lambda(tensor_reshape)(rpn_branch)
rpn_branch = ReshapeBoxFeatures.ReshapeBoxFeatures(num_anchors)(rpn_branch)

In [20]:
rpn_branch

<tf.Tensor 'Reshape_35:0' shape=(2352, 2) dtype=float32>

In [4]:
sess = tf.InteractiveSession()
tf.initialize_all_variables().run()

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [5]:
def IoU(input_x):
    box_1 = input_x[0]
    box_2 = input_x[1]
    
    box_1_shape = box_1.get_shape().as_list()
    box_2_shape = box_2.get_shape().as_list()
    N, B1, B2 = box_1_shape[0], box_1_shape[1], box_2_shape[1]
    
    area_1 = box_1[:, :, 2] * box_1[:, :, 3]
    area_2 = box_2[:, :, 2] * box_2[:, :, 3]
    area_1_reshaped = tf.reshape(area_1, [N, B1, 1])
    area_2_reshaped = tf.reshape(area_2, [N, 1, B2])
    area_1_expand = tf.tile(area_1_reshaped, [1, 1, B2])
    area_2_expand = tf.tile(area_2_reshaped, [1, B1, 1])
    
    box1_lohi = xcycwh_to_x1y1x2y2(box_1)
    box2_lohi = xcycwh_to_x1y1x2y2(box_2)
    box1_lohi_reshaped = tf.reshape(box1_lohi, [N, B1, 1, 4 ])
    box2_lohi_reshaped = tf.reshape(box2_lohi, [N, 1, B2, 4 ])
    box1_lohi_expand = tf.tile(box1_lohi_reshaped, [1, 1, B2, 1])
    box2_lohi_expand = tf.tile(box2_lohi_reshaped, [1, B1, 1, 1])
    
    x0 = tf.maximum(box1_lohi_expand[...,0], box2_lohi_expand[...,0])
    y0 = tf.maximum(box1_lohi_expand[...,1], box2_lohi_expand[...,1])
    x1 = tf.minimum(box1_lohi_expand[...,2], box2_lohi_expand[...,2])
    y1 = tf.minimum(box1_lohi_expand[...,3], box2_lohi_expand[...,3])
    
    w = tf.maximum(x1 - x0, 0)
    h = tf.maximum(y1 - y0, 0)
    
    intersection = w * h
    output = tf.add(area_1_expand, (intersection * (-1)))
    output = tf.pow(tf.add(output, area_2_expand), -1) * intersection
    
    return output

In [6]:
def xcycwh_to_x1y1x2y2(boxes):
    xc, yc, w, h = boxes[..., 0], boxes[..., 1], boxes[..., 2], boxes[..., 3]
    
    x0 = tf.div(tf.add(w, -1), 2.0) * (-1) + xc
    x1 = tf.div(tf.add(w, -1), 2.0) + xc
    y0 = tf.div(tf.add(h, -1), 2.0) * (-1) + yc
    y1 = tf.div(tf.add(h, -1), 2.0) + yc
    
    return tf.stack([x0, y0, x1, y1], axis=2)

In [7]:
boxes_1 = tf.constant([[2, 4, 2, 6], [5, 7.5, 2, 5]], shape=(1, 2, 4))
boxes_2 = tf.constant([[5, 8, 4, 2], [4.5, 4.5, 5, 3], [4.5, 0, 5, 4]], shape=(1, 3, 4))

In [8]:
#N, B1, B2 = boxes_1_shape[0], boxes_1_shape[1], boxes_2_shape[1]
boxes_1_shape = boxes_1.get_shape().as_list()
boxes_2_shape = boxes_2.get_shape().as_list()

In [9]:
N, B1, B2

NameError: name 'N' is not defined

In [10]:
ious = IoU([boxes_1, boxes_2])

In [11]:
input_max_iou = tf.reduce_max(ious, reduction_indices=[2])
input_idx = tf.arg_max(ious, dimension=2)
target_max_iou = tf.reduce_max(ious, reduction_indices=[1])
target_idx = tf.arg_max(ious, dimension=1)

In [12]:
input_max_iou.eval()

array([[ 0.        ,  0.05882353]], dtype=float32)

In [13]:
high_thresh = 0.05
low_thresh = 0.02
batch_size = 256

In [38]:
pos_mask = (input_max_iou > high_thresh)
neg_mask = (input_max_iou < low_thresh)

In [39]:
x_min, x_max, y_min, y_max = -1, 9, -9, 8

In [40]:
boxes_x1y1x2y2 = xcycwh_to_x1y1x2y2(boxes_1)

In [41]:
x_min_mask = (boxes_x1y1x2y2[...,0] < x_min)
y_min_mask = (boxes_x1y1x2y2[...,1] < y_min)
x_max_mask = (boxes_x1y1x2y2[...,2] > x_max)
y_max_mask = (boxes_x1y1x2y2[...,3] > y_max)

In [42]:
mask_shape = pos_mask.get_shape()

pos_mask_false = tf.constant(value=False, shape=mask_shape)
neg_mask_false = tf.constant(value=False, shape=mask_shape)

pos_mask_true = tf.constant(value=True, shape=(mask_shape[1],))
neg_mask_true = tf.constant(value=False, shape=(mask_shape[1],))

In [43]:
pos_mask = tf.select(x_min_mask, pos_mask_false, pos_mask)
pos_mask = tf.select(y_min_mask, pos_mask_false, pos_mask)
pos_mask = tf.select(x_max_mask, pos_mask_false, pos_mask)
pos_mask = tf.select(y_max_mask, pos_mask_false, pos_mask)
neg_mask = tf.select(x_min_mask, neg_mask_false, neg_mask)
neg_mask = tf.select(y_min_mask, neg_mask_false, neg_mask)
neg_mask = tf.select(x_max_mask, neg_mask_false, neg_mask)
neg_mask = tf.select(y_max_mask, neg_mask_false, neg_mask)
#tf.select(y_max_mask, neg_mask_false, neg_mask).eval()

In [44]:
# Count as positive each input box that has maximal IoU with each target box,
# even if it is outside the bounds or does not meet the thresholds.
# This is important since things will crash if we don't have at least one
# positive box.
# Make it currently optioanal because things become too complicated

pos_mask = tf.reshape(pos_mask, [mask_shape.as_list()[1]])
neg_mask = tf.reshape(neg_mask, [mask_shape.as_list()[1]])

"""
target_idx_cutted = target_idx[0,:mask_shape.as_list()[1]]

pos_mask = tf.Variable(initial_value=pos_mask, trainable=False)
neg_mask = tf.Variable(initial_value=neg_mask, trainable=False)

pos_mask = tf.scatter_update(pos_mask, target_idx_cutted, pos_mask_true)
neg_mask = tf.scatter_update(neg_mask, target_idx_cutted, neg_mask_false[0,:])
"""

'\ntarget_idx_cutted = target_idx[0,:mask_shape.as_list()[1]]\n\npos_mask = tf.Variable(initial_value=pos_mask, trainable=False)\nneg_mask = tf.Variable(initial_value=neg_mask, trainable=False)\n\npos_mask = tf.scatter_update(pos_mask, target_idx_cutted, pos_mask_true)\nneg_mask = tf.scatter_update(neg_mask, target_idx_cutted, neg_mask_false[0,:])\n'

In [30]:
#neg_mask = tf.cast(neg_mask, tf.int32)
#pos_mask = tf.cast(pos_mask, tf.int32)

In [45]:
neg_mask.eval()

array([ True, False], dtype=bool)

In [85]:
# Indices of boxes
pos_mask_nonzero = tf.reshape(tf.where(pos_mask), [-1])
neg_mask_nonzero = tf.reshape(tf.where(neg_mask), [-1])

In [69]:
total_pos = tf.reduce_sum(tf.cast(pos_mask, tf.int32))
total_neg = tf.reduce_sum(tf.cast(neg_mask, tf.int32))

In [81]:
num_neg

256

In [79]:
num_pos = min(batch_size / 2, total_pos.eval())
num_neg = batch_size - num_pos

In [363]:
val = tf.boolean_mask(neg_mask, y_max_mask)

In [353]:
tf.boolean_mask(neg_mask, x_max_mask).eval()

array([], dtype=bool)

In [95]:
tf.random_crop(pos_mask_nonzero, [num_pos]).eval()
tf.random_crop(neg_mask_nonzero, [num_neg]).eval()

<tf.Tensor 'random_crop_5:0' shape=(256,) dtype=float32>

In [92]:
data = tf.truncated_normal(shape=[300])

In [108]:
target_idx_cutted

<tf.Tensor 'strided_slice_57:0' shape=(1, 2) dtype=int64>

In [39]:
#target_idx = tf.reshape(target_idx_cutted, [mask_shape.as_list()[1]])

In [23]:
target_idx_cutted

<tf.Tensor 'strided_slice_28:0' shape=(2,) dtype=int64>

In [25]:
print pos_mask.get_shape()
print target_idx_cutted.get_shape()

(2,)
(2,)


In [540]:
tf.scatter_update(data, target_idx[0], [True, True, True])

<tf.Tensor 'ScatterUpdate_25:0' shape=(3,) dtype=bool_ref>

In [27]:
#target_idx_cutted.eval()
#pos_mask_true.eval()

In [506]:
tf.rank(target_idx).eval()

2

In [482]:
pos_mask.get_shape()

TensorShape([Dimension(1), Dimension(2)])

In [483]:
target_idx.get_shape()

TensorShape([Dimension(1), Dimension(3)])

In [383]:
tf.cast(val, tf.int32).eval() * 0

array([0], dtype=int32)

In [392]:
tf.where(boxes_x1y1x2y2[...,0] < x_min)

array([], shape=(0, 2), dtype=int64)

In [402]:
tf.select(y_max_mask, neg_mask_false, neg_mask).eval()

array([[ True, False]], dtype=bool)

In [403]:
neg_mask.eval()

array([[ True, False]], dtype=bool)

In [405]:
neg_mask_false.eval()

array([[False, False]], dtype=bool)

In [356]:
boxes_x1y1x2y2[...,0].eval()

array([[ 1.5,  4.5]], dtype=float32)

In [362]:
x_min_mask.eval()

array([[False, False]], dtype=bool)

In [360]:
tf.less(boxes_x1y1x2y2[...,0], x_min).eval()

array([[False, False]], dtype=bool)