In [1]:
"""
Main script for FCNT tracker. 
"""
#%%
# Import custom class and functions
from inputproducer import InputProducer
from tracker import TrackerVanilla
from vgg16 import Vgg16
from selcnn import SelCNN
from sgnet import GNet, SNet
from utils import img_with_bbox, IOU_eval

import numpy as np 
import tensorflow as tf

import os

#%%
tf.app.flags.DEFINE_integer('iter_step_sel', 200,
                          """Number of steps for trainning"""
                          """selCNN networks.""")
tf.app.flags.DEFINE_integer('iter_step_sg', 50,
                          """Number of steps for trainning"""
                          """SGnet works""")
tf.app.flags.DEFINE_integer('num_sel', 384,
                          """Number of feature maps selected.""")
tf.app.flags.DEFINE_integer('iter_max', 200,
							"""Max iter times through imgs""")

FLAGS = tf.app.flags.FLAGS

## Define varies path
DATA_ROOT = 'data/Dog1'
IMG_PATH = os.path.join(DATA_ROOT, 'img')
GT_PATH = os.path.join(DATA_ROOT, 'groundtruth_rect.txt')
VGG_WEIGHTS_PATH = 'vgg16_weights.npz'
#%%
def train_selCNN(sess, selCNN, feed_dict):
	# Initialize variables
	global_step = tf.Variable(0, trainable=False)
	selCNN_vars = selCNN.variables 
	init_vars_op = tf.initialize_variables(selCNN_vars + [global_step], name='init_selCNN')
	sess.run(init_vars_op)

	# Retrive trainning op
	train_op, losses, lr, optimizer = selCNN.train_op(global_step)
	print(sess.run(tf.report_uninitialized_variables()))
	# Train for iter_step_sel times
	# Inspects loss curve and pre_M visually
	for step in range(FLAGS.iter_step_sel):
		_, total_loss, lr_ = sess.run([train_op, losses, lr], feed_dict=feed_dict)
		print(total_loss)


def train_sgNet(sess, gnet, snet, sgt_M, ggt_M, feed_dict):
	"""
	Train sgnet by minimize the loss
	Loss = Lg + Ls
	where Li = |pre_Mi - gt_M|**2 + Weights_decay_term_i

	"""
	# Initialize sgNet variables
	sgNet_vars = gnet.variables + snet.variables
	init_SGNet_vars_op = tf.initialize_variables(sgNet_vars, name='init_sgNet')
	sess.run(init_SGNet_vars_op)

	# Define composite loss
	total_losses = snet.loss(sgt_M) + gnet.loss(ggt_M)

	# Define trainning op
	optimizer = tf.train.GradientDescentOptimizer(1e-6)
	train_op = optimizer.minimize(total_losses, var_list= sgNet_vars)

	for step in range(FLAGS.iter_step_sg):
		loss, _ = sess.run([total_losses, train_op], feed_dict = feed_dict)
		print(loss)



def gen_mask_phi(img_sz, loc):
	x,y,w,h = loc
	phi = np.zeros(img_sz)
	phi[y-int(0.5*h): y+int(0.5*h), x-int(0.5*w):x+int(0.5*w)] = 1
	return phi


In [2]:
## Instantiate inputProducer and retrive the first img
# with associated ground truth. 
inputProducer = InputProducer(IMG_PATH, GT_PATH)
img, gt, t  = next(inputProducer.gen_img)
roi_t0, _, _ = inputProducer.extract_roi(img, gt)

# Predicts the first img.
sess = tf.Session()
sess.run(tf.initialize_all_variables())
vgg = Vgg16(VGG_WEIGHTS_PATH, sess)
vgg.print_prob(roi_t0, sess)


lselCNN = SelCNN('sel_local', vgg.conv4_3, (1,28,28,1))
sgt_M = inputProducer.gen_mask(lselCNN.pre_M_size)
sgt_M = sgt_M[np.newaxis,:,:,np.newaxis]
feed_dict = {vgg.imgs: [roi_t0], lselCNN.gt_M: sgt_M}
train_selCNN(sess, lselCNN, feed_dict)

  roi = convas[cy-half:cy+half, cx-half:cx+half, :]


Old English sheepdog, bobtail 0.654105
miniature poodle 0.0409054
Tibetan terrier, chrysanthemum dog 0.0321338
standard poodle 0.0294567
Sealyham terrier, Sealyham 0.0269377
0.0013186425201 max of mask
(224, 224, 3)
250 max convas
[]
6.24955
6.72138
6.84566
6.60599
6.74102
6.42663
6.34022
6.57497
6.68752
6.69023
6.87499
6.88316
6.51023
6.44164
6.3163
6.40377
6.6733
6.59548
7.39637
6.76343
6.63283
6.97001
6.49342
6.79834
6.84682
6.7791
6.69084
6.76442
6.40401
6.31935
6.41752
6.42161
6.32999
6.82438
6.30624
6.81193
6.32685
6.93009
7.05138
6.58546
6.51173
6.3929
6.93226
6.55511
6.64598
6.2998
6.932
6.49809
6.76801
7.0186
6.71637
6.58757
6.64891
6.71199
6.58561
6.65325
6.5383
6.88126
6.76699
6.15861
6.39608
6.78094
6.7321
6.64148
6.62454
6.55969
6.9972
6.93911
6.52063
6.63478
6.8553
6.4667
6.68106
6.83305
6.86904
6.82761
6.87282
7.08774
7.00538
7.14762
6.87149
6.31789
6.59749
6.66101
6.98662
6.64763
6.21712
6.58686
6.63439
7.0363
7.16503
6.65444
6.61598
6.42774
6.88458
6.75254
6.44872
6.31

In [3]:
gselCNN = SelCNN('sel_global', vgg.conv5_3, (1,14,14,1))

# Gen anotated mask for target arear
ggt_M = inputProducer.gen_mask(gselCNN.pre_M_size)

## Train selCNN networks with first frame roi
# reshape gt_M for compatabilities

ggt_M = ggt_M[np.newaxis,:,:,np.newaxis]


0.0013186425201 max of mask
(224, 224, 3)
228 max convas


  roi = convas[cy-half:cy+half, cx-half:cx+half, :]


In [4]:
feed_dict[gselCNN.gt_M] = ggt_M # corrpus the other nets?

train_selCNN(sess, gselCNN, feed_dict)

[]
7.45898
7.18705
7.60542
7.22822
7.17818
7.13465
7.53609
7.20013
7.74993
7.42395
7.65614
7.4669
7.23219
7.30863
7.45034
7.55876
7.20524
7.69552
7.02159
7.47021
7.21452
7.19345
6.99465
7.10267
7.026
7.29146
7.46323
7.45864
7.43257
6.95836
7.37717
7.32549
7.03209
7.0665
7.19673
7.28802
6.76567
7.1914
7.2316
7.17945
7.57196
7.41841
7.24527
7.25672
7.23772
7.38088
7.47429
7.323
7.39932
7.05511
7.38201
7.034
7.63812
7.03099
7.19835
6.9884
7.41543
7.44386
6.88429
6.79032
7.85837
7.15966
7.6223
6.97199
7.13589
7.35873
7.22758
6.82742
7.11123
7.57246
7.29468
7.82809
7.07535
7.39229
7.00296
7.14005
6.76448
7.39539
7.37311
7.40585
7.93474
7.21463
7.23193
7.03047
7.43868
7.02994
7.63209
7.5435
7.26362
7.43913
7.23781
7.59292
7.43953
7.70063
7.30031
7.51797
7.48542
7.0717
7.6034
6.94664
7.52211
7.03897
7.27644
7.12308
7.60911
7.37688
7.61241
7.00234
7.57979
7.00004
6.97601
7.07745
7.11427
7.17251
7.5678
7.35608
7.47322
7.46241
7.28779
7.35634
7.31761
7.20186
7.25613
7.2778
7.79274
7.42668
7.6797

### sel part debug 

In [5]:
grads = tf.gradients(lselCNN.loss, lselCNN.feature_maps)
H_diag = [tf.gradients(grads[i], lselCNN.feature_maps[i])[0] for i in range(512)]
S = [tf.reduce_sum(-tf.mul(grads[i], lselCNN.feature_maps[i])) \
    + 0.5 * tf.reduce_sum(tf.mul(H_diag[i], lselCNN.feature_maps[i]**2)) for i in range(512)]
S_tensor = tf.pack(S, axis=0) # shape (512,)
vgg_maps, signif_v = sess.run([vgg.conv4_3, S_tensor], feed_dict=feed_dict)
idxs = sorted(range(len(signif_v)), key=lambda i: signif_v[i])[-FLAGS.num_sel:]
best_maps = vgg_maps[...,idxs]
print('Selected maps shape: {0}'.format(best_maps.shape))

In [6]:
s_sel_maps, s_idx = best_maps, idxs

In [21]:
grads = tf.gradients(gselCNN.loss, gselCNN.feature_maps)
H_diag = [tf.gradients(grads[i], gselCNN.feature_maps[i])[0] for i in range(512)]
S = [tf.reduce_sum(-tf.mul(grads[i], gselCNN.feature_maps[i])) \
    + 0.5 * tf.reduce_sum(tf.mul(H_diag[i], gselCNN.feature_maps[i]**2)) for i in range(512)]
S_tensor = tf.pack(S, axis=0) # shape (512,)
vgg_maps, signif_v = sess.run([vgg.conv5_3, S_tensor], feed_dict=feed_dict)
idxs = sorted(range(len(signif_v)), key=lambda i: signif_v[i])[-FLAGS.num_sel:]
best_maps = vgg_maps[...,idxs]
print('Selected maps shape: {0}'.format(best_maps.shape))

Selected maps shape: (1, 14, 14, 384)


In [33]:
g_sel_maps, g_idx = best_maps, idxs

## debug sgnet initializae 

In [27]:
from sgnet import SGNet

In [42]:

import tensorflow as tf
import numpy as np 

from scipy.misc import imresize

from utils import variable_on_cpu, variable_with_weight_decay



class SGNet:
	# Define class level optimizer
	lr = 1e-6
	#optimizer = tf.train.GradientDescentOptimizer(lr)
	def __init__(self, scope, vgg_conv_shape):
		"""
		Base calss for SGNet, defines the network structure
		"""
		tracker.scope = scope
		tracker.params = {
		'num_fms': 200, # number of selected featrue maps, inputs of the network
		'wd': 0.5, # L2 regulization coefficient
		}
		tracker.variables = []
		with tf.variable_scope(scope) as scope:
			tracker.pre_M = tracker._build_graph(vgg_conv_shape)

	def _build_graph(self, vgg_conv_shape):
		"""
		Define Structure. 
		The first additional convolutional
		layer has convolutional kernels of size 9×9 and outputs
		36 feature maps as the input to the next layer. The second
		additional convolutional layer has kernels of size 5 × 5
		and outputs the foreground heat map of the input image.
		ReLU is chosen as the nonlinearity for these two layers.

		Args:
		    vgg_conv_shape: 
		Returns:
		    conv2: 
		"""
		tracker.variables = []
		tracker.kernel_weights = []
		out_num = vgg_conv_shape[-1]

		tracker.input_maps = tf.placeholder(tf.float32, shape=vgg_conv_shape,
		    name='selected_maps')
		#assert vgg_conv_shape[-1] == tracker.params['num_fms']
        
		with tf.name_scope('conv1') as scope:
			kernel = tf.Variable(tf.truncated_normal([9,9,out_num,36],         dtype=tf.float32,stddev=1e-1), name='weights')

			conv = tf.nn.conv2d(tracker.input_maps, kernel, [1, 1, 1, 1], padding='SAME')
			biases = tf.Variable(tf.constant(0.0, shape=[36], dtype=tf.float32),trainable=True, name='biases')
			out = tf.nn.bias_add(conv, biases)
			conv1 = tf.nn.relu(out, name=scope)
			tracker.variables += [kernel, biases]
			tracker.kernel_weights += [kernel]
			print(conv1.get_shape().as_list(), 'conv1 shape')


		with tf.name_scope('conv2') as scope:
			kernel = tf.Variable(tf.truncated_normal([5,5,36,1], dtype=tf.float32, stddev=1e-1), name='weights')
			conv = tf.nn.conv2d(conv1, kernel , [1, 1, 1, 1], padding='SAME')
			print(conv.get_shape().as_list(), 'conv shape')
			biases = tf.Variable(tf.constant(0.0, shape=[1], dtype=tf.float32),
			                     trainable=True, name='biases')
			out = tf.nn.bias_add(conv, biases)
			conv2 = tf.nn.relu(out, name=scope)
			tracker.variables += [kernel, biases]
			tracker.kernel_weights += [kernel]

		print('Shape of the out put heat map for %s is %s'%(tracker.scope, conv2.get_shape().as_list()))
		return conv2

	def loss(self, gt_M):
		"""Returns Losses for the current network.

		Args:
		    gt_M: Tensor, ground truth heat map.

		Returns:
		    Loss: 
		"""

		# Assertion
		assert isinstance(gt_M, np.ndarray)
		if len(gt_M.shape) == 2:
			# gt_M is a 2D mask
			gt_M = tf.constant(gt_M.reshape((1,gt_M.shape[0], gt_M.shape[1], 1)), dtype=tf.float32)
		elif len(gt_M.shape) == 4:
			# gt_M is SGNet.pre_M
			gt_M = tf.constant(gt_M, dtype=tf.float32)
		else:
			print('Unhandled input shape: {0}'.format(gt_M.shape))

		with tf.name_scope(tracker.scope) as scope:
			beta = tf.constant(tracker.params['wd'], name='beta')
			loss_rms = tf.reduce_mean(tf.squared_difference(gt_M, tracker.pre_M))
			loss_wd = [tf.reduce_mean(tf.square(w)) for w in tracker.kernel_weights]
			loss_wd = beta * tf.add_n(loss_wd)
			total_loss = loss_rms + loss_wd
		return total_loss

		@classmethod
		def eadge_RP():
			"""
			This method propose a series of ROI along eadges
			for a given frame. This should be called when particle 
			confidence below a critical value, which possibly accounts
			for object re-appearance.
			"""
			pass



class GNet(SGNet):
	def __init__(self, scope, vgg_conv_shape):
		"""
		Fixed params once trained in the first frame
		"""
		super(GNet, self).__init__(scope, vgg_conv_shape)



class SNet(SGNet):
	lr = 1e-8
	optimizer = tf.train.GradientDescentOptimizer(lr)
	def __init__(self, scope, vgg_conv_shape):
		"""
		Initialized in the first frame
		"""
		super(SNet, self).__init__(scope, vgg_conv_shape)

	def adaptive_finetune(self, sess, best_M):
		"""Finetune SNet with best pre_M predicetd by gNet."""
        # Upsampling best_M 
        #bres_M_resized = imresize(best_M, [1, 28, 28, 1], interp='bicubic')
        #bres_M_resized = tf.constant(bres_M_resized, dtype=tf.float32)
		loss = tracker.loss(best_M)
		train_op = SNet.optimizer.minimize(loss, var_list=tracker.variables)
		sess.run(train_op)


	def descrimtive_finetune(self, sess, conv4_3_t0, sgt_M, conv4_3_t, pre_M, phi):
		# Type and shape check!
		sgt_M = tf.constant(sgt_M, dtype=tf.float32)
		pre_M = tf.constant(pre_M, dtype=tf.float32)

		Loss_t0 = tf.reduce_sum(tf.sqrt(tf.sub(sgt_M, tracker.pre_M)))
		feed_dict_t0 = {tracker.input_maps: conv4_3_t0}
		train_op_t0 = SNet.optimizer.minimize(Loss_t0, var_list=tracker.variables)
		

		Loss_t =  tf.reduce_sum((1-phi) * tf.reduce_sum(tf.sqrt(tf.sub(pre_M, tracker.pre_M))))
		feed_dict_t = {tracker.input_maps: conv4_3_t}
		train_op_t = SNet.optimizer.minimize(Loss_t0, var_list=tracker.variables)
		
		for _ in range(20):
			sess.run(train_op_t0, feed_dict_t0)
			sess.run(train_op_t, feed_dict_t)
		


In [45]:
gnet.pre_M

<tf.Tensor 'GNet_tmp/conv2:0' shape=(1, 28, 28, 1) dtype=float32>

In [46]:
snet.pre_M

<tf.Tensor 'SNet_tmp/conv2:0' shape=(1, 28, 28, 1) dtype=float32>

In [47]:
# Instantiate G and S networks.
gnet = GNet('GNet_tmp2', g_sel_maps.shape)
snet = SNet('SNet_tmp2', s_sel_maps.shape)

## Train G and S nets by minimizing a composite loss.
## with feeding selected saliency maps for each networks.
feed_dict = {gnet.input_maps: g_sel_maps, snet.input_maps: s_sel_maps}
train_sgNet(sess, gnet, snet, sgt_M, ggt_M, feed_dict)
s_sel_maps_t0 = s_sel_maps

[1, 14, 14, 36] conv1 shape
[1, 14, 14, 1] conv shape
Shape of the out put heat map for GNet_tmp2 is [1, 14, 14, 1]
[1, 28, 28, 36] conv1 shape
[1, 28, 28, 1] conv shape
Shape of the out put heat map for SNet_tmp2 is [1, 28, 28, 1]
303295.0
349.84
244.718
181.154
139.952
111.364
90.7019
75.5983
64.4097
56.035
49.7111
44.8813
41.1373
38.1913
35.8318
33.9029
32.2921
30.9214
29.7332
28.6851
27.7451
26.89
26.0993
25.3572
24.6577
23.9957
23.3655
22.7602
22.1784
21.6183
21.0789
20.5588
20.057
19.5725
19.1044
18.6511
18.2121
17.7867
17.3746
16.9753
16.5884
16.2134
15.85
15.497
15.1536
14.8194
14.4944
14.1781
13.8703
13.5708


###  debug t>0 

In [113]:


from queue import Queue
from scipy.misc import imresize
from operator import add

def compute_conf(roi, loc_p):
	"""Helper func for computing confidence"""
	cx,cy,w,h = loc_p
	conf = np.sum(roi[y-int(0.5*h): y+int(0.5*h), x-int(0.5*w):x+int(0.5*w)])
	return conf


class Tracker:
	"""
	Generic tracking model. A location is represented by an affine transformation (e.g., Xt−1), which warps the
	coordinate system so that the target lies within the unit square. Particles representing possible target locations Xt, 
	at time t are sampled according to P(Xt|Xt−1), which in this case is a diagonal-covariance Gaussian centered at Xt−1.
	
	Where:
	Xt = (xt, yt, θt, st, αt, φt)
	denote x, y translation, rotation angle, scale, aspect ratio, and skew direction at time t.

	P(Xt|Xt−1) = N (Xt; Xt−1, Ψ)
	where Ψ is a diagonal covariance matrix whose elements are the corresponding variances of affine parameters, assumes the variance of each affine parameter does not change over time

	See 3.3.1 Dynamic model in http://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf for reference

	Particle filter calss"""
	def __init__(self, init_location,):
		self.conf_q = Queue(maxsize=20)
		self.pre_M_q = Queue(maxsize=20)
		self.last_two_loc_q = Queue(maxsize=2)

		self.location = init_location
		self.params = self._init_params(init_location)



	def _init_params(self, init_location):
		"""Initialize tracker's parameters"""

		params = {'p_sz': 64, 'p_num': 700, 'min_conf': 0.2, 
				'mv_thr': 0.1, 'up_thr': 0.35, 'roi_scale': 2}
		diag_s = np.ceil((init_location[2]**2 + init_location[3]**2)**0.5/7)
		params['aff_sig'] = [diag_s, diag_s, 0.004, 0.0, 0.0, 0]
		params['ratio'] = init_location[2] / params['p_sz']
		
		return params

	def _qs_full(self):
		if self.conf_q.full() and self.pre_M_q.full():
			return True
		else:
			return False

	def gen_best_M(self):
		"""Returns the best pre_M in records."""
		assert self._qs_full()

		pre_Ms = [self.pre_M_q.get() for _ in range(20)]
		confs = [self.conf_q.get() for _ in range(20)]
		idx = np.argmax(confs)
		return pre_Ms[idx]


	def draw_particles(self):
		"""
		Generates particles according to 
		P(Xt|Xt−1) = N (Xt; Xt−1, Ψ)

		Args:
			aff_params: affine parameters, see class doc string for 
				specific element definition.
				[cx, cy, w/p_sz, 0, h/w, 0] for 6 degrees of freendom
				[tlx, tly, w, h] for 4 degrees of freedom.
				.
		Returns:
			aff_params_M : self.p_num*dof size matrix,
				where rows are updated randomly drawed affine 
				params, columns repersent each particles. 
		"""
		pass



	def predict_location(self, pre_M, gt_last, resize_factor, t):
		"""
		Predict location for each particle. It is calculated by
		1. compute the confidence of the i-th candidate, which is 
			the summation of all the heatmap values within the candidate region.
		2. the candidate with the highest confidence value is predicted as target.

		Args:
			img_siz: tuple(image height, image width)
			pre_M: predicted heat map
			t: index of current frame
		"""
		pass


	def get_most_conf_M(self):
		"""Returns the most confidence heat maps."""

		# Pull self.conf_records all out, and retrive 
		# the most confident heat map. 

		return updated_gt_M


	def linear_prediction(self):
		"""
		Predicts current location linnearly according
		to las two frames location. This may boost the 
		robustnesss of obejct occlusion.
		"""
		pass

	def distracted(self):
		"""Distracter detection."""

		# up-sampling pre_M

		# Compute confidence according to 
		# S = with_in / with_out
		if self.cur_best_conf <= self.params['min_conf']:
			return True
		else:
			return False
			
	@classmethod
	def compute_conf(self, roi, loc_p):
		"""Helper func for computing confidence"""
		x,y,w,h = loc_p
		conf = np.sum(roi[y-int(0.5*h): y+int(0.5*h), \
					x-int(0.5*w):x+int(0.5*w)])
		return conf

	@classmethod
	def aff2loc(self, las_loc, aff_param):
		"""Convert affine params to location."""
		assert len(aff_param)==4, 'This method only works for dof 4 aff space.'
		cur_loc = [i+j for i,j in zip(las_loc, aff_param)]
		return cur_loc


class TrackerVanilla(Tracker):
	"""Vanilla tracker

		The covariance matrix has only 4 degrees of freedom,
		specified by vertical, horizontal translation of the central
		point, variance of the width, variance of the w/h ratio.

		The corrresponding actual senarios are object replacment,
		object zoom in/out, object rotaion. Should be sufficient 
		for most cases of car tracking.

	"""
	def __init__(self, init_location):
		super(TrackerVanilla, self).__init__(init_location)
		self._update_params()

	def _update_params(self):
		"""Update aff_sig param."""
		self.params['aff_sig'] = [10, 10, 0.04, 0.04]


	def draw_particles(self, last_location):
		"""
		The covariance matrix has only 4 degrees of freedom,
		specified by vertical, horizontal translation of the central
		point, variance of the width, variance of the w/h ratio.

		The corrresponding actual senarios are object replacment,
		object zoom in/out, object rotaion. Should be sufficient 
		for most cases of car tracking.

		Args: 
			last_location: [tlx, tly, w, h]
		"""
		# Define degrees of freedom 
		dof = len(self.params['aff_sig'])

		# Construct an p_num*6 size matrix with with each 
		# column repersents one particle

		#aff_params_M = np.kron(np.ones((self.params['p_num'],1)), np.array(aff_params))

		# First onstruct a p_num*dof size normal distribution with 
		# mean 0 and sigma 1
		rand_norml_M = np.array([np.random.standard_normal(dof) for _ in range(self.params['p_num'])])

		# Then construct a affine sigma matrix
		aff_sig_M = np.kron(np.ones((self.params['p_num'], 1)), self.params['aff_sig'])

		# Update particles 
		self.aff_params_M = rand_norml_M * aff_sig_M


	def predict_location(self, pre_M, gt_last, resize_factor, t, roi_size):
		"""
		Predict location for each particle. It is calculated by
		1. compute the confidence of the i-th candidate, which is 
			the summation of all the heatmap values within the candidate region.
		2. the candidate with the highest confidence value is predicted as target.

		Args:
			img_siz: tuple(image height, image width)
			pre_M: predicted heat map
			t: index of current frame
		"""
		# transform self.aff_params_M to location_M with each column 
		# repersent [cx, cy, w, h] in the pre_M heat map
		loc_M = np.zeros(self.aff_params_M.shape)
		tlx, tly, w, h = gt
		cx, cy = roi_size // 2, roi_size // 2
		loc_M[:, 0] = cx
		loc_M[:, 1] = cy
		loc_M[:, 2] = resize_factor * w 
		loc_M[:, 3] = resize_factor * h
		loc_M += self.aff_params_M
		loc_M = loc_M.astype(np.int)

		# Upsampling pre_M bicubicly to roi_size
		pre_M_resized = imresize(pre_M[0,:,:,0], [roi_size, roi_size], interp='bicubic')

		# Compute conf for each particle 
		conf_lsit = []
		for p_i_loc in loc_M:
			conf_i = self.compute_conf(pre_M_resized, p_i_loc)
			conf_lsit += [conf_i]

		# Get index and conf score of of the most confident one
		idx = np.argmax(conf_lsit)
		self.cur_best_conf = conf_lsit[idx]
		self.conf_q.put(conf_lsit[idx])

		# Get the corresponding aff_param which is then
		# used to predicted the cureent best location
		best_aff =  self.aff_params_M[idx]
		print('The affine paramters: [dx,dy,dw,dh] is {0}'.format(best_aff))
		self.pre_location = self.aff2loc(gt_last, best_aff)

		# Stack into records queue


		return self.pre_location










		



In [None]:
tracker = TrackerVanilla(gt)

In [None]:


# Iter imgs
gt_last = gt 
#for i in range(FLAGS.iter_max):
# Gnerates next frame infos
img, gt_cur, t  = next(inputProducer.gen_img)

## Crop a rectangle ROI region centered at last target location.
roi, _, resize_factor = inputProducer.extract_roi(img, gt_last)

## Perform Target localiation predicted by GNet
# Get heat map predicted by GNet
feed_dict_vgg = {vgg.imgs : [roi]}
s_maps, g_maps = sess.run([vgg.conv4_3, vgg.conv5_3], feed_dict=feed_dict_vgg)
s_sel_maps = s_maps[...,s_idx] # np.ndarray, shape = [1,28,28,num_sel]?
g_sel_maps = g_maps[...,g_idx]

feed_dict_g = { gnet.input_maps: g_sel_maps}
pre_M = sess.run(gnet.pre_M, feed_dict=feed_dict_g)

tracker.draw_particles(gt_last)
pre_loc = tracker.predict_location(pre_M, gt_last, resize_factor, t, 224)

In [157]:
t

33

The affine paramters: [dx,dy,dw,dh] is [ 11.64488943 -29.72706869   0.04514857  -0.06402944]


### Snet adaptive debug

In [160]:
tracker.pre_M_q.full()

False

In [158]:
best_M = tracker.gen_best_M()

AssertionError: 

In [120]:
pre_M.shape

(1, 14, 14, 1)

In [121]:
snet.adaptive_finetune(sess, pre_M)

ValueError: Dimensions must be equal, but are 14 and 28

### Snet Descrimtive update debug

In [116]:

tracker.distracted()

True

In [118]:
phi = gen_mask_phi(roi.shape, pre_loc)
snet.descrimtive_finetune(sess, s_sel_maps_t0, sgt_M, roi, s_sel_maps, phi)
pre_M = sess.run(snet.pre_M, feed_dict=feed_dict)

# Use location predicted by SNet.
pre_loc = tracker.predict_location(pre_M)



ValueError: Cannot feed value of shape (224, 224, 3) for Tensor 'SNet_tmp2/selected_maps:0', which has shape '(1, 28, 28, 384)'