In [None]:
import tensorflow as tf
# Report which TensorFlow build is in use and stop early on pre-1.0 installs.
print('Using Tensorflow '+tf.__version__)
# NOTE(review): this compares version *strings*, i.e. lexicographically rather
# than numerically. It appears to give the intended answer for 0.x / 1.x
# release strings, but a parsed-version comparison would be more robust.
assert tf.__version__>='1.0.0', ('You should use Tensorflow 1.0 or superior')
import matplotlib;
from PIL import Image
import sys
sys.path.append('../')
import os.path
import csv
import numpy as np

In [None]:
from src.argparse import argparse
from src.region_to_bbox import region_to_bbox
from src.pprint_params import pprint_params
from src.crops import *
from src.siamese import import_siamese
from src.visualization import *

In [None]:
# Notebook-specific overrides for the hyper-parameter, evaluation and run
# settings, handed to the project's settings loader.
# NOTE: `argparse` here is the helper imported from src.argparse, not the
# standard-library module of the same name. Presumably it merges these
# overrides with the project defaults -- confirm in src/argparse.py.
hp, evaluation, run, env, design = argparse(
    {"z_lr": 0},                        # hyper-parameter overrides
    {"video": "vot2016_ball1"},         # sequence to evaluate
    {"visualization": 1, "debug": 1},   # enable frame and crop display below
)

In [None]:
# Folder that holds the frames of the selected sequence.
video_folder = os.path.join(env.root_dataset, evaluation.dataset, evaluation.video)

# Full path of every .jpg frame, sorted by name. Joining with '' appends the
# trailing path separator, so the bare file name can simply be concatenated.
frame_name_list = sorted(
    os.path.join(video_folder, '') + frame_file
    for frame_file in os.listdir(video_folder)
    if frame_file.endswith(".jpg")
)
num_frames = len(frame_name_list)

# PIL reports the size as (width, height); store it as (height, width).
with Image.open(frame_name_list[0]) as img:
    frame_sz = np.asarray(img.size[::-1])

In [None]:
# read the initialization from ground truth
gt_file = os.path.join(video_folder, 'groundtruth.txt')
# genfromtxt collapses a one-line file into a 1-D array, in which case len(gt)
# would count columns instead of frames; force one row per annotated frame.
gt = np.atleast_2d(np.genfromtxt(gt_file, delimiter=','))
assert len(gt) == len(frame_name_list), ('Number of frames and number of GT lines should be equal.')
# bbox is in format <cx,cy,w,h>
init_bbox = region_to_bbox(gt[evaluation.start_frame])
# stores tracker's output for evaluation (one 4-value row per frame)
bboxes = np.zeros((num_frames,4))

In [None]:
# Scale pyramid: hp.scale_num multiplicative factors hp.scale_step**e, with the
# exponents e evenly spaced and symmetric around 0.
# Floor division (//) pins the half-width to the value this expression has
# under Python 2 integer division (e.g. 3 scales -> exponents -1, 0, 1). With
# true division the ceil() would widen the range to -2, 0, 2.
scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num//2), np.ceil(hp.scale_num//2), hp.scale_num)
# cosine window to penalize large displacements
hann_1d = np.expand_dims(np.hanning(design.score_sz*hp.response_up), axis=0)
# outer product of the 1-D window with itself gives the 2-D window ...
penalty = np.transpose(hann_1d) * hann_1d
# ... which is then normalised so that its entries sum to 1
penalty = penalty / np.sum(penalty)

In [None]:
# tracker's state initializations
# init_bbox is <cx,cy,w,h>; the state is kept in (row, col) order, i.e.
# pos = (cy, cx) and target_sz = (h, w).
pos = [init_bbox[1],init_bbox[0]]
target_sz = [init_bbox[3],init_bbox[2]]
# amount of context added around the target on each axis
context = design.context*sum(target_sz)
# Convert to an array explicitly: `list + scalar` only works when the scalar
# happens to be a NumPy type and raises TypeError for plain Python floats.
crop_sz = np.asarray(target_sz) + context
# side of the square exemplar crop (geometric mean of the padded sides)
z_sz = np.sqrt(np.prod(crop_sz))
# float() guards against integer division truncating the search/exemplar
# ratio under Python 2 when both sizes are ints.
x_sz = float(design.search_sz)/design.exemplar_sz * z_sz
# exemplar / search-area sizes at every scale of the pyramid
scaled_exemplar = z_sz * scale_factors
scaled_search_area = x_sz * scale_factors

In [None]:
# Lower / upper bounds used to saturate how far the exemplar (z) and search (x)
# patches may shrink or grow relative to their initial sizes.
min_z, max_z = hp.scale_min * z_sz, hp.scale_max * z_sz
min_x, max_x = hp.scale_min * x_sz, hp.scale_max * x_sz

In [None]:
#### DEFINE PLACEHOLDERS ####
# Six float64 inputs fed at run time: the target position (x, y), the exemplar
# size, and the three search-area sizes (one per scale). They are created in
# the order listed, so the auto-generated op names are unaffected.
(pos_x_ph, pos_y_ph,
 z_sz_ph,
 x_sz0_ph, x_sz1_ph, x_sz2_ph) = (tf.placeholder(tf.float64) for _ in range(6))

In [None]:
#### BUILD THE GRAPH #####

# Input pipeline: a queue of frame paths, kept in list order (no shuffling),
# from which a whole-file reader pulls one file at a time.
filename_queue = tf.train.string_input_producer(
    frame_name_list,
    shuffle=False,
    capacity=num_frames,
)
image_reader = tf.WholeFileReader()
# read() yields a (key, value) pair; only the file contents are needed
_, image_file = image_reader.read(filename_queue)

# JPEG bytes -> image tensor, cast to int32
image = tf.cast(tf.image.decode_jpeg(image_file), tf.int32)

# mean over the first two axes of the image, used as fill value when padding
avg_chan = tf.cast(tf.reduce_mean(image, axis=(0, 1)), tf.int32)

# Exemplar branch: pad the frame (if necessary) for a z_sz-sized patch around
# the target position, then extract the z crops (all identical).
frame_padded_z, npad_z = pad_frame(
    image, frame_sz, pos_x_ph, pos_y_ph, z_sz_ph, avg_chan)
z_crops = extract_crops_z(
    frame_padded_z, npad_z, pos_x_ph, pos_y_ph, z_sz_ph, design.exemplar_sz)

# Search branch: the padding is sized by x_sz2_ph (presumably the largest of
# the three search sizes -- confirm), then one crop per scale is extracted.
frame_padded_x, npad_x = pad_frame(
    image, frame_sz, pos_x_ph, pos_y_ph, x_sz2_ph, avg_chan)
x_crops = extract_crops_x(
    frame_padded_x, npad_x, pos_x_ph, pos_y_ph,
    x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

In [None]:
#### START TRACKING WITHIN A TF SESSION ####

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # Coordinate the loading of image files.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # Always stop and join the queue-runner threads, even if a run() or a
    # display call below raises; otherwise they would be left running while
    # the session is torn down.
    try:
        # Fetch a frame together with the exemplar crops taken around the
        # initial position, at index 1 of the scale pyramid (the middle entry
        # when three scales are used).
        image_, z_crops_ = sess.run([image, z_crops], feed_dict={
                                        pos_x_ph:pos[1],
                                        pos_y_ph:pos[0],
                                        z_sz_ph:scaled_exemplar[1]})

        if run.visualization:
            show_frame(image_, 1)

        if run.debug:
            show_crops(z_crops_, 2)

        # Get an image tensor from the queue on every iteration and extract
        # the search crops at the three scales. The upper bound of 5 is a
        # hard-coded debugging limit, not the length of the sequence.
        # `range` (rather than `xrange`) works on both Python 2 and Python 3.
        for i in range(evaluation.start_frame, 5):
            x_crops_ = sess.run([x_crops], feed_dict={
                                    pos_x_ph:pos[1],
                                    pos_y_ph:pos[0],
                                    x_sz0_ph:scaled_search_area[0],
                                    x_sz1_ph:scaled_search_area[1],
                                    x_sz2_ph:scaled_search_area[2]})
            if run.debug:
                # sess.run was given a list, so drop the singleton dimensions
                show_crops(np.squeeze(x_crops_), 3)
    finally:
        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

In [None]:
# Close all figures opened by the display calls above.
# NOTE(review): `plt` is never imported explicitly in this notebook; presumably
# it is matplotlib.pyplot leaking in through the wildcard
# `from src.visualization import *` -- confirm, or import it explicitly.
plt.close('all')