In [1]:
"""
Main script for FCNT tracker. 
"""

# Import custom class and functions
from inputproducer import LiveInput
from tracker import TrackerContour
from vgg16 import Vgg16
from sgnet import GNet, SNet
from utils import img_with_bbox, IOU_eval, select_fms

import cv2
import numpy as np 
import tensorflow as tf
import matplotlib.pylab as plt

from scipy.misc import imresize
from subprocess import call
import sys
import os
import time

# Command-line flags controlling SGNet training and model naming.
# Defaults are unchanged; only the help texts were corrected (the original
# strings were concatenated without a separating space, and the help for
# `model_name` described an unrelated boolean).
tf.app.flags.DEFINE_integer('iter_epoch_sg', 7,
                            """Number of epochs used to train the SGNets.""")
tf.app.flags.DEFINE_integer('batch_size', 45,
                            """Batch size used to train the SGNets.""")
tf.app.flags.DEFINE_integer('n_samples_per_batch', 5000,
                            """Number of samples requested from gen_batches """
                            """(its n_samples argument) for SGNet training.""")
tf.app.flags.DEFINE_integer('iter_max', 1349,
                            """Maximum number of iterations through the images.""")
tf.app.flags.DEFINE_integer('sel_num', 354,
                            """Number of feature maps selected.""")
tf.app.flags.DEFINE_string('model_name', 'first_LiveFaceXL',
                           """Model name; used to name the TensorBoard summary """
                           """directory and the checkpoint files.""")
FLAGS = tf.app.flags.FLAGS

## Define the various paths used by the script.
DATA_ROOT = 'data/Car1'
PRE_ROOT = os.path.join(DATA_ROOT, 'img_loc')
IMG_PATH = os.path.join(DATA_ROOT, 'img')
GT_PATH = os.path.join(DATA_ROOT, 'groundtruth_rect.txt')
VGG_WEIGHTS_PATH = 'vgg16_weights.npz'

# PRE_ROOT is only defined, not created (its mkdir was already disabled).

# TensorBoard summaries are written to tb_summary/<model_name>.
# makedirs(exist_ok=True) creates the parent and the leaf in one call and
# does not fail when the directory already exists, replacing the
# check-then-mkdir pairs.
TB_SUMMARY = os.path.join('tb_summary', FLAGS.model_name)
os.makedirs(TB_SUMMARY, exist_ok=True)

# Directory holding the saved model checkpoints.
CKPT_PATH = 'checkpoint'
os.makedirs(CKPT_PATH, exist_ok=True)

model_name = FLAGS.model_name + '.ckpt'
CKPT_MODEL = os.path.join(CKPT_PATH, model_name)


def init_vgg(roi_t0, predict=True):
    """
    Open a tf.Session, build the Vgg16 network from VGG_WEIGHTS_PATH and,
    if requested, print the network's class predictions for the given ROI.

    Args:
        roi_t0: ROI extracted from the first frame, handed to
            `vgg.print_prob`. (The previous docstring stated a shape of
            28x28x3 -- TODO confirm against the input size Vgg16 expects.)
        predict: when true, `vgg.print_prob(roi_t0, sess)` is called.
    Returns:
        sess: the tf.Session created here.
        vgg: the Vgg16 instance bound to that session.
    """
    print('Classify it with a pre-trained Vgg16 model.')
    began = time.time()

    session = tf.Session()
    init_op = tf.initialize_all_variables()
    session.run(init_op)

    net = Vgg16(VGG_WEIGHTS_PATH, session)
    if predict:
        net.print_prob(roi_t0, session)

    elapsed = time.time() - began
    print('Forwarding the vgg net cost : %.2f s' % elapsed)
    return session, net

def gen_sel_maps(sess, roi, vgg, idx_c4, idx_c5):
    """
    Run the VGG net on `roi` and return the selected conv4_3 / conv5_3 maps.

    Args:
        sess: tf.Session used to evaluate the VGG tensors.
        roi: a single ROI (its `.shape` has three entries) or an already
            batched collection of ROIs; a single ROI is wrapped in a list.
        vgg: object exposing `imgs`, `conv4_3_norm` and `conv5_3_norm`.
        idx_c4: index applied to the last axis of the conv4_3 output.
        idx_c5: index applied to the last axis of the conv5_3 output.
    Returns:
        (c4_maps, c5_maps): the two outputs restricted to the given indices.
    """
    batch = [roi] if len(roi.shape) == 3 else roi
    conv4_out, conv5_out = sess.run(
        [vgg.conv4_3_norm, vgg.conv5_3_norm],
        feed_dict={vgg.imgs: batch},
    )
    return conv4_out[..., idx_c4], conv5_out[..., idx_c5]


def train_SGNets(sess, img, gt, vgg, snet, gnet, inputProducer, idx_c4, idx_c5):
    """
    Train the SNet and GNet variables by minimizing the sum of their losses
    with gradient descent, writing TensorBoard summaries to TB_SUMMARY.

    Args:
        sess: tf.Session object.
        img: image the training batches are generated from (its `.shape`
            is printed; passed to `inputProducer.gen_batches`).
        gt: ground-truth box passed to `inputProducer.gen_batches`.
        vgg: Vgg16 class instance.
        snet: SNet class instance.
        gnet:  GNet class instance.
        inputProducer: object providing `gen_batches(...)`.
        idx_c4: indices of the selected conv4_3 feature maps (fed to SNet).
        idx_c5: indices of the selected conv5_3 feature maps (fed to GNet).
    Returns:
        None. Training happens through side effects on `sess`.
    """
    # presumably the weight-decay coefficient used by gnet.loss() -- confirm in GNet
    gnet.params['wd'] = 0.5
    gloss, sloss = gnet.loss(), snet.loss()
    loss = gloss  + sloss
    tf.scalar_summary('loss', loss)
    writer = tf.train.SummaryWriter(TB_SUMMARY, sess.graph)
    
    vars_train = gnet.variables + snet.variables

    # Backprop using SGD. Only the variables in vars_train (gnet + snet)
    # are passed to minimize(); no vgg variables are listed there.
    global_step = tf.Variable(0, trainable=False)
    lr_exp = tf.train.exponential_decay(
            0.25, # Initial learning rate 
            global_step, 
            1000, # Decay steps 
            0.8, # Decay rate 
            name='sg_lr')

    tf.scalar_summary('Learning rate', lr_exp)
    optimizer = tf.train.GradientDescentOptimizer(lr_exp)
    train_op = optimizer.minimize(loss, var_list= vars_train, global_step=global_step)
    merged = tf.merge_all_summaries()
    
    print('Generating batches from img size:%s  for trainning.'%str(img.shape))
    sample_batches, target_batches = inputProducer.gen_batches(img, gt, n_samples=FLAGS.n_samples_per_batch, batch_sz=FLAGS.batch_size, pos_ratio=0.5, scale_factors=np.arange(0.5, 5., 0.2)) #np.array([1]))#
    print('Start training the SGNets........ for %s epoches'%FLAGS.iter_epoch_sg)
    # NOTE(review): this saver is never used; the only save call below is commented out.
    saver = tf.train.Saver()
    # `step` counts batches across all epochs; it is not reset per epoch.
    step = 1
    loss_list = []
    for ep in range(FLAGS.iter_epoch_sg):
        print('Total batches in each epoch: ', len(sample_batches))
        for roi, target in zip(sample_batches, target_batches):
            #roi[roi>0] = 1 # neglect gaussian..set to 1 for target arear
            
            # Timer covers the VGG forward pass and the training step below.
            t = time.time()
            c4_maps, c5_maps = gen_sel_maps(sess, roi, vgg, idx_c4, idx_c5)
            
            # GNet is fed the conv5 maps, SNet the conv4 maps; both share the same target.
            fd = {gnet.input_maps: c5_maps, gnet.gt_M: target, 
                  snet.input_maps: c4_maps, snet.gt_M: target}
            
            # Initialization (first batch only): re-initialize the GNet
            # variables until the GNet loss on this batch is <= 1.2.
            # NOTE(review): there is no upper bound on the number of retries.
            if step == 1:
                loss_g = 10
                init_s = 0
                while loss_g > 1.2:
                    init_s += 1
                    sess.run(tf.initialize_variables(gnet.variables))
                    loss_g = sess.run(gloss, feed_dict=fd)
                    print('Initial Gnet Loss: ', loss_g, 'In steps: ', init_s)
                sess.run(tf.initialize_variables(snet.variables + [global_step]))
                
            
            pre_M_g, l, _, lr = sess.run([gnet.pre_M, loss, train_op, lr_exp], feed_dict=fd)
            
            loss_list += [l]
            if l <= 0.1:
                print('break learning!')
                # NOTE(review): this only leaves the inner batch loop; the
                # next epoch still starts.
                break
            if step % 20 == 0:
                
                # Second-order difference of the most recent 19 losses.
                loss_ac = np.diff(np.diff(loss_list[-19:]))
                # NOTE(review): the summary ops below are created inside the
                # loop, i.e. new ops are added on every 20th step.
                loss_ac_summary = tf.scalar_summary('Loss acceleration', loss_ac.mean())
                
                
                summary_img_g = tf.image_summary('pre_M', 
                                                 np.repeat(pre_M_g[...,np.newaxis], 3, axis=-1), name='GMap')

                summary, img_summary_g, ac_loss_summary = sess.run([merged, summary_img_g, loss_ac_summary], feed_dict=fd)

                writer.add_summary(summary, global_step=step)
                writer.add_summary(img_summary_g, global_step=step)
                writer.add_summary(ac_loss_summary, global_step=step)
                
                # NOTE(review): the std is taken over the last 19 losses,
                # although the message below says "Last 10 batches".
                loss_std = np.std(loss_list[-19:])
                if loss_std <= 0.007:
                    
                    print('Stop learning??! Last 10 batches Loss Std: ', loss_std)
                    #break

            #if step % 20 == 0:
                # NOTE(review): the printed 'Step' is (ep+1)*step, which is
                # not the running batch count since `step` is never reset.
                print('Epoch: ', ep+1, 'Step: ', (ep+1)*step, 'Loss : %.2f'%l, \
                    'Speed: %.2f second/batch'%(time.time()-t), 'Lr: ', lr)
                #saver.save(sess, CKPT_MODEL)
            step += 1



print('Reading the first image...')
# Start timestamp of the setup phase.
t_start = time.time()
## Instantiate the live input producer. No image is read at this point;
# only the LiveInput object is created.
inputProducer = LiveInput()




Reading the first image...


In [2]:
# import the necessary packages
#import argparse
import cv2

# State shared with the mouse callback: `refPt` holds the corner points of
# the current selection; `cropping` is declared but never toggled (the
# assignments that would change it are disabled).
refPt = []
cropping = False


def click_and_crop(event, x, y, flags, param):
    """
    OpenCV mouse callback that records a drag as two points in `refPt`.

    A left-button press starts a new selection with the press position;
    a left-button release appends the release position to the current
    selection. All other events are ignored. `flags` and `param` are
    accepted for the callback signature and not used.
    """
    global refPt, cropping

    point = (x, y)
    if event == cv2.EVENT_LBUTTONUP:
        # Button released: store the end point of the drag.
        refPt.append(point)
    elif event == cv2.EVENT_LBUTTONDOWN:
        # Button pressed: discard any earlier selection and start over.
        refPt = [point]


In [3]:
def refPt_2_gt(refPt):
    """
    Convert two corner points into an (x, y, w, h) bounding box.

    The two points may be given in any order (e.g. a drag from bottom-right
    to top-left): the returned (x, y) is always the top-left corner and
    w, h are non-negative. For a top-left to bottom-right drag the result
    is the same as the plain difference of the coordinates.

    Args:
        refPt: sequence of exactly two (x, y) points.
    Returns:
        Tuple (x, y, w, h).
    Raises:
        ValueError: if `refPt` does not contain exactly two 2-element points.
    """
    (xa, ya), (xb, yb) = refPt
    left, top = min(xa, xb), min(ya, yb)
    return (left, top, abs(xb - xa), abs(yb - ya))

In [4]:
# Interactive live-tracking loop on the default camera.
#   mouse drag : select a box in the "image" window
#   c          : confirm the selection as the first-frame target
#   t          : train the SGNets on the confirmed target (skipped when a
#                checkpoint was restored)
#   s          : use the current selection as the start position
#   q          : quit
tracker = TrackerContour()
inputProducer.roi_params['roi_scale'] = 2.5


cap = cv2.VideoCapture(0)
cv2.namedWindow("image")
cv2.setMouseCallback("image", click_and_crop)


# NOTE(review): this Saver is built before init_vgg/SNet/GNet are called in
# this cell, and the restore branch uses `sess` (and later `vgg`, `snet`,
# `gnet`, `idx_c4`, `idx_c5`) without creating them here, so it relies on
# state left over from an earlier run. Left unchanged because a real fix
# needs the selected feature-map indices to be persisted as well.
saver = tf.train.Saver()
saved_ckpt = os.path.join('checkpoint', FLAGS.model_name.split('_')[-1]+'.ckpt')
if os.path.exists(saved_ckpt):
    print('Found saved model %s, restoring! '%saved_ckpt)
    saver.restore(sess, saved_ckpt)
    TrackReady = True
else:
    TrackReady = False

PosReady = False
# Set once a first-frame target (img, gt) has been confirmed with 'c'.
RoiReady = False

try:
    while cap.isOpened():
        # Capture frame-by-frame; stop when the camera delivers no frame
        # instead of crashing on `image.copy()`.
        ret, image = cap.read()
        if not ret or image is None:
            print('Could not read a frame from the camera, stopping.')
            break

        # Copy used for cropping the selected region.
        clone = image.copy()

        key = cv2.waitKey(1) & 0xFF
        # Leave the loop when 'q' is pressed.
        if key == ord('q'):
            break

        # With two reference points and 'c' pressed, crop the region of
        # interest and remember it as the first-frame target.
        if len(refPt) == 2 and key == ord("c"):
            roi = clone[refPt[0][1]:refPt[1][1], refPt[0][0]:refPt[1][0]]
            if roi.size == 0:
                # A click without drag (or a reversed drag) yields an empty
                # crop, which cv2.imshow cannot display.
                print('Empty selection, drag from top-left to bottom-right.')
            else:
                cv2.imshow("CroppedROI", roi)

                gt = refPt_2_gt(refPt)
                img = image
                print(gt, 'gt in first!')

                inputProducer.save_fist_roi_mean(img, gt)
                RoiReady = True

        # Train the SGNets on the confirmed target.
        if key == ord('t') and not TrackReady:
            if not RoiReady:
                # `img` and `gt` only exist after a selection was confirmed.
                print('Select a region and press "c" before training.')
            else:
                roi_t0, _, rz_factor = inputProducer.extract_roi(img, gt)

                # Predicts the first img.
                sess, vgg = init_vgg(roi_t0)
                fd = {vgg.imgs: [roi_t0]}
                gt_M = inputProducer.gen_mask((28,28)) # rank2 array

                ## At t=0. Train S and G Nets
                # 1. Select feature maps from conv4_3 and conv5_3.
                # 2. Instantiate the S and G networks and train them.
                idx_c4 = select_fms(sess, vgg.conv4_3_norm, gt, rz_factor, fd, FLAGS.sel_num)
                idx_c5 = select_fms(sess, vgg.conv5_3_norm, gt, rz_factor, fd, FLAGS.sel_num)
                snet = SNet('SNet', FLAGS.sel_num)
                gnet = GNet('GNet', FLAGS.sel_num)
                train_SGNets(sess, img, gt, vgg, snet, gnet, inputProducer, idx_c4, idx_c5)
                saver.save(sess, saved_ckpt)

                TrackReady = True

        # Record the start position for tracking.
        if key == ord('s'):
            if len(refPt) == 2:
                gt_last = refPt_2_gt(refPt)
                print(gt_last, 'gt in start~!')
                PosReady = True
            else:
                print('Select a region with the mouse before pressing "s".')

        # Track once both a start position and a trained model are available.
        if PosReady and TrackReady:
            img = image.copy()
            roi, _, rz_factor = inputProducer.extract_roi(img, gt_last)

            # Replace low-valued pixels (< noise_value) by the ROI mean.
            noise_value = 10
            roi[roi<noise_value] = roi.mean()

            ## Target localisation: heat maps predicted by GNet and SNet.
            c4_maps, c5_maps = gen_sel_maps(sess, roi, vgg, idx_c4, idx_c5)
            fd = {gnet.input_maps: c5_maps, snet.input_maps: c4_maps}
            pre_M_g, pre_M_s = sess.run([gnet.pre_M, snet.pre_M], feed_dict=fd)

            pre_M = tracker.preporcess_heatmaps(pre_M_g, pre_M_s, resize=(224,224))
            pre_loc = tracker.predict_location(pre_M,gt_last,rz_factor,threshold=np.arange(0.3, 0.9, 0.05))

            # The predicted box becomes the search position of the next frame.
            gt_last = pre_loc
            x,y,w,h = pre_loc
            print('pre_loc', pre_loc)
            cv2.rectangle(image,(x,y),(x+w,y+h),(225,0,0),2)
            font = cv2.FONT_HERSHEY_SIMPLEX
            # NOTE(review): the displayed confidence score is the constant 1.
            cv2.putText(image, 'conf score: %s'%1,(5,20), font, 0.6,(255,0,0),1,cv2.LINE_AA)

            cv2.imshow("pre_M_g", pre_M_g)
            cv2.imshow("pre_M_s", pre_M_s)
            cv2.imshow("ROI", roi)
            # TODO: fine-tune SNet (not implemented here).

            print('Tracking done in step: %s'%tracker.step)

        cv2.imshow("image", image)
finally:
    # Always free the camera and close all open windows, also on errors.
    cap.release()
    cv2.destroyAllWindows()

(214, 281, 110, 158) gt in first!


  roi = convas[cy-half:cy+half, cx-half:cx+half, :]


Classify it with a pre-trained Vgg16 model.
barbershop 0.256563
cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM 0.0426335
cellular telephone, cellular phone, cellphone, cell, mobile phone 0.0425359
web site, website, internet site, site 0.0375001
barber chair 0.0360401
Forwarding the vgg net cost : 4.21 s


  conf_i = roi[c-h_half:c+h_half, c-w_half:c+w_half].sum()


Not found saved model checkpoint/LiveFaceXL.ckpt. Trainning! 
Generating batches from img size:(480, 640, 3)  for trainning.
Start training the SGNets........ for 7 epoches
Total batches in each epoch:  223
Initial Gnet Loss:  2.18148 In steps:  1
Initial Gnet Loss:  3.31207 In steps:  2
Initial Gnet Loss:  3.09164 In steps:  3
Initial Gnet Loss:  2.27267 In steps:  4
Initial Gnet Loss:  11.2435 In steps:  5
Initial Gnet Loss:  3.34961 In steps:  6
Initial Gnet Loss:  2.85988 In steps:  7
Initial Gnet Loss:  1.68124 In steps:  8
Initial Gnet Loss:  1.94571 In steps:  9
Initial Gnet Loss:  10.2861 In steps:  10
Initial Gnet Loss:  4.28106 In steps:  11
Initial Gnet Loss:  2.84455 In steps:  12
Initial Gnet Loss:  9.34216 In steps:  13
Initial Gnet Loss:  2.23038 In steps:  14
Initial Gnet Loss:  8.6919 In steps:  15
Initial Gnet Loss:  10.1598 In steps:  16
Initial Gnet Loss:  2.49069 In steps:  17
Initial Gnet Loss:  1.80017 In steps:  18
Initial Gnet Loss:  4.79867 In steps:  19
Initi

NotFoundError: Tensor name "vgg_1/conv3_3/weights" not found in checkpoint files checkpoint/LiveFaceXL.ckpt
	 [[Node: save_2/restore_slice_62 = RestoreSlice[dt=DT_FLOAT, preferred_shard=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_2/Const_0, save_2/restore_slice_62/tensor_name, save_2/restore_slice_62/shape_and_slice)]]
	 [[Node: save_2/restore_slice_76/_109 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_152_save_2/restore_slice_76", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]

Caused by op 'save_2/restore_slice_62', defined at:
  File "/home/makehave/anaconda3/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/makehave/anaconda3/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/traitlets/config/application.py", line 653, in launch_instance
    app.start()
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-1a897d020642>", line 57, in <module>
    saver = tf.train.Saver()
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 986, in __init__
    self.build()
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1015, in build
    restore_sequentially=self._restore_sequentially)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 620, in build
    restore_sequentially, reshape)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 357, in _AddRestoreOps
    tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 270, in restore_op
    preferred_shard=preferred_shard))
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/io_ops.py", line 204, in _restore_slice
    preferred_shard, name=name)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gen_io_ops.py", line 359, in _restore_slice
    preferred_shard=preferred_shard, name=name)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 749, in apply_op
    op_def=op_def)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2380, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/makehave/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1298, in __init__
    self._traceback = _extract_stack()

NotFoundError (see above for traceback): Tensor name "vgg_1/conv3_3/weights" not found in checkpoint files checkpoint/LiveFaceXL.ckpt
	 [[Node: save_2/restore_slice_62 = RestoreSlice[dt=DT_FLOAT, preferred_shard=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_2/Const_0, save_2/restore_slice_62/tensor_name, save_2/restore_slice_62/shape_and_slice)]]
	 [[Node: save_2/restore_slice_76/_109 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_152_save_2/restore_slice_76", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]


In [None]:
# Scratch cell: display the `pre_M` heat map left over from the tracking loop.
cv2.imshow("pre_M", pre_M)

In [None]:
# Scratch cell: inspect the shape of `img` as a string.
str(img.shape)

In [None]:
# Scratch cell: inspect the (x, y, w, h) box for the current mouse selection.
refPt_2_gt(refPt)

In [None]:
# Scratch cell: inspect the shape of the last captured frame.
image.shape