In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [2]:
"""
Main script for FCNT tracker. 
"""

# Import custom class and functions
from inputproducer import InputProducer
from tracker import TrackerVanilla
from vgg16 import Vgg16
from sgnet import GNet, SNet
from utils import calcEntropy, img_with_bbox, IOU_eval, select_fms, gauss2d, compare_images

import cv2
import numpy as np 
import tensorflow as tf
import matplotlib.pylab as plt

from skimage.filters.rank import entropy
from scipy.misc import imresize
from subprocess import call
import sys
import os
import time

tf.app.flags.DEFINE_integer('iter_epoch_sg', 8,
                          """Number of epoches for trainning"""
                          """SGnet works""")
tf.app.flags.DEFINE_integer('batch_size', 25,
                          """Batch size for SGNet trainning"""
                          """SGnet works""")
tf.app.flags.DEFINE_integer('n_samples_per_batch', 5000,
                          """Number of samples per batch for trainning"""
                          """SGnet works""")
tf.app.flags.DEFINE_integer('iter_max', 1349,
							"""Max iter times through imgs""")
tf.app.flags.DEFINE_integer('sel_num', 354,
                          """Number of feature maps selected.""")
tf.app.flags.DEFINE_string('model_name', 'contour_carsC4-Norandom-wd05-largePos-largetrain-sgnet-smallbz',
						"""true for train, false for eval""")
FLAGS = tf.app.flags.FLAGS

## Define varies pathes
DATA_ROOT = 'data/Car1'
PRE_ROOT = os.path.join(DATA_ROOT, 'img_loc')
IMG_PATH = os.path.join(DATA_ROOT, 'img')
GT_PATH = os.path.join(DATA_ROOT, 'groundtruth_rect.txt')
VGG_WEIGHTS_PATH = 'vgg16_weights.npz'

if not os.path.isdir(PRE_ROOT):
    os.mkdir(PRE_ROOT)


TB_SUMMARY = os.path.join('tb_summary', FLAGS.model_name)
if not os.path.isdir('tb_summary'):
    os.mkdir('tb_summary')
if not os.path.isdir(TB_SUMMARY):
    os.mkdir(TB_SUMMARY)

CKPT_PATH = 'checkpoint'
if not os.path.isdir(CKPT_PATH):
    os.mkdir(CKPT_PATH)

model_name = FLAGS.model_name+'.ckpt'
CKPT_MODEL = os.path.join(CKPT_PATH, model_name)

def init_vgg(roi_t0):
    """
    Initialize a tf.Session and a vgg16 graph. Followed
    by forwarding the vgg net once to predict top5 class labels
    for image generated in the first frame.

    Args:
        roi_t0: np.ndarray with shape (28x28x3), extracted roi in the first frame.
    Returns:
        sess: tf.Session object.
        vgg: Vgg16 class instance.
    """
    print('Classify it with a pre-trained Vgg16 model.')
    t_start = time.time()
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())
    vgg = Vgg16(VGG_WEIGHTS_PATH, sess)
    #vgg.print_prob(roi_t0, sess)
    print('Forwarding the vgg net cost : %.2f s'%(time.time() - t_start))
    return sess, vgg

def gen_sel_maps(sess, roi, vgg, idx_c4, idx_c5):
    """Returns selected c4 and c5 maps"""
    if len(roi.shape) == 3: roi = [roi]
    fd = {vgg.imgs : roi}
    c4_arr, c5_arr = sess.run([vgg.conv4_3_norm, vgg.conv5_3_norm], feed_dict=fd)
    c4_maps = c4_arr[...,idx_c4]
    c5_maps = c5_arr[...,idx_c5]
    return c4_maps, c5_maps


def train_SGNets(sess, img, gt, vgg, snet, gnet, inputProducer, idx_c4, idx_c5):
    """
    Train SGnets' variables by minimizing a composite L2 regression losses.

    Args:
        sess: tf.Session object.
        vgg: Vgg16 class instance.
        snet: SNet class instance.
        gnet:  GNet class instance.
        inputProducer: InputProducer class instance.
    """
    snet.params['wd'] = 0.05
    
    loss = snet.loss + gnet.loss
    tf.scalar_summary('loss', loss)
    writer = tf.train.SummaryWriter(TB_SUMMARY, sess.graph)
    
    vars_train = snet.variables + gnet.variables

    # Backprop using SGD and updates vgg variables and sgnets variables
    global_step = tf.Variable(0, trainable=False)
    lr_exp = tf.train.exponential_decay(
            0.25, # Initial learning rate 
            global_step, 
            1000, # Decay steps 
            0.8, # Decay rate 
            name='sg_lr')

    tf.scalar_summary('Learning rate', lr_exp)
    optimizer = tf.train.GradientDescentOptimizer(lr_exp)
    train_op = optimizer.minimize(loss, var_list= vars_train, global_step=global_step)
    merged = tf.merge_all_summaries()

    sample_batches, target_batches = inputProducer.gen_batches(img, gt, n_samples=FLAGS.n_samples_per_batch,\
                                                               batch_sz=FLAGS.batch_size, pos_ratio=0.9, \
                                                               scale_factors=np.arange(1, 3., 0.2),\
                                                               random_brightness=False) 
    print('Start training the SGNets........ for %s epoches'%FLAGS.iter_epoch_sg)
    saver = tf.train.Saver()
    step = 1
    loss_list = []
    for ep in range(FLAGS.iter_epoch_sg):
        print('Total batches in each epoch: ', len(sample_batches))
        for roi, target in zip(sample_batches, target_batches):
            #roi[roi>0] = 1 # neglect gaussian..set to 1 for target arear
            
            t = time.time()
            c4_maps, c5_maps = gen_sel_maps(sess, roi, vgg, idx_c4, idx_c5)
            
            fd = {gnet.input_maps: c5_maps, gnet.gt_M: target, snet.input_maps: c4_maps, snet.gt_M: target}
            
            # Initialization 
            if step == 1:
                loss_g = 10
                init_s = 0
                while loss_g > 1.5:
                    init_s += 1
                    sess.run(tf.initialize_variables(gnet.variables))
                    loss_g = sess.run(gnet.loss, feed_dict=fd)
                    print('Initial Gnet Loss: ', loss_g, 'In steps: ', init_s)
                sess.run(tf.initialize_variables(snet.variables + [global_step]))
                
            
            l, _, lr = sess.run([loss, train_op, lr_exp], feed_dict=fd)
            
            loss_list += [l]
            if l <= 0.1:
                print('break learning!')
                break
            if step % 20 == 0:
                
                loss_ac = np.diff(np.diff(loss_list[-19:]))
                loss_ac_summary = tf.scalar_summary('Loss acceleration', loss_ac.mean())
                
                
       
                summary, ac_loss_summary = sess.run([merged, loss_ac_summary], feed_dict=fd)

                writer.add_summary(summary, global_step=step)
                writer.add_summary(ac_loss_summary, global_step=step)
                
                loss_std = np.std(loss_list[-19:])
                if loss_std <= 0.007:
                    
                    print('Stop learning! Last 10 batches Loss Std: ', loss_std)
                    break

            #if step % 20 == 0:
                print('Epoch: ', ep+1, 'Step: ', (ep+1)*step, 'Loss : %.2f'%l, \
                    'Speed: %.2f second/batch'%(time.time()-t), 'Lr: ', lr)
                #saver.save(sess, CKPT_MODEL)
            step += 1



print('Reading the first image...')
t_start = time.time()
## Instantiate inputProducer and retrive the first img
# with associated ground truth. 
inputProducer = InputProducer(IMG_PATH, GT_PATH)
img, gt, s  = next(inputProducer.gen_img)
roi_t0, _, rz_factor = inputProducer.extract_roi(img, gt)



Reading the first image...


In [3]:
# Predicts the first img.
sess, vgg = init_vgg(roi_t0)
fd = {vgg.imgs: [roi_t0]}
gt_M = inputProducer.gen_mask((28,28)) # rank2 array


## At t=0. Train S and G Nets 
# Instainate SGNets with conv tensors and training.
# 1. feature maps selection
# 2. Train G and S networks.
idx_c4 = select_fms(sess, vgg.conv4_3_norm, gt, rz_factor, fd, FLAGS.sel_num)
idx_c5 = select_fms(sess, vgg.conv5_3_norm, gt, rz_factor, fd, FLAGS.sel_num)
snet = SNet('SNet', FLAGS.sel_num)
gnet = GNet('GNet', FLAGS.sel_num)


saver = tf.train.Saver()
saved_ckpt = os.path.join('checkpoint', FLAGS.model_name.split('_')[-1]+'.ckpt')
if os.path.exists(saved_ckpt):
    print('Found saved model %s, restoring! '%saved_ckpt)
    saver.restore(sess, saved_ckpt)
else:
    print('Not found saved model %s. Trainning! '%saved_ckpt)
    train_SGNets(sess, img, gt, vgg, snet, gnet, inputProducer, idx_c4, idx_c5)
    saver.save(sess, saved_ckpt)





Classify it with a pre-trained Vgg16 model.
Forwarding the vgg net cost : 5.00 s


  conf_i = roi[c-h_half:c+h_half, c-w_half:c+w_half].sum()


Found saved model checkpoint/carsC4-Norandom-wd05-largePos-largetrain-sgnet-smallbz.ckpt, restoring! 


In [4]:
import skimage.io
import skimage.transform
import cv2
show = skimage.io.imshow
%matplotlib inline

In [5]:
## Visual how does GSnet performs.

inputProducer = InputProducer(IMG_PATH, GT_PATH)
inputProducer.roi_params['roi_scale'] = 1.5

for i in range(len(inputProducer.imgs_path_list)-1):
    
    img, gt_cur, s  = next(inputProducer.gen_img)
    img = img.astype(np.float)
  

    ## Crop a rectangle ROI region centered at last target location.
    roi, _, rz_factor = inputProducer.extract_roi(img, gt_cur)
    noise_value = 50#np.argmax(hist)*0.1
    roi[roi<noise_value] = 50
    
    c4_maps, c5_maps = gen_sel_maps(sess, roi, vgg, idx_c4, idx_c5)
    fd = {gnet.input_maps: c5_maps, snet.input_maps: c4_maps}

    pre_M_g, pre_M_s = sess.run([gnet.pre_M, snet.pre_M], feed_dict=fd)
    
    pre_M_g = imresize(pre_M_g, (224,224)).astype(np.float32)
    pre_M_s = imresize(pre_M_s, (224,224)).astype(np.float32)
    pre_M = pre_M_g + pre_M_s
    
    size = pre_M_s.shape[0]
    g = gauss2d((size,size), sigma=0.05)
    g /= g.max()

    _,_,w,h = gt_cur
    half, ratio = size/0.5, 1
    #gaus_filter[half-(ratio*h): half+(ratio*h), half-(ratio*w):half+(ratio*w)] = 1
    pre_M_filtered = pre_M_s * g

    """
    cv2.imshow('preMs_after_filtering', pre_M_filtered/pre_M_filtered.max())
    
    cv2.imshow('img', img/img.max())
    cv2.imshow('roi', roi)
    cv2.imshow('pre_M_g', pre_M_g/pre_M_g.max())
    cv2.imshow('pre_M_s', pre_M_s/pre_M_s.max())
    cv2.imshow('pre_M', pre_M/pre_M.max())
    cv2.waitKey(1)
    if i > 100:
        time.sleep(0.5)
    """

  roi = convas[cy-half:cy+half, cx-half:cx+half, :]


In [6]:
cv2.destroyAllWindows()

In [8]:
## Instainate a tracker object, set apoproaite initial parameters.
tracker = TrackerVanilla(gt)
inputProducer.roi_params['roi_scale'] = 1.5

print("Total time cost for initialization : %.2f s"%(time.time() - t_start))


# Get first c4_maps for img in first frame
c4_maps_0, _ = gen_sel_maps(sess, roi_t0, vgg, idx_c4, idx_c5)


# Iter imgs

inputProducer = InputProducer(IMG_PATH, GT_PATH)
for _ in range(1):
    img, gt, s  = next(inputProducer.gen_img)
x,y,w,h = gt
roi_mean = img[y:y+h, x:x+w].mean()

gt_last = gt

gt_list = []
pre_M_list = []
roi_list = []
res_list = []q
diff_list = []
m_list, z_list =[], []
font = cv2.FONT_HERSHEY_SIMPLEX

targets_records = [gt_M]
c4_maps_records = c4_maps_0
c4_maps_gt, targets_gt = np.repeat(c4_maps_0,2,axis=0), \
            np.repeat(np.array([gt_M]),2, axis=0)
%timeit
def compute_score(loc_list, last_pre_loc, arear_list):
    scores_dis = []
    x0, y0, w0, h0 = last_pre_loc
    for loc in loc_list:
        x, y, w, h = loc
        scores_dis += [abs(x-x0)+ abs(y-y0)+ abs(w-w0)+ abs(h-h0)]
    scores_dis = np.array(scores_dis) / max(scores_dis)
    arear_list = np.array(arear_list)/ max(arear_list)
    scores = scores_dis / (arear_list*0.51)
    best_idx = np.argmin(scores)
    return loc_list[best_idx], best_idx, scores[best_idx]


conf_scores = []
total_arear = 224**2

for i in range(len(inputProducer.imgs_path_list)-1):


    t_enter = time.time()
    # Gnerates next frame infos
    
    img, gt_cur, s  = next(inputProducer.gen_img)
    img = img.astype(np.float)

    #""
    x,y,w,h = gt_last  
    roi_cur_mean = img[y:y+h, x:x+w].mean()
    img[y:y+h, x:x+w] += np.ones([h, w, 3])*int(roi_mean-roi_cur_mean)
    img[img>255] = 255       

    #print(gt_last)
    ## Crop a rectangle ROI region centered at last target location.
    roi, _, rz_factor = inputProducer.extract_roi(img, gt_last)
    
    hist = np.bincount(roi.ravel(),minlength=256)
    
    noise_value = 50#np.argmax(hist)*0.1
    
    roi[roi<noise_value] = 50 

    
    entropy_roi = calcEntropy(roi)

    ## Perform Target localiation predicted by GNet
    # Get heat map predicted by GNet
    c4_maps, c5_maps = gen_sel_maps(sess, roi, vgg, idx_c4, idx_c5)
    fd = {gnet.input_maps: c5_maps, snet.input_maps: c4_maps}

    pre_M_g, pre_M_s = sess.run([gnet.pre_M, snet.pre_M], feed_dict=fd)
    
    pre_M_g = imresize(pre_M_g, (224,224)).astype(np.float32)
    pre_M_s = imresize(pre_M_s, (224,224)).astype(np.float32)

   

    ###########
    #pre_M = (pre_M_g+pre_M_s)/(pre_M_g+pre_M_s).max()   
    pre_M = pre_M_s/pre_M_s.max()
    
    #cv2.imshow('pre_M', pre_M)
        
        
    
    if i == 0:
        pre_M[pre_M<(0.7)] = 0     
        cvuint8 = cv2.convertScaleAbs(pre_M)
        img2, contours, hierarchy = cv2.findContours(cvuint8, 1, 2)
        assert len(contours)>=1

        arear_list, bbox_list, COM_list = [], [], []
        for cnt in contours:

            x,y,w,h = cv2.boundingRect(cnt)
            bbox_list += [(x,y,w,h)]
            arear_list += [cv2.contourArea(cnt)]

            M = cv2.moments(cnt)
            cx = 22# int(M['m10']/M['m00'])
            cy = 33#int(M['m01']/M['m00'])
            COM_list += [(cx, cy)]
        best_idx = np.argmax(arear_list)
        pre_loc_roi = bbox_list[best_idx]
        com = COM_list[best_idx]
        last_arear = arear_list[best_idx]
        last_pre_loc_roi = pre_loc_roi
        conf_score = 1
    else:
        pre_M_filtered = pre_M.copy()
        
        #"""
        cv2.imshow('preM_before_filtering', pre_M_filtered)

        #pre_M_filtered = entropy(pre_M_filtered, np.ones(last_pre_loc_roi[2:]), shift_x=True, shift_y=True)
        """
        #size = pre_M_s.shape[0]
        g = gauss2d((224,224), sigma=2)
        g /= g.max()

        #_,_,w,h = gt_cur
        #half, ratio = size/0.5, 1
        #gaus_filter[half-(ratio*h): half+(ratio*h), half-(ratio*w):half+(ratio*w)] = 1
        pre_M_filtered = pre_M_s * g
        pre_M_filtered /= pre_M_filtered.max()
                
   
        cv2.imshow('preM_after_filtering', pre_M_filtered)       
        """

        arear_list, bbox_list, COM_list, displace_list = [], [], [], []
        for shrehold in np.arange(0.3, 0.9, 0.05):
            pre_M_tmp = pre_M_filtered.copy()
            pre_M_tmp[pre_M_tmp<shrehold] = 0
            #show(pre_M)
            #plt.show()
            cvuint8 = cv2.convertScaleAbs(pre_M_tmp)
            img2, contours, hierarchy = cv2.findContours(cvuint8, 1, 2)
            if len(contours) < 1: continue

            for cnt in contours:
                x,y,w,h = cv2.boundingRect(cnt)
                if x+w >= 224 or y+h >224: continue
                if w/rz_factor>224 or h/rz_factor>224: continue
                bbox_list += [(x,y,w,h)]
                arear_list += [cv2.contourArea(cnt)]


        pre_loc_roi, idx, conf_score = compute_score(bbox_list, last_pre_loc_roi, arear_list)
        conf_scores += [conf_score]
        last_pre_loc_roi = pre_loc_roi
        
        entropy_prem = calcEntropy(pre_M)
        x,y,w,h = pre_loc_roi
        cv2.rectangle(pre_M,(x,y),(x+w,y+h),(225,0,0),2)
        cv2.putText(pre_M, 'entropy: %.2f'%(entropy_prem),(5,20), font, 0.4,(255,0,0),1,cv2.LINE_AA)

        cv2.imshow('pre_M_loc', pre_M/pre_M.max())
    
        
        
    
        



    # centeroid displacement
    x,y,w,h = pre_loc_roi
    dx, dy = (x+(w/2)-112)/rz_factor, (y+(h/2)-112)/rz_factor
    pre_loc = [gt_last[0]+dx, gt_last[1]+dy, w/rz_factor, h/rz_factor]
    pre_loc = [abs(int(i)) for i in pre_loc]

    

    print(pre_loc, '<<t loc roi', gt_cur, '<<Ground truth')
    
    diff = np.array(pre_loc) - np.array(gt_cur)
    if sum([abs(i) for i in diff]) > 50:
        diff_list += [i-1]
    #print('Step: ',i-1, 'pre - actual : ', diff, 'Conf level:', tracker.cur_best_conf)


    # Draw bbox on image. And print associated IoU score.
    x,y,w,h = [int(i) for i in pre_loc]
    xt, yt, wt, ht = gt_cur
    xl, yl, wl, hl = [int(i) for i in gt_last]
    imgcopy = img.copy().astype(np.uint8)
    cv2.rectangle(imgcopy,(x,y),(x+w,y+h),(225,0,0),2)
    cv2.rectangle(imgcopy,(xt,yt),(xt+wt,yt+ht),(0,225,0),1)

    #cv2.rectangle(imgcopy,(cx_pre-1,cy_pre-1),(cx_pre+1,cy_pre+1),(0,225,0),1)
    
    cv2.putText(imgcopy, 'Frame: %s IOU score: %.2f'%(i, IOU_eval(img, gt_cur, pre_loc)),(5,20), font, 0.6,(255,0,0),1,cv2.LINE_AA)

    file_name = FLAGS.model_name + inputProducer.imgs_path_list[i-1].split('/')[-1]
    file_name = os.path.join(PRE_ROOT, file_name)
    
    x,y,w,h = pre_loc_roi
    test = cv2.rectangle(roi,(x,y),(x+w,y+h),(225,0,0),1)
    plt.imsave(file_name, imgcopy)
    
    cv2.putText(roi, 'entropy: %.2f'%(entropy_roi),(5,20), font, 0.4,(255,0,0),1,cv2.LINE_AA)

    cv2.imshow('roi', roi)
    cv2.imshow('result', imgcopy)
    


    #show(imgcopy)
    #plt.show()

    res_list += [imgcopy]

    gt_last = pre_loc
    gt_list += [pre_loc]
    
    
    # Finetune Snet
    gauss = gauss2d((pre_loc_roi[3],pre_loc_roi[2]))

    x,y,w,h = pre_loc_roi
    gt_M_t = np.zeros(roi.shape[:2])
    gt_M_t[y:y+h, x:x+w] = gauss#np.repeat(gauss[...,np.newaxis], 3, axis=-1)
    gt_M_t = imresize(gt_M_t, (28,28))
    gt_M_t= gt_M_t/gt_M_t.max()


    targets_records = np.concatenate((targets_records, [gt_M_t]), axis=0)
    
    c4_maps_t, _ = gen_sel_maps(sess, roi , vgg, idx_c4, idx_c5)
    
    c4_maps_records = np.concatenate((c4_maps_records, c4_maps_t), axis=0)
    #c4_maps = c4_maps_t

    
    if i >11:
        c4_maps_records = c4_maps_records[-1:,...]
        targets_records = targets_records[-1:,...]

    c4_maps_records = np.concatenate((c4_maps_records, c4_maps_gt), axis=0)
    targets_records = np.concatenate((targets_records, targets_gt),axis=0)

    feed_dict_s = {snet.input_maps: c4_maps_records, snet.gt_M: targets_records}        

    #optimizer = tf.train.GradientDescentOptimizer(0.020)
    snet.params['wd']=0.001

    iter_nums = 10
    lr = 0.19
    
    optimizer = tf.train.GradientDescentOptimizer(lr)
    train_op = optimizer.minimize(snet.loss, var_list=snet.variables)
    _,prems, loss_ = sess.run([train_op, snet.pre_M, snet.loss], feed_dict = feed_dict_s)
    
    iter_nums = int(loss_*10)
    lr = loss_
    optimizer = tf.train.GradientDescentOptimizer(lr)
    train_op = optimizer.minimize(snet.loss, var_list=snet.variables)
    
    for s in range(iter_nums):
        _,prems, loss_ = sess.run([train_op, snet.pre_M, snet.loss], feed_dict = feed_dict_s)
        #print('loss: ', loss_)
        if s == 0:
            loss_s = loss_
            print('Snet update loss: ', loss_)
    print('Frame: ', i, 'Time consumed: ',time.time()-t_enter)
    
  
    cv2.waitKey(1)

i am new!
Total time cost for initialization : 93.16 s
[23, 87, 61, 51] <<t loc roi [23, 88, 66, 55] <<Ground truth
Snet update loss:  0.176095
Frame:  0 Time consumed:  1.086376667022705
[25, 88, 57, 47] <<t loc roi [23, 89, 66, 54] <<Ground truth
Snet update loss:  0.246978
Frame:  1 Time consumed:  1.2283275127410889
[28, 91, 54, 43] <<t loc roi [24, 89, 66, 54] <<Ground truth
Snet update loss:  0.184678
Frame:  2 Time consumed:  1.2228755950927734
[28, 91, 57, 41] <<t loc roi [24, 89, 65, 54] <<Ground truth
Snet update loss:  0.273436
Frame:  3 Time consumed:  1.24898099899292
[27, 92, 59, 45] <<t loc roi [24, 89, 65, 54] <<Ground truth
Snet update loss:  0.181108
Frame:  4 Time consumed:  1.2684895992279053
[26, 92, 62, 48] <<t loc roi [24, 89, 65, 53] <<Ground truth
Snet update loss:  0.212155
Frame:  5 Time consumed:  1.2031002044677734
[25, 91, 64, 51] <<t loc roi [24, 89, 64, 53] <<Ground truth
Snet update loss:  0.177215
Frame:  6 Time consumed:  1.1860017776489258
[25, 90, 6



Snet update loss:  0.0864282
Frame:  81 Time consumed:  1.8009722232818604
[50, 82, 49, 39] <<t loc roi [56, 86, 42, 35] <<Ground truth
Snet update loss:  0.101421
Frame:  82 Time consumed:  1.9509401321411133
[51, 83, 46, 38] <<t loc roi [57, 85, 42, 35] <<Ground truth
Frame:  83 Time consumed:  1.2282404899597168
[53, 83, 43, 36] <<t loc roi [57, 85, 42, 35] <<Ground truth
Snet update loss:  0.0973652
Frame:  84 Time consumed:  2.794286012649536
[55, 84, 41, 35] <<t loc roi [57, 86, 41, 35] <<Ground truth
Snet update loss:  0.105639
Frame:  85 Time consumed:  1.971954107284546
[55, 85, 44, 34] <<t loc roi [57, 86, 41, 34] <<Ground truth
Snet update loss:  0.107167
Frame:  86 Time consumed:  2.226466178894043
[54, 84, 45, 37] <<t loc roi [57, 86, 41, 34] <<Ground truth
Snet update loss:  0.0976585
Frame:  87 Time consumed:  2.228804111480713
[54, 84, 46, 37] <<t loc roi [58, 86, 41, 34] <<Ground truth
Frame:  88 Time consumed:  1.2374587059020996
[53, 85, 44, 37] <<t loc roi [58, 86, 

ValueError: operands could not be broadcast together with shapes (131,142,3) (144,142,3) (131,142,3) 

In [None]:

vid_path_prefix = os.path.join(PRE_ROOT, FLAGS.model_name) 
os.system('ffmpeg -framerate 25 -i %s%%04d.jpg -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p %s.mp4'\
          %(vid_path_prefix, FLAGS.model_name+'SNETOnlybatchUpdate222'))