<a href="https://colab.research.google.com/github/mytimeyinji/yinji/blob/master/ceshi2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.

"""Miscellaneous Utilities."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import errno
import json
import logging
import os
import re
import sys
from os import path as osp

try:
  import pynvml  # nvidia-ml provides utility for NVIDIA management

  HAS_NVML = True
except:
  HAS_NVML = False


def auto_select_gpu():
  """Select gpu which has largest free memory"""
  if HAS_NVML:
    pynvml.nvmlInit()
    deviceCount = pynvml.nvmlDeviceGetCount()
    largest_free_mem = 0
    largest_free_idx = 0
    for i in range(deviceCount):
      handle = pynvml.nvmlDeviceGetHandleByIndex(i)
      info = pynvml.nvmlDeviceGetMemoryInfo(handle)
      if info.free > largest_free_mem:
        largest_free_mem = info.free
        largest_free_idx = i
    pynvml.nvmlShutdown()
    largest_free_mem = largest_free_mem / 1024. / 1024.  # Convert to MB

    idx_to_gpu_id = {}
    for i in range(deviceCount):
      idx_to_gpu_id[i] = '{}'.format(i)

    gpu_id = idx_to_gpu_id[largest_free_idx]
    logging.info('Using largest free memory GPU {} with free memory {}MB'.format(gpu_id, largest_free_mem))
    return gpu_id
  else:
    logging.info('nvidia-ml-py is not installed, automatically select gpu is disabled!')
    return '0'


def get_center(x):
  return (x - 1.) / 2.


def get(config, key, default):
  """Get value in config by key, use default if key is not set
  This little function is useful for dynamical experimental settings.
  For example, we can add a new configuration without worrying compatibility with older versions.
  You can also achieve this by just calling config.get(key, default), but add a warning is even better : )
  """
  val = config.get(key)
  if val is None:
    logging.warning('{} is not explicitly specified, using default value: {}'.format(key, default))
    val = default
  return val


def mkdir_p(path):
  """mimic the behavior of mkdir -p in bash"""
  try:
    os.makedirs(path)
  except OSError as exc:  # Python >2.5
    if exc.errno == errno.EEXIST and os.path.isdir(path):
      pass
    else:
      raise


def tryfloat(s):
  try:
    return float(s)
  except:
    return s


def alphanum_key(s):
  """ Turn a string into a list of string and number chunks.
      "z23a" -> ["z", 23, "a"]
  """
  return [tryfloat(c) for c in re.split('([0-9.]+)', s)]


def sort_nicely(l):
  """Sort the given list in the way that humans expect."""
  return sorted(l, key=alphanum_key)


class Tee(object):
  """Mimic the behavior of tee in bash
  From: http://web.archive.org/web/20141016185743/https://mail.python.org/pipermail/python-list/2007-May/460639.html
  Usage:
    tee=Tee('logfile', 'w')
    print 'abcdefg'
    print 'another line'
    tee.close()
    print 'screen only'
    del tee # should do nothing
  """

  def __init__(self, name, mode):
    self.file = open(name, mode)
    self.stdout = sys.stdout
    sys.stdout = self

  def close(self):
    if self.stdout is not None:
      sys.stdout = self.stdout
      self.stdout = None
    if self.file is not None:
      self.file.close()
      self.file = None

  def write(self, data):
    self.file.write(data)
    self.stdout.write(data)

  def flush(self):
    self.file.flush()
    self.stdout.flush()

  def __del__(self):
    self.close()


def save_cfgs(train_dir, model_config, train_config, track_config):
  """Save all configurations in JSON format for future reference"""
  with open(osp.join(train_dir, 'model_config.json'), 'w') as f:
    json.dump(model_config, f, indent=2)
  with open(osp.join(train_dir, 'train_config.json'), 'w') as f:
    json.dump(train_config, f, indent=2)
  with open(osp.join(train_dir, 'track_config.json'), 'w') as f:
    json.dump(track_config, f, indent=2)


def load_cfgs(checkpoint):
  if osp.isdir(checkpoint):
    train_dir = checkpoint
  else:
    train_dir = osp.dirname(checkpoint)

  with open(osp.join(train_dir, 'model_config.json'), 'r') as f:
    model_config = json.load(f)
  with open(osp.join(train_dir, 'train_config.json'), 'r') as f:
    train_config = json.load(f)
  with open(osp.join(train_dir, 'track_config.json'), 'r') as f:
    track_config = json.load(f)
  return model_config, train_config, track_config

In [2]:
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
%tensorflow_version 1.x
import tensorflow as tf
from tensorflow.python.data import Dataset

tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

TensorFlow 1.x selected.


In [0]:
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
%tensorflow_version 1.x
import tensorflow as tf
from tensorflow.python.data import Dataset

tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format


In [4]:
import tensorflow as tf
tf.__version__ 

'1.15.2'

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.

"""Contains definitions of the network in [1].
  [1] Bertinetto, L., et al. (2016).
      "Fully-Convolutional Siamese Networks for Object Tracking."
      arXiv preprint arXiv:1606.09549.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging

import tensorflow as tf


slim = tf.contrib.slim


def convolutional_alexnet_arg_scope(embed_config,
                                    trainable=True,
                                    is_training=False):
  """Defines the default arg scope.
  Args:
    embed_config: A dictionary which contains configurations for the embedding function.
    trainable: If the weights in the embedding function is trainable.
    is_training: If the embedding function is built for training.
  Returns:
    An `arg_scope` to use for the convolutional_alexnet models.
  """
  # Only consider the model to be in training mode if it's trainable.
  # This is vital for batch_norm since moving_mean and moving_variance
  # will get updated even if not trainable.
  is_model_training = trainable and is_training

  if get(embed_config, 'use_bn', True):
    batch_norm_scale = get(embed_config, 'bn_scale', True)
    batch_norm_decay = 1 - get(embed_config, 'bn_momentum', 3e-4)
    batch_norm_epsilon = get(embed_config, 'bn_epsilon', 1e-6)
    batch_norm_params = {
      "scale": batch_norm_scale,
      # Decay for the moving averages.
      "decay": batch_norm_decay,
      # Epsilon to prevent 0s in variance.
      "epsilon": batch_norm_epsilon,
      "trainable": trainable,
      "is_training": is_model_training,
      # Collection containing the moving mean and moving variance.
      "variables_collections": {
        "beta": None,
        "gamma": None,
        "moving_mean": ["moving_vars"],
        "moving_variance": ["moving_vars"],
      },
      'updates_collections': None,  # Ensure that updates are done within a frame
    }
    normalizer_fn = slim.batch_norm
  else:
    batch_norm_params = {}
    normalizer_fn = None

  weight_decay = get(embed_config, 'weight_decay', 5e-4)
  if trainable:
    weights_regularizer = slim.l2_regularizer(weight_decay)
  else:
    weights_regularizer = None

  init_method = get(embed_config, 'init_method', 'kaiming_normal')
  if is_model_training:
    logging.info('embedding init method -- {}'.format(init_method))
  if init_method == 'kaiming_normal':
    # The same setting as siamese-fc
    initializer = slim.variance_scaling_initializer(factor=2.0, mode='FAN_OUT', uniform=False)
  else:
    initializer = slim.xavier_initializer()

  with slim.arg_scope(
      [slim.conv2d],
      weights_regularizer=weights_regularizer,
      weights_initializer=initializer,
      padding='VALID',
      trainable=trainable,
      activation_fn=tf.nn.relu,
      normalizer_fn=normalizer_fn,
      normalizer_params=batch_norm_params):
    with slim.arg_scope([slim.batch_norm], **batch_norm_params):
      with slim.arg_scope([slim.batch_norm], is_training=is_model_training) as arg_sc:
        return arg_sc


def convolutional_alexnet(inputs, reuse=None, scope='convolutional_alexnet'):
  """Defines the feature extractor of SiamFC.
  Args:
    inputs: a Tensor of shape [batch, h, w, c].
    reuse: if the weights in the embedding function are reused.
    scope: the variable scope of the computational graph.
  Returns:
    net: the computed features of the inputs.
    end_points: the intermediate outputs of the embedding function.
  """
  with tf.variable_scope(scope, 'convolutional_alexnet', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.name + '_end_points'
    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = inputs
      net = slim.conv2d(net, 96, [11, 11], 2, scope='conv1')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
      with tf.variable_scope('conv2'):
        b1, b2 = tf.split(net, 2, 3)
        b1 = slim.conv2d(b1, 128, [5, 5], scope='b1')
        # The original implementation has bias terms for all convolution, but
        # it actually isn't necessary if the convolution layer is followed by a batch
        # normalization layer since batch norm will subtract the mean.
        b2 = slim.conv2d(b2, 128, [5, 5], scope='b2')
        net = tf.concat([b1, b2], 3)
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
      net = slim.conv2d(net, 384, [3, 3], 1, scope='conv3')
      with tf.variable_scope('conv4'):
        b1, b2 = tf.split(net, 2, 3)
        b1 = slim.conv2d(b1, 192, [3, 3], 1, scope='b1')
        b2 = slim.conv2d(b2, 192, [3, 3], 1, scope='b2')
        net = tf.concat([b1, b2], 3)
      # Conv 5 with only convolution, has bias
      with tf.variable_scope('conv5'):
        with slim.arg_scope([slim.conv2d],
                            activation_fn=None, normalizer_fn=None):
          b1, b2 = tf.split(net, 2, 3)
          b1 = slim.conv2d(b1, 128, [3, 3], 1, scope='b1')
          b2 = slim.conv2d(b2, 128, [3, 3], 1, scope='b2')
        net = tf.concat([b1, b2], 3)
      # Convert end_points_collection into a dictionary of end_points.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      return net, end_points


convolutional_alexnet.stride = 8

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.

"""Utilities for model construction"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import re

import numpy as np
import tensorflow as tf
from scipy import io as sio



def construct_gt_score_maps(response_size, batch_size, stride, gt_config=None):
  """Construct a batch of groundtruth score maps
  Args:
    response_size: A list or tuple with two elements [ho, wo]
    batch_size: An integer e.g., 16
    stride: Embedding stride e.g., 8
    gt_config: Configurations for groundtruth generation
  Return:
    A float tensor of shape [batch_size] + response_size
  """
  with tf.name_scope('construct_gt'):
    ho = response_size[0]
    wo = response_size[1]
    y = tf.cast(tf.range(0, ho), dtype=tf.float32) - get_center(ho)
    x = tf.cast(tf.range(0, wo), dtype=tf.float32) - get_center(wo)
    [Y, X] = tf.meshgrid(y, x)

    def _logistic_label(X, Y, rPos, rNeg):
      # dist_to_center = tf.sqrt(tf.square(X) + tf.square(Y))  # L2 metric
      dist_to_center = tf.abs(X) + tf.abs(Y)  # Block metric
      Z = tf.where(dist_to_center <= rPos,
                   tf.ones_like(X),
                   tf.where(dist_to_center < rNeg,
                            0.5 * tf.ones_like(X),
                            tf.zeros_like(X)))
      return Z

    rPos = gt_config['rPos'] / stride
    rNeg = gt_config['rNeg'] / stride
    gt = _logistic_label(X, Y, rPos, rNeg)

    # Duplicate a batch of maps
    gt_expand = tf.reshape(gt, [1] + response_size)
    gt = tf.tile(gt_expand, [batch_size, 1, 1])
    return gt


def get_params_from_mat(matpath):
  """Get parameter from .mat file into parms(dict)"""

  def squeeze(vars_):
    # Matlab save some params with shape (*, 1)
    # However, we don't need the trailing dimension in TensorFlow.
    if isinstance(vars_, (list, tuple)):
      return [np.squeeze(v, 1) for v in vars_]
    else:
      return np.squeeze(vars_, 1)

  netparams = sio.loadmat(matpath)["net"]["params"][0][0]
  params = dict()

  for i in range(netparams.size):
    param = netparams[0][i]
    name = param["name"][0]
    value = param["value"]
    value_size = param["value"].shape[0]

    match = re.match(r"([a-z]+)([0-9]+)([a-z]+)", name, re.I)
    if match:
      items = match.groups()
    elif name == 'adjust_f':
      params['detection/weights'] = squeeze(value)
      continue
    elif name == 'adjust_b':
      params['detection/biases'] = squeeze(value)
      continue
    else:
      raise Exception('unrecognized layer params')

    op, layer, types = items
    layer = int(layer)
    if layer in [1, 3]:
      if op == 'conv':  # convolution
        if types == 'f':
          params['conv%d/weights' % layer] = value
        elif types == 'b':
          value = squeeze(value)
          params['conv%d/biases' % layer] = value
      elif op == 'bn':  # batch normalization
        if types == 'x':
          m, v = squeeze(np.split(value, 2, 1))
          params['conv%d/BatchNorm/moving_mean' % layer] = m
          params['conv%d/BatchNorm/moving_variance' % layer] = np.square(v)
        elif types == 'm':
          value = squeeze(value)
          params['conv%d/BatchNorm/gamma' % layer] = value
        elif types == 'b':
          value = squeeze(value)
          params['conv%d/BatchNorm/beta' % layer] = value
      else:
        raise Exception
    elif layer in [2, 4]:
      if op == 'conv' and types == 'f':
        b1, b2 = np.split(value, 2, 3)
      else:
        b1, b2 = np.split(value, 2, 0)
      if op == 'conv':
        if types == 'f':
          params['conv%d/b1/weights' % layer] = b1
          params['conv%d/b2/weights' % layer] = b2
        elif types == 'b':
          b1, b2 = squeeze(np.split(value, 2, 0))
          params['conv%d/b1/biases' % layer] = b1
          params['conv%d/b2/biases' % layer] = b2
      elif op == 'bn':
        if types == 'x':
          m1, v1 = squeeze(np.split(b1, 2, 1))
          m2, v2 = squeeze(np.split(b2, 2, 1))
          params['conv%d/b1/BatchNorm/moving_mean' % layer] = m1
          params['conv%d/b2/BatchNorm/moving_mean' % layer] = m2
          params['conv%d/b1/BatchNorm/moving_variance' % layer] = np.square(v1)
          params['conv%d/b2/BatchNorm/moving_variance' % layer] = np.square(v2)
        elif types == 'm':
          params['conv%d/b1/BatchNorm/gamma' % layer] = squeeze(b1)
          params['conv%d/b2/BatchNorm/gamma' % layer] = squeeze(b2)
        elif types == 'b':
          params['conv%d/b1/BatchNorm/beta' % layer] = squeeze(b1)
          params['conv%d/b2/BatchNorm/beta' % layer] = squeeze(b2)
      else:
        raise Exception

    elif layer in [5]:
      if op == 'conv' and types == 'f':
        b1, b2 = np.split(value, 2, 3)
      else:
        b1, b2 = squeeze(np.split(value, 2, 0))
      assert op == 'conv', 'layer5 contains only convolution'
      if types == 'f':
        params['conv%d/b1/weights' % layer] = b1
        params['conv%d/b2/weights' % layer] = b2
      elif types == 'b':
        params['conv%d/b1/biases' % layer] = b1
        params['conv%d/b2/biases' % layer] = b2

  return params


def load_mat_model(matpath, embed_scope, detection_scope=None):
  """Restore SiameseFC models from .mat model files"""
  params = get_params_from_mat(matpath)

  assign_ops = []

  def _assign(ref_name, params, scope=embed_scope):
    var_in_model = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope + ref_name)[0]
    var_in_mat = params[ref_name]
    op = tf.assign(var_in_model, var_in_mat)
    assign_ops.append(op)

  for l in range(1, 6):
    if l in [1, 3]:
      _assign('conv%d/weights' % l, params)
      # _assign('conv%d/biases' % l, params)
      _assign('conv%d/BatchNorm/beta' % l, params)
      _assign('conv%d/BatchNorm/gamma' % l, params)
      _assign('conv%d/BatchNorm/moving_mean' % l, params)
      _assign('conv%d/BatchNorm/moving_variance' % l, params)
    elif l in [2, 4]:
      # Branch 1
      _assign('conv%d/b1/weights' % l, params)
      # _assign('conv%d/b1/biases' % l, params)
      _assign('conv%d/b1/BatchNorm/beta' % l, params)
      _assign('conv%d/b1/BatchNorm/gamma' % l, params)
      _assign('conv%d/b1/BatchNorm/moving_mean' % l, params)
      _assign('conv%d/b1/BatchNorm/moving_variance' % l, params)
      # Branch 2
      _assign('conv%d/b2/weights' % l, params)
      # _assign('conv%d/b2/biases' % l, params)
      _assign('conv%d/b2/BatchNorm/beta' % l, params)
      _assign('conv%d/b2/BatchNorm/gamma' % l, params)
      _assign('conv%d/b2/BatchNorm/moving_mean' % l, params)
      _assign('conv%d/b2/BatchNorm/moving_variance' % l, params)
    elif l in [5]:
      # Branch 1
      _assign('conv%d/b1/weights' % l, params)
      _assign('conv%d/b1/biases' % l, params)
      # Branch 2
      _assign('conv%d/b2/weights' % l, params)
      _assign('conv%d/b2/biases' % l, params)
    else:
      raise Exception('layer number must below 5')

  if detection_scope:
    _assign(detection_scope + 'biases', params, scope='')

  initialize = tf.group(*assign_ops)
  return initialize

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.


import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops.metrics_impl import _confusion_matrix_at_thresholds


def _auc(labels, predictions, weights=None, num_thresholds=200,
         metrics_collections=None, updates_collections=None,
         curve='ROC', name=None, summation_method='trapezoidal'):
  """Computes the approximate AUC via a Riemann sum.
  Modified version of tf.metrics.auc. Add support for AUC computation
  of the recall curve.
  """
  with tf.variable_scope(
      name, 'auc', (labels, predictions, weights)):
    if curve != 'ROC' and curve != 'PR' and curve != 'R':
      raise ValueError('curve must be either ROC, PR or R, %s unknown' %
                       (curve))
    kepsilon = 1e-7  # to account for floating point imprecisions
    thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
                  for i in range(num_thresholds - 2)]
    thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon]

    values, update_ops = _confusion_matrix_at_thresholds(
      labels, predictions, thresholds, weights)

    # Add epsilons to avoid dividing by 0.
    epsilon = 1.0e-6

    def compute_auc(tp, fn, tn, fp, name):
      """Computes the roc-auc or pr-auc based on confusion counts."""
      rec = tf.div(tp + epsilon, tp + fn + epsilon)
      if curve == 'ROC':
        fp_rate = tf.div(fp, fp + tn + epsilon)
        x = fp_rate
        y = rec
      elif curve == 'R':  # recall auc
        x = tf.linspace(1., 0., num_thresholds)
        y = rec
      else:  # curve == 'PR'.
        prec = tf.div(tp + epsilon, tp + fp + epsilon)
        x = rec
        y = prec
      if summation_method == 'trapezoidal':
        return tf.reduce_sum(
          tf.multiply(x[:num_thresholds - 1] - x[1:],
                      (y[:num_thresholds - 1] + y[1:]) / 2.),
          name=name)
      elif summation_method == 'minoring':
        return tf.reduce_sum(
          tf.multiply(x[:num_thresholds - 1] - x[1:],
                      tf.minimum(y[:num_thresholds - 1], y[1:])),
          name=name)
      elif summation_method == 'majoring':
        return tf.reduce_sum(
          tf.multiply(x[:num_thresholds - 1] - x[1:],
                      tf.maximum(y[:num_thresholds - 1], y[1:])),
          name=name)
      else:
        raise ValueError('Invalid summation_method: %s' % summation_method)

    # sum up the areas of all the trapeziums
    auc_value = compute_auc(
      values['tp'], values['fn'], values['tn'], values['fp'], 'value')
    update_op = compute_auc(
      update_ops['tp'], update_ops['fn'], update_ops['tn'], update_ops['fp'],
      'update_op')

    if metrics_collections:
      ops.add_to_collections(metrics_collections, auc_value)

    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)

    return auc_value, update_op


def get_center_index(response):
  """Get the index of the center in the response map"""
  shape = tf.shape(response)
  c1 = tf.to_int32((shape[1] - 1) / 2)
  c2 = tf.to_int32((shape[2] - 1) / 2)
  return c1, c2


def center_score_error(response):
  """Center score error.
  The error is low when the center of the response map is classified as target.
  """
  with tf.name_scope('CS-err'):
    r, c = get_center_index(response)
    center_score = response[:, r, c]
    mean, update_op = tf.metrics.mean(tf.to_float(center_score < 0))
    with tf.control_dependencies([update_op]):
      mean = tf.identity(mean)
    return mean


def get_maximum_index(response):
  """Get the index of the maximum value in the response map"""
  response_shape = response.get_shape().as_list()
  response_spatial_size = response_shape[-2:]  # e.g. [29, 29]
  length = response_spatial_size[0] * response_spatial_size[1]

  # Get maximum response index (note index starts from zero)
  ind_max = tf.argmax(tf.reshape(response, [-1, length]), 1)
  ind_row = tf.div(ind_max, response_spatial_size[1])
  ind_col = tf.mod(ind_max, response_spatial_size[1])
  return ind_row, ind_col


def center_dist_error(response):
  """Center distance error.
  The error is low when the maximum response is at the center of the response map.
  """
  with tf.name_scope('CD-err'):
    radius_in_pixel = 50.
    total_stride = 8.
    num_thresholds = 100
    radius_in_response = radius_in_pixel / total_stride

    gt_r, gt_c = get_center_index(response)
    max_r, max_c = get_maximum_index(response)
    gt_r = tf.to_float(gt_r)
    gt_c = tf.to_float(gt_c)
    max_r = tf.to_float(max_r)
    max_c = tf.to_float(max_c)
    distances = tf.sqrt((gt_r - max_r) ** 2 + (gt_c - max_c) ** 2)

    # We cast distances as prediction accuracies in the range [0, 1] where 0 means fail and
    # 1 means success. In this way, we can readily use streaming_auc to compute area
    # under curve.
    dist_norm = distances / radius_in_response
    dist_norm = tf.minimum(dist_norm, 1.)
    predictions = 1. - dist_norm
    labels = tf.ones_like(predictions)

    auc, update_op = _auc(labels, predictions, num_thresholds=num_thresholds, curve='R')
    with tf.control_dependencies([update_op]):
      err = 1. - auc
    return err

In [8]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.

"""Dataset Sampler"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


class Sampler(object):
  def __init__(self, data_source, shuffle=True):
    self.data_source = data_source
    self.shuffle = shuffle

  def __iter__(self):
    data_idxs = np.arange(len(self.data_source))
    if self.shuffle:
      np.random.shuffle(data_idxs)

    for idx in data_idxs:
      yield idx


if __name__ == '__main__':
  x = [1, 2, 3]
  sampler = Sampler(x, shuffle=True)
  p = 0
  for xx in sampler:
    print(x[xx])
    p += 1
    if p == 10: break

3
1
2


In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.


"""Various transforms for video and image augmentation"""

import numbers

import tensorflow as tf


class Compose(object):
  """Composes several transforms together."""

  def __init__(self, transforms):
    self.transforms = transforms

  def __call__(self, example):
    for t in self.transforms:
      example = t(example)
    return example


class RandomGray(object):
  def __init__(self, gray_ratio=0.25):
    self.gray_ratio = gray_ratio

  def __call__(self, img_sequence):
    def rgb_to_gray():
      gray_images = tf.image.rgb_to_grayscale(img_sequence)
      return tf.concat([gray_images] * 3, axis=3)

    def identity():
      return tf.identity(img_sequence)

    return tf.cond(tf.less(tf.random_uniform([], 0, 1), self.gray_ratio), rgb_to_gray, identity)


class RandomStretch(object):
  def __init__(self, max_stretch=0.05, interpolation='bilinear'):
    self.max_stretch = max_stretch
    self.interpolation = interpolation

  def __call__(self, img):
    scale = 1.0 + tf.random_uniform([], -self.max_stretch, self.max_stretch)
    img_shape = tf.shape(img)
    ts = tf.to_int32(tf.round(tf.to_float(img_shape[:2]) * scale))
    resize_method_map = {'bilinear': tf.image.ResizeMethod.BILINEAR,
                         'bicubic': tf.image.ResizeMethod.BICUBIC}
    return tf.image.resize_images(img, ts, method=resize_method_map[self.interpolation])


class CenterCrop(object):
  def __init__(self, size):
    if isinstance(size, numbers.Number):
      self.size = (int(size), int(size))
    else:
      self.size = size

  def __call__(self, img):
    th, tw = self.size
    return tf.image.resize_image_with_crop_or_pad(img, th, tw)


class RandomCrop(object):
  def __init__(self, size):
    if isinstance(size, numbers.Number):
      self.size = (int(size), int(size))
    else:
      self.size = size

  def __call__(self, img):
    img_shape = tf.shape(img)
    th, tw = self.size

    y1 = tf.random_uniform([], 0, img_shape[0] - th, dtype=tf.int32)
    x1 = tf.random_uniform([], 0, img_shape[1] - tw, dtype=tf.int32)

    return tf.image.crop_to_bounding_box(img, y1, x1, th, tw)

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.

"""VID Dataset"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pickle

import numpy as np


def downsample(n_in, n_out, max_frame_dist=1):
  # Get a list of frame distance between consecutive frames
  max_frame_dist = np.minimum(n_in, max_frame_dist)
  possible_frame_dist = range(1, max_frame_dist + 1)
  frame_dist = np.random.choice(possible_frame_dist, n_out - 1)
  end_to_start_frame_dist = np.sum(frame_dist)

  # Check frame dist boundary
  possible_max_start_idx = n_in - 1 - end_to_start_frame_dist
  if possible_max_start_idx < 0:
    n_extra = - possible_max_start_idx
    while n_extra > 0:
      for idx, dist in enumerate(frame_dist):
        if dist > 1:
          frame_dist[idx] = dist - 1
          n_extra -= 1
          if n_extra == 0: break

  # Get frame dist
  end_to_start_frame_dist = np.sum(frame_dist)
  possible_max_start_idx = n_in - 1 - end_to_start_frame_dist
  start_idx = np.random.choice(possible_max_start_idx + 1, 1)
  out_idxs = np.cumsum(np.concatenate((start_idx, frame_dist)))
  return out_idxs


def upsample(n_in, n_out):
  n_more = n_out - n_in
  in_idxs = range(n_in)
  more_idxs = np.random.choice(in_idxs, n_more)
  out_idxs = sorted(list(in_idxs) + list(more_idxs))
  return out_idxs


class VID:
  def __init__(self, imdb_path, max_frame_dist, epoch_size=None):
    with open(imdb_path, 'rb') as f:
      imdb = pickle.load(f)

    self.videos = imdb['videos']
    self.time_steps = 2
    self.max_frame_dist = max_frame_dist

    if epoch_size is None:
      self.epoch_size = len(self.videos)
    else:
      self.epoch_size = int(epoch_size)

  def __getitem__(self, index):
    img_ids = self.videos[index % len(self.videos)]
    n_frames = len(img_ids)

    if n_frames < self.time_steps:
      out_idxs = upsample(n_frames, self.time_steps)
    elif n_frames == self.time_steps:
      out_idxs = range(n_frames)
    else:
      out_idxs = downsample(n_frames, self.time_steps, self.max_frame_dist)

    video = []
    for j, frame_idx in enumerate(out_idxs):
      img_path = img_ids[frame_idx]
      video.append(img_path.encode('utf-8'))
    return video

  def __len__(self):
    return self.epoch_size

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging

import tensorflow as tf



class DataLoader(object):
  def __init__(self, config, is_training):
    self.config = config
    self.is_training = is_training

    preprocess_name = get(config, 'preprocessing_name', None)
    logging.info('preproces -- {}'.format(preprocess_name))

    if preprocess_name == 'siamese_fc_color':
      self.v_transform = None
      # TODO: use a single operation (tf.image.crop_and_resize) to achieve all transformations ?
      self.z_transform = Compose([RandomStretch(),
                                  CenterCrop((255 - 8, 255 - 8)),
                                  RandomCrop(255 - 2 * 8),
                                  CenterCrop((127, 127))])
      self.x_transform = Compose([RandomStretch(),
                                  CenterCrop((255 - 8, 255 - 8)),
                                  RandomCrop(255 - 2 * 8), ])
    elif preprocess_name == 'siamese_fc_gray':
      self.v_transform = RandomGray()
      self.z_transform = Compose([RandomStretch(),
                                  CenterCrop((255 - 8, 255 - 8)),
                                  RandomCrop(255 - 2 * 8),
                                  CenterCrop((127, 127))])
      self.x_transform = Compose([RandomStretch(),
                                  CenterCrop((255 - 8, 255 - 8)),
                                  RandomCrop(255 - 2 * 8), ])
    elif preprocess_name == 'None':
      self.v_transform = None
      self.z_transform = CenterCrop((127, 127))
      self.x_transform = CenterCrop((255, 255))
    else:
      raise ValueError('Preprocessing name {} was not recognized.'.format(preprocess_name))

    self.dataset_py = VID(config['input_imdb'], config['max_frame_dist'])
    self.sampler = Sampler(self.dataset_py, shuffle=is_training)

  def build(self):
    self.build_dataset()
    self.build_iterator()

  def build_dataset(self):
    def sample_generator():
      for video_id in self.sampler:
        sample = self.dataset_py[video_id]
        yield sample

    def transform_fn(video):
      exemplar_file = tf.read_file(video[0])
      instance_file = tf.read_file(video[1])
      exemplar_image = tf.image.decode_jpeg(exemplar_file, channels=3, dct_method="INTEGER_ACCURATE")
      instance_image = tf.image.decode_jpeg(instance_file, channels=3, dct_method="INTEGER_ACCURATE")

      if self.v_transform is not None:
        video = tf.stack([exemplar_image, instance_image])
        video = self.v_transform(video)
        exemplar_image = video[0]
        instance_image = video[1]

      if self.z_transform is not None:
        exemplar_image = self.z_transform(exemplar_image)

      if self.x_transform is not None:
        instance_image = self.x_transform(instance_image)

      return exemplar_image, instance_image

    dataset = tf.data.Dataset.from_generator(sample_generator,
                                             output_types=(tf.string),
                                             output_shapes=(tf.TensorShape([2])))
    dataset = dataset.map(transform_fn, num_parallel_calls=self.config['prefetch_threads'])
    dataset = dataset.prefetch(self.config['prefetch_capacity'])
    dataset = dataset.repeat()
    dataset = dataset.batch(self.config['batch_size'])
    self.dataset_tf = dataset

  def build_iterator(self):
    self.iterator = self.dataset_tf.make_one_shot_iterator()

  def get_one_batch(self):
    return self.iterator.get_next()

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.

"""Construct the computational graph of siamese model for training. """

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import tensorflow as tf



slim = tf.contrib.slim


class SiameseModel:
  def __init__(self, model_config, train_config, mode='train'):
    self.model_config = model_config
    self.train_config = train_config
    self.mode = mode
    assert mode in ['train', 'validation', 'inference']

    if self.mode == 'train':
      self.data_config = self.train_config['train_data_config']
    elif self.mode == 'validation':
      self.data_config = self.train_config['validation_data_config']

    self.dataloader = None
    self.exemplars = None
    self.instances = None
    self.response = None
    self.batch_loss = None
    self.total_loss = None
    self.init_fn = None
    self.global_step = None

  def is_training(self):
    """Returns true if the model is built for training mode"""
    return self.mode == 'train'

  def build_inputs(self):
    """Input fetching and batching
    Outputs:
      self.exemplars: image batch of shape [batch, hz, wz, 3]
      self.instances: image batch of shape [batch, hx, wx, 3]
    """
    if self.mode in ['train', 'validation']:
      with tf.device("/cpu:0"):  # Put data loading and preprocessing in CPU is substantially faster
        self.dataloader = DataLoader(self.data_config, self.is_training())
        self.dataloader.build()
        exemplars, instances = self.dataloader.get_one_batch()

        exemplars = tf.to_float(exemplars)
        instances = tf.to_float(instances)
    else:
      self.examplar_feed = tf.placeholder(shape=[None, None, None, 3],
                                          dtype=tf.uint8,
                                          name='examplar_input')
      self.instance_feed = tf.placeholder(shape=[None, None, None, 3],
                                          dtype=tf.uint8,
                                          name='instance_input')
      exemplars = tf.to_float(self.examplar_feed)
      instances = tf.to_float(self.instance_feed)

    self.exemplars = exemplars
    self.instances = instances

  def build_image_embeddings(self, reuse=False):
    """Builds the image model subgraph and generates image embeddings
    Inputs:
      self.exemplars: A tensor of shape [batch, hz, wz, 3]
      self.instances: A tensor of shape [batch, hx, wx, 3]
    Outputs:
      self.exemplar_embeds: A Tensor of shape [batch, hz_embed, wz_embed, embed_dim]
      self.instance_embeds: A Tensor of shape [batch, hx_embed, wx_embed, embed_dim]
    """
    config = self.model_config['embed_config']
    arg_scope = convolutional_alexnet_arg_scope(config,
                                                trainable=config['train_embedding'],
                                                is_training=self.is_training())

    @functools.wraps(convolutional_alexnet)
    def embedding_fn(images, reuse=False):
      with slim.arg_scope(arg_scope):
        return convolutional_alexnet(images, reuse=reuse)

    self.exemplar_embeds, _ = embedding_fn(self.exemplars, reuse=reuse)
    self.instance_embeds, _ = embedding_fn(self.instances, reuse=True)

  def build_template(self):
    # The template is simply the feature of the exemplar image in SiamFC.
    self.templates = self.exemplar_embeds

  def build_detection(self, reuse=False):
    with tf.variable_scope('detection', reuse=reuse):
      def _translation_match(x, z):  # translation match for one example within a batch
        x = tf.expand_dims(x, 0)  # [1, in_height, in_width, in_channels]
        z = tf.expand_dims(z, -1)  # [filter_height, filter_width, in_channels, 1]
        return tf.nn.conv2d(x, z, strides=[1, 1, 1, 1], padding='VALID', name='translation_match')

      output = tf.map_fn(lambda x: _translation_match(x[0], x[1]),
                         (self.instance_embeds, self.templates),
                         dtype=self.instance_embeds.dtype)
      output = tf.squeeze(output, [1, 4])  # of shape e.g., [8, 15, 15]

      # Adjust score, this is required to make training possible.
      config = self.model_config['adjust_response_config']
      bias = tf.get_variable('biases', [1],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.0, dtype=tf.float32),
                             trainable=config['train_bias'])
      response = config['scale'] * output + bias
      self.response = response

  def build_loss(self):
    response = self.response
    response_size = response.get_shape().as_list()[1:3]  # [height, width]

    gt = construct_gt_score_maps(response_size,
                                 self.data_config['batch_size'],
                                 self.model_config['embed_config']['stride'],
                                 self.train_config['gt_config'])

    with tf.name_scope('Loss'):
      loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=response,
                                                     labels=gt)

      with tf.name_scope('Balance_weights'):
        n_pos = tf.reduce_sum(tf.to_float(tf.equal(gt[0], 1)))
        n_neg = tf.reduce_sum(tf.to_float(tf.equal(gt[0], 0)))
        w_pos = 0.5 / n_pos
        w_neg = 0.5 / n_neg
        class_weights = tf.where(tf.equal(gt, 1),
                                 w_pos * tf.ones_like(gt),
                                 tf.ones_like(gt))
        class_weights = tf.where(tf.equal(gt, 0),
                                 w_neg * tf.ones_like(gt),
                                 class_weights)
        loss = loss * class_weights

      # Note that we use reduce_sum instead of reduce_mean since the loss has
      # already been normalized by class_weights in spatial dimension.
      loss = tf.reduce_sum(loss, [1, 2])

      batch_loss = tf.reduce_mean(loss, name='batch_loss')
      tf.losses.add_loss(batch_loss)

      total_loss = tf.losses.get_total_loss()
      self.batch_loss = batch_loss
      self.total_loss = total_loss

      tf.summary.image('exemplar', self.exemplars, family=self.mode)
      tf.summary.image('instance', self.instances, family=self.mode)

      mean_batch_loss, update_op1 = tf.metrics.mean(batch_loss)
      mean_total_loss, update_op2 = tf.metrics.mean(total_loss)
      with tf.control_dependencies([update_op1, update_op2]):
        tf.summary.scalar('batch_loss', mean_batch_loss, family=self.mode)
        tf.summary.scalar('total_loss', mean_total_loss, family=self.mode)

      if self.mode == 'train':
        tf.summary.image('GT', tf.reshape(gt[0], [1] + response_size + [1]), family='GT')
      tf.summary.image('Response', tf.expand_dims(tf.sigmoid(response), -1), family=self.mode)
      tf.summary.histogram('Response', self.response, family=self.mode)

      # Two more metrics to monitor the performance of training
      tf.summary.scalar('center_score_error', center_score_error(response), family=self.mode)
      tf.summary.scalar('center_dist_error', center_dist_error(response), family=self.mode)

  def setup_global_step(self):
    global_step = tf.Variable(
      initial_value=0,
      name='global_step',
      trainable=False,
      collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])

    self.global_step = global_step

  def setup_embedding_initializer(self):
    """Sets up the function to restore embedding variables from checkpoint."""
    embed_config = self.model_config['embed_config']
    if embed_config['embedding_checkpoint_file']:
      # Restore Siamese FC models from .mat model files
      initialize = load_mat_model(embed_config['embedding_checkpoint_file'],
                                  'convolutional_alexnet/', 'detection/')

      def restore_fn(sess):
        tf.logging.info("Restoring embedding variables from checkpoint file %s",
                        embed_config['embedding_checkpoint_file'])
        sess.run([initialize])

      self.init_fn = restore_fn

  def build(self, reuse=False):
    """Creates all ops for training and evaluation"""
    with tf.name_scope(self.mode):
      self.build_inputs()
      self.build_image_embeddings(reuse=reuse)
      self.build_template()
      self.build_detection(reuse=reuse)
      self.setup_embedding_initializer()

      if self.mode in ['train', 'validation']:
        self.build_loss()

      if self.is_training():
        self.setup_global_step()

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright @ 2017 bily     Huazhong University of Science and Technology
#

"""Default configurations of model specification, training and tracking
For most of the time, DO NOT modify the configurations within this file.
Use the configurations here as the default configurations and only update
them following the examples in the `experiments` directory.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path as osp

LOG_DIR = 'Logs/SiamFC'  # where checkpoints, logs are saved
RUN_NAME = 'SiamFC-3s-color-scratch'  # identifier of the experiment

MODEL_CONFIG = {
  'z_image_size': 127,  # Exemplar image size

  'embed_config': {'embedding_name': 'convolutional_alexnet',
                   'embedding_checkpoint_file': None,  # mat file path of the pretrained embedding model.
                   'train_embedding': True,
                   'init_method': 'kaiming_normal',
                   'use_bn': True,
                   'bn_scale': True,
                   'bn_momentum': 0.05,
                   'bn_epsilon': 1e-6,
                   'embedding_feature_num': 256,
                   'weight_decay': 5e-4,
                   'stride': 8, },

  'adjust_response_config': {'train_bias': True,
                             'scale': 1e-3, },
}

TRAIN_CONFIG = {
  'train_dir': osp.join(LOG_DIR, 'track_model_checkpoints', RUN_NAME),

  'seed': 123,  # fix seed for reproducing experiments

  'train_data_config': {'input_imdb': 'data/train_imdb.pickle',
                        'preprocessing_name': 'siamese_fc_color',
                        'num_examples_per_epoch': 5.32e4,
                        'epoch': 50,
                        'batch_size': 8,
                        'max_frame_dist': 100,  # Maximum distance between any two random frames draw from videos.
                        'prefetch_threads': 4,
                        'prefetch_capacity': 15 * 8, },  # The maximum elements number in the data loading queue

  'validation_data_config': {'input_imdb': 'data/validation_imdb.pickle',
                             'preprocessing_name': 'None',
                             'batch_size': 8,
                             'max_frame_dist': 100,  # Maximum distance between any two random frames draw from videos.
                             'prefetch_threads': 1,
                             'prefetch_capacity': 15 * 8, },  # The maximum elements number in the data loading queue

  # Configurations for generating groundtruth maps
  'gt_config': {'rPos': 16,
                'rNeg': 0, },

  # Optimizer for training the model.
  'optimizer_config': {'optimizer': 'MOMENTUM',  # SGD and MOMENTUM are supported
                       'momentum': 0.9,
                       'use_nesterov': False, },

  # Learning rate configs
  'lr_config': {'policy': 'exponential',
                'initial_lr': 0.01,
                'num_epochs_per_decay': 1,
                'lr_decay_factor': 0.8685113737513527,
                'staircase': True, },

  # If not None, clip gradients to this value.
  'clip_gradients': None,

  # Frequency at which loss and global step are logged
  'log_every_n_steps': 10,

  # Frequency to save model
  'save_model_every_n_step': 5.32e4 // 8,  # save model every epoch

  # How many model checkpoints to keep. No limit if None.
  'max_checkpoints_to_keep': None,
}

TRACK_CONFIG = {
  # Directory for saving log files during tracking.
  'log_dir': osp.join(LOG_DIR, 'track_model_inference', RUN_NAME),

  # Logging level of inference, use 1 for detailed inspection. 0 for speed.
  'log_level': 0,

  'x_image_size': 255,  # Search image size during tracking

  # Configurations for upsampling score maps
  'upsample_method': 'bicubic',
  'upsample_factor': 16,

  # Configurations for searching scales
  'num_scales': 3,  # Number of scales to search
  'scale_step': 1.0375,  # Scale changes between different scale search
  'scale_damp': 0.59,  # Damping factor for scale update
  'scale_penalty': 0.9745,  # Score penalty for scale change

  # Configurations for penalizing large displacement from the center
  'window_influence': 0.176,

  'include_first': False, # If track the first frame
}

In [0]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright @ 2017 bily     Huazhong University of Science and Technology
#

"""Default configurations of model specification, training and tracking
For most of the time, DO NOT modify the configurations within this file.
Use the configurations here as the default configurations and only update
them following the examples in the `experiments` directory.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path as osp

LOG_DIR = 'Logs/SiamFC'  # where checkpoints, logs are saved
RUN_NAME = 'SiamFC-3s-color-scratch'  # identifier of the experiment

MODEL_CONFIG = {
  'z_image_size': 127,  # Exemplar image size

  'embed_config': {'embedding_name': 'convolutional_alexnet',
                   'embedding_checkpoint_file': None,  # mat file path of the pretrained embedding model.
                   'train_embedding': True,
                   'init_method': 'kaiming_normal',
                   'use_bn': True,
                   'bn_scale': True,
                   'bn_momentum': 0.05,
                   'bn_epsilon': 1e-6,
                   'embedding_feature_num': 256,
                   'weight_decay': 5e-4,
                   'stride': 8, },

  'adjust_response_config': {'train_bias': True,
                             'scale': 1e-3, },
}

TRAIN_CONFIG = {
  'train_dir': osp.join(LOG_DIR, 'track_model_checkpoints', RUN_NAME),

  'seed': 123,  # fix seed for reproducing experiments

  'train_data_config': {'input_imdb': 'data/train_imdb.pickle',
                        'preprocessing_name': 'siamese_fc_color',
                        'num_examples_per_epoch': 5.32e4,
                        'epoch': 50,
                        'batch_size': 8,
                        'max_frame_dist': 100,  # Maximum distance between any two random frames draw from videos.
                        'prefetch_threads': 4,
                        'prefetch_capacity': 15 * 8, },  # The maximum elements number in the data loading queue

  'validation_data_config': {'input_imdb': 'data/validation_imdb.pickle',
                             'preprocessing_name': 'None',
                             'batch_size': 8,
                             'max_frame_dist': 100,  # Maximum distance between any two random frames draw from videos.
                             'prefetch_threads': 1,
                             'prefetch_capacity': 15 * 8, },  # The maximum elements number in the data loading queue

  # Configurations for generating groundtruth maps
  'gt_config': {'rPos': 16,
                'rNeg': 0, },

  # Optimizer for training the model.
  'optimizer_config': {'optimizer': 'MOMENTUM',  # SGD and MOMENTUM are supported
                       'momentum': 0.9,
                       'use_nesterov': False, },

  # Learning rate configs
  'lr_config': {'policy': 'exponential',
                'initial_lr': 0.01,
                'num_epochs_per_decay': 1,
                'lr_decay_factor': 0.8685113737513527,
                'staircase': True, },

  # If not None, clip gradients to this value.
  'clip_gradients': None,

  # Frequency at which loss and global step are logged
  'log_every_n_steps': 10,

  # Frequency to save model
  'save_model_every_n_step': 5.32e4 // 8,  # save model every epoch

  # How many model checkpoints to keep. No limit if None.
  'max_checkpoints_to_keep': None,
}

TRACK_CONFIG = {
  # Directory for saving log files during tracking.
  'log_dir': osp.join(LOG_DIR, 'track_model_inference', RUN_NAME),

  # Logging level of inference, use 1 for detailed inspection. 0 for speed.
  'log_level': 0,

  'x_image_size': 255,  # Search image size during tracking

  # Configurations for upsampling score maps
  'upsample_method': 'bicubic',
  'upsample_factor': 16,

  # Configurations for searching scales
  'num_scales': 3,  # Number of scales to search
  'scale_step': 1.0375,  # Scale changes between different scale search
  'scale_damp': 0.59,  # Damping factor for scale update
  'scale_penalty': 0.9745,  # Score penalty for scale change

  # Configurations for penalizing large displacement from the center
  'window_influence': 0.176,

  'include_first': False, # If track the first frame
}

In [27]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017 bily     Huazhong University of Science and Technology
#
# Distributed under terms of the MIT license.

"""Train the model"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import os.path as osp
import random
import time
from datetime import datetime

import numpy as np
import tensorflow as tf
from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment(configuration.RUN_NAME)
ex.observers.append(FileStorageObserver.create(osp.join(configuration.LOG_DIR, 'sacred')))



@ex.config
def configurations():
  # Add configurations for current script, for more details please see the documentation of `sacred`.
  # REFER: http://sacred.readthedocs.io/en/latest/index.html
  model_config = configuration.MODEL_CONFIG
  train_config = configuration.TRAIN_CONFIG
  track_config = configuration.TRACK_CONFIG


def _configure_learning_rate(train_config, global_step):
  lr_config = train_config['lr_config']

  num_batches_per_epoch = \
    int(train_config['train_data_config']['num_examples_per_epoch'] / train_config['train_data_config']['batch_size'])

  lr_policy = lr_config['policy']
  if lr_policy == 'piecewise_constant':
    lr_boundaries = [int(e * num_batches_per_epoch) for e in lr_config['lr_boundaries']]
    return tf.train.piecewise_constant(global_step,
                                       lr_boundaries,
                                       lr_config['lr_values'])
  elif lr_policy == 'exponential':
    decay_steps = int(num_batches_per_epoch) * lr_config['num_epochs_per_decay']
    return tf.train.exponential_decay(lr_config['initial_lr'],
                                      global_step,
                                      decay_steps=decay_steps,
                                      decay_rate=lr_config['lr_decay_factor'],
                                      staircase=lr_config['staircase'])
  elif lr_policy == 'cosine':
    T_total = train_config['train_data_config']['epoch'] * num_batches_per_epoch
    return 0.5 * lr_config['initial_lr'] * (1 + tf.cos(np.pi * tf.to_float(global_step) / T_total))
  else:
    raise ValueError('Learning rate policy [%s] was not recognized', lr_policy)


def _configure_optimizer(train_config, learning_rate):
  optimizer_config = train_config['optimizer_config']
  optimizer_name = optimizer_config['optimizer'].upper()
  if optimizer_name == 'MOMENTUM':
    optimizer = tf.train.MomentumOptimizer(
      learning_rate,
      momentum=optimizer_config['momentum'],
      use_nesterov=optimizer_config['use_nesterov'],
      name='Momentum')
  elif optimizer_name == 'SGD':
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  else:
    raise ValueError('Optimizer [%s] was not recognized', optimizer_config['optimizer'])
  return optimizer


@ex.automain
def main(model_config, train_config, track_config):
  os.environ['CUDA_VISIBLE_DEVICES'] = auto_select_gpu()

  # Create training directory which will be used to save: configurations, model files, TensorBoard logs
  train_dir = train_config['train_dir']
  if not osp.isdir(train_dir):
    logging.info('Creating training directory: %s', train_dir)
    mkdir_p(train_dir)

  g = tf.Graph()
  with g.as_default():
    # Set fixed seed for reproducible experiments
    random.seed(train_config['seed'])
    np.random.seed(train_config['seed'])
    tf.set_random_seed(train_config['seed'])

    # Build the training and validation model
    model = siamese_model.SiameseModel(model_config, train_config, mode='train')
    model.build()
    model_va = siamese_model.SiameseModel(model_config, train_config, mode='validation')
    model_va.build(reuse=True)

    # Save configurations for future reference
    save_cfgs(train_dir, model_config, train_config, track_config)

    learning_rate = _configure_learning_rate(train_config, model.global_step)
    optimizer = _configure_optimizer(train_config, learning_rate)
    tf.summary.scalar('learning_rate', learning_rate)

    # Set up the training ops
    opt_op = tf.contrib.layers.optimize_loss(
      loss=model.total_loss,
      global_step=model.global_step,
      learning_rate=learning_rate,
      optimizer=optimizer,
      clip_gradients=train_config['clip_gradients'],
      learning_rate_decay_fn=None,
      summaries=['learning_rate'])

    with tf.control_dependencies([opt_op]):
      train_op = tf.no_op(name='train')

    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=train_config['max_checkpoints_to_keep'])

    summary_writer = tf.summary.FileWriter(train_dir, g)
    summary_op = tf.summary.merge_all()

    global_variables_init_op = tf.global_variables_initializer()
    local_variables_init_op = tf.local_variables_initializer()
    g.finalize()  # Finalize graph to avoid adding ops by mistake

    # Dynamically allocate GPU memory
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)

    sess = tf.Session(config=sess_config)
    model_path = tf.train.latest_checkpoint(train_config['train_dir'])

    if not model_path:
      sess.run(global_variables_init_op)
      sess.run(local_variables_init_op)
      start_step = 0

      if model_config['embed_config']['embedding_checkpoint_file']:
        model.init_fn(sess)
    else:
      logging.info('Restore from last checkpoint: {}'.format(model_path))
      sess.run(local_variables_init_op)
      saver.restore(sess, model_path)
      start_step = tf.train.global_step(sess, model.global_step.name) + 1

    # Training loop
    data_config = train_config['train_data_config']
    total_steps = int(data_config['epoch'] *
                      data_config['num_examples_per_epoch'] /
                      data_config['batch_size'])
    logging.info('Train for {} steps'.format(total_steps))
    for step in range(start_step, total_steps):
      start_time = time.time()
      _, loss, batch_loss = sess.run([train_op, model.total_loss, model.batch_loss])
      duration = time.time() - start_time

      if step % 10 == 0:
        examples_per_sec = data_config['batch_size'] / float(duration)
        time_remain = data_config['batch_size'] * (total_steps - step) / examples_per_sec
        m, s = divmod(time_remain, 60)
        h, m = divmod(m, 60)
        format_str = ('%s: step %d, total loss = %.2f, batch loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch; %dh:%02dm:%02ds remains)')
        logging.info(format_str % (datetime.now(), step, loss, batch_loss,
                                   examples_per_sec, duration, h, m, s))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      if step % train_config['save_model_every_n_step'] == 0 or (step + 1) == total_steps:
        checkpoint_path = osp.join(train_config['train_dir'], 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)

NameError: ignored

In [17]:
!pip install sacred

# 用于数据库连接
!pip install numpy pymongo


Collecting sacred
[?25l  Downloading https://files.pythonhosted.org/packages/f9/7f/c5679977f1eceac432c59cc92bd1ddb7272c282c3db8eb846d0e1c03b6a0/sacred-0.8.1.tar.gz (90kB)
[K     |███▋                            | 10kB 15.7MB/s eta 0:00:01[K     |███████▎                        | 20kB 1.7MB/s eta 0:00:01[K     |██████████▉                     | 30kB 2.2MB/s eta 0:00:01[K     |██████████████▌                 | 40kB 1.6MB/s eta 0:00:01[K     |██████████████████              | 51kB 1.9MB/s eta 0:00:01[K     |█████████████████████▊          | 61kB 2.2MB/s eta 0:00:01[K     |█████████████████████████▎      | 71kB 2.4MB/s eta 0:00:01[K     |█████████████████████████████   | 81kB 2.7MB/s eta 0:00:01[K     |████████████████████████████████| 92kB 2.4MB/s 
Collecting jsonpickle<2.0,>=1.2
  Downloading https://files.pythonhosted.org/packages/cb/e0/54421447d55bc7304a785be9ec81f28e1e8a8c6619b0e35154ed8f1b7761/jsonpickle-1.4-py2.py3-none-any.whl
Collecting munch<3.0,>=2.0.2
  Downl