From a1f6dd881d8437b7aac68e99eb43cfe648a64c2c Mon Sep 17 00:00:00 2001
From: zsdonghao <dhsig552@163.com>
Date: Sat, 11 May 2019 12:09:20 +0800
Subject: [PATCH] remove trash

---
 _backup/download.py        |  168 --
 _tensorlayer/__init__.py   |   31 -
 _tensorlayer/activation.py |  109 -
 _tensorlayer/cost.py       |  562 ----
 _tensorlayer/db.py         |  449 ---
 _tensorlayer/files.py      |  858 ------
 _tensorlayer/iterate.py    |  432 ---
 _tensorlayer/layers.py     | 5530 ------------------------------------
 _tensorlayer/nlp.py        |  932 ------
 _tensorlayer/ops.py        |  219 --
 _tensorlayer/prepro.py     | 1542 ----------
 _tensorlayer/rein.py       |   85 -
 _tensorlayer/utils.py      |  516 ----
 _tensorlayer/visualize.py  |  340 ---
 14 files changed, 11773 deletions(-)
 delete mode 100755 _backup/download.py
 delete mode 100755 _tensorlayer/__init__.py
 delete mode 100755 _tensorlayer/activation.py
 delete mode 100755 _tensorlayer/cost.py
 delete mode 100755 _tensorlayer/db.py
 delete mode 100755 _tensorlayer/files.py
 delete mode 100755 _tensorlayer/iterate.py
 delete mode 100755 _tensorlayer/layers.py
 delete mode 100755 _tensorlayer/nlp.py
 delete mode 100755 _tensorlayer/ops.py
 delete mode 100755 _tensorlayer/prepro.py
 delete mode 100755 _tensorlayer/rein.py
 delete mode 100755 _tensorlayer/utils.py
 delete mode 100755 _tensorlayer/visualize.py

diff --git a/_backup/download.py b/_backup/download.py
deleted file mode 100755
index d9dea5c..0000000
--- a/_backup/download.py
+++ /dev/null
@@ -1,168 +0,0 @@
-"""
-Modification of https://github.com/stanfordnlp/treelstm/blob/master/scripts/download.py
-
-Downloads the following:
-- Celeb-A dataset
-- LSUN dataset
-- MNIST dataset
-"""
-from __future__ import print_function
-import os, sys, gzip, json, shutil, zipfile, argparse, subprocess, requests
-from tqdm import tqdm
-from six.moves import urllib
-
-parser = argparse.ArgumentParser(description='Download dataset for DCGAN.')
-parser.add_argument('datasets', metavar='N', type=str, nargs='+', choices=['celebA', 'lsun', 'mnist'],
-                   help='name of dataset to download [celebA, lsun, mnist]')
-
-def download(url, dirpath):
-    filename = url.split('/')[-1]
-    filepath = os.path.join(dirpath, filename)
-    u = urllib.request.urlopen(url)
-    f = open(filepath, 'wb')
-    filesize = int(u.headers["Content-Length"])
-    print("Downloading: %s Bytes: %s" % (filename, filesize))
-
-    downloaded = 0
-    block_sz = 8192
-    status_width = 70
-    while True:
-        buf = u.read(block_sz)
-        if not buf:
-            print('')
-            break
-        else:
-            print('', end='\r')
-        downloaded += len(buf)
-        f.write(buf)
-        status = (("[%-" + str(status_width + 1) + "s] %3.2f%%") %
-            ('=' * int(float(downloaded) / filesize * status_width) + '>', downloaded * 100. / filesize))
-        print(status, end='')
-        sys.stdout.flush()
-    f.close()
-    return filepath
-
-def download_file_from_google_drive(id, destination):
-    URL = "https://docs.google.com/uc?export=download"
-    session = requests.Session()
-
-    response = session.get(URL, params={ 'id': id }, stream=True)
-    token = get_confirm_token(response)
-
-    if token:
-        params = { 'id' : id, 'confirm' : token }
-        response = session.get(URL, params=params, stream=True)
-
-    save_response_content(response, destination)
-
-def get_confirm_token(response):
-    for key, value in response.cookies.items():
-        if key.startswith('download_warning'):
-            return value
-    return None
-
-def save_response_content(response, destination, chunk_size=32*1024):
-    total_size = int(response.headers.get('content-length', 0))
-    with open(destination, "wb") as f:
-        for chunk in tqdm(response.iter_content(chunk_size), total=total_size,
-                unit='B', unit_scale=True, desc=destination):
-            if chunk: # filter out keep-alive new chunks
-                f.write(chunk)
-
-def unzip(filepath):
-    print("Extracting: " + filepath)
-    dirpath = os.path.dirname(filepath)
-    with zipfile.ZipFile(filepath) as zf:
-        zf.extractall(dirpath)
-    os.remove(filepath)
-
-def download_celeb_a(dirpath):
-	data_dir = 'celebA'
-	if os.path.exists(os.path.join(dirpath, data_dir)):
-		print('Found Celeb-A - skip')
-		return
-
-	filename, drive_id  = "img_align_celeba.zip", "0B7EVK8r0v71pZjFTYXZWM3FlRnM"
-	save_path = os.path.join(dirpath, filename)
-
-	if os.path.exists(save_path):
-		print('[*] {} already exists'.format(save_path))
-	else:
-		download_file_from_google_drive(drive_id, save_path)
-
-	zip_dir = ''
-	with zipfile.ZipFile(save_path) as zf:
-		zip_dir = zf.namelist()[0]
-		zf.extractall(dirpath)
-	os.remove(save_path)
-	os.rename(os.path.join(dirpath, zip_dir), os.path.join(dirpath, data_dir))
-
-def _list_categories(tag):
-    url = 'http://lsun.cs.princeton.edu/htbin/list.cgi?tag=' + tag
-    f = urllib.request.urlopen(url)
-    return json.loads(f.read())
-
-def _download_lsun(out_dir, category, set_name, tag):
-    url = 'http://lsun.cs.princeton.edu/htbin/download.cgi?tag={tag}' \
-          '&category={category}&set={set_name}'.format(**locals())
-    print(url)
-    if set_name == 'test':
-        out_name = 'test_lmdb.zip'
-    else:
-        out_name = '{category}_{set_name}_lmdb.zip'.format(**locals())
-    out_path = os.path.join(out_dir, out_name)
-    cmd = ['curl', url, '-o', out_path]
-    print('Downloading', category, set_name, 'set')
-    subprocess.call(cmd)
-
-def download_lsun(dirpath):
-    data_dir = os.path.join(dirpath, 'lsun')
-    if os.path.exists(data_dir):
-        print('Found LSUN - skip')
-        return
-    else:
-        os.mkdir(data_dir)
-
-    tag = 'latest'
-    #categories = _list_categories(tag)
-    categories = ['bedroom']
-
-    for category in categories:
-        _download_lsun(data_dir, category, 'train', tag)
-        _download_lsun(data_dir, category, 'val', tag)
-    _download_lsun(data_dir, '', 'test', tag)
-
-def download_mnist(dirpath):
-    data_dir = os.path.join(dirpath, 'mnist')
-    if os.path.exists(data_dir):
-        print('Found MNIST - skip')
-        return
-    else:
-        os.mkdir(data_dir)
-    url_base = 'http://yann.lecun.com/exdb/mnist/'
-    file_names = ['train-images-idx3-ubyte.gz','train-labels-idx1-ubyte.gz','t10k-images-idx3-ubyte.gz','t10k-labels-idx1-ubyte.gz']
-    for file_name in file_names:
-        url = (url_base+file_name).format(**locals())
-        print(url)
-        out_path = os.path.join(data_dir,file_name)
-        cmd = ['curl', url, '-o', out_path]
-        print('Downloading ', file_name)
-        subprocess.call(cmd)
-        cmd = ['gzip', '-d', out_path]
-        print('Decompressing ', file_name)
-        subprocess.call(cmd)
-
-def prepare_data_dir(path = './data'):
-    if not os.path.exists(path):
-        os.mkdir(path)
-
-if __name__ == '__main__':
-    args = parser.parse_args()
-    prepare_data_dir()
-
-    if 'celebA' in args.datasets:
-        download_celeb_a('./data')
-    if 'lsun' in args.datasets:
-        download_lsun('./data')
-    if 'mnist' in args.datasets:
-        download_mnist('./data')
diff --git a/_tensorlayer/__init__.py b/_tensorlayer/__init__.py
deleted file mode 100755
index dd41d47..0000000
--- a/_tensorlayer/__init__.py
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-Deep learning and Reinforcement learning library for Researchers and Engineers
-"""
-# from __future__ import absolute_import
-
-
-try:
-    install_instr = "Please make sure you install a recent enough version of TensorFlow."
-    import tensorflow
-except ImportError:
-    raise ImportError("__init__.py : Could not import TensorFlow." + install_instr)
-
-from . import activation
-act = activation
-from . import cost
-from . import files
-# from . import init
-from . import iterate
-from . import layers
-from . import ops
-from . import utils
-from . import visualize
-from . import prepro        # was preprocesse
-from . import nlp
-from . import rein
-
-
-__version__ = "1.4.2"
-
-global_flag = {}
-global_dict = {}
diff --git a/_tensorlayer/activation.py b/_tensorlayer/activation.py
deleted file mode 100755
index 7b6b640..0000000
--- a/_tensorlayer/activation.py
+++ /dev/null
@@ -1,109 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-
-import tensorflow as tf
-
-def identity(x, name=None):
-    """The identity activation function, Shortcut is ``linear``.
-
-    Parameters
-    ----------
-    x : a tensor input
-        input(s)
-
-
-    Returns
-    --------
-    A `Tensor` with the same type as `x`.
-    """
-    return x
-
-# Shortcut
-linear = identity
-
-def ramp(x=None, v_min=0, v_max=1, name=None):
-    """The ramp activation function.
-
-    Parameters
-    ----------
-    x : a tensor input
-        input(s)
-    v_min : float
-        if input(s) smaller than v_min, change inputs to v_min
-    v_max : float
-        if input(s) greater than v_max, change inputs to v_max
-    name : a string or None
-        An optional name to attach to this activation function.
-
-
-    Returns
-    --------
-    A `Tensor` with the same type as `x`.
-    """
-    return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name)
-
-def leaky_relu(x=None, alpha=0.1, name="LeakyReLU"):
-    """The LeakyReLU, Shortcut is ``lrelu``.
-
-    Modified version of ReLU, introducing a nonzero gradient for negative
-    input.
-
-    Parameters
-    ----------
-    x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
-        `int16`, or `int8`.
-    alpha : `float`. slope.
-    name : a string or None
-        An optional name to attach to this activation function.
-
-    Examples
-    ---------
-    >>> network = tl.layers.DenseLayer(network, n_units=100, name = 'dense_lrelu',
-    ...                 act= lambda x : tl.act.lrelu(x, 0.2))
-
-    References
-    ------------
-    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) <http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf>`_
-    """
-    with tf.name_scope(name) as scope:
-        # x = tf.nn.relu(x)
-        # m_x = tf.nn.relu(-x)
-        # x -= alpha * m_x
-        x = tf.maximum(x, alpha * x)
-    return x
-
-#Shortcut
-lrelu = leaky_relu
-
-def pixel_wise_softmax(output, name='pixel_wise_softmax'):
-    """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1.
-    Usually be used for image segmentation.
-
-    Parameters
-    ------------
-    output : tensor
-        - For 2d image, 4D tensor [batch_size, height, weight, channel], channel >= 2.
-        - For 3d image, 5D tensor [batch_size, depth, height, weight, channel], channel >= 2.
-
-    Examples
-    ---------
-    >>> outputs = pixel_wise_softmax(network.outputs)
-    >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5)
-
-    References
-    -----------
-    - `tf.reverse <https://www.tensorflow.org/versions/master/api_docs/python/array_ops.html#reverse>`_
-    """
-    with tf.name_scope(name) as scope:
-        return tf.nn.softmax(output)
-        ## old implementation
-        # exp_map = tf.exp(output)
-        # if output.get_shape().ndims == 4:   # 2d image
-        #     evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, True]))
-        # elif output.get_shape().ndims == 5: # 3d image
-        #     evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, False, True]))
-        # else:
-        #     raise Exception("output parameters should be 2d or 3d image, not %s" % str(output._shape))
-        # return tf.div(exp_map, evidence)
diff --git a/_tensorlayer/cost.py b/_tensorlayer/cost.py
deleted file mode 100755
index 811464f..0000000
--- a/_tensorlayer/cost.py
+++ /dev/null
@@ -1,562 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-
-import tensorflow as tf
-import numbers
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import standard_ops
-
-## Cost Functions
-
-def cross_entropy(output, target, name=None):
-    """It is a softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy of two distributions, implement
-    softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``.
-
-    Parameters
-    ----------
-    output : Tensorflow variable
-        A distribution with shape: [batch_size, n_feature].
-    target : Tensorflow variable
-        A batch of index with shape: [batch_size, ].
-    name : string
-        Name of this loss.
-
-    Examples
-    --------
-    >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits, 'my_loss')
-
-    References
-    -----------
-    - About cross-entropy: `wiki <https://en.wikipedia.org/wiki/Cross_entropy>`_.\n
-    - The code is borrowed from: `here <https://en.wikipedia.org/wiki/Cross_entropy>`_.
-    """
-    # try: # old
-    #     return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, targets=target))
-    # except: # TF 1.0
-    #     assert name is not None, "Please give a unique name to tl.cost.cross_entropy for TF1.0+"
-    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output, name=name))
-
-def sigmoid_cross_entropy(output, target, name=None):
-    """It is a sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``.
-    """
-    try: # TF 1.0
-        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output, name=name))
-    except:
-        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, targets=target))
-
-
-def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'):
-    """Computes binary cross entropy given `output`.
-
-    For brevity, let `x = output`, `z = target`.  The binary cross entropy loss is
-
-        loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i]))
-
-    Parameters
-    ----------
-    output : tensor of type `float32` or `float64`.
-    target : tensor of the same type and shape as `output`.
-    epsilon : float
-        A small value to avoid output is zero.
-    name : string
-        An optional name to attach to this layer.
-
-    References
-    -----------
-    - `DRAW <https://github.com/ericjang/draw/blob/master/draw.py#L73>`_
-    """
-#     from tensorflow.python.framework import ops
-#     with ops.op_scope([output, target], name, "bce_loss") as name:
-#         output = ops.convert_to_tensor(output, name="preds")
-#         target = ops.convert_to_tensor(targets, name="target")
-    with tf.name_scope(name):
-        return tf.reduce_mean(-(target * tf.log(output + epsilon) +
-                              (1. - target) * tf.log(1. - output + epsilon)))
-
-
-def mean_squared_error(output, target, is_mean=False):
-    """Return the TensorFlow expression of mean-squre-error of two distributions.
-
-    Parameters
-    ----------
-    output : 2D or 4D tensor.
-    target : 2D or 4D tensor.
-    is_mean : boolean, if True, use ``tf.reduce_mean`` to compute the loss of one data, otherwise, use ``tf.reduce_sum`` (default).
-
-    References
-    ------------
-    - `Wiki Mean Squared Error <https://en.wikipedia.org/wiki/Mean_squared_error>`_
-    """
-    with tf.name_scope("mean_squared_error_loss"):
-        if output.get_shape().ndims == 2:   # [batch_size, n_feature]
-            if is_mean:
-                mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), 1))
-            else:
-                mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), 1))
-        elif output.get_shape().ndims == 4: # [batch_size, w, h, c]
-            if is_mean:
-                mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), [1, 2, 3]))
-            else:
-                mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), [1, 2, 3]))
-        return mse
-
-
-
-def dice_coe(output, target, epsilon=1e-10):
-    """Sørensen–Dice coefficient for comparing the similarity of two distributions,
-    usually be used for binary image segmentation i.e. labels are binary.
-    The coefficient = [0, 1], 1 if totally match.
-
-    Parameters
-    -----------
-    output : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
-    target : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
-    epsilon : float
-        An optional name to attach to this layer.
-
-    Examples
-    ---------
-    >>> outputs = tl.act.pixel_wise_softmax(network.outputs)
-    >>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_, epsilon=1e-5)
-
-    References
-    -----------
-    - `wiki-dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
-    """
-    # inse = tf.reduce_sum( tf.mul(output, target) )
-    # l = tf.reduce_sum( tf.mul(output, output) )
-    # r = tf.reduce_sum( tf.mul(target, target) )
-    inse = tf.reduce_sum( output * target )
-    l = tf.reduce_sum( output * output )
-    r = tf.reduce_sum( target * target )
-    dice = 2 * (inse) / (l + r)
-    if epsilon == 0:
-        return dice
-    else:
-        return tf.clip_by_value(dice, 0, 1.0-epsilon)
-
-
-def dice_hard_coe(output, target, epsilon=1e-10):
-    """Non-differentiable Sørensen–Dice coefficient for comparing the similarity of two distributions,
-    usually be used for binary image segmentation i.e. labels are binary.
-    The coefficient = [0, 1], 1 if totally match.
-
-    Parameters
-    -----------
-    output : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
-    target : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
-    epsilon : float
-        An optional name to attach to this layer.
-
-    Examples
-    ---------
-    >>> outputs = pixel_wise_softmax(network.outputs)
-    >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5)
-
-    References
-    -----------
-    - `wiki-dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
-    """
-    output = tf.cast(output > 0.5, dtype=tf.float32)
-    target = tf.cast(target > 0.5, dtype=tf.float32)
-    inse = tf.reduce_sum( output * target )
-    l = tf.reduce_sum( output * output )
-    r = tf.reduce_sum( target * target )
-    dice = 2 * (inse) / (l + r)
-    if epsilon == 0:
-        return dice
-    else:
-        return tf.clip_by_value(dice, 0, 1.0-epsilon)
-
-def iou_coe(output, target, threshold=0.5, epsilon=1e-10):
-    """Non-differentiable Intersection over Union, usually be used for evaluating binary image segmentation.
-    The coefficient = [0, 1], 1 means totally match.
-
-    Parameters
-    -----------
-    output : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
-    target : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
-    threshold : float
-        The threshold value to be true.
-    epsilon : float
-        A small value to avoid zero denominator when both output and target output nothing.
-
-    Examples
-    ---------
-    >>> outputs = tl.act.pixel_wise_softmax(network.outputs)
-    >>> iou = tl.cost.iou_coe(outputs[:,:,:,0], y_[:,:,:,0])
-
-    Notes
-    ------
-    - IOU cannot be used as training loss, people usually use dice coefficient for training, and IOU for evaluating.
-    """
-    pre = tf.cast(output > threshold, dtype=tf.float32)
-    truth = tf.cast(target > threshold, dtype=tf.float32)
-    intersection = tf.reduce_sum(pre * truth)
-    union = tf.reduce_sum(tf.cast((pre + truth) > threshold, dtype=tf.float32))
-    return tf.reduce_sum(intersection) / (tf.reduce_sum(union) + epsilon)
-
-
-def cross_entropy_seq(logits, target_seqs, batch_size=None):#, batch_size=1, num_steps=None):
-    """Returns the expression of cross-entropy of two sequences, implement
-    softmax internally. Normally be used for Fixed Length RNN outputs.
-
-    Parameters
-    ----------
-    logits : Tensorflow variable
-        2D tensor, ``network.outputs``, [batch_size*n_steps (n_examples), number of output units]
-    target_seqs : Tensorflow variable
-        target : 2D tensor [batch_size, n_steps], if the number of step is dynamic, please use ``cross_entropy_seq_with_mask`` instead.
-    batch_size : None or int.
-        If not None, the return cost will be divided by batch_size.
-
-    Examples
-    --------
-    >>> see PTB tutorial for more details
-    >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
-    >>> targets = tf.placeholder(tf.int32, [batch_size, num_steps])
-    >>> cost = tf.cost.cross_entropy_seq(network.outputs, targets)
-    """
-    try: # TF 1.0
-        sequence_loss_by_example_fn = tf.contrib.legacy_seq2seq.sequence_loss_by_example
-    except:
-        sequence_loss_by_example_fn = tf.nn.seq2seq.sequence_loss_by_example
-
-    loss = sequence_loss_by_example_fn(
-        [logits],
-        [tf.reshape(target_seqs, [-1])],
-        [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)])
-        # [tf.ones([batch_size * num_steps])])
-    cost = tf.reduce_sum(loss) #/ batch_size
-    if batch_size is not None:
-        cost = cost / batch_size
-    return cost
-
-
-def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=False, name=None):
-    """Returns the expression of cross-entropy of two sequences, implement
-    softmax internally. Normally be used for Dynamic RNN outputs.
-
-    Parameters
-    -----------
-    logits : network identity outputs
-        2D tensor, ``network.outputs``, [batch_size, number of output units].
-    target_seqs : int of tensor, like word ID.
-        [batch_size, ?]
-    input_mask : the mask to compute loss
-        The same size with target_seqs, normally 0 and 1.
-    return_details : boolean
-        - If False (default), only returns the loss.
-        - If True, returns the loss, losses, weights and targets (reshape to one vetcor).
-
-    Examples
-    --------
-    - see Image Captioning Example.
-    """
-    targets = tf.reshape(target_seqs, [-1])   # to one vector
-    weights = tf.to_float(tf.reshape(input_mask, [-1]))   # to one vector like targets
-    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name) * weights
-    #losses = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name)) # for TF1.0 and others
-
-    try: ## TF1.0
-        loss = tf.divide(tf.reduce_sum(losses),   # loss from mask. reduce_sum before element-wise mul with mask !!
-                        tf.reduce_sum(weights),
-                        name="seq_loss_with_mask")
-    except: ## TF0.12
-        loss = tf.div(tf.reduce_sum(losses),   # loss from mask. reduce_sum before element-wise mul with mask !!
-                        tf.reduce_sum(weights),
-                        name="seq_loss_with_mask")
-    if return_details:
-        return loss, losses, weights, targets
-    else:
-        return loss
-
-
-def cosine_similarity(v1, v2):
-    """Cosine similarity [-1, 1], `wiki <https://en.wikipedia.org/wiki/Cosine_similarity>`_.
-
-    Parameters
-    -----------
-    v1, v2 : tensor of [batch_size, n_feature], with the same number of features.
-
-    Returns
-    -----------
-    a tensor of [batch_size, ]
-    """
-    try: ## TF1.0
-        cost = tf.reduce_sum(tf.multiply(v1, v2), 1) / (tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) * tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1)))
-    except: ## TF0.12
-        cost = tf.reduce_sum(tf.mul(v1, v2), reduction_indices=1) / (tf.sqrt(tf.reduce_sum(tf.mul(v1, v1), reduction_indices=1)) * tf.sqrt(tf.reduce_sum(tf.mul(v2, v2), reduction_indices=1)))
-    return cost
-
-
-## Regularization Functions
-def li_regularizer(scale, scope=None):
-  """li regularization removes the neurons of previous layer, `i` represents `inputs`.\n
-  Returns a function that can be used to apply group li regularization to weights.\n
-  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
-
-  Parameters
-  ----------
-  scale : float
-    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
-  scope: An optional scope name for TF12+.
-
-  Returns
-  --------
-  A function with signature `li(weights, name=None)` that apply Li regularization.
-
-  Raises
-  ------
-  ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float.
-  """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-  # from tensorflow.python.platform import tf_logging as logging
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    if scale >= 1.:
-      raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-                       scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def li(weights, name=None):
-    """Applies li regularization to weights."""
-    with tf.name_scope('li_regularizer') as scope:
-        my_scale = ops.convert_to_tensor(scale,
-                                           dtype=weights.dtype.base_dtype,
-                                           name='scale')
-        if tf.__version__ <= '0.12':
-            standard_ops_fn = standard_ops.mul
-        else:
-            standard_ops_fn = standard_ops.multiply
-            return standard_ops_fn(
-              my_scale,
-              standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))),
-              name=scope)
-  return li
-
-
-
-def lo_regularizer(scale, scope=None):
-  """lo regularization removes the neurons of current layer, `o` represents `outputs`\n
-  Returns a function that can be used to apply group lo regularization to weights.\n
-  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
-
-  Parameters
-  ----------
-  scale : float
-    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
-  scope: An optional scope name for TF12+.
-
-  Returns
-  -------
-  A function with signature `lo(weights, name=None)` that apply Lo regularization.
-
-  Raises
-  ------
-  ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
-  """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-  # from tensorflow.python.platform import tf_logging as logging
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    if scale >= 1.:
-      raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-                       scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def lo(weights, name='lo_regularizer'):
-    """Applies group column regularization to weights."""
-    with tf.name_scope(name) as scope:
-        my_scale = ops.convert_to_tensor(scale,
-                                       dtype=weights.dtype.base_dtype,
-                                       name='scale')
-        if tf.__version__ <= '0.12':
-            standard_ops_fn = standard_ops.mul
-        else:
-            standard_ops_fn = standard_ops.multiply
-        return standard_ops_fn(
-          my_scale,
-          standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))),
-          name=scope)
-  return lo
-
-def maxnorm_regularizer(scale=1.0, scope=None):
-  """Max-norm regularization returns a function that can be used
-  to apply max-norm regularization to weights.
-  About max-norm: `wiki <https://en.wikipedia.org/wiki/Matrix_norm#Max_norm>`_.\n
-  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
-
-  Parameters
-  ----------
-  scale : float
-    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
-  scope: An optional scope name.
-
-  Returns
-  ---------
-  A function with signature `mn(weights, name=None)` that apply Lo regularization.
-
-  Raises
-  --------
-  ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
-  """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    # if scale >= 1.:
-    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-    #                    scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def mn(weights, name='max_regularizer'):
-    """Applies max-norm regularization to weights."""
-    with tf.name_scope(name) as scope:
-          my_scale = ops.convert_to_tensor(scale,
-                                           dtype=weights.dtype.base_dtype,
-                                           name='scale')
-          if tf.__version__ <= '0.12':
-              standard_ops_fn = standard_ops.mul
-          else:
-              standard_ops_fn = standard_ops.multiply
-          return standard_ops_fn(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope)
-  return mn
-
-def maxnorm_o_regularizer(scale, scope):
-  """Max-norm output regularization removes the neurons of current layer.\n
-  Returns a function that can be used to apply max-norm regularization to each column of weight matrix.\n
-  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
-
-  Parameters
-  ----------
-  scale : float
-    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
-  scope: An optional scope name.
-
-  Returns
-  ---------
-  A function with signature `mn_o(weights, name=None)` that apply Lo regularization.
-
-  Raises
-  ---------
-  ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
-  """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    # if scale >= 1.:
-    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-    #                    scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def mn_o(weights, name='maxnorm_o_regularizer'):
-     """Applies max-norm regularization to weights."""
-     with tf.name_scope(name) as scope:
-          my_scale = ops.convert_to_tensor(scale,
-                                           dtype=weights.dtype.base_dtype,
-                                                   name='scale')
-          if tf.__version__ <= '0.12':
-             standard_ops_fn = standard_ops.mul
-          else:
-             standard_ops_fn = standard_ops.multiply
-          return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope)
-  return mn_o
-
-def maxnorm_i_regularizer(scale, scope=None):
-  """Max-norm input regularization removes the neurons of previous layer.\n
-  Returns a function that can be used to apply max-norm regularization to each row of weight matrix.\n
-  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
-
-  Parameters
-  ----------
-  scale : float
-    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
-  scope: An optional scope name.
-
-  Returns
-  ---------
-  A function with signature `mn_i(weights, name=None)` that apply Lo regularization.
-
-  Raises
-  ---------
-  ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
-  """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    # if scale >= 1.:
-    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-    #                    scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def mn_i(weights, name='maxnorm_i_regularizer'):
-     """Applies max-norm regularization to weights."""
-     with tf.name_scope(name) as scope:
-          my_scale = ops.convert_to_tensor(scale,
-                                           dtype=weights.dtype.base_dtype,
-                                                   name='scale')
-          if tf.__version__ <= '0.12':
-             standard_ops_fn = standard_ops.mul
-          else:
-             standard_ops_fn = standard_ops.multiply
-          return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope)
-  return mn_i
-
-
-
-
-
-#
diff --git a/_tensorlayer/db.py b/_tensorlayer/db.py
deleted file mode 100755
index 02aa056..0000000
--- a/_tensorlayer/db.py
+++ /dev/null
@@ -1,449 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-import tensorflow as tf
-import tensorlayer as tl
-from . import iterate
-import numpy as np
-import time
-import math
-
-
-import pymongo
-import gridfs
-import pickle
-from pymongo import MongoClient
-from datetime import datetime
-
-class TensorDB(object):
-    """TensorDB is a MongoDB based manager that help you to manage data, model and logging.
-
-    Parameters
-    -------------
-    ip : string, localhost or IP address.
-    port : int, port number.
-    db_name : string, database name.
-    user_name : string, set to None if it donnot need authentication.
-    password : string.
-
-    Properties
-    ------------
-    db : ``pymongo.MongoClient[db_name]``, xxxxxx
-    datafs : ``gridfs.GridFS(self.db, collection="datafs")``, xxxxxxxxxx
-    modelfs : ``gridfs.GridFS(self.db, collection="modelfs")``,
-    paramsfs : ``gridfs.GridFS(self.db, collection="paramsfs")``,
-    db.Params : Collection for
-    db.TrainLog : Collection for
-    db.ValidLog : Collection for
-    db.TestLog : Collection for
-
-    Dependencies
-    -------------
-    1 : MongoDB, as TensorDB is based on MongoDB, you need to install it in your
-       local machine or remote machine.
-    2 : pip install pymongo, for MongoDB python API.
-
-    Optional Tools
-    ----------------
-    1 : You may like to install MongoChef or Mongo Management Studo APP for
-       visualizing or testing your MongoDB.
-    """
-    def __init__(
-        self,
-        ip = 'localhost',
-        port = 27017,
-        db_name = 'db_name',
-        user_name = None,
-        password = 'password',
-    ):
-        ## connect mongodb
-        client = MongoClient(ip, port)
-        self.db = client[db_name]
-        if user_name != None:
-            self.db.authenticate(user_name, password)
-        ## define file system (Buckets)
-        self.datafs = gridfs.GridFS(self.db, collection="datafs")
-        self.modelfs = gridfs.GridFS(self.db, collection="modelfs")
-        self.paramsfs = gridfs.GridFS(self.db, collection="paramsfs")
-        ##
-        print("[TensorDB] Connect SUCCESS {}:{} {} {}".format(ip, port, db_name, user_name))
-
-        self.ip = ip
-        self.port = port
-        self.db_name = db_name
-        self.user_name = user_name
-
-    # def save_bulk_data(self, data=None, filename='filename'):
-    #     """ Put bulk data into TensorDB.datafs, return file ID.
-    #     When you have a very large data, you may like to save it into GridFS Buckets
-    #     instead of Collections, then when you want to load it, XXXX
-    #
-    #     Parameters
-    #     -----------
-    #     data : serialized data.
-    #     filename : string, GridFS Buckets.
-    #
-    #     References
-    #     -----------
-    #     - MongoDB find, xxxxx
-    #     """
-    #     s = time.time()
-    #     f_id = self.datafs.put(data, filename=filename)
-    #     print("[TensorDB] save_bulk_data: {} took: {}s".format(filename, round(time.time()-s, 2)))
-    #     return f_id
-    #
-    # def save_collection(self, data=None, collect_name='collect_name'):
-    #     """ Insert data into MongoDB Collections, return xx.
-    #
-    #     Parameters
-    #     -----------
-    #     data : serialized data.
-    #     collect_name : string, MongoDB collection name.
-    #
-    #     References
-    #     -----------
-    #     - MongoDB find, xxxxx
-    #     """
-    #     s = time.time()
-    #     rl = self.db[collect_name].insert_many(data)
-    #     print("[TensorDB] save_collection: {} took: {}s".format(collect_name, round(time.time()-s, 2)))
-    #     return rl
-    #
-    # def find(self, args={}, collect_name='collect_name'):
-    #     """ Find data from MongoDB Collections.
-    #
-    #     Parameters
-    #     -----------
-    #     args : dictionary, arguments for finding.
-    #     collect_name : string, MongoDB collection name.
-    #
-    #     References
-    #     -----------
-    #     - MongoDB find, xxxxx
-    #     """
-    #     s = time.time()
-    #
-    #     pc = self.db[collect_name].find(args)  # pymongo.cursor.Cursor object
-    #     flist = pc.distinct('f_id')
-    #     fldict = {}
-    #     for f in flist: # you may have multiple Buckets files
-    #         # fldict[f] = pickle.loads(self.datafs.get(f).read())
-    #         # s2 = time.time()
-    #         tmp = self.datafs.get(f).read()
-    #         # print(time.time()-s2)
-    #         fldict[f] = pickle.loads(tmp)
-    #         # print(time.time()-s2)
-    #         # exit()
-    #     # print(round(time.time()-s, 2))
-    #     data = [fldict[x['f_id']][x['id']] for x in pc]
-    #     data = np.asarray(data)
-    #     print("[TensorDB] find: {} get: {} took: {}s".format(collect_name, pc.count(), round(time.time()-s, 2)))
-    #     return data
-
-    # def del_data(self, data, args={}):
-    #     pass
-    #
-    # def save_model(self):
-    #     pass
-    #
-    # def load_model(self):
-    #     pass
-    #
-    # def del_model(self):
-    #     pass
-
-    def save_params(self, params=[], args={}):#, file_name='parameters'):
-        """ Save parameters into MongoDB Buckets, and save the file ID into Params Collections.
-
-        Parameters
-        ----------
-        params : a list of parameters
-        args : dictionary, item meta data.
-
-        Returns
-        ---------
-        f_id : the Buckets ID of the parameters.
-        """
-        s = time.time()
-        f_id = self.paramsfs.put(pickle.dumps(params, protocol=2))#, file_name=file_name)
-        args.update({'f_id': f_id, 'time': datetime.utcnow()})
-        self.db.Params.insert_one(args)
-        # print("[TensorDB] Save params: {} SUCCESS, took: {}s".format(file_name, round(time.time()-s, 2)))
-        print("[TensorDB] Save params: SUCCESS, took: {}s".format(round(time.time()-s, 2)))
-        return f_id
-
-    def find_one_params(self, args={}):
-        """ Find one parameter from MongoDB Buckets.
-
-        Parameters
-        ----------
-        args : dictionary, find items.
-
-        Returns
-        --------
-        params : the parameters, return False if nothing found.
-        f_id : the Buckets ID of the parameters, return False if nothing found.
-        """
-        s = time.time()
-        d = self.db.Params.find_one(args)
-
-        if d is not None:
-            f_id = d['f_id']
-        else:
-            print("[TensorDB] FAIL! Cannot find: {}".format(args))
-            return False, False
-        try:
-            params = pickle.loads(self.paramsfs.get(f_id).read())
-            print("[TensorDB] Find one params SUCCESS, {} took: {}s".format(args, round(time.time()-s, 2)))
-            return params, f_id
-        except:
-            return False, False
-
-    def find_all_params(self, args={}):
-        """ Find all parameter from MongoDB Buckets
-
-        Parameters
-        ----------
-        args : dictionary, find items
-
-        Returns
-        --------
-        params : the parameters, return False if nothing found.
-        """
-        s = time.time()
-        pc = self.db.Params.find(args)
-
-        if pc is not None:
-            f_id_list = pc.distinct('f_id')
-            params = []
-            for f_id in f_id_list: # you may have multiple Buckets files
-                tmp = self.paramsfs.get(f_id).read()
-                params.append(pickle.loads(tmp))
-        else:
-            print("[TensorDB] FAIL! Cannot find any: {}".format(args))
-            return False
-
-        print("[TensorDB] Find all params SUCCESS, took: {}s".format(round(time.time()-s, 2)))
-        return params
-
-    def del_params(self, args={}):
-        """ Delete params in MongoDB uckets.
-
-        Parameters
-        -----------
-        args : dictionary, find items to delete, leave it empty to delete all parameters.
-        """
-        pc = self.db.Params.find(args)
-        f_id_list = pc.distinct('f_id')
-        # remove from Buckets
-        for f in f_id_list:
-            self.paramsfs.delete(f)
-        # remove from Collections
-        self.db.Params.remove(args)
-
-        print("[TensorDB] Delete params SUCCESS: {}".format(args))
-
-    def _print_dict(self, args):
-        # return " / ".join(str(key) + ": "+ str(value) for key, value in args.items())
-        string = ''
-        for key, value in args.items():
-            if key is not '_id':
-                string += str(key) + ": "+ str(value) + " / "
-        return string
-
-    def save_job(self, script=None, args={}):
-        """Save the job.
-
-        Parameters
-        -----------
-        script : a script file name or None.
-        args : dictionary, items to save.
-
-        Examples
-        ---------
-        >>> # Save your job
-        >>> db.save_job('your_script.py', {'job_id': 1, 'learning_rate': 0.01, 'n_units': 100})
-        >>> # Run your job
-        >>> temp = db.find_one_job(args={'job_id': 1})
-        >>> print(temp['learning_rate'])
-        ... 0.01
-        >>> import _your_script
-        ... running your script
-        """
-        if script is None:
-            _script = open(script, 'rb').read()
-            args.update({'script': _script, 'script_name': script})
-        _result = self.db.Job.insert_one(args)
-        _log = self._print_dict(args)
-        print("[TensorDB] Save Job: {}".format(script))
-        return _result
-
-    def find_one_job(self, args={}):
-        """ Find one job from MongoDB Job Collections.
-
-        Parameters
-        ----------
-        args : dictionary, find items.
-
-        Returns
-        --------
-        dictionary : contains all meta data and script.
-        """
-        temp = self.db.Job.find_one(args)
-
-        if 'script_name' in temp.keys():
-            f = open('_' + temp['script_name'], 'wb')
-            f.write(temp['script'])
-            f.close()
-        print("[TensorDB] Find Job: {}".format(args))
-        return temp
-
-    def train_log(self, args={}):
-        """Save the training log.
-
-        Parameters
-        -----------
-        args : dictionary, items to save.
-
-        Examples
-        ---------
-        >>> db.train_log(time=time.time(), {'loss': loss, 'acc': acc})
-        """
-        _result = self.db.TrainLog.insert_one(args)
-        _log = self._print_dict(args)
-        print("[TensorDB] TrainLog: " +_log)
-        return _result
-
-    def del_train_log(self, args={}):
-        """ Delete train log.
-
-        Parameters
-        -----------
-        args : dictionary, find items to delete, leave it empty to delete all log.
-        """
-        self.db.TrainLog.delete_many(args)
-        print("[TensorDB] Delete TrainLog SUCCESS")
-
-    def valid_log(self, args={}):
-        """Save the validating log.
-
-        Parameters
-        -----------
-        args : dictionary, items to save.
-
-        Examples
-        ---------
-        >>> db.valid_log(time=time.time(), {'loss': loss, 'acc': acc})
-        """
-        _result = self.db.ValidLog.insert_one(args)
-        # _log = "".join(str(key) + ": " + str(value) for key, value in args.items())
-        _log = self._print_dict(args)
-        print("[TensorDB] ValidLog: " +_log)
-        return _result
-
-    def del_valid_log(self, args={}):
-        """ Delete validation log.
-
-        Parameters
-        -----------
-        args : dictionary, find items to delete, leave it empty to delete all log.
-        """
-        self.db.ValidLog.delete_many(args)
-        print("[TensorDB] Delete ValidLog SUCCESS")
-
-    def test_log(self, args={}):
-        """Save the testing log.
-
-        Parameters
-        -----------
-        args : dictionary, items to save.
-
-        Examples
-        ---------
-        >>> db.test_log(time=time.time(), {'loss': loss, 'acc': acc})
-        """
-        _result = self.db.TestLog.insert_one(args)
-        # _log = "".join(str(key) + str(value) for key, value in args.items())
-        _log = self._print_dict(args)
-        print("[TensorDB] TestLog: " +_log)
-        return _result
-
-    def del_test_log(self, args={}):
-        """ Delete test log.
-
-        Parameters
-        -----------
-        args : dictionary, find items to delete, leave it empty to delete all log.
-        """
-        self.db.TestLog.delete_many(args)
-        print("[TensorDB] Delete TestLog SUCCESS")
-
-    def __str__(self):
-        _s = "[TensorDB] Info:\n"
-        _t = _s + "    " + str(self.db)
-        return _t
-
-
-
-
-
-if __name__ == '__main__':
-
-    db = TensorDB(ip='localhost', port=27017, db_name='mnist', user_name=None, password=None)
-
-    db.save_job('your_script.py', {'job_id': 1, 'learning_rate': 0.01, 'n_units': 100})
-    temp = db.find_one_job(args={'job_id': 1})
-
-    print(temp['learning_rate'])
-
-    import _your_script
-    print("import _your_script SUCCESS")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
diff --git a/_tensorlayer/files.py b/_tensorlayer/files.py
deleted file mode 100755
index b4ac65b..0000000
--- a/_tensorlayer/files.py
+++ /dev/null
@@ -1,858 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-import tensorflow as tf
-import os
-import numpy as np
-import re
-import sys
-import tarfile
-import gzip
-import zipfile
-from . import visualize
-from . import nlp
-import pickle
-from six.moves import urllib
-from six.moves import cPickle
-from six.moves import zip
-from tensorflow.python.platform import gfile
-
-
-## Load dataset functions
-def load_mnist_dataset(shape=(-1,784), path="data/mnist/"):
-    """Automatically download MNIST dataset
-    and return the training, validation and test set with 50000, 10000 and 10000
-    digit images respectively.
-
-    Parameters
-    ----------
-    shape : tuple
-        The shape of digit images, defaults to (-1,784)
-    path : string
-        Path to download data to, defaults to data/mnist/
-
-    Examples
-    --------
-    >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1,784))
-    >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
-    """
-    # We first define functions for loading MNIST images and labels.
-    # For convenience, they also download the requested files if needed.
-    def load_mnist_images(path, filename):
-        filepath = maybe_download_and_extract(filename, path, 'http://yann.lecun.com/exdb/mnist/')
-
-        print(filepath)
-        # Read the inputs in Yann LeCun's binary format.
-        with gzip.open(filepath, 'rb') as f:
-            data = np.frombuffer(f.read(), np.uint8, offset=16)
-        # The inputs are vectors now, we reshape them to monochrome 2D images,
-        # following the shape convention: (examples, channels, rows, columns)
-        data = data.reshape(shape)
-        # The inputs come as bytes, we convert them to float32 in range [0,1].
-        # (Actually to range [0, 255/256], for compatibility to the version
-        # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
-        return data / np.float32(256)
-
-    def load_mnist_labels(path, filename):
-        filepath = maybe_download_and_extract(filename, path, 'http://yann.lecun.com/exdb/mnist/')
-        # Read the labels in Yann LeCun's binary format.
-        with gzip.open(filepath, 'rb') as f:
-            data = np.frombuffer(f.read(), np.uint8, offset=8)
-        # The labels are vectors of integers now, that's exactly what we want.
-        return data
-
-    # Download and read the training and test set images and labels.
-    print("Load or Download MNIST > {}".format(path))
-    X_train = load_mnist_images(path, 'train-images-idx3-ubyte.gz')
-    y_train = load_mnist_labels(path, 'train-labels-idx1-ubyte.gz')
-    X_test = load_mnist_images(path, 't10k-images-idx3-ubyte.gz')
-    y_test = load_mnist_labels(path, 't10k-labels-idx1-ubyte.gz')
-
-    # We reserve the last 10000 training examples for validation.
-    X_train, X_val = X_train[:-10000], X_train[-10000:]
-    y_train, y_val = y_train[:-10000], y_train[-10000:]
-
-    # We just return all the arrays in order, as expected in main().
-    # (It doesn't matter how we do this as long as we can read them again.)
-    X_train = np.asarray(X_train, dtype=np.float32)
-    y_train = np.asarray(y_train, dtype=np.int32)
-    X_val = np.asarray(X_val, dtype=np.float32)
-    y_val = np.asarray(y_val, dtype=np.int32)
-    X_test = np.asarray(X_test, dtype=np.float32)
-    y_test = np.asarray(y_test, dtype=np.int32)
-    return X_train, y_train, X_val, y_val, X_test, y_test
-
-
-def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data/cifar10/', plotable=False, second=3):
-    """The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with
-    6000 images per class. There are 50000 training images and 10000 test images.
-
-    The dataset is divided into five training batches and one test batch, each with
-    10000 images. The test batch contains exactly 1000 randomly-selected images from
-    each class. The training batches contain the remaining images in random order,
-    but some training batches may contain more images from one class than another.
-    Between them, the training batches contain exactly 5000 images from each class.
-
-    Parameters
-    ----------
-    shape : tupe
-        The shape of digit images: e.g. (-1, 3, 32, 32) , (-1, 32, 32, 3) , (-1, 32*32*3)
-    plotable : True, False
-        Whether to plot some image examples.
-    second : int
-        If ``plotable`` is True, ``second`` is the display time.
-    path : string
-        Path to download data to, defaults to data/cifar10/
-
-    Examples
-    --------
-    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=True)
-
-    Notes
-    ------
-    CIFAR-10 images can only be display without color change under uint8.
-    >>> X_train = np.asarray(X_train, dtype=np.uint8)
-    >>> plt.ion()
-    >>> fig = plt.figure(1232)
-    >>> count = 1
-    >>> for row in range(10):
-    >>>     for col in range(10):
-    >>>         a = fig.add_subplot(10, 10, count)
-    >>>         plt.imshow(X_train[count-1], interpolation='nearest')
-    >>>         plt.gca().xaxis.set_major_locator(plt.NullLocator())    # 不显示刻度(tick)
-    >>>         plt.gca().yaxis.set_major_locator(plt.NullLocator())
-    >>>         count = count + 1
-    >>> plt.draw()
-    >>> plt.pause(3)
-
-    References
-    ----------
-    - `CIFAR website <https://www.cs.toronto.edu/~kriz/cifar.html>`_
-    - `Data download link <https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz>`_
-    - `Code references <https://teratail.com/questions/28932>`_
-    """
-
-    print("Load or Download cifar10 > {}".format(path))
-
-    #Helper function to unpickle the data
-    def unpickle(file):
-        fp = open(file, 'rb')
-        if sys.version_info.major == 2:
-            data = pickle.load(fp)
-        elif sys.version_info.major == 3:
-            data = pickle.load(fp, encoding='latin-1')
-        fp.close()
-        return data
-
-    filename = 'cifar-10-python.tar.gz'
-    url = 'https://www.cs.toronto.edu/~kriz/'
-    #Download and uncompress file
-    maybe_download_and_extract(filename, path, url, extract=True)
-
-    #Unpickle file and fill in data
-    X_train = None
-    y_train = []
-    for i in range(1,6):
-        data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "data_batch_{}".format(i)))
-        if i == 1:
-            X_train = data_dic['data']
-        else:
-            X_train = np.vstack((X_train, data_dic['data']))
-        y_train += data_dic['labels']
-
-    test_data_dic = unpickle(os.path.join(path,  'cifar-10-batches-py/', "test_batch"))
-    X_test = test_data_dic['data']
-    y_test = np.array(test_data_dic['labels'])
-
-    if shape == (-1, 3, 32, 32):
-        X_test = X_test.reshape(shape)
-        X_train = X_train.reshape(shape)
-    elif shape == (-1, 32, 32, 3):
-        X_test = X_test.reshape(shape, order='F')
-        X_train = X_train.reshape(shape, order='F')
-        X_test = np.transpose(X_test, (0, 2, 1, 3))
-        X_train = np.transpose(X_train, (0, 2, 1, 3))
-    else:
-        X_test = X_test.reshape(shape)
-        X_train = X_train.reshape(shape)
-
-    y_train = np.array(y_train)
-
-    if plotable == True:
-        print('\nCIFAR-10')
-        import matplotlib.pyplot as plt
-        fig = plt.figure(1)
-
-        print('Shape of a training image: X_train[0]',X_train[0].shape)
-
-        plt.ion()       # interactive mode
-        count = 1
-        for row in range(10):
-            for col in range(10):
-                a = fig.add_subplot(10, 10, count)
-                if shape == (-1, 3, 32, 32):
-                    # plt.imshow(X_train[count-1], interpolation='nearest')
-                    plt.imshow(np.transpose(X_train[count-1], (1, 2, 0)), interpolation='nearest')
-                    # plt.imshow(np.transpose(X_train[count-1], (2, 1, 0)), interpolation='nearest')
-                elif shape == (-1, 32, 32, 3):
-                    plt.imshow(X_train[count-1], interpolation='nearest')
-                    # plt.imshow(np.transpose(X_train[count-1], (1, 0, 2)), interpolation='nearest')
-                else:
-                    raise Exception("Do not support the given 'shape' to plot the image examples")
-                plt.gca().xaxis.set_major_locator(plt.NullLocator())    # 不显示刻度(tick)
-                plt.gca().yaxis.set_major_locator(plt.NullLocator())
-                count = count + 1
-        plt.draw()      # interactive mode
-        plt.pause(3)   # interactive mode
-
-        print("X_train:",X_train.shape)
-        print("y_train:",y_train.shape)
-        print("X_test:",X_test.shape)
-        print("y_test:",y_test.shape)
-
-    X_train = np.asarray(X_train, dtype=np.float32)
-    X_test = np.asarray(X_test, dtype=np.float32)
-    y_train = np.asarray(y_train, dtype=np.int32)
-    y_test = np.asarray(y_test, dtype=np.int32)
-
-    return X_train, y_train, X_test, y_test
-
-
-def load_ptb_dataset(path='data/ptb/'):
-    """Penn TreeBank (PTB) dataset is used in many LANGUAGE MODELING papers,
-    including "Empirical Evaluation and Combination of Advanced Language
-    Modeling Techniques", "Recurrent Neural Network Regularization".
-
-    It consists of 929k training words, 73k validation words, and 82k test
-    words. It has 10k words in its vocabulary.
-
-    In "Recurrent Neural Network Regularization", they trained regularized LSTMs
-    of two sizes; these are denoted the medium LSTM and large LSTM. Both LSTMs
-    have two layers and are unrolled for 35 steps. They initialize the hidden
-    states to zero. They then use the final hidden states of the current
-    minibatch as the initial hidden state of the subsequent minibatch
-    (successive minibatches sequentially traverse the training set).
-    The size of each minibatch is 20.
-
-    The medium LSTM has 650 units per layer and its parameters are initialized
-    uniformly in [−0.05, 0.05]. They apply 50% dropout on the non-recurrent
-    connections. They train the LSTM for 39 epochs with a learning rate of 1,
-    and after 6 epochs they decrease it by a factor of 1.2 after each epoch.
-    They clip the norm of the gradients (normalized by minibatch size) at 5.
-
-    The large LSTM has 1500 units per layer and its parameters are initialized
-    uniformly in [−0.04, 0.04]. We apply 65% dropout on the non-recurrent
-    connections. They train the model for 55 epochs with a learning rate of 1;
-    after 14 epochs they start to reduce the learning rate by a factor of 1.15
-    after each epoch. They clip the norm of the gradients (normalized by
-    minibatch size) at 10.
-
-    Parameters
-    ----------
-    path : : string
-        Path to download data to, defaults to data/ptb/
-
-    Returns
-    --------
-    train_data, valid_data, test_data, vocabulary size
-
-    Examples
-    --------
-    >>> train_data, valid_data, test_data, vocab_size = tl.files.load_ptb_dataset()
-
-    Code References
-    ---------------
-    - ``tensorflow.models.rnn.ptb import reader``
-
-    Download Links
-    ---------------
-    - `Manual download <http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz>`_
-    """
-    print("Load or Download Penn TreeBank (PTB) dataset > {}".format(path))
-
-    #Maybe dowload and uncompress tar, or load exsisting files
-    filename = 'simple-examples.tgz'
-    url = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/'
-    maybe_download_and_extract(filename, path, url, extract=True)
-
-    data_path = os.path.join(path, 'simple-examples', 'data')
-    train_path = os.path.join(data_path, "ptb.train.txt")
-    valid_path = os.path.join(data_path, "ptb.valid.txt")
-    test_path = os.path.join(data_path, "ptb.test.txt")
-
-    word_to_id = nlp.build_vocab(nlp.read_words(train_path))
-
-    train_data = nlp.words_to_word_ids(nlp.read_words(train_path), word_to_id)
-    valid_data = nlp.words_to_word_ids(nlp.read_words(valid_path), word_to_id)
-    test_data = nlp.words_to_word_ids(nlp.read_words(test_path), word_to_id)
-    vocabulary = len(word_to_id)
-
-    # print(nlp.read_words(train_path))     # ... 'according', 'to', 'mr.', '<unk>', '<eos>']
-    # print(train_data)                 # ...  214,         5,    23,    1,       2]
-    # print(word_to_id)                 # ... 'beyond': 1295, 'anti-nuclear': 9599, 'trouble': 1520, '<eos>': 2 ... }
-    # print(vocabulary)                 # 10000
-    # exit()
-    return train_data, valid_data, test_data, vocabulary
-
-
-def load_matt_mahoney_text8_dataset(path='data/mm_test8/'):
-    """Download a text file from Matt Mahoney's website
-    if not present, and make sure it's the right size.
-    Extract the first file enclosed in a zip file as a list of words.
-    This dataset can be used for Word Embedding.
-
-    Parameters
-    ----------
-    path : : string
-        Path to download data to, defaults to data/mm_test8/
-
-    Returns
-    --------
-    word_list : a list
-        a list of string (word).\n
-        e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...]
-
-    Examples
-    --------
-    >>> words = tl.files.load_matt_mahoney_text8_dataset()
-    >>> print('Data size', len(words))
-    """
-
-    print("Load or Download matt_mahoney_text8 Dataset> {}".format(path))
-
-    filename = 'text8.zip'
-    url = 'http://mattmahoney.net/dc/'
-    maybe_download_and_extract(filename, path, url, expected_bytes=31344016)
-
-    with zipfile.ZipFile(os.path.join(path, filename)) as f:
-        word_list = f.read(f.namelist()[0]).split()
-
-    return word_list
-
-
-def load_imdb_dataset(path='data/imdb/', nb_words=None, skip_top=0,
-              maxlen=None, test_split=0.2, seed=113,
-              start_char=1, oov_char=2, index_from=3):
-    """Load IMDB dataset
-
-    Parameters
-    ----------
-    path : : string
-        Path to download data to, defaults to data/imdb/
-
-    Examples
-    --------
-    >>> X_train, y_train, X_test, y_test = tl.files.load_imbd_dataset(
-    ...                                 nb_words=20000, test_split=0.2)
-    >>> print('X_train.shape', X_train.shape)
-    ... (20000,)  [[1, 62, 74, ... 1033, 507, 27],[1, 60, 33, ... 13, 1053, 7]..]
-    >>> print('y_train.shape', y_train.shape)
-    ... (20000,)  [1 0 0 ..., 1 0 1]
-
-    References
-    -----------
-    - `Modified from keras. <https://github.com/fchollet/keras/blob/master/keras/datasets/imdb.py>`_
-    """
-
-    filename = "imdb.pkl"
-    url = 'https://s3.amazonaws.com/text-datasets/'
-    maybe_download_and_extract(filename, path, url)
-
-    if filename.endswith(".gz"):
-        f = gzip.open(os.path.join(path, filename), 'rb')
-    else:
-        f = open(os.path.join(path, filename), 'rb')
-
-    X, labels = cPickle.load(f)
-    f.close()
-
-    np.random.seed(seed)
-    np.random.shuffle(X)
-    np.random.seed(seed)
-    np.random.shuffle(labels)
-
-    if start_char is not None:
-        X = [[start_char] + [w + index_from for w in x] for x in X]
-    elif index_from:
-        X = [[w + index_from for w in x] for x in X]
-
-    if maxlen:
-        new_X = []
-        new_labels = []
-        for x, y in zip(X, labels):
-            if len(x) < maxlen:
-                new_X.append(x)
-                new_labels.append(y)
-        X = new_X
-        labels = new_labels
-    if not X:
-        raise Exception('After filtering for sequences shorter than maxlen=' +
-                        str(maxlen) + ', no sequence was kept. '
-                        'Increase maxlen.')
-    if not nb_words:
-        nb_words = max([max(x) for x in X])
-
-    # by convention, use 2 as OOV word
-    # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV)
-    if oov_char is not None:
-        X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X]
-    else:
-        nX = []
-        for x in X:
-            nx = []
-            for w in x:
-                if (w >= nb_words or w < skip_top):
-                    nx.append(w)
-            nX.append(nx)
-        X = nX
-
-    X_train = np.array(X[:int(len(X) * (1 - test_split))])
-    y_train = np.array(labels[:int(len(X) * (1 - test_split))])
-
-    X_test = np.array(X[int(len(X) * (1 - test_split)):])
-    y_test = np.array(labels[int(len(X) * (1 - test_split)):])
-
-    return X_train, y_train, X_test, y_test
-
-def load_nietzsche_dataset(path='data/nietzsche/'):
-    """Load Nietzsche dataset.
-    Returns a string.
-
-    Parameters
-    ----------
-    path : string
-        Path to download data to, defaults to data/nietzsche/
-
-    Examples
-    --------
-    >>> see tutorial_generate_text.py
-    >>> words = tl.files.load_nietzsche_dataset()
-    >>> words = basic_clean_str(words)
-    >>> words = words.split()
-    """
-    print("Load or Download nietzsche dataset > {}".format(path))
-
-    filename = "nietzsche.txt"
-    url = 'https://s3.amazonaws.com/text-datasets/'
-    filepath = maybe_download_and_extract(filename, path, url)
-
-    with open(filepath, "r") as f:
-        words = f.read()
-        return words
-
-def load_wmt_en_fr_dataset(path='data/wmt_en_fr/'):
-    """It will download English-to-French translation data from the WMT'15
-    Website (10^9-French-English corpus), and the 2013 news test from
-    the same site as development set.
-    Returns the directories of training data and test data.
-
-    Parameters
-    ----------
-    path : string
-        Path to download data to, defaults to data/wmt_en_fr/
-
-    References
-    ----------
-    - Code modified from /tensorflow/models/rnn/translation/data_utils.py
-
-    Notes
-    -----
-    Usually, it will take a long time to download this dataset.
-    """
-    # URLs for WMT data.
-    _WMT_ENFR_TRAIN_URL = "http://www.statmt.org/wmt10/"
-    _WMT_ENFR_DEV_URL = "http://www.statmt.org/wmt15/"
-
-    def gunzip_file(gz_path, new_path):
-        """Unzips from gz_path into new_path."""
-        print("Unpacking %s to %s" % (gz_path, new_path))
-        with gzip.open(gz_path, "rb") as gz_file:
-            with open(new_path, "wb") as new_file:
-                for line in gz_file:
-                    new_file.write(line)
-
-    def get_wmt_enfr_train_set(path):
-        """Download the WMT en-fr training corpus to directory unless it's there."""
-        filename = "training-giga-fren.tar"
-        maybe_download_and_extract(filename, path, _WMT_ENFR_TRAIN_URL, extract=True)
-        train_path = os.path.join(path, "giga-fren.release2.fixed")
-        gunzip_file(train_path + ".fr.gz", train_path + ".fr")
-        gunzip_file(train_path + ".en.gz", train_path + ".en")
-        return train_path
-
-    def get_wmt_enfr_dev_set(path):
-        """Download the WMT en-fr training corpus to directory unless it's there."""
-        filename = "dev-v2.tgz"
-        dev_file = maybe_download_and_extract(filename, path, _WMT_ENFR_DEV_URL, extract=False)
-        dev_name = "newstest2013"
-        dev_path = os.path.join(path, "newstest2013")
-        if not (gfile.Exists(dev_path + ".fr") and gfile.Exists(dev_path + ".en")):
-            print("Extracting tgz file %s" % dev_file)
-            with tarfile.open(dev_file, "r:gz") as dev_tar:
-              fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr")
-              en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en")
-              fr_dev_file.name = dev_name + ".fr"  # Extract without "dev/" prefix.
-              en_dev_file.name = dev_name + ".en"
-              dev_tar.extract(fr_dev_file, path)
-              dev_tar.extract(en_dev_file, path)
-        return dev_path
-
-    print("Load or Download WMT English-to-French translation > {}".format(path))
-
-    train_path = get_wmt_enfr_train_set(path)
-    dev_path = get_wmt_enfr_dev_set(path)
-
-    return train_path, dev_path
-
-
-## Load and save network
-def save_npz(save_list=[], name='model.npz', sess=None):
-    """Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore.
-
-    Parameters
-    ----------
-    save_list : a list
-        Parameters want to be saved.
-    name : a string or None
-        The name of the .npz file.
-    sess : None or Session
-
-    Examples
-    --------
-    >>> tl.files.save_npz(network.all_params, name='model_test.npz', sess=sess)
-    ... File saved to: model_test.npz
-    >>> load_params = tl.files.load_npz(name='model_test.npz')
-    ... Loading param0, (784, 800)
-    ... Loading param1, (800,)
-    ... Loading param2, (800, 800)
-    ... Loading param3, (800,)
-    ... Loading param4, (800, 10)
-    ... Loading param5, (10,)
-    >>> put parameters into a TensorLayer network, please see assign_params()
-
-    Notes
-    -----
-    If you got session issues, you can change the value.eval() to value.eval(session=sess)
-
-    References
-    ----------
-    - `Saving dictionary using numpy <http://stackoverflow.com/questions/22315595/saving-dictionary-of-header-information-using-numpy-savez>`_
-    """
-    ## save params into a list
-    save_list_var = []
-    if sess:
-        save_list_var = sess.run(save_list)
-    else:
-        try:
-            for k, value in enumerate(save_list):
-                save_list_var.append(value.eval())
-        except:
-            print(" Fail to save model, Hint: pass the session into this function, save_npz(network.all_params, name='model.npz', sess=sess)")
-    np.savez(name, params=save_list_var)
-    save_list_var = None
-    del save_list_var
-    print("[*] %s saved" % name)
-
-    ## save params into a dictionary
-    # rename_dict = {}
-    # for k, value in enumerate(save_dict):
-    #     rename_dict.update({'param'+str(k) : value.eval()})
-    # np.savez(name, **rename_dict)
-    # print('Model is saved to: %s' % name)
-
-def load_npz(path='', name='model.npz'):
-    """Load the parameters of a Model saved by tl.files.save_npz().
-
-    Parameters
-    ----------
-    path : a string
-        Folder path to .npz file.
-    name : a string or None
-        The name of the .npz file.
-
-    Returns
-    --------
-    params : list
-        A list of parameters in order.
-
-    Examples
-    --------
-    - See save_npz and assign_params
-
-    References
-    ----------
-    - `Saving dictionary using numpy <http://stackoverflow.com/questions/22315595/saving-dictionary-of-header-information-using-numpy-savez>`_
-    """
-    ## if save_npz save params into a dictionary
-    # d = np.load( path+name )
-    # params = []
-    # print('Load Model')
-    # for key, val in sorted( d.items() ):
-    #     params.append(val)
-    #     print('Loading %s, %s' % (key, str(val.shape)))
-    # return params
-    ## if save_npz save params into a list
-    d = np.load( path+name )
-    # for val in sorted( d.items() ):
-    #     params = val
-    #     return params
-    return d['params']
-    # print(d.items()[0][1]['params'])
-    # exit()
-    # return d.items()[0][1]['params']
-
-def assign_params(sess, params, network):
-    """Assign the given parameters to the TensorLayer network.
-
-    Parameters
-    ----------
-    sess : TensorFlow Session
-    params : a list
-        A list of parameters in order.
-    network : a :class:`Layer` class
-        The network to be assigned
-
-    Examples
-    --------
-    >>> Save your network as follow:
-    >>> tl.files.save_npz(network.all_params, name='model_test.npz')
-    >>> network.print_params()
-    ...
-    ... Next time, load and assign your network as follow:
-    >>> sess.run(tf.initialize_all_variables()) # re-initialize, then save and assign
-    >>> load_params = tl.files.load_npz(name='model_test.npz')
-    >>> tl.files.assign_params(sess, load_params, network)
-    >>> network.print_params()
-
-    References
-    ----------
-    - `Assign value to a TensorFlow variable <http://stackoverflow.com/questions/34220532/how-to-assign-value-to-a-tensorflow-variable>`_
-    """
-    ops = []
-    for idx, param in enumerate(params):
-        ops.append(network.all_params[idx].assign(param))
-    sess.run(ops)
-
-def load_and_assign_npz(sess=None, name=None, network=None):
-    """Load model from npz and assign to a network.
-
-    Parameters
-    -------------
-    sess : TensorFlow Session
-    name : string
-        Model path.
-    network : a :class:`Layer` class
-        The network to be assigned
-
-    Returns
-    --------
-    Returns False if faild to model is not exist.
-
-    Examples
-    ---------
-    >>> tl.files.load_and_assign_npz(sess=sess, name='net.npz', network=net)
-    """
-    assert network is not None
-    assert sess is not None
-    if not os.path.exists(name):
-        print("[!] Load {} failed!".format(name))
-        return False
-    else:
-        params = load_npz(name=name)
-        assign_params(sess, params, network)
-        print("[*] Load {} SUCCESS!".format(name))
-        return network
-
-# Load and save variables
-def save_any_to_npy(save_dict={}, name='file.npy'):
-    """Save variables to .npy file.
-
-    Examples
-    ---------
-    >>> tl.files.save_any_to_npy(save_dict={'data': ['a','b']}, name='test.npy')
-    >>> data = tl.files.load_npy_to_any(name='test.npy')
-    >>> print(data)
-    ... {'data': ['a','b']}
-    """
-    np.save(name, save_dict)
-
-def load_npy_to_any(path='', name='file.npy'):
-    """Load .npy file.
-
-    Examples
-    ---------
-    - see save_any_to_npy()
-    """
-    file_path = os.path.join(path, name)
-    try:
-        npy = np.load(file_path).item()
-    except:
-        npy = np.load(file_path)
-    finally:
-        try:
-            return npy
-        except:
-            print("[!] Fail to load %s" % file_path)
-            exit()
-
-
-# Visualizing npz files
-def npz_to_W_pdf(path=None, regx='w1pre_[0-9]+\.(npz)'):
-    """Convert the first weight matrix of .npz file to .pdf by using tl.visualize.W().
-
-    Parameters
-    ----------
-    path : a string or None
-        A folder path to npz files.
-    regx : a string
-        Regx for the file name.
-
-    Examples
-    --------
-    >>> Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf.
-    >>> tl.files.npz_to_W_pdf(path='/Users/.../npz_file/', regx='w1pre_[0-9]+\.(npz)')
-    """
-    file_list = load_file_list(path=path, regx=regx)
-    for f in file_list:
-        W = load_npz(path, f)[0]
-        print("%s --> %s" % (f, f.split('.')[0]+'.pdf'))
-        visualize.W(W, second=10, saveable=True, name=f.split('.')[0], fig_idx=2012)
-
-
-## Helper functions
-def load_file_list(path=None, regx='\.npz', printable=True):
-    """Return a file list in a folder by given a path and regular expression.
-
-    Parameters
-    ----------
-    path : a string or None
-        A folder path.
-    regx : a string
-        The regx of file name.
-    printable : boolean, whether to print the files infomation.
-
-    Examples
-    ----------
-    >>> file_list = tl.files.load_file_list(path=None, regx='w1pre_[0-9]+\.(npz)')
-    """
-    if path == False:
-        path = os.getcwd()
-    file_list = os.listdir(path)
-    return_list = []
-    for idx, f in enumerate(file_list):
-        if re.search(regx, f):
-            return_list.append(f)
-    # return_list.sort()
-    if printable:
-        print('Match file list = %s' % return_list)
-        print('Number of files = %d' % len(return_list))
-    return return_list
-
-def load_folder_list(path=""):
-    """Return a folder list in a folder by given a folder path.
-
-    Parameters
-    ----------
-    path : a string or None
-        A folder path.
-    """
-    return [os.path.join(path,o) for o in os.listdir(path) if os.path.isdir(os.path.join(path,o))]
-
-def exists_or_mkdir(path, verbose=True):
-    """Check a folder by given name, if not exist, create the folder and return False,
-    if directory exists, return True.
-
-    Parameters
-    ----------
-    path : a string
-        A folder path.
-    verbose : boolean
-        If True, prints results, deaults is True
-
-    Returns
-    --------
-    True if folder exist, otherwise, returns False and create the folder
-
-    Examples
-    --------
-    >>> tl.files.exists_or_mkdir("checkpoints/train")
-    """
-    if not os.path.exists(path):
-        if verbose:
-            print("[*] creates %s ..." % path)
-        os.makedirs(path)
-        return False
-    else:
-        if verbose:
-            print("[!] %s exists ..." % path)
-        return True
-
-def maybe_download_and_extract(filename, working_directory, url_source, extract=False, expected_bytes=None):
-    """Checks if file exists in working_directory otherwise tries to dowload the file,
-    and optionally also tries to extract the file if format is ".zip" or ".tar"
-
-    Parameters
-    ----------
-    filename : string
-        The name of the (to be) dowloaded file.
-    working_directory : string
-        A folder path to search for the file in and dowload the file to
-    url : string
-        The URL to download the file from
-    extract : bool, defaults to False
-        If True, tries to uncompress the dowloaded file is ".tar.gz/.tar.bz2" or ".zip" file
-    expected_bytes : int/None
-        If set tries to verify that the downloaded file is of the specified size, otherwise raises an Exception,
-        defaults to None which corresponds to no check being performed
-    Returns
-    ----------
-    filepath to dowloaded (uncompressed) file
-
-    Examples
-    --------
-    >>> down_file = tl.files.maybe_download_and_extract(filename = 'train-images-idx3-ubyte.gz',
-                                                        working_directory = 'data/',
-                                                        url_source = 'http://yann.lecun.com/exdb/mnist/')
-    >>> tl.files.maybe_download_and_extract(filename = 'ADEChallengeData2016.zip',
-                                            working_directory = 'data/',
-                                            url_source = 'http://sceneparsing.csail.mit.edu/data/',
-                                            extract=True)
-    """
-    # We first define a download function, supporting both Python 2 and 3.
-    def _download(filename, working_directory, url_source):
-        def _dlProgress(count, blockSize, totalSize):
-            if(totalSize != 0):
-                percent = float(count * blockSize) / float(totalSize) * 100.0
-                sys.stdout.write("\r" "Downloading " + filename + "...%d%%" % percent)
-                sys.stdout.flush()
-        if sys.version_info[0] == 2:
-            from urllib import urlretrieve
-        else:
-            from urllib.request import urlretrieve
-        filepath = os.path.join(working_directory, filename)
-        urlretrieve(url_source+filename, filepath, reporthook=_dlProgress)
-
-    exists_or_mkdir(working_directory, verbose=False)
-    filepath = os.path.join(working_directory, filename)
-
-    if not os.path.exists(filepath):
-        _download(filename, working_directory, url_source)
-        print()
-        statinfo = os.stat(filepath)
-        print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')
-        if(not(expected_bytes is None) and (expected_bytes != statinfo.st_size)):
-            raise Exception('Failed to verify ' + filename + '. Can you get to it with a browser?')
-        if(extract):
-            if tarfile.is_tarfile(filepath):
-                print('Trying to extract tar file')
-                tarfile.open(filepath, 'r').extractall(working_directory)
-                print('... Success!')
-            elif zipfile.is_zipfile(filepath):
-                print('Trying to extract zip file')
-                with zipfile.ZipFile(filepath) as zf:
-                    zf.extractall(working_directory)
-                print('... Success!')
-            else:
-                print("Unknown compression_format only .tar.gz/.tar.bz2/.tar and .zip supported")
-    return filepath
diff --git a/_tensorlayer/iterate.py b/_tensorlayer/iterate.py
deleted file mode 100755
index 9778fc6..0000000
--- a/_tensorlayer/iterate.py
+++ /dev/null
@@ -1,432 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-
-import numpy as np
-from six.moves import xrange
-
-def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
-    """Generate a generator that input a group of example in numpy.array and
-    their labels, return the examples and labels by the given batchsize.
-
-    Parameters
-    ----------
-    inputs : numpy.array
-        (X) The input features, every row is a example.
-    targets : numpy.array
-        (y) The labels of inputs, every row is a example.
-    batch_size : int
-        The batch size.
-    shuffle : boolean
-        Indicating whether to use a shuffling queue, shuffle the dataset before return.
-
-    Hints
-    -------
-    - If you have two inputs, e.g. X1 (1000, 100) and X2 (1000, 80), you can ``np.hstack((X1, X2))
-    into (1000, 180) and feed into ``inputs``, then you can split a batch of X1 and X2.
-
-    Examples
-    --------
-    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
-    >>> y = np.asarray([0,1,2,3,4,5])
-    >>> for batch in tl.iterate.minibatches(inputs=X, targets=y, batch_size=2, shuffle=False):
-    >>>     print(batch)
-    ... (array([['a', 'a'],
-    ...        ['b', 'b']],
-    ...         dtype='<U1'), array([0, 1]))
-    ... (array([['c', 'c'],
-    ...        ['d', 'd']],
-    ...         dtype='<U1'), array([2, 3]))
-    ... (array([['e', 'e'],
-    ...        ['f', 'f']],
-    ...         dtype='<U1'), array([4, 5]))
-    """
-    assert len(inputs) == len(targets)
-    if shuffle:
-        indices = np.arange(len(inputs))
-        np.random.shuffle(indices)
-    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
-        if shuffle:
-            excerpt = indices[start_idx:start_idx + batch_size]
-        else:
-            excerpt = slice(start_idx, start_idx + batch_size)
-        yield inputs[excerpt], targets[excerpt]
-
-def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1):
-    """Generate a generator that return a batch of sequence inputs and targets.
-    If ``batch_size = 100, seq_length = 5``, one return will have ``500`` rows (examples).
-
-    Examples
-    --------
-    - Synced sequence input and output.
-    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
-    >>> y = np.asarray([0, 1, 2, 3, 4, 5])
-    >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1):
-    >>>     print(batch)
-    ... (array([['a', 'a'],
-    ...        ['b', 'b'],
-    ...         ['b', 'b'],
-    ...         ['c', 'c']],
-    ...         dtype='<U1'), array([0, 1, 1, 2]))
-    ... (array([['c', 'c'],
-    ...         ['d', 'd'],
-    ...         ['d', 'd'],
-    ...         ['e', 'e']],
-    ...         dtype='<U1'), array([2, 3, 3, 4]))
-    ...
-    ...
-
-    - Many to One
-    >>> return_last = True
-    >>> num_steps = 2
-    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
-    >>> Y = np.asarray([0,1,2,3,4,5])
-    >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=Y, batch_size=2, seq_length=num_steps, stride=1):
-    >>>     x, y = batch
-    >>>     if return_last:
-    >>>         tmp_y = y.reshape((-1, num_steps) + y.shape[1:])
-    >>>     y = tmp_y[:, -1]
-    >>>     print(x, y)
-    ... [['a' 'a']
-    ... ['b' 'b']
-    ... ['b' 'b']
-    ... ['c' 'c']] [1 2]
-    ... [['c' 'c']
-    ... ['d' 'd']
-    ... ['d' 'd']
-    ... ['e' 'e']] [3 4]
-    """
-    assert len(inputs) == len(targets)
-    n_loads = (batch_size * stride) + (seq_length - stride)
-    for start_idx in range(0, len(inputs) - n_loads + 1, (batch_size * stride)):
-        seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:],
-                              dtype=inputs.dtype)
-        seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:],
-                               dtype=targets.dtype)
-        for b_idx in xrange(batch_size):
-            start_seq_idx = start_idx + (b_idx * stride)
-            end_seq_idx = start_seq_idx + seq_length
-            seq_inputs[b_idx] = inputs[start_seq_idx:end_seq_idx]
-            seq_targets[b_idx] = targets[start_seq_idx:end_seq_idx]
-        flatten_inputs = seq_inputs.reshape((-1,) + inputs.shape[1:])
-        flatten_targets = seq_targets.reshape((-1,) + targets.shape[1:])
-        yield flatten_inputs, flatten_targets
-
-def seq_minibatches2(inputs, targets, batch_size, num_steps):
-    """Generate a generator that iterates on two list of words. Yields (Returns) the source contexts and
-    the target context by the given batch_size and num_steps (sequence_length),
-    see ``PTB tutorial``. In TensorFlow's tutorial, this generates the batch_size pointers into the raw
-    PTB data, and allows minibatch iteration along these pointers.
-
-    - Hint, if the input data are images, you can modify the code as follow.
-
-    .. code-block:: python
-
-        from
-        data = np.zeros([batch_size, batch_len)
-        to
-        data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]])
-
-    Parameters
-    ----------
-    inputs : a list
-            the context in list format; note that context usually be
-            represented by splitting by space, and then convert to unique
-            word IDs.
-    targets : a list
-            the context in list format; note that context usually be
-            represented by splitting by space, and then convert to unique
-            word IDs.
-    batch_size : int
-            the batch size.
-    num_steps : int
-            the number of unrolls. i.e. sequence_length
-
-    Yields
-    ------
-    Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
-
-    Raises
-    ------
-    ValueError : if batch_size or num_steps are too high.
-
-    Examples
-    --------
-    >>> X = [i for i in range(20)]
-    >>> Y = [i for i in range(20,40)]
-    >>> for batch in tl.iterate.seq_minibatches2(X, Y, batch_size=2, num_steps=3):
-    ...     x, y = batch
-    ...     print(x, y)
-    ...
-    ... [[  0.   1.   2.]
-    ... [ 10.  11.  12.]]
-    ... [[ 20.  21.  22.]
-    ... [ 30.  31.  32.]]
-    ...
-    ... [[  3.   4.   5.]
-    ... [ 13.  14.  15.]]
-    ... [[ 23.  24.  25.]
-    ... [ 33.  34.  35.]]
-    ...
-    ... [[  6.   7.   8.]
-    ... [ 16.  17.  18.]]
-    ... [[ 26.  27.  28.]
-    ... [ 36.  37.  38.]]
-
-    Code References
-    ---------------
-    - ``tensorflow/models/rnn/ptb/reader.py``
-    """
-    assert len(inputs) == len(targets)
-    data_len = len(inputs)
-    batch_len = data_len // batch_size
-    # data = np.zeros([batch_size, batch_len])
-    data = np.zeros((batch_size, batch_len) + inputs.shape[1:],
-                          dtype=inputs.dtype)
-    data2 = np.zeros([batch_size, batch_len])
-
-    for i in range(batch_size):
-        data[i] = inputs[batch_len * i:batch_len * (i + 1)]
-        data2[i] = targets[batch_len * i:batch_len * (i + 1)]
-
-    epoch_size = (batch_len - 1) // num_steps
-
-    if epoch_size == 0:
-        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
-
-    for i in range(epoch_size):
-        x = data[:, i*num_steps:(i+1)*num_steps]
-        x2 = data2[:, i*num_steps:(i+1)*num_steps]
-        yield (x, x2)
-
-
-def ptb_iterator(raw_data, batch_size, num_steps):
-    """
-    Generate a generator that iterates on a list of words, see PTB tutorial. Yields (Returns) the source contexts and
-    the target context by the given batch_size and num_steps (sequence_length).\n
-    see ``PTB tutorial``.
-
-    e.g. x = [0, 1, 2]  y = [1, 2, 3] , when batch_size = 1, num_steps = 3,
-    raw_data = [i for i in range(100)]
-
-    In TensorFlow's tutorial, this generates batch_size pointers into the raw
-    PTB data, and allows minibatch iteration along these pointers.
-
-    Parameters
-    ----------
-    raw_data : a list
-            the context in list format; note that context usually be
-            represented by splitting by space, and then convert to unique
-            word IDs.
-    batch_size : int
-            the batch size.
-    num_steps : int
-            the number of unrolls. i.e. sequence_length
-
-    Yields
-    ------
-    Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
-    The second element of the tuple is the same data time-shifted to the
-    right by one.
-
-    Raises
-    ------
-    ValueError : if batch_size or num_steps are too high.
-
-    Examples
-    --------
-    >>> train_data = [i for i in range(20)]
-    >>> for batch in tl.iterate.ptb_iterator(train_data, batch_size=2, num_steps=3):
-    >>>     x, y = batch
-    >>>     print(x, y)
-    ... [[ 0  1  2] <---x                       1st subset/ iteration
-    ...  [10 11 12]]
-    ... [[ 1  2  3] <---y
-    ...  [11 12 13]]
-    ...
-    ... [[ 3  4  5]  <--- 1st batch input       2nd subset/ iteration
-    ...  [13 14 15]] <--- 2nd batch input
-    ... [[ 4  5  6]  <--- 1st batch target
-    ...  [14 15 16]] <--- 2nd batch target
-    ...
-    ... [[ 6  7  8]                             3rd subset/ iteration
-    ...  [16 17 18]]
-    ... [[ 7  8  9]
-    ...  [17 18 19]]
-
-    Code References
-    ----------------
-    - ``tensorflow/models/rnn/ptb/reader.py``
-    """
-    raw_data = np.array(raw_data, dtype=np.int32)
-
-    data_len = len(raw_data)
-    batch_len = data_len // batch_size
-    data = np.zeros([batch_size, batch_len], dtype=np.int32)
-    for i in range(batch_size):
-        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]
-
-    epoch_size = (batch_len - 1) // num_steps
-
-    if epoch_size == 0:
-        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
-
-    for i in range(epoch_size):
-        x = data[:, i*num_steps:(i+1)*num_steps]
-        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
-        yield (x, y)
-
-
-
-# def minibatches_for_sequence2D(inputs, targets, batch_size, sequence_length, stride=1):
-#     """
-#     Input a group of example in 2D numpy.array and their labels.
-#     Return the examples and labels by the given batchsize, sequence_length.
-#     Use for RNN.
-#
-#     Parameters
-#     ----------
-#     inputs : numpy.array
-#         (X) The input features, every row is a example.
-#     targets : numpy.array
-#         (y) The labels of inputs, every row is a example.
-#     batchsize : int
-#         The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0
-#     sequence_length : int
-#         The sequence length
-#     stride : int
-#         The stride step
-#
-#     Examples
-#     --------
-#     >>> sequence_length = 2
-#     >>> batch_size = 4
-#     >>> stride = 1
-#     >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]])
-#     >>> y_train = np.asarray(['0','1','2','3','4','5','6','7'])
-#     >>> print('X_train = %s' % X_train)
-#     >>> print('y_train = %s' % y_train)
-#     >>> for batch in minibatches_for_sequence2D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride):
-#     >>>     inputs, targets = batch
-#     >>>     print(inputs)
-#     >>>     print(targets)
-#     ... [[ 1.  2.  3.]
-#     ... [ 4.  5.  6.]
-#     ... [ 4.  5.  6.]
-#     ... [ 7.  8.  9.]]
-#     ... [1 2]
-#     ... [[  4.   5.   6.]
-#     ... [  7.   8.   9.]
-#     ... [  7.   8.   9.]
-#     ... [ 10.  11.  12.]]
-#     ... [2 3]
-#     ... ...
-#     ... [[ 16.  17.  18.]
-#     ... [ 19.  20.  21.]
-#     ... [ 19.  20.  21.]
-#     ... [ 22.  23.  24.]]
-#     ... [6 7]
-#     """
-#     print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride))
-#     assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length
-#     # assert int(batch_size % sequence_length) == 0, 'batch_size % sequence_length must == 0\
-#     # batch_size is number of examples rather than number of targets'
-#
-#     # print(inputs.shape, len(inputs), len(inputs[0]))
-#
-#     n_targets = int(batch_size/sequence_length)
-#     # n_targets = int(np.ceil(batch_size/sequence_length))
-#     X = np.empty(shape=(0,len(inputs[0])), dtype=np.float32)
-#     y = np.zeros(shape=(1, n_targets), dtype=np.int32)
-#
-#     for idx in range(sequence_length, len(inputs), stride):  # go through all example during 1 epoch
-#         for n in range(n_targets):   # for num of target
-#             X = np.concatenate((X, inputs[idx-sequence_length+n:idx+n]))
-#             y[0][n] = targets[idx-1+n]
-#             # y = np.vstack((y, targets[idx-1+n]))
-#         yield X, y[0]
-#         X = np.empty(shape=(0,len(inputs[0])))
-#         # y = np.empty(shape=(1,0))
-#
-#
-# def minibatches_for_sequence4D(inputs, targets, batch_size, sequence_length, stride=1): #
-#     """
-#     Input a group of example in 4D numpy.array and their labels.
-#     Return the examples and labels by the given batchsize, sequence_length.
-#     Use for RNN.
-#
-#     Parameters
-#     ----------
-#     inputs : numpy.array
-#         (X) The input features, every row is a example.
-#     targets : numpy.array
-#         (y) The labels of inputs, every row is a example.
-#     batchsize : int
-#         The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0
-#     sequence_length : int
-#         The sequence length
-#     stride : int
-#         The stride step
-#
-#     Examples
-#     --------
-#     >>> sequence_length = 2
-#     >>> batch_size = 2
-#     >>> stride = 1
-#     >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]])
-#     >>> y_train = np.asarray(['0','1','2','3','4','5','6','7'])
-#     >>> X_train = np.expand_dims(X_train, axis=1)
-#     >>> X_train = np.expand_dims(X_train, axis=3)
-#     >>> for batch in minibatches_for_sequence4D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride):
-#     >>>     inputs, targets = batch
-#     >>>     print(inputs)
-#     >>>     print(targets)
-#     ... [[[[ 1.]
-#     ...    [ 2.]
-#     ...    [ 3.]]]
-#     ... [[[ 4.]
-#     ...   [ 5.]
-#     ...   [ 6.]]]]
-#     ... [1]
-#     ... [[[[ 4.]
-#     ...    [ 5.]
-#     ...    [ 6.]]]
-#     ... [[[ 7.]
-#     ...   [ 8.]
-#     ...   [ 9.]]]]
-#     ... [2]
-#     ... ...
-#     ... [[[[ 19.]
-#     ...    [ 20.]
-#     ...    [ 21.]]]
-#     ... [[[ 22.]
-#     ...   [ 23.]
-#     ...   [ 24.]]]]
-#     ... [7]
-#     """
-#     print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride))
-#     assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length
-#     # assert int(batch_size % sequence_length) == 0, 'in LSTM, batch_size % sequence_length must == 0\
-#     # batch_size is number of X_train rather than number of targets'
-#     assert stride >= 1, 'stride must be >=1, at least move 1 step for each iternation'
-#
-#     n_example, n_channels, width, height = inputs.shape
-#     print('n_example=%d n_channels=%d width=%d height=%d' % (n_example, n_channels, width, height))
-#
-#     n_targets = int(np.ceil(batch_size/sequence_length)) # 实际为 batchsize/sequence_length + 1
-#     print(n_targets)
-#     X = np.zeros(shape=(batch_size, n_channels, width, height), dtype=np.float32)
-#     # X = np.zeros(shape=(n_targets, sequence_length, n_channels, width, height), dtype=np.float32)
-#     y = np.zeros(shape=(1,n_targets), dtype=np.int32)
-#     # y = np.empty(shape=(0,1), dtype=np.float32)
-#     # time.sleep(2)
-#     for idx in range(sequence_length, n_example-n_targets+2, stride):  # go through all example during 1 epoch
-#         for n in range(n_targets):   # for num of target
-#             # print(idx+n, inputs[idx-sequence_length+n : idx+n].shape)
-#             X[n*sequence_length : (n+1)*sequence_length] = inputs[idx+n-sequence_length : idx+n]
-#             # X[n] = inputs[idx-sequence_length+n:idx+n]
-#             y[0][n] = targets[idx+n-1]
-#             # y = np.vstack((y, targets[idx-1+n]))
-#         # y = targets[idx: idx+n_targets]
-#         yield X, y[0]
diff --git a/_tensorlayer/layers.py b/_tensorlayer/layers.py
deleted file mode 100755
index d429fe0..0000000
--- a/_tensorlayer/layers.py
+++ /dev/null
@@ -1,5530 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-
-import tensorflow as tf
-import time
-from . import visualize
-from . import utils
-from . import files
-from . import cost
-from . import iterate
-from . import ops
-import numpy as np
-from six.moves import xrange
-import random, warnings
-import copy
-
-# __all__ = [
-#     "Layer",
-#     "DenseLayer",
-# ]
-
-
-# set_keep = locals()
-set_keep = globals()
-set_keep['_layers_name_list'] =[]
-set_keep['name_reuse'] = False
-
-try:  # For TF12 and later
-    TF_GRAPHKEYS_VARIABLES = tf.GraphKeys.GLOBAL_VARIABLES
-except:  # For TF11 and before
-    TF_GRAPHKEYS_VARIABLES = tf.GraphKeys.VARIABLES
-
-## Variable Operation
-def flatten_reshape(variable, name=''):
-    """Reshapes high-dimension input to a vector.
-    [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask]
-
-    Parameters
-    ----------
-    variable : a tensorflow variable
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    >>> W_conv2 = weight_variable([5, 5, 100, 32])   # 64 features for each 5x5 patch
-    >>> b_conv2 = bias_variable([32])
-    >>> W_fc1 = weight_variable([7 * 7 * 32, 256])
-
-    >>> h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
-    >>> h_pool2 = max_pool_2x2(h_conv2)
-    >>> h_pool2.get_shape()[:].as_list() = [batch_size, 7, 7, 32]
-    ...         [batch_size, mask_row, mask_col, n_mask]
-    >>> h_pool2_flat = tl.layers.flatten_reshape(h_pool2)
-    ...         [batch_size, mask_row * mask_col * n_mask]
-    >>> h_pool2_flat_drop = tf.nn.dropout(h_pool2_flat, keep_prob)
-    ...
-    """
-    dim = 1
-    for d in variable.get_shape()[1:].as_list():
-        dim *= d
-    return tf.reshape(variable, shape=[-1, dim], name=name)
-
-def clear_layers_name():
-    """Clear all layer names in set_keep['_layers_name_list'],
-    enable layer name reuse.
-
-    Examples
-    ---------
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = tl.layers.DenseLayer(network, n_units=800, name='relu1')
-    ...
-    >>> tl.layers.clear_layers_name()
-    >>> network2 = tl.layers.InputLayer(x, name='input_layer')
-    >>> network2 = tl.layers.DenseLayer(network2, n_units=800, name='relu1')
-    ...
-    """
-    set_keep['_layers_name_list'] =[]
-
-def set_name_reuse(enable=True):
-    """Enable or disable reuse layer name. By default, each layer must has unique
-    name. When you want two or more input placeholder (inference) share the same
-    model parameters, you need to enable layer name reuse, then allow the
-    parameters have same name scope.
-
-    Parameters
-    ------------
-    enable : boolean, enable name reuse.
-
-    Examples
-    ------------
-    >>> def embed_seq(input_seqs, is_train, reuse):
-    >>>    with tf.variable_scope("model", reuse=reuse):
-    >>>         tl.layers.set_name_reuse(reuse)
-    >>>         network = tl.layers.EmbeddingInputlayer(
-    ...                     inputs = input_seqs,
-    ...                     vocabulary_size = vocab_size,
-    ...                     embedding_size = embedding_size,
-    ...                     name = 'e_embedding')
-    >>>        network = tl.layers.DynamicRNNLayer(network,
-    ...                     cell_fn = tf.nn.rnn_cell.BasicLSTMCell,
-    ...                     n_hidden = embedding_size,
-    ...                     dropout = (0.7 if is_train else None),
-    ...                     initializer = w_init,
-    ...                     sequence_length = tl.layers.retrieve_seq_length_op2(input_seqs),
-    ...                     return_last = True,
-    ...                     name = 'e_dynamicrnn',)
-    >>>    return network
-    >>>
-    >>> net_train = embed_seq(t_caption, is_train=True, reuse=False)
-    >>> net_test = embed_seq(t_caption, is_train=False, reuse=True)
-
-    - see ``tutorial_ptb_lstm.py`` for example.
-    """
-    set_keep['name_reuse'] = enable
-
-def initialize_rnn_state(state):
-    """Return the initialized RNN state.
-    The input is LSTMStateTuple or State of RNNCells.
-
-    Parameters
-    -----------
-    state : a RNN state.
-    """
-    try: # TF1.0
-        LSTMStateTuple = tf.contrib.rnn.LSTMStateTuple
-    except:
-        LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple
-
-    if isinstance(state, LSTMStateTuple):
-        c = state.c.eval()
-        h = state.h.eval()
-        return (c, h)
-    else:
-        new_state = state.eval()
-        return new_state
-
-def print_all_variables(train_only=False):
-    """Print all trainable and non-trainable variables
-    without tl.layers.initialize_global_variables(sess)
-
-    Parameters
-    ----------
-    train_only : boolean
-        If True, only print the trainable variables, otherwise, print all variables.
-    """
-    # tvar = tf.trainable_variables() if train_only else tf.all_variables()
-    if train_only:
-        t_vars = tf.trainable_variables()
-        print("  [*] printing trainable variables")
-    else:
-        try: # TF1.0
-            t_vars = tf.global_variables()
-        except: # TF0.12
-            t_vars = tf.all_variables()
-        print("  [*] printing global variables")
-    for idx, v in enumerate(t_vars):
-        print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v.name))
-
-def get_variables_with_name(name, train_only=True, printable=False):
-    """Get variable list by a given name scope.
-
-    Examples
-    ---------
-    >>> dense_vars = tl.layers.get_variable_with_name('dense', True, True)
-    """
-    print("  [*] geting variables with %s" % name)
-    # tvar = tf.trainable_variables() if train_only else tf.all_variables()
-    if train_only:
-        t_vars = tf.trainable_variables()
-    else:
-        try: # TF1.0
-            t_vars = tf.global_variables()
-        except: # TF0.12
-            t_vars = tf.all_variables()
-
-    d_vars = [var for var in t_vars if name in var.name]
-    if printable:
-        for idx, v in enumerate(d_vars):
-            print("  got {:3}: {:15}   {}".format(idx, v.name, str(v.get_shape())))
-    return d_vars
-
-def get_layers_with_name(network=None, name="", printable=False):
-    """Get layer list in a network by a given name scope.
-
-    Examples
-    ---------
-    >>> layers = tl.layers.get_layers_with_name(network, "CNN", True)
-    """
-    assert network is not None
-    print("  [*] geting layers with %s" % name)
-
-    layers = []
-    i = 0
-    for layer in network.all_layers:
-        # print(type(layer.name))
-        if name in layer.name:
-            layers.append(layer)
-            if printable:
-                # print(layer.name)
-                print("  got {:3}: {:15}   {}".format(i, layer.name, str(layer.get_shape())))
-                i = i + 1
-    return layers
-
-def list_remove_repeat(l=None):
-    """Remove the repeated items in a list, and return the processed list.
-    You may need it to create merged layer like Concat, Elementwise and etc.
-
-    Parameters
-    ----------
-    l : a list
-
-    Examples
-    ---------
-    >>> l = [2, 3, 4, 2, 3]
-    >>> l = list_remove_repeat(l)
-    ... [2, 3, 4]
-    """
-    l2 = []
-    [l2.append(i) for i in l if not i in l2]
-    return l2
-
-def initialize_global_variables(sess=None):
-    """Excute ``sess.run(tf.global_variables_initializer())`` for TF12+ or
-    sess.run(tf.initialize_all_variables()) for TF11.
-
-    Parameters
-    ----------
-    sess : a Session
-    """
-    assert sess is not None
-    try:    # TF12
-        sess.run(tf.global_variables_initializer())
-    except: # TF11
-        sess.run(tf.initialize_all_variables())
-
-
-## Basic layer
-class Layer(object):
-    """
-    The :class:`Layer` class represents a single layer of a neural network. It
-    should be subclassed when implementing new types of layers.
-    Because each layer can keep track of the layer(s) feeding into it, a
-    network's output :class:`Layer` instance can double as a handle to the full
-    network.
-
-    Parameters
-    ----------
-    inputs : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        inputs = None,
-        name ='layer'
-    ):
-        self.inputs = inputs
-        scope_name=tf.get_variable_scope().name
-        if scope_name:
-            name = scope_name + '/' + name
-        if (name in set_keep['_layers_name_list']) and name_reuse == False:
-            raise Exception("Layer '%s' already exists, please choice other 'name' or reuse this layer\
-            \nHint : Use different name for different 'Layer' (The name is used to control parameter sharing)" % name)
-        else:
-            self.name = name
-            if name not in ['', None, False]:
-                set_keep['_layers_name_list'].append(name)
-
-
-    def print_params(self, details=True):
-        ''' Print all info of parameters in the network'''
-        for i, p in enumerate(self.all_params):
-            if details:
-                try:
-                    print("  param {:3}: {:15} (mean: {:<18}, median: {:<18}, std: {:<18})   {}".format(i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name))
-                except Exception as e:
-                    print(str(e))
-                    raise Exception("Hint: print params details after tl.layers.initialize_global_variables(sess) or use network.print_params(False).")
-            else:
-                print("  param {:3}: {:15}    {}".format(i, str(p.get_shape()), p.name))
-        print("  num of params: %d" % self.count_params())
-
-    def print_layers(self):
-        ''' Print all info of layers in the network '''
-        for i, p in enumerate(self.all_layers):
-            print("  layer %d: %s" % (i, str(p)))
-
-    def count_params(self):
-        ''' Return the number of parameters in the network '''
-        n_params = 0
-        for i, p in enumerate(self.all_params):
-            n = 1
-            # for s in p.eval().shape:
-            for s in p.get_shape():
-                try:
-                    s = int(s)
-                except:
-                    s = 1
-                if s:
-                    n = n * s
-            n_params = n_params + n
-        return n_params
-
-    def __str__(self):
-        # print("\nIt is a Layer class")
-        # self.print_params(False)
-        # self.print_layers()
-        return "  Last layer is: %s" % self.__class__.__name__
-
-## Input layer
-class InputLayer(Layer):
-    """
-    The :class:`InputLayer` class is the starting layer of a neural network.
-
-    Parameters
-    ----------
-    inputs : a placeholder or tensor
-        The input tensor data.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        inputs = None,
-        name ='input_layer'
-    ):
-        Layer.__init__(self, inputs=inputs, name=name)
-        print("  [TL] InputLayer  %s: %s" % (self.name, inputs.get_shape()))
-        self.outputs = inputs
-        self.all_layers = []
-        self.all_params = []
-        self.all_drop = {}
-
-## OneHot layer
-class OneHotInputLayer(Layer):
-    """
-    The :class:`OneHotInputLayer` class is the starting layer of a neural network, see ``tf.one_hot``.
-
-    Parameters
-    ----------
-    inputs : a placeholder or tensor
-        The input tensor data.
-    name : a string or None
-        An optional name to attach to this layer.
-    depth : If the input indices is rank N, the output will have rank N+1. The new axis is created at dimension axis (default: the new axis is appended at the end).
-    on_value : If on_value is not provided, it will default to the value 1 with type dtype.
-        default, None
-    off_value : If off_value is not provided, it will default to the value 0 with type dtype.
-        default, None
-    axis : default, None
-    dtype : default, None
-    """
-    def __init__(
-        self,
-        inputs = None,
-        depth = None,
-        on_value = None,
-        off_value = None,
-        axis = None,
-        dtype=None,
-        name ='input_layer'
-    ):
-        Layer.__init__(self, inputs=inputs, name=name)
-        assert depth != None, "depth is not given"
-        print("  [TL]:Instantiate OneHotInputLayer  %s: %s" % (self.name, inputs.get_shape()))
-        self.outputs = tf.one_hot(inputs, depth, on_value=on_value, off_value=off_value, axis=axis, dtype=dtype)
-        self.all_layers = []
-        self.all_params = []
-        self.all_drop = {}
-
-## Word Embedding Input layer
-class Word2vecEmbeddingInputlayer(Layer):
-    """
-    The :class:`Word2vecEmbeddingInputlayer` class is a fully connected layer,
-    for Word Embedding. Words are input as integer index.
-    The output is the embedded word vector.
-
-    Parameters
-    ----------
-    inputs : placeholder
-        For word inputs. integer index format.
-    train_labels : placeholder
-        For word labels. integer index format.
-    vocabulary_size : int
-        The size of vocabulary, number of words.
-    embedding_size : int
-        The number of embedding dimensions.
-    num_sampled : int
-        The Number of negative examples for NCE loss.
-    nce_loss_args : a dictionary
-        The arguments for tf.nn.nce_loss()
-    E_init : embedding initializer
-        The initializer for initializing the embedding matrix.
-    E_init_args : a dictionary
-        The arguments for embedding initializer
-    nce_W_init : NCE decoder biases initializer
-        The initializer for initializing the nce decoder weight matrix.
-    nce_W_init_args : a dictionary
-        The arguments for initializing the nce decoder weight matrix.
-    nce_b_init : NCE decoder biases initializer
-        The initializer for tf.get_variable() of the nce decoder bias vector.
-    nce_b_init_args : a dictionary
-        The arguments for tf.get_variable() of the nce decoder bias vector.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Variables
-    --------------
-    nce_cost : a tensor
-        The NCE loss.
-    outputs : a tensor
-        The outputs of embedding layer.
-    normalized_embeddings : tensor
-        Normalized embedding matrix
-
-    Examples
-    --------
-    - Without TensorLayer : see tensorflow/examples/tutorials/word2vec/word2vec_basic.py
-    >>> train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
-    >>> train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
-    >>> embeddings = tf.Variable(
-    ...     tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
-    >>> embed = tf.nn.embedding_lookup(embeddings, train_inputs)
-    >>> nce_weights = tf.Variable(
-    ...     tf.truncated_normal([vocabulary_size, embedding_size],
-    ...                    stddev=1.0 / math.sqrt(embedding_size)))
-    >>> nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
-    >>> cost = tf.reduce_mean(
-    ...    tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
-    ...               inputs=embed, labels=train_labels,
-    ...               num_sampled=num_sampled, num_classes=vocabulary_size,
-    ...               num_true=1))
-
-    - With TensorLayer : see tutorial_word2vec_basic.py
-    >>> train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
-    >>> train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
-    >>> emb_net = tl.layers.Word2vecEmbeddingInputlayer(
-    ...         inputs = train_inputs,
-    ...         train_labels = train_labels,
-    ...         vocabulary_size = vocabulary_size,
-    ...         embedding_size = embedding_size,
-    ...         num_sampled = num_sampled,
-    ...        name ='word2vec_layer',
-    ...    )
-    >>> cost = emb_net.nce_cost
-    >>> train_params = emb_net.all_params
-    >>> train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
-    ...                                             cost, var_list=train_params)
-    >>> normalized_embeddings = emb_net.normalized_embeddings
-
-    References
-    ----------
-    - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py <https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py>`_
-    """
-    def __init__(
-        self,
-        inputs = None,
-        train_labels = None,
-        vocabulary_size = 80000,
-        embedding_size = 200,
-        num_sampled = 64,
-        nce_loss_args = {},
-        E_init = tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
-        E_init_args = {},
-        nce_W_init = tf.truncated_normal_initializer(stddev=0.03),
-        nce_W_init_args = {},
-        nce_b_init = tf.constant_initializer(value=0.0),
-        nce_b_init_args = {},
-        name ='word2vec_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = inputs
-        print("  [TL] Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
-        # Look up embeddings for inputs.
-        # Note: a row of 'embeddings' is the vector representation of a word.
-        # for the sake of speed, it is better to slice the embedding matrix
-        # instead of transfering a word id to one-hot-format vector and then
-        # multiply by the embedding matrix.
-        # embed is the outputs of the hidden layer (embedding layer), it is a
-        # row vector with 'embedding_size' values.
-        with tf.variable_scope(name) as vs:
-            embeddings = tf.get_variable(name='embeddings',
-                                    shape=(vocabulary_size, embedding_size),
-                                    initializer=E_init,
-                                    **E_init_args)
-            embed = tf.nn.embedding_lookup(embeddings, self.inputs)
-            # Construct the variables for the NCE loss (i.e. negative sampling)
-            nce_weights = tf.get_variable(name='nce_weights',
-                                    shape=(vocabulary_size, embedding_size),
-                                    initializer=nce_W_init,
-                                    **nce_W_init_args)
-            nce_biases = tf.get_variable(name='nce_biases',
-                                    shape=(vocabulary_size),
-                                    initializer=nce_b_init,
-                                    **nce_b_init_args)
-
-        # Compute the average NCE loss for the batch.
-        # tf.nce_loss automatically draws a new sample of the negative labels
-        # each time we evaluate the loss.
-        self.nce_cost = tf.reduce_mean(
-            tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
-                           inputs=embed, labels=train_labels,
-                           num_sampled=num_sampled, num_classes=vocabulary_size,
-                           **nce_loss_args))
-
-        self.outputs = embed
-        self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)
-
-        self.all_layers = [self.outputs]
-        self.all_params = [embeddings, nce_weights, nce_biases]
-        self.all_drop = {}
-
-class EmbeddingInputlayer(Layer):
-    """
-    The :class:`EmbeddingInputlayer` class is a fully connected layer,
-    for Word Embedding. Words are input as integer index.
-    The output is the embedded word vector.
-
-    If you have a pre-train matrix, you can assign the matrix into it.
-    To train a word embedding matrix, you can used class:`Word2vecEmbeddingInputlayer`.
-
-    Note that, do not update this embedding matrix.
-
-    Parameters
-    ----------
-    inputs : placeholder
-        For word inputs. integer index format.
-        a 2D tensor : [batch_size, num_steps(num_words)]
-    vocabulary_size : int
-        The size of vocabulary, number of words.
-    embedding_size : int
-        The number of embedding dimensions.
-    E_init : embedding initializer
-        The initializer for initializing the embedding matrix.
-    E_init_args : a dictionary
-        The arguments for embedding initializer
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Variables
-    ------------
-    outputs : a tensor
-        The outputs of embedding layer.
-        the outputs 3D tensor : [batch_size, num_steps(num_words), embedding_size]
-
-    Examples
-    --------
-    >>> vocabulary_size = 50000
-    >>> embedding_size = 200
-    >>> model_file_name = "model_word2vec_50k_200"
-    >>> batch_size = None
-    ...
-    >>> all_var = tl.files.load_npy_to_any(name=model_file_name+'.npy')
-    >>> data = all_var['data']; count = all_var['count']
-    >>> dictionary = all_var['dictionary']
-    >>> reverse_dictionary = all_var['reverse_dictionary']
-    >>> tl.files.save_vocab(count, name='vocab_'+model_file_name+'.txt')
-    >>> del all_var, data, count
-    ...
-    >>> load_params = tl.files.load_npz(name=model_file_name+'.npz')
-    >>> x = tf.placeholder(tf.int32, shape=[batch_size])
-    >>> y_ = tf.placeholder(tf.int32, shape=[batch_size, 1])
-    >>> emb_net = tl.layers.EmbeddingInputlayer(
-    ...                inputs = x,
-    ...                vocabulary_size = vocabulary_size,
-    ...                embedding_size = embedding_size,
-    ...                name ='embedding_layer')
-    >>> tl.layers.initialize_global_variables(sess)
-    >>> tl.files.assign_params(sess, [load_params[0]], emb_net)
-    >>> word = b'hello'
-    >>> word_id = dictionary[word]
-    >>> print('word_id:', word_id)
-    ... 6428
-    ...
-    >>> words = [b'i', b'am', b'hao', b'dong']
-    >>> word_ids = tl.files.words_to_word_ids(words, dictionary)
-    >>> context = tl.files.word_ids_to_words(word_ids, reverse_dictionary)
-    >>> print('word_ids:', word_ids)
-    ... [72, 1226, 46744, 20048]
-    >>> print('context:', context)
-    ... [b'i', b'am', b'hao', b'dong']
-    ...
-    >>> vector = sess.run(emb_net.outputs, feed_dict={x : [word_id]})
-    >>> print('vector:', vector.shape)
-    ... (1, 200)
-    >>> vectors = sess.run(emb_net.outputs, feed_dict={x : word_ids})
-    >>> print('vectors:', vectors.shape)
-    ... (4, 200)
-
-    """
-    def __init__(
-        self,
-        inputs = None,
-        vocabulary_size = 80000,
-        embedding_size = 200,
-        E_init = tf.random_uniform_initializer(-0.1, 0.1),
-        E_init_args = {},
-        name ='embedding_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = inputs
-        print("  [TL] EmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
-
-        with tf.variable_scope(name) as vs:
-            embeddings = tf.get_variable(name='embeddings',
-                                    shape=(vocabulary_size, embedding_size),
-                                    initializer=E_init,
-                                    **E_init_args)
-            embed = tf.nn.embedding_lookup(embeddings, self.inputs)
-
-        self.outputs = embed
-
-        self.all_layers = [self.outputs]
-        self.all_params = [embeddings]
-        self.all_drop = {}
-
-## Dense layer
-class DenseLayer(Layer):
-    """
-    The :class:`DenseLayer` class is a fully connected layer.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    n_units : int
-        The number of units of the layer.
-    act : activation function
-        The function that is applied to the layer activations.
-    W_init : weights initializer
-        The initializer for initializing the weight matrix.
-    b_init : biases initializer or None
-        The initializer for initializing the bias vector. If None, skip biases.
-    W_init_args : dictionary
-        The arguments for the weights tf.get_variable.
-    b_init_args : dictionary
-        The arguments for the biases tf.get_variable.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = tl.layers.DenseLayer(
-    ...                 network,
-    ...                 n_units=800,
-    ...                 act = tf.nn.relu,
-    ...                 W_init=tf.truncated_normal_initializer(stddev=0.1),
-    ...                 name ='relu_layer'
-    ...                 )
-
-    >>> Without TensorLayer, you can do as follow.
-    >>> W = tf.Variable(
-    ...     tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W')
-    >>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b')
-    >>> y = tf.nn.relu(tf.matmul(inputs, W) + b)
-
-    Notes
-    -----
-    If the input to this layer has more than two axes, it need to flatten the
-    input by using :class:`FlattenLayer` in this case.
-    """
-    def __init__(
-        self,
-        layer = None,
-        n_units = 100,
-        act = tf.identity,
-        W_init = tf.truncated_normal_initializer(stddev=0.1),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='dense_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        if self.inputs.get_shape().ndims != 2:
-            raise Exception("The input dimension must be rank 2, please reshape or flatten it")
-
-        n_in = int(self.inputs.get_shape()[-1])
-        self.n_units = n_units
-        print("  [TL] DenseLayer  %s: %d %s" % (self.name, self.n_units, act.__name__))
-        with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, **W_init_args )
-            if b_init:
-                b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, **b_init_args )
-                self.outputs = act(tf.matmul(self.inputs, W) + b)
-            else:
-                self.outputs = act(tf.matmul(self.inputs, W))
-
-        # Hint : list(), dict() is pass by value (shallow), without them, it is
-        # pass by reference.
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        if b_init:
-            self.all_params.extend( [W, b] )
-        else:
-            self.all_params.extend( [W] )
-
-class ReconLayer(DenseLayer):
-    """
-    The :class:`ReconLayer` class is a reconstruction layer `DenseLayer` which
-    use to pre-train a `DenseLayer`.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    x_recon : tensorflow variable
-        The variables used for reconstruction.
-    name : a string or None
-        An optional name to attach to this layer.
-    n_units : int
-        The number of units of the layer, should be equal to x_recon
-    act : activation function
-        The activation function that is applied to the reconstruction layer.
-        Normally, for sigmoid layer, the reconstruction activation is sigmoid;
-        for rectifying layer, the reconstruction activation is softplus.
-
-    Examples
-    --------
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = tl.layers.DenseLayer(network, n_units=196,
-    ...                                 act=tf.nn.sigmoid, name='sigmoid1')
-    >>> recon_layer1 = tl.layers.ReconLayer(network, x_recon=x, n_units=784,
-    ...                                 act=tf.nn.sigmoid, name='recon_layer1')
-    >>> recon_layer1.pretrain(sess, x=x, X_train=X_train, X_val=X_val,
-    ...                         denoise_name=None, n_epoch=1200, batch_size=128,
-    ...                         print_freq=10, save=True, save_name='w1pre_')
-
-    Methods
-    -------
-    pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_')
-        Start to pre-train the parameters of previous DenseLayer.
-
-    Notes
-    -----
-    The input layer should be `DenseLayer` or a layer has only one axes.
-    You may need to modify this part to define your own cost function.
-    By default, the cost is implemented as follow:
-    - For sigmoid layer, the implementation can be `UFLDL <http://deeplearning.stanford.edu/wiki/index.php/UFLDL_Tutorial>`_
-    - For rectifying layer, the implementation can be `Glorot (2011). Deep Sparse Rectifier Neural Networks <http://doi.org/10.1.1.208.6449>`_
-    """
-    def __init__(
-        self,
-        layer = None,
-        x_recon = None,
-        name = 'recon_layer',
-        n_units = 784,
-        act = tf.nn.softplus,
-    ):
-        DenseLayer.__init__(self, layer=layer, n_units=n_units, act=act, name=name)
-        print("     [TL] %s is a ReconLayer" % self.name)
-
-        # y : reconstruction outputs; train_params : parameters to train
-        # Note that: train_params = [W_encoder, b_encoder, W_decoder, b_encoder]
-        y = self.outputs
-        self.train_params = self.all_params[-4:]
-
-        # =====================================================================
-        #
-        # You need to modify the below cost function and optimizer so as to
-        # implement your own pre-train method.
-        #
-        # =====================================================================
-        lambda_l2_w = 0.004
-        learning_rate = 0.0001
-        print("     lambda_l2_w: %f" % lambda_l2_w)
-        print("     learning_rate: %f" % learning_rate)
-
-        # Mean-squre-error i.e. quadratic-cost
-        mse = tf.reduce_sum(tf.squared_difference(y, x_recon),  1)
-        mse = tf.reduce_mean(mse)            # in theano: mse = ((y - x) ** 2 ).sum(axis=1).mean()
-            # mse = tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y, x_recon)),  1))
-            # mse = tf.reduce_mean(tf.squared_difference(y, x_recon)) # <haodong>: Error
-            # mse = tf.sqrt(tf.reduce_mean(tf.square(y - x_recon)))   # <haodong>: Error
-        # Cross-entropy
-            # ce = cost.cross_entropy(y, x_recon)                                               # <haodong>: list , list , Error (only be used for softmax output)
-            # ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, x_recon))          # <haodong>: list , list , Error (only be used for softmax output)
-            # ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, x_recon))   # <haodong>: list , index , Error (only be used for softmax output)
-        L2_w = tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[0]) \
-                + tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[2])           # faster than the code below
-            # L2_w = lambda_l2_w * tf.reduce_mean(tf.square(self.train_params[0])) + lambda_l2_w * tf.reduce_mean( tf.square(self.train_params[2]))
-        # DropNeuro
-        P_o = cost.lo_regularizer(0.03)(self.train_params[0])   # + cost.lo_regularizer(0.5)(self.train_params[2])    # <haodong>: if add lo on decoder, no neuron will be broken
-        P_i = cost.li_regularizer(0.03)(self.train_params[0])  # + cost.li_regularizer(0.001)(self.train_params[2])
-
-        # L1 of activation outputs
-        activation_out = self.all_layers[-2]
-        L1_a = 0.001 * tf.reduce_mean(activation_out)   # <haodong>:  theano: T.mean( self.a[i] )         # some neuron are broken, white and black
-            # L1_a = 0.001 * tf.reduce_mean( tf.reduce_sum(activation_out, 0) )         # <haodong>: some neuron are broken, white and black
-            # L1_a = 0.001 * 100 * tf.reduce_mean( tf.reduce_sum(activation_out, 1) )   # <haodong>: some neuron are broken, white and black
-        # KL Divergence
-        beta = 4
-        rho = 0.15
-        p_hat = tf.reduce_mean(activation_out, 0)   # theano: p_hat = T.mean( self.a[i], axis=0 )
-        try: ## TF1.0
-            KLD = beta * tf.reduce_sum( rho * tf.log(tf.divide(rho, p_hat)) + (1- rho) * tf.log((1- rho)/ (tf.subtract(float(1), p_hat))) )
-        except: ## TF0.12
-            KLD = beta * tf.reduce_sum( rho * tf.log(tf.div(rho, p_hat)) + (1- rho) * tf.log((1- rho)/ (tf.sub(float(1), p_hat))) )
-            # KLD = beta * tf.reduce_sum( rho * tf.log(rho/ p_hat) + (1- rho) * tf.log((1- rho)/(1- p_hat)) )
-            # theano: L1_a = l1_a[i] * T.sum( rho[i] * T.log(rho[i]/ p_hat) + (1- rho[i]) * T.log((1- rho[i])/(1- p_hat)) )
-        # Total cost
-        if act == tf.nn.softplus:
-            print('     use: mse, L2_w, L1_a')
-            self.cost = mse + L1_a + L2_w
-        elif act == tf.nn.sigmoid:
-            # ----------------------------------------------------
-            # Cross-entropy was used in Denoising AE
-            # print('     use: ce, L2_w, KLD')
-            # self.cost = ce + L2_w + KLD
-            # ----------------------------------------------------
-            # Mean-squared-error was used in Vanilla AE
-            print('     use: mse, L2_w, KLD')
-            self.cost = mse + L2_w + KLD
-            # ----------------------------------------------------
-            # Add DropNeuro penalty (P_o) can remove neurons of AE
-            # print('     use: mse, L2_w, KLD, P_o')
-            # self.cost = mse + L2_w + KLD + P_o
-            # ----------------------------------------------------
-            # Add DropNeuro penalty (P_i) can remove neurons of previous layer
-            #   If previous layer is InputLayer, it means remove useless features
-            # print('     use: mse, L2_w, KLD, P_i')
-            # self.cost = mse + L2_w + KLD + P_i
-        else:
-            raise Exception("Don't support the given reconstruct activation function")
-
-        self.train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
-                                        epsilon=1e-08, use_locking=False).minimize(self.cost, var_list=self.train_params)
-                # self.train_op = tf.train.GradientDescentOptimizer(1.0).minimize(self.cost, var_list=self.train_params)
-
-    def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10,
-                  save=True, save_name='w1pre_'):
-        # ====================================================
-        #
-        # You need to modify the cost function in __init__() so as to
-        # get your own pre-train method.
-        #
-        # ====================================================
-        print("     [*] %s start pretrain" % self.name)
-        print("     batch_size: %d" % batch_size)
-        if denoise_name:
-            print("     denoising layer keep: %f" % self.all_drop[set_keep[denoise_name]])
-            dp_denoise = self.all_drop[set_keep[denoise_name]]
-        else:
-            print("     no denoising layer")
-
-        for epoch in range(n_epoch):
-            start_time = time.time()
-            for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
-                dp_dict = utils.dict_to_one( self.all_drop )
-                if denoise_name:
-                    dp_dict[set_keep[denoise_name]] = dp_denoise
-                feed_dict = {x: X_train_a}
-                feed_dict.update(dp_dict)
-                sess.run(self.train_op, feed_dict=feed_dict)
-
-            if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
-                print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
-                train_loss, n_batch = 0, 0
-                for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
-                    dp_dict = utils.dict_to_one( self.all_drop )
-                    feed_dict = {x: X_train_a}
-                    feed_dict.update(dp_dict)
-                    err = sess.run(self.cost, feed_dict=feed_dict)
-                    train_loss += err
-                    n_batch += 1
-                print("   train loss: %f" % (train_loss/ n_batch))
-                val_loss, n_batch = 0, 0
-                for X_val_a, _ in iterate.minibatches(X_val, X_val, batch_size, shuffle=True):
-                    dp_dict = utils.dict_to_one( self.all_drop )
-                    feed_dict = {x: X_val_a}
-                    feed_dict.update(dp_dict)
-                    err = sess.run(self.cost, feed_dict=feed_dict)
-                    val_loss += err
-                    n_batch += 1
-                print("   val loss: %f" % (val_loss/ n_batch))
-                if save:
-                    try:
-                        visualize.W(self.train_params[0].eval(), second=10, saveable=True, shape=[28,28], name=save_name+str(epoch+1), fig_idx=2012)
-                        files.save_npz([self.all_params[0]] , name=save_name+str(epoch+1)+'.npz')
-                    except:
-                        raise Exception("You should change the visualize.W() in ReconLayer.pretrain(), if you want to save the feature images for different dataset")
-
-## Noise layer
-class DropoutLayer(Layer):
-    """
-    The :class:`DropoutLayer` class is a noise layer which randomly set some
-    values to zero by a given keeping probability.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    keep : float
-        The keeping probability, the lower more values will be set to zero.
-    is_fix : boolean
-        Default False, if True, the keeping probability is fixed and cannot be changed via feed_dict.
-    is_train : boolean
-        If False, skip this layer, default is True.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    - Define network
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1')
-    >>> network = tl.layers.DenseLayer(network, n_units=800, act = tf.nn.relu, name='relu1')
-    >>> ...
-
-    - For training, enable dropout as follow.
-    >>> feed_dict = {x: X_train_a, y_: y_train_a}
-    >>> feed_dict.update( network.all_drop )     # enable noise layers
-    >>> sess.run(train_op, feed_dict=feed_dict)
-    >>> ...
-
-    - For testing, disable dropout as follow.
-    >>> dp_dict = tl.utils.dict_to_one( network.all_drop ) # disable noise layers
-    >>> feed_dict = {x: X_val_a, y_: y_val_a}
-    >>> feed_dict.update(dp_dict)
-    >>> err, ac = sess.run([cost, acc], feed_dict=feed_dict)
-    >>> ...
-
-    Notes
-    -------
-    - A frequent question regarding :class:`DropoutLayer` is that why it donot have `is_train` like :class:`BatchNormLayer`.
-    In many simple cases, user may find it is better to use one inference instead of two inferences for training and testing seperately, :class:`DropoutLayer`
-    allows you to control the dropout rate via `feed_dict`. However, you can fix the keeping probability by setting `is_fix` to True.
-    """
-    def __init__(
-        self,
-        layer = None,
-        keep = 0.5,
-        is_fix = False,
-        is_train = True,
-        name = 'dropout_layer',
-    ):
-        Layer.__init__(self, name=name)
-        if is_train is False:
-            print("  [TL] skip DropoutLayer")
-            self.outputs = layer.outputs
-            self.all_layers = list(layer.all_layers)
-            self.all_params = list(layer.all_params)
-            self.all_drop = dict(layer.all_drop)
-        else:
-            self.inputs = layer.outputs
-            print("  [TL] DropoutLayer %s: keep:%f is_fix:%s" % (self.name, keep, is_fix))
-
-            # The name of placeholder for keep_prob is the same with the name
-            # of the Layer.
-            if is_fix:
-                self.outputs = tf.nn.dropout(self.inputs, keep, name=name)
-            else:
-                set_keep[name] = tf.placeholder(tf.float32)
-                self.outputs = tf.nn.dropout(self.inputs, set_keep[name], name=name) # 1.2
-
-            self.all_layers = list(layer.all_layers)
-            self.all_params = list(layer.all_params)
-            self.all_drop = dict(layer.all_drop)
-            if is_fix is False:
-                self.all_drop.update( {set_keep[name]: keep} )
-            self.all_layers.extend( [self.outputs] )
-
-        # print(set_keep[name])
-        #   Tensor("Placeholder_2:0", dtype=float32)
-        # print(denoising1)
-        #   Tensor("Placeholder_2:0", dtype=float32)
-        # print(self.all_drop[denoising1])
-        #   0.8
-        #
-        # https://www.tensorflow.org/versions/r0.8/tutorials/mnist/tf/index.html
-        # The optional feed_dict argument allows the caller to override the
-        # value of tensors in the graph. Each key in feed_dict can be one of
-        # the following types:
-        # If the key is a Tensor, the value may be a Python scalar, string,
-        # list, or numpy ndarray that can be converted to the same dtype as that
-        # tensor. Additionally, if the key is a placeholder, the shape of the
-        # value will be checked for compatibility with the placeholder.
-        # If the key is a SparseTensor, the value should be a SparseTensorValue.
-
-class GaussianNoiseLayer(Layer):
-    """
-    The :class:`GaussianNoiseLayer` class is noise layer that adding noise with
-    normal distribution to the activation.
-
-    Parameters
-    ------------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    mean : float
-    stddev : float
-    is_train : boolean
-        If False, skip this layer, default is True.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        mean = 0.0,
-        stddev = 1.0,
-        is_train = True,
-        name = 'gaussian_noise_layer',
-    ):
-        Layer.__init__(self, name=name)
-        if is_train is False:
-            print("  [TL] skip GaussianNoiseLayer")
-            self.outputs = layer.outputs
-            self.all_layers = list(layer.all_layers)
-            self.all_params = list(layer.all_params)
-            self.all_drop = dict(layer.all_drop)
-        else:
-            self.inputs = layer.outputs
-            print("  [TL] GaussianNoiseLayer %s: mean:%f stddev:%f" % (self.name, mean, stddev))
-            with tf.variable_scope(name) as vs:
-                # noise = np.random.normal(0.0 , sigma , tf.to_int64(self.inputs).get_shape())
-                noise = tf.random_normal(shape = self.inputs.get_shape(), mean=mean, stddev=stddev)
-                self.outputs = self.inputs + noise
-            self.all_layers = list(layer.all_layers)
-            self.all_params = list(layer.all_params)
-            self.all_drop = dict(layer.all_drop)
-
-class DropconnectDenseLayer(Layer):
-    """
-    The :class:`DropconnectDenseLayer` class is ``DenseLayer`` with DropConnect
-    behaviour which randomly remove connection between this layer to previous
-    layer by a given keeping probability.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    keep : float
-        The keeping probability, the lower more values will be set to zero.
-    n_units : int
-        The number of units of the layer.
-    act : activation function
-        The function that is applied to the layer activations.
-    W_init : weights initializer
-        The initializer for initializing the weight matrix.
-    b_init : biases initializer
-        The initializer for initializing the bias vector.
-    W_init_args : dictionary
-        The arguments for the weights tf.get_variable().
-    b_init_args : dictionary
-        The arguments for the biases tf.get_variable().
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.8,
-    ...         n_units=800, act = tf.nn.relu, name='dropconnect_relu1')
-    >>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.5,
-    ...         n_units=800, act = tf.nn.relu, name='dropconnect_relu2')
-    >>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.5,
-    ...         n_units=10, act = tl.activation.identity, name='output_layer')
-
-    References
-    ----------
-    - `Wan, L. (2013). Regularization of neural networks using dropconnect <http://machinelearning.wustl.edu/mlpapers/papers/icml2013_wan13>`_
-    """
-    def __init__(
-        self,
-        layer = None,
-        keep = 0.5,
-        n_units = 100,
-        act = tf.identity,
-        W_init = tf.truncated_normal_initializer(stddev=0.1),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='dropconnect_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        if self.inputs.get_shape().ndims != 2:
-            raise Exception("The input dimension must be rank 2")
-        n_in = int(self.inputs.get_shape()[-1])
-        self.n_units = n_units
-        print("  [TL] DropconnectDenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__))
-
-        with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, **W_init_args )
-            b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, **b_init_args )
-            self.outputs = act(tf.matmul(self.inputs, W) + b)#, name=name)    # 1.2
-
-        set_keep[name] = tf.placeholder(tf.float32)
-        W_dropcon = tf.nn.dropout(W,  set_keep[name])
-        self.outputs = act(tf.matmul(self.inputs, W_dropcon) + b)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_drop.update( {set_keep[name]: keep} )
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [W, b] )
-
-## Convolutional layer (Pro)
-
-class Conv1dLayer(Layer):
-    """
-    The :class:`Conv1dLayer` class is a 1D CNN layer, see `tf.nn.conv1d <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#conv1d>`_.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer, [batch, in_width, in_channels].
-    act : activation function, None for identity.
-    shape : list of shape
-        shape of the filters, [filter_length, in_channels, out_channels].
-    stride : an int.
-        The number of entries by which the filter is moved right at each step.
-    padding : a string from: "SAME", "VALID".
-        The type of padding algorithm to use.
-    use_cudnn_on_gpu : An optional bool. Defaults to True.
-    data_format : An optional string from "NHWC", "NCHW". Defaults to "NHWC", the data is stored in the order of [batch, in_width, in_channels]. The "NCHW" format stores data as [batch, in_channels, in_width].
-    W_init : weights initializer
-        The initializer for initializing the weight matrix.
-    b_init : biases initializer or None
-        The initializer for initializing the bias vector. If None, skip biases.
-    W_init_args : dictionary
-        The arguments for the weights tf.get_variable().
-    b_init_args : dictionary
-        The arguments for the biases tf.get_variable().
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [5, 1, 5],
-        stride = 1,
-        padding='SAME',
-        use_cudnn_on_gpu=None,
-        data_format=None,
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='cnn_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] Conv1dLayer %s: shape:%s stride:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(stride), padding, act.__name__))
-        if act is None:
-            act = tf.identity
-        with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_conv1d', shape=shape, initializer=W_init, **W_init_args )
-            if b_init:
-                b = tf.get_variable(name='b_conv1d', shape=(shape[-1]), initializer=b_init, **b_init_args )
-                self.outputs = act( tf.nn.conv1d(self.inputs, W, stride=stride, padding=padding,
-                            use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b ) #1.2
-            else:
-                self.outputs = act( tf.nn.conv1d(self.inputs, W, stride=stride, padding=padding,
-                            use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format))
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        if b_init:
-            self.all_params.extend( [W, b] )
-        else:
-            self.all_params.extend( [W] )
-
-class Conv2dLayer(Layer):
-    """
-    The :class:`Conv2dLayer` class is a 2D CNN layer, see `tf.nn.conv2d <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#conv2d>`_.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    act : activation function
-        The function that is applied to the layer activations.
-    shape : list of shape
-        shape of the filters, [filter_height, filter_width, in_channels, out_channels].
-    strides : a list of ints.
-        The stride of the sliding window for each dimension of input.\n
-        It Must be in the same order as the dimension specified with format.
-    padding : a string from: "SAME", "VALID".
-        The type of padding algorithm to use.
-    W_init : weights initializer
-        The initializer for initializing the weight matrix.
-    b_init : biases initializer or None
-        The initializer for initializing the bias vector. If None, skip biases.
-    W_init_args : dictionary
-        The arguments for the weights tf.get_variable().
-    b_init_args : dictionary
-        The arguments for the biases tf.get_variable().
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Notes
-    ------
-    - shape = [h, w, the number of output channel of previous layer, the number of output channels]
-    - the number of output channel of a layer is its last dimension.
-
-    Examples
-    --------
-    >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = tl.layers.Conv2dLayer(network,
-    ...                   act = tf.nn.relu,
-    ...                   shape = [5, 5, 1, 32],  # 32 features for each 5x5 patch
-    ...                   strides=[1, 1, 1, 1],
-    ...                   padding='SAME',
-    ...                   W_init=tf.truncated_normal_initializer(stddev=5e-2),
-    ...                   W_init_args={},
-    ...                   b_init = tf.constant_initializer(value=0.0),
-    ...                   b_init_args = {},
-    ...                   name ='cnn_layer1')     # output: (?, 28, 28, 32)
-    >>> network = tl.layers.PoolLayer(network,
-    ...                   ksize=[1, 2, 2, 1],
-    ...                   strides=[1, 2, 2, 1],
-    ...                   padding='SAME',
-    ...                   pool = tf.nn.max_pool,
-    ...                   name ='pool_layer1',)   # output: (?, 14, 14, 32)
-
-    >>> Without TensorLayer, you can implement 2d convolution as follow.
-    >>> W = tf.Variable(W_init(shape=[5, 5, 1, 32], ), name='W_conv')
-    >>> b = tf.Variable(b_init(shape=[32], ), name='b_conv')
-    >>> outputs = tf.nn.relu( tf.nn.conv2d(inputs, W,
-    ...                       strides=[1, 1, 1, 1],
-    ...                       padding='SAME') + b )
-    """
-    def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [5, 5, 1, 100],
-        strides=[1, 1, 1, 1],
-        padding='SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='cnn_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] Conv2dLayer %s: shape:%s strides:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(strides), padding, act.__name__))
-
-        with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, **W_init_args )
-            if b_init:
-                b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, **b_init_args )
-                self.outputs = act( tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding) + b ) #1.2
-            else:
-                self.outputs = act( tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding))
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        if b_init:
-            self.all_params.extend( [W, b] )
-        else:
-            self.all_params.extend( [W] )
-
-class DeConv2dLayer(Layer):
-    """
-    The :class:`DeConv2dLayer` class is deconvolutional 2D layer, see `tf.nn.conv2d_transpose <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#conv2d_transpose>`_.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    act : activation function
-        The function that is applied to the layer activations.
-    shape : list of shape
-        shape of the filters, [height, width, output_channels, in_channels], filter's in_channels dimension must match that of value.
-    output_shape : list of output shape
-        representing the output shape of the deconvolution op.
-    strides : a list of ints.
-        The stride of the sliding window for each dimension of the input tensor.
-    padding : a string from: "SAME", "VALID".
-        The type of padding algorithm to use.
-    W_init : weights initializer
-        The initializer for initializing the weight matrix.
-    b_init : biases initializer
-        The initializer for initializing the bias vector. If None, skip biases.
-    W_init_args : dictionary
-        The arguments for the weights initializer.
-    b_init_args : dictionary
-        The arguments for the biases initializer.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Notes
-    -----
-    - shape = [h, w, the number of output channels of this layer, the number of output channel of previous layer]
-    - output_shape = [batch_size, any, any, the number of output channels of this layer]
-    - the number of output channel of a layer is its last dimension.
-
-    Examples
-    ---------
-    - A part of the generator in DCGAN example
-    >>> batch_size = 64
-    >>> inputs = tf.placeholder(tf.float32, [batch_size, 100], name='z_noise')
-    >>> net_in = tl.layers.InputLayer(inputs, name='g/in')
-    >>> net_h0 = tl.layers.DenseLayer(net_in, n_units = 8192,
-    ...                            W_init = tf.random_normal_initializer(stddev=0.02),
-    ...                            act = tf.identity, name='g/h0/lin')
-    >>> print(net_h0.outputs._shape)
-    ... (64, 8192)
-    >>> net_h0 = tl.layers.ReshapeLayer(net_h0, shape = [-1, 4, 4, 512], name='g/h0/reshape')
-    >>> net_h0 = tl.layers.BatchNormLayer(net_h0, act=tf.nn.relu, is_train=is_train, name='g/h0/batch_norm')
-    >>> print(net_h0.outputs._shape)
-    ... (64, 4, 4, 512)
-    >>> net_h1 = tl.layers.DeConv2dLayer(net_h0,
-    ...                            shape = [5, 5, 256, 512],
-    ...                            output_shape = [batch_size, 8, 8, 256],
-    ...                            strides=[1, 2, 2, 1],
-    ...                            act=tf.identity, name='g/h1/decon2d')
-    >>> net_h1 = tl.layers.BatchNormLayer(net_h1, act=tf.nn.relu, is_train=is_train, name='g/h1/batch_norm')
-    >>> print(net_h1.outputs._shape)
-    ... (64, 8, 8, 256)
-
-    - U-Net
-    >>> ....
-    >>> conv10 = tl.layers.Conv2dLayer(conv9, act=tf.nn.relu,
-    ...        shape=[3,3,1024,1024], strides=[1,1,1,1], padding='SAME',
-    ...        W_init=w_init, b_init=b_init, name='conv10')
-    >>> print(conv10.outputs)
-    ... (batch_size, 32, 32, 1024)
-    >>> deconv1 = tl.layers.DeConv2dLayer(conv10, act=tf.nn.relu,
-    ...         shape=[3,3,512,1024], strides=[1,2,2,1], output_shape=[batch_size,64,64,512],
-    ...         padding='SAME', W_init=w_init, b_init=b_init, name='devcon1_1')
-    """
-    def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [3, 3, 128, 256],
-        output_shape = [1, 256, 256, 128],
-        strides = [1, 2, 2, 1],
-        padding = 'SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='decnn2d_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] DeConv2dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(output_shape), str(strides), padding, act.__name__))
-        # print("  DeConv2dLayer: Untested")
-        with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_deconv2d', shape=shape, initializer=W_init, **W_init_args )
-            if b_init:
-                b = tf.get_variable(name='b_deconv2d', shape=(shape[-2]), initializer=b_init, **b_init_args )
-                self.outputs = act( tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b )
-            else:
-                self.outputs = act( tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding))
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        if b_init:
-            self.all_params.extend( [W, b] )
-        else:
-            self.all_params.extend( [W] )
-
-class Conv3dLayer(Layer):
-    """
-    The :class:`Conv3dLayer` class is a 3D CNN layer, see `tf.nn.conv3d <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#conv3d>`_.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    act : activation function
-        The function that is applied to the layer activations.
-    shape : list of shape
-        shape of the filters, [filter_depth, filter_height, filter_width, in_channels, out_channels].
-    strides : a list of ints. 1-D of length 4.
-        The stride of the sliding window for each dimension of input. Must be in the same order as the dimension specified with format.
-    padding : a string from: "SAME", "VALID".
-        The type of padding algorithm to use.
-    W_init : weights initializer
-        The initializer for initializing the weight matrix.
-    b_init : biases initializer
-        The initializer for initializing the bias vector.
-    W_init_args : dictionary
-        The arguments for the weights initializer.
-    b_init_args : dictionary
-        The arguments for the biases initializer.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [2, 2, 2, 64, 128],
-        strides=[1, 2, 2, 2, 1],
-        padding='SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='cnn3d_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] Conv3dLayer %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__))
-
-        with tf.variable_scope(name) as vs:
-            # W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv')
-            # b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv')
-            W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, **W_init_args )
-            b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, **b_init_args )
-            self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b )
-
-        # self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b )
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [W, b] )
-
-class DeConv3dLayer(Layer):
-    """The :class:`DeConv3dLayer` class is deconvolutional 3D layer, see `tf.nn.conv3d_transpose <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#conv3d_transpose>`_.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    act : activation function
-        The function that is applied to the layer activations.
-    shape : list of shape
-        shape of the filters, [depth, height, width, output_channels, in_channels], filter's in_channels dimension must match that of value.
-    output_shape : list of output shape
-        representing the output shape of the deconvolution op.
-    strides : a list of ints.
-        The stride of the sliding window for each dimension of the input tensor.
-    padding : a string from: "SAME", "VALID".
-        The type of padding algorithm to use.
-    W_init : weights initializer
-        The initializer for initializing the weight matrix.
-    b_init : biases initializer
-        The initializer for initializing the bias vector.
-    W_init_args : dictionary
-        The arguments for the weights initializer.
-    b_init_args : dictionary
-        The arguments for the biases initializer.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [2, 2, 2, 128, 256],
-        output_shape = [1, 12, 32, 32, 128],
-        strides = [1, 2, 2, 2, 1],
-        padding = 'SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='decnn3d_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] DeConv3dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(output_shape), str(strides), padding, act.__name__))
-
-        with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, **W_init_args )
-            b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, **b_init_args )
-
-            self.outputs = act( tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b )
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [W, b] )
-
-class UpSampling2dLayer(Layer):
-    """The :class:`UpSampling2dLayer` class is upSampling 2d layer, see `tf.image.resize_images <https://www.tensorflow.org/versions/master/api_docs/python/image/resizing#resize_images>`_.
-
-    Parameters
-    -----------
-    layer : a layer class with 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
-    size : a tupe of int or float.
-        (height, width) scale factor or new size of height and width.
-    is_scale : boolean, if True (default), size is scale factor, otherwise, size is number of pixels of height and width.
-    method : 0, 1, 2, 3. ResizeMethod. Defaults to ResizeMethod.BILINEAR.
-        - ResizeMethod.BILINEAR, Bilinear interpolation.
-        - ResizeMethod.NEAREST_NEIGHBOR, Nearest neighbor interpolation.
-        - ResizeMethod.BICUBIC, Bicubic interpolation.
-        - ResizeMethod.AREA, Area interpolation.
-    align_corners : bool. If true, exactly align all 4 corners of the input and output. Defaults to false.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        size = [],
-        is_scale = True,
-        method = 0,
-        align_corners = False,
-        name ='upsample2d_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        if len(self.inputs.get_shape()) == 3:
-            if is_scale:
-                size_h = size[0] * int(self.inputs.get_shape()[0])
-                size_w = size[1] * int(self.inputs.get_shape()[1])
-                size = [size_h, size_w]
-        elif len(self.inputs.get_shape()) == 4:
-            if is_scale:
-                size_h = size[0] * int(self.inputs.get_shape()[1])
-                size_w = size[1] * int(self.inputs.get_shape()[2])
-                size = [size_h, size_w]
-        else:
-            raise Exception("Donot support shape %s" % self.inputs.get_shape())
-        print("  [TL] UpSampling2dLayer %s: is_scale:%s size:%s method:%d align_corners:%s" %
-                                (name, is_scale, size, method, align_corners))
-        with tf.variable_scope(name) as vs:
-            try:
-                self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners)
-            except: # for TF 0.10
-                self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-
-class DownSampling2dLayer(Layer):
-    """The :class:`DownSampling2dLayer` class is downSampling 2d layer, see `tf.image.resize_images <https://www.tensorflow.org/versions/master/api_docs/python/image/resizing#resize_images>`_.
-
-    Parameters
-    -----------
-    layer : a layer class with 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
-    size : a tupe of int or float.
-        (height, width) scale factor or new size of height and width.
-    is_scale : boolean, if True (default), size is scale factor, otherwise, size is number of pixels of height and width.
-    method : 0, 1, 2, 3. ResizeMethod. Defaults to ResizeMethod.BILINEAR.
-        - ResizeMethod.BILINEAR, Bilinear interpolation.
-        - ResizeMethod.NEAREST_NEIGHBOR, Nearest neighbor interpolation.
-        - ResizeMethod.BICUBIC, Bicubic interpolation.
-        - ResizeMethod.AREA, Area interpolation.
-    align_corners : bool. If true, exactly align all 4 corners of the input and output. Defaults to false.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        size = [],
-        is_scale = True,
-        method = 0,
-        align_corners = False,
-        name ='downsample2d_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        if len(self.inputs.get_shape()) == 3:
-            if is_scale:
-                size_h = size[0] * int(self.inputs.get_shape()[0])
-                size_w = size[1] * int(self.inputs.get_shape()[1])
-                size = [size_h, size_w]
-        elif len(self.inputs.get_shape()) == 4:
-            if is_scale:
-                size_h = size[0] * int(self.inputs.get_shape()[1])
-                size_w = size[1] * int(self.inputs.get_shape()[2])
-                size = [size_h, size_w]
-        else:
-            raise Exception("Donot support shape %s" % self.inputs.get_shape())
-        print("  [TL] DownSampling2dLayer %s: is_scale:%s size:%s method:%d, align_corners:%s" %
-                                (name, is_scale, size, method, align_corners))
-        with tf.variable_scope(name) as vs:
-            try:
-                self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners)
-            except: # for TF 0.10
-                self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-
-class AtrousConv2dLayer(Layer):
-    """The :class:`AtrousConv2dLayer` class is Atrous convolution (a.k.a. convolution with holes or dilated convolution) 2D layer, see `tf.nn.atrous_conv2d <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#atrous_conv2d>`_.
-
-    Parameters
-    -----------
-    layer : a layer class with 4-D Tensor of shape [batch, height, width, channels].
-    filters : A 4-D Tensor with the same type as value and shape [filter_height, filter_width, in_channels, out_channels]. filters' in_channels dimension must match that of value. Atrous convolution is equivalent to standard convolution with upsampled filters with effective height filter_height + (filter_height - 1) * (rate - 1) and effective width filter_width + (filter_width - 1) * (rate - 1), produced by inserting rate - 1 zeros along consecutive elements across the filters' spatial dimensions.
-    n_filter : number of filter.
-    filter_size : tuple (height, width) for filter size.
-    rate : A positive int32. The stride with which we sample input values across the height and width dimensions. Equivalently, the rate by which we upsample the filter values by inserting zeros across the height and width dimensions. In the literature, the same parameter is sometimes called input stride or dilation.
-    act : activation function, None for linear.
-    padding : A string, either 'VALID' or 'SAME'. The padding algorithm.
-    W_init : weights initializer. The initializer for initializing the weight matrix.
-    b_init : biases initializer or None. The initializer for initializing the bias vector. If None, skip biases.
-    W_init_args : dictionary. The arguments for the weights tf.get_variable().
-    b_init_args : dictionary. The arguments for the biases tf.get_variable().
-    name : a string or None, an optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        n_filter = 32,
-        filter_size = (3,3),
-        rate = 2,
-        act = None,
-        padding = 'SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name = 'atrou2d'
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] AtrousConv2dLayer %s: n_filter:%d filter_size:%s rate:%d pad:%s act:%s" %
-                            (self.name, n_filter, filter_size, rate, padding, act.__name__))
-        if act is None:
-            act = tf.identity
-        with tf.variable_scope(name) as vs:
-            shape = [filter_size[0], filter_size[1], int(self.inputs.get_shape()[-1]), n_filter]
-            filters = tf.get_variable(name='filter', shape=shape, initializer=W_init, **W_init_args )
-            if b_init:
-                b = tf.get_variable(name='b', shape=(n_filter), initializer=b_init, **b_init_args )
-                self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding) + b)
-            else:
-                self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding))
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        if b_init:
-            self.all_params.extend( [filters, b] )
-        else:
-            self.all_params.extend( [filters] )
-
-class SeparableConv2dLayer(Layer):# Untested
-    """The :class:`SeparableConv2dLayer` class is 2-D convolution with separable filters, see `tf.layers.separable_conv2d <https://www.tensorflow.org/api_docs/python/tf/layers/separable_conv2d>`_.
-
-    Parameters
-    -----------
-    layer : a layer class
-    filters : integer, the dimensionality of the output space (i.e. the number output of filters in the convolution).
-    kernel_size : a tuple or list of N positive integers specifying the spatial dimensions of of the filters. Can be a single integer to specify the same value for all spatial dimensions.
-    strides : a tuple or list of N positive integers specifying the strides of the convolution. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1.
-    padding : one of "valid" or "same" (case-insensitive).
-    data_format : A string, one of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shapedata_format = 'NWHC' (batch, width, height, channels) while channels_first corresponds to inputs with shape (batch, channels, width, height).
-    dilation_rate : an integer or tuple/list of 2 integers, specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1.
-    depth_multiplier : The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to num_filters_in * depth_multiplier.
-    act (activation) : Activation function. Set it to None to maintain a linear activation.
-    use_bias : Boolean, whether the layer uses a bias.
-    depthwise_initializer : An initializer for the depthwise convolution kernel.
-    pointwise_initializer : An initializer for the pointwise convolution kernel.
-    bias_initializer : An initializer for the bias vector. If None, no bias will be applied.
-    depthwise_regularizer : Optional regularizer for the depthwise convolution kernel.
-    pointwise_regularizer : Optional regularizer for the pointwise convolution kernel.
-    bias_regularizer : Optional regularizer for the bias vector.
-    activity_regularizer : Regularizer function for the output.
-    name : a string or None, an optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        filters = None,
-        kernel_size=5,
-        strides=(1, 1),
-        padding='valid',
-        data_format='channels_last',
-        dilation_rate=(1, 1),
-        depth_multiplier=1,
-        act=None,
-        use_bias=True,
-        depthwise_initializer=None,
-        pointwise_initializer=None,
-        bias_initializer=tf.zeros_initializer,
-        depthwise_regularizer=None,
-        pointwise_regularizer=None,
-        bias_regularizer=None,
-        activity_regularizer=None,
-        name = 'atrou2d'
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        assert filters is not None
-        assert tf.__version__ > "0.12.1", "This layer only supports for TF 1.0+"
-        if act is None:
-            act = tf.identity
-
-        bias_initializer = bias_initializer()
-
-        print("  [TL] SeparableConv2dLayer %s: filters:%s kernel_size:%s strides:%s padding:%s dilation_rate:%s depth_multiplier:%s act:%s" %
-                            (self.name, str(filters), str(kernel_size), str(strides), padding, str(dilation_rate), str(depth_multiplier), act.__name__))
-
-        with tf.variable_scope(name) as vs:
-            self.outputs = tf.layers.separable_conv2d(self.inputs, filters, kernel_size,
-                 strides=strides, padding=padding, data_format=data_format,
-                 dilation_rate=dilation_rate, depth_multiplier=depth_multiplier, activation=act,
-                 use_bias=use_bias, depthwise_initializer=depthwise_initializer, pointwise_initializer=pointwise_initializer,
-                 bias_initializer=bias_initializer, depthwise_regularizer=depthwise_regularizer,
-                 pointwise_regularizer=pointwise_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer,)
-                 #trainable=True, name=None, reuse=None)
-
-            variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
-
-## Initializers for Convuolutional Layers
-def deconv2d_bilinear_upsampling_initializer(shape):
-    """Returns initializer that can be passed to DeConv2dLayer to initalize the
-    weights to correspond to channel wise bilinear upsampling.
-    Used in some segmantic segmentation approches such as [FCN](https://arxiv.org/abs/1605.06211)
-
-    Parameters
-    ----------
-        shape : list of shape
-            shape of the filters, [height, width, output_channels, in_channels], must match that passed to DeConv2dLayer
-
-    Returns
-    ----------
-        tf.constant_initializer
-            with weights set to correspond to per channel bilinear upsampling when passed as W_int in DeConv2dLayer
-
-    Examples
-    --------
-    >>> rescale_factor = 2 #upsampling by a factor of 2, ie e.g 100->200
-    >>> filter_size = (2 * rescale_factor - rescale_factor % 2) #Corresponding bilinear filter size
-    >>> num_in_channels = 3
-    >>> num_out_channels = 3
-    >>> deconv_filter_shape = [filter_size, filter_size, num_out_channels, num_in_channels]
-    >>> x = tf.placeholder(tf.float32, [1, imsize, imsize, num_channels])
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> bilinear_init = deconv2d_bilinear_upsampling_initializer(shape=filter_shape)
-    >>> network = tl.layers.DeConv2dLayer(network,
-                            shape = filter_shape,
-                            output_shape = [1, imsize*rescale_factor, imsize*rescale_factor, num_out_channels],
-                            strides=[1, rescale_factor, rescale_factor, 1],
-                            W_init=bilinear_init,
-                            padding='SAME',
-                            act=tf.identity, name='g/h1/decon2d')
-    """
-    if shape[0] != shape[1]:
-        raise Exception('deconv2d_bilinear_upsampling_initializer only supports symmetrical filter sizes')
-    if shape[3] < shape [2]:
-        raise Exception('deconv2d_bilinear_upsampling_initializer behaviour is not defined for num_in_channels < num_out_channels ')
-
-    filter_size = shape[0]
-    num_out_channels = shape[2]
-    num_in_channels = shape[3]
-
-    #Create bilinear filter kernel as numpy array
-    bilinear_kernel = np.zeros([filter_size, filter_size], dtype=np.float32)
-    scale_factor = (filter_size + 1) // 2
-    if filter_size % 2 == 1:
-        center = scale_factor - 1
-    else:
-        center = scale_factor - 0.5
-    for x in range(filter_size):
-        for y in range(filter_size):
-            bilinear_kernel[x,y] = (1 - abs(x - center) / scale_factor) * \
-                                   (1 - abs(y - center) / scale_factor)
-    weights = np.zeros((filter_size, filter_size, num_out_channels, num_in_channels))
-    for i in range(num_out_channels):
-        weights[:, :, i, i] = bilinear_kernel
-
-    #assign numpy array to constant_initalizer and pass to get_variable
-    bilinear_weights_init = tf.constant_initializer(value=weights, dtype=tf.float32)
-    return bilinear_weights_init
-
-## Convolutional layer (Simplified)
-def Conv1d(net, n_filter=32, filter_size=5, stride=1, act=None,
-        padding='SAME', use_cudnn_on_gpu=None,data_format=None,
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {}, b_init_args = {}, name ='conv1d',):
-    """Wrapper for :class:`Conv1dLayer`, if you don't understand how to use :class:`Conv1dLayer`, this function may be easier.
-
-    Parameters
-    ----------
-    net : TensorLayer layer.
-    n_filter : number of filter.
-    filter_size : an int.
-    stride : an int.
-    act : None or activation function.
-    others : see :class:`Conv1dLayer`.
-    """
-    if act is None:
-        act = tf.identity
-    net = Conv1dLayer(layer = net,
-            act = act,
-            shape = [filter_size, int(net.outputs.get_shape()[-1]), n_filter],
-            stride = stride,
-            padding = padding,
-            use_cudnn_on_gpu = use_cudnn_on_gpu,
-            data_format = data_format,
-            W_init = W_init,
-            b_init = b_init,
-            W_init_args = W_init_args,
-            b_init_args = b_init_args,
-            name = name,
-        )
-    return net
-
-def Conv2d(net, n_filter=32, filter_size=(3, 3), strides=(1, 1), act = None,
-        padding='SAME', W_init = tf.truncated_normal_initializer(stddev=0.02), b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {}, b_init_args = {}, name ='conv2d',):
-    """Wrapper for :class:`Conv2dLayer`, if you don't understand how to use :class:`Conv2dLayer`, this function may be easier.
-
-    Parameters
-    ----------
-    net : TensorLayer layer.
-    n_filter : number of filter.
-    filter_size : tuple (height, width) for filter size.
-    strides : tuple (height, width) for strides.
-    act : None or activation function.
-    others : see :class:`Conv2dLayer`.
-
-    Examples
-    --------
-    >>> w_init = tf.truncated_normal_initializer(stddev=0.01)
-    >>> b_init = tf.constant_initializer(value=0.0)
-    >>> inputs = InputLayer(x, name='inputs')
-    >>> conv1 = Conv2d(inputs, 64, (3, 3), act=tf.nn.relu, padding='SAME', W_init=w_init, b_init=b_init, name='conv1_1')
-    >>> conv1 = Conv2d(conv1, 64, (3, 3), act=tf.nn.relu, padding='SAME', W_init=w_init, b_init=b_init, name='conv1_2')
-    >>> pool1 = MaxPool2d(conv1, (2, 2), padding='SAME', name='pool1')
-    >>> conv2 = Conv2d(pool1, 128, (3, 3), act=tf.nn.relu, padding='SAME', W_init=w_init, b_init=b_init, name='conv2_1')
-    >>> conv2 = Conv2d(conv2, 128, (3, 3), act=tf.nn.relu, padding='SAME', W_init=w_init, b_init=b_init, name='conv2_2')
-    >>> pool2 = MaxPool2d(conv2, (2, 2), padding='SAME', name='pool2')
-    """
-    assert len(strides) == 2, "len(strides) should be 2, Conv2d and Conv2dLayer are different."
-    if act is None:
-        act = tf.identity
-    net = Conv2dLayer(net,
-                       act = act,
-                       shape = [filter_size[0], filter_size[1], int(net.outputs.get_shape()[-1]), n_filter],  # 32 features for each 5x5 patch
-                       strides = [1, strides[0], strides[1], 1],
-                       padding = padding,
-                       W_init = W_init,
-                       W_init_args = W_init_args,
-                       b_init = b_init,
-                       b_init_args = b_init_args,
-                       name = name)
-    return net
-
-def DeConv2d(net, n_out_channel = 32, filter_size=(3, 3),
-        out_size = (30, 30), strides = (2, 2), padding = 'SAME', batch_size = None, act = None,
-        W_init = tf.truncated_normal_initializer(stddev=0.02), b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {}, b_init_args = {}, name ='decnn2d'):
-    """Wrapper for :class:`DeConv2dLayer`, if you don't understand how to use :class:`DeConv2dLayer`, this function may be easier.
-
-    Parameters
-    ----------
-    net : TensorLayer layer.
-    n_out_channel : int, number of output channel.
-    filter_size : tuple of (height, width) for filter size.
-    out_size :  tuple of (height, width) of output.
-    batch_size : int or None, batch_size. If None, try to find the batch_size from the first dim of net.outputs (you should tell the batch_size when define the input placeholder).
-    strides : tuple of (height, width) for strides.
-    act : None or activation function.
-    others : see :class:`DeConv2dLayer`.
-    """
-    assert len(strides) == 2, "len(strides) should be 2, DeConv2d and DeConv2dLayer are different."
-    if act is None:
-        act = tf.identity
-    if batch_size is None:
-        batch_size = tf.shape(net.outputs)[0]
-    net = DeConv2dLayer(layer = net,
-                    act = act,
-                    shape = [filter_size[0], filter_size[1], n_out_channel, int(net.outputs.get_shape()[-1])],
-                    output_shape = [batch_size, int(out_size[0]), int(out_size[1]), n_out_channel],
-                    strides = [1, strides[0], strides[1], 1],
-                    padding = padding,
-                    W_init = W_init,
-                    b_init = b_init,
-                    W_init_args = W_init_args,
-                    b_init_args = b_init_args,
-                    name = name)
-    return net
-
-def MaxPool1d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None): #Untested
-    """Wrapper for `tf.layers.max_pooling1d <https://www.tensorflow.org/api_docs/python/tf/layers/max_pooling1d>`_ .
-
-    Parameters
-    ------------
-    net : TensorLayer layer, the tensor over which to pool. Must have rank 3.
-    filter_size (pool_size) : An integer or tuple/list of a single integer, representing the size of the pooling window.
-    strides : An integer or tuple/list of a single integer, specifying the strides of the pooling operation.
-    padding : A string. The padding method, either 'valid' or 'same'. Case-insensitive.
-    data_format : A string, one of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch, length, channels) while channels_first corresponds to inputs with shape (batch, channels, length).
-    name : A string, the name of the layer.
-
-    Returns
-    --------
-    - A :class:`Layer` which the output tensor, of rank 3.
-    """
-    print("  [TL] MaxPool1d %s: filter_size:%s strides:%s padding:%s" %
-                        (name, str(filter_size), str(strides), str(padding)))
-    outputs = tf.layers.max_pooling1d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name)
-
-    net_new = copy.copy(net)
-    net_new.outputs = outputs
-    net_new.all_layers.extend( [outputs] )
-    return net_new
-
-def MeanPool1d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None): #Untested
-    """Wrapper for `tf.layers.average_pooling1d <https://www.tensorflow.org/api_docs/python/tf/layers/average_pooling1d>`_ .
-
-    Parameters
-    ------------
-    net : TensorLayer layer, the tensor over which to pool. Must have rank 3.
-    filter_size (pool_size) : An integer or tuple/list of a single integer, representing the size of the pooling window.
-    strides : An integer or tuple/list of a single integer, specifying the strides of the pooling operation.
-    padding : A string. The padding method, either 'valid' or 'same'. Case-insensitive.
-    data_format : A string, one of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch, length, channels) while channels_first corresponds to inputs with shape (batch, channels, length).
-    name : A string, the name of the layer.
-
-    Returns
-    --------
-    - A :class:`Layer` which the output tensor, of rank 3.
-    """
-    print("  [TL] MeanPool1d %s: filter_size:%s strides:%s padding:%s" %
-                        (name, str(filter_size), str(strides), str(padding)))
-    outputs = tf.layers.average_pooling1d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name)
-
-    net_new = copy.copy(net)
-    net_new.outputs = outputs
-    net_new.all_layers.extend( [outputs] )
-    return net_new
-
-def MaxPool2d(net, filter_size=(2, 2), strides=None, padding='SAME', name='maxpool'):
-    """Wrapper for :class:`PoolLayer`.
-
-    Parameters
-    -----------
-    net : TensorLayer layer.
-    filter_size : tuple of (height, width) for filter size.
-    strides : tuple of (height, width). Default is the same with filter_size.
-    others : see :class:`PoolLayer`.
-    """
-    if strides is None:
-        strides = filter_size
-    assert len(strides) == 2, "len(strides) should be 2, MaxPool2d and PoolLayer are different."
-    net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1],
-            strides=[1, strides[0], strides[1], 1],
-            padding=padding,
-            pool = tf.nn.max_pool,
-            name = name)
-    return net
-
-def MeanPool2d(net, filter_size=(2, 2), strides=None, padding='SAME', name='meanpool'):
-    """Wrapper for :class:`PoolLayer`.
-
-    Parameters
-    -----------
-    net : TensorLayer layer.
-    filter_size : tuple of (height, width) for filter size.
-    strides : tuple of (height, width). Default is the same with filter_size.
-    others : see :class:`PoolLayer`.
-    """
-    if strides is None:
-        strides = filter_size
-    assert len(strides) == 2, "len(strides) should be 2, MeanPool2d and PoolLayer are different."
-    net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1],
-            strides=[1, strides[0], strides[1], 1],
-            padding=padding,
-            pool = tf.nn.avg_pool,
-            name = name)
-    return net
-
-def MaxPool3d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None): #Untested
-    """Wrapper for `tf.layers.max_pooling3d <https://www.tensorflow.org/api_docs/python/tf/layers/max_pooling3d>`_ .
-
-    Parameters
-    ------------
-    net : TensorLayer layer, the tensor over which to pool. Must have rank 5.
-    filter_size (pool_size) : An integer or tuple/list of 3 integers: (pool_depth, pool_height, pool_width) specifying the size of the pooling window. Can be a single integer to specify the same value for all spatial dimensions.
-    strides : An integer or tuple/list of 3 integers, specifying the strides of the pooling operation. Can be a single integer to specify the same value for all spatial dimensions.
-    padding : A string. The padding method, either 'valid' or 'same'. Case-insensitive.
-    data_format : A string. The ordering of the dimensions in the inputs. channels_last (default) and channels_first are supported. channels_last corresponds to inputs with shape (batch, depth, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, depth, height, width).
-    name : A string, the name of the layer.
-    """
-    print("  [TL] MaxPool3d %s: filter_size:%s strides:%s padding:%s" %
-                        (name, str(filter_size), str(strides), str(padding)))
-    outputs = tf.layers.max_pooling3d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name)
-
-    net_new = copy.copy(net)
-    net_new.outputs = outputs
-    net_new.all_layers.extend( [outputs] )
-    return net_new
-
-def MeanPool3d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None): #Untested
-    """Wrapper for `tf.layers.average_pooling3d <https://www.tensorflow.org/api_docs/python/tf/layers/average_pooling3d>`_
-
-    Parameters
-    ------------
-    net : TensorLayer layer, the tensor over which to pool. Must have rank 5.
-    filter_size (pool_size) : An integer or tuple/list of 3 integers: (pool_depth, pool_height, pool_width) specifying the size of the pooling window. Can be a single integer to specify the same value for all spatial dimensions.
-    strides : An integer or tuple/list of 3 integers, specifying the strides of the pooling operation. Can be a single integer to specify the same value for all spatial dimensions.
-    padding : A string. The padding method, either 'valid' or 'same'. Case-insensitive.
-    data_format : A string. The ordering of the dimensions in the inputs. channels_last (default) and channels_first are supported. channels_last corresponds to inputs with shape (batch, depth, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, depth, height, width).
-    name : A string, the name of the layer.
-    """
-    print("  [TL] MeanPool3d %s: filter_size:%s strides:%s padding:%s name:%s" %
-                        (name, str(filter_size), str(strides), str(padding)))
-    outputs = tf.layers.average_pooling3d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name)
-
-    net_new = copy.copy(net)
-    net_new.outputs = outputs
-    net_new.all_layers.extend( [outputs] )
-    return net_new
-
-## Super resolution
-def SubpixelConv2d(net, scale=2, n_out_channel=None, act=tf.identity, name='subpixel_conv2d'):
-    """The :class:`SubpixelConv2d` class is a sub-pixel 2d convolutional ayer, usually be used
-    for super-resolution application.
-
-    Parameters
-    ------------
-    net : TensorLayer layer.
-    scale : int, upscaling ratio, a wrong setting will lead to Dimension size error.
-    n_out_channel : int or None, the number of output channels.
-        Note that, the number of input channels == (scale x scale) x The number of output channels.
-        If None, automatically set n_out_channel == the number of input channels / (scale x scale).
-    act : activation function.
-    name : string.
-        An optional name to attach to this layer.
-
-    Examples
-    ---------
-    >>> # examples here just want to tell you how to set the n_out_channel.
-    >>> x = np.random.rand(2, 16, 16, 4)
-    >>> X = tf.placeholder("float32", shape=(2, 16, 16, 4), name="X")
-    >>> net = InputLayer(X, name='input')
-    >>> net = SubpixelConv2d(net, scale=2, n_out_channel=1, name='subpixel_conv2d')
-    >>> y = sess.run(net.outputs, feed_dict={X: x})
-    >>> print(x.shape, y.shape)
-    ... (2, 16, 16, 4) (2, 32, 32, 1)
-    >>>
-    >>> x = np.random.rand(2, 16, 16, 4*10)
-    >>> X = tf.placeholder("float32", shape=(2, 16, 16, 4*10), name="X")
-    >>> net = InputLayer(X, name='input2')
-    >>> net = SubpixelConv2d(net, scale=2, n_out_channel=10, name='subpixel_conv2d2')
-    >>> y = sess.run(net.outputs, feed_dict={X: x})
-    >>> print(x.shape, y.shape)
-    ... (2, 16, 16, 40) (2, 32, 32, 10)
-    >>>
-    >>> x = np.random.rand(2, 16, 16, 25*10)
-    >>> X = tf.placeholder("float32", shape=(2, 16, 16, 25*10), name="X")
-    >>> net = InputLayer(X, name='input3')
-    >>> net = SubpixelConv2d(net, scale=5, n_out_channel=None, name='subpixel_conv2d3')
-    >>> y = sess.run(net.outputs, feed_dict={X: x})
-    >>> print(x.shape, y.shape)
-    ... (2, 16, 16, 250) (2, 80, 80, 10)
-
-    References
-    ------------
-    - `Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network <https://arxiv.org/pdf/1609.05158.pdf>`_
-    """
-    # github/Tetrachrome/subpixel  https://github.com/Tetrachrome/subpixel/blob/master/subpixel.py
-
-    _err_log = "SubpixelConv2d: The number of input channels == (scale x scale) x The number of output channels"
-
-    scope_name = tf.get_variable_scope().name
-    if scope_name:
-        name = scope_name + '/' + name
-
-    def _phase_shift(I, r):
-        if tf.__version__ < '1.0':
-            raise Exception("Only support TF1.0+")
-        bsize, a, b, c = I.get_shape().as_list()
-        bsize = tf.shape(I)[0] # Handling Dimension(None) type for undefined batch dim
-        X = tf.reshape(I, (bsize, a, b, r, r))
-        X = tf.transpose(X, (0, 1, 2, 4, 3))  # bsize, a, b, 1, 1 # tf 0.12
-        # X = tf.split(1, a, X)  # a, [bsize, b, r, r] # tf 0.12
-        X = tf.split(X, a, 1)
-        # X = tf.concat(2, [tf.squeeze(x, axis=1) for x in X])  # bsize, b, a*r, r # tf 0.12
-        X = tf.concat([tf.squeeze(x, axis=1) for x in X], 2)
-        # X = tf.split(1, b, X)  # b, [bsize, a*r, r] # tf 0.12
-        X = tf.split(X, b, 1)
-        # X = tf.concat(2, [tf.squeeze(x, axis=1) for x in X])  # bsize, a*r, b*r # tf 0.12
-        X = tf.concat([tf.squeeze(x, axis=1) for x in X], 2)
-        return tf.reshape(X, (bsize, a*r, b*r, 1))
-
-    def _PS(X, r, n_out_channel):
-        if n_out_channel > 1:
-            assert int(X.get_shape()[-1]) == (r ** 2) * n_out_channel, _err_log
-            Xc = tf.split(X, n_out_channel, 3)
-            X = tf.concat([_phase_shift(x, r) for x in Xc], 3)
-        elif n_out_channel == 1:
-            assert int(X.get_shape()[-1]) == (r ** 2), _err_log
-            X = _phase_shift(X, r)
-        else:
-            print(_err_log)
-        return X
-
-    inputs = net.outputs
-
-    if n_out_channel is None:
-        assert int(inputs.get_shape()[-1])/ (scale ** 2) % 1 == 0, _err_log
-        n_out_channel = int(int(inputs.get_shape()[-1])/ (scale ** 2))
-
-    print("  [TL] SubpixelConv2d  %s: scale: %d n_out_channel: %s act: %s" % (name, scale, n_out_channel, act.__name__))
-
-    net_new = Layer(inputs, name=name)
-    # with tf.name_scope(name):
-    with tf.variable_scope(name) as vs:
-        net_new.outputs = act(_PS(inputs, r=scale, n_out_channel=n_out_channel))
-
-    net_new.all_layers = list(net.all_layers)
-    net_new.all_params = list(net.all_params)
-    net_new.all_drop = dict(net.all_drop)
-    net_new.all_layers.extend( [net_new.outputs] )
-    return net_new
-
-
-# ## Normalization layer
-class LocalResponseNormLayer(Layer):
-    """The :class:`LocalResponseNormLayer` class is for Local Response Normalization, see ``tf.nn.local_response_normalization``.
-    The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the last dimension), and each vector is normalized independently.
-    Within a given vector, each component is divided by the weighted, squared sum of inputs within depth_radius.
-
-    Parameters
-    -----------
-    layer : a layer class. Must be one of the following types: float32, half. 4-D.
-    depth_radius : An optional int. Defaults to 5. 0-D. Half-width of the 1-D normalization window.
-    bias : An optional float. Defaults to 1. An offset (usually positive to avoid dividing by 0).
-    alpha : An optional float. Defaults to 1. A scale factor, usually positive.
-    beta : An optional float. Defaults to 0.5. An exponent.
-    name : A string or None, an optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        depth_radius = None,
-        bias = None,
-        alpha = None,
-        beta = None,
-        name ='lrn_layer',
-    ):
-        self.inputs = layer.outputs
-        print("  [TL] LocalResponseNormLayer %s: depth_radius: %d, bias: %f, alpha: %f, beta: %f" %
-                            (self.name, depth_radius, bias, alpha, beta))
-        with tf.variable_scope(name) as vs:
-            self.outputs = tf.nn.local_response_normalization(self.inputs, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-
-class BatchNormLayer(Layer):
-    """
-    The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``.
-
-    Batch normalization on fully-connected or convolutional maps.
-
-    Parameters
-    -----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    decay : float, default is 0.9.
-        A decay factor for ExponentialMovingAverage, use larger value for large dataset.
-    epsilon : float
-        A small float number to avoid dividing by 0.
-    act : activation function.
-    is_train : boolean
-        Whether train or inference.
-    beta_init : beta initializer
-        The initializer for initializing beta
-    gamma_init : gamma initializer
-        The initializer for initializing gamma
-    name : a string or None
-        An optional name to attach to this layer.
-
-    References
-    ----------
-    - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
-    - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
-    """
-    def __init__(
-        self,
-        layer = None,
-        decay = 0.9,
-        epsilon = 0.00001,
-        act = tf.identity,
-        is_train = False,
-        beta_init = tf.zeros_initializer,
-        gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002), # tf.ones_initializer,
-        name ='batchnorm_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] BatchNormLayer %s: decay:%f epsilon:%f act:%s is_train:%s" %
-                            (self.name, decay, epsilon, act.__name__, is_train))
-        x_shape = self.inputs.get_shape()
-        params_shape = x_shape[-1:]
-
-        from tensorflow.python.training import moving_averages
-        from tensorflow.python.ops import control_flow_ops
-
-        with tf.variable_scope(name) as vs:
-            axis = list(range(len(x_shape) - 1))
-
-            ## 1. beta, gamma
-            if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer:
-                beta_init = beta_init()
-            beta = tf.get_variable('beta', shape=params_shape,
-                               initializer=beta_init,
-                               trainable=is_train)#, restore=restore)
-
-            gamma = tf.get_variable('gamma', shape=params_shape,
-                                initializer=gamma_init, trainable=is_train,
-                                )#restore=restore)
-
-            ## 2.
-            if tf.__version__ > '0.12.1':
-                moving_mean_init = tf.zeros_initializer()
-            else:
-                moving_mean_init = tf.zeros_initializer
-            moving_mean = tf.get_variable('moving_mean',
-                                      params_shape,
-                                      initializer=moving_mean_init,
-                                      trainable=False,)#   restore=restore)
-            moving_variance = tf.get_variable('moving_variance',
-                                          params_shape,
-                                          initializer=tf.constant_initializer(1.),
-                                          trainable=False,)#   restore=restore)
-
-            ## 3.
-            # These ops will only be preformed when training.
-            mean, variance = tf.nn.moments(self.inputs, axis)
-            try:    # TF12
-                update_moving_mean = moving_averages.assign_moving_average(
-                                moving_mean, mean, decay, zero_debias=False)     # if zero_debias=True, has bias
-                update_moving_variance = moving_averages.assign_moving_average(
-                                moving_variance, variance, decay, zero_debias=False) # if zero_debias=True, has bias
-                # print("TF12 moving")
-            except Exception as e:  # TF11
-                update_moving_mean = moving_averages.assign_moving_average(
-                                moving_mean, mean, decay)
-                update_moving_variance = moving_averages.assign_moving_average(
-                                moving_variance, variance, decay)
-                # print("TF11 moving")
-
-            def mean_var_with_update():
-                with tf.control_dependencies([update_moving_mean, update_moving_variance]):
-                    return tf.identity(mean), tf.identity(variance)
-
-            if is_train:
-                mean, var = mean_var_with_update()
-                self.outputs = act( tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon) )
-            else:
-                self.outputs = act( tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon) )
-
-            variables = [beta, gamma, moving_mean, moving_variance]
-
-            # print(len(variables))
-            # for idx, v in enumerate(variables):
-            #     print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v))
-            # exit()
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
-
-# class BatchNormLayer_TF(Layer):   # Work well TF contrib https://github.com/tensorflow/tensorflow/blob/b826b79718e3e93148c3545e7aa3f90891744cc0/tensorflow/contrib/layers/python/layers/layers.py#L100
-#     """
-#     The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``.
-#
-#     Batch normalization on fully-connected or convolutional maps.
-#
-#     Parameters
-#     -----------
-#     layer : a :class:`Layer` instance
-#         The `Layer` class feeding into this layer.
-#     decay : float
-#         A decay factor for ExponentialMovingAverage.
-#     center: If True, subtract `beta`. If False, `beta` is ignored.
-#     scale: If True, multiply by `gamma`. If False, `gamma` is
-#         not used. When the next layer is linear (also e.g. `nn.relu`), this can be
-#         disabled since the scaling can be done by the next layer.
-#     epsilon : float
-#         A small float number to avoid dividing by 0.
-#     act : activation function.
-#     is_train : boolean
-#         Whether train or inference.
-#     beta_init : beta initializer
-#         The initializer for initializing beta
-#     gamma_init : gamma initializer
-#         The initializer for initializing gamma
-#     name : a string or None
-#         An optional name to attach to this layer.
-#
-#     References
-#     ----------
-#     - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
-#     - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
-#     """
-#     def __init__(
-#         self,
-#         layer = None,
-#         decay = 0.95,#.999,
-#         center = True,
-#         scale = True,
-#         epsilon = 0.00001,
-#         act = tf.identity,
-#         is_train = False,
-#         beta_init = tf.zeros_initializer,
-#         # gamma_init = tf.ones_initializer,
-#         gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002),
-#         name ='batchnorm_layer',
-#     ):
-#         Layer.__init__(self, name=name)
-#         self.inputs = layer.outputs
-#         print("  [TL] BatchNormLayer %s: decay: %f, epsilon: %f, act: %s, is_train: %s" %
-#                             (self.name, decay, epsilon, act.__name__, is_train))
-#         from tensorflow.contrib.layers.python.layers import utils
-#         from tensorflow.contrib.framework.python.ops import variables
-#         from tensorflow.python.ops import init_ops
-#         from tensorflow.python.ops import nn
-#         from tensorflow.python.training import moving_averages
-#         from tensorflow.python.framework import ops
-#         from tensorflow.python.ops import variable_scope
-#         variables_collections = None
-#         outputs_collections=None
-#         updates_collections=None#ops.GraphKeys.UPDATE_OPS
-#         # with variable_scope.variable_op_scope([inputs],
-#         #                                     scope, 'BatchNorm', reuse=reuse) as sc:
-#         # with variable_scope.variable_op_scope([self.inputs], None, name) as vs:
-#         with tf.variable_scope(name) as vs:
-#             inputs_shape = self.inputs.get_shape()
-#             dtype = self.inputs.dtype.base_dtype
-#             axis = list(range(len(inputs_shape) - 1)) # [0, 1, 2]
-#             params_shape = inputs_shape[-1:]
-#             # Allocate parameters for the beta and gamma of the normalization.
-#             beta, gamma = None, None
-#             if center:
-#               beta_collections = utils.get_variable_collections(variables_collections,
-#                                                                 'beta')
-#               beta = variables.model_variable('beta',
-#                                               shape=params_shape,
-#                                               dtype=dtype,
-#                                             #   initializer=init_ops.zeros_initializer,
-#                                               initializer=beta_init,
-#                                               collections=beta_collections,)
-#                                             #   trainable=trainable)
-#             if scale:
-#               gamma_collections = utils.get_variable_collections(variables_collections,
-#                                                                  'gamma')
-#               gamma = variables.model_variable('gamma',
-#                                                shape=params_shape,
-#                                                dtype=dtype,
-#                                             #    initializer=init_ops.ones_initializer,
-#                                                initializer=gamma_init,
-#                                                collections=gamma_collections,)
-#                                             #    trainable=trainable)
-#             # Create moving_mean and moving_variance variables and add them to the
-#             # appropiate collections.
-#             moving_mean_collections = utils.get_variable_collections(
-#                 variables_collections,
-#                 'moving_mean')
-#             moving_mean = variables.model_variable(
-#                 'moving_mean',
-#                 shape=params_shape,
-#                 dtype=dtype,
-#                 # initializer=init_ops.zeros_initializer,
-#                 initializer=tf.zeros_initializer,
-#                 trainable=False,
-#                 collections=moving_mean_collections)
-#             moving_variance_collections = utils.get_variable_collections(
-#                 variables_collections,
-#                 'moving_variance')
-#             moving_variance = variables.model_variable(
-#                 'moving_variance',
-#                 shape=params_shape,
-#                 dtype=dtype,
-#                 # initializer=init_ops.ones_initializer,
-#                 initializer=tf.constant_initializer(1.),
-#                 trainable=False,
-#                 collections=moving_variance_collections)
-#             if is_train:
-#               # Calculate the moments based on the individual batch.
-#               mean, variance = nn.moments(self.inputs, axis, shift=moving_mean)
-#               # Update the moving_mean and moving_variance moments.
-#             #   update_moving_mean = moving_averages.assign_moving_average(
-#             #       moving_mean, mean, decay)
-#             #   update_moving_variance = moving_averages.assign_moving_average(
-#             #       moving_variance, variance, decay)
-#             #   if updates_collections is None:
-#             #     # Make sure the updates are computed here.
-#             #       with ops.control_dependencies([update_moving_mean,
-#             #                                        update_moving_variance]):
-#             #          outputs = nn.batch_normalization(
-#             #               self.inputs, mean, variance, beta, gamma, epsilon)
-#
-#               update_moving_mean = tf.assign(moving_mean,
-#                                    moving_mean * decay + mean * (1 - decay))
-#               update_moving_variance = tf.assign(moving_variance,
-#                                   moving_variance * decay + variance * (1 - decay))
-#               with tf.control_dependencies([update_moving_mean, update_moving_variance]):
-#                   outputs = nn.batch_normalization(
-#                               self.inputs, mean, variance, beta, gamma, epsilon)
-#             #   else:
-#             #     # Collect the updates to be computed later.
-#             #     ops.add_to_collections(updates_collections, update_moving_mean)
-#             #     ops.add_to_collections(updates_collections, update_moving_variance)
-#             #     outputs = nn.batch_normalization(
-#             #         self.inputs, mean, variance, beta, gamma, epsilon)
-#             else:
-#             #   mean, variance = nn.moments(self.inputs, axis, shift=moving_mean)
-#               outputs = nn.batch_normalization(
-#                   self.inputs, moving_mean, moving_variance, beta, gamma, epsilon)
-#                 # self.inputs, mean, variance, beta, gamma, epsilon)
-#             outputs.set_shape(self.inputs.get_shape())
-#             # if activation_fn:
-#             self.outputs = act(outputs)
-#
-#             # variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-#             # return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
-#             variables = [beta, gamma, moving_mean, moving_variance]
-#
-#         mean, variance = nn.moments(self.inputs, axis, shift=moving_mean)
-#         self.check_mean = mean
-#         self.check_variance = variance
-#
-#         self.all_layers = list(layer.all_layers)
-#         self.all_params = list(layer.all_params)
-#         self.all_drop = dict(layer.all_drop)
-#         self.all_layers.extend( [self.outputs] )
-#         self.all_params.extend( variables )
-#
-# class BatchNormLayer5(Layer):   # Akara Work well
-#     """
-#     The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``.
-#
-#     Batch normalization on fully-connected or convolutional maps.
-#
-#     Parameters
-#     -----------
-#     layer : a :class:`Layer` instance
-#         The `Layer` class feeding into this layer.
-#     decay : float
-#         A decay factor for ExponentialMovingAverage.
-#     epsilon : float
-#         A small float number to avoid dividing by 0.
-#     act : activation function.
-#     is_train : boolean
-#         Whether train or inference.
-#     beta_init : beta initializer
-#         The initializer for initializing beta
-#     gamma_init : gamma initializer
-#         The initializer for initializing gamma
-#     name : a string or None
-#         An optional name to attach to this layer.
-#
-#     References
-#     ----------
-#     - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
-#     - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
-#     """
-#     def __init__(
-#         self,
-#         layer = None,
-#         decay = 0.9,
-#         epsilon = 0.00001,
-#         act = tf.identity,
-#         is_train = False,
-#         beta_init = tf.zeros_initializer,
-#         # gamma_init = tf.ones_initializer,
-#         gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002),
-#         name ='batchnorm_layer',
-#     ):
-#         Layer.__init__(self, name=name)
-#         self.inputs = layer.outputs
-#         print("  [TL] BatchNormLayer %s: decay: %f, epsilon: %f, act: %s, is_train: %s" %
-#                             (self.name, decay, epsilon, act.__name__, is_train))
-#         x_shape = self.inputs.get_shape()
-#         params_shape = x_shape[-1:]
-#
-#         from tensorflow.python.training import moving_averages
-#         from tensorflow.python.ops import control_flow_ops
-#
-#         with tf.variable_scope(name) as vs:
-#             axis = list(range(len(x_shape) - 1))
-#
-#             ## 1. beta, gamma
-#             beta = tf.get_variable('beta', shape=params_shape,
-#                                initializer=beta_init,
-#                                trainable=is_train)#, restore=restore)
-#
-#             gamma = tf.get_variable('gamma', shape=params_shape,
-#                                 initializer=gamma_init, trainable=is_train,
-#                                 )#restore=restore)
-#
-#             ## 2. moving variables during training (not update by gradient!)
-#             moving_mean = tf.get_variable('moving_mean',
-#                                       params_shape,
-#                                       initializer=tf.zeros_initializer,
-#                                       trainable=False,)#   restore=restore)
-#             moving_variance = tf.get_variable('moving_variance',
-#                                           params_shape,
-#                                           initializer=tf.constant_initializer(1.),
-#                                           trainable=False,)#   restore=restore)
-#
-#             batch_mean, batch_var = tf.nn.moments(self.inputs, axis)
-#             ## 3.
-#             # These ops will only be preformed when training.
-#             def mean_var_with_update():
-#                 try:    # TF12
-#                     update_moving_mean = moving_averages.assign_moving_average(
-#                                     moving_mean, batch_mean, decay, zero_debias=False)     # if zero_debias=True, has bias
-#                     update_moving_variance = moving_averages.assign_moving_average(
-#                                     moving_variance, batch_var, decay, zero_debias=False) # if zero_debias=True, has bias
-#                     # print("TF12 moving")
-#                 except Exception as e:  # TF11
-#                     update_moving_mean = moving_averages.assign_moving_average(
-#                                     moving_mean, batch_mean, decay)
-#                     update_moving_variance = moving_averages.assign_moving_average(
-#                                     moving_variance, batch_var, decay)
-#                     # print("TF11 moving")
-#
-#             # def mean_var_with_update():
-#                 with tf.control_dependencies([update_moving_mean, update_moving_variance]):
-#                     # return tf.identity(update_moving_mean), tf.identity(update_moving_variance)
-#                     return tf.identity(batch_mean), tf.identity(batch_var)
-#
-#             # if not is_train:
-#             if is_train:
-#                 mean, var = mean_var_with_update()
-#             else:
-#                 mean, var = (moving_mean, moving_variance)
-#
-#             normed = tf.nn.batch_normalization(
-#               x=self.inputs,
-#               mean=mean,
-#               variance=var,
-#               offset=beta,
-#               scale=gamma,
-#               variance_epsilon=epsilon,
-#               name="tf_bn"
-#             )
-#             self.outputs = act( normed )
-#
-#             variables = [beta, gamma, moving_mean, moving_variance]
-#             # print(len(variables))
-#             # for idx, v in enumerate(variables):
-#             #     print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v))
-#             # exit()
-#
-#         self.all_layers = list(layer.all_layers)
-#         self.all_params = list(layer.all_params)
-#         self.all_drop = dict(layer.all_drop)
-#         self.all_layers.extend( [self.outputs] )
-#         self.all_params.extend( variables )
-#         # self.all_params.extend( [beta, gamma] )
-#
-# class BatchNormLayer4(Layer): # work TFlearn https://github.com/tflearn/tflearn/blob/master/tflearn/layers/normalization.py
-#     """
-#     The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``.
-#
-#     Batch normalization on fully-connected or convolutional maps.
-#
-#     Parameters
-#     -----------
-#     layer : a :class:`Layer` instance
-#         The `Layer` class feeding into this layer.
-#     decay : float
-#         A decay factor for ExponentialMovingAverage.
-#     epsilon : float
-#         A small float number to avoid dividing by 0.
-#     act : activation function.
-#     is_train : boolean
-#         Whether train or inference.
-#     beta_init : beta initializer
-#         The initializer for initializing beta
-#     gamma_init : gamma initializer
-#         The initializer for initializing gamma
-#     name : a string or None
-#         An optional name to attach to this layer.
-#
-#     References
-#     ----------
-#     - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
-#     - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
-#     """
-#     def __init__(
-#         self,
-#         layer = None,
-#         decay = 0.999,
-#         epsilon = 0.00001,
-#         act = tf.identity,
-#         is_train = None,
-#         beta_init = tf.zeros_initializer,
-#         # gamma_init = tf.ones_initializer,
-#         gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002),
-#         name ='batchnorm_layer',
-#     ):
-#         Layer.__init__(self, name=name)
-#         self.inputs = layer.outputs
-#         print("  [TL] BatchNormLayer %s: decay: %f, epsilon: %f, act: %s, is_train: %s" %
-#                             (self.name, decay, epsilon, act.__name__, is_train))
-#         input_shape = self.inputs.get_shape()
-#         # params_shape = input_shape[-1:]
-#         input_ndim = len(input_shape)
-#         from tensorflow.python.training import moving_averages
-#         from tensorflow.python.ops import control_flow_ops
-#
-#         # gamma_init = tf.random_normal_initializer(mean=gamma, stddev=stddev)
-#
-#         # Variable Scope fix for older TF
-#         scope = name
-#         try:
-#             vscope = tf.variable_scope(scope, default_name=name, values=[self.inputs],)
-#                                     #    reuse=reuse)
-#         except Exception:
-#             vscope = tf.variable_op_scope([self.inputs], scope, name)#, reuse=reuse)
-#
-#         with vscope as scope:
-#             name = scope.name
-#         # with tf.variable_scope(name) as vs:
-#             beta = tf.get_variable('beta', shape=[input_shape[-1]],
-#                                 initializer=beta_init,)
-#                             #    initializer=tf.constant_initializer(beta),)
-#                             #    trainable=trainable, )#restore=restore)
-#             gamma = tf.get_variable('gamma', shape=[input_shape[-1]],
-#                                 initializer=gamma_init, )#trainable=trainable,)
-#                                 # restore=restore)
-#
-#             axis = list(range(input_ndim - 1))
-#             moving_mean = tf.get_variable('moving_mean',
-#                                       input_shape[-1:],
-#                                       initializer=tf.zeros_initializer,
-#                                       trainable=False,)
-#                                     #   restore=restore)
-#             moving_variance = tf.get_variable('moving_variance',
-#                                           input_shape[-1:],
-#                                           initializer=tf.constant_initializer(1.),
-#                                           trainable=False,)
-#                                         #   restore=restore)
-#
-#             # Define a function to update mean and variance
-#             def update_mean_var():
-#                 mean, variance = tf.nn.moments(self.inputs, axis)
-#
-#                 # Fix TF 0.12
-#                 try:
-#                     update_moving_mean = moving_averages.assign_moving_average(
-#                         moving_mean, mean, decay, zero_debias=False)            # if zero_debias=True, accuracy is high ..
-#                     update_moving_variance = moving_averages.assign_moving_average(
-#                         moving_variance, variance, decay, zero_debias=False)
-#                 except Exception as e:  # TF 11
-#                     update_moving_mean = moving_averages.assign_moving_average(
-#                         moving_mean, mean, decay)
-#                     update_moving_variance = moving_averages.assign_moving_average(
-#                         moving_variance, variance, decay)
-#
-#                 with tf.control_dependencies(
-#                         [update_moving_mean, update_moving_variance]):
-#                     return tf.identity(mean), tf.identity(variance)
-#
-#             # Retrieve variable managing training mode
-#             # is_training = tflearn.get_training_mode()
-#             if not is_train:    # test : mean=0, std=1
-#             # if is_train:      # train : mean=0, std=1
-#                 is_training = tf.cast(tf.ones([]), tf.bool)
-#             else:
-#                 is_training = tf.cast(tf.zeros([]), tf.bool)
-#             mean, var = tf.cond(
-#                 is_training, update_mean_var, lambda: (moving_mean, moving_variance))
-#                             #  ones                 zeros
-#             try:
-#                 inference = tf.nn.batch_normalization(
-#                     self.inputs, mean, var, beta, gamma, epsilon)
-#                 inference.set_shape(input_shape)
-#             # Fix for old Tensorflow
-#             except Exception as e:
-#                 inference = tf.nn.batch_norm_with_global_normalization(
-#                     self.inputs, mean, var, beta, gamma, epsilon,
-#                     scale_after_normalization=True,
-#                 )
-#                 inference.set_shape(input_shape)
-#
-#             variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)    # 2 params beta, gamma
-#                 # variables = [beta, gamma, moving_mean, moving_variance]
-#
-#             # print(len(variables))
-#             # for idx, v in enumerate(variables):
-#             #     print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v.name))
-#             # exit()
-#
-#         # Add attributes for easy access
-#         # inference.scope = scope
-#         inference.scope = name
-#         inference.beta = beta
-#         inference.gamma = gamma
-#
-#         self.outputs = act( inference )
-#
-#         self.all_layers = list(layer.all_layers)
-#         self.all_params = list(layer.all_params)
-#         self.all_drop = dict(layer.all_drop)
-#         self.all_layers.extend( [self.outputs] )
-#         self.all_params.extend( variables )
-
-# class BatchNormLayer2(Layer):   # don't work http://r2rt.com/implementing-batch-normalization-in-tensorflow.html
-#     """
-#     The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``.
-#
-#     Batch normalization on fully-connected or convolutional maps.
-#
-#     Parameters
-#     -----------
-#     layer : a :class:`Layer` instance
-#         The `Layer` class feeding into this layer.
-#     decay : float
-#         A decay factor for ExponentialMovingAverage.
-#     epsilon : float
-#         A small float number to avoid dividing by 0.
-#     act : activation function.
-#     is_train : boolean
-#         Whether train or inference.
-#     beta_init : beta initializer
-#         The initializer for initializing beta
-#     gamma_init : gamma initializer
-#         The initializer for initializing gamma
-#     name : a string or None
-#         An optional name to attach to this layer.
-#
-#     References
-#     ----------
-#     - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
-#     - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
-#     """
-#     def __init__(
-#         self,
-#         layer = None,
-#         decay = 0.999,
-#         epsilon = 0.00001,
-#         act = tf.identity,
-#         is_train = None,
-#         beta_init = tf.zeros_initializer,
-#         # gamma_init = tf.ones_initializer,
-#         gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002),
-#         name ='batchnorm_layer',
-#     ):
-#         Layer.__init__(self, name=name)
-#         self.inputs = layer.outputs
-#         print("  [TL] BatchNormLayer %s: decay: %f, epsilon: %f, act: %s, is_train: %s" %
-#                             (self.name, decay, epsilon, act.__name__, is_train))
-#         x_shape = self.inputs.get_shape()
-#         params_shape = x_shape[-1:]
-#
-#         with tf.variable_scope(name) as vs:
-#             gamma = tf.get_variable("gamma", shape=params_shape,
-#                         initializer=gamma_init)
-#             beta = tf.get_variable("beta", shape=params_shape,
-#                         initializer=beta_init)
-#             pop_mean = tf.get_variable("pop_mean", shape=params_shape,
-#                         initializer=tf.zeros_initializer, trainable=False)
-#             pop_var = tf.get_variable("pop_var", shape=params_shape,
-#                         initializer=tf.constant_initializer(1.), trainable=False)
-#
-#             if is_train:
-#                 batch_mean, batch_var = tf.nn.moments(self.inputs, list(range(len(x_shape) - 1)))
-#                 train_mean = tf.assign(pop_mean,
-#                                        pop_mean * decay + batch_mean * (1 - decay))
-#                 train_var = tf.assign(pop_var,
-#                                       pop_var * decay + batch_var * (1 - decay))
-#                 with tf.control_dependencies([train_mean, train_var]):
-#                     self.outputs = act(tf.nn.batch_normalization(self.inputs,
-#                         batch_mean, batch_var, beta, gamma, epsilon))
-#             else:
-#                 self.outputs = act(tf.nn.batch_normalization(self.inputs,
-#                     pop_mean, pop_var, beta, gamma, epsilon))
-#                     # self.outputs = act( tf.nn.batch_normalization(self.inputs, mean, variance, beta, gamma, epsilon) )
-#             # variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)  # 8 params in TF12 if zero_debias=True
-#             variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)    # 2 params beta, gamma
-#                 # variables = [beta, gamma, moving_mean, moving_variance]
-#
-#             # print(len(variables))
-#             # for idx, v in enumerate(variables):
-#             #     print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v.name))
-#             # exit()
-#
-#         self.all_layers = list(layer.all_layers)
-#         self.all_params = list(layer.all_params)
-#         self.all_drop = dict(layer.all_drop)
-#         self.all_layers.extend( [self.outputs] )
-#         self.all_params.extend( variables )
-
-# class BatchNormLayer3(Layer):   # don't work http://r2rt.com/implementing-batch-normalization-in-tensorflow.html
-#     """
-#     The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``.
-#
-#     Batch normalization on fully-connected or convolutional maps.
-#
-#     Parameters
-#     -----------
-#     layer : a :class:`Layer` instance
-#         The `Layer` class feeding into this layer.
-#     decay : float
-#         A decay factor for ExponentialMovingAverage.
-#     epsilon : float
-#         A small float number to avoid dividing by 0.
-#     act : activation function.
-#     is_train : boolean
-#         Whether train or inference.
-#     beta_init : beta initializer
-#         The initializer for initializing beta
-#     gamma_init : gamma initializer
-#         The initializer for initializing gamma
-#     name : a string or None
-#         An optional name to attach to this layer.
-#
-#     References
-#     ----------
-#     - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
-#     - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
-#     """
-#     def __init__(
-#         self,
-#         layer = None,
-#         decay = 0.999,
-#         epsilon = 0.00001,
-#         act = tf.identity,
-#         is_train = None,
-#         beta_init = tf.zeros_initializer,
-#         # gamma_init = tf.ones_initializer,
-#         gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002),
-#         name ='batchnorm_layer',
-#     ):
-#         """
-#         Batch normalization on convolutional maps.
-#         Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
-#         Args:
-#             x:           Tensor, 4D BHWD input maps
-#             n_out:       integer, depth of input maps
-#             phase_train: boolean tf.Varialbe, true indicates training phase
-#             scope:       string, variable scope
-#         Return:
-#             normed:      batch-normalized maps
-#         """
-#         Layer.__init__(self, name=name)
-#         self.inputs = layer.outputs
-#         print("  [TL] BatchNormLayer %s: decay: %f, epsilon: %f, act: %s, is_train: %s" %
-#                             (self.name, decay, epsilon, act.__name__, is_train))
-#         x_shape = self.inputs.get_shape()
-#         params_shape = x_shape[-1:]
-#
-#         if is_train:
-#             phase_train = tf.cast(tf.ones([]), tf.bool)
-#         else:
-#             phase_train = tf.cast(tf.zeros([]), tf.bool)
-#
-#         with tf.variable_scope(name) as vs:
-#             gamma = tf.get_variable("gamma", shape=params_shape,
-#                         initializer=gamma_init)
-#             beta = tf.get_variable("beta", shape=params_shape,
-#                         initializer=beta_init)
-#             batch_mean, batch_var = tf.nn.moments(self.inputs, list(range(len(x_shape) - 1)),#[0,1,2],
-#                             name='moments')
-#             ema = tf.train.ExponentialMovingAverage(decay=decay)
-#
-#             def mean_var_with_update():
-#                 ema_apply_op = ema.apply([batch_mean, batch_var])
-#                 with tf.control_dependencies([ema_apply_op]):
-#                     return tf.identity(batch_mean), tf.identity(batch_var)
-#
-#             mean, var = tf.cond(phase_train,
-#                                 mean_var_with_update,
-#                                 lambda: (ema.average(batch_mean), ema.average(batch_var)))
-#             normed = tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon)
-#             self.outputs = act( normed )
-#             variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)    # 2 params beta, gamma
-#                 # variables = [beta, gamma, moving_mean, moving_variance]
-#
-#             # print(len(variables))
-#             # for idx, v in enumerate(variables):
-#             #     print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v.name))
-#             # exit()
-#
-#         self.all_layers = list(layer.all_layers)
-#         self.all_params = list(layer.all_params)
-#         self.all_drop = dict(layer.all_drop)
-#         self.all_layers.extend( [self.outputs] )
-#         self.all_params.extend( variables )
-
-# class BatchNormLayer_old(Layer):  # don't work
-#     """
-#     The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization``.
-#
-#     Batch normalization on fully-connected or convolutional maps.
-#
-#     Parameters
-#     -----------
-#     layer : a :class:`Layer` instance
-#         The `Layer` class feeding into this layer.
-#     decay : float
-#         A decay factor for ExponentialMovingAverage.
-#     epsilon : float
-#         A small float number to avoid dividing by 0.
-#     is_train : boolean
-#         Whether train or inference.
-#     name : a string or None
-#         An optional name to attach to this layer.
-#
-#     References
-#     ----------
-#     - `tf.nn.batch_normalization <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.batch_normalization.md>`_
-#     - `stackoverflow <http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow>`_
-#     - `tensorflow.contrib <https://github.com/tensorflow/tensorflow/blob/b826b79718e3e93148c3545e7aa3f90891744cc0/tensorflow/contrib/layers/python/layers/layers.py#L100>`_
-#     """
-#     def __init__(
-#         self,
-#         layer = None,
-#         act = tf.identity,
-#         decay = 0.999,
-#         epsilon = 0.001,
-#         is_train = None,
-#         name ='batchnorm_layer',
-#     ):
-#         Layer.__init__(self, name=name)
-#         self.inputs = layer.outputs
-#         print("  [TL] BatchNormLayer %s: decay: %f, epsilon: %f, is_train: %s" %
-#                             (self.name, decay, epsilon, is_train))
-#         if is_train == None:
-#             raise Exception("is_train must be True or False")
-#
-#         # (name, input_var, decay, epsilon, is_train)
-#         inputs_shape = self.inputs.get_shape()
-#         axis = list(range(len(inputs_shape) - 1))
-#         params_shape = inputs_shape[-1:]
-#
-#         with tf.variable_scope(name) as vs:
-#             beta = tf.get_variable(name='beta', shape=params_shape,
-#                                  initializer=tf.constant_initializer(0.0))
-#             gamma = tf.get_variable(name='gamma', shape=params_shape,
-#                                   initializer=tf.constant_initializer(1.0))
-#             batch_mean, batch_var = tf.nn.moments(self.inputs,
-#                                                 axis,
-#                                                 name='moments')
-#             ema = tf.train.ExponentialMovingAverage(decay=decay)
-#
-#             def mean_var_with_update():
-#               ema_apply_op = ema.apply([batch_mean, batch_var])
-#               with tf.control_dependencies([ema_apply_op]):
-#                   return tf.identity(batch_mean), tf.identity(batch_var)
-#
-#             if is_train:
-#                 is_train = tf.cast(tf.ones(1), tf.bool)
-#             else:
-#                 is_train = tf.cast(tf.zeros(1), tf.bool)
-#
-#             is_train = tf.reshape(is_train, [])
-#
-#             # print(is_train)
-#             # exit()
-#
-#             mean, var = tf.cond(
-#               is_train,
-#               mean_var_with_update,
-#               lambda: (ema.average(batch_mean), ema.average(batch_var))
-#             )
-#             normed = tf.nn.batch_normalization(
-#               x=self.inputs,
-#               mean=mean,
-#               variance=var,
-#               offset=beta,
-#               scale=gamma,
-#               variance_epsilon=epsilon,
-#               name='tf_bn'
-#             )
-#         self.outputs = act( normed )
-#
-#         self.all_layers = list(layer.all_layers)
-#         self.all_params = list(layer.all_params)
-#         self.all_drop = dict(layer.all_drop)
-#         self.all_layers.extend( [self.outputs] )
-#         self.all_params.extend( [beta, gamma] )
-
-## Pooling layer
-class PoolLayer(Layer):
-    """
-    The :class:`PoolLayer` class is a Pooling layer, you can choose
-    ``tf.nn.max_pool`` and ``tf.nn.avg_pool`` for 2D or
-    ``tf.nn.max_pool3d`` and ``tf.nn.avg_pool3d`` for 3D.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    ksize : a list of ints that has length >= 4.
-        The size of the window for each dimension of the input tensor.
-    strides : a list of ints that has length >= 4.
-        The stride of the sliding window for each dimension of the input tensor.
-    padding : a string from: "SAME", "VALID".
-        The type of padding algorithm to use.
-    pool : a pooling function
-        - see `TensorFlow pooling APIs <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#pooling>`_
-        - class ``tf.nn.max_pool``
-        - class ``tf.nn.avg_pool``
-        - class ``tf.nn.max_pool3d``
-        - class ``tf.nn.avg_pool3d``
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    - see :class:`Conv2dLayer`.
-    """
-    def __init__(
-        self,
-        layer = None,
-        ksize=[1, 2, 2, 1],
-        strides=[1, 2, 2, 1],
-        padding='SAME',
-        pool = tf.nn.max_pool,
-        name ='pool_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] PoolLayer   %s: ksize:%s strides:%s padding:%s pool:%s" %
-                            (self.name, str(ksize), str(strides), padding, pool.__name__))
-
-        self.outputs = pool(self.inputs, ksize=ksize, strides=strides, padding=padding, name=name)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-
-## Padding layer
-class PadLayer(Layer):
-    """
-    The :class:`PadLayer` class is a Padding layer for any modes and dimensions.
-    Please see `tf.pad <https://www.tensorflow.org/api_docs/python/tf/pad>`_ for usage.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    padding : a Tensor of type int32.
-    mode : one of "CONSTANT", "REFLECT", or "SYMMETRIC" (case-insensitive)
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        paddings = None,
-        mode = 'CONSTANT',
-        name = 'pad_layer',
-    ):
-        Layer.__init__(self, name=name)
-        assert paddings is not None, "paddings should be a Tensor of type int32. see https://www.tensorflow.org/api_docs/python/tf/pad"
-        self.inputs = layer.outputs
-        print("  [TL] PoolLayer   %s: paddings:%s mode:%s" %
-                            (self.name, list(paddings.get_shape()), mode))
-
-        self.outputs = tf.pad(self.inputs, paddings=paddings, mode=mode, name=name)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-
-## TimeDistributedLayer
-class TimeDistributedLayer(Layer):
-    """
-    The :class:`TimeDistributedLayer` class that applies a function to every timestep of the input tensor.
-    For example, if using :class:`DenseLayer` as the ``layer_class``, inputs [batch_size , length, dim]
-    outputs [batch_size , length, new_dim].
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer, [batch_size , length, dim]
-    layer_class : a :class:`Layer` class
-    args : dictionary
-        The arguments for the ``layer_class``.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    >>> batch_size = 32
-    >>> timestep = 20
-    >>> input_dim = 100
-    >>> x = tf.placeholder(dtype=tf.float32, shape=[batch_size, timestep,  input_dim], name="encode_seqs")
-    >>> net = InputLayer(x, name='input')
-    >>> net = TimeDistributedLayer(net, layer_class=DenseLayer, args={'n_units':50, 'name':'dense'}, name='time_dense')
-    ... [TL] InputLayer  input: (32, 20, 100)
-    ... [TL] TimeDistributedLayer time_dense: layer_class:DenseLayer
-    >>> print(net.outputs._shape)
-    ... (32, 20, 50)
-    >>> net.print_params(False)
-    ... param   0: (100, 50)          time_dense/dense/W:0
-    ... param   1: (50,)              time_dense/dense/b:0
-    ... num of params: 5050
-    """
-    def __init__(
-        self,
-        layer = None,
-        layer_class = None,
-        args = {},
-        name ='time_distributed',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] TimeDistributedLayer %s: layer_class:%s args:%s" %
-                            (self.name, layer_class.__name__, args))
-
-        if not args: args = dict()
-        assert isinstance(args, dict), "'args' must be a dict."
-
-        if not isinstance(self.inputs, tf.Tensor):
-            self.inputs = tf.transpose(tf.stack(self.inputs), [1, 0, 2])
-
-        input_shape = self.inputs.get_shape()
-
-        timestep = input_shape[1]
-        x = tf.unstack(self.inputs, axis=1)
-
-        with ops.suppress_stdout():
-            for i in range(0, timestep):
-                with tf.variable_scope(name, reuse=(False if i==0 else True)) as vs:
-                    set_name_reuse((False if i==0 else True))
-                    net = layer_class(InputLayer(x[i], name=args['name']+str(i)), **args)
-                    # net = layer_class(InputLayer(x[i], name="input_"+args['name']), **args)
-                    x[i] = net.outputs
-                    variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-        self.outputs = tf.stack(x, axis=1, name=name)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
-
-
-
-## Recurrent layer
-class RNNLayer(Layer):
-    """
-    The :class:`RNNLayer` class is a RNN layer, you can implement vanilla RNN,
-    LSTM and GRU with it.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    cell_fn : a TensorFlow's core RNN cell as follow (Note TF1.0+ and TF1.0- are different).
-        - see `RNN Cells in TensorFlow <https://www.tensorflow.org/api_docs/python/>`_
-    cell_init_args : a dictionary
-        The arguments for the cell initializer.
-    n_hidden : a int
-        The number of hidden units in the layer.
-    initializer : initializer
-        The initializer for initializing the parameters.
-    n_steps : a int
-        The sequence length.
-    initial_state : None or RNN State
-        If None, initial_state is zero_state.
-    return_last : boolean
-        - If True, return the last output, "Sequence input and single output"
-        - If False, return all outputs, "Synced sequence input and output"
-        - In other word, if you want to apply one or more RNN(s) on this layer, set to False.
-    return_seq_2d : boolean
-        - When return_last = False
-        - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it.
-        - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Variables
-    --------------
-    outputs : a tensor
-        The output of this RNN.
-        return_last = False, outputs = all cell_output, which is the hidden state.
-            cell_output.get_shape() = (?, n_hidden)
-
-    final_state : a tensor or StateTuple
-        When state_is_tuple = False,
-        it is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden].\n
-        When state_is_tuple = True, it stores two elements: (c, h), in that order.
-        You can get the final state after each iteration during training, then
-        feed it to the initial state of next iteration.
-
-    initial_state : a tensor or StateTuple
-        It is the initial state of this RNN layer, you can use it to initialize
-        your state at the begining of each epoch or iteration according to your
-        training procedure.
-
-    batch_size : int or tensor
-        Is int, if able to compute the batch_size, otherwise, tensor for ``?``.
-
-    Examples
-    --------
-    - For words
-    >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
-    >>> network = tl.layers.EmbeddingInputlayer(
-    ...                 inputs = input_data,
-    ...                 vocabulary_size = vocab_size,
-    ...                 embedding_size = hidden_size,
-    ...                 E_init = tf.random_uniform_initializer(-init_scale, init_scale),
-    ...                 name ='embedding_layer')
-    >>> if is_training:
-    >>>     network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop1')
-    >>> network = tl.layers.RNNLayer(network,
-    ...             cell_fn=tf.nn.rnn_cell.BasicLSTMCell,
-    ...             cell_init_args={'forget_bias': 0.0},# 'state_is_tuple': True},
-    ...             n_hidden=hidden_size,
-    ...             initializer=tf.random_uniform_initializer(-init_scale, init_scale),
-    ...             n_steps=num_steps,
-    ...             return_last=False,
-    ...             name='basic_lstm_layer1')
-    >>> lstm1 = network
-    >>> if is_training:
-    >>>     network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop2')
-    >>> network = tl.layers.RNNLayer(network,
-    ...             cell_fn=tf.nn.rnn_cell.BasicLSTMCell,
-    ...             cell_init_args={'forget_bias': 0.0}, # 'state_is_tuple': True},
-    ...             n_hidden=hidden_size,
-    ...             initializer=tf.random_uniform_initializer(-init_scale, init_scale),
-    ...             n_steps=num_steps,
-    ...             return_last=False,
-    ...             return_seq_2d=True,
-    ...             name='basic_lstm_layer2')
-    >>> lstm2 = network
-    >>> if is_training:
-    >>>     network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop3')
-    >>> network = tl.layers.DenseLayer(network,
-    ...             n_units=vocab_size,
-    ...             W_init=tf.random_uniform_initializer(-init_scale, init_scale),
-    ...             b_init=tf.random_uniform_initializer(-init_scale, init_scale),
-    ...             act = tl.activation.identity, name='output_layer')
-
-    - For CNN+LSTM
-    >>> x = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, 1])
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = tl.layers.Conv2dLayer(network,
-    ...                         act = tf.nn.relu,
-    ...                         shape = [5, 5, 1, 32],  # 32 features for each 5x5 patch
-    ...                         strides=[1, 2, 2, 1],
-    ...                         padding='SAME',
-    ...                         name ='cnn_layer1')
-    >>> network = tl.layers.PoolLayer(network,
-    ...                         ksize=[1, 2, 2, 1],
-    ...                         strides=[1, 2, 2, 1],
-    ...                         padding='SAME',
-    ...                         pool = tf.nn.max_pool,
-    ...                         name ='pool_layer1')
-    >>> network = tl.layers.Conv2dLayer(network,
-    ...                         act = tf.nn.relu,
-    ...                         shape = [5, 5, 32, 10], # 10 features for each 5x5 patch
-    ...                         strides=[1, 2, 2, 1],
-    ...                         padding='SAME',
-    ...                         name ='cnn_layer2')
-    >>> network = tl.layers.PoolLayer(network,
-    ...                         ksize=[1, 2, 2, 1],
-    ...                         strides=[1, 2, 2, 1],
-    ...                         padding='SAME',
-    ...                         pool = tf.nn.max_pool,
-    ...                         name ='pool_layer2')
-    >>> network = tl.layers.FlattenLayer(network, name='flatten_layer')
-    >>> network = tl.layers.ReshapeLayer(network, shape=[-1, num_steps, int(network.outputs._shape[-1])])
-    >>> rnn1 = tl.layers.RNNLayer(network,
-    ...                         cell_fn=tf.nn.rnn_cell.LSTMCell,
-    ...                         cell_init_args={},
-    ...                         n_hidden=200,
-    ...                         initializer=tf.random_uniform_initializer(-0.1, 0.1),
-    ...                         n_steps=num_steps,
-    ...                         return_last=False,
-    ...                         return_seq_2d=True,
-    ...                         name='rnn_layer')
-    >>> network = tl.layers.DenseLayer(rnn1, n_units=3,
-    ...                         act = tl.activation.identity, name='output_layer')
-
-    Notes
-    -----
-    Input dimension should be rank 3 : [batch_size, n_steps, n_features], if no, please see :class:`ReshapeLayer`.
-
-    References
-    ----------
-    - `Neural Network RNN Cells in TensorFlow <https://www.tensorflow.org/api_docs/python/rnn_cell/>`_
-    - `tensorflow/python/ops/rnn.py <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py>`_
-    - `tensorflow/python/ops/rnn_cell.py <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py>`_
-    - see TensorFlow tutorial ``ptb_word_lm.py``, TensorLayer tutorials ``tutorial_ptb_lstm*.py`` and ``tutorial_generate_text.py``
-    """
-    def __init__(
-        self,
-        layer = None,
-        cell_fn = None,#tf.nn.rnn_cell.BasicRNNCell,
-        cell_init_args = {},
-        n_hidden = 100,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        n_steps = 5,
-        initial_state = None,
-        return_last = False,
-        # is_reshape = True,
-        return_seq_2d = False,
-        name = 'rnn_layer',
-    ):
-        Layer.__init__(self, name=name)
-        if cell_fn is None:
-            raise Exception("Please put in cell_fn")
-        if 'GRU' in cell_fn.__name__:
-            try:
-                cell_init_args.pop('state_is_tuple')
-            except:
-                pass
-
-        self.inputs = layer.outputs
-
-        print("  [TL] RNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s " % (self.name, n_hidden,
-            n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__))
-        # You can get the dimension by .get_shape() or ._shape, and check the
-        # dimension by .with_rank() as follow.
-        # self.inputs.get_shape().with_rank(2)
-        # self.inputs.get_shape().with_rank(3)
-
-        # Input dimension should be rank 3 [batch_size, n_steps(max), n_features]
-        try:
-            self.inputs.get_shape().with_rank(3)
-        except:
-            raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]")
-
-
-        # is_reshape : boolean (deprecate)
-        #     Reshape the inputs to 3 dimension tensor.\n
-        #     If input is［batch_size, n_steps, n_features], we do not need to reshape it.\n
-        #     If input is [batch_size * n_steps, n_features], we need to reshape it.
-        # if is_reshape:
-        #     self.inputs = tf.reshape(self.inputs, shape=[-1, n_steps, int(self.inputs._shape[-1])])
-
-        fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
-
-        if fixed_batch_size.value:
-            batch_size = fixed_batch_size.value
-            print("       RNN batch_size (concurrent processes): %d" % batch_size)
-        else:
-            from tensorflow.python.ops import array_ops
-            batch_size = array_ops.shape(self.inputs)[0]
-            print("       non specified batch_size, uses a tensor instead.")
-        self.batch_size = batch_size
-
-        # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
-        # This builds an unrolled LSTM for tutorial purposes only.
-        # In general, use the rnn() or state_saving_rnn() from rnn.py.
-        #
-        # The alternative version of the code below is:
-        #
-        # from tensorflow.models.rnn import rnn
-        # inputs = [tf.squeeze(input_, [1])
-        #           for input_ in tf.split(1, num_steps, inputs)]
-        # outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state)
-        outputs = []
-        self.cell = cell = cell_fn(num_units=n_hidden, **cell_init_args)
-        if initial_state is None:
-            self.initial_state = cell.zero_state(batch_size, dtype=tf.float32)  # 1.2.3
-        state = self.initial_state
-        # with tf.variable_scope("model", reuse=None, initializer=initializer):
-        with tf.variable_scope(name, initializer=initializer) as vs:
-            for time_step in range(n_steps):
-                if time_step > 0: tf.get_variable_scope().reuse_variables()
-                (cell_output, state) = cell(self.inputs[:, time_step, :], state)
-                outputs.append(cell_output)
-
-            # Retrieve just the RNN variables.
-            # rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)]
-            rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-        print("     n_params : %d" % (len(rnn_variables)))
-
-        if return_last:
-            # 2D Tensor [batch_size, n_hidden]
-            self.outputs = outputs[-1]
-        else:
-            if return_seq_2d:
-                # PTB tutorial: stack dense layer after that, or compute the cost from the output
-                # 2D Tensor [n_example, n_hidden]
-                try: # TF1.0
-                    self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden])
-                except: # TF0.12
-                    self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden])
-
-
-            else:
-                # <akara>: stack more RNN layer after that
-                # 3D Tensor [n_example/n_steps, n_steps, n_hidden]
-                try: # TF1.0
-                    self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden])
-                except: # TF0.12
-                    self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden])
-
-        self.final_state = state
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        # print(type(self.outputs))
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
-
-class BiRNNLayer(Layer):
-    """
-    The :class:`BiRNNLayer` class is a Bidirectional RNN layer.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    cell_fn : a TensorFlow's core RNN cell as follow (Note TF1.0+ and TF1.0- are different).
-        - see `RNN Cells in TensorFlow <https://www.tensorflow.org/api_docs/python/>`_
-    cell_init_args : a dictionary
-        The arguments for the cell initializer.
-    n_hidden : a int
-        The number of hidden units in the layer.
-    initializer : initializer
-        The initializer for initializing the parameters.
-    n_steps : a int
-        The sequence length.
-    fw_initial_state : None or forward RNN State
-        If None, initial_state is zero_state.
-    bw_initial_state : None or backward RNN State
-        If None, initial_state is zero_state.
-    dropout : `tuple` of `float`: (input_keep_prob, output_keep_prob).
-        The input and output keep probability.
-    n_layer : a int, default is 1.
-        The number of RNN layers.
-    return_last : boolean
-        - If True, return the last output, "Sequence input and single output"
-        - If False, return all outputs, "Synced sequence input and output"
-        - In other word, if you want to apply one or more RNN(s) on this layer, set to False.
-    return_seq_2d : boolean
-        - When return_last = False
-        - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it.
-        - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Variables
-    --------------
-    outputs : a tensor
-        The output of this RNN.
-        return_last = False, outputs = all cell_output, which is the hidden state.
-            cell_output.get_shape() = (?, n_hidden)
-
-    fw(bw)_final_state : a tensor or StateTuple
-        When state_is_tuple = False,
-        it is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden].\n
-        When state_is_tuple = True, it stores two elements: (c, h), in that order.
-        You can get the final state after each iteration during training, then
-        feed it to the initial state of next iteration.
-
-    fw(bw)_initial_state : a tensor or StateTuple
-        It is the initial state of this RNN layer, you can use it to initialize
-        your state at the begining of each epoch or iteration according to your
-        training procedure.
-
-    batch_size : int or tensor
-        Is int, if able to compute the batch_size, otherwise, tensor for ``?``.
-
-    Notes
-    -----
-    - Input dimension should be rank 3 : [batch_size, n_steps, n_features], if no, please see :class:`ReshapeLayer`.
-    - For predicting, the sequence length has to be the same with the sequence length of training, while, for normal
-    RNN, we can use sequence length of 1 for predicting.
-
-    References
-    ----------
-    - `Source <https://github.com/akaraspt/deepsleep/blob/master/deepsleep/model.py>`_
-    """
-    def __init__(
-        self,
-        layer = None,
-        cell_fn = None, #tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'use_peepholes':True, 'state_is_tuple':True},
-        n_hidden = 100,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        n_steps = 5,
-        fw_initial_state = None,
-        bw_initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        return_last = False,
-        return_seq_2d = False,
-        name = 'birnn_layer',
-    ):
-        Layer.__init__(self, name=name)
-        if cell_fn is None:
-            raise Exception("Please put in cell_fn")
-        if 'GRU' in cell_fn.__name__:
-            try:
-                cell_init_args.pop('state_is_tuple')
-            except:
-                pass
-
-        self.inputs = layer.outputs
-
-        print("  [TL] BiRNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d " % (self.name, n_hidden,
-            n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer))
-
-        fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
-
-        if fixed_batch_size.value:
-            self.batch_size = fixed_batch_size.value
-            print("       RNN batch_size (concurrent processes): %d" % self.batch_size)
-        else:
-            from tensorflow.python.ops import array_ops
-            self.batch_size = array_ops.shape(self.inputs)[0]
-            print("       non specified batch_size, uses a tensor instead.")
-
-        # Input dimension should be rank 3 [batch_size, n_steps(max), n_features]
-        try:
-            self.inputs.get_shape().with_rank(3)
-        except:
-            raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]")
-
-        with tf.variable_scope(name, initializer=initializer) as vs:
-            self.fw_cell = cell_fn(num_units=n_hidden, **cell_init_args)
-            self.bw_cell = cell_fn(num_units=n_hidden, **cell_init_args)
-            # Apply dropout
-            if dropout:
-                if type(dropout) in [tuple, list]:
-                    in_keep_prob = dropout[0]
-                    out_keep_prob = dropout[1]
-                elif isinstance(dropout, float):
-                    in_keep_prob, out_keep_prob = dropout, dropout
-                else:
-                    raise Exception("Invalid dropout type (must be a 2-D tuple of "
-                                    "float)")
-                try: # TF 1.0
-                    DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper
-                except:
-                    DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper
-                self.fw_cell = DropoutWrapper_fn(
-                          self.fw_cell,
-                          input_keep_prob=in_keep_prob,
-                          output_keep_prob=out_keep_prob)
-                self.bw_cell = DropoutWrapper_fn(
-                          self.bw_cell,
-                          input_keep_prob=in_keep_prob,
-                          output_keep_prob=out_keep_prob)
-            # Apply multiple layers
-            if n_layer > 1:
-                try: # TF1.0
-                    MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell
-                except:
-                    MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell
-
-                try:
-                    self.fw_cell = MultiRNNCell_fn([self.fw_cell] * n_layer,
-                                                          state_is_tuple=True)
-                    self.bw_cell = MultiRNNCell_fn([self.bw_cell] * n_layer,
-                                                          state_is_tuple=True)
-                except:
-                    self.fw_cell = MultiRNNCell_fn([self.fw_cell] * n_layer)
-                    self.bw_cell = MultiRNNCell_fn([self.bw_cell] * n_layer)
-
-            # Initial state of RNN
-            if fw_initial_state is None:
-                self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=tf.float32)
-            else:
-                self.fw_initial_state = fw_initial_state
-            if bw_initial_state is None:
-                self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=tf.float32)
-            else:
-                self.bw_initial_state = bw_initial_state
-            # exit()
-            # Feedforward to MultiRNNCell
-            try: ## TF1.0
-                list_rnn_inputs = tf.unstack(self.inputs, axis=1)
-            except: ## TF0.12
-                list_rnn_inputs = tf.unpack(self.inputs, axis=1)
-
-            try: # TF1.0
-                bidirectional_rnn_fn = tf.contrib.rnn.static_bidirectional_rnn
-            except:
-                bidirectional_rnn_fn = tf.nn.bidirectional_rnn
-            outputs, fw_state, bw_state = bidirectional_rnn_fn(               # outputs, fw_state, bw_state = tf.contrib.rnn.static_bidirectional_rnn(
-                cell_fw=self.fw_cell,
-                cell_bw=self.bw_cell,
-                inputs=list_rnn_inputs,
-                initial_state_fw=self.fw_initial_state,
-                initial_state_bw=self.bw_initial_state
-            )
-
-            if return_last:
-                self.outputs = outputs[-1]
-            else:
-                self.outputs = outputs
-                if return_seq_2d:
-                    # 2D Tensor [n_example, n_hidden]
-                    try: # TF1.0
-                        self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden*2])
-                    except: # TF0.12
-                        self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden*2])
-                else:
-                    # <akara>: stack more RNN layer after that
-                    # 3D Tensor [n_example/n_steps, n_steps, n_hidden]
-
-                    try: # TF1.0
-                        self.outputs = tf.reshape(tf.concat(outputs,1), [-1, n_steps, n_hidden*2])
-                    except: # TF0.12
-                        self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden*2])
-            self.fw_final_state = fw_state
-            self.bw_final_state = bw_state
-
-            # Retrieve just the RNN variables.
-            rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-        print("     n_params : %d" % (len(rnn_variables)))
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
-
-# Advanced Ops for Dynamic RNN
-def advanced_indexing_op(input, index):
-    """Advanced Indexing for Sequences, returns the outputs by given sequence lengths.
-    When return the last output :class:`DynamicRNNLayer` uses it to get the last outputs with the sequence lengths.
-
-    Parameters
-    -----------
-    input : tensor for data
-        [batch_size, n_step(max), n_features]
-    index : tensor for indexing, i.e. sequence_length in Dynamic RNN.
-        [batch_size]
-
-    Examples
-    ---------
-    >>> batch_size, max_length, n_features = 3, 5, 2
-    >>> z = np.random.uniform(low=-1, high=1, size=[batch_size, max_length, n_features]).astype(np.float32)
-    >>> b_z = tf.constant(z)
-    >>> sl = tf.placeholder(dtype=tf.int32, shape=[batch_size])
-    >>> o = advanced_indexing_op(b_z, sl)
-    >>>
-    >>> sess = tf.InteractiveSession()
-    >>> tl.layers.initialize_global_variables(sess)
-    >>>
-    >>> order = np.asarray([1,1,2])
-    >>> print("real",z[0][order[0]-1], z[1][order[1]-1], z[2][order[2]-1])
-    >>> y = sess.run([o], feed_dict={sl:order})
-    >>> print("given",order)
-    >>> print("out", y)
-    ... real [-0.93021595  0.53820813] [-0.92548317 -0.77135968] [ 0.89952248  0.19149846]
-    ... given [1 1 2]
-    ... out [array([[-0.93021595,  0.53820813],
-    ...             [-0.92548317, -0.77135968],
-    ...             [ 0.89952248,  0.19149846]], dtype=float32)]
-
-    References
-    -----------
-    - Modified from TFlearn (the original code is used for fixed length rnn), `references <https://github.com/tflearn/tflearn/blob/master/tflearn/layers/recurrent.py>`_.
-    """
-    batch_size = tf.shape(input)[0]
-    # max_length = int(input.get_shape()[1])    # for fixed length rnn, length is given
-    max_length = tf.shape(input)[1]             # for dynamic_rnn, length is unknown
-    dim_size = int(input.get_shape()[2])
-    index = tf.range(0, batch_size) * max_length + (index - 1)
-    flat = tf.reshape(input, [-1, dim_size])
-    relevant = tf.gather(flat, index)
-    return relevant
-
-def retrieve_seq_length_op(data):
-    """An op to compute the length of a sequence from input shape of [batch_size, n_step(max), n_features],
-    it can be used when the features of padding (on right hand side) are all zeros.
-
-    Parameters
-    -----------
-    data : tensor
-        [batch_size, n_step(max), n_features] with zero padding on right hand side.
-
-    Examples
-    ---------
-    >>> data = [[[1],[2],[0],[0],[0]],
-    ...         [[1],[2],[3],[0],[0]],
-    ...         [[1],[2],[6],[1],[0]]]
-    >>> data = np.asarray(data)
-    >>> print(data.shape)
-    ... (3, 5, 1)
-    >>> data = tf.constant(data)
-    >>> sl = retrieve_seq_length_op(data)
-    >>> sess = tf.InteractiveSession()
-    >>> tl.layers.initialize_global_variables(sess)
-    >>> y = sl.eval()
-    ... [2 3 4]
-
-    - Multiple features
-    >>> data = [[[1,2],[2,2],[1,2],[1,2],[0,0]],
-    ...         [[2,3],[2,4],[3,2],[0,0],[0,0]],
-    ...         [[3,3],[2,2],[5,3],[1,2],[0,0]]]
-    >>> sl
-    ... [4 3 4]
-
-    References
-    ------------
-    - Borrow from `TFlearn <https://github.com/tflearn/tflearn/blob/master/tflearn/layers/recurrent.py>`_.
-    """
-    with tf.name_scope('GetLength'):
-        ## TF 1.0 change reduction_indices to axis
-        used = tf.sign(tf.reduce_max(tf.abs(data), 2))
-        length = tf.reduce_sum(used, 1)
-        ## TF < 1.0
-        # used = tf.sign(tf.reduce_max(tf.abs(data), reduction_indices=2))
-        # length = tf.reduce_sum(used, reduction_indices=1)
-        length = tf.cast(length, tf.int32)
-    return length
-
-def retrieve_seq_length_op2(data):
-    """An op to compute the length of a sequence, from input shape of [batch_size, n_step(max)],
-    it can be used when the features of padding (on right hand side) are all zeros.
-
-    Parameters
-    -----------
-    data : tensor
-        [batch_size, n_step(max)] with zero padding on right hand side.
-
-    Examples
-    --------
-    >>> data = [[1,2,0,0,0],
-    ...         [1,2,3,0,0],
-    ...         [1,2,6,1,0]]
-    >>> o = retrieve_seq_length_op2(data)
-    >>> sess = tf.InteractiveSession()
-    >>> tl.layers.initialize_global_variables(sess)
-    >>> print(o.eval())
-    ... [2 3 4]
-    """
-    return tf.reduce_sum(tf.cast(tf.greater(data, tf.zeros_like(data)), tf.int32), 1)
-
-
-def retrieve_seq_length_op3(data, pad_val=0):
-    data_shape_size = data.get_shape().ndims
-    if data_shape_size == 3:
-        return tf.reduce_sum(tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32), 1)
-    elif data_shape_size == 2:
-        return tf.reduce_sum(tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32), 1)
-    elif data_shape_size == 1:
-        raise ValueError("retrieve_seq_length_op3: data has wrong shape!")
-    else:
-        raise ValueError("retrieve_seq_length_op3: handling data_shape_size %s hasn't been implemented!" % (data_shape_size))
-
-
-def target_mask_op(data, pad_val=0):
-    data_shape_size = data.get_shape().ndims
-    if data_shape_size == 3:
-        return tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32)
-    elif data_shape_size == 2:
-        return tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32)
-    elif data_shape_size == 1:
-        raise ValueError("target_mask_op: data has wrong shape!")
-    else:
-        raise ValueError("target_mask_op: handling data_shape_size %s hasn't been implemented!" % (data_shape_size))
-
-
-# Dynamic RNN
-class DynamicRNNLayer(Layer):
-    """
-    The :class:`DynamicRNNLayer` class is a Dynamic RNN layer, see ``tf.nn.dynamic_rnn``.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    cell_fn : a TensorFlow's core RNN cell as follow (Note TF1.0+ and TF1.0- are different).
-        - see `RNN Cells in TensorFlow <https://www.tensorflow.org/api_docs/python/>`_
-    cell_init_args : a dictionary
-        The arguments for the cell initializer.
-    n_hidden : a int
-        The number of hidden units in the layer.
-    initializer : initializer
-        The initializer for initializing the parameters.
-    sequence_length : a tensor, array or None
-        The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``.
-            - If None, it uses ``retrieve_seq_length_op`` to compute the sequence_length, i.e. when the features of padding (on right hand side) are all zeros.
-            - If using word embedding, you may need to compute the sequence_length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``.
-            - You can also input an numpy array.
-            - More details about TensorFlow dynamic_rnn in `Wild-ML Blog <http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/>`_.
-    initial_state : None or RNN State
-        If None, initial_state is zero_state.
-    dropout : `tuple` of `float`: (input_keep_prob, output_keep_prob).
-        The input and output keep probability.
-    n_layer : a int, default is 1.
-        The number of RNN layers.
-    return_last : boolean
-        - If True, return the last output, "Sequence input and single output"
-        - If False, return all outputs, "Synced sequence input and output"
-        - In other word, if you want to apply one or more RNN(s) on this layer, set to False.
-    return_seq_2d : boolean
-        - When return_last = False
-        - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer or computing cost after it.
-        - If False, return 3D Tensor [n_example/n_steps(max), n_steps(max), n_hidden], for stacking multiple RNN after it.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Variables
-    ------------
-    outputs : a tensor
-        The output of this RNN.
-        return_last = False, outputs = all cell_output, which is the hidden state.
-            cell_output.get_shape() = (?, n_hidden)
-
-    final_state : a tensor or StateTuple
-        When state_is_tuple = False,
-        it is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden].\n
-        When state_is_tuple = True, it stores two elements: (c, h), in that order.
-        You can get the final state after each iteration during training, then
-        feed it to the initial state of next iteration.
-
-    initial_state : a tensor or StateTuple
-        It is the initial state of this RNN layer, you can use it to initialize
-        your state at the begining of each epoch or iteration according to your
-        training procedure.
-
-    sequence_length : a tensor or array, shape = [batch_size]
-        The sequence lengths computed by Advanced Opt or the given sequence lengths.
-
-    Notes
-    -----
-    Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if no, please see :class:`ReshapeLayer`.
-
-    Examples
-    --------
-    >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input_seqs")
-    >>> network = tl.layers.EmbeddingInputlayer(
-    ...             inputs = input_seqs,
-    ...             vocabulary_size = vocab_size,
-    ...             embedding_size = embedding_size,
-    ...             name = 'seq_embedding')
-    >>> network = tl.layers.DynamicRNNLayer(network,
-    ...             cell_fn = tf.contrib.rnn.BasicLSTMCell, # for TF0.2 tf.nn.rnn_cell.BasicLSTMCell,
-    ...             n_hidden = embedding_size,
-    ...             dropout = 0.7,
-    ...             sequence_length = tl.layers.retrieve_seq_length_op2(input_seqs),
-    ...             return_seq_2d = True,     # stack denselayer or compute cost after it
-    ...             name = 'dynamic_rnn')
-    ... network = tl.layers.DenseLayer(network, n_units=vocab_size,
-    ...             act=tf.identity, name="output")
-
-    References
-    ----------
-    - `Wild-ML Blog <http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/>`_
-    - `dynamic_rnn.ipynb <https://github.com/dennybritz/tf-rnn/blob/master/dynamic_rnn.ipynb>`_
-    - `tf.nn.dynamic_rnn <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.dynamic_rnn.md>`_
-    - `tflearn rnn <https://github.com/tflearn/tflearn/blob/master/tflearn/layers/recurrent.py>`_
-    - ``tutorial_dynamic_rnn.py``
-    """
-    def __init__(
-        self,
-        layer = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple' : True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        sequence_length = None,
-        initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        return_last = False,
-        return_seq_2d = False,
-        dynamic_rnn_init_args={},
-        name = 'dyrnn_layer',
-    ):
-        Layer.__init__(self, name=name)
-        if cell_fn is None:
-            raise Exception("Please put in cell_fn")
-        if 'GRU' in cell_fn.__name__:
-            try:
-                cell_init_args.pop('state_is_tuple')
-            except:
-                pass
-        self.inputs = layer.outputs
-
-        print("  [TL] DynamicRNNLayer %s: n_hidden:%d, in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % (self.name, n_hidden,
-             self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer))
-
-        # Input dimension should be rank 3 [batch_size, n_steps(max), n_features]
-        try:
-            self.inputs.get_shape().with_rank(3)
-        except:
-            raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]")
-
-        # Get the batch_size
-        fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
-        if fixed_batch_size.value:
-            batch_size = fixed_batch_size.value
-            print("       batch_size (concurrent processes): %d" % batch_size)
-        else:
-            from tensorflow.python.ops import array_ops
-            batch_size = array_ops.shape(self.inputs)[0]
-            print("       non specified batch_size, uses a tensor instead.")
-        self.batch_size = batch_size
-
-        # Creats the cell function
-        cell_instance_fn=lambda: cell_fn(num_units=n_hidden, **cell_init_args)
-        # self.cell = cell_fn(num_units=n_hidden, **cell_init_args)
-
-        # Apply dropout
-        if dropout:
-            if type(dropout) in [tuple, list]:
-                in_keep_prob = dropout[0]
-                out_keep_prob = dropout[1]
-            elif isinstance(dropout, float):
-                in_keep_prob, out_keep_prob = dropout, dropout
-            else:
-                raise Exception("Invalid dropout type (must be a 2-D tuple of "
-                                "float)")
-            try: # TF1.0
-                DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper
-            except:
-                DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper
-
-            cell_instance_fn1=cell_instance_fn
-            cell_instance_fn=DropoutWrapper_fn(
-                                cell_instance_fn1(),
-                                input_keep_prob=in_keep_prob,
-                                output_keep_prob=out_keep_prob)
-            # self.cell = DropoutWrapper_fn(
-            #           self.cell,
-            #           input_keep_prob=in_keep_prob,
-            #           output_keep_prob=out_keep_prob)
-        # Apply multiple layers
-        if n_layer > 1:
-            try:
-                MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell
-            except:
-                MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell
-
-            cell_instance_fn2=cell_instance_fn
-            try:
-                cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)], state_is_tuple=True)
-                # self.cell = MultiRNNCell_fn([self.cell] * n_layer, state_is_tuple=True)
-            except:
-                cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)])
-                # self.cell = MultiRNNCell_fn([self.cell] * n_layer)
-
-        self.cell=cell_instance_fn()
-        # Initialize initial_state
-        if initial_state is None:
-            self.initial_state = self.cell.zero_state(batch_size, dtype=tf.float32)#dtype="float")
-        else:
-            self.initial_state = initial_state
-
-        # Computes sequence_length
-        if sequence_length is None:
-            try: ## TF1.0
-                sequence_length = retrieve_seq_length_op(
-                            self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs))
-            except: ## TF0.12
-                sequence_length = retrieve_seq_length_op(
-                            self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs))
-
-        # Main - Computes outputs and last_states
-        with tf.variable_scope(name, initializer=initializer) as vs:
-            outputs, last_states = tf.nn.dynamic_rnn(
-                cell=self.cell,
-                # inputs=X
-                inputs = self.inputs,
-                # dtype=tf.float64,
-                sequence_length=sequence_length,
-                initial_state = self.initial_state,
-                **dynamic_rnn_init_args
-                )
-            rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-            # print("     n_params : %d" % (len(rnn_variables)))
-            # Manage the outputs
-            if return_last:
-                # [batch_size, n_hidden]
-                # outputs = tf.transpose(tf.pack(outputs), [1, 0, 2]) # TF1.0 tf.pack --> tf.stack
-                self.outputs = advanced_indexing_op(outputs, sequence_length)
-            else:
-                # [batch_size, n_step(max), n_hidden]
-                # self.outputs = result[0]["outputs"]
-                # self.outputs = outputs    # it is 3d, but it is a list
-                if return_seq_2d:
-                    # PTB tutorial:
-                    # 2D Tensor [n_example, n_hidden]
-                    try: # TF1.0
-                        self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden])
-                    except: # TF0.12
-                        self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden])
-                else:
-                    # <akara>:
-                    # 3D Tensor [batch_size, n_steps(max), n_hidden]
-                    max_length = tf.shape(outputs)[1]
-                    batch_size = tf.shape(outputs)[0]
-
-
-                    try: # TF1.0
-                        self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, n_hidden])
-                    except: # TF0.12
-                        self.outputs = tf.reshape(tf.concat(1, outputs), [batch_size, max_length, n_hidden])
-                    # self.outputs = tf.reshape(tf.concat(1, outputs), [-1, max_length, n_hidden])
-
-        # Final state
-        self.final_state = last_states
-
-        self.sequence_length = sequence_length
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
-
-# Bidirectional Dynamic RNN
-class BiDynamicRNNLayer(Layer):
-    """
-    The :class:`BiDynamicRNNLayer` class is a RNN layer, you can implement vanilla RNN,
-    LSTM and GRU with it.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    cell_fn : a TensorFlow's core RNN cell as follow (Note TF1.0+ and TF1.0- are different).
-        - see `RNN Cells in TensorFlow <https://www.tensorflow.org/api_docs/python/>`_
-    cell_init_args : a dictionary
-        The arguments for the cell initializer.
-    n_hidden : a int
-        The number of hidden units in the layer.
-    initializer : initializer
-        The initializer for initializing the parameters.
-    sequence_length : a tensor, array or None
-        The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``.
-            - If None, it uses ``retrieve_seq_length_op`` to compute the sequence_length, i.e. when the features of padding (on right hand side) are all zeros.
-            - If using word embedding, you may need to compute the sequence_length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``.
-            - You can also input an numpy array.
-            - More details about TensorFlow dynamic_rnn in `Wild-ML Blog <http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/>`_.
-    fw_initial_state : None or forward RNN State
-        If None, initial_state is zero_state.
-    bw_initial_state : None or backward RNN State
-        If None, initial_state is zero_state.
-    dropout : `tuple` of `float`: (input_keep_prob, output_keep_prob).
-        The input and output keep probability.
-    n_layer : a int, default is 1.
-        The number of RNN layers.
-    return_last : boolean
-        If True, return the last output, "Sequence input and single output"\n
-        If False, return all outputs, "Synced sequence input and output"\n
-        In other word, if you want to apply one or more RNN(s) on this layer, set to False.
-    return_seq_2d : boolean
-        - When return_last = False
-        - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer or computing cost after it.
-        - If False, return 3D Tensor [n_example/n_steps(max), n_steps(max), 2 * n_hidden], for stacking multiple RNN after it.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Variables
-    -----------------------
-    outputs : a tensor
-        The output of this RNN.
-        return_last = False, outputs = all cell_output, which is the hidden state.
-            cell_output.get_shape() = (?, 2 * n_hidden)
-
-    fw(bw)_final_state : a tensor or StateTuple
-        When state_is_tuple = False,
-        it is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden].\n
-        When state_is_tuple = True, it stores two elements: (c, h), in that order.
-        You can get the final state after each iteration during training, then
-        feed it to the initial state of next iteration.
-
-    fw(bw)_initial_state : a tensor or StateTuple
-        It is the initial state of this RNN layer, you can use it to initialize
-        your state at the begining of each epoch or iteration according to your
-        training procedure.
-
-    sequence_length : a tensor or array, shape = [batch_size]
-        The sequence lengths computed by Advanced Opt or the given sequence lengths.
-
-    Notes
-    -----
-    Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if no, please see :class:`ReshapeLayer`.
-
-
-    References
-    ----------
-    - `Wild-ML Blog <http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/>`_
-    - `bidirectional_rnn.ipynb <https://github.com/dennybritz/tf-rnn/blob/master/bidirectional_rnn.ipynb>`_
-    """
-    def __init__(
-        self,
-        layer = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple':True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        sequence_length = None,
-        fw_initial_state = None,
-        bw_initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        return_last = False,
-        return_seq_2d = False,
-        dynamic_rnn_init_args={},
-        name = 'bi_dyrnn_layer',
-    ):
-        Layer.__init__(self, name=name)
-        if cell_fn is None:
-            raise Exception("Please put in cell_fn")
-        if 'GRU' in cell_fn.__name__:
-            try:
-                cell_init_args.pop('state_is_tuple')
-            except:
-                pass
-        self.inputs = layer.outputs
-
-        print("  [TL] BiDynamicRNNLayer %s: n_hidden:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" %
-              (self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer))
-
-        # Input dimension should be rank 3 [batch_size, n_steps(max), n_features]
-        try:
-            self.inputs.get_shape().with_rank(3)
-        except:
-            raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]")
-
-        # Get the batch_size
-        fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
-        if fixed_batch_size.value:
-            batch_size = fixed_batch_size.value
-            print("       batch_size (concurrent processes): %d" % batch_size)
-        else:
-            from tensorflow.python.ops import array_ops
-            batch_size = array_ops.shape(self.inputs)[0]
-            print("       non specified batch_size, uses a tensor instead.")
-        self.batch_size = batch_size
-
-        with tf.variable_scope(name, initializer=initializer) as vs:
-            # Creats the cell function
-            cell_instance_fn=lambda: cell_fn(num_units=n_hidden, **cell_init_args)
-            # self.fw_cell = cell_fn(num_units=n_hidden, **cell_init_args)
-            # self.bw_cell = cell_fn(num_units=n_hidden, **cell_init_args)
-
-            # Apply dropout
-            if dropout:
-                if type(dropout) in [tuple, list]:
-                    in_keep_prob = dropout[0]
-                    out_keep_prob = dropout[1]
-                elif isinstance(dropout, float):
-                    in_keep_prob, out_keep_prob = dropout, dropout
-                else:
-                    raise Exception("Invalid dropout type (must be a 2-D tuple of "
-                                    "float)")
-                try:
-                    DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper
-                except:
-                    DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper
-
-                    cell_instance_fn1=cell_instance_fn
-                    cell_instance_fn=lambda: DropoutWrapper_fn(
-                                        cell_instance_fn1(),
-                                        input_keep_prob=in_keep_prob,
-                                        output_keep_prob=out_keep_prob)
-
-                # self.fw_cell = DropoutWrapper_fn(
-                #     self.fw_cell,
-                #     input_keep_prob=in_keep_prob,
-                #     output_keep_prob=out_keep_prob)
-                # self.bw_cell = DropoutWrapper_fn(
-                #     self.bw_cell,
-                #     input_keep_prob=in_keep_prob,
-                #     output_keep_prob=out_keep_prob)
-            # Apply multiple layers
-            if n_layer > 1:
-                try:
-                    MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell
-                except:
-                    MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell
-
-                cell_instance_fn2=cell_instance_fn
-                cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)])
-                # self.fw_cell = MultiRNNCell_fn([self.fw_cell] * n_layer)
-                # self.bw_cell = MultiRNNCell_fn([self.bw_cell] * n_layer)
-            self.fw_cell=cell_instance_fn()
-            self.bw_cell=cell_instance_fn()
-            # Initial state of RNN
-            if fw_initial_state is None:
-                self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=tf.float32)
-            else:
-                self.fw_initial_state = fw_initial_state
-            if bw_initial_state is None:
-                self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=tf.float32)
-            else:
-                self.bw_initial_state = bw_initial_state
-            # Computes sequence_length
-            if sequence_length is None:
-                try: ## TF1.0
-                    sequence_length = retrieve_seq_length_op(
-                        self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs))
-                except: ## TF0.12
-                    sequence_length = retrieve_seq_length_op(
-                        self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs))
-
-            outputs, (states_fw, states_bw) = tf.nn.bidirectional_dynamic_rnn(
-                cell_fw=self.fw_cell,
-                cell_bw=self.bw_cell,
-                inputs=self.inputs,
-                sequence_length=sequence_length,
-                initial_state_fw=self.fw_initial_state,
-                initial_state_bw=self.bw_initial_state,
-                **dynamic_rnn_init_args
-            )
-            rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-            print("     n_params : %d" % (len(rnn_variables)))
-            # Manage the outputs
-            try: # TF1.0
-                outputs = tf.concat(outputs, 2)
-            except: # TF0.12
-                outputs = tf.concat(2, outputs)
-            if return_last:
-                # [batch_size, 2 * n_hidden]
-                self.outputs = advanced_indexing_op(outputs, sequence_length)
-            else:
-                # [batch_size, n_step(max), 2 * n_hidden]
-                if return_seq_2d:
-                    # PTB tutorial:
-                    # 2D Tensor [n_example, 2 * n_hidden]
-                    try: # TF1.0
-                        self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, 2 * n_hidden])
-                    except: # TF0.12
-                        self.outputs = tf.reshape(tf.concat(1, outputs), [-1, 2 * n_hidden])
-                else:
-                    # <akara>:
-                    # 3D Tensor [batch_size, n_steps(max), 2 * n_hidden]
-                    max_length = tf.shape(outputs)[1]
-                    batch_size = tf.shape(outputs)[0]
-                    try: # TF1.0
-                        self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, 2 * n_hidden])
-                    except: # TF0.12
-                        self.outputs = tf.reshape(tf.concat(1, outputs), [batch_size, max_length, 2 * n_hidden])
-                    # self.outputs = tf.reshape(tf.concat(1, outputs), [-1, max_length, 2 * n_hidden])
-
-        # Final state
-        self.fw_final_states = states_fw
-        self.bw_final_states = states_bw
-
-        self.sequence_length = sequence_length
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
-
-# Seq2seq
-class Seq2Seq(Layer):
-    """
-    The :class:`Seq2Seq` class is a simple :class:`DynamicRNNLayer` based Seq2seq layer,
-    both encoder and decoder are :class:`DynamicRNNLayer`, network details
-    see `Model <https://camo.githubusercontent.com/242210d7d0151cae91107ee63bff364a860db5dd/687474703a2f2f6936342e74696e797069632e636f6d2f333031333674652e706e67>`_
-    and `Sequence to Sequence Learning with Neural Networks <https://arxiv.org/abs/1409.3215>`_ .
-
-    Parameters
-    ----------
-    net_encode_in : a :class:`Layer` instance
-        Encode sequences, [batch_size, None, n_features].
-    net_decode_in : a :class:`Layer` instance
-        Decode sequences, [batch_size, None, n_features].
-    cell_fn : a TensorFlow's core RNN cell as follow (Note TF1.0+ and TF1.0- are different).
-        - see `RNN Cells in TensorFlow <https://www.tensorflow.org/api_docs/python/>`_
-    cell_init_args : a dictionary
-        The arguments for the cell initializer.
-    n_hidden : a int
-        The number of hidden units in the layer.
-    initializer : initializer
-        The initializer for initializing the parameters.
-    encode_sequence_length : tensor for encoder sequence length, see :class:`DynamicRNNLayer` .
-    decode_sequence_length : tensor for decoder sequence length, see :class:`DynamicRNNLayer` .
-    initial_state : None or forward RNN State
-        If None, initial_state is of encoder zero_state.
-    dropout : `tuple` of `float`: (input_keep_prob, output_keep_prob).
-        The input and output keep probability.
-    n_layer : a int, default is 1.
-        The number of RNN layers.
-    return_seq_2d : boolean
-        - When return_last = False
-        - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer or computing cost after it.
-        - If False, return 3D Tensor [n_example/n_steps(max), n_steps(max), n_hidden], for stacking multiple RNN after it.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Variables
-    ------------
-    outputs : a tensor
-        The output of RNN decoder.
-
-    final_state : a tensor or StateTuple
-        Final state of decoder, see :class:`DynamicRNNLayer` .
-
-    Examples
-    ----------
-    >>> from tensorlayer.layers import *
-    >>> batch_size = 32
-    >>> encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs")
-    >>> decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs")
-    >>> target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs")
-    >>> target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask") # tl.prepro.sequences_get_mask()
-    >>> with tf.variable_scope("model") as vs:#, reuse=reuse):
-    ...     # for chatbot, you can use the same embedding layer,
-    ...     # for translation, you may want to use 2 seperated embedding layers
-    >>>     net_encode = EmbeddingInputlayer(
-    ...             inputs = encode_seqs,
-    ...             vocabulary_size = 10000,
-    ...             embedding_size = 200,
-    ...             name = 'seq_embedding')
-    >>>     vs.reuse_variables()
-    >>>     tl.layers.set_name_reuse(True)
-    >>>     net_decode = EmbeddingInputlayer(
-    ...             inputs = decode_seqs,
-    ...             vocabulary_size = 10000,
-    ...             embedding_size = 200,
-    ...             name = 'seq_embedding')
-    >>>     net = Seq2Seq(net_encode, net_decode,
-    ...             cell_fn = tf.nn.rnn_cell.LSTMCell,
-    ...             n_hidden = 200,
-    ...             initializer = tf.random_uniform_initializer(-0.1, 0.1),
-    ...             encode_sequence_length = retrieve_seq_length_op2(encode_seqs),
-    ...             decode_sequence_length = retrieve_seq_length_op2(decode_seqs),
-    ...             initial_state = None,
-    ...             dropout = None,
-    ...             n_layer = 1,
-    ...             return_seq_2d = True,
-    ...             name = 'seq2seq')
-    >>> net_out = DenseLayer(net, n_units=10000, act=tf.identity, name='output')
-    >>> e_loss = tl.cost.cross_entropy_seq_with_mask(logits=net_out.outputs, target_seqs=target_seqs, input_mask=target_mask, return_details=False, name='cost')
-    >>> y = tf.nn.softmax(net_out.outputs)
-    >>> net_out.print_params(False)
-
-    Notes
-    --------
-    - How to feed data: `Sequence to Sequence Learning with Neural Networks <https://arxiv.org/pdf/1409.3215v3.pdf>`_
-    - input_seqs : ``['how', 'are', 'you', '<PAD_ID'>]``
-    - decode_seqs : ``['<START_ID>', 'I', 'am', 'fine', '<PAD_ID'>]``
-    - target_seqs : ``['I', 'am', 'fine', '<END_ID']``
-    - target_mask : ``[1, 1, 1, 1, 0]``
-    - related functions : tl.prepro <pad_sequences, precess_sequences, sequences_add_start_id, sequences_get_mask>
-    """
-    def __init__(
-        self,
-        net_encode_in = None,
-        net_decode_in = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple':True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        encode_sequence_length = None,
-        decode_sequence_length = None,
-        initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        # return_last = False,
-        return_seq_2d = False,
-        name = 'seq2seq',
-    ):
-        Layer.__init__(self, name=name)
-        if cell_fn is None:
-            raise Exception("Please put in cell_fn")
-        if 'GRU' in cell_fn.__name__:
-            try:
-                cell_init_args.pop('state_is_tuple')
-            except:
-                pass
-        # self.inputs = layer.outputs
-        print("  [**] Seq2Seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" %
-              (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
-
-        with tf.variable_scope(name) as vs:#, reuse=reuse):
-            # tl.layers.set_name_reuse(reuse)
-            # network = InputLayer(self.inputs, name=name+'/input')
-            network_encode = DynamicRNNLayer(net_encode_in,
-                     cell_fn = cell_fn,
-                     cell_init_args = cell_init_args,
-                     n_hidden = n_hidden,
-                     initial_state = initial_state,
-                     dropout = dropout,
-                     n_layer = n_layer,
-                     sequence_length = encode_sequence_length,
-                     return_last = False,
-                     return_seq_2d = True,
-                     name = name+'_encode')
-            # vs.reuse_variables()
-            # tl.layers.set_name_reuse(True)
-            network_decode = DynamicRNNLayer(net_decode_in,
-                     cell_fn = cell_fn,
-                     cell_init_args = cell_init_args,
-                     n_hidden = n_hidden,
-                     initial_state = network_encode.final_state,
-                     dropout = dropout,
-                     n_layer = n_layer,
-                     sequence_length = decode_sequence_length,
-                     return_last = False,
-                     return_seq_2d = return_seq_2d,
-                     name = name+'_decode')
-            self.outputs = network_decode.outputs
-
-            rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-        # Final state
-        self.final_state = network_decode.final_state
-
-        # self.sequence_length = sequence_length
-        self.all_layers = list(network_decode.all_layers)
-        self.all_params = list(network_decode.all_params)
-        self.all_drop = dict(network_decode.all_drop)
-
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
-
-        self.all_layers = list_remove_repeat(self.all_layers)
-        self.all_params = list_remove_repeat(self.all_params)
-
-class PeekySeq2Seq(Layer):
-    """
-    Waiting for contribution.
-    The :class:`PeekySeq2Seq` class, see `Model <https://camo.githubusercontent.com/7f690d451036938a51e62feb77149c8bb4be6675/687474703a2f2f6936342e74696e797069632e636f6d2f333032617168692e706e67>`_
-    and `Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation <https://arxiv.org/abs/1406.1078>`_ .
-    """
-    def __init__(
-        self,
-        net_encode_in = None,
-        net_decode_in = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple':True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        in_sequence_length = None,
-        out_sequence_length = None,
-        initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        # return_last = False,
-        return_seq_2d = False,
-        name = 'peeky_seq2seq',
-    ):
-        Layer.__init__(self, name=name)
-        if cell_fn is None:
-            raise Exception("Please put in cell_fn")
-        # self.inputs = layer.outputs
-        print("  [TL] PeekySeq2seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" %
-              (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
-
-class AttentionSeq2Seq(Layer):
-    """
-    Waiting for contribution.
-    The :class:`AttentionSeq2Seq` class, see `Model <https://camo.githubusercontent.com/0e2e4e5fb2dd47846c2fe027737a5df5e711df1b/687474703a2f2f6936342e74696e797069632e636f6d2f6132727733642e706e67>`_
-    and `Neural Machine Translation by Jointly Learning to Align and Translate <https://arxiv.org/pdf/1409.0473v6.pdf>`_ .
-    """
-    def __init__(
-        self,
-        net_encode_in = None,
-        net_decode_in = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple':True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        in_sequence_length = None,
-        out_sequence_length = None,
-        initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        # return_last = False,
-        return_seq_2d = False,
-        name = 'attention_seq2seq',
-    ):
-        Layer.__init__(self, name=name)
-        if cell_fn is None:
-            raise Exception("Please put in cell_fn")
-        # self.inputs = layer.outputs
-        print("  [TL] PeekySeq2seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" %
-              (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
-
-## Shape layer
-class FlattenLayer(Layer):
-    """
-    The :class:`FlattenLayer` class is layer which reshape high-dimension
-    input to a vector. Then we can apply DenseLayer, RNNLayer, ConcatLayer and
-    etc on the top of it.
-
-    [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask]
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = tl.layers.Conv2dLayer(network,
-    ...                    act = tf.nn.relu,
-    ...                    shape = [5, 5, 32, 64],
-    ...                    strides=[1, 1, 1, 1],
-    ...                    padding='SAME',
-    ...                    name ='cnn_layer')
-    >>> network = tl.layers.Pool2dLayer(network,
-    ...                    ksize=[1, 2, 2, 1],
-    ...                    strides=[1, 2, 2, 1],
-    ...                    padding='SAME',
-    ...                    pool = tf.nn.max_pool,
-    ...                    name ='pool_layer',)
-    >>> network = tl.layers.FlattenLayer(network, name='flatten_layer')
-    """
-    def __init__(
-        self,
-        layer = None,
-        name ='flatten_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        self.outputs = flatten_reshape(self.inputs, name=name)
-        self.n_units = int(self.outputs.get_shape()[-1])
-        print("  [TL] FlattenLayer %s: %d" % (self.name, self.n_units))
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-
-class ReshapeLayer(Layer):
-    """
-    The :class:`ReshapeLayer` class is layer which reshape the tensor.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    shape : a list
-        The output shape.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    - The core of this layer is ``tf.reshape``.
-    - Use TensorFlow only :
-    >>> x = tf.placeholder(tf.float32, shape=[None, 3])
-    >>> y = tf.reshape(x, shape=[-1, 3, 3])
-    >>> sess = tf.InteractiveSession()
-    >>> print(sess.run(y, feed_dict={x:[[1,1,1],[2,2,2],[3,3,3],[4,4,4],[5,5,5],[6,6,6]]}))
-    ... [[[ 1.  1.  1.]
-    ... [ 2.  2.  2.]
-    ... [ 3.  3.  3.]]
-    ... [[ 4.  4.  4.]
-    ... [ 5.  5.  5.]
-    ... [ 6.  6.  6.]]]
-    """
-    def __init__(
-        self,
-        layer = None,
-        shape = [],
-        name ='reshape_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        self.outputs = tf.reshape(self.inputs, shape=shape, name=name)
-        print("  [TL] ReshapeLayer %s: %s" % (self.name, self.outputs.get_shape()))
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-
-class LambdaLayer(Layer):
-    """
-    The :class:`LambdaLayer` class is a layer which is able to use the provided function.
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    fn : a function
-        The function that applies to the outputs of previous layer.
-    fn_args : a dictionary
-        The arguments for the function (option).
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    ---------
-    >>> x = tf.placeholder(tf.float32, shape=[None, 1], name='x')
-    >>> network = tl.layers.InputLayer(x, name='input_layer')
-    >>> network = LambdaLayer(network, lambda x: 2*x, name='lambda_layer')
-    >>> y = network.outputs
-    >>> sess = tf.InteractiveSession()
-    >>> out = sess.run(y, feed_dict={x : [[1],[2]]})
-    ... [[2],[4]]
-    """
-    def __init__(
-        self,
-        layer = None,
-        fn = None,
-        fn_args = {},
-        name = 'lambda_layer',
-    ):
-        Layer.__init__(self, name=name)
-        assert layer is not None
-        assert fn is not None
-        self.inputs = layer.outputs
-        print("  [TL] LambdaLayer  %s" % self.name)
-        with tf.variable_scope(name) as vs:
-            self.outputs = fn(self.inputs, **fn_args)
-            variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
-
-## Merge layer
-class ConcatLayer(Layer):
-    """
-    The :class:`ConcatLayer` class is layer which concat (merge) two or more
-    :class:`DenseLayer` to a single class:`DenseLayer`.
-
-    Parameters
-    ----------
-    layer : a list of :class:`Layer` instances
-        The `Layer` class feeding into this layer.
-    concat_dim : int
-        Dimension along which to concatenate.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    >>> sess = tf.InteractiveSession()
-    >>> x = tf.placeholder(tf.float32, shape=[None, 784])
-    >>> inputs = tl.layers.InputLayer(x, name='input_layer')
-    >>> net1 = tl.layers.DenseLayer(inputs, n_units=800, act = tf.nn.relu, name='relu1_1')
-    >>> net2 = tl.layers.DenseLayer(inputs, n_units=300, act = tf.nn.relu, name='relu2_1')
-    >>> network = tl.layers.ConcatLayer(layer = [net1, net2], name ='concat_layer')
-    ...     [TL] InputLayer input_layer (?, 784)
-    ...     [TL] DenseLayer relu1_1: 800, <function relu at 0x1108e41e0>
-    ...     [TL] DenseLayer relu2_1: 300, <function relu at 0x1108e41e0>
-    ...     [TL] ConcatLayer concat_layer, 1100
-    ...
-    >>> tl.layers.initialize_global_variables(sess)
-    >>> network.print_params()
-    ...     param 0: (784, 800) (mean: 0.000021, median: -0.000020 std: 0.035525)
-    ...     param 1: (800,) (mean: 0.000000, median: 0.000000 std: 0.000000)
-    ...     param 2: (784, 300) (mean: 0.000000, median: -0.000048 std: 0.042947)
-    ...     param 3: (300,) (mean: 0.000000, median: 0.000000 std: 0.000000)
-    ...     num of params: 863500
-    >>> network.print_layers()
-    ...     layer 0: Tensor("Relu:0", shape=(?, 800), dtype=float32)
-    ...     layer 1: Tensor("Relu_1:0", shape=(?, 300), dtype=float32)
-    ...
-    """
-    def __init__(
-        self,
-        layer = [],
-        concat_dim = 1,
-        name ='concat_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = []
-        for l in layer:
-            self.inputs.append(l.outputs)
-        try: # TF1.0
-            self.outputs = tf.concat(self.inputs, concat_dim, name=name)
-        except: # TF0.12
-            self.outputs = tf.concat(concat_dim, self.inputs, name=name)
-        self.n_units = int(self.outputs.get_shape()[-1])
-        print("  [TL] ConcatLayer %s: %d" % (self.name, self.n_units))
-
-        self.all_layers = list(layer[0].all_layers)
-        self.all_params = list(layer[0].all_params)
-        self.all_drop = dict(layer[0].all_drop)
-
-        for i in range(1, len(layer)):
-            self.all_layers.extend(list(layer[i].all_layers))
-            self.all_params.extend(list(layer[i].all_params))
-            self.all_drop.update(dict(layer[i].all_drop))
-
-        self.all_layers = list_remove_repeat(self.all_layers)
-        self.all_params = list_remove_repeat(self.all_params)
-        #self.all_drop = list_remove_repeat(self.all_drop) # it is a dict
-
-class ElementwiseLayer(Layer):
-    """
-    The :class:`ElementwiseLayer` class combines multiple :class:`Layer` which have the same output shapes by a given elemwise-wise operation.
-
-    Parameters
-    ----------
-    layer : a list of :class:`Layer` instances
-        The `Layer` class feeding into this layer.
-    combine_fn : a TensorFlow elemwise-merge function
-        e.g. AND is ``tf.minimum`` ;  OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.multiply`` and so on.
-        See `TensorFlow Math API <https://www.tensorflow.org/versions/master/api_docs/python/math_ops.html#math>`_ .
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    - AND Logic
-    >>> net_0 = tl.layers.DenseLayer(net_0, n_units=500,
-    ...                        act = tf.nn.relu, name='net_0')
-    >>> net_1 = tl.layers.DenseLayer(net_1, n_units=500,
-    ...                        act = tf.nn.relu, name='net_1')
-    >>> net_com = tl.layers.ElementwiseLayer(layer = [net_0, net_1],
-    ...                         combine_fn = tf.minimum,
-    ...                         name = 'combine_layer')
-    """
-    def __init__(
-        self,
-        layer = [],
-        combine_fn = tf.minimum,
-        name ='elementwise_layer',
-    ):
-        Layer.__init__(self, name=name)
-
-        print("  [TL] ElementwiseLayer %s: size:%s fn:%s" % (self.name, layer[0].outputs.get_shape(), combine_fn.__name__))
-
-        self.outputs = layer[0].outputs
-        # print(self.outputs._shape, type(self.outputs._shape))
-        for l in layer[1:]:
-            assert str(self.outputs.get_shape()) == str(l.outputs.get_shape()), "Hint: the input shapes should be the same. %s != %s" %  (self.outputs.get_shape() , str(l.outputs.get_shape()))
-            self.outputs = combine_fn(self.outputs, l.outputs, name=name)
-
-        self.all_layers = list(layer[0].all_layers)
-        self.all_params = list(layer[0].all_params)
-        self.all_drop = dict(layer[0].all_drop)
-
-        for i in range(1, len(layer)):
-            self.all_layers.extend(list(layer[i].all_layers))
-            self.all_params.extend(list(layer[i].all_params))
-            self.all_drop.update(dict(layer[i].all_drop))
-
-        self.all_layers = list_remove_repeat(self.all_layers)
-        self.all_params = list_remove_repeat(self.all_params)
-        # self.all_drop = list_remove_repeat(self.all_drop)
-
-# Extend
-class ExpandDimsLayer(Layer):
-    """
-    The :class:`ExpandDimsLayer` class inserts a dimension of 1 into a tensor's shape,
-    see `tf.expand_dims() <https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#expand_dims>`_ .
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    axis : int, 0-D (scalar).
-        Specifies the dimension index at which to expand the shape of input.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        axis = None,
-        name = 'expand_dims',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-
-        print("  [TL] ExpandDimsLayer  %s: axis:%d" % (self.name, axis))
-        with tf.variable_scope(name) as vs:
-            try:    # TF12 TF1.0
-                self.outputs = tf.expand_dims(self.inputs, axis=axis)
-            except: # TF11
-                self.outputs = tf.expand_dims(self.inputs, dim=axis)
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        # self.all_params.extend( variables )
-
-class TileLayer(Layer):
-    """
-    The :class:`TileLayer` class constructs a tensor by tiling a given tensor,
-    see `tf.tile() <https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#tile>`_ .
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    multiples: a list of int
-        Must be one of the following types: int32, int64. 1-D. Length must be the same as the number of dimensions in input
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        multiples = None,
-        name = 'tile',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-
-        print("  [TL] TileLayer  %s: multiples:%s" % (self.name, multiples))
-        with tf.variable_scope(name) as vs:
-            self.outputs = tf.tile(self.inputs, multiples=multiples)
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        # self.all_params.extend( variables )
-
-## TF-Slim layer
-class SlimNetsLayer(Layer):
-    """
-    The :class:`SlimNetsLayer` class can be used to merge all TF-Slim nets into
-    TensorLayer. Model can be found in `slim-model <https://github.com/tensorflow/models/tree/master/slim#pre-trained-models>`_ , more about slim
-    see `slim-git <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim>`_ .
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    slim_layer : a slim network function
-        The network you want to stack onto, end with ``return net, end_points``.
-    slim_args : dictionary
-        The arguments for the slim model.
-    name : a string or None
-        An optional name to attach to this layer.
-
-    Examples
-    --------
-    - see Inception V3 example on `Github <https://github.com/zsdonghao/tensorlayer>`_
-
-    Notes
-    -----
-    The due to TF-Slim stores the layers as dictionary, the ``all_layers`` in this
-    network is not in order ! Fortunately, the ``all_params`` are in order.
-    """
-    def __init__(
-        self,
-        layer = None,
-        slim_layer = None,
-        slim_args = {},
-        name ='tfslim_layer',
-    ):
-        Layer.__init__(self, name=name)
-        assert slim_layer is not None
-        assert slim_args is not None
-        self.inputs = layer.outputs
-        print("  [TL] SlimNetsLayer %s: %s" % (self.name, slim_layer.__name__))
-
-        # with tf.variable_scope(name) as vs:
-        #     net, end_points = slim_layer(self.inputs, **slim_args)
-        #     slim_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-        net, end_points = slim_layer(self.inputs, **slim_args)
-
-        slim_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=name)
-        if slim_variables == []:
-            print("No variables found under %s : the name of SlimNetsLayer should be matched with the begining of the ckpt file, see tutorial_inceptionV3_tfslim.py for more details" % name)
-
-
-        self.outputs = net
-
-        slim_layers = []
-        for v in end_points.values():
-            # tf.contrib.layers.summaries.summarize_activation(v)
-            slim_layers.append(v)
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-
-        self.all_layers.extend( slim_layers )
-        self.all_params.extend( slim_variables )
-
-## Keras layer
-class KerasLayer(Layer):
-    """
-    The :class:`KerasLayer` class can be used to merge all Keras layers into
-    TensorLayer. Example can be found here `tutorial_keras.py <https://github.com/zsdonghao/tensorlayer/blob/master/example/tutorial_keras.py>`_
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    keras_layer : a keras network function
-    keras_args : dictionary
-        The arguments for the keras model.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        keras_layer = None,
-        keras_args = {},
-        name ='keras_layer',
-    ):
-        Layer.__init__(self, name=name)
-        assert layer is not None
-        assert keras_layer is not None
-        self.inputs = layer.outputs
-        print("  [TL] KerasLayer %s: %s" % (self.name, keras_layer))
-        print("       This API will be removed, please use LambdaLayer instead.")
-        with tf.variable_scope(name) as vs:
-            self.outputs = keras_layer(self.inputs, **keras_args)
-            variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
-
-## Estimator layer
-class EstimatorLayer(Layer):
-    """
-    The :class:`EstimatorLayer` class accepts ``model_fn`` that described the model.
-    It is similar with :class:`KerasLayer`, see `tutorial_keras.py <https://github.com/zsdonghao/tensorlayer/blob/master/example/tutorial_keras.py>`_
-
-    Parameters
-    ----------
-    layer : a :class:`Layer` instance
-        The `Layer` class feeding into this layer.
-    model_fn : a function that described the model.
-    args : dictionary
-        The arguments for the model_fn.
-    name : a string or None
-        An optional name to attach to this layer.
-    """
-    def __init__(
-        self,
-        layer = None,
-        model_fn = None,
-        args = {},
-        name ='estimator_layer',
-    ):
-        Layer.__init__(self, name=name)
-        assert layer is not None
-        assert model_fn is not None
-        self.inputs = layer.outputs
-        print("  [TL] EstimatorLayer %s: %s" % (self.name, model_fn))
-        print("       This API will be removed, please use LambdaLayer instead.")
-        with tf.variable_scope(name) as vs:
-            self.outputs = model_fn(self.inputs, **args)
-            variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
-
-## Special activation
-class PReluLayer(Layer):
-    """
-    The :class:`PReluLayer` class is Parametric Rectified Linear layer.
-
-    Parameters
-    ----------
-    x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
-        `int16`, or `int8`.
-    channel_shared : `bool`. Single weight is shared by all channels
-    a_init : alpha initializer, default zero constant.
-        The initializer for initializing the alphas.
-    a_init_args : dictionary
-        The arguments for the weights initializer.
-    name : A name for this activation op (optional).
-
-    References
-    -----------
-    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification <http://arxiv.org/pdf/1502.01852v1.pdf>`_
-    """
-    def __init__(
-        self,
-        layer = None,
-        channel_shared = False,
-        a_init = tf.constant_initializer(value=0.0),
-        a_init_args = {},
-        # restore = True,
-        name="prelu_layer"
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-        print("  [TL] PReluLayer %s: channel_shared:%s" % (self.name, channel_shared))
-        if channel_shared:
-            w_shape = (1,)
-        else:
-            w_shape = int(self.inputs.get_shape()[-1])
-
-        # with tf.name_scope(name) as scope:
-        with tf.variable_scope(name) as vs:
-            alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=a_init, **a_init_args )
-            try:  ## TF 1.0
-                self.outputs = tf.nn.relu(self.inputs) + tf.multiply(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5
-            except: ## TF 0.12
-                self.outputs = tf.nn.relu(self.inputs) + tf.mul(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5
-
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [alphas] )
-
-## Flow control layer
-class MultiplexerLayer(Layer):
-    """
-    The :class:`MultiplexerLayer` selects one of several input and forwards the selected input into the output,
-    see `tutorial_mnist_multiplexer.py`.
-
-    Parameters
-    ----------
-    layer : a list of :class:`Layer` instances
-        The `Layer` class feeding into this layer.
-    name : a string or None
-        An optional name to attach to this layer.
-
-
-    Variables
-    -----------------------
-    sel : a placeholder
-        Input an int [0, inf], which input is the output
-
-    Examples
-    --------
-    >>> x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
-    >>> y_ = tf.placeholder(tf.int64, shape=[None, ], name='y_')
-    >>> # define the network
-    >>> net_in = tl.layers.InputLayer(x, name='input_layer')
-    >>> net_in = tl.layers.DropoutLayer(net_in, keep=0.8, name='drop1')
-    >>> # net 0
-    >>> net_0 = tl.layers.DenseLayer(net_in, n_units=800,
-    ...                                act = tf.nn.relu, name='net0/relu1')
-    >>> net_0 = tl.layers.DropoutLayer(net_0, keep=0.5, name='net0/drop2')
-    >>> net_0 = tl.layers.DenseLayer(net_0, n_units=800,
-    ...                                act = tf.nn.relu, name='net0/relu2')
-    >>> # net 1
-    >>> net_1 = tl.layers.DenseLayer(net_in, n_units=800,
-    ...                                act = tf.nn.relu, name='net1/relu1')
-    >>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop2')
-    >>> net_1 = tl.layers.DenseLayer(net_1, n_units=800,
-    ...                                act = tf.nn.relu, name='net1/relu2')
-    >>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop3')
-    >>> net_1 = tl.layers.DenseLayer(net_1, n_units=800,
-    ...                                act = tf.nn.relu, name='net1/relu3')
-    >>> # multiplexer
-    >>> net_mux = tl.layers.MultiplexerLayer(layer = [net_0, net_1], name='mux_layer')
-    >>> network = tl.layers.ReshapeLayer(net_mux, shape=[-1, 800], name='reshape_layer') #
-    >>> network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3')
-    >>> # output layer
-    >>> network = tl.layers.DenseLayer(network, n_units=10,
-    ...                                act = tf.identity, name='output_layer')
-
-    References
-    ------------
-    - See ``tf.pack() for TF0.12 or tf.stack() for TF1.0`` and ``tf.gather()`` at `TensorFlow - Slicing and Joining <https://www.tensorflow.org/versions/master/api_docs/python/array_ops.html#slicing-and-joining>`_
-    """
-    def __init__(self,
-               layer = [],
-               name='mux_layer'):
-        Layer.__init__(self, name=name)
-        self.n_inputs = len(layer)
-
-        self.inputs = []
-        for l in layer:
-            self.inputs.append(l.outputs)
-        try: ## TF1.0
-            all_inputs = tf.stack(self.inputs, name=name) # pack means concat a list of tensor in a new dim  # 1.2
-        except:
-            all_inputs = tf.pack(self.inputs, name=name) # pack means concat a list of tensor in a new dim  # 1.2
-
-        print("  [TL] MultiplexerLayer %s: n_inputs:%d" % (self.name, self.n_inputs))
-
-        self.sel = tf.placeholder(tf.int32)
-        self.outputs = tf.gather(all_inputs, self.sel, name=name) # [sel, :, : ...] # 1.2
-
-        # print(self.outputs, vars(self.outputs))
-        #         # tf.reshape(self.outputs, shape=)
-        # exit()
-        # the same with ConcatLayer
-        self.all_layers = list(layer[0].all_layers)
-        self.all_params = list(layer[0].all_params)
-        self.all_drop = dict(layer[0].all_drop)
-
-        for i in range(1, len(layer)):
-            self.all_layers.extend(list(layer[i].all_layers))
-            self.all_params.extend(list(layer[i].all_params))
-            self.all_drop.update(dict(layer[i].all_drop))
-
-        self.all_layers = list_remove_repeat(self.all_layers)
-        self.all_params = list_remove_repeat(self.all_params)
-        # self.all_drop = list_remove_repeat(self.all_drop)
-## We can Duplicate the network instead of DemultiplexerLayer
-# class DemultiplexerLayer(Layer):
-#     """
-#     The :class:`DemultiplexerLayer` takes a single input and select one of many output lines, which is connected to the input.
-#
-#     Parameters
-#     ----------
-#     layer : a list of :class:`Layer` instances
-#         The `Layer` class feeding into this layer.
-#     n_outputs : a int
-#         The number of output
-#     name : a string or None
-#         An optional name to attach to this layer.
-#
-#     Field (Class Variables)
-#     -----------------------
-#     sel : a placeholder
-#         Input int [0, inf], the
-#     outputs : a list of Tensor
-#         A list of outputs
-#
-#     Examples
-#     --------
-#     >>>
-#     """
-#     def __init__(self,
-#            layer = None,
-#            name='demux_layer'):
-#         Layer.__init__(self, name=name)
-#         self.outputs = []
-
-## Wrapper
-class EmbeddingAttentionSeq2seqWrapper(Layer):
-  """Sequence-to-sequence model with attention and for multiple buckets (Deprecated after TF0.12).
-
-    This example implements a multi-layer recurrent neural network as encoder,
-    and an attention-based decoder. This is the same as the model described in
-    this paper:
-    - `Grammar as a Foreign Language <http://arxiv.org/abs/1412.7449>`_
-    please look there for details,
-    or into the seq2seq library for complete model implementation.
-    This example also allows to use GRU cells in addition to LSTM cells, and
-    sampled softmax to handle large output vocabulary size. A single-layer
-    version of this model, but with bi-directional encoder, was presented in
-    - `Neural Machine Translation by Jointly Learning to Align and Translate <http://arxiv.org/abs/1409.0473>`_
-    The sampled softmax is described in Section 3 of the following paper.
-    - `On Using Very Large Target Vocabulary for Neural Machine Translation <http://arxiv.org/abs/1412.2007>`_
-
-    Parameters
-    ----------
-    source_vocab_size : size of the source vocabulary.
-    target_vocab_size : size of the target vocabulary.
-    buckets : a list of pairs (I, O), where I specifies maximum input length
-        that will be processed in that bucket, and O specifies maximum output
-        length. Training instances that have inputs longer than I or outputs
-        longer than O will be pushed to the next bucket and padded accordingly.
-        We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
-    size : number of units in each layer of the model.
-    num_layers : number of layers in the model.
-    max_gradient_norm : gradients will be clipped to maximally this norm.
-    batch_size : the size of the batches used during training;
-        the model construction is independent of batch_size, so it can be
-        changed after initialization if this is convenient, e.g., for decoding.
-    learning_rate : learning rate to start with.
-    learning_rate_decay_factor : decay learning rate by this much when needed.
-    use_lstm : if true, we use LSTM cells instead of GRU cells.
-    num_samples : number of samples for sampled softmax.
-    forward_only : if set, we do not construct the backward pass in the model.
-    name : a string or None
-        An optional name to attach to this layer.
-  """
-  def __init__(self,
-               source_vocab_size,
-               target_vocab_size,
-               buckets,
-               size,
-               num_layers,
-               max_gradient_norm,
-               batch_size,
-               learning_rate,
-               learning_rate_decay_factor,
-               use_lstm=False,
-               num_samples=512,
-               forward_only=False,
-               name='wrapper'):
-    Layer.__init__(self)#, name=name)
-
-    self.source_vocab_size = source_vocab_size
-    self.target_vocab_size = target_vocab_size
-    self.buckets = buckets
-    self.batch_size = batch_size
-    self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate')
-    self.learning_rate_decay_op = self.learning_rate.assign(
-        self.learning_rate * learning_rate_decay_factor)
-    self.global_step = tf.Variable(0, trainable=False, name='global_step')
-
-    if tf.__version__ >= "0.12":
-        raise Exception("Deprecated after TF0.12 : use other seq2seq layers instead.")
-
-    # =========== Fake output Layer for compute cost ======
-    # If we use sampled softmax, we need an output projection.
-    with tf.variable_scope(name) as vs:
-        output_projection = None
-        softmax_loss_function = None
-        # Sampled softmax only makes sense if we sample less than vocabulary size.
-        if num_samples > 0 and num_samples < self.target_vocab_size:
-          w = tf.get_variable("proj_w", [size, self.target_vocab_size])
-          w_t = tf.transpose(w)
-          b = tf.get_variable("proj_b", [self.target_vocab_size])
-          output_projection = (w, b)
-
-          def sampled_loss(inputs, labels):
-            labels = tf.reshape(labels, [-1, 1])
-            return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
-                    self.target_vocab_size)
-          softmax_loss_function = sampled_loss
-
-        # ============ Seq Encode Layer =============
-        # Create the internal multi-layer cell for our RNN.
-        try: # TF1.0
-          single_cell = tf.contrib.rnn.GRUCell(size)
-        except:
-          single_cell = tf.nn.rnn_cell.GRUCell(size)
-
-        if use_lstm:
-          try: # TF1.0
-            single_cell = tf.contrib.rnn.BasicLSTMCell(size)
-          except:
-            single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
-
-        cell = single_cell
-        if num_layers > 1:
-          try: # TF1.0
-            cell = tf.contrib.rnn.MultiRNNCell([single_cell] * num_layers)
-          except:
-            cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
-
-        # ============== Seq Decode Layer ============
-        # The seq2seq function: we use embedding for the input and attention.
-        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
-          return tf.nn.seq2seq.embedding_attention_seq2seq(
-              encoder_inputs, decoder_inputs, cell,
-              num_encoder_symbols=source_vocab_size,
-              num_decoder_symbols=target_vocab_size,
-              embedding_size=size,
-              output_projection=output_projection,
-              feed_previous=do_decode)
-
-        #=============================================================
-        # Feeds for inputs.
-        self.encoder_inputs = []
-        self.decoder_inputs = []
-        self.target_weights = []
-        for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
-          self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
-                                                    name="encoder{0}".format(i)))
-        for i in xrange(buckets[-1][1] + 1):
-          self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
-                                                    name="decoder{0}".format(i)))
-          self.target_weights.append(tf.placeholder(tf.float32, shape=[None],
-                                                    name="weight{0}".format(i)))
-
-        # Our targets are decoder inputs shifted by one.
-        targets = [self.decoder_inputs[i + 1]
-                   for i in xrange(len(self.decoder_inputs) - 1)]
-        self.targets = targets  # DH add for debug
-
-
-        # Training outputs and losses.
-        if forward_only:
-          self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
-              self.encoder_inputs, self.decoder_inputs, targets,
-              self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
-              softmax_loss_function=softmax_loss_function)
-          # If we use output projection, we need to project outputs for decoding.
-          if output_projection is not None:
-            for b in xrange(len(buckets)):
-              self.outputs[b] = [
-                  tf.matmul(output, output_projection[0]) + output_projection[1]
-                  for output in self.outputs[b]
-              ]
-        else:
-          self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
-              self.encoder_inputs, self.decoder_inputs, targets,
-              self.target_weights, buckets,
-              lambda x, y: seq2seq_f(x, y, False),
-              softmax_loss_function=softmax_loss_function)
-
-        # Gradients and SGD update operation for training the model.
-        params = tf.trainable_variables()
-        if not forward_only:
-          self.gradient_norms = []
-          self.updates = []
-          opt = tf.train.GradientDescentOptimizer(self.learning_rate)
-          for b in xrange(len(buckets)):
-            gradients = tf.gradients(self.losses[b], params)
-            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
-                                                             max_gradient_norm)
-            self.gradient_norms.append(norm)
-            self.updates.append(opt.apply_gradients(
-                zip(clipped_gradients, params), global_step=self.global_step))
-
-        # if save into npz
-        self.all_params = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-    # if save into ckpt
-    self.saver = tf.train.Saver(tf.all_variables())
-
-  def step(self, session, encoder_inputs, decoder_inputs, target_weights,
-           bucket_id, forward_only):
-    """Run a step of the model feeding the given inputs.
-
-    Parameters
-    ----------
-    session : tensorflow session to use.
-    encoder_inputs : list of numpy int vectors to feed as encoder inputs.
-    decoder_inputs : list of numpy int vectors to feed as decoder inputs.
-    target_weights : list of numpy float vectors to feed as target weights.
-    bucket_id : which bucket of the model to use.
-    forward_only : whether to do the backward step or only forward.
-
-    Returns
-    --------
-    A triple consisting of gradient norm (or None if we did not do backward),
-    average perplexity, and the outputs.
-
-    Raises
-    --------
-    ValueError : if length of encoder_inputs, decoder_inputs, or
-        target_weights disagrees with bucket size for the specified bucket_id.
-    """
-    # Check if the sizes match.
-    encoder_size, decoder_size = self.buckets[bucket_id]
-    if len(encoder_inputs) != encoder_size:
-      raise ValueError("Encoder length must be equal to the one in bucket,"
-                       " %d != %d." % (len(encoder_inputs), encoder_size))
-    if len(decoder_inputs) != decoder_size:
-      raise ValueError("Decoder length must be equal to the one in bucket,"
-                       " %d != %d." % (len(decoder_inputs), decoder_size))
-    if len(target_weights) != decoder_size:
-      raise ValueError("Weights length must be equal to the one in bucket,"
-                       " %d != %d." % (len(target_weights), decoder_size))
-    # print('in model.step()')
-    # print('a',bucket_id, encoder_size, decoder_size)
-
-    # Input feed: encoder inputs, decoder inputs, target_weights, as provided.
-    input_feed = {}
-    for l in xrange(encoder_size):
-      input_feed[self.encoder_inputs[l].name] = encoder_inputs[l]
-    for l in xrange(decoder_size):
-      input_feed[self.decoder_inputs[l].name] = decoder_inputs[l]
-      input_feed[self.target_weights[l].name] = target_weights[l]
-    # print(self.encoder_inputs[l].name)
-    # print(self.decoder_inputs[l].name)
-    # print(self.target_weights[l].name)
-
-    # Since our targets are decoder inputs shifted by one, we need one more.
-    last_target = self.decoder_inputs[decoder_size].name
-    input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32)
-    # print('last_target', last_target)
-
-    # Output feed: depends on whether we do a backward step or not.
-    if not forward_only:
-      output_feed = [self.updates[bucket_id],  # Update Op that does SGD.
-                     self.gradient_norms[bucket_id],  # Gradient norm.
-                     self.losses[bucket_id]]  # Loss for this batch.
-    else:
-      output_feed = [self.losses[bucket_id]]  # Loss for this batch.
-      for l in xrange(decoder_size):  # Output logits.
-        output_feed.append(self.outputs[bucket_id][l])
-
-    outputs = session.run(output_feed, input_feed)
-    if not forward_only:
-      return outputs[1], outputs[2], None  # Gradient norm, loss, no outputs.
-    else:
-      return None, outputs[0], outputs[1:]  # No gradient norm, loss, outputs.
-
-  def get_batch(self, data, bucket_id, PAD_ID=0, GO_ID=1, EOS_ID=2, UNK_ID=3):
-    """ Get a random batch of data from the specified bucket, prepare for step.
-
-    To feed data in step(..) it must be a list of batch-major vectors, while
-    data here contains single length-major cases. So the main logic of this
-    function is to re-index data cases to be in the proper format for feeding.
-
-    Parameters
-    ----------
-    data : a tuple of size len(self.buckets) in which each element contains
-        lists of pairs of input and output data that we use to create a batch.
-    bucket_id : integer, which bucket to get the batch for.
-    PAD_ID : int
-        Index of Padding in vocabulary
-    GO_ID : int
-        Index of GO in vocabulary
-    EOS_ID : int
-        Index of End of sentence in vocabulary
-    UNK_ID : int
-        Index of Unknown word in vocabulary
-
-    Returns
-    -------
-    The triple (encoder_inputs, decoder_inputs, target_weights) for
-    the constructed batch that has the proper format to call step(...) later.
-    """
-    encoder_size, decoder_size = self.buckets[bucket_id]
-    encoder_inputs, decoder_inputs = [], []
-
-    # Get a random batch of encoder and decoder inputs from data,
-    # pad them if needed, reverse encoder inputs and add GO to decoder.
-    for _ in xrange(self.batch_size):
-      encoder_input, decoder_input = random.choice(data[bucket_id])
-
-      # Encoder inputs are padded and then reversed.
-      encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input))
-      encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))
-
-      # Decoder inputs get an extra "GO" symbol, and are padded then.
-      decoder_pad_size = decoder_size - len(decoder_input) - 1
-      decoder_inputs.append([GO_ID] + decoder_input +
-                            [PAD_ID] * decoder_pad_size)
-
-    # Now we create batch-major vectors from the data selected above.
-    batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []
-
-    # Batch encoder inputs are just re-indexed encoder_inputs.
-    for length_idx in xrange(encoder_size):
-      batch_encoder_inputs.append(
-          np.array([encoder_inputs[batch_idx][length_idx]
-                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))
-
-    # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
-    for length_idx in xrange(decoder_size):
-      batch_decoder_inputs.append(
-          np.array([decoder_inputs[batch_idx][length_idx]
-                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))
-
-      # Create target_weights to be 0 for targets that are padding.
-      batch_weight = np.ones(self.batch_size, dtype=np.float32)
-      for batch_idx in xrange(self.batch_size):
-        # We set weight to 0 if the corresponding target is a PAD symbol.
-        # The corresponding target is decoder_input shifted by 1 forward.
-        if length_idx < decoder_size - 1:
-          target = decoder_inputs[batch_idx][length_idx + 1]
-        if length_idx == decoder_size - 1 or target == PAD_ID:
-          batch_weight[batch_idx] = 0.0
-      batch_weights.append(batch_weight)
-    return batch_encoder_inputs, batch_decoder_inputs, batch_weights
-
-## Developing or Untested
-class MaxoutLayer(Layer):
-    """
-    Waiting for contribution
-
-    Single DenseLayer with Max-out behaviour, work well with Dropout.
-
-    References
-    -----------
-    `Goodfellow (2013) Maxout Networks <http://arxiv.org/abs/1302.4389>`_
-    """
-    def __init__(
-        self,
-        layer = None,
-        n_units = 100,
-        name ='maxout_layer',
-    ):
-        Layer.__init__(self, name=name)
-        self.inputs = layer.outputs
-
-        print("  [TL] MaxoutLayer %s: %d" % (self.name, self.n_units))
-        print("    Waiting for contribution")
-        with tf.variable_scope(name) as vs:
-            pass
-            # W = tf.Variable(init.xavier_init(n_inputs=n_in, n_outputs=n_units, uniform=True), name='W')
-            # b = tf.Variable(tf.zeros([n_units]), name='b')
-
-        # self.outputs = act(tf.matmul(self.inputs, W) + b)
-        # https://www.tensorflow.org/versions/r0.9/api_docs/python/array_ops.html#pack
-        # http://stackoverflow.com/questions/34362193/how-to-explicitly-broadcast-a-tensor-to-match-anothers-shape-in-tensorflow
-        # tf.concat tf.pack  tf.tile
-
-        self.all_layers = list(layer.all_layers)
-        self.all_params = list(layer.all_params)
-        self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [W, b] )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
diff --git a/_tensorlayer/nlp.py b/_tensorlayer/nlp.py
deleted file mode 100755
index 3ed12a2..0000000
--- a/_tensorlayer/nlp.py
+++ /dev/null
@@ -1,932 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-
-
-import tensorflow as tf
-import os
-from sys import platform as _platform
-import collections
-import random
-import numpy as np
-import warnings
-from six.moves import xrange
-from tensorflow.python.platform import gfile
-import re
-
-## Iteration functions
-def generate_skip_gram_batch(data, batch_size, num_skips, skip_window, data_index=0):
-    """Generate a training batch for the Skip-Gram model.
-
-    Parameters
-    ----------
-    data : a list
-        To present context.
-    batch_size : an int
-        Batch size to return.
-    num_skips : an int
-        How many times to reuse an input to generate a label.
-    skip_window : an int
-        How many words to consider left and right.
-    data_index : an int
-        Index of the context location.
-        without using yield, this code use data_index to instead.
-
-    Returns
-    --------
-    batch : a list
-        Inputs
-    labels : a list
-        Labels
-    data_index : an int
-        Index of the context location.
-
-    Examples
-    --------
-    >>> Setting num_skips=2, skip_window=1, use the right and left words.
-    >>> In the same way, num_skips=4, skip_window=2 means use the nearby 4 words.
-
-    >>> data = [1,2,3,4,5,6,7,8,9,10,11]
-    >>> batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0)
-    >>> print(batch)
-    ... [2 2 3 3 4 4 5 5]
-    >>> print(labels)
-    ... [[3]
-    ... [1]
-    ... [4]
-    ... [2]
-    ... [5]
-    ... [3]
-    ... [4]
-    ... [6]]
-
-    References
-    -----------
-    - `TensorFlow word2vec tutorial <https://www.tensorflow.org/versions/r0.9/tutorials/word2vec/index.html#vector-representations-of-words>`_
-    """
-    # global data_index   # you can put data_index outside the function, then
-    #       modify the global data_index in the function without return it.
-    # note: without using yield, this code use data_index to instead.
-    assert batch_size % num_skips == 0
-    assert num_skips <= 2 * skip_window
-    batch = np.ndarray(shape=(batch_size), dtype=np.int32)
-    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
-    span = 2 * skip_window + 1 # [ skip_window target skip_window ]
-    buffer = collections.deque(maxlen=span)
-    for _ in range(span):
-        buffer.append(data[data_index])
-        data_index = (data_index + 1) % len(data)
-    for i in range(batch_size // num_skips):
-        target = skip_window  # target label at the center of the buffer
-        targets_to_avoid = [ skip_window ]
-        for j in range(num_skips):
-            while target in targets_to_avoid:
-                target = random.randint(0, span - 1)
-            targets_to_avoid.append(target)
-            batch[i * num_skips + j] = buffer[skip_window]
-            labels[i * num_skips + j, 0] = buffer[target]
-        buffer.append(data[data_index])
-        data_index = (data_index + 1) % len(data)
-    return batch, labels, data_index
-
-
-## Sampling functions
-def sample(a=[], temperature=1.0):
-    """Sample an index from a probability array.
-
-    Parameters
-    ----------
-    a : a list
-        List of probabilities.
-    temperature : float or None
-        The higher the more uniform.\n
-        When a = [0.1, 0.2, 0.7],\n
-            temperature = 0.7, the distribution will be sharpen [ 0.05048273  0.13588945  0.81362782]\n
-            temperature = 1.0, the distribution will be the same [0.1    0.2    0.7]\n
-            temperature = 1.5, the distribution will be filtered [ 0.16008435  0.25411807  0.58579758]\n
-        If None, it will be ``np.argmax(a)``
-
-    Notes
-    ------
-    No matter what is the temperature and input list, the sum of all probabilities will be one.
-    Even if input list = [1, 100, 200], the sum of all probabilities will still be one.
-
-    For large vocabulary_size, choice a higher temperature to avoid error.
-    """
-    b = np.copy(a)
-    try:
-        if temperature == 1:
-            return np.argmax(np.random.multinomial(1, a, 1))
-        if temperature is None:
-            return np.argmax(a)
-        else:
-            a = np.log(a) / temperature
-            a = np.exp(a) / np.sum(np.exp(a))
-            return np.argmax(np.random.multinomial(1, a, 1))
-    except:
-        # np.set_printoptions(threshold=np.nan)
-        # print(a)
-        # print(np.sum(a))
-        # print(np.max(a))
-        # print(np.min(a))
-        # exit()
-        message = "For large vocabulary_size, choice a higher temperature\
-         to avoid log error. Hint : use ``sample_top``. "
-        warnings.warn(message, Warning)
-        # print(a)
-        # print(b)
-        return np.argmax(np.random.multinomial(1, b, 1))
-
-def sample_top(a=[], top_k=10):
-    """Sample from ``top_k`` probabilities.
-
-    Parameters
-    ----------
-    a : a list
-        List of probabilities.
-    top_k : int
-        Number of candidates to be considered.
-    """
-    idx = np.argpartition(a, -top_k)[-top_k:]
-    probs = a[idx]
-    # print("new", probs)
-    probs = probs / np.sum(probs)
-    choice = np.random.choice(idx, p=probs)
-    return choice
-    ## old implementation
-    # a = np.array(a)
-    # idx = np.argsort(a)[::-1]
-    # idx = idx[:top_k]
-    # # a = a[idx]
-    # probs = a[idx]
-    # print("prev", probs)
-    # # probs = probs / np.sum(probs)
-    # # choice = np.random.choice(idx, p=probs)
-    # # return choice
-
-
-## Vector representations of words (Advanced)  UNDOCUMENT
-class SimpleVocabulary(object):
-  """Simple vocabulary wrapper, see create_vocab().
-
-  Parameters
-  ------------
-  vocab : A dictionary of word to word_id.
-  unk_id : Id of the special 'unknown' word.
-  """
-
-  def __init__(self, vocab, unk_id):
-    """Initializes the vocabulary."""
-
-
-    self._vocab = vocab
-    self._unk_id = unk_id
-
-  def word_to_id(self, word):
-    """Returns the integer id of a word string."""
-    if word in self._vocab:
-      return self._vocab[word]
-    else:
-      return self._unk_id
-
-class Vocabulary(object):
-  """Create Vocabulary class from a given vocabulary and its id-word, word-id convert,
-  see create_vocab() and ``tutorial_tfrecord3.py``.
-
-  Parameters
-  -----------
-  vocab_file : File containing the vocabulary, where the words are the first
-        whitespace-separated token on each line (other tokens are ignored) and
-        the word ids are the corresponding line numbers.
-  start_word : Special word denoting sentence start.
-  end_word : Special word denoting sentence end.
-  unk_word : Special word denoting unknown words.
-
-  Properties
-  ------------
-  vocab : a dictionary from word to id.
-  reverse_vocab : a list from id to word.
-  start_id : int of start id
-  end_id : int of end id
-  unk_id : int of unk id
-  pad_id : int of padding id
-
-  Vocab_files
-  -------------
-  >>> Look as follow, includes `start_word` , `end_word` but no `unk_word` .
-  >>> a 969108
-  >>> <S> 586368
-  >>> </S> 586368
-  >>> . 440479
-  >>> on 213612
-  >>> of 202290
-  >>> the 196219
-  >>> in 182598
-  >>> with 152984
-  >>> and 139109
-  >>> is 97322
-  """
-
-  def __init__(self,
-               vocab_file,
-               start_word="<S>",
-               end_word="</S>",
-               unk_word="<UNK>",
-               pad_word="<PAD>"):
-    if not tf.gfile.Exists(vocab_file):
-      tf.logging.fatal("Vocab file %s not found.", vocab_file)
-    tf.logging.info("Initializing vocabulary from file: %s", vocab_file)
-
-    with tf.gfile.GFile(vocab_file, mode="r") as f:
-      reverse_vocab = list(f.readlines())
-    reverse_vocab = [line.split()[0] for line in reverse_vocab]
-    assert start_word in reverse_vocab
-    assert end_word in reverse_vocab
-    if unk_word not in reverse_vocab:
-      reverse_vocab.append(unk_word)
-    vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
-
-    print("  [TL] Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word))
-    print("    vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab))
-    # tf.logging.info("     vocabulary with %d words" % len(vocab))
-
-    self.vocab = vocab  # vocab[word] = id
-    self.reverse_vocab = reverse_vocab  # reverse_vocab[id] = word
-
-    # Save special word ids.
-    self.start_id = vocab[start_word]
-    self.end_id = vocab[end_word]
-    self.unk_id = vocab[unk_word]
-    self.pad_id = vocab[pad_word]
-    print("      start_id: %d" % self.start_id)
-    print("      end_id: %d" % self.end_id)
-    print("      unk_id: %d" % self.unk_id)
-    print("      pad_id: %d" % self.pad_id)
-
-  def word_to_id(self, word):
-    """Returns the integer word id of a word string."""
-    if word in self.vocab:
-      return self.vocab[word]
-    else:
-      return self.unk_id
-
-  def id_to_word(self, word_id):
-    """Returns the word string of an integer word id."""
-    if word_id >= len(self.reverse_vocab):
-      return self.reverse_vocab[self.unk_id]
-    else:
-      return self.reverse_vocab[word_id]
-
-def process_sentence(sentence, start_word="<S>", end_word="</S>"):
-    """Converts a sentence string into a list of string words, add start_word and end_word,
-    see ``create_vocab()`` and ``tutorial_tfrecord3.py``.
-
-    Parameter
-    ---------
-    sentence : a sentence in string.
-    start_word : a string or None, if None, non start word will be appended.
-    end_word : a string or None, if None, non end word will be appended.
-
-    Returns
-    ---------
-    A list of strings; the processed caption.
-
-    Examples
-    -----------
-    >>> c = "how are you?"
-    >>> c = tl.nlp.process_sentence(c)
-    >>> print(c)
-    ... ['<S>', 'how', 'are', 'you', '?', '</S>']
-    """
-    try:
-        import nltk
-    except:
-        raise Exception("Hint : NLTK is required.")
-    if start_word is not None:
-        process_sentence = [start_word]
-    else:
-        process_sentence = []
-    process_sentence.extend(nltk.tokenize.word_tokenize(sentence.lower()))
-    if end_word is not None:
-        process_sentence.append(end_word)
-    return process_sentence
-
-def create_vocab(sentences, word_counts_output_file, min_word_count=1):
-    """Creates the vocabulary of word to word_id, see create_vocab() and ``tutorial_tfrecord3.py``.
-
-    The vocabulary is saved to disk in a text file of word counts. The id of each
-    word in the file is its corresponding 0-based line number.
-
-    Parameters
-    ------------
-    sentences : a list of lists of strings.
-    word_counts_output_file : A string
-        The file name.
-    min_word_count : a int
-        Minimum number of occurrences for a word.
-
-    Returns
-    --------
-    - tl.nlp.SimpleVocabulary object.
-
-    Mores
-    -----
-    - ``tl.nlp.build_vocab()``
-
-    Examples
-    --------
-    >>> captions = ["one two , three", "four five five"]
-    >>> processed_capts = []
-    >>> for c in captions:
-    >>>     c = tl.nlp.process_sentence(c, start_word="<S>", end_word="</S>")
-    >>>     processed_capts.append(c)
-    >>> print(processed_capts)
-    ...[['<S>', 'one', 'two', ',', 'three', '</S>'], ['<S>', 'four', 'five', 'five', '</S>']]
-
-    >>> tl.nlp.create_vocab(processed_capts, word_counts_output_file='vocab.txt', min_word_count=1)
-    ...   [TL] Creating vocabulary.
-    ...   Total words: 8
-    ...   Words in vocabulary: 8
-    ...   Wrote vocabulary file: vocab.txt
-    >>> vocab = tl.nlp.Vocabulary('vocab.txt', start_word="<S>", end_word="</S>", unk_word="<UNK>")
-    ...   [TL] Instantiate Vocabulary from vocab.txt : <S> </S> <UNK>
-    ...   vocabulary with 9 words (includes unk_word)
-    """
-    from collections import Counter
-    print("  [TL] Creating vocabulary.")
-    counter = Counter()
-    for c in sentences:
-        counter.update(c)
-        # print('c',c)
-    print("    Total words: %d" % len(counter))
-
-    # Filter uncommon words and sort by descending count.
-    word_counts = [x for x in counter.items() if x[1] >= min_word_count]
-    word_counts.sort(key=lambda x: x[1], reverse=True)
-    word_counts = [("<PAD>", 0)] + word_counts # 1st id should be reserved for padding
-    # print(word_counts)
-    print("    Words in vocabulary: %d" % len(word_counts))
-
-    # Write out the word counts file.
-    with tf.gfile.FastGFile(word_counts_output_file, "w") as f:
-        f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts]))
-    print("    Wrote vocabulary file: %s" % word_counts_output_file)
-
-    # Create the vocabulary dictionary.
-    reverse_vocab = [x[0] for x in word_counts]
-    unk_id = len(reverse_vocab)
-    vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
-    vocab = SimpleVocabulary(vocab_dict, unk_id)
-
-    return vocab
-
-
-## Vector representations of words
-def simple_read_words(filename="nietzsche.txt"):
-    """Read context from file without any preprocessing.
-
-    Parameters
-    ----------
-    filename : a string
-        A file path (like .txt file)
-
-    Returns
-    --------
-    The context in a string
-    """
-    with open("nietzsche.txt", "r") as f:
-        words = f.read()
-        return words
-
-def read_words(filename="nietzsche.txt", replace = ['\n', '<eos>']):
-    """File to list format context. Note that, this script can not handle punctuations.
-    For customized read_words method, see ``tutorial_generate_text.py``.
-
-    Parameters
-    ----------
-    filename : a string
-        A file path (like .txt file),
-    replace : a list
-        [original string, target string], to disable replace use ['', '']
-
-    Returns
-    --------
-    The context in a list, split by space by default, and use ``'<eos>'`` to represent ``'\n'``,
-    e.g. ``[... 'how', 'useful', 'it', "'s" ... ]``.
-
-    Code References
-    ---------------
-    - `tensorflow.models.rnn.ptb.reader <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/rnn/ptb>`_
-    """
-    with tf.gfile.GFile(filename, "r") as f:
-        try:    # python 3.4 or older
-            context_list = f.read().replace(*replace).split()
-        except: # python 3.5
-            f.seek(0)
-            replace = [x.encode('utf-8') for x in replace]
-            context_list = f.read().replace(*replace).split()
-        return context_list
-
-def read_analogies_file(eval_file='questions-words.txt', word2id={}):
-    """Reads through an analogy question file, return its id format.
-
-    Parameters
-    ----------
-    eval_data : a string
-        The file name.
-    word2id : a dictionary
-        Mapping words to unique IDs.
-
-    Returns
-    --------
-    analogy_questions : a [n, 4] numpy array containing the analogy question's
-             word ids.
-             questions_skipped: questions skipped due to unknown words.
-
-    Examples
-    ---------
-    >>> eval_file should be in this format :
-    >>> : capital-common-countries
-    >>> Athens Greece Baghdad Iraq
-    >>> Athens Greece Bangkok Thailand
-    >>> Athens Greece Beijing China
-    >>> Athens Greece Berlin Germany
-    >>> Athens Greece Bern Switzerland
-    >>> Athens Greece Cairo Egypt
-    >>> Athens Greece Canberra Australia
-    >>> Athens Greece Hanoi Vietnam
-    >>> Athens Greece Havana Cuba
-    ...
-
-    >>> words = tl.files.load_matt_mahoney_text8_dataset()
-    >>> data, count, dictionary, reverse_dictionary = \
-                tl.nlp.build_words_dataset(words, vocabulary_size, True)
-    >>> analogy_questions = tl.nlp.read_analogies_file( \
-                eval_file='questions-words.txt', word2id=dictionary)
-    >>> print(analogy_questions)
-    ... [[ 3068  1248  7161  1581]
-    ... [ 3068  1248 28683  5642]
-    ... [ 3068  1248  3878   486]
-    ... ...,
-    ... [ 1216  4309 19982 25506]
-    ... [ 1216  4309  3194  8650]
-    ... [ 1216  4309   140   312]]
-    """
-    questions = []
-    questions_skipped = 0
-    with open(eval_file, "rb") as analogy_f:
-      for line in analogy_f:
-          if line.startswith(b":"):  # Skip comments.
-                continue
-          words = line.strip().lower().split(b" ")  # lowercase
-          ids = [word2id.get(w.strip()) for w in words]
-          if None in ids or len(ids) != 4:
-              questions_skipped += 1
-          else:
-              questions.append(np.array(ids))
-    print("Eval analogy file: ", eval_file)
-    print("Questions: ", len(questions))
-    print("Skipped: ", questions_skipped)
-    analogy_questions = np.array(questions, dtype=np.int32)
-    return analogy_questions
-
-def build_vocab(data):
-    """Build vocabulary.
-    Given the context in list format.
-    Return the vocabulary, which is a dictionary for word to id.
-    e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... }
-
-    Parameters
-    ----------
-    data : a list of string
-        the context in list format
-
-    Returns
-    --------
-    word_to_id : a dictionary
-        mapping words to unique IDs. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... }
-
-    Code References
-    ---------------
-    - `tensorflow.models.rnn.ptb.reader <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/rnn/ptb>`_
-
-    Examples
-    --------
-    >>> data_path = os.getcwd() + '/simple-examples/data'
-    >>> train_path = os.path.join(data_path, "ptb.train.txt")
-    >>> word_to_id = build_vocab(read_txt_words(train_path))
-    """
-    # data = _read_words(filename)
-    counter = collections.Counter(data)
-    # print('counter', counter)   # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1
-    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
-    # print('count_pairs',count_pairs)  # convert dictionary to list of tuple, e.g. ('ssangyong', 1), ('swapo', 1), ('wachter', 1)
-    words, _ = list(zip(*count_pairs))
-    word_to_id = dict(zip(words, range(len(words))))
-    # print(words)    # list of words
-    # print(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746
-    return word_to_id
-
-def build_reverse_dictionary(word_to_id):
-    """Given a dictionary for converting word to integer id.
-    Returns a reverse dictionary for converting a id to word.
-
-    Parameters
-    ----------
-    word_to_id : dictionary
-        mapping words to unique ids
-
-    Returns
-    --------
-    reverse_dictionary : a dictionary
-        mapping ids to words
-    """
-    reverse_dictionary = dict(zip(word_to_id.values(), word_to_id.keys()))
-    return reverse_dictionary
-
-def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key = 'UNK'):
-    """Build the words dictionary and replace rare words with 'UNK' token.
-    The most common word has the smallest integer id.
-
-    Parameters
-    ----------
-    words : a list of string or byte
-        The context in list format. You may need to do preprocessing on the words,
-        such as lower case, remove marks etc.
-    vocabulary_size : an int
-        The maximum vocabulary size, limiting the vocabulary size.
-        Then the script replaces rare words with 'UNK' token.
-    printable : boolean
-        Whether to print the read vocabulary size of the given words.
-    unk_key : a string
-        Unknown words = unk_key
-
-    Returns
-    --------
-    data : a list of integer
-        The context in a list of ids
-    count : a list of tuple and list
-        count[0] is a list : the number of rare words\n
-        count[1:] are tuples : the number of occurrence of each word\n
-        e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)]
-    dictionary : a dictionary
-        word_to_id, mapping words to unique IDs.
-    reverse_dictionary : a dictionary
-        id_to_word, mapping id to unique word.
-
-    Examples
-    --------
-    >>> words = tl.files.load_matt_mahoney_text8_dataset()
-    >>> vocabulary_size = 50000
-    >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size)
-
-    Code References
-    -----------------
-    - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py <https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py>`_
-    """
-    import collections
-    count = [[unk_key, -1]]
-    count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
-    dictionary = dict()
-    for word, _ in count:
-        dictionary[word] = len(dictionary)
-    data = list()
-    unk_count = 0
-    for word in words:
-        if word in dictionary:
-            index = dictionary[word]
-        else:
-            index = 0  # dictionary['UNK']
-            unk_count += 1
-        data.append(index)
-    count[0][1] = unk_count
-    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
-    if printable:
-        print('Real vocabulary size    %d' % len(collections.Counter(words).keys()))
-        print('Limited vocabulary size {}'.format(vocabulary_size))
-    assert len(collections.Counter(words).keys()) >= vocabulary_size , \
-            "the limited vocabulary_size must be less than or equal to the read vocabulary_size"
-    return data, count, dictionary, reverse_dictionary
-
-def words_to_word_ids(data=[], word_to_id={}, unk_key = 'UNK'):
-    """Given a context (words) in list format and the vocabulary,
-    Returns a list of IDs to represent the context.
-
-    Parameters
-    ----------
-    data : a list of string or byte
-        the context in list format
-    word_to_id : a dictionary
-        mapping words to unique IDs.
-    unk_key : a string
-        Unknown words = unk_key
-
-    Returns
-    --------
-    A list of IDs to represent the context.
-
-    Examples
-    --------
-    >>> words = tl.files.load_matt_mahoney_text8_dataset()
-    >>> vocabulary_size = 50000
-    >>> data, count, dictionary, reverse_dictionary = \
-    ...         tl.nlp.build_words_dataset(words, vocabulary_size, True)
-    >>> context = [b'hello', b'how', b'are', b'you']
-    >>> ids = tl.nlp.words_to_word_ids(words, dictionary)
-    >>> context = tl.nlp.word_ids_to_words(ids, reverse_dictionary)
-    >>> print(ids)
-    ... [6434, 311, 26, 207]
-    >>> print(context)
-    ... [b'hello', b'how', b'are', b'you']
-
-    Code References
-    ---------------
-    - `tensorflow.models.rnn.ptb.reader <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/rnn/ptb>`_
-    """
-    # if isinstance(data[0], six.string_types):
-    #     print(type(data[0]))
-    #     # exit()
-    #     print(data[0])
-    #     print(word_to_id)
-    #     return [word_to_id[str(word)] for word in data]
-    # else:
-
-    word_ids = []
-    for word in data:
-        if word_to_id.get(word) is not None:
-            word_ids.append(word_to_id[word])
-        else:
-            word_ids.append(word_to_id[unk_key])
-    return word_ids
-    # return [word_to_id[word] for word in data]    # this one
-
-    # if isinstance(data[0], str):
-    #     # print('is a string object')
-    #     return [word_to_id[word] for word in data]
-    # else:#if isinstance(s, bytes):
-    #     # print('is a unicode object')
-    #     # print(data[0])
-    #     return [word_to_id[str(word)] f
-
-def word_ids_to_words(data, id_to_word):
-    """Given a context (ids) in list format and the vocabulary,
-    Returns a list of words to represent the context.
-
-    Parameters
-    ----------
-    data : a list of integer
-        the context in list format
-    id_to_word : a dictionary
-        mapping id to unique word.
-
-    Returns
-    --------
-    A list of string or byte to represent the context.
-
-    Examples
-    ---------
-    >>> see words_to_word_ids
-    """
-    return [id_to_word[i] for i in data]
-
-def save_vocab(count=[], name='vocab.txt'):
-    """Save the vocabulary to a file so the model can be reloaded.
-
-    Parameters
-    ----------
-    count : a list of tuple and list
-        count[0] is a list : the number of rare words\n
-        count[1:] are tuples : the number of occurrence of each word\n
-        e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)]
-
-    Examples
-    ---------
-    >>> words = tl.files.load_matt_mahoney_text8_dataset()
-    >>> vocabulary_size = 50000
-    >>> data, count, dictionary, reverse_dictionary = \
-    ...     tl.nlp.build_words_dataset(words, vocabulary_size, True)
-    >>> tl.nlp.save_vocab(count, name='vocab_text8.txt')
-    >>> vocab_text8.txt
-    ... UNK 418391
-    ... the 1061396
-    ... of 593677
-    ... and 416629
-    ... one 411764
-    ... in 372201
-    ... a 325873
-    ... to 316376
-    """
-    pwd = os.getcwd()
-    vocabulary_size = len(count)
-    with open(os.path.join(pwd, name), "w") as f:
-        for i in xrange(vocabulary_size):
-            f.write("%s %d\n" % (tf.compat.as_text(count[i][0]), count[i][1]))
-    print("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd))
-
-## Functions for translation
-def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")):
-  """Very basic tokenizer: split the sentence into a list of tokens.
-
-  Parameters
-  -----------
-  sentence : tensorflow.python.platform.gfile.GFile Object
-  _WORD_SPLIT : regular expression for word spliting.
-
-
-  Examples
-  --------
-  >>> see create_vocabulary
-  >>> from tensorflow.python.platform import gfile
-  >>> train_path = "wmt/giga-fren.release2"
-  >>> with gfile.GFile(train_path + ".en", mode="rb") as f:
-  >>>    for line in f:
-  >>>       tokens = tl.nlp.basic_tokenizer(line)
-  >>>       print(tokens)
-  >>>       exit()
-  ... [b'Changing', b'Lives', b'|', b'Changing', b'Society', b'|', b'How',
-  ...   b'It', b'Works', b'|', b'Technology', b'Drives', b'Change', b'Home',
-  ...   b'|', b'Concepts', b'|', b'Teachers', b'|', b'Search', b'|', b'Overview',
-  ...   b'|', b'Credits', b'|', b'HHCC', b'Web', b'|', b'Reference', b'|',
-  ...   b'Feedback', b'Virtual', b'Museum', b'of', b'Canada', b'Home', b'Page']
-
-  References
-  ----------
-  - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
-  """
-  words = []
-  sentence = tf.compat.as_bytes(sentence)
-  for space_separated_fragment in sentence.strip().split():
-    words.extend(re.split(_WORD_SPLIT, space_separated_fragment))
-  return [w for w in words if w]
-
-def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size,
-                      tokenizer=None, normalize_digits=True,
-                      _DIGIT_RE=re.compile(br"\d"),
-                      _START_VOCAB=[b"_PAD", b"_GO", b"_EOS", b"_UNK"]):
-  """Create vocabulary file (if it does not exist yet) from data file.
-
-  Data file is assumed to contain one sentence per line. Each sentence is
-  tokenized and digits are normalized (if normalize_digits is set).
-  Vocabulary contains the most-frequent tokens up to max_vocabulary_size.
-  We write it to vocabulary_path in a one-token-per-line format, so that later
-  token in the first line gets id=0, second line gets id=1, and so on.
-
-  Parameters
-  -----------
-  vocabulary_path : path where the vocabulary will be created.
-  data_path : data file that will be used to create vocabulary.
-  max_vocabulary_size : limit on the size of the created vocabulary.
-  tokenizer : a function to use to tokenize each data sentence.
-        if None, basic_tokenizer will be used.
-  normalize_digits : Boolean
-        if true, all digits are replaced by 0s.
-
-  References
-  ----------
-  - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
-  """
-  if not gfile.Exists(vocabulary_path):
-    print("Creating vocabulary %s from data %s" % (vocabulary_path, data_path))
-    vocab = {}
-    with gfile.GFile(data_path, mode="rb") as f:
-      counter = 0
-      for line in f:
-        counter += 1
-        if counter % 100000 == 0:
-          print("  processing line %d" % counter)
-        tokens = tokenizer(line) if tokenizer else basic_tokenizer(line)
-        for w in tokens:
-          word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w
-          if word in vocab:
-            vocab[word] += 1
-          else:
-            vocab[word] = 1
-      vocab_list = _START_VOCAB + sorted(vocab, key=vocab.get, reverse=True)
-      if len(vocab_list) > max_vocabulary_size:
-        vocab_list = vocab_list[:max_vocabulary_size]
-      with gfile.GFile(vocabulary_path, mode="wb") as vocab_file:
-        for w in vocab_list:
-          vocab_file.write(w + b"\n")
-  else:
-    print("Vocabulary %s from data %s exists" % (vocabulary_path, data_path))
-
-def initialize_vocabulary(vocabulary_path):
-  """Initialize vocabulary from file, return the word_to_id (dictionary)
-  and id_to_word (list).
-
-  We assume the vocabulary is stored one-item-per-line, so a file:\n
-    dog\n
-    cat\n
-  will result in a vocabulary {"dog": 0, "cat": 1}, and this function will
-  also return the reversed-vocabulary ["dog", "cat"].
-
-  Parameters
-  -----------
-  vocabulary_path : path to the file containing the vocabulary.
-
-  Returns
-  --------
-  vocab : a dictionary
-        Word to id. A dictionary mapping string to integers.
-  rev_vocab : a list
-        Id to word. The reversed vocabulary (a list, which reverses the vocabulary mapping).
-
-  Examples
-  ---------
-  >>> Assume 'test' contains
-  ... dog
-  ... cat
-  ... bird
-  >>> vocab, rev_vocab = tl.nlp.initialize_vocabulary("test")
-  >>> print(vocab)
-  >>> {b'cat': 1, b'dog': 0, b'bird': 2}
-  >>> print(rev_vocab)
-  >>> [b'dog', b'cat', b'bird']
-
-  Raises
-  -------
-  ValueError : if the provided vocabulary_path does not exist.
-  """
-  if gfile.Exists(vocabulary_path):
-    rev_vocab = []
-    with gfile.GFile(vocabulary_path, mode="rb") as f:
-      rev_vocab.extend(f.readlines())
-    rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab]
-    vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)])
-    return vocab, rev_vocab
-  else:
-    raise ValueError("Vocabulary file %s not found.", vocabulary_path)
-
-def sentence_to_token_ids(sentence, vocabulary,
-                          tokenizer=None, normalize_digits=True,
-                          UNK_ID=3, _DIGIT_RE=re.compile(br"\d")):
-  """Convert a string to list of integers representing token-ids.
-
-  For example, a sentence "I have a dog" may become tokenized into
-  ["I", "have", "a", "dog"] and with vocabulary {"I": 1, "have": 2,
-  "a": 4, "dog": 7"} this function will return [1, 2, 4, 7].
-
-  Parameters
-  -----------
-  sentence :  tensorflow.python.platform.gfile.GFile Object
-        The sentence in bytes format to convert to token-ids.\n
-        see basic_tokenizer(), data_to_token_ids()
-  vocabulary : a dictionary mapping tokens to integers.
-  tokenizer : a function to use to tokenize each sentence;
-        If None, basic_tokenizer will be used.
-  normalize_digits : Boolean
-        If true, all digits are replaced by 0s.
-
-  Returns
-  --------
-  A list of integers, the token-ids for the sentence.
-  """
-
-  if tokenizer:
-    words = tokenizer(sentence)
-  else:
-    words = basic_tokenizer(sentence)
-  if not normalize_digits:
-    return [vocabulary.get(w, UNK_ID) for w in words]
-  # Normalize digits by 0 before looking words up in the vocabulary.
-  return [vocabulary.get(re.sub(_DIGIT_RE, b"0", w), UNK_ID) for w in words]
-
-def data_to_token_ids(data_path, target_path, vocabulary_path,
-                      tokenizer=None, normalize_digits=True,
-                      UNK_ID=3, _DIGIT_RE=re.compile(br"\d")):
-  """Tokenize data file and turn into token-ids using given vocabulary file.
-
-  This function loads data line-by-line from data_path, calls the above
-  sentence_to_token_ids, and saves the result to target_path. See comment
-  for sentence_to_token_ids on the details of token-ids format.
-
-  Parameters
-  -----------
-  data_path : path to the data file in one-sentence-per-line format.
-  target_path : path where the file with token-ids will be created.
-  vocabulary_path : path to the vocabulary file.
-  tokenizer : a function to use to tokenize each sentence;
-      if None, basic_tokenizer will be used.
-  normalize_digits : Boolean; if true, all digits are replaced by 0s.
-
-  References
-  ----------
-  - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
-  """
-  if not gfile.Exists(target_path):
-    print("Tokenizing data in %s" % data_path)
-    vocab, _ = initialize_vocabulary(vocabulary_path)
-    with gfile.GFile(data_path, mode="rb") as data_file:
-      with gfile.GFile(target_path, mode="w") as tokens_file:
-        counter = 0
-        for line in data_file:
-          counter += 1
-          if counter % 100000 == 0:
-            print("  tokenizing line %d" % counter)
-          token_ids = sentence_to_token_ids(line, vocab, tokenizer,
-                                            normalize_digits, UNK_ID=UNK_ID,
-                                            _DIGIT_RE=_DIGIT_RE)
-          tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n")
-  else:
-    print("Target path %s exists" % target_path)
diff --git a/_tensorlayer/ops.py b/_tensorlayer/ops.py
deleted file mode 100755
index 608799c..0000000
--- a/_tensorlayer/ops.py
+++ /dev/null
@@ -1,219 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-
-
-import tensorflow as tf
-import os
-import sys
-from sys import platform as _platform
-
-
-def exit_tf(sess=None):
-    """Close tensorboard and nvidia-process if available
-
-    Parameters
-    ----------
-    sess : a session instance of TensorFlow
-        TensorFlow session
-    """
-    text = "[tl] Close tensorboard and nvidia-process if available"
-    sess.close()
-    # import time
-    # time.sleep(2)
-    if _platform == "linux" or _platform == "linux2":
-        print('linux: %s' % text)
-        os.system('nvidia-smi')
-        os.system('fuser 6006/tcp -k')  # kill tensorboard 6006
-        os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill") # kill all nvidia-smi python process
-    elif _platform == "darwin":
-        print('OS X: %s' % text)
-        os.system("lsof -i tcp:6006 | grep -v PID | awk '{print $2}' | xargs kill") # kill tensorboard 6006
-    elif _platform == "win32":
-        print('Windows: %s' % text)
-    else:
-        print(_platform)
-    exit()
-
-def clear_all(printable=True):
-    """Clears all the placeholder variables of keep prob,
-    including keeping probabilities of all dropout, denoising, dropconnect etc.
-
-    Parameters
-    ----------
-    printable : boolean
-        If True, print all deleted variables.
-    """
-    print('clear all .....................................')
-    gl = globals().copy()
-    for var in gl:
-        if var[0] == '_': continue
-        if 'func' in str(globals()[var]): continue
-        if 'module' in str(globals()[var]): continue
-        if 'class' in str(globals()[var]): continue
-
-        if printable:
-            print(" clear_all ------- %s" % str(globals()[var]))
-
-        del globals()[var]
-
-# def clear_all2(vars, printable=True):
-#     """
-#     The :function:`clear_all()` Clears all the placeholder variables of keep prob,
-#     including keeping probabilities of all dropout, denoising, dropconnect
-#     Parameters
-#     ----------
-#     printable : if True, print all deleted variables.
-#     """
-#     print('clear all .....................................')
-#     for var in vars:
-#         if var[0] == '_': continue
-#         if 'func' in str(var): continue
-#         if 'module' in str(var): continue
-#         if 'class' in str(var): continue
-#
-#         if printable:
-#             print(" clear_all ------- %s" % str(var))
-#
-#         del var
-
-def set_gpu_fraction(sess=None, gpu_fraction=0.3):
-    """Set the GPU memory fraction for the application.
-
-    Parameters
-    ----------
-    sess : a session instance of TensorFlow
-        TensorFlow session
-    gpu_fraction : a float
-        Fraction of GPU memory, (0 ~ 1]
-
-    References
-    ----------
-    - `TensorFlow using GPU <https://www.tensorflow.org/versions/r0.9/how_tos/using_gpu/index.html>`_
-    """
-    print("  tensorlayer: GPU MEM Fraction %f" % gpu_fraction)
-    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
-    sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
-    return sess
-
-
-
-
-
-def disable_print():
-    """Disable console output, ``suppress_stdout`` is recommended.
-
-    Examples
-    ---------
-    >>> print("You can see me")
-    >>> tl.ops.disable_print()
-    >>> print(" You can't see me")
-    >>> tl.ops.enable_print()
-    >>> print("You can see me")
-    """
-    # sys.stdout = os.devnull   # this one kill the process
-    sys.stdout = None
-    sys.stderr = os.devnull
-
-def enable_print():
-    """Enable console output, ``suppress_stdout`` is recommended.
-
-    Examples
-    --------
-    - see tl.ops.disable_print()
-    """
-    sys.stdout = sys.__stdout__
-    sys.stderr = sys.__stderr__
-
-
-# class temporary_disable_print:
-#     """Temporarily disable console output.
-#
-#     Examples
-#     ---------
-#     >>> print("You can see me")
-#     >>> with tl.ops.temporary_disable_print() as t:
-#     >>>     print("You can't see me")
-#     >>> print("You can see me")
-#     """
-#     def __init__(self):
-#         pass
-#     def __enter__(self):
-#         sys.stdout = None
-#         sys.stderr = os.devnull
-#     def __exit__(self, type, value, traceback):
-#         sys.stdout = sys.__stdout__
-#         sys.stderr = sys.__stderr__
-#         return isinstance(value, TypeError)
-
-
-from contextlib import contextmanager
-@contextmanager
-def suppress_stdout():
-    """Temporarily disable console output.
-
-    Examples
-    ---------
-    >>> print("You can see me")
-    >>> with tl.ops.suppress_stdout():
-    >>>     print("You can't see me")
-    >>> print("You can see me")
-
-    References
-    -----------
-    - `stackoverflow <http://stackoverflow.com/questions/2125702/how-to-suppress-console-output-in-python>`_
-    """
-    with open(os.devnull, "w") as devnull:
-        old_stdout = sys.stdout
-        sys.stdout = devnull
-        try:
-            yield
-        finally:
-            sys.stdout = old_stdout
-
-
-
-def get_site_packages_directory():
-    """Print and return the site-packages directory.
-
-    Examples
-    ---------
-    >>> loc = tl.ops.get_site_packages_directory()
-    """
-    import site
-    try:
-        loc = site.getsitepackages()
-        print("  tl.ops : site-packages in ", loc)
-        return loc
-    except:
-        print("  tl.ops : Cannot find package dir from virtual environment")
-        return False
-
-
-
-def empty_trash():
-    """Empty trash folder.
-
-    """
-    text = "[tl] Empty the trash"
-    if _platform == "linux" or _platform == "linux2":
-        print('linux: %s' % text)
-        os.system("rm -rf ~/.local/share/Trash/*")
-    elif _platform == "darwin":
-        print('OS X: %s' % text)
-        os.system("sudo rm -rf ~/.Trash/*")
-    elif _platform == "win32":
-        print('Windows: %s' % text)
-        try:
-            os.system("rd /s c:\$Recycle.Bin")  # Windows 7 or Server 2008
-        except:
-            pass
-        try:
-            os.system("rd /s c:\recycler")  #  Windows XP, Vista, or Server 2003
-        except:
-            pass
-    else:
-        print(_platform)
-
-#
diff --git a/_tensorlayer/prepro.py b/_tensorlayer/prepro.py
deleted file mode 100755
index 047d646..0000000
--- a/_tensorlayer/prepro.py
+++ /dev/null
@@ -1,1542 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-import tensorflow as tf
-import tensorlayer as tl
-import numpy as np
-
-import time
-import numbers
-import random
-import os
-import re
-import sys
-
-import threading
-# import Queue  # <-- donot work for py3
-is_py2 = sys.version[0] == '2'
-if is_py2:
-    import Queue as queue
-else:
-    import queue as queue
-
-from six.moves import range
-import scipy
-from scipy import linalg
-import scipy.ndimage as ndi
-
-from skimage import transform
-from skimage import exposure
-import skimage
-
-# linalg https://docs.scipy.org/doc/scipy/reference/linalg.html
-# ndimage https://docs.scipy.org/doc/scipy/reference/ndimage.html
-
-## Threading
-def threading_data(data=None, fn=None, **kwargs):
-    """Return a batch of result by given data.
-    Usually be used for data augmentation.
-
-    Parameters
-    -----------
-    data : numpy array or zip of numpy array, see Examples below.
-    fn : the function for data processing.
-    more args : the args for fn, see Examples below.
-
-    Examples
-    --------
-    - Single array
-    >>> X --> [batch_size, row, col, 1] greyscale
-    >>> results = threading_data(X, zoom, zoom_range=[0.5, 1], is_random=True)
-    ... results --> [batch_size, row, col, channel]
-    >>> tl.visualize.images2d(images=np.asarray(results), second=0.01, saveable=True, name='after', dtype=None)
-    >>> tl.visualize.images2d(images=np.asarray(X), second=0.01, saveable=True, name='before', dtype=None)
-
-    - List of array (e.g. functions with ``multi``)
-    >>> X, Y --> [batch_size, row, col, 1]  greyscale
-    >>> data = threading_data([_ for _ in zip(X, Y)], zoom_multi, zoom_range=[0.5, 1], is_random=True)
-    ... data --> [batch_size, 2, row, col, 1]
-    >>> X_, Y_ = data.transpose((1,0,2,3,4))
-    ... X_, Y_ --> [batch_size, row, col, 1]
-    >>> tl.visualize.images2d(images=np.asarray(X_), second=0.01, saveable=True, name='after', dtype=None)
-    >>> tl.visualize.images2d(images=np.asarray(Y_), second=0.01, saveable=True, name='before', dtype=None)
-
-    - Customized function for image segmentation
-    >>> def distort_img(data):
-    ...     x, y = data
-    ...     x, y = flip_axis_multi([x, y], axis=0, is_random=True)
-    ...     x, y = flip_axis_multi([x, y], axis=1, is_random=True)
-    ...     x, y = crop_multi([x, y], 100, 100, is_random=True)
-    ...     return x, y
-    >>> X, Y --> [batch_size, row, col, channel]
-    >>> data = threading_data([_ for _ in zip(X, Y)], distort_img)
-    >>> X_, Y_ = data.transpose((1,0,2,3,4))
-
-    References
-    ----------
-    - `python queue <https://pymotw.com/2/Queue/index.html#module-Queue>`_
-    - `run with limited queue <http://effbot.org/librarybook/queue.htm>`_
-    """
-    ## plot function info
-    # for name, value in kwargs.items():
-    #     print('{0} = {1}'.format(name, value))
-    # exit()
-    # define function for threading
-    def apply_fn(results, i, data, kwargs):
-        results[i] = fn(data, **kwargs)
-
-    ## start multi-threaded reading.
-    results = [None] * len(data) ## preallocate result list
-    threads = []
-    for i in range(len(data)):
-        t = threading.Thread(
-                        name='threading_and_return',
-                        target=apply_fn,
-                        args=(results, i, data[i], kwargs)
-                        )
-        t.start()
-        threads.append(t)
-
-    ## <Milo> wait for all threads to complete
-    for t in threads:
-        t.join()
-
-    return np.asarray(results)
-
-    ## old implementation
-    # define function for threading
-    # def function(q, i, data, kwargs):
-    #     result = fn(data, **kwargs)
-    #     q.put([i, result])
-    # ## start threading
-    # q = queue.Queue()
-    # threads = []
-    # for i in range(len(data)):
-    #     t = threading.Thread(
-    #                     name='threading_and_return',
-    #                     target=function,
-    #                     args=(q, i, data[i], kwargs)
-    #                     )
-    #     t.start()
-    #     threads.append(t)
-    #
-    # ## <Milo> wait for all threads to complete
-    # for t in threads:
-    #     t.join()
-    #
-    # ## get results
-    # results = []
-    # for i in range(len(data)):
-    #     result = q.get()
-    #     results.append(result)
-    # results = sorted(results)
-    # for i in range(len(results)):
-    #     results[i] = results[i][1]
-    # return np.asarray(results)
-
-
-## Image
-def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2,
-                    fill_mode='nearest', cval=0.):
-    """Rotate an image randomly or non-randomly.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    rg : int or float
-        Degree to rotate, usually 0 ~ 180.
-    is_random : boolean, default False
-        If True, randomly rotate.
-    row_index, col_index, channel_index : int
-        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
-    fill_mode : string
-        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-    cval : scalar, optional
-        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-
-    Examples
-    ---------
-    >>> x --> [row, col, 1] greyscale
-    >>> x = rotation(x, rg=40, is_random=False)
-    >>> tl.visualize.frame(x[:,:,0], second=0.01, saveable=True, name='temp',cmap='gray')
-    """
-    if is_random:
-        theta = np.pi / 180 * np.random.uniform(-rg, rg)
-    else:
-        theta = np.pi /180 * rg
-    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
-                                [np.sin(theta), np.cos(theta), 0],
-                                [0, 0, 1]])
-
-    h, w = x.shape[row_index], x.shape[col_index]
-    transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
-    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
-    return x
-
-def rotation_multi(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2,
-                    fill_mode='nearest', cval=0.):
-    """Rotate multiple images with the same arguments, randomly or non-randomly.
-    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
-
-    Parameters
-    -----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``rotation``.
-
-    Examples
-    --------
-    >>> x, y --> [row, col, 1]  greyscale
-    >>> x, y = rotation_multi([x, y], rg=90, is_random=False)
-    >>> tl.visualize.frame(x[:,:,0], second=0.01, saveable=True, name='x',cmap='gray')
-    >>> tl.visualize.frame(y[:,:,0], second=0.01, saveable=True, name='y',cmap='gray')
-    """
-    if is_random:
-        theta = np.pi / 180 * np.random.uniform(-rg, rg)
-    else:
-        theta = np.pi /180 * rg
-    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
-                                [np.sin(theta), np.cos(theta), 0],
-                                [0, 0, 1]])
-
-    h, w = x[0].shape[row_index], x[0].shape[col_index]
-    transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
-    results = []
-    for data in x:
-        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
-    return np.asarray(results)
-
-# crop
-def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
-    """Randomly or centrally crop an image.
-
-    Parameters
-    ----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    wrg : float
-        Size of weight.
-    hrg : float
-        Size of height.
-    is_random : boolean, default False
-        If True, randomly crop, else central crop.
-    row_index, col_index, channel_index : int
-        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
-    """
-    h, w = x.shape[row_index], x.shape[col_index]
-    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
-    if is_random:
-        h_offset = int(np.random.uniform(0, h-hrg) -1)
-        w_offset = int(np.random.uniform(0, w-wrg) -1)
-        # print(h_offset, w_offset, x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset].shape)
-        return x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset]
-    else:   # central crop
-        h_offset = int(np.floor((h - hrg)/2.))
-        w_offset = int(np.floor((w - wrg)/2.))
-        h_end = h_offset + hrg
-        w_end = w_offset + wrg
-        return x[h_offset: h_end, w_offset: w_end]
-        # old implementation
-        # h_offset = (h - hrg)/2
-        # w_offset = (w - wrg)/2
-        # # print(x[h_offset: h-h_offset ,w_offset: w-w_offset].shape)
-        # return x[h_offset: h-h_offset ,w_offset: w-w_offset]
-        # central crop
-
-
-def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
-    """Randomly or centrally crop multiple images.
-
-    Parameters
-    ----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``crop``.
-    """
-    h, w = x[0].shape[row_index], x[0].shape[col_index]
-    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
-    if is_random:
-        h_offset = int(np.random.uniform(0, h-hrg) -1)
-        w_offset = int(np.random.uniform(0, w-wrg) -1)
-        results = []
-        for data in x:
-            results.append( data[h_offset: hrg+h_offset ,w_offset: wrg+w_offset])
-        return np.asarray(results)
-    else:
-        # central crop
-        h_offset = (h - hrg)/2
-        w_offset = (w - wrg)/2
-        results = []
-        for data in x:
-            results.append( data[h_offset: h-h_offset ,w_offset: w-w_offset] )
-        return np.asarray(results)
-
-# flip
-def flip_axis(x, axis, is_random=False):
-    """Flip the axis of an image, such as flip left and right, up and down, randomly or non-randomly,
-
-    Parameters
-    ----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    axis : int
-        - 0, flip up and down
-        - 1, flip left and right
-        - 2, flip channel
-    is_random : boolean, default False
-        If True, randomly flip.
-    """
-    if is_random:
-        factor = np.random.uniform(-1, 1)
-        if factor > 0:
-            x = np.asarray(x).swapaxes(axis, 0)
-            x = x[::-1, ...]
-            x = x.swapaxes(0, axis)
-            return x
-        else:
-            return x
-    else:
-        x = np.asarray(x).swapaxes(axis, 0)
-        x = x[::-1, ...]
-        x = x.swapaxes(0, axis)
-        return x
-
-def flip_axis_multi(x, axis, is_random=False):
-    """Flip the axises of multiple images together, such as flip left and right, up and down, randomly or non-randomly,
-
-    Parameters
-    -----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``flip_axis``.
-    """
-    if is_random:
-        factor = np.random.uniform(-1, 1)
-        if factor > 0:
-            # x = np.asarray(x).swapaxes(axis, 0)
-            # x = x[::-1, ...]
-            # x = x.swapaxes(0, axis)
-            # return x
-            results = []
-            for data in x:
-                data = np.asarray(data).swapaxes(axis, 0)
-                data = data[::-1, ...]
-                data = data.swapaxes(0, axis)
-                results.append( data )
-            return np.asarray(results)
-        else:
-            return np.asarray(x)
-    else:
-        # x = np.asarray(x).swapaxes(axis, 0)
-        # x = x[::-1, ...]
-        # x = x.swapaxes(0, axis)
-        # return x
-        results = []
-        for data in x:
-            data = np.asarray(data).swapaxes(axis, 0)
-            data = data[::-1, ...]
-            data = data.swapaxes(0, axis)
-            results.append( data )
-        return np.asarray(results)
-
-# shift
-def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
-                 fill_mode='nearest', cval=0.):
-    """Shift an image randomly or non-randomly.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    wrg : float
-        Percentage of shift in axis x, usually -0.25 ~ 0.25.
-    hrg : float
-        Percentage of shift in axis y, usually -0.25 ~ 0.25.
-    is_random : boolean, default False
-        If True, randomly shift.
-    row_index, col_index, channel_index : int
-        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
-    fill_mode : string
-        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-    cval : scalar, optional
-        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-    """
-    h, w = x.shape[row_index], x.shape[col_index]
-    if is_random:
-        tx = np.random.uniform(-hrg, hrg) * h
-        ty = np.random.uniform(-wrg, wrg) * w
-    else:
-        tx, ty = hrg * h, wrg * w
-    translation_matrix = np.array([[1, 0, tx],
-                                   [0, 1, ty],
-                                   [0, 0, 1]])
-
-    transform_matrix = translation_matrix  # no need to do offset
-    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
-    return x
-
-def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
-                 fill_mode='nearest', cval=0.):
-    """Shift images with the same arguments, randomly or non-randomly.
-    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
-
-    Parameters
-    -----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``shift``.
-    """
-    h, w = x[0].shape[row_index], x[0].shape[col_index]
-    if is_random:
-        tx = np.random.uniform(-hrg, hrg) * h
-        ty = np.random.uniform(-wrg, wrg) * w
-    else:
-        tx, ty = hrg * h, wrg * w
-    translation_matrix = np.array([[1, 0, tx],
-                                   [0, 1, ty],
-                                   [0, 0, 1]])
-
-    transform_matrix = translation_matrix  # no need to do offset
-    results = []
-    for data in x:
-        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
-    return np.asarray(results)
-
-# shear
-def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
-                 fill_mode='nearest', cval=0.):
-    """Shear an image randomly or non-randomly.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    intensity : float
-        Percentage of shear, usually -0.5 ~ 0.5 (is_random==True), 0 ~ 0.5 (is_random==False),
-        you can have a quick try by shear(X, 1).
-    is_random : boolean, default False
-        If True, randomly shear.
-    row_index, col_index, channel_index : int
-        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
-    fill_mode : string
-        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-    cval : scalar, optional
-        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-    """
-    if is_random:
-        shear = np.random.uniform(-intensity, intensity)
-    else:
-        shear = intensity
-    shear_matrix = np.array([[1, -np.sin(shear), 0],
-                             [0, np.cos(shear), 0],
-                             [0, 0, 1]])
-
-    h, w = x.shape[row_index], x.shape[col_index]
-    transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
-    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
-    return x
-
-def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
-                 fill_mode='nearest', cval=0.):
-    """Shear images with the same arguments, randomly or non-randomly.
-    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
-
-    Parameters
-    -----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``shear``.
-    """
-    if is_random:
-        shear = np.random.uniform(-intensity, intensity)
-    else:
-        shear = intensity
-    shear_matrix = np.array([[1, -np.sin(shear), 0],
-                             [0, np.cos(shear), 0],
-                             [0, 0, 1]])
-
-    h, w = x[0].shape[row_index], x[0].shape[col_index]
-    transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
-    results = []
-    for data in x:
-        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
-    return np.asarray(results)
-
-# swirl
-def swirl(x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, clip=True, preserve_range=False, is_random=False):
-    """Swirl an image randomly or non-randomly, see `scikit-image swirl API <http://scikit-image.org/docs/dev/api/skimage.transform.html#skimage.transform.swirl>`_
-    and `example <http://scikit-image.org/docs/dev/auto_examples/plot_swirl.html>`_.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    center : (row, column) tuple or (2,) ndarray, optional
-        Center coordinate of transformation.
-    strength : float, optional
-        The amount of swirling applied.
-    radius : float, optional
-        The extent of the swirl in pixels. The effect dies out rapidly beyond radius.
-    rotation : float, (degree) optional
-        Additional rotation applied to the image, usually [0, 360], relates to center.
-    output_shape : tuple (rows, cols), optional
-        Shape of the output image generated. By default the shape of the input image is preserved.
-    order : int, optional
-        The order of the spline interpolation, default is 1. The order has to be in the range 0-5. See skimage.transform.warp for detail.
-    mode : {‘constant’, ‘edge’, ‘symmetric’, ‘reflect’, ‘wrap’}, optional
-        Points outside the boundaries of the input are filled according to the given mode, with ‘constant’ used as the default. Modes match the behaviour of numpy.pad.
-    cval : float, optional
-        Used in conjunction with mode ‘constant’, the value outside the image boundaries.
-    clip : bool, optional
-        Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range.
-    preserve_range : bool, optional
-        Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float.
-    is_random : boolean, default False
-        If True, random swirl.
-            - random center = [(0 ~ x.shape[0]), (0 ~ x.shape[1])]
-            - random strength = [0, strength]
-            - random radius = [1e-10, radius]
-            - random rotation = [-rotation, rotation]
-
-    Examples
-    ---------
-    >>> x --> [row, col, 1] greyscale
-    >>> x = swirl(x, strength=4, radius=100)
-    """
-    assert radius != 0, Exception("Invalid radius value")
-    rotation = np.pi / 180 * rotation
-    if is_random:
-        center_h = int(np.random.uniform(0, x.shape[0]))
-        center_w = int(np.random.uniform(0, x.shape[1]))
-        center = (center_h, center_w)
-        strength = np.random.uniform(0, strength)
-        radius = np.random.uniform(1e-10, radius)
-        rotation = np.random.uniform(-rotation, rotation)
-
-    max_v = np.max(x)
-    if max_v > 1:   # Note: the input of this fn should be [-1, 1], rescale is required.
-        x = x / max_v
-    swirled = skimage.transform.swirl(x, center=center, strength=strength, radius=radius, rotation=rotation,
-        output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range)
-    if max_v > 1:
-        swirled = swirled * max_v
-    return swirled
-
-def swirl_multi(x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, clip=True, preserve_range=False, is_random=False):
-    """Swirl multiple images with the same arguments, randomly or non-randomly.
-    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
-
-    Parameters
-    -----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``swirl``.
-    """
-    assert radius != 0, Exception("Invalid radius value")
-    rotation = np.pi / 180 * rotation
-    if is_random:
-        center_h = int(np.random.uniform(0, x[0].shape[0]))
-        center_w = int(np.random.uniform(0, x[0].shape[1]))
-        center = (center_h, center_w)
-        strength = np.random.uniform(0, strength)
-        radius = np.random.uniform(1e-10, radius)
-        rotation = np.random.uniform(-rotation, rotation)
-
-    results = []
-    for data in x:
-        max_v = np.max(data)
-        if max_v > 1:   # Note: the input of this fn should be [-1, 1], rescale is required.
-            data = data / max_v
-        swirled = skimage.transform.swirl(data, center=center, strength=strength, radius=radius, rotation=rotation,
-            output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range)
-        if max_v > 1:
-            swirled = swirled * max_v
-        results.append( swirled )
-    return np.asarray(results)
-
-# elastic_transform
-
-from scipy.ndimage.interpolation import map_coordinates
-from scipy.ndimage.filters import gaussian_filter
-def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False):
-    """Elastic deformation of images as described in `[Simard2003] <http://deeplearning.cs.cmu.edu/pdfs/Simard.pdf>`_ .
-
-    Parameters
-    -----------
-    x : numpy array, a greyscale image.
-    alpha : scalar factor.
-    sigma : scalar or sequence of scalars, the smaller the sigma, the more transformation.
-        Standard deviation for Gaussian kernel. The standard deviations of the Gaussian filter are given for each axis as a sequence, or as a single number, in which case it is equal for all axes.
-    mode : default constant, see `scipy.ndimage.filters.gaussian_filter <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.filters.gaussian_filter.html>`_.
-    cval : float, optional. Used in conjunction with mode ‘constant’, the value outside the image boundaries.
-    is_random : boolean, default False
-
-    Examples
-    ---------
-    >>> x = elastic_transform(x, alpha = x.shape[1] * 3, sigma = x.shape[1] * 0.07)
-
-    References
-    ------------
-    - `Github <https://gist.github.com/chsasank/4d8f68caf01f041a6453e67fb30f8f5a>`_.
-    - `Kaggle <https://www.kaggle.com/pscion/ultrasound-nerve-segmentation/elastic-transform-for-data-augmentation-0878921a>`_
-    """
-    if is_random is False:
-        random_state = np.random.RandomState(None)
-    else:
-        random_state = np.random.RandomState(int(time.time()))
-    #
-    is_3d = False
-    if len(x.shape) == 3 and x.shape[-1] == 1:
-        x = x[:,:,0]
-        is_3d = True
-    elif len(x.shape) == 3 and x.shape[-1] != 1:
-        raise Exception("Only support greyscale image")
-    assert len(x.shape)==2
-
-    shape = x.shape
-
-    dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha
-    dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha
-
-    x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
-    indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1))
-    if is_3d:
-        return map_coordinates(x, indices, order=1).reshape((shape[0], shape[1], 1))
-    else:
-        return map_coordinates(x, indices, order=1).reshape(shape)
-
-def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random=False):
-    """Elastic deformation of images as described in `[Simard2003] <http://deeplearning.cs.cmu.edu/pdfs/Simard.pdf>`_.
-
-    Parameters
-    -----------
-    x : list of numpy array
-    others : see ``elastic_transform``.
-    """
-    if is_random is False:
-        random_state = np.random.RandomState(None)
-    else:
-        random_state = np.random.RandomState(int(time.time()))
-
-    shape = x[0].shape
-    if len(shape) == 3:
-        shape = (shape[0], shape[1])
-    new_shape = random_state.rand(*shape)
-
-    results = []
-    for data in x:
-        is_3d = False
-        if len(data.shape) == 3 and data.shape[-1] == 1:
-            data = data[:,:,0]
-            is_3d = True
-        elif len(data.shape) == 3 and data.shape[-1] != 1:
-            raise Exception("Only support greyscale image")
-        assert len(data.shape)==2
-
-        dx = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha
-        dy = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha
-
-        x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
-        indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1))
-        # print(data.shape)
-        if is_3d:
-            results.append( map_coordinates(data, indices, order=1).reshape((shape[0], shape[1], 1)))
-        else:
-            results.append( map_coordinates(data, indices, order=1).reshape(shape) )
-    return np.asarray(results)
-
-# zoom
-def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2,
-                fill_mode='nearest', cval=0.):
-    """Zoom in and out of a single image, randomly or non-randomly.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    zoom_range : list or tuple
-        - If is_random=False, (h, w) are the fixed zoom factor for row and column axies, factor small than one is zoom in.
-        - If is_random=True, (min zoom out, max zoom out) for x and y with different random zoom in/out factor.
-        e.g (0.5, 1) zoom in 1~2 times.
-    is_random : boolean, default False
-        If True, randomly zoom.
-    row_index, col_index, channel_index : int
-        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
-    fill_mode : string
-        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-    cval : scalar, optional
-        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-    """
-    if len(zoom_range) != 2:
-        raise Exception('zoom_range should be a tuple or list of two floats. '
-                        'Received arg: ', zoom_range)
-    if is_random:
-        if zoom_range[0] == 1 and zoom_range[1] == 1:
-            zx, zy = 1, 1
-            print(" random_zoom : not zoom in/out")
-        else:
-            zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
-    else:
-        zx, zy = zoom_range
-    # print(zx, zy)
-    zoom_matrix = np.array([[zx, 0, 0],
-                            [0, zy, 0],
-                            [0, 0, 1]])
-
-    h, w = x.shape[row_index], x.shape[col_index]
-    transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
-    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
-    return x
-
-def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False,
-        row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0.):
-    """Zoom in and out of images with the same arguments, randomly or non-randomly.
-    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
-
-    Parameters
-    -----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``zoom``.
-    """
-    if len(zoom_range) != 2:
-        raise Exception('zoom_range should be a tuple or list of two floats. '
-                        'Received arg: ', zoom_range)
-
-    if is_random:
-        if zoom_range[0] == 1 and zoom_range[1] == 1:
-            zx, zy = 1, 1
-            print(" random_zoom : not zoom in/out")
-        else:
-            zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
-    else:
-        zx, zy = zoom_range
-
-    zoom_matrix = np.array([[zx, 0, 0],
-                            [0, zy, 0],
-                            [0, 0, 1]])
-
-    h, w = x[0].shape[row_index], x[0].shape[col_index]
-    transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
-    # x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
-    # return x
-    results = []
-    for data in x:
-        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
-    return np.asarray(results)
-
-# image = tf.image.random_brightness(image, max_delta=32. / 255.)
-# image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
-# image = tf.image.random_hue(image, max_delta=0.032)
-# image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
-
-# brightness
-def brightness(x, gamma=1, gain=1, is_random=False):
-    """Change the brightness of a single image, randomly or non-randomly.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    gamma : float, small than 1 means brighter.
-        Non negative real number. Default value is 1.
-
-        - If is_random is True, gamma in a range of (1-gamma, 1+gamma).
-    gain : float
-        The constant multiplier. Default value is 1.
-    is_random : boolean, default False
-        - If True, randomly change brightness.
-
-    References
-    -----------
-    - `skimage.exposure.adjust_gamma <http://scikit-image.org/docs/dev/api/skimage.exposure.html>`_
-    - `chinese blog <http://www.cnblogs.com/denny402/p/5124402.html>`_
-    """
-    if is_random:
-        gamma = np.random.uniform(1-gamma, 1+gamma)
-    x = exposure.adjust_gamma(x, gamma, gain)
-    return x
-
-def brightness_multi(x, gamma=1, gain=1, is_random=False):
-    """Change the brightness of multiply images, randomly or non-randomly.
-    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
-
-    Parameters
-    -----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``brightness``.
-    """
-    if is_random:
-        gamma = np.random.uniform(1-gamma, 1+gamma)
-
-    results = []
-    for data in x:
-        results.append( exposure.adjust_gamma(data, gamma, gain) )
-    return np.asarray(results)
-
-
-# contrast
-def constant(x, cutoff=0.5, gain=10, inv=False, is_random=False):
-    # TODO
-    x = exposure.adjust_sigmoid(x, cutoff=cutoff, gain=gain, inv=inv)
-    return x
-
-def constant_multi():
-    #TODO
-    pass
-
-# resize
-def imresize(x, size=[100, 100], interp='bilinear', mode=None):
-    """Resize an image by given output size and method. Warning, this function
-    will rescale the value to [0, 255].
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    size : int, float or tuple (h, w)
-        - int, Percentage of current size.
-        - float, Fraction of current size.
-        - tuple, Size of the output image.
-    interp : str, optional
-        Interpolation to use for re-sizing (‘nearest’, ‘lanczos’, ‘bilinear’, ‘bicubic’ or ‘cubic’).
-    mode : str, optional
-        The PIL image mode (‘P’, ‘L’, etc.) to convert arr before resizing.
-
-    Returns
-    --------
-    imresize : ndarray
-    The resized array of image.
-
-    References
-    ------------
-    - `scipy.misc.imresize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.misc.imresize.html>`_
-    """
-    if x.shape[-1] == 1:
-        # greyscale
-        x = scipy.misc.imresize(x[:,:,0], size, interp=interp, mode=mode)
-        return x[:, :, np.newaxis]
-    elif x.shape[-1] == 3:
-        # rgb, bgr ..
-        return scipy.misc.imresize(x, size, interp=interp, mode=mode)
-    else:
-        raise Exception("Unsupported channel %d" % x.shape[-1])
-
-# normailization
-def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_normalization=False,
-            channel_index=2, epsilon=1e-7):
-    """Normalize an image by rescale, samplewise centering and samplewise centering in order.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    rescale : rescaling factor.
-            If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation)
-    samplewise_center : set each sample mean to 0.
-    samplewise_std_normalization : divide each input by its std.
-    epsilon : small position value for dividing standard deviation.
-
-    Examples
-    --------
-    >>> x = samplewise_norm(x, samplewise_center=True, samplewise_std_normalization=True)
-    >>> print(x.shape, np.mean(x), np.std(x))
-    ... (160, 176, 1), 0.0, 1.0
-
-    Notes
-    ------
-    When samplewise_center and samplewise_std_normalization are True.
-
-    - For greyscale image, every pixels are subtracted and divided by the mean and std of whole image.
-    - For RGB image, every pixels are subtracted and divided by the mean and std of this pixel i.e. the mean and std of a pixel is 0 and 1.
-    """
-    if rescale:
-        x *= rescale
-
-    if x.shape[channel_index] == 1:
-        # greyscale
-        if samplewise_center:
-            x = x - np.mean(x)
-        if samplewise_std_normalization:
-            x = x / np.std(x)
-        return x
-    elif x.shape[channel_index] == 3:
-        # rgb
-        if samplewise_center:
-            x = x - np.mean(x, axis=channel_index, keepdims=True)
-        if samplewise_std_normalization:
-            x = x / (np.std(x, axis=channel_index, keepdims=True) + epsilon)
-        return x
-    else:
-        raise Exception("Unsupported channels %d" % x.shape[channel_index])
-
-def featurewise_norm(x, mean=None, std=None, epsilon=1e-7):
-    """Normalize every pixels by the same given mean and std, which are usually
-    compute from all examples.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    mean : value for subtraction.
-    std : value for division.
-    epsilon : small position value for dividing standard deviation.
-    """
-    if mean:
-        x = x - mean
-    if std:
-        x = x / (std + epsilon)
-    return x
-
-# whitening
-def get_zca_whitening_principal_components_img(X):
-    """Return the ZCA whitening principal components matrix.
-
-    Parameters
-    -----------
-    x : numpy array
-        Batch of image with dimension of [n_example, row, col, channel] (default).
-    """
-    flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
-    print("zca : computing sigma ..")
-    sigma = np.dot(flatX.T, flatX) / flatX.shape[0]
-    print("zca : computing U, S and V ..")
-    U, S, V = linalg.svd(sigma)
-    print("zca : computing principal components ..")
-    principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T)
-    return principal_components
-
-def zca_whitening(x, principal_components):
-    """Apply ZCA whitening on an image by given principal components matrix.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    principal_components : matrix from ``get_zca_whitening_principal_components_img``.
-    """
-    # flatx = np.reshape(x, (x.size))
-    print(principal_components.shape, x.shape)  # ((28160, 28160), (160, 176, 1))
-    # flatx = np.reshape(x, (x.shape))
-    # flatx = np.reshape(x, (x.shape[0], ))
-    print(flatx.shape)  # (160, 176, 1)
-    whitex = np.dot(flatx, principal_components)
-    x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
-    return x
-
-# developing
-# def barrel_transform(x, intensity):
-#     # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
-#     # TODO
-#     pass
-#
-# def barrel_transform_multi(x, intensity):
-#     # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
-#     # TODO
-#     pass
-
-# channel shift
-def channel_shift(x, intensity, is_random=False, channel_index=2):
-    """Shift the channels of an image, randomly or non-randomly, see `numpy.rollaxis <https://docs.scipy.org/doc/numpy/reference/generated/numpy.rollaxis.html>`_.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    intensity : float
-        Intensity of shifting.
-    is_random : boolean, default False
-        If True, randomly shift.
-    channel_index : int
-        Index of channel, default 2.
-    """
-    if is_random:
-        factor = np.random.uniform(-intensity, intensity)
-    else:
-        factor = intensity
-    x = np.rollaxis(x, channel_index, 0)
-    min_x, max_x = np.min(x), np.max(x)
-    channel_images = [np.clip(x_channel + factor, min_x, max_x)
-                      for x_channel in x]
-    x = np.stack(channel_images, axis=0)
-    x = np.rollaxis(x, 0, channel_index+1)
-    return x
-    # x = np.rollaxis(x, channel_index, 0)
-    # min_x, max_x = np.min(x), np.max(x)
-    # channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x)
-    #                   for x_channel in x]
-    # x = np.stack(channel_images, axis=0)
-    # x = np.rollaxis(x, 0, channel_index+1)
-    # return x
-
-def channel_shift_multi(x, intensity, channel_index=2):
-    """Shift the channels of images with the same arguments, randomly or non-randomly, see `numpy.rollaxis <https://docs.scipy.org/doc/numpy/reference/generated/numpy.rollaxis.html>`_ .
-    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
-
-    Parameters
-    -----------
-    x : list of numpy array
-        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``channel_shift``.
-    """
-    if is_random:
-        factor = np.random.uniform(-intensity, intensity)
-    else:
-        factor = intensity
-
-    results = []
-    for data in x:
-        data = np.rollaxis(data, channel_index, 0)
-        min_x, max_x = np.min(data), np.max(data)
-        channel_images = [np.clip(x_channel + factor, min_x, max_x)
-                          for x_channel in x]
-        data = np.stack(channel_images, axis=0)
-        data = np.rollaxis(x, 0, channel_index+1)
-        results.append( data )
-    return np.asarray(results)
-
-# noise
-def drop(x, keep=0.5):
-    """Randomly set some pixels to zero by a given keeping probability.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] or [row, col].
-    keep : float (0, 1)
-        The keeping probability, the lower more values will be set to zero.
-    """
-    if len(x.shape) == 3:
-        if x.shape[-1]==3: # color
-            img_size = x.shape
-            mask = np.random.binomial(n=1, p=keep, size=x.shape[:-1])
-            for i in range(3):
-                x[:,:,i] = np.multiply(x[:,:,i] , mask)
-        elif x.shape[-1]==1: # greyscale image
-            img_size = x.shape
-            x = np.multiply(x , np.random.binomial(n=1, p=keep, size=img_size))
-        else:
-            raise Exception("Unsupported shape {}".format(x.shape))
-    elif len(x.shape) == 2 or 1: # greyscale matrix (image) or vector
-        img_size = x.shape
-        x = np.multiply(x , np.random.binomial(n=1, p=keep, size=img_size))
-    else:
-        raise Exception("Unsupported shape {}".format(x.shape))
-    return x
-
-# x = np.asarray([[1,2,3,4,5,6,7,8,9,10],[1,2,3,4,5,6,7,8,9,10]])
-# x = np.asarray([x,x,x,x,x,x])
-# x.shape = 10, 4, 3
-# # print(x)
-# # exit()
-# print(x.shape)
-# # exit()
-# print(drop(x, keep=1.))
-# exit()
-
-# manual transform
-def transform_matrix_offset_center(matrix, x, y):
-    """Return transform matrix offset center.
-
-    Parameters
-    ----------
-    matrix : numpy array
-        Transform matrix
-    x, y : int
-        Size of image.
-
-    Examples
-    --------
-    - See ``rotation``, ``shear``, ``zoom``.
-    """
-    o_x = float(x) / 2 + 0.5
-    o_y = float(y) / 2 + 0.5
-    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
-    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
-    transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
-    return transform_matrix
-
-
-def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0.):
-    """Return transformed images by given transform_matrix from ``transform_matrix_offset_center``.
-
-    Parameters
-    ----------
-    x : numpy array
-        Batch of images with dimension of 3, [batch_size, row, col, channel].
-    transform_matrix : numpy array
-        Transform matrix (offset center), can be generated by ``transform_matrix_offset_center``
-    channel_index : int
-        Index of channel, default 2.
-    fill_mode : string
-        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-    cval : scalar, optional
-        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0
-
-        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
-
-    Examples
-    --------
-    - See ``rotation``, ``shift``, ``shear``, ``zoom``.
-    """
-    x = np.rollaxis(x, channel_index, 0)
-    final_affine_matrix = transform_matrix[:2, :2]
-    final_offset = transform_matrix[:2, 2]
-    channel_images = [ndi.interpolation.affine_transform(x_channel, final_affine_matrix,
-                      final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x]
-    x = np.stack(channel_images, axis=0)
-    x = np.rollaxis(x, 0, channel_index+1)
-    return x
-
-
-def projective_transform_by_points(x, src, dst, map_args={}, output_shape=None, order=1, mode='constant', cval=0.0, clip=True, preserve_range=False):
-    """Projective transform by given coordinates, usually 4 coordinates. see `scikit-image <http://scikit-image.org/docs/dev/auto_examples/applications/plot_geometric.html>`_.
-
-    Parameters
-    -----------
-    x : numpy array
-        An image with dimension of [row, col, channel] (default).
-    src : list or numpy
-        The original coordinates, usually 4 coordinates of (x, y).
-    dst : list or numpy
-        The coordinates after transformation, the number of coordinates is the same with src.
-    map_args : dict, optional
-        Keyword arguments passed to inverse_map.
-    output_shape : tuple (rows, cols), optional
-        Shape of the output image generated. By default the shape of the input image is preserved. Note that, even for multi-band images, only rows and columns need to be specified.
-    order : int, optional
-        The order of interpolation. The order has to be in the range 0-5:
-
-        - 0 Nearest-neighbor
-        - 1 Bi-linear (default)
-        - 2 Bi-quadratic
-        - 3 Bi-cubic
-        - 4 Bi-quartic
-        - 5 Bi-quintic
-    mode : {‘constant’, ‘edge’, ‘symmetric’, ‘reflect’, ‘wrap’}, optional
-        Points outside the boundaries of the input are filled according to the given mode. Modes match the behaviour of numpy.pad.
-    cval : float, optional
-        Used in conjunction with mode ‘constant’, the value outside the image boundaries.
-    clip : bool, optional
-        Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range.
-    preserve_range : bool, optional
-        Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float.
-
-    Examples
-    --------
-    >>> Assume X is an image from CIFAR 10, i.e. shape == (32, 32, 3)
-    >>> src = [[0,0],[0,32],[32,0],[32,32]]
-    >>> dst = [[10,10],[0,32],[32,0],[32,32]]
-    >>> x = projective_transform_by_points(X, src, dst)
-
-    References
-    -----------
-    - `scikit-image : geometric transformations <http://scikit-image.org/docs/dev/auto_examples/applications/plot_geometric.html>`_
-    - `scikit-image : examples <http://scikit-image.org/docs/dev/auto_examples/index.html>`_
-    """
-    if type(src) is list:   # convert to numpy
-        src = np.array(src)
-    if type(dst) is list:
-        dst = np.array(dst)
-    if np.max(x)>1:         # convert to [0, 1]
-        x = x/255
-
-    m = transform.ProjectiveTransform()
-    m.estimate(dst, src)
-    warped = transform.warp(x, m,  map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range)
-    return warped
-
-# Numpy and PIL
-def array_to_img(x, dim_ordering=(0,1,2), scale=True):
-    """Converts a numpy array to PIL image object (uint8 format).
-
-    Parameters
-    ----------
-    x : numpy array
-        A image with dimension of 3 and channels of 1 or 3.
-    dim_ordering : list or tuple of 3 int
-        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
-    scale : boolean, default is True
-        If True, converts image to [0, 255] from any range of value like [-1, 2].
-
-    References
-    -----------
-    - `PIL Image.fromarray <http://pillow.readthedocs.io/en/3.1.x/reference/Image.html?highlight=fromarray>`_
-    """
-    from PIL import Image
-    # if dim_ordering == 'default':
-    #     dim_ordering = K.image_dim_ordering()
-    # if dim_ordering == 'th':  # theano
-    #     x = x.transpose(1, 2, 0)
-    x = x.transpose(dim_ordering)
-    if scale:
-        x += max(-np.min(x), 0)
-        x_max = np.max(x)
-        if x_max != 0:
-            # print(x_max)
-            # x /= x_max
-            x = x / x_max
-        x *= 255
-    if x.shape[2] == 3:
-        # RGB
-        return Image.fromarray(x.astype('uint8'), 'RGB')
-    elif x.shape[2] == 1:
-        # grayscale
-        return Image.fromarray(x[:, :, 0].astype('uint8'), 'L')
-    else:
-        raise Exception('Unsupported channel number: ', x.shape[2])
-
-
-## Sequence
-def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncating='pre', value=0.):
-    """Pads each sequence to the same length:
-    the length of the longest sequence.
-    If maxlen is provided, any sequence longer
-    than maxlen is truncated to maxlen.
-    Truncation happens off either the beginning (default) or
-    the end of the sequence.
-    Supports post-padding and pre-padding (default).
-
-    Parameters
-    ----------
-    sequences : list of lists where each element is a sequence
-    maxlen : int, maximum length
-    dtype : type to cast the resulting sequence.
-    padding : 'pre' or 'post', pad either before or after each sequence.
-    truncating : 'pre' or 'post', remove values from sequences larger than
-        maxlen either in the beginning or in the end of the sequence
-    value : float, value to pad the sequences to the desired value.
-
-    Returns
-    ----------
-    x : numpy array with dimensions (number_of_sequences, maxlen)
-
-    Examples
-    ----------
-    >>> sequences = [[1,1,1,1,1],[2,2,2],[3,3]]
-    >>> sequences = pad_sequences(sequences, maxlen=None, dtype='int32',
-    ...                  padding='post', truncating='pre', value=0.)
-    ... [[1 1 1 1 1]
-    ...  [2 2 2 0 0]
-    ...  [3 3 0 0 0]]
-    """
-    lengths = [len(s) for s in sequences]
-
-    nb_samples = len(sequences)
-    if maxlen is None:
-        maxlen = np.max(lengths)
-
-    # take the sample shape from the first non empty sequence
-    # checking for consistency in the main loop below.
-    sample_shape = tuple()
-    for s in sequences:
-        if len(s) > 0:
-            sample_shape = np.asarray(s).shape[1:]
-            break
-
-    x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype)
-    for idx, s in enumerate(sequences):
-        if len(s) == 0:
-            continue  # empty list was found
-        if truncating == 'pre':
-            trunc = s[-maxlen:]
-        elif truncating == 'post':
-            trunc = s[:maxlen]
-        else:
-            raise ValueError('Truncating type "%s" not understood' % truncating)
-
-        # check `trunc` has expected shape
-        trunc = np.asarray(trunc, dtype=dtype)
-        if trunc.shape[1:] != sample_shape:
-            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
-                             (trunc.shape[1:], idx, sample_shape))
-
-        if padding == 'post':
-            x[idx, :len(trunc)] = trunc
-        elif padding == 'pre':
-            x[idx, -len(trunc):] = trunc
-        else:
-            raise ValueError('Padding type "%s" not understood' % padding)
-    return x
-
-def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_end_id=False):
-    """Set all tokens(ids) after END token to the padding value, and then shorten (option) it to the maximum sequence length in this batch.
-
-    Parameters
-    -----------
-    sequences : numpy array or list of list with token IDs.
-        e.g. [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]]
-    end_id : int, the special token for END.
-    pad_val : int, replace the end_id and the ids after end_id to this value.
-    is_shorten : boolean, default True.
-        Shorten the sequences.
-    remain_end_id : boolean, default False.
-        Keep an end_id in the end.
-
-    Examples
-    ---------
-    >>> sentences_ids = [[4, 3, 5, 3, 2, 2, 2, 2],  <-- end_id is 2
-    ...                  [5, 3, 9, 4, 9, 2, 2, 3]]  <-- end_id is 2
-    >>> sentences_ids = precess_sequences(sentences_ids, end_id=vocab.end_id, pad_val=0, is_shorten=True)
-    ... [[4, 3, 5, 3, 0], [5, 3, 9, 4, 9]]
-    """
-    max_length = 0
-    for i_s, seq in enumerate(sequences):
-        is_end = False
-        for i_w, n in enumerate(seq):
-            if n == end_id and is_end == False: # 1st time to see end_id
-                is_end = True
-                if max_length < i_w:
-                    max_length = i_w
-                if remain_end_id is False:
-                    seq[i_w] = pad_val      # set end_id to pad_val
-            elif is_end == True:
-                seq[i_w] = pad_val
-
-    if remain_end_id is True:
-        max_length += 1
-    if is_shorten:
-        for i, seq in enumerate(sequences):
-            sequences[i] = seq[:max_length]
-    return sequences
-
-def sequences_add_start_id(sequences, start_id=0, remove_last=False):
-    """Add special start token(id) in the beginning of each sequence.
-
-    Examples
-    ---------
-    >>> sentences_ids = [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]]
-    >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2)
-    ... [[2, 4, 3, 5, 3, 2, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2, 3]]
-    >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2, remove_last=True)
-    ... [[2, 4, 3, 5, 3, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2]]
-
-    - For Seq2seq
-    >>> input = [a, b, c]
-    >>> target = [x, y, z]
-    >>> decode_seq = [start_id, a, b] <-- sequences_add_start_id(input, start_id, True)
-    """
-    sequences_out = [[] for _ in range(len(sequences))]#[[]] * len(sequences)
-    for i in range(len(sequences)):
-        if remove_last:
-            sequences_out[i] = [start_id] + sequences[i][:-1]
-        else:
-            sequences_out[i] = [start_id] + sequences[i]
-    return sequences_out
-
-def sequences_get_mask(sequences, pad_val=0):
-    """Return mask for sequences.
-
-    Examples
-    ---------
-    >>> sentences_ids = [[4, 0, 5, 3, 0, 0],
-    ...                  [5, 3, 9, 4, 9, 0]]
-    >>> mask = sequences_get_mask(sentences_ids, pad_val=0)
-    ... [[1 1 1 1 0 0]
-    ...  [1 1 1 1 1 0]]
-    """
-    mask = np.ones_like(sequences)
-    for i, seq in enumerate(sequences):
-        for i_w in reversed(range(len(seq))):
-            if seq[i_w] == pad_val:
-                mask[i, i_w] = 0
-            else:
-                break   # <-- exit the for loop, prepcess next sequence
-    return mask
-
-
-## Text
-# see tensorlayer.nlp
-
-
-## Tensor Opt
-def distorted_images(images=None, height=24, width=24):
-    """Distort images for generating more training data.
-
-    Features
-    ---------
-    They are cropped to height * width pixels randomly.
-
-    They are approximately whitened to make the model insensitive to dynamic range.
-
-    Randomly flip the image from left to right.
-
-    Randomly distort the image brightness.
-
-    Randomly distort the image contrast.
-
-    Whiten (Normalize) the images.
-
-    Parameters
-    ----------
-    images : 4D Tensor
-        The tensor or placeholder of images
-    height : int
-        The height for random crop.
-    width : int
-        The width for random crop.
-
-    Returns
-    -------
-    result : tuple of Tensor
-        (Tensor for distorted images, Tensor for while loop index)
-
-    Examples
-    --------
-    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
-    >>> sess = tf.InteractiveSession()
-    >>> batch_size = 128
-    >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3])
-    >>> distorted_images_op = tl.preprocess.distorted_images(images=x, height=24, width=24)
-    >>> sess.run(tf.initialize_all_variables())
-    >>> feed_dict={x: X_train[0:batch_size,:,:,:]}
-    >>> distorted_images, idx = sess.run(distorted_images_op, feed_dict=feed_dict)
-    >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
-    >>> tl.visualize.images2d(distorted_images[1:10,:,:,:], second=10, saveable=False, name='distorted_images', dtype=None, fig_idx=23012)
-
-    Notes
-    ------
-    - The first image in 'distorted_images' should be removed.
-
-    References
-    -----------
-    - `tensorflow.models.image.cifar10.cifar10_input <https://github.com/tensorflow/tensorflow/blob/r0.9/tensorflow/models/image/cifar10/cifar10_input.py>`_
-    """
-    print("This function is deprecated, please use tf.map_fn instead, e.g:\n   \
-            t_image = tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=32. / 255.), t_image)\n \
-            t_image = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.5, upper=1.5), t_image)\n \
-            t_image = tf.map_fn(lambda img: tf.image.random_saturation(img, lower=0.5, upper=1.5), t_image)\n \
-            t_image = tf.map_fn(lambda img: tf.image.random_hue(img, max_delta=0.032), t_image)")
-    exit()
-    # print(" [Warning] distorted_images will be deprecated due to speed, see TFRecord tutorial for more info...")
-    try:
-        batch_size = int(images._shape[0])
-    except:
-        raise Exception('unknow batch_size of images')
-    distorted_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3]))
-    i = tf.Variable(tf.constant(0))
-
-    c = lambda distorted_x, i: tf.less(i, batch_size)
-
-    def body(distorted_x, i):
-        # 1. Randomly crop a [height, width] section of the image.
-        image = tf.random_crop(tf.gather(images, i), [height, width, 3])
-        # 2. Randomly flip the image horizontally.
-        image = tf.image.random_flip_left_right(image)
-        # 3. Randomly change brightness.
-        image = tf.image.random_brightness(image, max_delta=63)
-        # 4. Randomly change contrast.
-        image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
-        # 5. Subtract off the mean and divide by the variance of the pixels.
-        image = tf.image.per_image_whitening(image)
-        # 6. Append the image to a batch.
-        image = tf.expand_dims(image, 0)
-        return tf.concat(0, [distorted_x, image]), tf.add(i, 1)
-
-    result = tf.while_loop(cond=c, body=body, loop_vars=(distorted_x, i), parallel_iterations=16)
-    return result
-
-
-def crop_central_whiten_images(images=None, height=24, width=24):
-    """Crop the central of image, and normailize it for test data.
-
-    They are cropped to central of height * width pixels.
-
-    Whiten (Normalize) the images.
-
-    Parameters
-    ----------
-    images : 4D Tensor
-        The tensor or placeholder of images
-    height : int
-        The height for central crop.
-    width : int
-        The width for central crop.
-
-    Returns
-    -------
-    result : tuple Tensor
-        (Tensor for distorted images, Tensor for while loop index)
-
-    Examples
-    --------
-    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
-    >>> sess = tf.InteractiveSession()
-    >>> batch_size = 128
-    >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3])
-    >>> central_images_op = tl.preprocess.crop_central_whiten_images(images=x, height=24, width=24)
-    >>> sess.run(tf.initialize_all_variables())
-    >>> feed_dict={x: X_train[0:batch_size,:,:,:]}
-    >>> central_images, idx = sess.run(central_images_op, feed_dict=feed_dict)
-    >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
-    >>> tl.visualize.images2d(central_images[1:10,:,:,:], second=10, saveable=False, name='central_images', dtype=None, fig_idx=23012)
-
-    Notes
-    ------
-    The first image in 'central_images' should be removed.
-
-    Code References
-    ----------------
-    - ``tensorflow.models.image.cifar10.cifar10_input``
-    """
-    print("This function is deprecated, please use tf.map_fn instead, e.g:\n   \
-            t_image = tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=32. / 255.), t_image)\n \
-            t_image = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.5, upper=1.5), t_image)\n \
-            t_image = tf.map_fn(lambda img: tf.image.random_saturation(img, lower=0.5, upper=1.5), t_image)\n \
-            t_image = tf.map_fn(lambda img: tf.image.random_hue(img, max_delta=0.032), t_image)")
-    exit()
-    # print(" [Warning] crop_central_whiten_images will be deprecated due to speed, see TFRecord tutorial for more info...")
-    try:
-        batch_size = int(images._shape[0])
-    except:
-        raise Exception('unknow batch_size of images')
-    central_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3]))
-    i = tf.Variable(tf.constant(0))
-
-    c = lambda central_x, i: tf.less(i, batch_size)
-
-    def body(central_x, i):
-        # 1. Crop the central [height, width] of the image.
-        image = tf.image.resize_image_with_crop_or_pad(tf.gather(images, i), height, width)
-        # 2. Subtract off the mean and divide by the variance of the pixels.
-        image = tf.image.per_image_whitening(image)
-        # 5. Append the image to a batch.
-        image = tf.expand_dims(image, 0)
-        return tf.concat(0, [central_x, image]), tf.add(i, 1)
-
-    result = tf.while_loop(cond=c, body=body, loop_vars=(central_x, i), parallel_iterations=16)
-    return result
-
-
-
-
-
-
-
-
-
-
-
-
-#
diff --git a/_tensorlayer/rein.py b/_tensorlayer/rein.py
deleted file mode 100755
index 9ad3de7..0000000
--- a/_tensorlayer/rein.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-
-import tensorflow as tf
-import numpy as np
-from six.moves import xrange
-
-def discount_episode_rewards(rewards=[], gamma=0.99, mode=0):
-    """ Take 1D float array of rewards and compute discounted rewards for an
-    episode. When encount a non-zero value, consider as the end a of an episode.
-
-    Parameters
-    ----------
-    rewards : numpy list
-        a list of rewards
-    gamma : float
-        discounted factor
-    mode : int
-        if mode == 0, reset the discount process when encount a non-zero reward (Ping-pong game).
-        if mode == 1, would not reset the discount process.
-
-    Examples
-    ----------
-    >>> rewards = np.asarray([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1])
-    >>> gamma = 0.9
-    >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma)
-    >>> print(discount_rewards)
-    ... [ 0.72899997  0.81        0.89999998  1.          0.72899997  0.81
-    ... 0.89999998  1.          0.72899997  0.81        0.89999998  1.        ]
-    >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma, mode=1)
-    >>> print(discount_rewards)
-    ... [ 1.52110755  1.69011939  1.87791049  2.08656716  1.20729685  1.34144104
-    ... 1.49048996  1.65610003  0.72899997  0.81        0.89999998  1.        ]
-    """
-    discounted_r = np.zeros_like(rewards, dtype=np.float32)
-    running_add = 0
-    for t in reversed(xrange(0, rewards.size)):
-        if mode == 0:
-            if rewards[t] != 0: running_add = 0
-
-        running_add = running_add * gamma + rewards[t]
-        discounted_r[t] = running_add
-    return discounted_r
-
-
-def cross_entropy_reward_loss(logits, actions, rewards, name=None):
-    """ Calculate the loss for Policy Gradient Network.
-
-    Parameters
-    ----------
-    logits : tensor
-        The network outputs without softmax. This function implements softmax
-        inside.
-    actions : tensor/ placeholder
-        The agent actions.
-    rewards : tensor/ placeholder
-        The rewards.
-
-    Examples
-    ----------
-    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])   # observation for training
-    >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
-    >>> network = tl.layers.DenseLayer(network, n_units=H, act = tf.nn.relu, name='relu1')
-    >>> network = tl.layers.DenseLayer(network, n_units=3, act = tl.activation.identity, name='output_layer')
-    >>> probs = network.outputs
-    >>> sampling_prob = tf.nn.softmax(probs)
-    >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
-    >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
-    >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
-    >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
-    """
-
-    try: # TF 1.0
-        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits, name=name)
-    except:
-        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, targets=actions)
-        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, actions)
-
-    try: ## TF1.0
-        loss = tf.reduce_sum(tf.multiply(cross_entropy, rewards))
-    except: ## TF0.12
-        loss = tf.reduce_sum(tf.mul(cross_entropy, rewards))   # element-wise mul
-    return loss
diff --git a/_tensorlayer/utils.py b/_tensorlayer/utils.py
deleted file mode 100755
index a177183..0000000
--- a/_tensorlayer/utils.py
+++ /dev/null
@@ -1,516 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-import tensorflow as tf
-import tensorlayer as tl
-from . import iterate
-import numpy as np
-import time
-import math
-import random
-
-
-def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100,
-        n_epoch=100, print_freq=5, X_val=None, y_val=None, eval_train=True,
-        tensorboard=False, tensorboard_epoch_freq=5, tensorboard_weight_histograms=True, tensorboard_graph_vis=True):
-    """Traing a given non time-series network by the given cost function, training data, batch_size, n_epoch etc.
-
-    Parameters
-    ----------
-    sess : TensorFlow session
-        sess = tf.InteractiveSession()
-    network : a TensorLayer layer
-        the network will be trained
-    train_op : a TensorFlow optimizer
-        like tf.train.AdamOptimizer
-    X_train : numpy array
-        the input of training data
-    y_train : numpy array
-        the target of training data
-    x : placeholder
-        for inputs
-    y_ : placeholder
-        for targets
-    acc : the TensorFlow expression of accuracy (or other metric) or None
-        if None, would not display the metric
-    batch_size : int
-        batch size for training and evaluating
-    n_epoch : int
-        the number of training epochs
-    print_freq : int
-        display the training information every ``print_freq`` epochs
-    X_val : numpy array or None
-        the input of validation data
-    y_val : numpy array or None
-        the target of validation data
-    eval_train : boolean
-        if X_val and y_val are not None, it refects whether to evaluate the training data
-    tensorboard : boolean
-        if True summary data will be stored to the log/ direcory for visualization with tensorboard.
-        See also detailed tensorboard_X settings for specific configurations of features. (default False)
-        Also runs tl.layers.initialize_global_variables(sess) internally in fit() to setup the summary nodes, see Note:
-    tensorboard_epoch_freq : int
-        how many epochs between storing tensorboard checkpoint for visualization to log/ directory (default 5)
-    tensorboard_weight_histograms : boolean
-        if True updates tensorboard data in the logs/ directory for visulaization
-        of the weight histograms every tensorboard_epoch_freq epoch (default True)
-    tensorboard_graph_vis : boolean
-        if True stores the graph in the tensorboard summaries saved to log/ (default True)
-
-    Examples
-    --------
-    >>> see tutorial_mnist_simple.py
-    >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_,
-    ...            acc=acc, batch_size=500, n_epoch=200, print_freq=5,
-    ...            X_val=X_val, y_val=y_val, eval_train=False)
-    >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_,
-    ...            acc=acc, batch_size=500, n_epoch=200, print_freq=5,
-    ...            X_val=X_val, y_val=y_val, eval_train=False,
-    ...            tensorboard=True, tensorboard_weight_histograms=True, tensorboard_graph_vis=True)
-
-    Note
-    --------
-        If tensorboard=True, the global_variables_initializer will be run inside the fit function
-        in order to initalize the automatically generated summary nodes used for tensorboard visualization,
-        thus tf.global_variables_initializer().run() before the fit() call will be undefined.
-    """
-    assert X_train.shape[0] >= batch_size, "Number of training examples should be bigger than the batch size"
-
-    if(tensorboard):
-        print("Setting up tensorboard ...")
-        #Set up tensorboard summaries and saver
-        tl.files.exists_or_mkdir('logs/')
-
-        #Only write summaries for more recent TensorFlow versions
-        if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'):
-            if tensorboard_graph_vis:
-                train_writer = tf.summary.FileWriter('logs/train',sess.graph)
-                val_writer = tf.summary.FileWriter('logs/validation',sess.graph)
-            else:
-                train_writer = tf.summary.FileWriter('logs/train')
-                val_writer = tf.summary.FileWriter('logs/validation')
-
-        #Set up summary nodes
-        if(tensorboard_weight_histograms):
-            for param in network.all_params:
-                if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'):
-                    print('Param name ', param.name)
-                    tf.summary.histogram(param.name, param)
-
-        if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'):
-            tf.summary.scalar('cost', cost)
-
-        merged = tf.summary.merge_all()
-
-        #Initalize all variables and summaries
-        tl.layers.initialize_global_variables(sess)
-        print("Finished! use $tensorboard --logdir=logs/ to start server")
-
-    print("Start training the network ...")
-    start_time_begin = time.time()
-    tensorboard_train_index, tensorboard_val_index = 0, 0
-    for epoch in range(n_epoch):
-        start_time = time.time()
-        loss_ep = 0; n_step = 0
-        for X_train_a, y_train_a in iterate.minibatches(X_train, y_train,
-                                                    batch_size, shuffle=True):
-            feed_dict = {x: X_train_a, y_: y_train_a}
-            feed_dict.update( network.all_drop )    # enable noise layers
-            loss, _ = sess.run([cost, train_op], feed_dict=feed_dict)
-            loss_ep += loss
-            n_step += 1
-        loss_ep = loss_ep/ n_step
-
-        if tensorboard and hasattr(tf, 'summary'):
-            if epoch+1 == 1 or (epoch+1) % tensorboard_epoch_freq == 0:
-                for X_train_a, y_train_a in iterate.minibatches(
-                                        X_train, y_train, batch_size, shuffle=True):
-                    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
-                    feed_dict = {x: X_train_a, y_: y_train_a}
-                    feed_dict.update(dp_dict)
-                    result = sess.run(merged, feed_dict=feed_dict)
-                    train_writer.add_summary(result, tensorboard_train_index)
-                    tensorboard_train_index += 1
-
-                for X_val_a, y_val_a in iterate.minibatches(
-                                        X_val, y_val, batch_size, shuffle=True):
-                    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
-                    feed_dict = {x: X_val_a, y_: y_val_a}
-                    feed_dict.update(dp_dict)
-                    result = sess.run(merged, feed_dict=feed_dict)
-                    val_writer.add_summary(result, tensorboard_val_index)
-                    tensorboard_val_index += 1
-
-        if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
-            if (X_val is not None) and (y_val is not None):
-                print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
-                if eval_train is True:
-                    train_loss, train_acc, n_batch = 0, 0, 0
-                    for X_train_a, y_train_a in iterate.minibatches(
-                                            X_train, y_train, batch_size, shuffle=True):
-                        dp_dict = dict_to_one( network.all_drop )    # disable noise layers
-                        feed_dict = {x: X_train_a, y_: y_train_a}
-                        feed_dict.update(dp_dict)
-                        if acc is not None:
-                            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
-                            train_acc += ac
-                        else:
-                            err = sess.run(cost, feed_dict=feed_dict)
-                        train_loss += err;  n_batch += 1
-                    print("   train loss: %f" % (train_loss/ n_batch))
-                    if acc is not None:
-                        print("   train acc: %f" % (train_acc/ n_batch))
-                val_loss, val_acc, n_batch = 0, 0, 0
-                for X_val_a, y_val_a in iterate.minibatches(
-                                            X_val, y_val, batch_size, shuffle=True):
-                    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
-                    feed_dict = {x: X_val_a, y_: y_val_a}
-                    feed_dict.update(dp_dict)
-                    if acc is not None:
-                        err, ac = sess.run([cost, acc], feed_dict=feed_dict)
-                        val_acc += ac
-                    else:
-                        err = sess.run(cost, feed_dict=feed_dict)
-                    val_loss += err; n_batch += 1
-                print("   val loss: %f" % (val_loss/ n_batch))
-                if acc is not None:
-                    print("   val acc: %f" % (val_acc/ n_batch))
-            else:
-                print("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep))
-    print("Total training time: %fs" % (time.time() - start_time_begin))
-
-
-def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None):
-    """
-    Test a given non time-series network by the given test data and metric.
-
-    Parameters
-    ----------
-    sess : TensorFlow session
-        sess = tf.InteractiveSession()
-    network : a TensorLayer layer
-        the network will be trained
-    acc : the TensorFlow expression of accuracy (or other metric) or None
-        if None, would not display the metric
-    X_test : numpy array
-        the input of test data
-    y_test : numpy array
-        the target of test data
-    x : placeholder
-        for inputs
-    y_ : placeholder
-        for targets
-    batch_size : int or None
-        batch size for testing, when dataset is large, we should use minibatche for testing.
-        when dataset is small, we can set it to None.
-    cost : the TensorFlow expression of cost or None
-        if None, would not display the cost
-
-    Examples
-    --------
-    >>> see tutorial_mnist_simple.py
-    >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost)
-    """
-    print('Start testing the network ...')
-    if batch_size is None:
-        dp_dict = dict_to_one( network.all_drop )
-        feed_dict = {x: X_test, y_: y_test}
-        feed_dict.update(dp_dict)
-        if cost is not None:
-            print("   test loss: %f" % sess.run(cost, feed_dict=feed_dict))
-        print("   test acc: %f" % sess.run(acc, feed_dict=feed_dict))
-            # print("   test acc: %f" % np.mean(y_test == sess.run(y_op,
-            #                                           feed_dict=feed_dict)))
-    else:
-        test_loss, test_acc, n_batch = 0, 0, 0
-        for X_test_a, y_test_a in iterate.minibatches(
-                                    X_test, y_test, batch_size, shuffle=True):
-            dp_dict = dict_to_one( network.all_drop )    # disable noise layers
-            feed_dict = {x: X_test_a, y_: y_test_a}
-            feed_dict.update(dp_dict)
-            if cost is not None:
-                err, ac = sess.run([cost, acc], feed_dict=feed_dict)
-                test_loss += err
-            else:
-                ac = sess.run(acc, feed_dict=feed_dict)
-            test_acc += ac; n_batch += 1
-        if cost is not None:
-            print("   test loss: %f" % (test_loss/ n_batch))
-        print("   test acc: %f" % (test_acc/ n_batch))
-
-
-def predict(sess, network, X, x, y_op):
-    """
-    Return the predict results of given non time-series network.
-
-    Parameters
-    ----------
-    sess : TensorFlow session
-        sess = tf.InteractiveSession()
-    network : a TensorLayer layer
-        the network will be trained
-    X : numpy array
-        the input
-    x : placeholder
-        for inputs
-    y_op : placeholder
-        the argmax expression of softmax outputs
-
-    Examples
-    --------
-    >>> see tutorial_mnist_simple.py
-    >>> y = network.outputs
-    >>> y_op = tf.argmax(tf.nn.softmax(y), 1)
-    >>> print(tl.utils.predict(sess, network, X_test, x, y_op))
-    """
-    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
-    feed_dict = {x: X,}
-    feed_dict.update(dp_dict)
-    return sess.run(y_op, feed_dict=feed_dict)
-
-## Evaluation
-def evaluation(y_test=None, y_predict=None, n_classes=None):
-    """
-    Input the predicted results, targets results and
-    the number of class, return the confusion matrix, F1-score of each class,
-    accuracy and macro F1-score.
-
-    Parameters
-    ----------
-    y_test : numpy.array or list
-        target results
-    y_predict : numpy.array or list
-        predicted results
-    n_classes : int
-        number of classes
-
-    Examples
-    --------
-    >>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes)
-    """
-    from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
-    c_mat = confusion_matrix(y_test, y_predict, labels = [x for x in range(n_classes)])
-    f1    = f1_score(y_test, y_predict, average = None, labels = [x for x in range(n_classes)])
-    f1_macro = f1_score(y_test, y_predict, average='macro')
-    acc   = accuracy_score(y_test, y_predict)
-    print('confusion matrix: \n',c_mat)
-    print('f1-score:',f1)
-    print('f1-score(macro):',f1_macro)   # same output with > f1_score(y_true, y_pred, average='macro')
-    print('accuracy-score:', acc)
-    return c_mat, f1, acc, f1_macro
-
-def dict_to_one(dp_dict={}):
-    """
-    Input a dictionary, return a dictionary that all items are set to one,
-    use for disable dropout, dropconnect layer and so on.
-
-    Parameters
-    ----------
-    dp_dict : dictionary
-        keeping probabilities
-
-    Examples
-    --------
-    >>> dp_dict = dict_to_one( network.all_drop )
-    >>> dp_dict = dict_to_one( network.all_drop )
-    >>> feed_dict.update(dp_dict)
-    """
-    return {x: 1 for x in dp_dict}
-
-def flatten_list(list_of_list=[[],[]]):
-    """
-    Input a list of list, return a list that all items are in a list.
-
-    Parameters
-    ----------
-    list_of_list : a list of list
-
-    Examples
-    --------
-    >>> tl.utils.flatten_list([[1, 2, 3],[4, 5],[6]])
-    ... [1, 2, 3, 4, 5, 6]
-    """
-    return sum(list_of_list, [])
-
-
-def class_balancing_oversample(X_train=None, y_train=None, printable=True):
-    """Input the features and labels, return the features and labels after oversampling.
-
-    Parameters
-    ----------
-    X_train : numpy.array
-        Features, each row is an example
-    y_train : numpy.array
-        Labels
-
-    Examples
-    --------
-    >>> X_train, y_train = class_balancing_oversample(X_train, y_train, printable=True)
-    """
-    # ======== Classes balancing
-    if printable:
-        print("Classes balancing for training examples...")
-    from collections import Counter
-    c = Counter(y_train)
-    if printable:
-        print('the occurrence number of each stage: %s' % c.most_common())
-        print('the least stage is Label %s have %s instances' % c.most_common()[-1])
-        print('the most stage is  Label %s have %s instances' % c.most_common(1)[0])
-    most_num = c.most_common(1)[0][1]
-    if printable:
-        print('most num is %d, all classes tend to be this num' % most_num)
-
-    locations = {}
-    number = {}
-
-    for lab, num in c.most_common():    # find the index from y_train
-        number[lab] = num
-        locations[lab] = np.where(np.array(y_train)==lab)[0]
-    if printable:
-        print('convert list(np.array) to dict format')
-    X = {}  # convert list to dict
-    for lab, num in number.items():
-        X[lab] = X_train[locations[lab]]
-
-    # oversampling
-    if printable:
-        print('start oversampling')
-    for key in X:
-        temp = X[key]
-        while True:
-            if len(X[key]) >= most_num:
-                break
-            X[key] = np.vstack((X[key], temp))
-    if printable:
-        print('first features of label 0 >', len(X[0][0]))
-        print('the occurrence num of each stage after oversampling')
-    for key in X:
-        print(key, len(X[key]))
-    if printable:
-        print('make each stage have same num of instances')
-    for key in X:
-        X[key] = X[key][0:most_num,:]
-        print(key, len(X[key]))
-
-    # convert dict to list
-    if printable:
-        print('convert from dict to list format')
-    y_train = []
-    X_train = np.empty(shape=(0,len(X[0][0])))
-    for key in X:
-        X_train = np.vstack( (X_train, X[key] ) )
-        y_train.extend([key for i in range(len(X[key]))])
-    # print(len(X_train), len(y_train))
-    c = Counter(y_train)
-    if printable:
-        print('the occurrence number of each stage after oversampling: %s' % c.most_common())
-    # ================ End of Classes balancing
-    return X_train, y_train
-
-## Random
-def get_random_int(min=0, max=10, number=5, seed=None):
-    """Return a list of random integer by the given range and quantity.
-
-    Examples
-    ---------
-    >>> r = get_random_int(min=0, max=10, number=5)
-    ... [10, 2, 3, 3, 7]
-    """
-    rnd = random.Random()
-    if seed:
-        rnd = random.Random(seed)
-    # return [random.randint(min,max) for p in range(0, number)]
-    return [rnd.randint(min,max) for p in range(0, number)]
-
-#
-# def class_balancing_sequence_4D(X_train, y_train, sequence_length, model='downsampling' ,printable=True):
-#     ''' 输入、输出都是sequence format
-#         oversampling or downsampling
-#     '''
-#     n_features = X_train.shape[2]
-#     # ======== Classes balancing for sequence
-#     if printable:
-#         print("Classes balancing for 4D sequence training examples...")
-#     from collections import Counter
-#     c = Counter(y_train)    # Counter({2: 454, 4: 267, 3: 124, 1: 57, 0: 48})
-#     if printable:
-#         print('the occurrence number of each stage: %s' % c.most_common())
-#         print('the least Label %s have %s instances' % c.most_common()[-1])
-#         print('the most  Label %s have %s instances' % c.most_common(1)[0])
-#     # print(c.most_common()) # [(2, 454), (4, 267), (3, 124), (1, 57), (0, 48)]
-#     most_num = c.most_common(1)[0][1]
-#     less_num = c.most_common()[-1][1]
-#
-#     locations = {}
-#     number = {}
-#     for lab, num in c.most_common():
-#         number[lab] = num
-#         locations[lab] = np.where(np.array(y_train)==lab)[0]
-#     # print(locations)
-#     # print(number)
-#     if printable:
-#         print('  convert list to dict')
-#     X = {}  # convert list to dict
-#     ### a sequence
-#     for lab, _ in number.items():
-#         X[lab] = np.empty(shape=(0,1,n_features,1)) # 4D
-#     for lab, _ in number.items():
-#         #X[lab] = X_train[locations[lab]
-#         for l in locations[lab]:
-#             X[lab] = np.vstack((X[lab], X_train[l*sequence_length : (l+1)*(sequence_length)]))
-#         # X[lab] = X_train[locations[lab]*sequence_length : locations[lab]*(sequence_length+1)]    # a sequence
-#     # print(X)
-#
-#     if model=='oversampling':
-#         if printable:
-#             print('  oversampling -- most num is %d, all classes tend to be this num\nshuffle applied' % most_num)
-#         for key in X:
-#             temp = X[key]
-#             while True:
-#                 if len(X[key]) >= most_num * sequence_length:   # sequence
-#                     break
-#                 X[key] = np.vstack((X[key], temp))
-#             # print(key, len(X[key]))
-#         if printable:
-#             print('  make each stage have same num of instances')
-#         for key in X:
-#             X[key] = X[key][0:most_num*sequence_length,:]   # sequence
-#             if printable:
-#                 print(key, len(X[key]))
-#     elif model=='downsampling':
-#         import random
-#         if printable:
-#             print('  downsampling -- less num is %d, all classes tend to be this num by randomly choice without replacement\nshuffle applied' % less_num)
-#         for key in X:
-#             # print(key, len(X[key]))#, len(X[key])/sequence_length)
-#             s_idx = [ i for i in range(int(len(X[key])/sequence_length))]
-#             s_idx = np.asarray(s_idx)*sequence_length   # start index of sequnce in X[key]
-#             # print('s_idx',s_idx)
-#             r_idx = np.random.choice(s_idx, less_num, replace=False)    # random choice less_num of s_idx
-#             # print('r_idx',r_idx)
-#             temp = X[key]
-#             X[key] = np.empty(shape=(0,1,n_features,1)) # 4D
-#             for idx in r_idx:
-#                 X[key] = np.vstack((X[key], temp[idx:idx+sequence_length]))
-#             # print(key, X[key])
-#             # np.random.choice(l, len(l), replace=False)
-#     else:
-#         raise Exception('  model should be oversampling or downsampling')
-#
-#     # convert dict to list
-#     if printable:
-#         print('  convert dict to list')
-#     y_train = []
-#     # X_train = np.empty(shape=(0,len(X[0][0])))
-#     # X_train = np.empty(shape=(0,len(X[1][0])))    # 2D
-#     X_train = np.empty(shape=(0,1,n_features,1))    # 4D
-#     l_key = list(X.keys())  # shuffle
-#     random.shuffle(l_key)   # shuffle
-#     # for key in X:     # no shuffle
-#     for key in l_key:   # shuffle
-#         X_train = np.vstack( (X_train, X[key] ) )
-#         # print(len(X[key]))
-#         y_train.extend([key for i in range(int(len(X[key])/sequence_length))])
-#     # print(X_train,y_train, type(X_train), type(y_train))
-#     # ================ End of Classes balancing for sequence
-#     # print(X_train.shape, len(y_train))
-#     return X_train, np.asarray(y_train)
diff --git a/_tensorlayer/visualize.py b/_tensorlayer/visualize.py
deleted file mode 100755
index 8505bb9..0000000
--- a/_tensorlayer/visualize.py
+++ /dev/null
@@ -1,340 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf8 -*-
-
-
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plt
-# import matplotlib.pyplot as plt
-import numpy as np
-import os
-
-
-## Save images
-import scipy.misc
-def save_images(images, size, image_path):
-    """Save mutiple images into one single image.
-
-    Parameters
-    -----------
-    images : numpy array [batch, w, h, c]
-    size : list of two int, row and column number.
-        number of images should be equal or less than size[0] * size[1]
-    image_path : string.
-
-    Examples
-    ---------
-    >>> images = np.random.rand(64, 100, 100, 3)
-    >>> tl.visualize.save_images(images, [8, 8], 'temp.png')
-    """
-    def merge(images, size):
-        h, w = images.shape[1], images.shape[2]
-        img = np.zeros((h * size[0], w * size[1], 3))
-        for idx, image in enumerate(images):
-            i = idx % size[1]
-            j = idx // size[1]
-            img[j*h:j*h+h, i*w:i*w+w, :] = image
-        return img
-
-    def imsave(images, size, path):
-        return scipy.misc.imsave(path, merge(images, size))
-
-    assert len(images) <= size[0] * size[1], "number of images should be equal or less than size[0] * size[1] {}".format(len(images))
-    return imsave(images, size, image_path)
-
-def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=2396512):
-    """Visualize every columns of the weight matrix to a group of Greyscale img.
-
-    Parameters
-    ----------
-    W : numpy.array
-        The weight matrix
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    shape : a list with 2 int
-        The shape of feature image, MNIST is [28, 80].
-    name : a string
-        A name to save the image, if saveable is True.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    >>> tl.visualize.W(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012)
-    """
-    if saveable is False:
-        plt.ion()
-    fig = plt.figure(fig_idx)      # show all feature images
-    size = W.shape[0]
-    n_units = W.shape[1]
-
-    num_r = int(np.sqrt(n_units))  # 每行显示的个数   若25个hidden unit -> 每行显示5个
-    num_c = int(np.ceil(n_units/num_r))
-    count = int(1)
-    for row in range(1, num_r+1):
-        for col in range(1, num_c+1):
-            if count > n_units:
-                break
-            a = fig.add_subplot(num_r, num_c, count)
-            # ------------------------------------------------------------
-            # plt.imshow(np.reshape(W[:,count-1],(28,28)), cmap='gray')
-            # ------------------------------------------------------------
-            feature = W[:,count-1] / np.sqrt( (W[:,count-1]**2).sum())
-            # feature[feature<0.0001] = 0   # value threshold
-            # if count == 1 or count == 2:
-            #     print(np.mean(feature))
-            # if np.std(feature) < 0.03:      # condition threshold
-            #     feature = np.zeros_like(feature)
-            # if np.mean(feature) < -0.015:      # condition threshold
-            #     feature = np.zeros_like(feature)
-            plt.imshow(np.reshape(feature ,(shape[0],shape[1])),
-                    cmap='gray', interpolation="nearest")#, vmin=np.min(feature), vmax=np.max(feature))
-            # plt.title(name)
-            # ------------------------------------------------------------
-            # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest")
-            plt.gca().xaxis.set_major_locator(plt.NullLocator())    # distable tick
-            plt.gca().yaxis.set_major_locator(plt.NullLocator())
-            count = count + 1
-    if saveable:
-        plt.savefig(name+'.pdf',format='pdf')
-    else:
-        plt.draw()
-        plt.pause(second)
-
-def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=12836):
-    """Display a frame(image). Make sure OpenAI Gym render() is disable before using it.
-
-    Parameters
-    ----------
-    I : numpy.array
-        The image
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    name : a string
-        A name to save the image, if saveable is True.
-    cmap : None or string
-        'gray' for greyscale, None for default, etc.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    >>> env = gym.make("Pong-v0")
-    >>> observation = env.reset()
-    >>> tl.visualize.frame(observation)
-    """
-    if saveable is False:
-        plt.ion()
-    fig = plt.figure(fig_idx)      # show all feature images
-
-    if len(I.shape) and I.shape[-1]==1:     # (10,10,1) --> (10,10)
-        I = I[:,:,0]
-
-    plt.imshow(I, cmap)
-    plt.title(name)
-    # plt.gca().xaxis.set_major_locator(plt.NullLocator())    # distable tick
-    # plt.gca().yaxis.set_major_locator(plt.NullLocator())
-
-    if saveable:
-        plt.savefig(name+'.pdf',format='pdf')
-    else:
-        plt.draw()
-        plt.pause(second)
-
-def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362):
-    """Display a group of RGB or Greyscale CNN masks.
-
-    Parameters
-    ----------
-    CNN : numpy.array
-        The image. e.g: 64 5x5 RGB images can be (5, 5, 3, 64).
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    name : a string
-        A name to save the image, if saveable is True.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    >>> tl.visualize.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_mnist', fig_idx=2012)
-    """
-    # print(CNN.shape)    # (5, 5, 3, 64)
-    # exit()
-    n_mask = CNN.shape[3]
-    n_row = CNN.shape[0]
-    n_col = CNN.shape[1]
-    n_color = CNN.shape[2]
-    row = int(np.sqrt(n_mask))
-    col = int(np.ceil(n_mask/row))
-    plt.ion()   # active mode
-    fig = plt.figure(fig_idx)
-    count = 1
-    for ir in range(1, row+1):
-        for ic in range(1, col+1):
-            if count > n_mask:
-                break
-            a = fig.add_subplot(col, row, count)
-            # print(CNN[:,:,:,count-1].shape, n_row, n_col)   # (5, 1, 32) 5 5
-            # exit()
-            # plt.imshow(
-            #         np.reshape(CNN[count-1,:,:,:], (n_row, n_col)),
-            #         cmap='gray', interpolation="nearest")     # theano
-            if n_color == 1:
-                plt.imshow(
-                        np.reshape(CNN[:,:,:,count-1], (n_row, n_col)),
-                        cmap='gray', interpolation="nearest")
-            elif n_color == 3:
-                plt.imshow(
-                        np.reshape(CNN[:,:,:,count-1], (n_row, n_col, n_color)),
-                        cmap='gray', interpolation="nearest")
-            else:
-                raise Exception("Unknown n_color")
-            plt.gca().xaxis.set_major_locator(plt.NullLocator())    # distable tick
-            plt.gca().yaxis.set_major_locator(plt.NullLocator())
-            count = count + 1
-    if saveable:
-        plt.savefig(name+'.pdf',format='pdf')
-    else:
-        plt.draw()
-        plt.pause(second)
-
-
-def images2d(images=None, second=10, saveable=True, name='images', dtype=None,
-                                                            fig_idx=3119362):
-    """Display a group of RGB or Greyscale images.
-
-    Parameters
-    ----------
-    images : numpy.array
-        The images.
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    name : a string
-        A name to save the image, if saveable is True.
-    dtype : None or numpy data type
-        The data type for displaying the images.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
-    >>> tl.visualize.images2d(X_train[0:100,:,:,:], second=10, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
-    """
-    # print(images.shape)    # (50000, 32, 32, 3)
-    # exit()
-    if dtype:
-        images = np.asarray(images, dtype=dtype)
-    n_mask = images.shape[0]
-    n_row = images.shape[1]
-    n_col = images.shape[2]
-    n_color = images.shape[3]
-    row = int(np.sqrt(n_mask))
-    col = int(np.ceil(n_mask/row))
-    plt.ion()   # active mode
-    fig = plt.figure(fig_idx)
-    count = 1
-    for ir in range(1, row+1):
-        for ic in range(1, col+1):
-            if count > n_mask:
-                break
-            a = fig.add_subplot(col, row, count)
-            # print(images[:,:,:,count-1].shape, n_row, n_col)   # (5, 1, 32) 5 5
-            # plt.imshow(
-            #         np.reshape(images[count-1,:,:,:], (n_row, n_col)),
-            #         cmap='gray', interpolation="nearest")     # theano
-            if n_color == 1:
-                plt.imshow(
-                        np.reshape(images[count-1,:,:], (n_row, n_col)),
-                        cmap='gray', interpolation="nearest")
-                # plt.title(name)
-            elif n_color == 3:
-                plt.imshow(images[count-1,:,:],
-                        cmap='gray', interpolation="nearest")
-                # plt.title(name)
-            else:
-                raise Exception("Unknown n_color")
-            plt.gca().xaxis.set_major_locator(plt.NullLocator())    # distable tick
-            plt.gca().yaxis.set_major_locator(plt.NullLocator())
-            count = count + 1
-    if saveable:
-        plt.savefig(name+'.pdf',format='pdf')
-    else:
-        plt.draw()
-        plt.pause(second)
-
-def tsne_embedding(embeddings, reverse_dictionary, plot_only=500,
-                        second=5, saveable=False, name='tsne', fig_idx=9862):
-    """Visualize the embeddings by using t-SNE.
-
-    Parameters
-    ----------
-    embeddings : a matrix
-        The images.
-    reverse_dictionary : a dictionary
-        id_to_word, mapping id to unique word.
-    plot_only : int
-        The number of examples to plot, choice the most common words.
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    name : a string
-        A name to save the image, if saveable is True.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    >>> see 'tutorial_word2vec_basic.py'
-    >>> final_embeddings = normalized_embeddings.eval()
-    >>> tl.visualize.tsne_embedding(final_embeddings, labels, reverse_dictionary,
-    ...                   plot_only=500, second=5, saveable=False, name='tsne')
-    """
-    def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5,
-                                    saveable=True, name='tsne', fig_idx=9862):
-        assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
-        if saveable is False:
-            plt.ion()
-            plt.figure(fig_idx)
-        plt.figure(figsize=figsize)  #in inches
-        for i, label in enumerate(labels):
-            x, y = low_dim_embs[i,:]
-            plt.scatter(x, y)
-            plt.annotate(label,
-                     xy=(x, y),
-                     xytext=(5, 2),
-                     textcoords='offset points',
-                     ha='right',
-                     va='bottom')
-        if saveable:
-            plt.savefig(name+'.pdf',format='pdf')
-        else:
-            plt.draw()
-            plt.pause(second)
-
-    try:
-        from sklearn.manifold import TSNE
-        import matplotlib.pyplot as plt
-        from six.moves import xrange
-
-        tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
-        # plot_only = 500
-        low_dim_embs = tsne.fit_transform(embeddings[:plot_only,:])
-        labels = [reverse_dictionary[i] for i in xrange(plot_only)]
-        plot_with_labels(low_dim_embs, labels, second=second, saveable=saveable, \
-                                                    name=name, fig_idx=fig_idx)
-    except ImportError:
-        print("Please install sklearn and matplotlib to visualize embeddings.")
-
-
-#