diff --git a/docs/modules/cost.rst b/docs/modules/cost.rst index 17c62c744..b61d4fd4d 100644 --- a/docs/modules/cost.rst +++ b/docs/modules/cost.rst @@ -30,7 +30,8 @@ then you can apply L2 regularization on the weights matrix of first two layer as .. code-block:: python cost = tl.cost.cross_entropy(y, y_) - cost = cost + tf.contrib.layers.l2_regularizer(0.001)(network.all_params[0]) + tf.contrib.layers.l2_regularizer(0.001)(network.all_params[2]) + cost = cost + tf.contrib.layers.l2_regularizer(0.001)(network.all_params[0]) + + tf.contrib.layers.l2_regularizer(0.001)(network.all_params[2]) Besides, TensorLayer provides a easy way to get all variables by a given name, so you can also apply L2 regularization on some weights as follow. @@ -44,6 +45,7 @@ apply L2 regularization on some weights as follow. + Regularization of Weights ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -75,12 +77,11 @@ Then max-norm regularization on W1 and W2 can be performed as follow. .. code-block:: python - y = network.outputs - # Alternatively, you can use tl.cost.cross_entropy(y, y_) instead. - cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, y_)) - cost = cross_entropy - cost = cost + tl.cost.maxnorm_regularizer(1.0)(network.all_params[0]) + - tl.cost.maxnorm_regularizer(1.0)(network.all_params[2]) + max_norm = 0 + for w in tl.layers.get_variables_with_name('W', train_only=True, printable=False): + max_norm += tl.cost.maxnorm_regularizer(1)(w) + cost = tl.cost.cross_entropy(y, y_) + max_norm + In addition, all TensorFlow's regularizers like ``tf.contrib.layers.l2_regularizer`` can be used with TensorLayer. diff --git a/docs/modules/prepro.rst b/docs/modules/prepro.rst index 2c5275eb6..d00560e89 100644 --- a/docs/modules/prepro.rst +++ b/docs/modules/prepro.rst @@ -108,7 +108,7 @@ Images - These functions only apply on a single image, use ``threading_data`` to apply multiple threading see ``tutorial_image_preprocess.py``. - All functions have argument ``is_random``. -- All functions end with `multi` , usually be used for image segmentation i.e. the input and output image should be matched. +- All functions end with ``*_multi`` process all images together, usually be used for image segmentation i.e. the input and output image should be matched. Rotation ^^^^^^^^^ diff --git a/docs/modules/utils.rst b/docs/modules/utils.rst index 2a93c26b4..64f2fe31c 100644 --- a/docs/modules/utils.rst +++ b/docs/modules/utils.rst @@ -14,6 +14,10 @@ API - Utility dict_to_one list_string_to_dict flatten_list + exit_tensorflow + open_tensorboard + clear_all_placeholder_variables + set_gpu_fraction Training, testing and predicting ---------------------------------- @@ -58,17 +62,17 @@ Flatten a list .. autofunction:: flatten_list Close TF session and associated processes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +----------------------------------------- .. autofunction:: exit_tensorflow Open TensorBoard -^^^^^^^^^^^^^^^^^^^ +---------------- .. autofunction:: open_tensorboard Clear TensorFlow placeholder -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +---------------------------- .. autofunction:: clear_all_placeholder_variables Set GPU functions ---------------------------- -.. autofunction:: set_gpu_fraction \ No newline at end of file +----------------- +.. 
autofunction:: set_gpu_fraction diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py index 51b187c7c..e1cc9f940 100644 --- a/tensorlayer/activation.py +++ b/tensorlayer/activation.py @@ -112,8 +112,8 @@ def pixel_wise_softmax(x, name='pixel_wise_softmax'): ---------- x : Tensor input. - - For 2d image, 4D tensor (batch_size, height, weight, channel), where channel >= 2. - - For 3d image, 5D tensor (batch_size, depth, height, weight, channel), where channel >= 2. + - For 2d image, 4D tensor (batch_size, height, weight, channel), where channel >= 2. + - For 3d image, 5D tensor (batch_size, depth, height, weight, channel), where channel >= 2. name : str function name (optional) diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py index 7e9d14281..e5a09b748 100644 --- a/tensorlayer/cost.py +++ b/tensorlayer/cost.py @@ -6,14 +6,14 @@ def cross_entropy(output, target, name=None): - """It is a softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy of two distributions, implement + """Softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy for two distributions, it implements softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``. Parameters ---------- - output : Tensorflow variable + output : Tensor A batch of distribution with shape: [batch_size, num of classes]. - target : Tensorflow variable + target : Tensor A batch of index with shape: [batch_size, ]. name : string Name of this loss. @@ -36,7 +36,7 @@ def cross_entropy(output, target, name=None): def sigmoid_cross_entropy(output, target, name=None): - """It is a sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``. + """Sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``. Parameters ---------- @@ -55,11 +55,7 @@ def sigmoid_cross_entropy(output, target, name=None): def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'): - """It is a binary cross entropy operation. - - # For brevity, let `x = output`, `z = target`. The binary cross entropy loss is - # - # loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) + """Binary cross entropy operation. Parameters ---------- @@ -74,7 +70,7 @@ def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'): References ----------- - - `DRAW `__ + - `ericjang-DRAW `__ """ # from tensorflow.python.framework import ops @@ -84,6 +80,10 @@ def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'): with tf.name_scope(name): return tf.reduce_mean(tf.reduce_sum(-(target * tf.log(output + epsilon) + (1. - target) * tf.log(1. - output + epsilon)), axis=1)) + # For brevity, let `x = output`, `z = target`. The binary cross entropy loss is + # + # loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) + def mean_squared_error(output, target, is_mean=False, name="mean_squared_error"): """Return the TensorFlow expression of mean-square-error (L2) of two batch of data. @@ -96,8 +96,8 @@ def mean_squared_error(output, target, is_mean=False, name="mean_squared_error") The target distribution, format the same with `output`. is_mean : boolean Whether compute the mean or sum for each example. - - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. - - If False, use ``tf.reduce_sum`` (default). + - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. + - If False, use ``tf.reduce_sum`` (default). 
References ------------ @@ -161,8 +161,8 @@ def absolute_difference_error(output, target, is_mean=False): The target distribution, format the same with `output`. is_mean : boolean Whether compute the mean or sum for each example. - - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. - - If False, use ``tf.reduce_sum`` (default). + - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. + - If False, use ``tf.reduce_sum`` (default). """ with tf.name_scope("mean_squared_error_loss"): @@ -203,10 +203,8 @@ def dice_coe(output, target, loss_type='jaccard', axis=[1, 2, 3], smooth=1e-5): All dimensions are reduced, default ``[1,2,3]``. smooth : float This small value will be added to the numerator and denominator. - If both output and target are empty, it makes sure dice is 1. - If either output or target are empty (all pixels are background), dice = ```smooth/(small_value + smooth)``, - then if smooth is very small, dice close to 0 (even the image values lower than the threshold), - so in this case, higher smooth can have a higher dice. + - If both output and target are empty, it makes sure dice is 1. + - If either output or target are empty (all pixels are background), dice = ```smooth/(small_value + smooth)``, then if smooth is very small, dice close to 0 (even the image values lower than the threshold), so in this case, higher smooth can have a higher dice. Examples --------- @@ -359,8 +357,8 @@ def cross_entropy_seq(logits, target_seqs, batch_size=None): #, batch_size=1, n The target sequence, 2D tensor `[batch_size, n_steps]`, if the number of step is dynamic, please use ``tl.cost.cross_entropy_seq_with_mask`` instead. batch_size : None or int. Whether to divide the cost by batch size. - - If integer, the return cost will be divided by `batch_size`. - - If None (default), the return cost will not be divided by anything. + - If integer, the return cost will be divided by `batch_size`. + - If None (default), the return cost will not be divided by anything. Examples -------- @@ -401,8 +399,8 @@ def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details= The mask to compute loss, it has the same size with `target_seqs`, normally 0 or 1. return_details : boolean Whether to return detailed losses. - - If False (default), only returns the loss. - - If True, returns the loss, losses, weights and targets (see source code). + - If False (default), only returns the loss. + - If True, returns the loss, losses, weights and targets (see source code). Examples -------- @@ -581,7 +579,7 @@ def lo(weights, name='lo_regularizer'): def maxnorm_regularizer(scale=1.0, scope=None): """Max-norm regularization returns a function that can be used to apply max-norm regularization to weights. - More about max-norm, see ``__. + More about max-norm, see `wiki-max norm `_. The implementation follows `TensorFlow contrib `__. Parameters diff --git a/tensorlayer/files.py b/tensorlayer/files.py index ed8463f0d..d1eb14749 100644 --- a/tensorlayer/files.py +++ b/tensorlayer/files.py @@ -316,7 +316,7 @@ def load_matt_mahoney_text8_dataset(path='data'): Returns -------- - word_list : list of str + list of str The raw text data e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...] Examples @@ -439,13 +439,16 @@ def load_imdb_dataset(path='data', nb_words=None, skip_top=0, maxlen=None, test_ def load_nietzsche_dataset(path='data'): """Load Nietzsche dataset. - Returns a string. 
- Parameters ---------- path : str The path that the data is downloaded to, defaults is ``data/nietzsche/``. + Returns + -------- + str + The content. + Examples -------- >>> see tutorial_generate_text.py @@ -544,8 +547,8 @@ def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False ------------ tag : str or None What images to return. - - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__. - - If you want to get all images, set to ``None``. + - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__. + - If you want to get all images, set to ``None``. path : str The path that the data is downloaded to, defaults is ``data/flickr25k/``. @@ -612,8 +615,8 @@ def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printab ------------ tag : str or None What images to return. - - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__. - - If you want to get all images, set to ``None``. + - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__. + - If you want to get all images, set to ``None``. size : int integer between 1 to 10. 1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10. @@ -1202,7 +1205,7 @@ def load_npz(path='', name='model.npz'): Returns -------- - params : list of array + list of array A list of parameters in order. Examples @@ -1247,7 +1250,7 @@ def assign_params(sess, params, network): Returns -------- - ops : list of operations + list of operations A list of tf ops in order that assign params. Support sess.run(ops) manually. Examples @@ -1281,7 +1284,8 @@ def load_and_assign_npz(sess=None, name=None, network=None): Returns -------- - Returns False if faild to model is not exist. + False or network + Returns False, if the model is not exist. Examples -------- @@ -1646,7 +1650,8 @@ def exists_or_mkdir(path, verbose=True): Returns -------- - True if folder exist, otherwise, returns False and create the folder + boolean + True if folder already exist, otherwise, returns False and create the folder. Examples -------- @@ -1683,7 +1688,8 @@ def maybe_download_and_extract(filename, working_directory, url_source, extract= Returns ---------- - Filepath to dowloaded (uncompressed) file + str + File path of the dowloaded (uncompressed) file. Examples -------- diff --git a/tensorlayer/iterate.py b/tensorlayer/iterate.py index b99aae2c7..d7db2a0a0 100644 --- a/tensorlayer/iterate.py +++ b/tensorlayer/iterate.py @@ -7,14 +7,14 @@ def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): """Generate a generator that input a group of example in numpy.array and - their labels, return the examples and labels by the given batchsize. + their labels, return the examples and labels by the given batch size. Parameters ---------- inputs : numpy.array - (X) The input features, every row is a example. + The input features, every row is a example. targets : numpy.array - (y) The labels of inputs, every row is a example. + The labels of inputs, every row is a example. batch_size : int The batch size. shuffle : boolean @@ -39,8 +39,8 @@ def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): Notes ----- - If you have two inputs, e.g. X1 (1000, 100) and X2 (1000, 80), you can ``np.hstack((X1, X2)) - into (1000, 180) and feed into ``inputs``, then you can split a batch of X1 and X2. 
+ If you have two inputs and one label and want to shuffle them together, e.g. X1 (1000, 100), X2 (1000, 80) and Y (1000, 1), you can stack them together (`np.hstack((X1, X2))`) + into (1000, 180) and feed to ``inputs``. After getting a batch, you can split it back into X1 and X2. """ assert len(inputs) == len(targets) @@ -57,14 +57,14 @@ def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1): """Generate a generator that return a batch of sequence inputs and targets. - If ``batch_size=100, seq_length=5``, one return will have ``500`` rows (examples). + If `batch_size=100` and `seq_length=5`, one return will have 500 rows (examples). Parameters ---------- inputs : numpy.array - (X) The input features, every row is a example. + The input features, every row is a example. targets : numpy.array - (y) The labels of inputs, every element is a example. + The labels of inputs, every element is a example. batch_size : int The batch size. seq_length : int @@ -135,15 +135,6 @@ def seq_minibatches2(inputs, targets, batch_size, num_steps): the target context by the given batch_size and num_steps (sequence_length). In TensorFlow's tutorial, this generates the `batch_size` pointers into the raw PTB data, and allows minibatch iteration along these pointers. - - Hint, if the input data are images, you can modify the code as follow. - - .. code-block:: python - - from - data = np.zeros([batch_size, batch_len) - to - data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]]) - Parameters ---------- inputs : list of data @@ -186,6 +177,9 @@ def seq_minibatches2(inputs, targets, batch_size, num_steps): ... [[ 26. 27. 28.] ... [ 36. 37. 38.]] + Notes + ----- + - Hint, if the input data are images, you can modify the source code `data = np.zeros([batch_size, batch_len)` to `data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]])`. """ assert len(inputs) == len(targets) data_len = len(inputs) @@ -211,8 +205,7 @@ def seq_minibatches2(inputs, targets, batch_size, num_steps): def ptb_iterator(raw_data, batch_size, num_steps): """Generate a generator that iterates on a list of words, see `PTB example `__. - Yields the source contexts and the target context by the given batch_size and num_steps (sequence_length) - e.g. x = [0, 1, 2] y = [1, 2, 3] , when batch_size = 1, num_steps = 3, raw_data = [i for i in range(100)] + Yields the source contexts and the target context by the given batch_size and num_steps (sequence_length). In TensorFlow's tutorial, this generates `batch_size` pointers into the raw PTB data, and allows minibatch iteration along these pointers. @@ -258,11 +251,6 @@ def ptb_iterator(raw_data, batch_size, num_steps): ... [16 17 18]] ... [[ 7 8 9] ... [17 18 19]] - - See Also - ---------------- - - ``tensorflow/models/rnn/ptb/reader.py`` - """ raw_data = np.array(raw_data, dtype=np.int32) diff --git a/tensorlayer/layers/convolution.py b/tensorlayer/layers/convolution.py index 160e8a45b..8e6a46709 100644 --- a/tensorlayer/layers/convolution.py +++ b/tensorlayer/layers/convolution.py @@ -118,6 +118,8 @@ class Conv2dLayer(Layer): Examples -------- + With TensorFlow + >>> x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) >>> net = tl.layers.InputLayer(x, name='input_layer') >>> net = tl.layers.Conv2dLayer(net, @@ -137,7 +139,8 @@ class Conv2dLayer(Layer): ... pool = tf.nn.max_pool, ... 
name ='pool_layer1',) # output: (?, 14, 14, 32) - >>> Without TensorLayer, you can implement 2d convolution as follow. + Without TensorLayer, you can implement 2d convolution as follow. + >>> W = tf.Variable(W_init(shape=[5, 5, 1, 32], ), name='W_conv') >>> b = tf.Variable(b_init(shape=[32], ), name='b_conv') >>> outputs = tf.nn.relu( tf.nn.conv2d(inputs, W, @@ -431,7 +434,7 @@ class UpSampling2dLayer(Layer): ---------- layer : :class:`Layer` Previous layer with 4-D Tensor of the shape (batch, height, width, channels) or 3-D Tensor of the shape (height, width, channels). - size : tuple of int/float. + size : tuple of int/float (height, width) scale factor or new size of height and width. is_scale : boolean If True (default), the `size` is a scale factor; otherwise, the `size` is the numbers of pixels of height and width. @@ -491,7 +494,7 @@ class DownSampling2dLayer(Layer): ---------- layer : :class:`Layer` Previous layer with 4-D Tensor in the shape of (batch, height, width, channels) or 3-D Tensor in the shape of (height, width, channels). - size : tuple of int/float. + size : tuple of int/float (height, width) scale factor or new size of height and width. is_scale : boolean If True (default), the `size` is the scale factor; otherwise, the `size` are numbers of pixels of height and width. @@ -701,7 +704,7 @@ class DeformableConv2dLayer(Layer): >>> offset_1 = tl.layers.Conv2dLayer(layer=net, act=act, shape=(3, 3, 3, 18), strides=(1, 1, 1, 1),padding='SAME', name='offset_layer1') >>> net = tl.layers.DeformableConv2dLayer(layer=net, act=act, offset_layer=offset_1, shape=(3, 3, 3, 32), name='deformable_conv_2d_layer1') >>> offset_2 = tl.layers.Conv2dLayer(layer=net, act=act, shape=(3, 3, 32, 18), strides=(1, 1, 1, 1), padding='SAME', name='offset_layer2') - >>> net = tl.layers.DeformableConv2dLayer(layer=net, act = act, offset_layer=offset_2, shape=(3, 3, 32, 64), name='deformable_conv_2d_layer2') + >>> net = tl.layers.DeformableConv2dLayer(layer=net, act=act, offset_layer=offset_2, shape=(3, 3, 32, 64), name='deformable_conv_2d_layer2') References ---------- @@ -785,6 +788,61 @@ def __init__(self, self.all_params.extend([W, b]) +class _DeformableConv2d(DeformableConv2dLayer): # TODO + """Simplified version of :class:`DeformableConv2dLayer`, see + `Deformable Convolutional Networks `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + offset_layer : :class:`Layer` + To predict the offset of convolution operations. + The output shape is (batchsize, input height, input width, 2*(number of element in the convolution kernel)) + e.g. if apply a 3*3 kernel, the number of the last dimension should be 18 (2*3*3) + act : activation function + The activation function of this layer. + n_filter : int + The number of filters. + filter_size : tuple of int + The filter size (height, width). + W_init : initializer + The initializer for the weight matrix. + b_init : initializer or None + The initializer for the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + name : str + A unique layer name. 
+ """ + + def __init__( + self, + layer, + act=tf.identity, + offset_layer=None, + # shape=(3, 3, 1, 100), + n_filter=32, + filter_size=(3, 3), + name='deformable_conv_2d_layer', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}): + + try: + pre_channel = int(layer.outputs.get_shape()[-1]) + except: # if pre_channel is ?, it happens when using Spatial Transformer Net + pre_channel = 1 + logging.info("[warnings] unknow input channels, set to 1") + shape = (filter_size[0], filter_size[1], pre_channel, n_filter) + + DeformableConv2dLayer.__init__( + self, act=act, offset_layer=offset_layer, shape=shape, name=name, W_init=W_init, b_init=b_init, W_init_args=W_init_args, b_init_args=b_init_args) + + def atrous_conv1d( layer, n_filter=32, @@ -920,7 +978,7 @@ def __init__(self, self.all_params.extend([filters]) -class SeparableConv2dLayer(Layer): +class _SeparableConv2dLayer(Layer): # TODO """The :class:`SeparableConv2dLayer` class is 2D convolution with separable filters, see `tf.layers.separable_conv2d `__. This layer has not been fully tested yet. diff --git a/tensorlayer/layers/core.py b/tensorlayer/layers/core.py index 140408116..a71e54262 100644 --- a/tensorlayer/layers/core.py +++ b/tensorlayer/layers/core.py @@ -28,7 +28,7 @@ def flatten_reshape(variable, name='flatten'): """Reshapes a high-dimension vector input. - [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask] + [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row x mask_col x n_mask] Parameters ---------- @@ -65,7 +65,7 @@ def flatten_reshape(variable, name='flatten'): def clear_layers_name(): - """Clear all layer names in set_keep['_layers_name_list'] if layer names are reused. + """Clear all layer names in `set_keep['_layers_name_list']` if layer names are reused. Examples --------- @@ -92,7 +92,9 @@ def clear_layers_name(): def set_name_reuse(enable=True): - """Enable or disable reuse layer name. By default, each layer must has unique + """Enable or disable reuse layer name. + + By default, each layer must has unique name. When you want two or more input placeholder (inference) share the same model parameters, you need to enable layer name reuse, then allow the parameters have same name scope. @@ -100,7 +102,7 @@ def set_name_reuse(enable=True): Parameters ---------- enable : boolean - Enable or disable name/layer reuse, None means False + Enable or disable name/layer reuse, None means False. Examples -------- @@ -133,14 +135,14 @@ def set_name_reuse(enable=True): def initialize_rnn_state(state, feed_dict=None): """Returns the initialized RNN state. - The inputs are LSTMStateTuple or State of RNNCells and an optional feed_dict. + The inputs are `LSTMStateTuple` or `State` of `RNNCells`, and an optional `feed_dict`. Parameters ---------- state : RNN state. The TensorFlow's RNN state. feed_dict : dictionary - Initial RNN state; if None, returns null state. + Initial RNN state; if None, returns zero state. Returns ------- @@ -163,13 +165,15 @@ def initialize_rnn_state(state, feed_dict=None): def print_all_variables(train_only=False): - """Print all trainable and non-trainable variables - without tl.layers.initialize_global_variables(sess) + """Print information of trainable or all variables, + without ``tl.layers.initialize_global_variables(sess)``. Parameters ---------- train_only : boolean - If True, print the trainable variables; if False, print all variables. 
+ Whether print trainable variables only. + - If True, print the trainable variables. + - If False, print all variables. """ # tvar = tf.trainable_variables() if train_only else tf.all_variables() @@ -200,7 +204,7 @@ def get_variables_with_name(name=None, train_only=True, printable=False): Returns ------- - list + list of Tensor A list of TensorFlow variables Examples @@ -241,8 +245,8 @@ def get_layers_with_name(net, name="", printable=False): Returns -------- - list - a list of layers' output (TensorFlow tensor) + list of Tensor + A list of layers' output (TensorFlow tensor) Examples --------- @@ -339,7 +343,7 @@ def initialize_global_variables(sess): Parameters ---------- sess : Session - The TensorFlow session object. + TensorFlow session. """ assert sess is not None @@ -360,10 +364,18 @@ class Layer(object): Parameters ---------- inputs : :class:`Layer` instance - The `Layer` class feeding into this layer + The `Layer` class feeding into this layer. name : str or None - A unique layer name + A unique layer name. + Methods + --------- + print_params(details=True, session=None) + Print all parameters of this network. + print_layers() + Print all outputs of all layers of this network. + count_params() + Return the number of parameters of this network. """ def __init__(self, inputs=None, name='layer'): @@ -433,9 +445,9 @@ class InputLayer(Layer): Parameters ---------- inputs : placeholder or tensor - The input of a network + The input of a network. name : str - A unique layer name + A unique layer name. """ @@ -455,20 +467,20 @@ class OneHotInputLayer(Layer): Parameters ---------- inputs : placeholder or tensor - The input of a network + The input of a network. depth : None or int - If the input indices is rank N, the output will have rank N+1. The new axis is created at dimension `axis` (default: the new axis is appended at the end) + If the input indices is rank N, the output will have rank N+1. The new axis is created at dimension `axis` (default: the new axis is appended at the end). on_value : None or number - If None, it will default to the value 1 with type dtype + The value to represnt `ON`. + - If None, it will default to the value 1. off_value : None or number - If None, it will default to the value 0 with type dtype - None for default + The value to represnt `OFF`. If None, it will default to the value 0. axis : None or int - The axis + The axis. dtype : None or TensorFlow dtype - The data type, None for tf.float32 + The data type, None means tf.float32. name : str - A unique layer name + A unique layer name. """ @@ -627,14 +639,11 @@ def __init__( class EmbeddingInputlayer(Layer): """ - The :class:`EmbeddingInputlayer` class is a fully connected layer, - for Word Embedding. Word content are accessed using integer indexes. - The output is the embedded word vector. + The :class:`EmbeddingInputlayer` class is a look-up table for word embedding. - If you have a pre-train matrix, you can assign the matrix into it. + Word content are accessed using integer indexes, then the output is the embedded word vector. To train a word embedding matrix, you can used :class:`Word2vecEmbeddingInputlayer`. - - Note that, do not update this embedding matrix. + If you have a pre-trained matrix, you can assign the parameters into it. Parameters ---------- @@ -659,47 +668,12 @@ class EmbeddingInputlayer(Layer): Examples -------- - >>> vocabulary_size = 50000 - >>> embedding_size = 200 - >>> model_file_name = "model_word2vec_50k_200" - >>> batch_size = None - ... 
- >>> all_var = tl.files.load_npy_to_any(name=model_file_name+'.npy') - >>> data = all_var['data']; count = all_var['count'] - >>> dictionary = all_var['dictionary'] - >>> reverse_dictionary = all_var['reverse_dictionary'] - >>> tl.files.save_vocab(count, name='vocab_'+model_file_name+'.txt') - >>> del all_var, data, count - ... - >>> load_params = tl.files.load_npz(name=model_file_name+'.npz') - >>> x = tf.placeholder(tf.int32, shape=(batch_size)) - >>> y_ = tf.placeholder(tf.int32, shape=(batch_size, 1)) + >>> x = tf.placeholder(tf.int32, shape=(batch_size,)) >>> emb_net = tl.layers.EmbeddingInputlayer( ... inputs = x, ... vocabulary_size = vocabulary_size, ... embedding_size = embedding_size, - ... name ='embedding_layer') - >>> tl.layers.initialize_global_variables(sess) - >>> tl.files.assign_params(sess, [load_params[0]], emb_net) - >>> word = b'hello' - >>> word_id = dictionary[word] - >>> print('word_id:', word_id) - ... 6428 - ... - >>> words = [b'i', b'am', b'hao', b'dong'] - >>> word_ids = tl.files.words_to_word_ids(words, dictionary) - >>> context = tl.files.word_ids_to_words(word_ids, reverse_dictionary) - >>> print('word_ids:', word_ids) - ... [72, 1226, 46744, 20048] - >>> print('context:', context) - ... [b'i', b'am', b'hao', b'dong'] - ... - >>> vector = sess.run(emb_net.outputs, feed_dict={x : [word_id]}) - >>> print('vector:', vector.shape) - ... (1, 200) - >>> vectors = sess.run(emb_net.outputs, feed_dict={x : word_ids}) - >>> print('vectors:', vectors.shape) - ... (4, 200) + ... name ='embed') """ @@ -710,7 +684,7 @@ def __init__( embedding_size=200, E_init=tf.random_uniform_initializer(-0.1, 0.1), E_init_args={}, - name='embedding_layer', + name='embedding', ): Layer.__init__(self, name=name) self.inputs = inputs @@ -907,9 +881,9 @@ def __init__( class ReconLayer(DenseLayer): - """ - The :class:`ReconLayer` class is a reconstruction layer for :class:`DenseLayer` for AutoEncoder. - It is used to pre-train the previous :class:`DenseLayer` + """A reconstruction layer for :class:`DenseLayer` to implement AutoEncoder. + + It is often used to pre-train the previous :class:`DenseLayer` Parameters ---------- @@ -940,7 +914,7 @@ class ReconLayer(DenseLayer): Methods ------- pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre') - Start to pre-train the parameters of previous DenseLayer. + Start to pre-train the parameters of the previous DenseLayer. Notes ----- diff --git a/tensorlayer/layers/flow_control.py b/tensorlayer/layers/flow_control.py index b32d1cc71..5ab1b0170 100644 --- a/tensorlayer/layers/flow_control.py +++ b/tensorlayer/layers/flow_control.py @@ -50,10 +50,6 @@ class MultiplexerLayer(Layer): >>> network = tl.layers.DenseLayer(network, n_units=10, ... act = tf.identity, name='output_layer') - References - ------------ - - See ``tf.pack() for TF0.12 or tf.stack() for TF1.0`` and ``tf.gather()`` at `TensorFlow - Slicing and Joining `__ - """ def __init__(self, layers, name='mux_layer'): diff --git a/tensorlayer/layers/importer.py b/tensorlayer/layers/importer.py index 35ec5fdd9..9e108ea7b 100644 --- a/tensorlayer/layers/importer.py +++ b/tensorlayer/layers/importer.py @@ -11,9 +11,9 @@ class LambdaLayer(Layer): ---------- layer : :class:`Layer` Previous layer. - fn : a function + fn : function The function that applies to the outputs of previous layer. - fn_args : a dictionary + fn_args : dictionary The arguments for the function (option). name : str A unique layer name. 
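As a minimal usage sketch (assuming the TensorLayer 1.x API documented above; the placeholder shape and layer names are illustrative), a ``LambdaLayer`` wraps an arbitrary TensorFlow op around the previous layer's outputs:

.. code-block:: python

    import tensorflow as tf
    import tensorlayer as tl

    x = tf.placeholder(tf.float32, shape=(None, 784), name='x')
    net = tl.layers.InputLayer(x, name='input')
    net = tl.layers.DenseLayer(net, n_units=800, act=tf.nn.relu, name='dense')
    # fn is applied to the outputs of the previous layer; here they are simply scaled by 2
    net = tl.layers.LambdaLayer(net, fn=lambda v: v * 2, name='lambda')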
@@ -210,4 +210,4 @@ def __init__( self.all_params = list(layer.all_params) self.all_drop = dict(layer.all_drop) self.all_layers.extend([self.outputs]) - self.all_params.extend(variables) \ No newline at end of file + self.all_params.extend(variables) diff --git a/tensorlayer/layers/pooling.py b/tensorlayer/layers/pooling.py index 82ceae2bb..b0cecc54d 100644 --- a/tensorlayer/layers/pooling.py +++ b/tensorlayer/layers/pooling.py @@ -8,8 +8,8 @@ class PoolLayer(Layer): """ The :class:`PoolLayer` class is a Pooling layer. - You can choose ``tf.nn.max_pool`` and ``tf.nn.avg_pool`` for 2D or - ``tf.nn.max_pool3d`` and ``tf.nn.avg_pool3d`` for 3D. + You can choose ``tf.nn.max_pool`` and ``tf.nn.avg_pool`` for 2D input or + ``tf.nn.max_pool3d`` and ``tf.nn.avg_pool3d`` for 3D input. Parameters ---------- @@ -24,7 +24,7 @@ class PoolLayer(Layer): padding : str The padding algorithm type: "SAME" or "VALID". pool : pooling function - One of ``tf.nn.max_pool``, ``tf.nn.avg_pool``, ``tf.nn.max_pool3d``, and ``f.nn.avg_pool3d``. + One of ``tf.nn.max_pool``, ``tf.nn.avg_pool``, ``tf.nn.max_pool3d`` and ``f.nn.avg_pool3d``. See `TensorFlow pooling APIs `__ name : str A unique layer name. diff --git a/tensorlayer/layers/recurrent.py b/tensorlayer/layers/recurrent.py index 6401bc85a..5f8ae2f0d 100644 --- a/tensorlayer/layers/recurrent.py +++ b/tensorlayer/layers/recurrent.py @@ -16,8 +16,8 @@ class RNNLayer(Layer): Previous layer. cell_fn : TensorFlow cell function A TensorFlow core RNN cell - - see `RNN Cells in TensorFlow `__ - - Note TF1.0+ and TF1.0- are different + - See `RNN Cells in TensorFlow `__ + - Note TF1.0+ and TF1.0- are different cell_init_args : dictionary The arguments for the cell function. n_hidden : int @@ -30,13 +30,13 @@ class RNNLayer(Layer): If None, `initial_state` is zero state. return_last : boolean Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output" - - If False, return all outputs, "Synced sequence input and output" - - In other word, if you want to stack more RNNs on this layer, set to False. + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to stack more RNNs on this layer, set to False. return_seq_2d : boolean Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. name : str A unique layer name. @@ -47,13 +47,13 @@ class RNNLayer(Layer): final_state : Tensor or StateTuple The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. + - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. + - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. 
+ - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. initial_state : Tensor or StateTuple The initial state of this layer. - - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. + - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. batch_size : int or Tensor It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. @@ -252,8 +252,8 @@ class BiRNNLayer(Layer): Previous layer. cell_fn : TensorFlow cell function A TensorFlow core RNN cell. - See `RNN Cells in TensorFlow `__. - Note TF1.0+ and TF1.0- are different. + - See `RNN Cells in TensorFlow `__. + - Note TF1.0+ and TF1.0- are different. cell_init_args : dictionary The arguments for the cell function. n_hidden : int @@ -273,13 +273,13 @@ class BiRNNLayer(Layer): The number of RNN layers, default is 1. return_last : boolean Whether return last output or all outputs in each step. - If True, return the last output, "Sequence input and single output" - If False, return all outputs, "Synced sequence input and output" - In other word, if you want to stack more RNNs on this layer, set to False. + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to stack more RNNs on this layer, set to False. return_seq_2d : boolean Only consider this argument when `return_last` is `False` - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. name : str A unique layer name. @@ -289,12 +289,12 @@ class BiRNNLayer(Layer): The output of this layer. fw(bw)_final_state : tensor or StateTuple The finial state of this layer. - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. + - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. + - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. + - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. fw(bw)_initial_state : tensor or StateTuple The initial state of this layer. - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. + - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. batch_size : int or tensor It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. @@ -639,30 +639,28 @@ class ConvLSTMLayer(Layer): If None, `initial_state` is zero state. return_last : boolean Whether return last output or all outputs in each step. - If True, return the last output, "Sequence input and single output". 
- If False, return all outputs, "Synced sequence input and output". - In other word, if you want to stack more RNNs on this layer, set to False. + - If True, return the last output, "Sequence input and single output". + - If False, return all outputs, "Synced sequence input and output". + - In other word, if you want to stack more RNNs on this layer, set to False. return_seq_2d : boolean Only consider this argument when `return_last` is `False` - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. name : str A unique layer name. Attributes ---------- - outputs : a tensor + outputs : tensor The output of this RNN. return_last = False, outputs = all cell_output, which is the hidden state. cell_output.get_shape() = (?, h, w, c]) - final_state : a tensor or StateTuple - When state_is_tuple = False, - it is the final hidden and cell states, - When state_is_tuple = True, - You can get the final state after each iteration during training, then - feed it to the initial state of next iteration. + final_state : tensor or StateTuple + The finial state of this layer. + - When state_is_tuple = False, it is the final hidden and cell states, + - When state_is_tuple = True, You can get the final state after each iteration during training, then feed it to the initial state of next iteration. - initial_state : a tensor or StateTuple + initial_state : tensor or StateTuple It is the initial state of this ConvLSTM layer, you can use it to initialize your state at the beginning of each epoch or iteration according to your training procedure. @@ -873,7 +871,9 @@ def retrieve_seq_length_op2(data): def retrieve_seq_length_op3(data, pad_val=0): # HangSheng: return tensor for sequence length, if input is tf.string - """Return tensor for sequence length, if input is ``tf.string``.""" + """Return tensor for sequence length, if input is ``tf.string``. + + """ data_shape_size = data.get_shape().ndims if data_shape_size == 3: return tf.reduce_sum(tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32), 1) @@ -886,7 +886,9 @@ def retrieve_seq_length_op3(data, pad_val=0): # HangSheng: return tensor for se def target_mask_op(data, pad_val=0): # HangSheng: return tensor for mask,if input is tf.string - """Return tensor for mask, if input is ``tf.string``.""" + """Return tensor for mask, if input is ``tf.string``. + + """ data_shape_size = data.get_shape().ndims if data_shape_size == 3: return tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32) @@ -909,8 +911,8 @@ class DynamicRNNLayer(Layer): Previous layer cell_fn : TensorFlow cell function A TensorFlow core RNN cell - - see `RNN Cells in TensorFlow `__ - - Note TF1.0+ and TF1.0- are different + - See `RNN Cells in TensorFlow `__ + - Note TF1.0+ and TF1.0- are different cell_init_args : dictionary The arguments for the cell function. n_hidden : int @@ -919,26 +921,26 @@ class DynamicRNNLayer(Layer): The initializer for initializing the parameters. sequence_length : tensor, array or None The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``. - - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. 
when the features of padding (on right hand side) are all zeros. - - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``. - - You can also input an numpy array. - - More details about TensorFlow dynamic RNN in `Wild-ML Blog `__. + - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. when the features of padding (on right hand side) are all zeros. + - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``. + - You can also input an numpy array. + - More details about TensorFlow dynamic RNN in `Wild-ML Blog `__. initial_state : None or RNN State If None, `initial_state` is zero state. dropout : tuple of float or int The input and output keep probability (input_keep_prob, output_keep_prob). - - If one int, input and output keep probability are the same. + - If one int, input and output keep probability are the same. n_layer : int The number of RNN layers, default is 1. return_last : boolean Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output" - - If False, return all outputs, "Synced sequence input and output" - - In other word, if you want to stack more RNNs on this layer, set to False. + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to stack more RNNs on this layer, set to False. return_seq_2d : boolean Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. dynamic_rnn_init_args : dictionary The arguments for ``tf.nn.dynamic_rnn``. name : str @@ -951,13 +953,13 @@ class DynamicRNNLayer(Layer): final_state : tensor or StateTuple The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. + - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. + - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. + - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. initial_state : tensor or StateTuple The initial state of this layer. - - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. + - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. batch_size : int or tensor It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. 
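A minimal usage sketch (assuming the TensorLayer 1.x and TensorFlow 1.x APIs documented above; the vocabulary size, hidden size and layer names are illustrative), combining ``EmbeddingInputlayer``, ``DynamicRNNLayer`` with ``retrieve_seq_length_op2``, and a ``DenseLayer``:

.. code-block:: python

    import tensorflow as tf
    import tensorlayer as tl

    batch_size = 32
    input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='input_seqs')
    net = tl.layers.EmbeddingInputlayer(
        inputs=input_seqs, vocabulary_size=10000, embedding_size=200, name='embedding')
    net = tl.layers.DynamicRNNLayer(net,
        cell_fn=tf.contrib.rnn.BasicLSTMCell,
        n_hidden=200,
        dropout=(0.7, 0.7),
        # compute the true length of each row from the padded ID array
        sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs),
        return_last=False,
        return_seq_2d=True,  # 2D output, ready for stacking a DenseLayer
        name='dynamicrnn')
    net = tl.layers.DenseLayer(net, n_units=10000, act=tf.identity, name='output')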
@@ -1166,8 +1168,8 @@ class BiDynamicRNNLayer(Layer): Previous layer. cell_fn : TensorFlow cell function A TensorFlow core RNN cell - - see `RNN Cells in TensorFlow `__. - - Note TF1.0+ and TF1.0- are different. + - See `RNN Cells in TensorFlow `__. + - Note TF1.0+ and TF1.0- are different. cell_init_args : dictionary The arguments for the cell initializer. n_hidden : int @@ -1176,28 +1178,28 @@ class BiDynamicRNNLayer(Layer): The initializer for initializing the parameters. sequence_length : tensor, array or None The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``. - - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. when the features of padding (on right hand side) are all zeros. - - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``. - - You can also input an numpy array. - - More details about TensorFlow dynamic RNN in `Wild-ML Blog `__. + - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. when the features of padding (on right hand side) are all zeros. + - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``. + - You can also input an numpy array. + - More details about TensorFlow dynamic RNN in `Wild-ML Blog `__. fw_initial_state : None or forward RNN State If None, `initial_state` is zero state. bw_initial_state : None or backward RNN State If None, `initial_state` is zero state. dropout : tuple of float or int The input and output keep probability (input_keep_prob, output_keep_prob). - - If one int, input and output keep probability are the same. + - If one int, input and output keep probability are the same. n_layer : int The number of RNN layers, default is 1. return_last : boolean Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output" - - If False, return all outputs, "Synced sequence input and output" - - In other word, if you want to stack more RNNs on this layer, set to False. + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to stack more RNNs on this layer, set to False. return_seq_2d : boolean Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it. + - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it. dynamic_rnn_init_args : dictionary The arguments for ``tf.nn.bidirectional_dynamic_rnn``. name : str @@ -1210,13 +1212,13 @@ class BiDynamicRNNLayer(Layer): fw(bw)_final_state : tensor or StateTuple The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. 
+ - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. + - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. + - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. fw(bw)_initial_state : tensor or StateTuple The initial state of this layer. - - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. + - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. batch_size : int or tensor It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. @@ -1423,8 +1425,8 @@ class Seq2Seq(Layer): Decode sequences, [batch_size, None, n_features]. cell_fn : TensorFlow cell function A TensorFlow core RNN cell - - see `RNN Cells in TensorFlow `__ - - Note TF1.0+ and TF1.0- are different + - see `RNN Cells in TensorFlow `__ + - Note TF1.0+ and TF1.0- are different cell_init_args : dictionary The arguments for the cell initializer. n_hidden : int @@ -1441,13 +1443,13 @@ class Seq2Seq(Layer): If None, `initial_state_decode` is the final state of the RNN encoder, it can be set by placeholder or other RNN. dropout : tuple of float or int The input and output keep probability (input_keep_prob, output_keep_prob). - - If one int, input and output keep probability are the same. + - If one int, input and output keep probability are the same. n_layer : int The number of RNN layers, default is 1. return_seq_2d : boolean Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it. + - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it. name : str A unique layer name. diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py index 6532efc77..79faaa76e 100755 --- a/tensorlayer/nlp.py +++ b/tensorlayer/nlp.py @@ -99,10 +99,10 @@ def sample(a=[], temperature=1.0): List of probabilities. temperature : float or None The higher the more uniform. When a = [0.1, 0.2, 0.7], - - temperature = 0.7, the distribution will be sharpen [0.05048273, 0.13588945, 0.81362782] - - temperature = 1.0, the distribution will be the same [0.1, 0.2, 0.7] - - temperature = 1.5, the distribution will be filtered [0.16008435, 0.25411807, 0.58579758] - - If None, it will be ``np.argmax(a)`` + - temperature = 0.7, the distribution will be sharpen [0.05048273, 0.13588945, 0.81362782] + - temperature = 1.0, the distribution will be the same [0.1, 0.2, 0.7] + - temperature = 1.5, the distribution will be filtered [0.16008435, 0.25411807, 0.58579758] + - If None, it will be ``np.argmax(a)`` Notes ------ @@ -172,7 +172,7 @@ class SimpleVocabulary(object): Parameters ------------ vocab : dictionary - A dictionary for converting word to ID. + A dictionary that maps word to ID. unk_id : int The ID for 'unknown' word. @@ -209,9 +209,9 @@ class Vocabulary(object): Attributes ------------ vocab : dictionary - A dictionary for converting word to ID. + A dictionary that maps word to ID. reverse_vocab : list of int - A list for converting ID to word. + A list that maps ID to word. 
start_id : int For start ID. end_id : int @@ -307,7 +307,7 @@ def process_sentence(sentence, start_word="", end_word=""): Returns --------- - process_sentence : list of str + list of str A list of strings that separated into words. Examples @@ -432,7 +432,7 @@ def simple_read_words(filename="nietzsche.txt"): Returns -------- - words : str + str The context in a string. """ @@ -442,7 +442,7 @@ def simple_read_words(filename="nietzsche.txt"): def read_words(filename="nietzsche.txt", replace=['\n', '']): - r"""File to list format context. Note that, this script can not handle punctuations. + """File to list format context. Note that, this script can not handle punctuations. For customized read_words method, see ``tutorial_generate_text.py``. Parameters @@ -454,12 +454,12 @@ def read_words(filename="nietzsche.txt", replace=['\n', '']): Returns -------- - context_list : list of str - The context in a list, split by space by default, and use ```` to represent ``\\n``, e.g. ``[... 'how', 'useful', 'it', "'s" ... ]``. + list of str + The context in a list, split by space by default, and use ```` to represent ``\n``, e.g. ``[... 'how', 'useful', 'it', "'s" ... ]``. References --------------- - - `tensorflow.models.rnn.ptb.reader `__ + - `tensorflow.models.rnn.ptb.reader `_ """ with tf.gfile.GFile(filename, "r") as f: @@ -480,11 +480,11 @@ def read_analogies_file(eval_file='questions-words.txt', word2id={}): eval_data : str The file name. word2id : dictionary - For converting word to ID. + a dictionary that maps word to ID. Returns -------- - analogy_questions : numpy.array + numpy.array A `[n_examples, 4]` numpy array containing the analogy question's word IDs. Examples @@ -550,12 +550,12 @@ def build_vocab(data): Returns -------- - word_to_id : dictionary - For converting word to unique ID. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } + dictionary + that maps word to unique ID. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } References --------------- - - `tensorflow.models.rnn.ptb.reader `__ + - `tensorflow.models.rnn.ptb.reader `_ Examples -------- @@ -577,18 +577,18 @@ def build_vocab(data): def build_reverse_dictionary(word_to_id): - """Given a dictionary for converting word to integer id. - Returns a reverse dictionary for converting a id to word. + """Given a dictionary that maps word to integer id. + Returns a reverse dictionary that maps a id to word. Parameters ---------- word_to_id : dictionary - For converting word to ID. + that maps word to ID. Returns -------- - reverse_dictionary : dictionary - FOr converting ID to word. + dictionary + A dictionary that maps IDs to words. """ reverse_dictionary = dict(zip(word_to_id.values(), word_to_id.keys())) @@ -616,13 +616,13 @@ def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key The context in a list of ID. count : list of tuple and list Pair words and IDs. - - count[0] is a list : the number of rare words - - count[1:] are tuples : the number of occurrence of each word - - e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] + - count[0] is a list : the number of rare words + - count[1:] are tuples : the number of occurrence of each word + - e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] dictionary : dictionary - It is `word_to_id` for converting word to ID. + It is `word_to_id` that maps word to ID. reverse_dictionary : a dictionary - It is `id_to_word`, for converting ID to word. 
+ It is `id_to_word` that maps ID to word. Examples -------- @@ -668,13 +668,13 @@ def words_to_word_ids(data=[], word_to_id={}, unk_key='UNK'): data : list of string or byte The context in list format word_to_id : a dictionary - For converting word to ID. + that maps word to ID. unk_key : str Represent the unknown words. Returns -------- - word_ids : list of int + list of int A list of IDs to represent the context. Examples @@ -729,11 +729,11 @@ def word_ids_to_words(data, id_to_word): data : list of int The context in list format. id_to_word : dictionary - For converting ID to word. + a dictionary that maps ID to word. Returns -------- - words : list of str + list of str A list of string or byte to represent the context. Examples @@ -892,9 +892,9 @@ def initialize_vocabulary(vocabulary_path): Returns -------- vocab : dictionary - For converting word to ID. + a dictionary that maps word to ID. rev_vocab : list of int - For converting ID to word. + a list that maps ID to word. Examples --------- @@ -944,7 +944,8 @@ def sentence_to_token_ids(sentence, vocabulary, tokenizer=None, normalize_digits Returns -------- - - A list of integers, the token-ids for the sentence. + list of int + The token-ids for the sentence. """ if tokenizer: diff --git a/tensorlayer/prepro.py b/tensorlayer/prepro.py index 9aa4d81c7..b7aeb2385 100644 --- a/tensorlayer/prepro.py +++ b/tensorlayer/prepro.py @@ -27,7 +27,7 @@ def threading_data(data=None, fn=None, thread_count=None, **kwargs): - """Return a batch of result by given data. + """Process a batch of data by given function by threading. Usually be used for data augmentation. @@ -58,7 +58,7 @@ def threading_data(data=None, fn=None, thread_count=None, **kwargs): ... return x >>> images = tl.prepro.threading_data(images, distort_img) - Process images and masks together. + Process images and masks together (Usually be used for image segmentation). >>> X, Y --> [batch_size, row, col, 1] >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], tl.prepro.zoom_multi, zoom_range=[0.5, 1], is_random=True) @@ -68,7 +68,7 @@ def threading_data(data=None, fn=None, thread_count=None, **kwargs): >>> tl.vis.save_image(X_, 'images.png') >>> tl.vis.save_image(Y_, 'masks.png') - Process images and masks together, using ``thread_count`` threads. + Process images and masks together by using ``thread_count``. >>> X, Y --> [batch_size, row, col, 1] >>> data = tl.prepro.threading_data(X, tl.prepro.zoom_multi, 8, zoom_range=[0.5, 1], is_random=True) @@ -82,14 +82,19 @@ def threading_data(data=None, fn=None, thread_count=None, **kwargs): >>> def distort_img(data): ... x, y = data - ... x, y = flip_axis_multi([x, y], axis=0, is_random=True) - ... x, y = flip_axis_multi([x, y], axis=1, is_random=True) - ... x, y = crop_multi([x, y], 100, 100, is_random=True) + ... x, y = tl.prepro.flip_axis_multi([x, y], axis=0, is_random=True) + ... x, y = tl.prepro.flip_axis_multi([x, y], axis=1, is_random=True) + ... x, y = tl.prepro.crop_multi([x, y], 100, 100, is_random=True) ... return x, y >>> X, Y --> [batch_size, row, col, channel] >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], distort_img) >>> X_, Y_ = data.transpose((1,0,2,3,4)) + Returns + ------- + list or numpyarray + The processed results. + References ---------- - `python queue `__ @@ -152,12 +157,17 @@ def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index= Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). 
fill_mode : str Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap` - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ cval : float Value used for points outside the boundaries of the input if mode=`constant`. Default is 0.0 order : int The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform``. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ + + Returns + ------- + numpy.array + A processed image. Examples --------- @@ -189,6 +199,11 @@ def rotation_multi(x, rg=20, is_random=False, row_index=0, col_index=1, channel_ others : args See ``tl.prepro.rotation``. + Returns + ------- + numpy.array + A list of processed images. + Examples -------- >>> x, y --> [row, col, 1] greyscale @@ -226,6 +241,10 @@ def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2 row_index col_index and channel_index : int Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). + Returns + ------- + numpy.array + A processed image. """ h, w = x.shape[row_index], x.shape[col_index] assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" @@ -258,6 +277,10 @@ def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_i others : args See ``tl.prepro.crop``. + Returns + ------- + numpy.array + A list of processed images. """ h, w = x[0].shape[row_index], x[0].shape[col_index] assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" @@ -288,12 +311,16 @@ def flip_axis(x, axis=1, is_random=False): An image with dimension of [row, col, channel] (default). axis : int Which axis to flip. - - 0, flip up and down - - 1, flip left and right - - 2, flip channel + - 0, flip up and down + - 1, flip left and right + - 2, flip channel is_random : boolean If True, randomly flip. Default is False. + Returns + ------- + numpy.array + A processed image. """ if is_random: factor = np.random.uniform(-1, 1) @@ -321,6 +348,10 @@ def flip_axis_multi(x, axis, is_random=False): others : args See ``tl.prepro.flip_axis``. + Returns + ------- + numpy.array + A list of processed images. """ if is_random: factor = np.random.uniform(-1, 1) @@ -370,13 +401,17 @@ def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channe Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). fill_mode : str Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. order : int The order of interpolation. The order has to be in the range 0-5. See ``apply_transform``. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ + Returns + ------- + numpy.array + A processed image. """ h, w = x.shape[row_index], x.shape[col_index] if is_random: @@ -402,6 +437,10 @@ def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, others : args See ``tl.prepro.shift``. + Returns + ------- + numpy.array + A list of processed images. """ h, w = x[0].shape[row_index], x[0].shape[col_index] if is_random: @@ -435,12 +474,17 @@ def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_i Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). 
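A minimal augmentation chain built from the single-image functions in this module; it assumes ``x`` is one image of shape [row, col, channel] larger than 100x100 pixels, and the parameter values are illustrative only (for matched image/mask pairs the ``*_multi`` variants would be used instead):

.. code-block:: python

    import tensorlayer as tl

    # random rotation within [-20, 20] degrees
    x = tl.prepro.rotation(x, rg=20, is_random=True, fill_mode='nearest')
    # random left/right flip
    x = tl.prepro.flip_axis(x, axis=1, is_random=True)
    # random shift of up to 10% of width and height
    x = tl.prepro.shift(x, wrg=0.1, hrg=0.1, is_random=True)
    # random 100x100 crop
    x = tl.prepro.crop(x, wrg=100, hrg=100, is_random=True)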
fill_mode : str Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. order : int The order of interpolation. The order has to be in the range 0-5. See ``apply_transform``. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ + + Returns + ------- + numpy.array + A processed image. References ----------- @@ -470,6 +514,10 @@ def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, cha others : args See ``tl.prepro.shear``. + Returns + ------- + numpy.array + A list of processed images. """ if is_random: shear = np.random.uniform(-intensity, intensity) @@ -494,18 +542,23 @@ def shear2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, chann An image with dimension of [row, col, channel] (default). shear : tuple of two floats Percentage of shear for height and width direction (0, 1). - is_random : boolean, + is_random : boolean If True, randomly shear. Default is False. row_index col_index and channel_index : int Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). fill_mode : str Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. order : int The order of interpolation. The order has to be in the range 0-5. See ``apply_transform``. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ + + Returns + ------- + numpy.array + A processed image. References ----------- @@ -536,6 +589,10 @@ def shear_multi2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, others : args See ``tl.prepro.shear2``. + Returns + ------- + numpy.array + A list of processed images. """ assert len(shear) == 2, "shear should be tuple of 2 floats, or you want to use tl.prepro.shear_multi rather than tl.prepro.shear_multi2 ?" if is_random: @@ -595,10 +652,15 @@ def swirl(x, Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float. is_random : boolean, If True, random swirl. Default is False. - - random center = [(0 ~ x.shape[0]), (0 ~ x.shape[1])] - - random strength = [0, strength] - - random radius = [1e-10, radius] - - random rotation = [-rotation, rotation] + - random center = [(0 ~ x.shape[0]), (0 ~ x.shape[1])] + - random strength = [0, strength] + - random radius = [1e-10, radius] + - random rotation = [-rotation, rotation] + + Returns + ------- + numpy.array + A processed image. Examples --------- @@ -658,6 +720,10 @@ def swirl_multi(x, others : args See ``tl.prepro.swirl``. + Returns + ------- + numpy.array + A list of processed images. """ assert radius != 0, Exception("Invalid radius value") rotation = np.pi / 180 * rotation @@ -713,6 +779,11 @@ def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False) is_random : boolean Default is False. + Returns + ------- + numpy.array + A processed image. + Examples --------- >>> x = tl.prepro.elastic_transform(x, alpha=x.shape[1]*3, sigma=x.shape[1]*0.07) @@ -759,6 +830,10 @@ def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random= others : args See ``tl.prepro.elastic_transform``. 
+ Returns + ------- + numpy.array + A list of processed images. """ if is_random is False: random_state = np.random.RandomState(None) @@ -803,21 +878,25 @@ def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, ch An image with dimension of [row, col, channel] (default). zoom_range : list or tuple Zoom range for height and width. - - If is_random=False, (h, w) are the fixed zoom factor for row and column axies, factor small than one is zoom in. - - If is_random=True, (h, w) are (min zoom out, max zoom out) for x and y with different random zoom in/out factor, e.g (0.5, 1) zoom in 1~2 times. + - If is_random=False, (h, w) are the fixed zoom factor for row and column axies, factor small than one is zoom in. + - If is_random=True, (h, w) are (min zoom out, max zoom out) for x and y with different random zoom in/out factor, e.g (0.5, 1) zoom in 1~2 times. is_random : boolean If True, randomly zoom. Default is False. row_index col_index and channel_index : int Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). fill_mode : str Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. order : int The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform``. - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ + Returns + ------- + numpy.array + A processed image. """ if len(zoom_range) != 2: raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range) @@ -849,6 +928,10 @@ def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index others : args See ``tl.prepro.zoom``. + Returns + ------- + numpy.array + A list of processed images. """ if len(zoom_range) != 2: raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range) @@ -889,12 +972,17 @@ def brightness(x, gamma=1, gain=1, is_random=False): An image with dimension of [row, col, channel] (default). gamma : float Non negative real number. Default value is 1. - - Small than 1 means brighter. - - If `is_random` is True, gamma in a range of (1-gamma, 1+gamma). + - Small than 1 means brighter. + - If `is_random` is True, gamma in a range of (1-gamma, 1+gamma). gain : float The constant multiplier. Default value is 1. is_random : boolean - - If True, randomly change brightness. Default is False. + If True, randomly change brightness. Default is False. + + Returns + ------- + numpy.array + A processed image. References ----------- @@ -919,6 +1007,10 @@ def brightness_multi(x, gamma=1, gain=1, is_random=False): others : args See ``tl.prepro.brightness``. + Returns + ------- + numpy.array + A list of processed images. """ if is_random: gamma = np.random.uniform(1 - gamma, 1 + gamma) @@ -938,18 +1030,23 @@ def illumination(x, gamma=1., contrast=1., saturation=1., is_random=False): An image with dimension of [row, col, channel] (default). gamma : float Change brightness (the same with ``tl.prepro.brightness``) - - if is_random=False, one float number, small than one means brighter, greater than one means darker. - - if is_random=True, tuple of two float numbers, (min, max). + - if is_random=False, one float number, small than one means brighter, greater than one means darker. 
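A short sketch combining ``zoom`` and ``brightness`` on one [row, col, channel] image; the ranges are arbitrary and only meant to show how the random modes documented above are parameterised:

.. code-block:: python

    import tensorlayer as tl

    # random zoom factor drawn between 0.9x and 1.1x
    x = tl.prepro.zoom(x, zoom_range=(0.9, 1.1), is_random=True)

    # random gamma drawn from (1 - 0.5, 1 + 0.5); gamma < 1 makes the image brighter
    x = tl.prepro.brightness(x, gamma=0.5, gain=1, is_random=True)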
+ - if is_random=True, tuple of two float numbers, (min, max). contrast : float Change contrast. - - if is_random=False, one float number, small than one means blur. - - if is_random=True, tuple of two float numbers, (min, max). + - if is_random=False, one float number, small than one means blur. + - if is_random=True, tuple of two float numbers, (min, max). saturation : float Change saturation. - - if is_random=False, one float number, small than one means unsaturation. - - if is_random=True, tuple of two float numbers, (min, max). + - if is_random=False, one float number, small than one means unsaturation. + - if is_random=True, tuple of two float numbers, (min, max). is_random : boolean - Whether the parameters are randomly set. + If True, randomly change illumination. Default is False. + + Returns + ------- + numpy.array + A processed image. Examples --------- @@ -1008,6 +1105,10 @@ def rgb_to_hsv(rgb): rgb : numpy.array An image with values between 0 and 255. + Returns + ------- + numpy.array + A processed image. """ # Translated from source of colorsys.rgb_to_hsv # r,g,b should be a numpy arrays with values between 0 and 255 @@ -1041,6 +1142,10 @@ def hsv_to_rgb(hsv): hsv : numpy.array An image with values between 0.0 and 1.0 + Returns + ------- + numpy.array + A processed image. """ # Translated from source of colorsys.hsv_to_rgb # h,s should be a numpy arrays with values between 0.0 and 1.0 @@ -1073,14 +1178,20 @@ def adjust_hue(im, hout=0.66, is_offset=True, is_clip=True, is_random=False): im : numpy.array An image with values between 0 and 255. hout : float - - If is_offset is False, set all hue values to this value. 0 is red; 0.33 is green; 0.66 is blue. - - If is_offset is True, add this value as the offset to the hue channel. + The scale value for adjusting hue. + - If is_offset is False, set all hue values to this value. 0 is red; 0.33 is green; 0.66 is blue. + - If is_offset is True, add this value as the offset to the hue channel. is_offset : boolean Whether `hout` is added on HSV as offset or not. Default is True. is_clip : boolean If HSV value smaller than 0, set to 0. Default is True. is_random : boolean - Whether change HSV randomly. Default is False. + If True, randomly change hue. Default is False. + + Returns + ------- + numpy.array + A processed image. Examples --------- @@ -1142,6 +1253,11 @@ def imresize(x, size=[100, 100], interp='bicubic', mode=None): mode : str The PIL image mode (`P`, `L`, etc.) to convert arr before resizing. + Returns + ------- + numpy.array + A processed image. + References ------------ - `scipy.misc.imresize `__ @@ -1167,9 +1283,14 @@ def pixel_value_scale(im, val=0.9, clip=[], is_random=False): im : numpy.array An image. val : float - Degree of value change. - - If is_random=False, multiply this value with all pixels. - - If is_random=True, multiply a value between [1-val, 1+val] with all pixels. + The scale value for changing pixel value. + - If is_random=False, multiply this value with all pixels. + - If is_random=True, multiply a value between [1-val, 1+val] with all pixels. + + Returns + ------- + numpy.array + A processed image. Examples ---------- @@ -1211,6 +1332,11 @@ def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_nor epsilon : float A small position value for dividing standard deviation. + Returns + ------- + numpy.array + A processed image. 
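To make the two modes of ``adjust_hue`` concrete, a hedged sketch on an RGB image ``im`` with values in [0, 255] (the offset 0.1 is arbitrary):

.. code-block:: python

    import tensorlayer as tl

    # add 0.1 to the hue channel of every pixel (is_offset=True)
    im_offset = tl.prepro.adjust_hue(im, hout=0.1, is_offset=True)

    # force every pixel to a blue-ish hue instead (is_offset=False, 0.66 is blue)
    im_blue = tl.prepro.adjust_hue(im, hout=0.66, is_offset=False)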
+ Examples -------- >>> x = samplewise_norm(x, samplewise_center=True, samplewise_std_normalization=True) @@ -1260,6 +1386,11 @@ def featurewise_norm(x, mean=None, std=None, epsilon=1e-7): epsilon : float A small position value for dividing standard deviation. + Returns + ------- + numpy.array + A processed image. + """ if mean: x = x - mean @@ -1277,6 +1408,11 @@ def get_zca_whitening_principal_components_img(X): x : numpy.array Batch of images with dimension of [n_example, row, col, channel] (default). + Returns + ------- + numpy.array + A processed image. + """ flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3])) logging.info("zca : computing sigma ..") @@ -1298,6 +1434,11 @@ def zca_whitening(x, principal_components): principal_components : matrix Matrix from ``get_zca_whitening_principal_components_img``. + Returns + ------- + numpy.array + A processed image. + """ flatx = np.reshape(x, (x.size)) # logging.info(principal_components.shape, x.shape) # ((28160, 28160), (160, 176, 1)) @@ -1336,6 +1477,11 @@ def channel_shift(x, intensity, is_random=False, channel_index=2): channel_index : int Index of channel. Default is 2. + Returns + ------- + numpy.array + A processed image. + """ if is_random: factor = np.random.uniform(-intensity, intensity) @@ -1367,6 +1513,10 @@ def channel_shift_multi(x, intensity, is_random=False, channel_index=2): others : args See ``tl.prepro.channel_shift``. + Returns + ------- + numpy.array + A list of processed images. """ if is_random: factor = np.random.uniform(-intensity, intensity) @@ -1395,6 +1545,10 @@ def drop(x, keep=0.5): keep : float The keeping probability (0, 1), the lower more values will be set to zero. + Returns + ------- + numpy.array + A processed image. """ if len(x.shape) == 3: if x.shape[-1] == 3: # color @@ -1437,6 +1591,11 @@ def transform_matrix_offset_center(matrix, x, y): x and y : 2 int Size of image. + Returns + ------- + numpy.array + The transform matrix. + Examples -------- - See ``tl.prepro.rotation``, ``tl.prepro.shear``, ``tl.prepro.zoom``. @@ -1451,7 +1610,7 @@ def transform_matrix_offset_center(matrix, x, y): def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Return transformed images by given transform_matrix from ``transform_matrix_offset_center``. + """Return transformed images by given ``transform_matrix`` from ``transform_matrix_offset_center``. Parameters ---------- @@ -1463,18 +1622,23 @@ def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', c Index of channel, default 2. fill_mode : str Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap` - - `scipy ndimage affine_transform `__ + - `scipy ndimage affine_transform `__ cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0 order : int The order of interpolation. The order has to be in the range 0-5: - - 0 Nearest-neighbor - - 1 Bi-linear (default) - - 2 Bi-quadratic - - 3 Bi-cubic - - 4 Bi-quartic - - 5 Bi-quintic - - `scipy ndimage affine_transform `__ + - 0 Nearest-neighbor + - 1 Bi-linear (default) + - 2 Bi-quadratic + - 3 Bi-cubic + - 4 Bi-quartic + - 5 Bi-quintic + - `scipy ndimage affine_transform `__ + + Returns + ------- + numpy.array + A processed image. Examples -------- @@ -1511,12 +1675,12 @@ def projective_transform_by_points(x, src, dst, map_args={}, output_shape=None, Shape of the output image generated. By default the shape of the input image is preserved. 
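The lower-level pair ``transform_matrix_offset_center`` and ``apply_transform`` is what the rotation, shear and zoom functions are built on; the sketch below rotates an image ``x`` of shape [row, col, channel] by a fixed 20 degrees with them directly. It is a simplified re-statement of what ``tl.prepro.rotation`` does internally, not a drop-in replacement:

.. code-block:: python

    import numpy as np
    import tensorlayer as tl

    theta = np.pi / 180 * 20  # 20 degrees
    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
                                [np.sin(theta),  np.cos(theta), 0],
                                [0,              0,             1]])

    # centre the transform on the middle of the image
    h, w = x.shape[0], x.shape[1]
    transform_matrix = tl.prepro.transform_matrix_offset_center(rotation_matrix, h, w)

    # warp with bi-linear interpolation (order=1) and nearest-neighbour filling
    x_rot = tl.prepro.apply_transform(x, transform_matrix, channel_index=2,
                                      fill_mode='nearest', order=1)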
Note that, even for multi-band images, only rows and columns need to be specified. order : int The order of interpolation. The order has to be in the range 0-5: - - 0 Nearest-neighbor - - 1 Bi-linear (default) - - 2 Bi-quadratic - - 3 Bi-cubic - - 4 Bi-quartic - - 5 Bi-quintic + - 0 Nearest-neighbor + - 1 Bi-linear (default) + - 2 Bi-quadratic + - 3 Bi-cubic + - 4 Bi-quartic + - 5 Bi-quintic mode : str One of `constant` (default), `edge`, `symmetric`, `reflect` or `wrap`. Points outside the boundaries of the input are filled according to the given mode. Modes match the behaviour of numpy.pad. @@ -1527,6 +1691,11 @@ def projective_transform_by_points(x, src, dst, map_args={}, output_shape=None, preserve_range : boolean Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float. + Returns + ------- + numpy.array + A processed image. + Examples -------- Assume X is an image from CIFAR-10, i.e. shape == (32, 32, 3) @@ -1567,6 +1736,11 @@ def array_to_img(x, dim_ordering=(0, 1, 2), scale=True): scale : boolean If True, converts image to [0, 255] from any range of value like [-1, 2]. Default is True. + Returns + ------- + PIL.image + An image. + References ----------- - `PIL Image.fromarray `__ @@ -1611,6 +1785,10 @@ def find_contours(x, level=0.8, fully_connected='low', positive_orientation='low positive_orientation : str Either `low` or `high`. Indicates whether the output contours will produce positively-oriented polygons around islands of low- or high-valued elements. If `low` then contours will wind counter-clockwise around elements below the iso-value. Alternately, this means that low-valued elements are always on the left of the contour. + Returns + -------- + list of (n,2)-ndarrays + Each contour is an ndarray of shape (n, 2), consisting of n (row, column) coordinates along the contour. """ return skimage.measure.find_contours(x, level, fully_connected='low', positive_orientation='low') @@ -1627,6 +1805,10 @@ def pt2map(list_points=[], size=(100, 100), val=1): val : float or int For the contour value. + Returns + ------- + numpy.array + An image. """ i_m = np.zeros(size) if list_points == []: @@ -1645,10 +1827,14 @@ def binary_dilation(x, radius=3): Parameters ----------- x : 2D array - An image. + A binary image. radius : int For the radius of mask. + Returns + ------- + numpy.array + A processed binary image. """ from skimage.morphology import disk, binary_dilation mask = disk(radius) @@ -1663,10 +1849,14 @@ def dilation(x, radius=3): Parameters ----------- x : 2D array - An image. + An greyscale image. radius : int For the radius of mask. + Returns + ------- + numpy.array + A processed greyscale image. """ from skimage.morphology import disk, dilation mask = disk(radius) @@ -1681,10 +1871,14 @@ def binary_erosion(x, radius=3): Parameters ----------- x : 2D array - An image. + A binary image. radius : int For the radius of mask. + Returns + ------- + numpy.array + A processed binary image. """ from skimage.morphology import disk, dilation, binary_erosion mask = disk(radius) @@ -1699,10 +1893,14 @@ def erosion(x, radius=3): Parameters ----------- x : 2D array - An image. + A greyscale image. radius : int For the radius of mask. + Returns + ------- + numpy.array + A processed greyscale image. """ from skimage.morphology import disk, dilation, erosion mask = disk(radius) @@ -1720,6 +1918,12 @@ def obj_box_coords_rescale(coords=[], shape=[100, 200]): shape : list of 2 int 怐height, width]. 
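The morphology wrappers above all operate on 2D arrays with a disk-shaped structuring element of the given radius; a toy sketch with a made-up binary mask:

.. code-block:: python

    import numpy as np
    import tensorlayer as tl

    # a toy binary mask with a single foreground pixel
    mask = np.zeros((32, 32))
    mask[16, 16] = 1

    grown = tl.prepro.binary_dilation(mask, radius=3)    # foreground grows into a disk
    shrunk = tl.prepro.binary_erosion(grown, radius=3)   # erosion roughly undoes the dilation

    # dilation() and erosion() work the same way on greyscale 2D images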
+ Returns + ------- + list of list of 4 numbers + A list of new bounding boxes. + + Examples --------- >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50], [10, 10, 20, 20]], shape=[100, 100]) @@ -1732,6 +1936,10 @@ def obj_box_coords_rescale(coords=[], shape=[100, 200]): >>> print(coords) ... [[0.15, 0.4, 0.25, 0.5]] + Returns + ------- + list of 4 numbers + New coordinates. """ imh, imw = shape[0], shape[1] imh = imh * 1.0 # * 1.0 for python2 : force division to be float point @@ -1758,10 +1966,15 @@ def obj_box_coord_rescale(coord=[], shape=[100, 200]): shape : list of 2 int For [height, width]. + Returns + ------- + list of 4 numbers + New bounding box. + Examples --------- - >>> coord = obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100]) - ... [[0.3, 0.4, 0.5, 0.5]] + >>> coord = tl.prepro.obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100]) + ... [0.3, 0.4, 0.5, 0.5] """ return obj_box_coords_rescale(coords=[coord], shape=shape)[0] @@ -1778,10 +1991,15 @@ def obj_box_coord_scale_to_pixelunit(coord, shape=(100, 100)): shape : tuple of 2 For [height, width]. + Returns + ------- + list of 4 numbers + New bounding box. + Examples --------- - >>> x, y, x2, y2 = obj_box_coord_scale_to_pixelunit([0.2, 0.3, 0.5, 0.7], shape=(100, 200, 3)) - ... (40, 30, 100, 70) + >>> x, y, x2, y2 = tl.prepro.obj_box_coord_scale_to_pixelunit([0.2, 0.3, 0.5, 0.7], shape=(100, 200, 3)) + ... [40, 30, 100, 70] """ imh, imw = shape[0:2] @@ -1814,6 +2032,11 @@ def obj_box_coord_centroid_to_upleft_butright(coord, to_int=False): to_int : boolean Whether to convert output as integer. + Returns + ------- + list of 4 numbers + New bounding box. + Examples --------- >>> coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20]) @@ -1846,6 +2069,11 @@ def obj_box_coord_upleft_butright_to_centroid(coord): coord : list of 4 int/float One coordinate. + Returns + ------- + list of 4 numbers + New bounding box. + """ assert len(coord) == 4, "coordinate should be 4 values : [x1, y1, x2, y2]" x1, y1, x2, y2 = coord @@ -1865,6 +2093,11 @@ def obj_box_coord_centroid_to_upleft(coord): coord : list of 4 int/float One coordinate. + Returns + ------- + list of 4 numbers + New bounding box. + """ assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" x_center, y_center, w, h = coord @@ -1882,6 +2115,10 @@ def obj_box_coord_upleft_to_centroid(coord): coord : list of 4 int/float One coordinate. + Returns + ------- + list of 4 numbers + New bounding box. """ assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" x, y, w, h = coord @@ -1891,13 +2128,17 @@ def obj_box_coord_upleft_to_centroid(coord): def parse_darknet_ann_str_to_list(annotations): - r"""Input string format of class, x, y, w, h, return list of list format. + """Input string format of class, x, y, w, h, return list of list format. Parameters ----------- annotations : str The annotations in darkent format "class, x, y, w, h ...." seperated by "\\n". + Returns + ------- + list of list of 4 numbers + List of bounding box. """ annotations = annotations.split("\n") ann = [] @@ -1914,7 +2155,23 @@ def parse_darknet_ann_str_to_list(annotations): def parse_darknet_ann_list_to_cls_box(annotations): - """Input list of [[class, x, y, w, h], ...], return two list of [class ...] and [[x, y, w, h], ...].""" + """Parse darknet annotation format into two lists for class and bounding box. + + Input list of [[class, x, y, w, h], ...], return two list of [class ...] and [[x, y, w, h], ...]. 
+ + Parameters + ------------ + annotations : list of list + A list of class and bounding boxes of images e.g. [[class, x, y, w, h], ...] + + Returns + ------- + list of int + List of class labels. + + list of list of 4 numbers + List of bounding box. + """ class_list = [] bbox_list = [] for i in range(len(annotations)): @@ -1939,6 +2196,13 @@ def obj_box_left_right_flip(im, coords=[], is_rescale=False, is_center=False, is is_random : boolean If True, randomly flip. Default is False. + Returns + ------- + numpy.array + A processed image + list of list of 4 numbers + A list of new bounding boxes. + Examples -------- >>> im = np.zeros([80, 100]) # as an image with shape width=100, height=80 @@ -2020,6 +2284,13 @@ def obj_box_imresize(im, coords=[], size=[100, 100], interp='bicubic', mode=None is_rescale : boolean Set to True, if the input coordinates are rescaled to [0, 1], then return the original coordinates. Default is False. + Returns + ------- + numpy.array + A processed image + list of list of 4 numbers + A list of new bounding boxes. + Examples -------- >>> im = np.zeros([80, 100, 3]) # as an image with shape width=100, height=80 @@ -2028,13 +2299,13 @@ def obj_box_imresize(im, coords=[], size=[100, 100], interp='bicubic', mode=None ... [[40, 80, 60, 60], [20, 40, 40, 40]] >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False) >>> print(coords) - ... [20, 20, 30, 15] + ... [[20, 20, 30, 15]] >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False) >>> print(coords) - ... [30, 30, 45, 22] + ... [[30, 30, 45, 22]] >>> im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True) >>> print(coords, im2.shape) - ... [0.2, 0.4, 0.3, 0.3] (160, 200, 3) + ... [[0.2, 0.4, 0.3, 0.3]] (160, 200, 3) """ imh, imw = im.shape[0:2] @@ -2100,6 +2371,14 @@ def obj_box_crop(im, classes=[], coords=[], wrg=100, hrg=100, is_rescale=False, thresh_wh2 : float Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. + Returns + ------- + numpy.array + A processed image + list of int + A list of classes + list of list of 4 numbers + A list of new bounding boxes. """ h, w = im.shape[0], im.shape[1] assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" @@ -2243,6 +2522,15 @@ def obj_box_shift(im, thresh_wh2 : float Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. + + Returns + ------- + numpy.array + A processed image + list of int + A list of classes + list of list of 4 numbers + A list of new bounding boxes. """ imh, imw = im.shape[row_index], im.shape[col_index] assert (hrg < 1.0) and (hrg > 0.) and (wrg < 1.0) and (wrg > 0.), "shift range should be (0, 1)" @@ -2367,6 +2655,14 @@ def obj_box_zoom(im, thresh_wh2 : float Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. + Returns + ------- + numpy.array + A processed image + list of int + A list of classes + list of list of 4 numbers + A list of new bounding boxes. """ if len(zoom_range) != 2: raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range) @@ -2549,6 +2845,11 @@ def remove_pad_sequences(sequences, pad_id=0): pad_id : int The pad ID. + Returns + ---------- + list of list of int + The processed sequences. 
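Tying the darknet-annotation helpers above together, a hedged sketch that goes from a raw annotation string to separate class and box lists; the annotation text is invented and the exact output types depend on the parser:

.. code-block:: python

    import tensorlayer as tl

    # two objects in darknet format, one "class x_center y_center w h" per line
    ann_str = "0 0.5 0.5 0.2 0.3\n1 0.3 0.7 0.1 0.1"

    ann = tl.prepro.parse_darknet_ann_str_to_list(ann_str)
    classes, boxes = tl.prepro.parse_darknet_ann_list_to_cls_box(ann)
    # classes -> [0, 1]
    # boxes   -> [[0.5, 0.5, 0.2, 0.3], [0.3, 0.7, 0.1, 0.1]]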
+ Examples ---------- >>> sequences = [[2,3,4,0,0], [5,1,2,3,4,0,0,0], [4,5,0,2,4,0,0,0]] @@ -2586,6 +2887,11 @@ def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_en remain_end_id : boolean Keep an `end_id` in the end. Default is False. + Returns + ---------- + list of list of int + The processed sequences. + Examples --------- >>> sentences_ids = [[4, 3, 5, 3, 2, 2, 2, 2], <-- end_id is 2 @@ -2627,6 +2933,11 @@ def sequences_add_start_id(sequences, start_id=0, remove_last=False): remove_last : boolean Remove the last value of each sequences. Usually be used for removing the end ID. + Returns + ---------- + list of list of int + The processed sequences. + Examples --------- >>> sentences_ids = [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]] @@ -2661,6 +2972,11 @@ def sequences_add_end_id(sequences, end_id=888): end_id : int The end ID. + Returns + ---------- + list of list of int + The processed sequences. + Examples --------- >>> sequences = [[1,2,3],[4,5,6,7]] @@ -2686,6 +3002,11 @@ def sequences_add_end_id_after_pad(sequences, end_id=888, pad_id=0): pad_id : int The pad ID. + Returns + ---------- + list of list of int + The processed sequences. + Examples --------- >>> sequences = [[1,2,0,0], [1,2,3,0], [1,2,3,4]] @@ -2726,6 +3047,11 @@ def sequences_get_mask(sequences, pad_val=0): pad_val : int The pad value. + Returns + ---------- + list of list of int + The mask. + Examples --------- >>> sentences_ids = [[4, 0, 5, 3, 0, 0], diff --git a/tensorlayer/rein.py b/tensorlayer/rein.py index ee1fd7bca..ad500a1f0 100644 --- a/tensorlayer/rein.py +++ b/tensorlayer/rein.py @@ -18,8 +18,13 @@ def discount_episode_rewards(rewards=[], gamma=0.99, mode=0): Discounted factor mode : int Mode for computing the discount rewards. - - If mode == 0, reset the discount process when encount a non-zero reward (Ping-pong game). - - If mode == 1, would not reset the discount process. + - If mode == 0, reset the discount process when encount a non-zero reward (Ping-pong game). + - If mode == 1, would not reset the discount process. + + Returns + -------- + list of float + The discounted rewards. Examples ---------- @@ -58,6 +63,11 @@ def cross_entropy_reward_loss(logits, actions, rewards, name=None): rewards : tensor or placeholder The rewards. + Returns + -------- + Tensor + The TensorFlow loss function. + Examples ---------- >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D]) @@ -95,6 +105,10 @@ def log_weight(probs, weights, name='log_weight'): weights : tensor The weights. + Returns + -------- + Tensor + The Tensor after appling the log weighted expression. """ with tf.variable_scope(name): exp_v = tf.reduce_mean(tf.log(probs) * weights) @@ -109,8 +123,12 @@ def choice_action_by_probs(probs=[0.5, 0.5], action_list=None): probs : list of float. The probability distribution of all actions. action_list : None or a list of int or others - A list of action in integer, string or others. - - If None, returns an integer range between 0 and len(probs)-1. + A list of action in integer, string or others. If None, returns an integer range between 0 and len(probs)-1. + + Returns + -------- + float int or str + The chosen action. 
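A small sketch of the two reinforcement-learning helpers documented here; the reward trace is invented, and ``mode=0`` resets the running discount at every non-zero reward as in the Pong example:

.. code-block:: python

    import tensorlayer as tl

    # one Pong-style episode: zero reward until a point is scored or lost
    rewards = [0, 0, 0, 1, 0, 0, -1]
    discounted = tl.rein.discount_episode_rewards(rewards, gamma=0.99, mode=0)

    # sample an action index from a probability distribution over three actions
    action = tl.rein.choice_action_by_probs([0.3, 0.3, 0.4])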
Examples ---------- diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py index da963d34a..be971c337 100644 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py +++ b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py @@ -8,6 +8,7 @@ lib_path = os.path.join(module_dir, 'roi_pooling.so') roi_pooling_module = tf.load_op_library(lib_path) + def roi_pooling(input, rois, pool_height, pool_width): """ returns a tensorflow operation for computing the Region of Interest Pooling @@ -30,10 +31,8 @@ def _RoiPoolingGrad(op, *grads): orig_argmax_output = op.outputs[1] orig_output_grad = grads[0] - output_grad = roi_pooling_module.roi_pooling_grad(orig_inputs, orig_rois, orig_output, - orig_argmax_output, orig_output_grad, - pool_height=op.get_attr('pool_height'), - pool_width=op.get_attr('pool_width')) + output_grad = roi_pooling_module.roi_pooling_grad( + orig_inputs, orig_rois, orig_output, orig_argmax_output, orig_output_grad, pool_height=op.get_attr('pool_height'), pool_width=op.get_attr('pool_width')) return [output_grad, None] @@ -48,5 +47,4 @@ def _RoiPoolingShape(op): pool_width = op.get_attr('pool_width') #TODO: check the width/hegiht order - return [tf.TensorShape([n_rois, n_channels, pool_width, pool_height]), - tf.TensorShape(None)] + return [tf.TensorShape([n_rois, n_channels, pool_width, pool_height]), tf.TensorShape(None)] diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py index 763662db7..952e556ad 100644 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py +++ b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py @@ -9,21 +9,10 @@ class RoiPoolingTest(tf.test.TestCase): def test_roi_pooling_grad(self): # TODO(maciek): corner cases - input_value = [[ - [[1], [2], [4], [4]], - [[3], [4], [1], [2]], - [[6], [2], [1], [7.0]], - [[1], [3], [2], [8]] - ]] + input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] input_value = np.asarray(input_value, dtype='float32') - rois_value = [ - [0, 0, 0, 1, 1], - [0, 1, 1, 2, 2], - [0, 2, 2, 3, 3], - [0, 0, 0, 2, 2], - [0, 0, 0, 3, 3] - ] + rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] rois_value = np.asarray(rois_value, dtype='int32') with tf.Session(''): @@ -33,11 +22,9 @@ def test_roi_pooling_grad(self): y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2) mean = tf.reduce_mean(y) - numerical_grad_error_1 = tf.test.compute_gradient_error( - [input_const], [input_value.shape], y, [5, 2, 2, 1]) + numerical_grad_error_1 = tf.test.compute_gradient_error([input_const], [input_value.shape], y, [5, 2, 2, 1]) - numerical_grad_error_2 = tf.test.compute_gradient_error( - [input_const], [input_value.shape], mean, []) + numerical_grad_error_2 = tf.test.compute_gradient_error([input_const], [input_value.shape], mean, []) self.assertLess(numerical_grad_error_1, 1e-4) self.assertLess(numerical_grad_error_2, 1e-4) @@ -87,7 +74,7 @@ def test_very_big_output(self): than the number of available GPU threads """ - pooled_w, pooled_h = 7,7 + pooled_w, pooled_h = 7, 7 input_w, input_h = 72, 240 n_channels = 512 n_batches = 2 @@ -106,5 +93,6 @@ def test_very_big_output(self): self.assertTrue(np.all(y_output == 1)) + if __name__ == '__main__': tf.test.main() diff --git 
a/tensorlayer/third_party/roi_pooling/roi_pooling_example.py b/tensorlayer/third_party/roi_pooling/roi_pooling_example.py index 73e3015a8..050f6debf 100644 --- a/tensorlayer/third_party/roi_pooling/roi_pooling_example.py +++ b/tensorlayer/third_party/roi_pooling/roi_pooling_example.py @@ -4,24 +4,13 @@ import tensorflow as tf from roi_pooling.roi_pooling_ops import roi_pooling -# input feature map going into the RoI pooling -input_value = [[ - [[1], [2], [4], [4]], - [[3], [4], [1], [2]], - [[6], [2], [1], [7.0]], - [[1], [3], [2], [8]] -]] +# input feature map going into the RoI pooling +input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] input_value = np.asarray(input_value, dtype='float32') # Regions of interest as lists of: # feature map index, upper left, bottom right coordinates -rois_value = [ - [0, 0, 0, 1, 1], - [0, 1, 1, 2, 2], - [0, 2, 2, 3, 3], - [0, 0, 0, 2, 2], - [0, 0, 0, 3, 3] -] +rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] rois_value = np.asarray(rois_value, dtype='int32') # the pool_height and width are parameters of the ROI layer diff --git a/tensorlayer/third_party/roi_pooling/setup.py b/tensorlayer/third_party/roi_pooling/setup.py index f530964ed..b262072a0 100644 --- a/tensorlayer/third_party/roi_pooling/setup.py +++ b/tensorlayer/third_party/roi_pooling/setup.py @@ -12,18 +12,19 @@ except ImportError: print("Please install tensorflow 0.12.0 or later") sys.exit() - + class MyInstall(DistutilsInstall): def run(self): subprocess.call(['make', '-C', 'roi_pooling', 'build']) DistutilsInstall.run(self) -setup(name='roi-pooling', - version='1.0', - description='ROI pooling as a custom TensorFlow operation', - author='deepsense.io', - packages=['roi_pooling'], - package_data={'roi_pooling': ['roi_pooling.so']}, - cmdclass={'install': MyInstall} -) + +setup( + name='roi-pooling', + version='1.0', + description='ROI pooling as a custom TensorFlow operation', + author='deepsense.io', + packages=['roi_pooling'], + package_data={'roi_pooling': ['roi_pooling.so']}, + cmdclass={'install': MyInstall}) diff --git a/tensorlayer/third_party/roi_pooling/test_roi_layer.py b/tensorlayer/third_party/roi_pooling/test_roi_layer.py index 954bbd994..d0e27449a 100644 --- a/tensorlayer/third_party/roi_pooling/test_roi_layer.py +++ b/tensorlayer/third_party/roi_pooling/test_roi_layer.py @@ -4,26 +4,13 @@ # from roi_pooling.roi_pooling_ops import roi_pooling - - # input feature map going into the RoI pooling -input_value = [[ - [[1], [2], [4], [4]], - [[3], [4], [1], [2]], - [[6], [2], [1], [7.0]], - [[1], [3], [2], [8]] -]] +input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] input_value = np.asarray(input_value, dtype='float32') # Regions of interest as lists of: # feature map index, upper left, bottom right coordinates -rois_value = [ - [0, 0, 0, 1, 1], - [0, 1, 1, 2, 2], - [0, 2, 2, 3, 3], - [0, 0, 0, 2, 2], - [0, 0, 0, 3, 3] -] +rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] rois_value = np.asarray(rois_value, dtype='int32') # the pool_height and width are parameters of the ROI layer diff --git a/tensorlayer/utils.py b/tensorlayer/utils.py index 00e69e435..90a47bef5 100644 --- a/tensorlayer/utils.py +++ b/tensorlayer/utils.py @@ -4,7 +4,6 @@ import subprocess import sys import time -from contextlib import contextmanager from sys import exit as _exit from sys import platform as 
_platform @@ -37,51 +36,56 @@ def fit(sess, tensorboard_graph_vis=True): """Training a given non time-series network by the given cost function, training data, batch_size, n_epoch etc. + - MNIST example click `here `_. + - In order to control the training details, the authors HIGHLY recommend ``tl.iterate`` see two MNIST examples `1 `_, `2 `_. + Parameters ---------- sess : Session - TensorFlow Session - network : a TensorLayer layer - the network will be trained - train_op : a TensorFlow optimizer - like tf.train.AdamOptimizer - X_train : numpy array - the input of training data - y_train : numpy array - the target of training data + TensorFlow Session. + network : TensorLayer layer + the network to be trained. + train_op : TensorFlow optimizer + The optimizer for training e.g. tf.train.AdamOptimizer. + X_train : numpy.array + The input of training data + y_train : numpy.array + The target of training data x : placeholder - for inputs + For inputs. y_ : placeholder - for targets - acc : the TensorFlow expression of accuracy (or other metric) or None - if None, would not display the metric + For targets. + acc : TensorFlow expression or None + Metric for accuracy or others. If None, would not print the information. batch_size : int - batch size for training and evaluating + The batch size for training and evaluating. n_epoch : int - the number of training epochs + The number of training epochs. print_freq : int - display the training information every ``print_freq`` epochs - X_val : numpy array or None - the input of validation data - y_val : numpy array or None - the target of validation data + Print the training information every ``print_freq`` epochs. + X_val : numpy.array or None + The input of validation data. If None, would not perform validation. + y_val : numpy.array or None + The target of validation data. If None, would not perform validation. eval_train : boolean - if X_val and y_val are not None, it refects whether to evaluate the training data + Whether to evaluate the model during training. + If X_val and y_val are not None, it reflects whether to evaluate the model on training data. tensorboard : boolean - if True summary data will be stored to the log/ direcory for visualization with tensorboard. + If True, summary data will be stored to the log/ directory for visualization with tensorboard. See also detailed tensorboard_X settings for specific configurations of features. (default False) - Also runs tl.layers.initialize_global_variables(sess) internally in fit() to setup the summary nodes, see Note: + Also runs `tl.layers.initialize_global_variables(sess)` internally in fit() to setup the summary nodes. tensorboard_epoch_freq : int - how many epochs between storing tensorboard checkpoint for visualization to log/ directory (default 5) + How many epochs between storing tensorboard checkpoint for visualization to log/ directory (default 5). tensorboard_weight_histograms : boolean - if True updates tensorboard data in the logs/ directory for visulaization - of the weight histograms every tensorboard_epoch_freq epoch (default True) + If True updates tensorboard data in the logs/ directory for visualization + of the weight histograms every tensorboard_epoch_freq epoch (default True). tensorboard_graph_vis : boolean - if True stores the graph in the tensorboard summaries saved to log/ (default True) + If True stores the graph in the tensorboard summaries saved to log/ (default True). 
Examples -------- - >>> see tutorial_mnist_simple.py + See `tutorial_mnist_simple.py `_ + >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, ... acc=acc, batch_size=500, n_epoch=200, print_freq=5, ... X_val=X_val, y_val=y_val, eval_train=False) @@ -92,9 +96,9 @@ def fit(sess, Notes -------- - If tensorboard=True, the global_variables_initializer will be run inside the fit function - in order to initalize the automatically generated summary nodes used for tensorboard visualization, - thus tf.global_variables_initializer().run() before the fit() call will be undefined. + If tensorboard=True, the `global_variables_initializer` will be run inside the fit function + in order to initialize the automatically generated summary nodes used for tensorboard visualization, + thus `tf.global_variables_initializer().run()` before the `fit()` call will be undefined. """ assert X_train.shape[0] >= batch_size, "Number of training examples should be bigger than the batch size" @@ -207,29 +211,31 @@ def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None): Parameters ---------- - sess : TensorFlow session - sess = tf.InteractiveSession() - network : a TensorLayer layer - the network will be trained - acc : the TensorFlow expression of accuracy (or other metric) or None - if None, would not display the metric - X_test : numpy array - the input of test data + sess : Session + TensorFlow session. + network : TensorLayer layer + The network. + acc : TensorFlow expression or None + Metric for accuracy or others. + - If None, would not print the information. + X_test : numpy.array + The input of testing data. y_test : numpy array - the target of test data + The target of testing data x : placeholder - for inputs + For inputs. y_ : placeholder - for targets + For targets. batch_size : int or None - batch size for testing, when dataset is large, we should use minibatche for testing. - when dataset is small, we can set it to None. - cost : the TensorFlow expression of cost or None - if None, would not display the cost + The batch size for testing, when dataset is large, we should use minibatche for testing; + if dataset is small, we can set it to None. + cost : TensorFlow expression or None + Metric for cost or others. If None, would not print the information. Examples -------- - >>> see tutorial_mnist_simple.py + See `tutorial_mnist_simple.py `_ + >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost) """ @@ -267,23 +273,24 @@ def predict(sess, network, X, x, y_op, batch_size=None): Parameters ---------- - sess : TensorFlow session - sess = tf.InteractiveSession() - network : a TensorLayer layer - the network will be trained - X : numpy array - the input + sess : Session + TensorFlow Session. + network : TensorLayer layer + The network. + X : numpy.array + The inputs. x : placeholder - for inputs + For inputs. y_op : placeholder - the argmax expression of softmax outputs + The argmax expression of softmax outputs. batch_size : int or None - batch size for prediction, when dataset is large, we should use minibatche for prediction. - when dataset is small, we can set it to None. + The batch size for prediction, when dataset is large, we should use minibatche for prediction; + if dataset is small, we can set it to None. 
Examples -------- - >>> see tutorial_mnist_simple.py + See `tutorial_mnist_simple.py `_ + >>> y = network.outputs >>> y_op = tf.argmax(tf.nn.softmax(y), 1) >>> print(tl.utils.predict(sess, network, X_test, x, y_op)) @@ -339,16 +346,16 @@ def evaluation(y_test=None, y_predict=None, n_classes=None): Parameters ---------- - y_test : numpy.array or list - target results - y_predict : numpy.array or list - predicted results + y_test : list + The target results + y_predict : list + The predicted results n_classes : int - number of classes + The number of classes Examples -------- - >>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes) + >>> c_mat, f1, acc, f1_macro = tl.utils.evaluation(y_test, y_predict, n_classes) """ from sklearn.metrics import confusion_matrix, f1_score, accuracy_score @@ -371,7 +378,7 @@ def dict_to_one(dp_dict={}): Parameters ---------- dp_dict : dictionary - keeping probabilities + The dictionary contains key and number, e.g. keeping probabilities. Examples -------- @@ -406,9 +413,9 @@ def class_balancing_oversample(X_train=None, y_train=None, printable=True): Parameters ---------- X_train : numpy.array - Features, each row is an example + The inputs. y_train : numpy.array - Labels + The targets. Examples -------- diff --git a/tensorlayer/visualize.py b/tensorlayer/visualize.py index a978a2a3a..8b3eabf97 100644 --- a/tensorlayer/visualize.py +++ b/tensorlayer/visualize.py @@ -20,14 +20,14 @@ def read_image(image, path=''): Parameters ----------- image : str - file name. + The image file name. path : str - path. + The image folder path. Returns ------- - numpy array - Image + numpy.array + The image. """ return scipy.misc.imread(os.path.join(path, image)) @@ -39,18 +39,18 @@ def read_images(img_list, path='', n_threads=10, printable=True): Parameters ------------- img_list : list of str - the image file names. + The image file names. path : str - image folder path. + The image folder path. n_threads : int - number of threads to read image. + The number of threads to read image. printable : boolean - print information when reading images. + Whether to print information when reading images. Returns ------- - list of numpy array - The read images + list of numpy.array + The images. """ imgs = [] @@ -96,8 +96,8 @@ def save_images(images, size, image_path=''): Returns ------- - numpy array - The single image + numpy.array + The image. Examples --------- @@ -129,31 +129,33 @@ def draw_boxes_and_labels_to_image(image, classes=[], coords=[], scores=[], clas Parameters ----------- - image : numpy array - RGB image in numpy.array, [height, width, channel]. + image : numpy.array + The RGB image [height, width, channel]. classes : list of int - a list of class ID (int). + A list of class ID (int). coords : list of int - a list of list for coordinates. + A list of list for coordinates. - Should be [x, y, x2, y2] (up-left and botton-right format) - If [x_center, y_center, w, h] (set is_center to True). scores : list of float - a list of score (float). (Optional) + A list of score (float). (Optional) classes_list : list of str for converting ID to string on image. is_center : boolean - If coords is [x_center, y_center, w, h], set it to True for converting [x_center, y_center, w, h] to [x, y, x2, y2] (up-left and botton-right). - If coords is [x1, x2, y1, y2], set it to False. + Whether the coordinates is [x_center, y_center, w, h] + - If coordinates are [x_center, y_center, w, h], set it to True for converting it to [x, y, x2, y2] (up-left and botton-right) internally. 
+ - If coordinates are [x1, x2, y1, y2], set it to False. is_rescale : boolean - If True, the input coordinates are the portion of width and high, this API will scale the coordinates to pixel unit internally. - If False, feed the coordinates with pixel unit format. + Whether to rescale the coordinates from pixel-unit format to ratio format. + - If True, the input coordinates are the portion of width and high, this API will scale the coordinates to pixel unit internally. + - If False, feed the coordinates with pixel unit format. save_name : None or str The name of image file (i.e. image.png), if None, not to save image. Returns ------- - numpy array - The saved image + numpy.array + The saved image. References ----------- @@ -212,14 +214,14 @@ def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=1283 Parameters ---------- I : numpy.array - The image + The image. second : int The display second(s) for the image(s), if saveable is False. saveable : boolean Save or plot the figure. name : str A name to save the image, if saveable is True. - cmap : None or string + cmap : None or str 'gray' for greyscale, None for default, etc. fig_idx : int matplotlib figure index. @@ -265,7 +267,7 @@ def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362): name : str A name to save the image, if saveable is True. fig_idx : int - matplotlib figure index. + The matplotlib figure index. Examples -------- @@ -380,8 +382,8 @@ def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, second=5, save Parameters ---------- - embeddings : matrix - The images. + embeddings : numpy.array + The embedding matrix. reverse_dictionary : dictionary id_to_word, mapping id to unique word. plot_only : int
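Finally, a hedged sketch of the visualization helpers touched above: reading an image, drawing one box in pixel units onto it, and saving the result. The file names, class list and coordinates are placeholders:

.. code-block:: python

    import tensorlayer as tl

    im = tl.vis.read_image('cat.png', path='data/')

    # one box given as [x, y, x2, y2] in pixel units, so is_center=False and is_rescale=False
    im2 = tl.vis.draw_boxes_and_labels_to_image(
        im, classes=[0], coords=[[30, 40, 120, 160]], scores=[0.9],
        classes_list=['cat'], is_center=False, is_rescale=False,
        save_name='boxes.png')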