From 4061a680121aa51af95829ae95da925d06d8cfe0 Mon Sep 17 00:00:00 2001
From: zsdonghao
Date: Sat, 21 Apr 2018 01:35:29 +0100
Subject: [PATCH] fix example docs style

---
 example/tutorial_atari_pong.py                |   8 +-
 .../tutorial_binarynet_cifar10_tfrecord.py    |  92 ++------
 example/tutorial_binarynet_mnist_cnn.py       |   2 -
 example/tutorial_cartpole_ac.py               |  11 +-
 example/tutorial_cifar10.py                   |  56 +----
 example/tutorial_cifar10_tfrecord.py          |  62 +----
 .../tutorial_dorefanet_cifar10_tfrecord.py    |  89 ++-----
 example/tutorial_dorefanet_mnist_cnn.py       |   2 -
 example/tutorial_frozenlake_dqn.py            |  15 +-
 example/tutorial_frozenlake_q_table.py        |   8 +-
 example/tutorial_generate_text.py             |  60 ++---
 example/tutorial_image_preprocess.py          |   1 -
 example/tutorial_inceptionV3_tfslim.py        |  10 +-
 example/tutorial_mnist.py                     | 198 ++++++++--------
 example/tutorial_mnist_distributed.py         |   7 +-
 example/tutorial_mnist_float16.py             |  12 +-
 example/tutorial_mnist_simple.py              |   6 +-
 example/tutorial_ptb_lstm.py                  |  56 ++---
 example/tutorial_ptb_lstm_state_is_tuple.py   |  58 +++--
 ...tutorial_ternaryweight_cifar10_tfrecord.py |  89 ++-----
 example/tutorial_tf_dataset_voc.py            |  15 +-
 example/tutorial_tfrecord2.py                 |   8 +-
 example/tutorial_tfrecord3.py                 |   6 +-
 example/tutorial_vgg16.py                     | 223 ++++--------------
 example/tutorial_vgg19.py                     | 188 +++++----------
 example/tutorial_word2vec_basic.py            |  31 +--
 26 files changed, 379 insertions(+), 934 deletions(-)

diff --git a/example/tutorial_atari_pong.py b/example/tutorial_atari_pong.py
index 35246ebde..902a0d8a1 100644
--- a/example/tutorial_atari_pong.py
+++ b/example/tutorial_atari_pong.py
@@ -133,11 +133,7 @@ def prepro(I):
             prev_x = None
 
         if reward != 0:
-            print(
-                (
-                    'episode %d: game %d took %.5fs, reward: %f' %
-                    (episode_number, game_number, time.time() - start_time, reward)
-                ), ('' if reward == -1 else ' !!!!!!!!')
-            )
+            print(('episode %d: game %d took %.5fs, reward: %f' % (episode_number, game_number, \
+                time.time() - start_time, reward)), ('' if reward == -1 else ' !!!!!!!!'))
             start_time = time.time()
             game_number += 1
diff --git a/example/tutorial_binarynet_cifar10_tfrecord.py b/example/tutorial_binarynet_cifar10_tfrecord.py
index dd5020150..7249d4954 100644
--- a/example/tutorial_binarynet_cifar10_tfrecord.py
+++ b/example/tutorial_binarynet_cifar10_tfrecord.py
@@ -148,86 +148,31 @@ def read_and_decode(filename, is_train=None):
     # prepare data in cpu
     x_train_, y_train_ = read_and_decode("train.cifar10", True)
     x_test_, y_test_ = read_and_decode("test.cifar10", False)
-
-    x_train_batch, y_train_batch = tf.train.shuffle_batch(
-        [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32
-    ) # set the number of threads here
+    # set the number of threads here
+    x_train_batch, y_train_batch = tf.train.shuffle_batch([x_train_, y_train_], \
+        batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32)
     # for testing, uses batch instead of shuffle_batch
-    x_test_batch, y_test_batch = tf.train.batch(
-        [x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32
-    )
+    x_test_batch, y_test_batch = tf.train.batch([x_test_, y_test_], \
+        batch_size=batch_size, capacity=50000, num_threads=32)
 
     def model(x_crop, y_, reuse):
         """ For more simplified CNN APIs, check tensorlayer.org """
-        W_init = tf.truncated_normal_initializer(stddev=5e-2)
-        W_init2 = tf.truncated_normal_initializer(stddev=0.04)
-        b_init2 = tf.constant_initializer(value=0.1)
         with tf.variable_scope("model", reuse=reuse):
             net = tl.layers.InputLayer(x_crop, name='input')
-            net =
tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1') + net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn1') net = tl.layers.SignLayer(net) net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.LocalResponseNormLayer( - net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1' - ) - net = tl.layers.BinaryConv2d( - net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2' - ) - net = tl.layers.LocalResponseNormLayer( - net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2' - ) + net = tl.layers.LocalResponseNormLayer(net, 4, 1.0, 0.001 / 9.0, 0.75, name='norm1') + net = tl.layers.BinaryConv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn2') + net = tl.layers.LocalResponseNormLayer(net, 4, 1.0, 0.001 / 9.0, 0.75, name='norm2') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) + net = tl.layers.FlattenLayer(net, name='flatten') net = tl.layers.SignLayer(net) - net = tl.layers.BinaryDenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) + net = tl.layers.BinaryDenseLayer(net, 384, act=tf.nn.relu, name='d1relu') net = tl.layers.SignLayer(net) - net = tl.layers.BinaryDenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = tl.layers.DenseLayer( - net, n_units=10, act=tf.identity, W_init=W_init2, name='output' - ) # output: (batch_size, 10) - y = net.outputs - - ce = tl.cost.cross_entropy(y, y_, name='cost') - # L2 for the MLP, without this, the accuracy will be reduced by 15%. - L2 = 0 - for p in tl.layers.get_variables_with_name('relu/W', True, True): - L2 += tf.contrib.layers.l2_regularizer(0.004)(p) - cost = ce + L2 + net = tl.layers.BinaryDenseLayer(net, 192, act=tf.nn.relu, name='d2relu') + net = tl.layers.DenseLayer(net, 10, act=tf.identity, name='output') - # correct_prediction = tf.equal(tf.argmax(tf.nn.softmax(y), 1), y_) - correct_prediction = tf.equal(tf.cast(tf.argmax(y, 1), tf.int32), y_) - acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) - - return net, cost, acc - - def model_batch_norm(x_crop, y_, reuse, is_train): - """ Batch normalization should be placed before rectifier. 
""" - W_init = tf.truncated_normal_initializer(stddev=5e-2) - W_init2 = tf.truncated_normal_initializer(stddev=0.04) - b_init2 = tf.constant_initializer(value=0.1) - with tf.variable_scope("model", reuse=reuse): - net = InputLayer(x_crop, name='input') - - net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn1') - net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch1') - net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn2') - net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2') - net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.DenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = tl.layers.DenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = tl.layers.DenseLayer( - net, n_units=10, act=tf.identity, W_init=W_init2, name='output' - ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -237,6 +182,7 @@ def model_batch_norm(x_crop, y_, reuse, is_train): L2 += tf.contrib.layers.l2_regularizer(0.004)(p) cost = ce + L2 + # correct_prediction = tf.equal(tf.argmax(tf.nn.softmax(y), 1), y_) correct_prediction = tf.equal(tf.cast(tf.argmax(y, 1), tf.int32), y_) acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) @@ -249,12 +195,8 @@ def model_batch_norm(x_crop, y_, reuse, is_train): # cost, acc, network = model(x_crop, y_, None) with tf.device('/gpu:0'): # <-- remove it if you don't have GPU - ## using local response normalization network, cost, acc, = model(x_train_batch, y_train_batch, False) _, cost_test, acc_test = model(x_test_batch, y_test_batch, True) - ## you may want to try batch normalization - # network, cost, acc, = model_batch_norm(x_train_batch, y_train_batch, None, is_train=True) - # _, cost_test, acc_test = model_batch_norm(x_test_batch, y_test_batch, True, is_train=False) ## train n_epoch = 50000 @@ -297,10 +239,8 @@ def model_batch_norm(x_crop, y_, reuse, is_train): n_batch += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - print( - "Epoch %d : Step %d-%d of %d took %fs" % - (epoch, step, step + n_step_epoch, n_step, time.time() - start_time) - ) + print("Epoch %d : Step %d-%d of %d took %fs" % \ + (epoch, step, step + n_step_epoch, n_step, time.time() - start_time)) print(" train loss: %f" % (train_loss / n_batch)) print(" train acc: %f" % (train_acc / n_batch)) diff --git a/example/tutorial_binarynet_mnist_cnn.py b/example/tutorial_binarynet_mnist_cnn.py index 509bd2e87..3044ed642 100644 --- a/example/tutorial_binarynet_mnist_cnn.py +++ b/example/tutorial_binarynet_mnist_cnn.py @@ -2,9 +2,7 @@ # -*- coding: utf-8 -*- import time - import tensorflow as tf - import tensorlayer as tl X_train, y_train, X_val, y_val, X_test, y_test = \ diff --git a/example/tutorial_cartpole_ac.py b/example/tutorial_cartpole_ac.py index b7fd64ba7..5af1c1072 100644 --- a/example/tutorial_cartpole_ac.py +++ b/example/tutorial_cartpole_ac.py @@ -147,9 +147,8 @@ def learn(self, s, r, s_): sess = tf.Session() actor = Actor(sess, n_features=N_F, n_actions=N_A, lr=LR_A) -critic = Critic( - sess, n_features=N_F, lr=LR_C -) # we need a good teacher, so 
the teacher should learn faster than the actor +# we need a good teacher, so the teacher should learn faster than the actor +critic = Critic(sess, n_features=N_F, lr=LR_C) tl.layers.initialize_global_variables(sess) @@ -193,10 +192,8 @@ def learn(self, s, r, s_): running_reward = running_reward * 0.95 + ep_rs_sum * 0.05 # start rending if running_reward greater than a threshold # if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True - print( - "Episode: %d reward: %f running_reward %f took: %.5f" % - (i_episode, ep_rs_sum, running_reward, time.time() - episode_time) - ) + print("Episode: %d reward: %f running_reward %f took: %.5f" % \ + (i_episode, ep_rs_sum, running_reward, time.time() - episode_time)) # Early Stopping for quick check if t >= MAX_EP_STEPS: diff --git a/example/tutorial_cifar10.py b/example/tutorial_cifar10.py index 8f1c5b80b..de643d702 100644 --- a/example/tutorial_cifar10.py +++ b/example/tutorial_cifar10.py @@ -1,13 +1,9 @@ #! /usr/bin/python # -*- coding: utf-8 -*- -# tl.prepro for data augmentation - import time - import numpy as np import tensorflow as tf - import tensorlayer as tl from tensorlayer.layers import * @@ -23,36 +19,17 @@ def model(x, y_, reuse): with tf.variable_scope("model", reuse=reuse): net = InputLayer(x, name='input') net = Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1') - # net = Conv2dLayer(net, act=tf.nn.relu, shape=[5, 5, 3, 64], - # strides=[1, 1, 1, 1], padding='SAME', # 64 features for each 5x5x3 patch - # W_init=W_init, name ='cnn1') # output: (batch_size, 24, 24, 64) net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - # padding='SAME', pool = tf.nn.max_pool, name ='pool1',)# output: (batch_size, 12, 12, 64) net = LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') - # net.outputs = tf.nn.lrn(net.outputs, 4, bias=1.0, alpha=0.001 / 9.0, - # beta=0.75, name='norm1') net = Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2') - # net = Conv2dLayer(net, act=tf.nn.relu, shape=[5, 5, 64, 64], - # strides=[1, 1, 1, 1], padding='SAME', # 64 features for each 5x5 patch - # W_init=W_init, name ='cnn2') # output: (batch_size, 12, 12, 64) net = LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') - # net.outputs = tf.nn.lrn(net.outputs, 4, bias=1.0, alpha=0.001 / 9.0, - # beta=0.75, name='norm2') net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - # padding='SAME', pool = tf.nn.max_pool, name ='pool2') # output: (batch_size, 6, 6, 64) - net = FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = DenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = DenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = DenseLayer( - net, n_units=10, act=tf.identity, W_init=tf.truncated_normal_initializer(stddev=1 / 192.0), name='output' - ) # output: (batch_size, 10) + + net = FlattenLayer(net, name='flatten') + net = DenseLayer(net, 384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') + net = DenseLayer(net, 192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') + net = DenseLayer(net, 10, act=tf.identity, W_init=W_init2, 
name='output') y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -75,35 +52,18 @@ def model_batch_norm(x, y_, reuse, is_train): b_init2 = tf.constant_initializer(value=0.1) with tf.variable_scope("model", reuse=reuse): net = InputLayer(x, name='input') - net = Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn1') - # net = Conv2dLayer(net, act=tf.identity, shape=[5, 5, 3, 64], - # strides=[1, 1, 1, 1], padding='SAME', # 64 features for each 5x5x3 patch - # W_init=W_init, b_init=None, name='cnn1') # output: (batch_size, 24, 24, 64) net = BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch1') net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - # padding='SAME', pool=tf.nn.max_pool, name='pool1',) # output: (batch_size, 12, 12, 64) net = Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn2') - # net = Conv2dLayer(net, act=tf.identity, shape=[5, 5, 64, 64], - # strides=[1, 1, 1, 1], padding='SAME', # 64 features for each 5x5 patch - # W_init=W_init, b_init=None, name ='cnn2') # output: (batch_size, 12, 12, 64) net = BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2') net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - # padding='SAME', pool = tf.nn.max_pool, name ='pool2') # output: (batch_size, 6, 6, 64) net = FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = DenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = DenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = DenseLayer( - net, n_units=10, act=tf.identity, W_init=tf.truncated_normal_initializer(stddev=1 / 192.0), name='output' - ) # output: (batch_size, 10) + net = DenseLayer(net, 384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') + net = DenseLayer(net, 192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') + net = DenseLayer(net, 10, act=tf.identity, W_init=W_init2, name='output') y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') diff --git a/example/tutorial_cifar10_tfrecord.py b/example/tutorial_cifar10_tfrecord.py index 62be4c5d9..103aea761 100644 --- a/example/tutorial_cifar10_tfrecord.py +++ b/example/tutorial_cifar10_tfrecord.py @@ -193,36 +193,17 @@ def model(x_crop, y_, reuse): with tf.variable_scope("model", reuse=reuse): net = InputLayer(x_crop, name='input') net = Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1') - # net = Conv2dLayer(net, act=tf.nn.relu, shape=[5, 5, 3, 64], - # strides=[1, 1, 1, 1], padding='SAME', # 64 features for each 5x5x3 patch - # W_init=W_init, name ='cnn1') # output: (batch_size, 24, 24, 64) net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - # padding='SAME', pool = tf.nn.max_pool, name ='pool1',)# output: (batch_size, 12, 12, 64) net = LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') - # net.outputs = tf.nn.lrn(net.outputs, 4, bias=1.0, alpha=0.001 / 9.0, - # beta=0.75, name='norm1') net = Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2') - # net = Conv2dLayer(net, act=tf.nn.relu, shape=[5, 5, 64, 
64], - # strides=[1, 1, 1, 1], padding='SAME', # 64 features for each 5x5 patch - # W_init=W_init, name ='cnn2') # output: (batch_size, 12, 12, 64) net = LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') - # net.outputs = tf.nn.lrn(net.outputs, 4, bias=1.0, alpha=0.001 / 9.0, - # beta=0.75, name='norm2') net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - # padding='SAME', pool = tf.nn.max_pool, name ='pool2') # output: (batch_size, 6, 6, 64) - net = FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = DenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = DenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = DenseLayer( - net, n_units=10, act=tf.identity, W_init=W_init2, name='output' - ) # output: (batch_size, 10) + + net = FlattenLayer(net, name='flatten') + net = DenseLayer(net, 384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') + net = DenseLayer(net, 192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') + net = DenseLayer(net, n_units=10, act=tf.identity, W_init=W_init2, name='output') y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -245,35 +226,18 @@ def model_batch_norm(x_crop, y_, reuse, is_train): b_init2 = tf.constant_initializer(value=0.1) with tf.variable_scope("model", reuse=reuse): net = InputLayer(x_crop, name='input') - net = Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn1') - # net = Conv2dLayer(net, act=tf.identity, shape=[5, 5, 3, 64], - # strides=[1, 1, 1, 1], padding='SAME', # 64 features for each 5x5x3 patch - # W_init=W_init, b_init=None, name='cnn1') # output: (batch_size, 24, 24, 64) net = BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch1') net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - # padding='SAME', pool=tf.nn.max_pool, name='pool1',) # output: (batch_size, 12, 12, 64) net = Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn2') - # net = Conv2dLayer(net, act=tf.identity, shape=[5, 5, 64, 64], - # strides=[1, 1, 1, 1], padding='SAME', # 64 features for each 5x5 patch - # W_init=W_init, b_init=None, name ='cnn2') # output: (batch_size, 12, 12, 64) net = BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2') net = MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - # padding='SAME', pool = tf.nn.max_pool, name ='pool2') # output: (batch_size, 6, 6, 64) - - net = FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = DenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = DenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = DenseLayer( - net, n_units=10, act=tf.identity, W_init=W_init2, name='output' - ) # output: (batch_size, 10) + + net = FlattenLayer(net, name='flatten') + net = DenseLayer(net, 384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') + net = DenseLayer(net, 192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') + net = DenseLayer(net, n_units=10, 
act=tf.identity, W_init=W_init2, name='output') y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -343,10 +307,8 @@ def model_batch_norm(x_crop, y_, reuse, is_train): n_batch += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - print( - "Epoch %d : Step %d-%d of %d took %fs" % - (epoch, step, step + n_step_epoch, n_step, time.time() - start_time) - ) + print("Epoch %d : Step %d-%d of %d took %fs" % \ + (epoch, step, step + n_step_epoch, n_step, time.time() - start_time)) print(" train loss: %f" % (train_loss / n_batch)) print(" train acc: %f" % (train_acc / n_batch)) diff --git a/example/tutorial_dorefanet_cifar10_tfrecord.py b/example/tutorial_dorefanet_cifar10_tfrecord.py index 63b76d855..f523fedd2 100644 --- a/example/tutorial_dorefanet_cifar10_tfrecord.py +++ b/example/tutorial_dorefanet_cifar10_tfrecord.py @@ -148,44 +148,27 @@ def read_and_decode(filename, is_train=None): # prepare data in cpu x_train_, y_train_ = read_and_decode("train.cifar10", True) x_test_, y_test_ = read_and_decode("test.cifar10", False) - - x_train_batch, y_train_batch = tf.train.shuffle_batch( - [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32 - ) # set the number of threads here + # set the number of threads here + x_train_batch, y_train_batch = tf.train.shuffle_batch([x_train_, y_train_], \ + batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32) # for testing, uses batch instead of shuffle_batch - x_test_batch, y_test_batch = tf.train.batch( - [x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32 - ) + x_test_batch, y_test_batch = tf.train.batch([x_test_, y_test_], \ + batch_size=batch_size, capacity=50000, num_threads=32) def model(x_crop, y_, reuse): """ For more simplified CNN APIs, check tensorlayer.org """ - W_init = tf.truncated_normal_initializer(stddev=5e-2) - W_init2 = tf.truncated_normal_initializer(stddev=0.04) - b_init2 = tf.constant_initializer(value=0.1) with tf.variable_scope("model", reuse=reuse): net = tl.layers.InputLayer(x_crop, name='input') - net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1') + net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn1') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.LocalResponseNormLayer( - net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1' - ) - net = tl.layers.DorefaConv2d( - net, 1, 3, 64, (5, 5), (1, 1), tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2' - ) - net = tl.layers.LocalResponseNormLayer( - net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2' - ) + net = tl.layers.LocalResponseNormLayer(net, 4, 1.0, 0.001 / 9.0, 0.75, name='norm1') + net = tl.layers.DorefaConv2d(net, 1, 3, 64, (5, 5), (1, 1), tf.nn.relu, padding='SAME', name='cnn2') + net = tl.layers.LocalResponseNormLayer(net, 4, 1.0, 0.001 / 9.0, 0.75, name='norm2') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.DorefaDenseLayer( - net, 1, 3, 384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = tl.layers.DorefaDenseLayer( - net, 1, 3, 192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = tl.layers.DenseLayer( - net, n_units=10, act=tf.identity, 
W_init=W_init2, name='output' - ) # output: (batch_size, 10) + net = tl.layers.FlattenLayer(net, name='flatten') + net = tl.layers.DorefaDenseLayer(net, 1, 3, 384, act=tf.nn.relu, name='d1relu') + net = tl.layers.DorefaDenseLayer(net, 1, 3, 192, act=tf.nn.relu, name='d2relu') + net = tl.layers.DenseLayer(net, 10, act=tf.identity, name='output') y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -201,42 +184,6 @@ def model(x_crop, y_, reuse): return net, cost, acc - def model_batch_norm(x_crop, y_, reuse, is_train): - """ Batch normalization should be placed before rectifier. """ - W_init = tf.truncated_normal_initializer(stddev=5e-2) - W_init2 = tf.truncated_normal_initializer(stddev=0.04) - b_init2 = tf.constant_initializer(value=0.1) - with tf.variable_scope("model", reuse=reuse): - net = tl.layers.InputLayer(x_crop, name='input') - net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn1') - net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch1') - net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn2') - net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2') - net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.DenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = tl.layers.DenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = tl.layers.DenseLayer( - net, n_units=10, act=tf.identity, W_init=W_init2, name='output' - ) # output: (batch_size, 10) - y = net.outputs - ce = tl.cost.cross_entropy(y, y_, name='cost') - # L2 for the MLP, without this, the accuracy will be reduced by 15%. 
- L2 = 0 - for p in tl.layers.get_variables_with_name('relu/W', True, True): - L2 += tf.contrib.layers.l2_regularizer(0.004)(p) - cost = ce + L2 - - correct_prediction = tf.equal(tf.cast(tf.argmax(y, 1), tf.int32), y_) - acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) - - return net, cost, acc - ## You can also use placeholder to feed_dict in data after using ## val, l = sess.run([x_train_batch, y_train_batch]) to get the data # x_crop = tf.placeholder(tf.float32, shape=[batch_size, 24, 24, 3]) @@ -244,12 +191,8 @@ def model_batch_norm(x_crop, y_, reuse, is_train): # cost, acc, network = model(x_crop, y_, None) with tf.device('/gpu:0'): # <-- remove it if you don't have GPU - ## using local response normalization network, cost, acc, = model(x_train_batch, y_train_batch, False) _, cost_test, acc_test = model(x_test_batch, y_test_batch, True) - ## you may want to try batch normalization - # network, cost, acc, = model_batch_norm(x_train_batch, y_train_batch, None, is_train=True) - # _, cost_test, acc_test = model_batch_norm(x_test_batch, y_test_batch, True, is_train=False) ## train n_epoch = 50000 @@ -292,10 +235,8 @@ def model_batch_norm(x_crop, y_, reuse, is_train): n_batch += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - print( - "Epoch %d : Step %d-%d of %d took %fs" % - (epoch, step, step + n_step_epoch, n_step, time.time() - start_time) - ) + print("Epoch %d : Step %d-%d of %d took %fs" % \ + (epoch, step, step + n_step_epoch, n_step, time.time() - start_time)) print(" train loss: %f" % (train_loss / n_batch)) print(" train acc: %f" % (train_acc / n_batch)) diff --git a/example/tutorial_dorefanet_mnist_cnn.py b/example/tutorial_dorefanet_mnist_cnn.py index 018445d86..d68500ae8 100644 --- a/example/tutorial_dorefanet_mnist_cnn.py +++ b/example/tutorial_dorefanet_mnist_cnn.py @@ -2,9 +2,7 @@ # -*- coding: utf-8 -*- import time - import tensorflow as tf - import tensorlayer as tl X_train, y_train, X_val, y_val, X_test, y_test = \ diff --git a/example/tutorial_frozenlake_dqn.py b/example/tutorial_frozenlake_dqn.py index 11ebc4d26..a5511ed84 100644 --- a/example/tutorial_frozenlake_dqn.py +++ b/example/tutorial_frozenlake_dqn.py @@ -48,13 +48,10 @@ def to_one_hot(i, n_classes=None): # 4x4 grid can be represented by one-hot vector with 16 integers. inputs = tf.placeholder(shape=[1, 16], dtype=tf.float32) net = InputLayer(inputs, name='observation') -net = DenseLayer( - net, n_units=4, act=tf.identity, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s' -) +net = DenseLayer(net, 4, act=tf.identity, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s') y = net.outputs # action-value / rewards of 4 actions -predict = tf.argmax( - y, 1 -) # chose action greedily with reward. in Q-Learning, policy is greedy, so we use "max" to select the next action. +# chose action greedily with reward. in Q-Learning, policy is greedy, so we use "max" to select the next action. +predict = tf.argmax(y, 1) ## Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values. 
nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32) @@ -103,7 +100,5 @@ def to_one_hot(i, n_classes=None): ## Note that, the rewards here with random action running_reward = rAll if running_reward is None else running_reward * 0.99 + rAll * 0.01 - print( - "Episode [%d/%d] sum reward:%f running reward:%f took:%.5fs %s" % - (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!') - ) + print("Episode [%d/%d] sum reward:%f running reward:%f took:%.5fs %s" % \ + (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!')) diff --git a/example/tutorial_frozenlake_q_table.py b/example/tutorial_frozenlake_q_table.py index f63127e76..e2b880728 100644 --- a/example/tutorial_frozenlake_q_table.py +++ b/example/tutorial_frozenlake_q_table.py @@ -14,9 +14,7 @@ """ import time - import numpy as np - import gym ## Load the environment @@ -52,9 +50,7 @@ break rList.append(rAll) running_reward = r if running_reward is None else running_reward * 0.99 + r * 0.01 - print( - "Episode [%d/%d] sum reward:%f running reward:%f took:%.5fs %s" % - (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!') - ) + print("Episode [%d/%d] sum reward:%f running reward:%f took:%.5fs %s" % \ + (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!')) print("Final Q-Table Values:/n %s" % Q) diff --git a/example/tutorial_generate_text.py b/example/tutorial_generate_text.py index 7ff5a5c7c..6d4ce1a61 100644 --- a/example/tutorial_generate_text.py +++ b/example/tutorial_generate_text.py @@ -24,11 +24,9 @@ import re import time - +import nltk import numpy as np import tensorflow as tf - -import nltk import tensorlayer as tl from tensorlayer.layers import * @@ -155,9 +153,7 @@ def main_restore_embedding_layer(): x = tf.placeholder(tf.int32, shape=[batch_size]) - emb_net = tl.layers.EmbeddingInputlayer( - inputs=x, vocabulary_size=vocabulary_size, embedding_size=embedding_size, name='embedding_layer' - ) + emb_net = tl.layers.EmbeddingInputlayer(x, vocabulary_size, embedding_size, name='emb') # sess.run(tf.initialize_all_variables()) tl.layers.initialize_global_variables(sess) @@ -233,20 +229,13 @@ def inference(x, is_train, sequence_length, reuse=None): print("\nsequence_length: %d, is_train: %s, reuse: %s" % (sequence_length, is_train, reuse)) rnn_init = tf.random_uniform_initializer(-init_scale, init_scale) with tf.variable_scope("model", reuse=reuse): - network = EmbeddingInputlayer( - inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=rnn_init, name='embedding' - ) - network = RNNLayer( - network, cell_fn=tf.contrib.rnn.BasicLSTMCell, cell_init_args={ - 'forget_bias': 0.0, - 'state_is_tuple': True - }, n_hidden=hidden_size, initializer=rnn_init, n_steps=sequence_length, return_last=False, - return_seq_2d=True, name='lstm1' - ) + network = EmbeddingInputlayer(x, vocab_size, hidden_size, rnn_init, name='embedding') + network = RNNLayer(network, cell_fn=tf.contrib.rnn.BasicLSTMCell, \ + cell_init_args={'forget_bias': 0.0, 'state_is_tuple': True}, \ + n_hidden=hidden_size, initializer=rnn_init, n_steps=sequence_length, return_last=False, + return_seq_2d=True, name='lstm1') lstm1 = network - network = DenseLayer( - network, n_units=vocab_size, W_init=rnn_init, b_init=rnn_init, act=tf.identity, name='output' - ) + network = DenseLayer(network, vocab_size, W_init=rnn_init, b_init=rnn_init, act=tf.identity, name='output') return 
network, lstm1 # Inference for Training @@ -308,21 +297,14 @@ def loss_fn(outputs, targets, batch_size, sequence_length): ## reset all states at the begining of every epoch state1 = tl.layers.initialize_rnn_state(lstm1.initial_state) for step, (x, y) in enumerate(tl.iterate.ptb_iterator(train_data, batch_size, sequence_length)): - _cost, state1, _ = sess.run( - [cost, lstm1.final_state, train_op], feed_dict={ - input_data: x, - targets: y, - lstm1.initial_state: state1, - } - ) + _cost, state1, _ = sess.run([cost, lstm1.final_state, train_op], \ + feed_dict={input_data: x, targets: y, lstm1.initial_state: state1}) costs += _cost iters += sequence_length if step % (epoch_size // 10) == 1: - print( - "%.3f perplexity: %.3f speed: %.0f wps" % - (step * 1.0 / epoch_size, np.exp(costs / iters), iters * batch_size / (time.time() - start_time)) - ) + print("%.3f perplexity: %.3f speed: %.0f wps" % \ + (step * 1.0 / epoch_size, np.exp(costs / iters), iters * batch_size / (time.time() - start_time))) train_perplexity = np.exp(costs / iters) # print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) print("Epoch: %d/%d Train Perplexity: %.3f" % (i + 1, max_max_epoch, train_perplexity)) @@ -337,24 +319,14 @@ def loss_fn(outputs, targets, batch_size, sequence_length): # feed the seed to initialize the state for generation. for ids in outs_id[:-1]: a_id = np.asarray(ids).reshape(1, 1) - state1 = sess.run( - [ - lstm1_test.final_state, - ], feed_dict={ - input_data_test: a_id, - lstm1_test.initial_state: state1, - } - ) + state1 = sess.run([lstm1_test.final_state], \ + feed_dict={input_data_test: a_id, lstm1_test.initial_state: state1}) # feed the last word in seed, and start to generate sentence. a_id = outs_id[-1] for _ in range(print_length): a_id = np.asarray(a_id).reshape(1, 1) - out, state1 = sess.run( - [y_soft, lstm1_test.final_state], feed_dict={ - input_data_test: a_id, - lstm1_test.initial_state: state1, - } - ) + out, state1 = sess.run([y_soft, lstm1_test.final_state], \ + feed_dict={input_data_test: a_id, lstm1_test.initial_state: state1}) ## Without sampling # a_id = np.argmax(out[0]) ## Sample from all words, if vocab_size is large, diff --git a/example/tutorial_image_preprocess.py b/example/tutorial_image_preprocess.py index 99f1c27dc..2ea016c77 100755 --- a/example/tutorial_image_preprocess.py +++ b/example/tutorial_image_preprocess.py @@ -6,7 +6,6 @@ """ import time - import tensorlayer as tl X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False) diff --git a/example/tutorial_inceptionV3_tfslim.py b/example/tutorial_inceptionV3_tfslim.py index 69a45769c..377e0addb 100644 --- a/example/tutorial_inceptionV3_tfslim.py +++ b/example/tutorial_inceptionV3_tfslim.py @@ -23,7 +23,6 @@ import os import time - import numpy as np # from tensorflow.contrib.slim.python.slim.nets.resnet_v2 import resnet_v2_152 # from tensorflow.contrib.slim.python.slim.nets.vgg import vgg_16 @@ -125,7 +124,7 @@ def print_prob(prob): saver = tf.train.Saver() if not os.path.isfile("inception_v3.ckpt"): raise Exception( - "Please download inception_v3 ckpt from : https://github.com/tensorflow/models/tree/master/research/slim" + "Please download inception_v3 ckpt from https://github.com/tensorflow/models/tree/master/research/slim" ) try: # TF12+ @@ -136,9 +135,8 @@ def print_prob(prob): y = network.outputs probs = tf.nn.softmax(y) -img1 = load_image( - "data/puzzle.jpeg" -) # test data in github: 
https://github.com/zsdonghao/tensorlayer/tree/master/example/data +# test data in github: https://github.com/zsdonghao/tensorlayer/tree/master/example/data +img1 = load_image("data/puzzle.jpeg") img1 = img1.reshape((1, 299, 299, 3)) prob = sess.run(probs, feed_dict={x: img1}) # the 1st time need time to compile @@ -149,5 +147,3 @@ def print_prob(prob): ## You can save the model into npz file # tl.files.save_npz(network.all_params, name='model_inceptionV3.npz') - -# diff --git a/example/tutorial_mnist.py b/example/tutorial_mnist.py index 174068074..f7c915ecc 100644 --- a/example/tutorial_mnist.py +++ b/example/tutorial_mnist.py @@ -12,9 +12,7 @@ """ import time - import tensorflow as tf - import tensorlayer as tl @@ -40,38 +38,34 @@ def main_test_layers(model='relu'): # to speed up computation, so we use identity in the last layer. # see tf.nn.sparse_softmax_cross_entropy_with_logits() if model == 'relu': - network = tl.layers.InputLayer(x, name='input') - network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1') - network = tl.layers.DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu1') - network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2') - network = tl.layers.DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu2') - network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3') - network = tl.layers.DenseLayer(network, n_units=10, act=tf.identity, name='output') + net = tl.layers.InputLayer(x, name='input') + net = tl.layers.DropoutLayer(net, keep=0.8, name='drop1') + net = tl.layers.DenseLayer(net, n_units=800, act=tf.nn.relu, name='relu1') + net = tl.layers.DropoutLayer(net, keep=0.5, name='drop2') + net = tl.layers.DenseLayer(net, n_units=800, act=tf.nn.relu, name='relu2') + net = tl.layers.DropoutLayer(net, keep=0.5, name='drop3') + net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity, name='output') elif model == 'dropconnect': - network = tl.layers.InputLayer(x, name='input') - network = tl.layers.DropconnectDenseLayer( - network, keep=0.8, n_units=800, act=tf.nn.relu, name='dropconnect_relu1' - ) - network = tl.layers.DropconnectDenseLayer( - network, keep=0.5, n_units=800, act=tf.nn.relu, name='dropconnect_relu2' - ) - network = tl.layers.DropconnectDenseLayer(network, keep=0.5, n_units=10, act=tf.identity, name='output') + net = tl.layers.InputLayer(x, name='input') + net = tl.layers.DropconnectDenseLayer(net, keep=0.8, n_units=800, act=tf.nn.relu, name='dropconnect1') + net = tl.layers.DropconnectDenseLayer(net, keep=0.5, n_units=800, act=tf.nn.relu, name='dropconnect2') + net = tl.layers.DropconnectDenseLayer(net, keep=0.5, n_units=10, act=tf.identity, name='output') # To print all attributes of a Layer. - # attrs = vars(network) + # attrs = vars(net) # print(', '.join("%s: %s\n" % item for item in attrs.items())) - # print(network.all_drop) # {'drop1': 0.8, 'drop2': 0.5, 'drop3': 0.5} + # print(net.all_drop) # {'drop1': 0.8, 'drop2': 0.5, 'drop3': 0.5} - y = network.outputs + y = net.outputs cost = tl.cost.cross_entropy(y, y_, name='xentropy') correct_prediction = tf.equal(tf.argmax(y, 1), y_) acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # y_op = tf.argmax(tf.nn.softmax(y), 1) # You can add more penalty to the cost function as follow. 
- # cost = cost + tl.cost.maxnorm_regularizer(1.0)(network.all_params[0]) + tl.cost.maxnorm_regularizer(1.0)(network.all_params[2]) - # cost = cost + tl.cost.lo_regularizer(0.0001)(network.all_params[0]) + tl.cost.lo_regularizer(0.0001)(network.all_params[2]) - # cost = cost + tl.cost.maxnorm_o_regularizer(0.001)(network.all_params[0]) + tl.cost.maxnorm_o_regularizer(0.001)(network.all_params[2]) + # cost = cost + tl.cost.maxnorm_regularizer(1.0)(net.all_params[0]) + tl.cost.maxnorm_regularizer(1.0)(net.all_params[2]) + # cost = cost + tl.cost.lo_regularizer(0.0001)(net.all_params[0]) + tl.cost.lo_regularizer(0.0001)(net.all_params[2]) + # cost = cost + tl.cost.maxnorm_o_regularizer(0.001)(net.all_params[0]) + tl.cost.maxnorm_o_regularizer(0.001)(net.all_params[2]) # train n_epoch = 100 @@ -82,8 +76,8 @@ def main_test_layers(model='relu'): tl.layers.initialize_global_variables(sess) - network.print_params() - network.print_layers() + net.print_params() + net.print_layers() print(' learning_rate: %f' % learning_rate) print(' batch_size: %d' % batch_size) @@ -92,7 +86,7 @@ def main_test_layers(model='relu'): start_time = time.time() for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True): feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update(network.all_drop) # enable dropout or dropconnect layers + feed_dict.update(net.all_drop) # enable dropout or dropconnect layers sess.run(train_op, feed_dict=feed_dict) # The optional feed_dict argument allows the caller to override the value of tensors in the graph. Each key in feed_dict can be one of the following types: @@ -103,7 +97,7 @@ def main_test_layers(model='relu'): print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) train_loss, train_acc, n_batch = 0, 0, 0 for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_train_a, y_: y_train_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) @@ -114,7 +108,7 @@ def main_test_layers(model='relu'): # print(" train acc: %f" % (train_acc/ n_batch)) val_loss, val_acc, n_batch = 0, 0, 0 for X_val_a, y_val_a in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_val_a, y_: y_val_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) @@ -125,16 +119,16 @@ def main_test_layers(model='relu'): print(" val acc: %f" % (val_acc / n_batch)) # try: # # You can visualize the weight of 1st hidden layer as follow. - # tl.vis.draw_weights(network.all_params[0].eval(), second=10, saveable=True, shape=[28, 28], name='w1_' + str(epoch + 1), fig_idx=2012) + # tl.vis.draw_weights(net.all_params[0].eval(), second=10, saveable=True, shape=[28, 28], name='w1_' + str(epoch + 1), fig_idx=2012) # # You can also save the weight of 1st hidden layer to .npz file. 
- # # tl.files.save_npz([network.all_params[0]] , name='w1'+str(epoch+1)+'.npz') + # # tl.files.save_npz([net.all_params[0]] , name='w1'+str(epoch+1)+'.npz') # except: # pylint: disable=bare-except # print("You should change vis.draw_weights(), if you want to save the feature images for different dataset") print('Evaluation') test_loss, test_acc, n_batch = 0, 0, 0 for X_test_a, y_test_a in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_test_a, y_: y_test_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) @@ -150,12 +144,12 @@ def main_test_layers(model='relu'): print("Model saved in file: %s" % save_path) # You can also save the parameters into .npz file. - tl.files.save_npz(network.all_params, name='model.npz') + tl.files.save_npz(net.all_params, name='model.npz') # You can only save one parameter as follow. - # tl.files.save_npz([network.all_params[0]] , name='model.npz') + # tl.files.save_npz([net.all_params[0]] , name='model.npz') # Then, restore the parameters as follow. # load_params = tl.files.load_npz(path='', name='model.npz') - # tl.files.assign_params(sess, load_params, network) + # tl.files.assign_params(sess, load_params, net) # In the end, close TensorFlow session. sess.close() @@ -170,32 +164,30 @@ def main_test_denoise_AE(model='relu'): # placeholder x = tf.placeholder(tf.float32, shape=[None, 784], name='x') - print("Build Network") + print("Build net") if model == 'relu': - network = tl.layers.InputLayer(x, name='input') - network = tl.layers.DropoutLayer(network, keep=0.5, name='denoising1') # if drop some inputs, it is denoise AE - network = tl.layers.DenseLayer(network, n_units=196, act=tf.nn.relu, name='relu1') - recon_layer1 = tl.layers.ReconLayer(network, x_recon=x, n_units=784, act=tf.nn.softplus, name='recon_layer1') + net = tl.layers.InputLayer(x, name='input') + net = tl.layers.DropoutLayer(net, keep=0.5, name='denoising1') # if drop some inputs, it is denoise AE + net = tl.layers.DenseLayer(net, n_units=196, act=tf.nn.relu, name='relu1') + recon_layer1 = tl.layers.ReconLayer(net, x_recon=x, n_units=784, act=tf.nn.softplus, name='recon_layer1') elif model == 'sigmoid': # sigmoid - set keep to 1.0, if you want a vanilla Autoencoder - network = tl.layers.InputLayer(x, name='input') - network = tl.layers.DropoutLayer(network, keep=0.5, name='denoising1') - network = tl.layers.DenseLayer(network, n_units=196, act=tf.nn.sigmoid, name='sigmoid1') - recon_layer1 = tl.layers.ReconLayer(network, x_recon=x, n_units=784, act=tf.nn.sigmoid, name='recon_layer1') + net = tl.layers.InputLayer(x, name='input') + net = tl.layers.DropoutLayer(net, keep=0.5, name='denoising1') + net = tl.layers.DenseLayer(net, n_units=196, act=tf.nn.sigmoid, name='sigmoid1') + recon_layer1 = tl.layers.ReconLayer(net, x_recon=x, n_units=784, act=tf.nn.sigmoid, name='recon_layer1') ## ready to train tl.layers.initialize_global_variables(sess) ## print all params - print("All Network Params") - network.print_params() + print("All net Params") + net.print_params() ## pretrain print("Pre-train Layer 1") - recon_layer1.pretrain( - sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=200, batch_size=128, print_freq=10, - save=True, save_name='w1pre_' - ) + recon_layer1.pretrain(sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', \ + n_epoch=200, 
batch_size=128, print_freq=10, save=True, save_name='w1pre_') # You can also disable denoisong by setting denoise_name=None. # recon_layer1.pretrain(sess, x=x, X_train=X_train, X_val=X_val, # denoise_name=None, n_epoch=500, batch_size=128, @@ -226,26 +218,26 @@ def main_test_stacked_denoise_AE(model='relu'): act = tf.nn.sigmoid act_recon = act - # Define network - print("\nBuild Network") - network = tl.layers.InputLayer(x, name='input') + # Define net + print("\nBuild net") + net = tl.layers.InputLayer(x, name='input') # denoise layer for AE - network = tl.layers.DropoutLayer(network, keep=0.5, name='denoising1') + net = tl.layers.DropoutLayer(net, keep=0.5, name='denoising1') # 1st layer - network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1') - network = tl.layers.DenseLayer(network, n_units=800, act=act, name=model + '1') - x_recon1 = network.outputs - recon_layer1 = tl.layers.ReconLayer(network, x_recon=x, n_units=784, act=act_recon, name='recon_layer1') + net = tl.layers.DropoutLayer(net, keep=0.8, name='drop1') + net = tl.layers.DenseLayer(net, n_units=800, act=act, name=model + '1') + x_recon1 = net.outputs + recon_layer1 = tl.layers.ReconLayer(net, x_recon=x, n_units=784, act=act_recon, name='recon_layer1') # 2nd layer - network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2') - network = tl.layers.DenseLayer(network, n_units=800, act=act, name=model + '2') - recon_layer2 = tl.layers.ReconLayer(network, x_recon=x_recon1, n_units=800, act=act_recon, name='recon_layer2') + net = tl.layers.DropoutLayer(net, keep=0.5, name='drop2') + net = tl.layers.DenseLayer(net, n_units=800, act=act, name=model + '2') + recon_layer2 = tl.layers.ReconLayer(net, x_recon=x_recon1, n_units=800, act=act_recon, name='recon_layer2') # 3rd layer - network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3') - network = tl.layers.DenseLayer(network, 10, act=tf.identity, name='output') + net = tl.layers.DropoutLayer(net, keep=0.5, name='drop3') + net = tl.layers.DenseLayer(net, 10, act=tf.identity, name='output') # Define fine-tune process - y = network.outputs + y = net.outputs cost = tl.cost.cross_entropy(y, y_, name='cost') n_epoch = 200 @@ -253,7 +245,7 @@ def main_test_stacked_denoise_AE(model='relu'): learning_rate = 0.0001 print_freq = 10 - train_params = network.all_params + train_params = net.all_params # train_op = tf.train.GradientDescentOptimizer(0.5).minimize(cost) train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost, var_list=train_params) @@ -262,23 +254,19 @@ def main_test_stacked_denoise_AE(model='relu'): tl.layers.initialize_global_variables(sess) # Pre-train - print("\nAll Network Params before pre-train") - network.print_params() + print("\nAll net Params before pre-train") + net.print_params() print("\nPre-train Layer 1") - recon_layer1.pretrain( - sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=100, batch_size=128, print_freq=10, - save=True, save_name='w1pre_' - ) + recon_layer1.pretrain(sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', \ + n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_') print("\nPre-train Layer 2") - recon_layer2.pretrain( - sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=100, batch_size=128, print_freq=10, - save=False - ) - print("\nAll Network Params after pre-train") - network.print_params() + recon_layer2.pretrain(sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', \ + n_epoch=100, batch_size=128, print_freq=10, 
save=False) + print("\nAll net Params after pre-train") + net.print_params() # Fine-tune - print("\nFine-tune Network") + print("\nFine-tune net") correct_prediction = tf.equal(tf.argmax(y, 1), y_) acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) @@ -289,7 +277,7 @@ def main_test_stacked_denoise_AE(model='relu'): start_time = time.time() for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True): feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update(network.all_drop) # enable noise layers + feed_dict.update(net.all_drop) # enable noise layers feed_dict[tl.layers.LayersConfig.set_keep['denoising1']] = 1 # disable denoising layer sess.run(train_op, feed_dict=feed_dict) @@ -297,7 +285,7 @@ def main_test_stacked_denoise_AE(model='relu'): print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) train_loss, train_acc, n_batch = 0, 0, 0 for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_train_a, y_: y_train_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) @@ -308,7 +296,7 @@ def main_test_stacked_denoise_AE(model='relu'): print(" train acc: %f" % (train_acc / n_batch)) val_loss, val_acc, n_batch = 0, 0, 0 for X_val_a, y_val_a in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_val_a, y_: y_val_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) @@ -319,14 +307,14 @@ def main_test_stacked_denoise_AE(model='relu'): print(" val acc: %f" % (val_acc / n_batch)) # try: # # visualize the 1st hidden layer during fine-tune - # tl.vis.draw_weights(network.all_params[0].eval(), second=10, saveable=True, shape=[28, 28], name='w1_' + str(epoch + 1), fig_idx=2012) + # tl.vis.draw_weights(net.all_params[0].eval(), second=10, saveable=True, shape=[28, 28], name='w1_' + str(epoch + 1), fig_idx=2012) # except: # pylint: disable=bare-except # print("You should change vis.draw_weights(), if you want to save the feature images for different dataset") print('Evaluation') test_loss, test_acc, n_batch = 0, 0, 0 for X_test_a, y_test_a in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_test_a, y_: y_test_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) @@ -371,45 +359,45 @@ def main_test_cnn_layer(): x = tf.placeholder(tf.float32, shape=[batch_size, 28, 28, 1]) # [batch_size, height, width, channels] y_ = tf.placeholder(tf.int64, shape=[batch_size]) - network = tl.layers.InputLayer(x, name='input') + net = tl.layers.InputLayer(x, name='input') ## Professional conv API for tensorflow expert - # network = tl.layers.Conv2dLayer(network, + # net = tl.layers.Conv2dLayer(net, # act = tf.nn.relu, # shape = [5, 5, 1, 32], # 32 features for each 5x5 patch # strides=[1, 1, 1, 1], # padding='SAME', # name ='cnn1') # output: (?, 28, 28, 32) - # network = tl.layers.PoolLayer(network, + # net = tl.layers.PoolLayer(net, # ksize=[1, 2, 2, 1], # strides=[1, 2, 2, 1], # padding='SAME', # pool = tf.nn.max_pool, # 
name ='pool1',) # output: (?, 14, 14, 32) - # network = tl.layers.Conv2dLayer(network, + # net = tl.layers.Conv2dLayer(net, # act = tf.nn.relu, # shape = [5, 5, 32, 64], # 64 features for each 5x5 patch # strides=[1, 1, 1, 1], # padding='SAME', # name ='cnn2') # output: (?, 14, 14, 64) - # network = tl.layers.PoolLayer(network, + # net = tl.layers.PoolLayer(net, # ksize=[1, 2, 2, 1], # strides=[1, 2, 2, 1], # padding='SAME', # pool = tf.nn.max_pool, # name ='pool2',) # output: (?, 7, 7, 64) ## Simplified conv API (the same with the above layers) - network = tl.layers.Conv2d(network, 32, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn1') - network = tl.layers.MaxPool2d(network, (2, 2), (2, 2), padding='SAME', name='pool1') - network = tl.layers.Conv2d(network, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn2') - network = tl.layers.MaxPool2d(network, (2, 2), (2, 2), padding='SAME', name='pool2') + net = tl.layers.Conv2d(net, 32, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn1') + net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), padding='SAME', name='pool1') + net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn2') + net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), padding='SAME', name='pool2') ## end of conv - network = tl.layers.FlattenLayer(network, name='flatten') - network = tl.layers.DropoutLayer(network, keep=0.5, name='drop1') - network = tl.layers.DenseLayer(network, 256, act=tf.nn.relu, name='relu1') - network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2') - network = tl.layers.DenseLayer(network, 10, act=tf.identity, name='output') + net = tl.layers.FlattenLayer(net, name='flatten') + net = tl.layers.DropoutLayer(net, keep=0.5, name='drop1') + net = tl.layers.DenseLayer(net, 256, act=tf.nn.relu, name='relu1') + net = tl.layers.DropoutLayer(net, keep=0.5, name='drop2') + net = tl.layers.DenseLayer(net, 10, act=tf.identity, name='output') - y = network.outputs + y = net.outputs cost = tl.cost.cross_entropy(y, y_, 'cost') @@ -421,12 +409,12 @@ def main_test_cnn_layer(): learning_rate = 0.0001 print_freq = 10 - train_params = network.all_params + train_params = net.all_params train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost, var_list=train_params) tl.layers.initialize_global_variables(sess) - network.print_params() - network.print_layers() + net.print_params() + net.print_layers() print(' learning_rate: %f' % learning_rate) print(' batch_size: %d' % batch_size) @@ -435,14 +423,14 @@ def main_test_cnn_layer(): start_time = time.time() for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True): feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update(network.all_drop) # enable noise layers + feed_dict.update(net.all_drop) # enable noise layers sess.run(train_op, feed_dict=feed_dict) if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) train_loss, train_acc, n_batch = 0, 0, 0 for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_train_a, y_: y_train_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) @@ -453,7 +441,7 @@ def main_test_cnn_layer(): print(" train acc: %f" % (train_acc / n_batch)) val_loss, val_acc, n_batch = 0, 0, 0 for X_val_a, 
y_val_a in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_val_a, y_: y_val_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) @@ -463,14 +451,14 @@ def main_test_cnn_layer(): print(" val loss: %f" % (val_loss / n_batch)) print(" val acc: %f" % (val_acc / n_batch)) # try: - # tl.vis.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_' + str(epoch + 1), fig_idx=2012) + # tl.vis.CNN2d(net.all_params[0].eval(), second=10, saveable=True, name='cnn1_' + str(epoch + 1), fig_idx=2012) # except: # pylint: disable=bare-except # print("You should change vis.CNN(), if you want to save the feature images for different dataset") print('Evaluation') test_loss, test_acc, n_batch = 0, 0, 0 for X_test_a, y_test_a in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=True): - dp_dict = tl.utils.dict_to_one(network.all_drop) # disable noise layers + dp_dict = tl.utils.dict_to_one(net.all_drop) # disable noise layers feed_dict = {x: X_test_a, y_: y_test_a} feed_dict.update(dp_dict) err, ac = sess.run([cost, acc], feed_dict=feed_dict) diff --git a/example/tutorial_mnist_distributed.py b/example/tutorial_mnist_distributed.py index d76f90daf..befa9ac41 100644 --- a/example/tutorial_mnist_distributed.py +++ b/example/tutorial_mnist_distributed.py @@ -69,10 +69,9 @@ #tl.layers.initialize_global_variables(sess) # train the network - tl.utils.fit( - sess, network, train_op, cost, X_train, y_train, x, y_, acc=acc, batch_size=500, n_epoch=500, - print_freq=print_freq, X_val=X_val, y_val=y_val, eval_train=eval_train - ) + tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, \ + acc=acc, batch_size=500, n_epoch=500, print_freq=print_freq, \ + X_val=X_val, y_val=y_val, eval_train=eval_train) if task_spec.is_master(): # evaluation diff --git a/example/tutorial_mnist_float16.py b/example/tutorial_mnist_float16.py index b7b5d66cc..307a4f724 100644 --- a/example/tutorial_mnist_float16.py +++ b/example/tutorial_mnist_float16.py @@ -56,14 +56,10 @@ def model(x, is_train=True, reuse=False): # define the optimizer train_params = tl.layers.get_variables_with_name('model', train_only=True, printable=False) -train_op = tf.train.AdamOptimizer( - learning_rate=0.0001, - beta1=0.9, - beta2=0.999, - # epsilon=1e-08, # for float32 as default - epsilon=1e-4, # for float16, see https://stackoverflow.com/questions/42064941/tensorflow-float16-support-is-broken - use_locking=False -).minimize(cost, var_list=train_params) +# for float16 epsilon=1e-4 see https://stackoverflow.com/questions/42064941/tensorflow-float16-support-is-broken +# for float32 epsilon=1e-08 +train_op = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999, \ + epsilon=1e-4, use_locking=False).minimize(cost, var_list=train_params) # initialize all variables in the session tl.layers.initialize_global_variables(sess) diff --git a/example/tutorial_mnist_simple.py b/example/tutorial_mnist_simple.py index 457774b68..f4d6c3751 100644 --- a/example/tutorial_mnist_simple.py +++ b/example/tutorial_mnist_simple.py @@ -44,10 +44,8 @@ network.print_layers() # train the network -tl.utils.fit( - sess, network, train_op, cost, X_train, y_train, x, y_, acc=acc, batch_size=500, n_epoch=500, print_freq=5, - X_val=X_val, y_val=y_val, eval_train=False -) +tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, 
acc=acc, batch_size=500, \ + n_epoch=500, print_freq=5, X_val=X_val, y_val=y_val, eval_train=False) # evaluation tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost) diff --git a/example/tutorial_ptb_lstm.py b/example/tutorial_ptb_lstm.py index 418bd587a..7da221104 100644 --- a/example/tutorial_ptb_lstm.py +++ b/example/tutorial_ptb_lstm.py @@ -192,52 +192,48 @@ def inference(x, is_training, num_steps, reuse=None): - For DynamicRNNLayer, you can set dropout and the number of RNN layer internally. """ print("\nnum_steps : %d, is_training : %s, reuse : %s" % (num_steps, is_training, reuse)) - initializer = tf.random_uniform_initializer(-init_scale, init_scale) + init = tf.random_uniform_initializer(-init_scale, init_scale) with tf.variable_scope("model", reuse=reuse): - network = tl.layers.EmbeddingInputlayer( - inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=initializer, name='embedding' - ) - network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop1') - network = tl.layers.RNNLayer( - network, + net = tl.layers.EmbeddingInputlayer(x, vocab_size, hidden_size, init, name='embedding') + net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_training, name='drop1') + net = tl.layers.RNNLayer( + net, cell_fn=tf.contrib.rnn.BasicLSTMCell, #tf.nn.rnn_cell.BasicLSTMCell, cell_init_args={'forget_bias': 0.0}, # 'state_is_tuple': True}, n_hidden=hidden_size, - initializer=initializer, + initializer=init, n_steps=num_steps, return_last=False, name='basic_lstm_layer1' ) - lstm1 = network - network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop2') - network = tl.layers.RNNLayer( - network, + lstm1 = net + net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_training, name='drop2') + net = tl.layers.RNNLayer( + net, cell_fn=tf.contrib.rnn.BasicLSTMCell, #tf.nn.rnn_cell.BasicLSTMCell, cell_init_args={'forget_bias': 0.0}, # 'state_is_tuple': True}, n_hidden=hidden_size, - initializer=initializer, + initializer=init, n_steps=num_steps, return_last=False, return_seq_2d=True, name='basic_lstm_layer2' ) - lstm2 = network + lstm2 = net # Alternatively, if return_seq_2d=False, in the above RNN layer, # you can reshape the outputs as follow: - # network = tl.layers.ReshapeLayer(network, - # shape=[-1, int(network.outputs._shape[-1])], name='reshape') - network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop3') - network = tl.layers.DenseLayer( - network, n_units=vocab_size, W_init=initializer, b_init=initializer, act=tf.identity, name='output' - ) - return network, lstm1, lstm2 + # net = tl.layers.ReshapeLayer(net, + # shape=[-1, int(net.outputs._shape[-1])], name='reshape') + net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_training, name='drop3') + net = tl.layers.DenseLayer(net, vocab_size, W_init=init, b_init=init, act=tf.identity, name='output') + return net, lstm1, lstm2 # Inference for Training - network, lstm1, lstm2 = inference(input_data, is_training=True, num_steps=num_steps, reuse=None) + net, lstm1, lstm2 = inference(input_data, is_training=True, num_steps=num_steps, reuse=None) # Inference for Validating - network_val, lstm1_val, lstm2_val = inference(input_data, is_training=False, num_steps=num_steps, reuse=True) + net_val, lstm1_val, lstm2_val = inference(input_data, is_training=False, num_steps=num_steps, reuse=True) # Inference for 
Testing (Evaluation) - network_test, lstm1_test, lstm2_test = inference(input_data_test, is_training=False, num_steps=1, reuse=True) + net_test, lstm1_test, lstm2_test = inference(input_data_test, is_training=False, num_steps=1, reuse=True) # sess.run(tf.initialize_all_variables()) tl.layers.initialize_global_variables(sess) @@ -259,11 +255,11 @@ def loss_fn(outputs, targets): #, batch_size, num_steps): return cost # Cost for Training - cost = loss_fn(network.outputs, targets) #, batch_size, num_steps) + cost = loss_fn(net.outputs, targets) #, batch_size, num_steps) # Cost for Validating - cost_val = loss_fn(network_val.outputs, targets) #, batch_size, num_steps) + cost_val = loss_fn(net_val.outputs, targets) #, batch_size, num_steps) # Cost for Testing (Evaluation) - cost_test = loss_fn(network_test.outputs, targets_test) #, 1, 1) + cost_test = loss_fn(net_test.outputs, targets_test) #, 1, 1) # Truncated Backpropagation for training with tf.variable_scope('learning_rate'): @@ -276,8 +272,8 @@ def loss_fn(outputs, targets): #, batch_size, num_steps): # sess.run(tf.initialize_all_variables()) tl.layers.initialize_global_variables(sess) - network.print_params() - network.print_layers() + net.print_params() + net.print_layers() tl.layers.print_all_variables() print("\nStart learning a language model by using PTB dataset") @@ -304,7 +300,7 @@ def loss_fn(outputs, targets): #, batch_size, num_steps): lstm2.initial_state: state2, } # For training, enable dropout - feed_dict.update(network.all_drop) + feed_dict.update(net.all_drop) _cost, state1, state2, _ = sess.run( [cost, lstm1.final_state, lstm2.final_state, train_op], feed_dict=feed_dict ) diff --git a/example/tutorial_ptb_lstm_state_is_tuple.py b/example/tutorial_ptb_lstm_state_is_tuple.py index 1202a4524..9b649b84d 100644 --- a/example/tutorial_ptb_lstm_state_is_tuple.py +++ b/example/tutorial_ptb_lstm_state_is_tuple.py @@ -124,7 +124,7 @@ def main(_): if FLAGS.model == "small": init_scale = 0.1 - learning_rate = 1.0 + learning_rate = 1. max_grad_norm = 5 num_steps = 20 hidden_size = 200 @@ -192,58 +192,54 @@ def inference(x, is_training, num_steps, reuse=None): - For DynamicRNNLayer, you can set dropout and the number of RNN layer internally. 
""" print("\nnum_steps : %d, is_training : %s, reuse : %s" % (num_steps, is_training, reuse)) - initializer = tf.random_uniform_initializer(-init_scale, init_scale) + init = tf.random_uniform_initializer(-init_scale, init_scale) with tf.variable_scope("model", reuse=reuse): - network = tl.layers.EmbeddingInputlayer( - inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=initializer, name='embedding' - ) - network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop1') - network = tl.layers.RNNLayer( - network, + net = tl.layers.EmbeddingInputlayer(x, vocab_size, hidden_size, init, name='embedding') + net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_training, name='drop1') + net = tl.layers.RNNLayer( + net, cell_fn=tf.contrib.rnn.BasicLSTMCell, #tf.nn.rnn_cell.BasicLSTMCell, cell_init_args={ 'forget_bias': 0.0, 'state_is_tuple': True }, n_hidden=hidden_size, - initializer=initializer, + initializer=init, n_steps=num_steps, return_last=False, name='basic_lstm1' ) - lstm1 = network - network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop2') - network = tl.layers.RNNLayer( - network, + lstm1 = net + net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_training, name='drop2') + net = tl.layers.RNNLayer( + net, cell_fn=tf.contrib.rnn.BasicLSTMCell, #tf.nn.rnn_cell.BasicLSTMCell, cell_init_args={ 'forget_bias': 0.0, 'state_is_tuple': True }, n_hidden=hidden_size, - initializer=initializer, + initializer=init, n_steps=num_steps, return_last=False, return_seq_2d=True, name='basic_lstm2' ) - lstm2 = network + lstm2 = net # Alternatively, if return_seq_2d=False, in the above RNN layer, # you can reshape the outputs as follow: - # network = tl.layers.ReshapeLayer(network, - # shape=[-1, int(network.outputs._shape[-1])], name='reshape') - network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop3') - network = tl.layers.DenseLayer( - network, n_units=vocab_size, W_init=initializer, b_init=initializer, act=tf.identity, name='output' - ) - return network, lstm1, lstm2 + # net = tl.layers.ReshapeLayer(net, + # shape=[-1, int(net.outputs._shape[-1])], name='reshape') + net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_training, name='drop3') + net = tl.layers.DenseLayer(net, vocab_size, W_init=init, b_init=init, act=tf.identity, name='output') + return net, lstm1, lstm2 # Inference for Training - network, lstm1, lstm2 = inference(input_data, is_training=True, num_steps=num_steps, reuse=None) + net, lstm1, lstm2 = inference(input_data, is_training=True, num_steps=num_steps, reuse=None) # Inference for Validating - network_val, lstm1_val, lstm2_val = inference(input_data, is_training=False, num_steps=num_steps, reuse=True) + net_val, lstm1_val, lstm2_val = inference(input_data, is_training=False, num_steps=num_steps, reuse=True) # Inference for Testing (Evaluation) - network_test, lstm1_test, lstm2_test = inference(input_data_test, is_training=False, num_steps=1, reuse=True) + net_test, lstm1_test, lstm2_test = inference(input_data_test, is_training=False, num_steps=1, reuse=True) # sess.run(tf.initialize_all_variables()) tl.layers.initialize_global_variables(sess) @@ -265,11 +261,11 @@ def loss_fn(outputs, targets, batch_size): return cost # Cost for Training - cost = loss_fn(network.outputs, targets, batch_size) + cost = loss_fn(net.outputs, targets, batch_size) # Cost for 
Validating - cost_val = loss_fn(network_val.outputs, targets, batch_size) + cost_val = loss_fn(net_val.outputs, targets, batch_size) # Cost for Testing (Evaluation) - cost_test = loss_fn(network_test.outputs, targets_test, 1) + cost_test = loss_fn(net_test.outputs, targets_test, 1) # Truncated Backpropagation for training with tf.variable_scope('learning_rate'): @@ -282,8 +278,8 @@ def loss_fn(outputs, targets, batch_size): # sess.run(tf.initialize_all_variables()) tl.layers.initialize_global_variables(sess) - network.print_params() - network.print_layers() + net.print_params() + net.print_layers() tl.layers.print_all_variables() print("\nStart learning a language model by using PTB dataset") @@ -312,7 +308,7 @@ def loss_fn(outputs, targets, batch_size): lstm2.initial_state.h: state2[1], } # For training, enable dropout - feed_dict.update(network.all_drop) + feed_dict.update(net.all_drop) _cost, state1_c, state1_h, state2_c, state2_h, _ = \ sess.run([cost, lstm1.final_state.c, diff --git a/example/tutorial_ternaryweight_cifar10_tfrecord.py b/example/tutorial_ternaryweight_cifar10_tfrecord.py index 1f6f30357..0a5ea3c99 100644 --- a/example/tutorial_ternaryweight_cifar10_tfrecord.py +++ b/example/tutorial_ternaryweight_cifar10_tfrecord.py @@ -147,44 +147,27 @@ def read_and_decode(filename, is_train=None): # prepare data in cpu x_train_, y_train_ = read_and_decode("train.cifar10", True) x_test_, y_test_ = read_and_decode("test.cifar10", False) - - x_train_batch, y_train_batch = tf.train.shuffle_batch( - [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32 - ) # set the number of threads here + # set the number of threads here + x_train_batch, y_train_batch = tf.train.shuffle_batch([x_train_, y_train_], \ + batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32) # for testing, uses batch instead of shuffle_batch - x_test_batch, y_test_batch = tf.train.batch( - [x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32 - ) + x_test_batch, y_test_batch = tf.train.batch([x_test_, y_test_], \ + batch_size=batch_size, capacity=50000, num_threads=32) def model(x_crop, y_, reuse): """ For more simplified CNN APIs, check tensorlayer.org """ - W_init = tf.truncated_normal_initializer(stddev=5e-2) - W_init2 = tf.truncated_normal_initializer(stddev=0.04) - b_init2 = tf.constant_initializer(value=0.1) with tf.variable_scope("model", reuse=reuse): net = tl.layers.InputLayer(x_crop, name='input') - net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1') + net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn1') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.LocalResponseNormLayer( - net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1' - ) - net = tl.layers.TernaryConv2d( - net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2' - ) - net = tl.layers.LocalResponseNormLayer( - net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2' - ) + net = tl.layers.LocalResponseNormLayer(net, 4, 1.0, 0.001 / 9.0, 0.75, name='norm1') + net = tl.layers.TernaryConv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn2') + net = tl.layers.LocalResponseNormLayer(net, 4, 1.0, 0.001 / 9.0, 0.75, name='norm2') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - net = tl.layers.FlattenLayer(net, 
name='flatten') # output: (batch_size, 2304) - net = tl.layers.TernaryDenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = tl.layers.TernaryDenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = tl.layers.DenseLayer( - net, n_units=10, act=tf.identity, W_init=W_init2, name='output' - ) # output: (batch_size, 10) + net = tl.layers.FlattenLayer(net, name='flatten') + net = tl.layers.TernaryDenseLayer(net, 384, act=tf.nn.relu, name='d1relu') + net = tl.layers.TernaryDenseLayer(net, 192, act=tf.nn.relu, name='d2relu') + net = tl.layers.DenseLayer(net, 10, act=tf.identity, name='output') y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -200,42 +183,6 @@ def model(x_crop, y_, reuse): return net, cost, acc - def model_batch_norm(x_crop, y_, reuse, is_train): - """ Batch normalization should be placed before rectifier. """ - W_init = tf.truncated_normal_initializer(stddev=5e-2) - W_init2 = tf.truncated_normal_initializer(stddev=0.04) - b_init2 = tf.constant_initializer(value=0.1) - with tf.variable_scope("model", reuse=reuse): - net = tl.layers.InputLayer(x_crop, name='input') - net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn1') - net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch1') - net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), padding='SAME', W_init=W_init, b_init=None, name='cnn2') - net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2') - net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.DenseLayer( - net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' - ) # output: (batch_size, 384) - net = tl.layers.DenseLayer( - net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' - ) # output: (batch_size, 192) - net = tl.layers.DenseLayer( - net, n_units=10, act=tf.identity, W_init=W_init2, name='output' - ) # output: (batch_size, 10) - y = net.outputs - ce = tl.cost.cross_entropy(y, y_, name='cost') - # L2 for the MLP, without this, the accuracy will be reduced by 15%. 
- L2 = 0 - for p in tl.layers.get_variables_with_name('relu/W', True, True): - L2 += tf.contrib.layers.l2_regularizer(0.004)(p) - cost = ce + L2 - - correct_prediction = tf.equal(tf.cast(tf.argmax(y, 1), tf.int32), y_) - acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) - - return net, cost, acc - ## You can also use placeholder to feed_dict in data after using ## val, l = sess.run([x_train_batch, y_train_batch]) to get the data # x_crop = tf.placeholder(tf.float32, shape=[batch_size, 24, 24, 3]) @@ -243,12 +190,8 @@ def model_batch_norm(x_crop, y_, reuse, is_train): # cost, acc, network = model(x_crop, y_, None) with tf.device('/gpu:0'): # <-- remove it if you don't have GPU - ## using local response normalization network, cost, acc, = model(x_train_batch, y_train_batch, False) _, cost_test, acc_test = model(x_test_batch, y_test_batch, True) - ## you may want to try batch normalization - # network, cost, acc, = model_batch_norm(x_train_batch, y_train_batch, None, is_train=True) - # _, cost_test, acc_test = model_batch_norm(x_test_batch, y_test_batch, True, is_train=False) ## train n_epoch = 50000 @@ -291,10 +234,8 @@ def model_batch_norm(x_crop, y_, reuse, is_train): n_batch += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - print( - "Epoch %d : Step %d-%d of %d took %fs" % - (epoch, step, step + n_step_epoch, n_step, time.time() - start_time) - ) + print("Epoch %d : Step %d-%d of %d took %fs" % \ + (epoch, step, step + n_step_epoch, n_step, time.time() - start_time)) print(" train loss: %f" % (train_loss / n_batch)) print(" train acc: %f" % (train_acc / n_batch)) diff --git a/example/tutorial_tf_dataset_voc.py b/example/tutorial_tf_dataset_voc.py index 43e405547..5f32305bd 100644 --- a/example/tutorial_tf_dataset_voc.py +++ b/example/tutorial_tf_dataset_voc.py @@ -50,12 +50,10 @@ def _data_aug_fn(im, ann): ## random resize and crop tmp0 = random.randint(1, int(im_size[0] * jitter)) tmp1 = random.randint(1, int(im_size[1] * jitter)) - im, coords = tl.prepro.obj_box_imresize( - im, coords, [im_size[0] + tmp0, im_size[1] + tmp1], is_rescale=True, interp='bicubic' - ) - im, clas, coords = tl.prepro.obj_box_crop( - im, clas, coords, wrg=im_size[1], hrg=im_size[0], is_rescale=True, is_center=True, is_random=True - ) + im, coords = tl.prepro.obj_box_imresize(im, coords, [im_size[0] + tmp0, im_size[1] + tmp1], \ + is_rescale=True, interp='bicubic') + im, clas, coords = tl.prepro.obj_box_crop(im, clas, coords, wrg=im_size[1], hrg=im_size[0], \ + is_rescale=True, is_center=True, is_random=True) ## value [0, 255] to [-1, 1] (optional) # im = im / 127.5 - 1 ## value [0, 255] to [0, 1] (optional) @@ -101,6 +99,5 @@ def _map_fn(filename, annotation): ## save all images for i in range(len(im)): - tl.vis.draw_boxes_and_labels_to_image( - im[i] * 255, ann[i][0], ann[i][1], [], classes, True, save_name='_bbox_vis_%d.png' % i - ) + tl.vis.draw_boxes_and_labels_to_image(im[i] * 255, ann[i][0], ann[i][1], [], classes, \ + True, save_name='_bbox_vis_%d.png' % i) diff --git a/example/tutorial_tfrecord2.py b/example/tutorial_tfrecord2.py index eb210d9da..c2fe39a3a 100755 --- a/example/tutorial_tfrecord2.py +++ b/example/tutorial_tfrecord2.py @@ -12,12 +12,10 @@ """ import os - import numpy as np # import matplotlib # matplotlib.use('GTK') import tensorflow as tf - import tensorlayer as tl ## Download data, and convert to TFRecord format, see ```tutorial_tfrecord.py``` @@ -82,9 +80,8 @@ def read_and_decode(filename): ## Use shuffle_batch or batch # see 
https://www.tensorflow.org/versions/master/api_docs/python/io_ops.html#shuffle_batch -img_batch, label_batch = tf.train.shuffle_batch( - [img, label], batch_size=4, capacity=50000, min_after_dequeue=10000, num_threads=1 -) +img_batch, label_batch = tf.train.shuffle_batch([img, label], batch_size=4, capacity=50000, \ + min_after_dequeue=10000, num_threads=1) print("img_batch : %s" % img_batch._shape) print("label_batch : %s" % label_batch._shape) @@ -98,7 +95,6 @@ def read_and_decode(filename): for i in range(3): # number of mini-batch (step) print("Step %d" % i) val, l = sess.run([img_batch, label_batch]) - # exit() print(val.shape, l) tl.visualize.images2d(val, second=1, saveable=False, name='batch' + str(i), dtype=np.uint8, fig_idx=2020121) tl.vis.save_images(val, [2, 2], '_batch_%d.png' % i) diff --git a/example/tutorial_tfrecord3.py b/example/tutorial_tfrecord3.py index 6626476ec..1d912c351 100644 --- a/example/tutorial_tfrecord3.py +++ b/example/tutorial_tfrecord3.py @@ -15,13 +15,11 @@ """ -import json import os - +import json import numpy as np -import tensorflow as tf from PIL import Image - +import tensorflow as tf import tensorlayer as tl diff --git a/example/tutorial_vgg16.py b/example/tutorial_vgg16.py index faab073b4..51aaaa253 100644 --- a/example/tutorial_vgg16.py +++ b/example/tutorial_vgg16.py @@ -59,134 +59,33 @@ def conv_layers(net_in): net_in.outputs = net_in.outputs - mean # conv1 - network = Conv2dLayer( - net_in, - act=tf.nn.relu, - shape=[3, 3, 3, 64], # 64 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv1_1' - ) - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 64, 64], # 64 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv1_2' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1' - ) + net = Conv2dLayer(net_in, act=tf.nn.relu, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_2') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') # conv2 - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 64, 128], # 128 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv2_1' - ) - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 128, 128], # 128 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv2_2' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2' - ) + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_2') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') # conv3 - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 128, 256], # 256 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv3_1' - ) - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 256, 256], # 256 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv3_2' - ) - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 256, 256], # 256 features 
for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv3_3' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3' - ) + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 128, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_2') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_3') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') # conv4 - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 256, 512], # 512 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv4_1' - ) - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 512, 512], # 512 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv4_2' - ) - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 512, 512], # 512 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv4_3' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4' - ) + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_2') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_3') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') # conv5 - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 512, 512], # 512 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv5_1' - ) - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 512, 512], # 512 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv5_2' - ) - network = Conv2dLayer( - network, - act=tf.nn.relu, - shape=[3, 3, 512, 512], # 512 features for each 3x3 patch - strides=[1, 1, 1, 1], - padding='SAME', - name='conv5_3' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5' - ) - return network + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_2') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_3') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') + return net def conv_layers_simple_api(net_in): @@ -198,67 +97,41 @@ def conv_layers_simple_api(net_in): net_in.outputs = net_in.outputs - mean # conv1 - network = Conv2d( - net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1' - ) - network = Conv2d( - network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') + net = Conv2d(net_in, 64, filter_size=(3, 3), 
strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1') + net = Conv2d(net, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') # conv2 - network = Conv2d( - network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1' - ) - network = Conv2d( - network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') + net = Conv2d(net, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1') + net = Conv2d(net, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') # conv3 - network = Conv2d( - network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1' - ) - network = Conv2d( - network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2' - ) - network = Conv2d( - network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') + net = Conv2d(net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1') + net = Conv2d(net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2') + net = Conv2d(net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') # conv4 - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') # conv5 - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5') - return network + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 
1), act=tf.nn.relu, padding='SAME', name='conv5_2') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5') + return net def fc_layers(net): - network = FlattenLayer(net, name='flatten') - network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc1_relu') - network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc2_relu') - network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc3_relu') - return network + net = FlattenLayer(net, name='flatten') + net = DenseLayer(net, n_units=4096, act=tf.nn.relu, name='fc1_relu') + net = DenseLayer(net, n_units=4096, act=tf.nn.relu, name='fc2_relu') + net = DenseLayer(net, n_units=1000, act=tf.identity, name='fc3_relu') + return net sess = tf.InteractiveSession() @@ -269,9 +142,9 @@ def fc_layers(net): net_in = InputLayer(x, name='input') # net_cnn = conv_layers(net_in) # professional CNN APIs net_cnn = conv_layers_simple_api(net_in) # simplified CNN APIs -network = fc_layers(net_cnn) +net = fc_layers(net_cnn) -y = network.outputs +y = net.outputs probs = tf.nn.softmax(y) # y_op = tf.argmax(tf.nn.softmax(y), 1) # cost = tl.cost.cross_entropy(y, y_, name='cost') @@ -279,8 +152,8 @@ def fc_layers(net): # acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tl.layers.initialize_global_variables(sess) -network.print_params() -network.print_layers() +net.print_params() +net.print_layers() if not os.path.isfile("vgg16_weights.npz"): print("Please download vgg16_weights.npz from : http://www.cs.toronto.edu/~frossard/post/vgg16/") @@ -292,7 +165,7 @@ def fc_layers(net): print(" Loading %s" % str(val[1].shape)) params.append(val[1]) -tl.files.assign_params(sess, params, network) +tl.files.assign_params(sess, params, net) img1 = imread('data/laska.png', mode='RGB') # test data in github img1 = imresize(img1, (224, 224)) diff --git a/example/tutorial_vgg19.py b/example/tutorial_vgg19.py index f484f3dc8..4352482c5 100755 --- a/example/tutorial_vgg19.py +++ b/example/tutorial_vgg19.py @@ -14,13 +14,11 @@ import os import time - import numpy as np import skimage import skimage.io import skimage.transform import tensorflow as tf - import tensorlayer as tl from tensorlayer.layers import * @@ -104,80 +102,38 @@ def Vgg19(rgb): # input layer net_in = InputLayer(bgr, name='input') # conv1 - network = Conv2dLayer( - net_in, act=tf.nn.relu, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_1' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_2' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1' - ) + net = Conv2dLayer(net_in, act=tf.nn.relu, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_2') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') # conv2 - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_1' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_2' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', 
pool=tf.nn.max_pool, name='pool2' - ) + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_2') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') # conv3 - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 128, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_1' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_2' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_3' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_4' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3' - ) + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 128, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_2') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_3') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_4') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') # conv4 - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_1' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_2' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_3' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_4' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4' - ) + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_2') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_3') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_4') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') # conv5 - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_1' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_2' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_3' - ) - network = Conv2dLayer( - network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_4' - ) - network = PoolLayer( - network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', 
pool=tf.nn.max_pool, name='pool5' - ) + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_1') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_2') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_3') + net = Conv2dLayer(net, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_4') + net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') # fc 6~8 - network = FlattenLayer(network, name='flatten') - network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc6') - network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc7') - network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc8') + net = FlattenLayer(net, name='flatten') + net = DenseLayer(net, n_units=4096, act=tf.nn.relu, name='fc6') + net = DenseLayer(net, n_units=4096, act=tf.nn.relu, name='fc7') + net = DenseLayer(net, n_units=1000, act=tf.identity, name='fc8') print("build model finished: %fs" % (time.time() - start_time)) - return network + return net def Vgg19_simple_api(rgb): @@ -220,77 +176,45 @@ def Vgg19_simple_api(rgb): # input layer net_in = InputLayer(bgr, name='input') # conv1 - network = Conv2d( - net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1' - ) - network = Conv2d( - network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') + net = Conv2d(net_in, 64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1') + net = Conv2d(net, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') # conv2 - network = Conv2d( - network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1' - ) - network = Conv2d( - network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') + net = Conv2d(net, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1') + net = Conv2d(net, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') # conv3 - network = Conv2d( - network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1' - ) - network = Conv2d( - network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2' - ) - network = Conv2d( - network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3' - ) - network = Conv2d( - network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_4' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') + net = Conv2d(net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1') + net = Conv2d(net, n_filter=256, filter_size=(3, 
3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2') + net = Conv2d(net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3') + net = Conv2d(net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_4') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') # conv4 - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_4' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_4') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') # conv5 - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3' - ) - network = Conv2d( - network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_4' - ) - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3') + net = Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_4') + net = MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5') # fc 6~8 - network = FlattenLayer(network, name='flatten') - network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc6') - network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc7') - network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc8') + net = FlattenLayer(net, name='flatten') + net = DenseLayer(net, n_units=4096, act=tf.nn.relu, name='fc6') + net = DenseLayer(net, n_units=4096, act=tf.nn.relu, name='fc7') + net = DenseLayer(net, n_units=1000, act=tf.identity, name='fc8') print("build model finished: %fs" % (time.time() - start_time)) - return network + return net sess = tf.InteractiveSession() x = tf.placeholder("float", [None, 224, 224, 3]) -# network = Vgg19(x) -network = Vgg19_simple_api(x) -y = network.outputs +# 
net = Vgg19(x) +net = Vgg19_simple_api(x) +y = net.outputs probs = tf.nn.softmax(y, name="prob") tl.layers.initialize_global_variables(sess) @@ -309,7 +233,7 @@ def Vgg19_simple_api(rgb): params.extend([W, b]) print("Restoring model from npz file") -tl.files.assign_params(sess, params, network) +tl.files.assign_params(sess, params, net) img1 = load_image("data/tiger.jpeg") # test data in github img1 = img1.reshape((1, 224, 224, 3)) diff --git a/example/tutorial_word2vec_basic.py b/example/tutorial_word2vec_basic.py index ad275c0ac..bdeab7747 100644 --- a/example/tutorial_word2vec_basic.py +++ b/example/tutorial_word2vec_basic.py @@ -138,24 +138,21 @@ def main_word2vec_basic(): print('Most 5 common words (+UNK)', count[:5]) # [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] - print( - 'Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]] - ) # [5243, 3081, 12, 6, 195, 2, 3135, 46, 59, 156] [b'anarchism', b'originated', b'as', b'a', b'term', b'of', b'abuse', b'first', b'used', b'against'] + print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) + # [5243, 3081, 12, 6, 195, 2, 3135, 46, 59, 156] [b'anarchism', b'originated', b'as', b'a', b'term', b'of', b'abuse', b'first', b'used', b'against'] del words # Hint to reduce memory. # Step 3: Function to generate a training batch for the Skip-Gram model. print() - batch, labels, data_index = tl.nlp.generate_skip_gram_batch( - data=data, batch_size=8, num_skips=4, skip_window=2, data_index=0 - ) + batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, \ + batch_size=8, num_skips=4, skip_window=2, data_index=0) for i in range(8): print(batch[i], reverse_dictionary[batch[i]], '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) - batch, labels, data_index = tl.nlp.generate_skip_gram_batch( - data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0 - ) + batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, \ + batch_size=8, num_skips=2, skip_window=1, data_index=0) for i in range(8): print(batch[i], reverse_dictionary[batch[i]], '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) @@ -234,9 +231,8 @@ def main_word2vec_basic(): print_freq = 2000 while step < num_steps: start_time = time.time() - batch_inputs, batch_labels, data_index = tl.nlp.generate_skip_gram_batch( - data=data, batch_size=batch_size, num_skips=num_skips, skip_window=skip_window, data_index=data_index - ) + batch_inputs, batch_labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, \ + batch_size=batch_size, num_skips=num_skips, skip_window=skip_window, data_index=data_index) feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels} # We perform one update step by evaluating the train_op (including it # in the list of returned values for sess.run() @@ -246,10 +242,8 @@ def main_word2vec_basic(): if step % print_freq == 0: if step > 0: average_loss /= print_freq - print( - "Average loss at step %d/%d. loss:%f took:%fs" % - (step, num_steps, average_loss, time.time() - start_time) - ) + print("Average loss at step %d/%d. loss:%f took:%fs" % \ + (step, num_steps, average_loss, time.time() - start_time)) average_loss = 0 # Prints out nearby words given a list of words. 
# Note that this is expensive (~20% slowdown if computed every 500 steps) @@ -292,9 +286,8 @@ def main_word2vec_basic(): print() final_embeddings = sess.run(normalized_embeddings) #.eval() - tl.visualize.tsne_embedding( - final_embeddings, reverse_dictionary, plot_only=500, second=5, saveable=False, name='word2vec_basic' - ) + tl.visualize.tsne_embedding(final_embeddings, reverse_dictionary, plot_only=500, \ + second=5, saveable=False, name='word2vec_basic') # Step 7: Evaluate by analogy questions. see tensorflow/models/embedding/word2vec_optimized.py print()