diff --git a/.gitignore b/.gitignore index 2ec50d5e9..c468f8a4f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ tensorlayer.egg-info tensorlayer/__pacache__ venv/ .pytest_cache/ +update_tl.bat +update_tl.py diff --git a/.style.yapf b/.style.yapf index 2c65b7bbd..65bdef585 100644 --- a/.style.yapf +++ b/.style.yapf @@ -1,4 +1,60 @@ [style] -based_on_style = pep8 +based_on_style=google + +# The number of columns to use for indentation. indent_width = 4 -column_limit = 160 + +# The column limit. +column_limit=120 + +# Place each dictionary entry onto its own line. +each_dict_entry_on_separate_line = True + +# Put closing brackets on a separate line, dedented, if the bracketed +# expression can't fit in a single line. Applies to all kinds of brackets, +# including function definitions and calls. For example: +# +# config = { +# 'key1': 'value1', +# 'key2': 'value2', +# } # <--- this bracket is dedented and on a separate line +# +# time_series = self.remote_client.query_entity_counters( +# entity='dev3246.region1', +# key='dns.query_latency_tcp', +# transform=Transformation.AVERAGE(window=timedelta(seconds=60)), +# start_ts=now()-timedelta(days=3), +# end_ts=now(), +# ) # <--- this bracket is dedented and on a separate line +dedent_closing_brackets=True + +# Do not split consecutive brackets. Only relevant when DEDENT_CLOSING_BRACKETS is set +coalesce_brackets = False + +# Align closing bracket with visual indentation. +align_closing_bracket_with_visual_indent = False + +# Split named assignments onto individual lines. +split_before_named_assigns = False + +# If an argument / parameter list is going to be split, then split before the first argument. +split_before_first_argument = True + +# Split before arguments if the argument list is terminated by a comma. +split_arguments_when_comma_terminated = False + +# Insert a space between the ending comma and closing bracket of a list, etc. +space_between_ending_comma_and_closing_bracket = True + +# Join short lines into one line. E.g., single line if statements. +join_multiple_lines = True + +# Do not include spaces around selected binary operators. +# Example: 1 + 2 * 3 - 4 / 5 => 1 + 2*3 - 4/5 +no_spaces_around_selected_binary_operators = True + +# Allow lambdas to be formatted on more than one line. +allow_multiline_lambdas = True + +SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 10 +SPLIT_PENALTY_AFTER_OPENING_BRACKET = 500 \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index af0cb8b4a..9234cb5e2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -54,6 +54,7 @@ method, this part of the documentation is for you. 
modules/activation modules/models modules/distributed + modules/db Command-line Reference diff --git a/example/tutorial_atari_pong.py b/example/tutorial_atari_pong.py index 386afe862..35246ebde 100644 --- a/example/tutorial_atari_pong.py +++ b/example/tutorial_atari_pong.py @@ -133,7 +133,11 @@ def prepro(I): prev_x = None if reward != 0: - print(('episode %d: game %d took %.5fs, reward: %f' % (episode_number, game_number, time.time() - start_time, reward)), - ('' if reward == -1 else ' !!!!!!!!')) + print( + ( + 'episode %d: game %d took %.5fs, reward: %f' % + (episode_number, game_number, time.time() - start_time, reward) + ), ('' if reward == -1 else ' !!!!!!!!') + ) start_time = time.time() game_number += 1 diff --git a/example/tutorial_binarynet_cifar10_tfrecord.py b/example/tutorial_binarynet_cifar10_tfrecord.py index 6f7cadc42..dd5020150 100644 --- a/example/tutorial_binarynet_cifar10_tfrecord.py +++ b/example/tutorial_binarynet_cifar10_tfrecord.py @@ -83,7 +83,9 @@ def data_to_tfrecord(images, labels, filename): feature={ "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])), 'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])), - })) + } + ) + ) writer.write(example.SerializeToString()) # Serialize To String writer.close() @@ -97,12 +99,13 @@ def read_and_decode(filename, is_train=None): serialized_example, features={ 'label': tf.FixedLenFeature([], tf.int64), 'img_raw': tf.FixedLenFeature([], tf.string), - }) + } + ) # You can do more image distortion here for training data img = tf.decode_raw(features['img_raw'], tf.float32) img = tf.reshape(img, [32, 32, 3]) # img = tf.cast(img, tf.float32) #* (1. / 255) - 0.5 if is_train == True: # 1. Randomly crop a [height, width] section of the image. img = tf.random_crop(img, [24, 24, 3]) # 2. Randomly flip the image horizontally.
@@ -147,9 +150,12 @@ def read_and_decode(filename, is_train=None): x_test_, y_test_ = read_and_decode("test.cifar10", False) x_train_batch, y_train_batch = tf.train.shuffle_batch( - [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32) # set the number of threads here + [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32 + ) # set the number of threads here # for testing, uses batch instead of shuffle_batch - x_test_batch, y_test_batch = tf.train.batch([x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32) + x_test_batch, y_test_batch = tf.train.batch( + [x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32 + ) def model(x_crop, y_, reuse): """ For more simplified CNN APIs, check tensorlayer.org """ @@ -161,16 +167,28 @@ def model(x_crop, y_, reuse): net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1') net = tl.layers.SignLayer(net) net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') - net = tl.layers.BinaryConv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2') - net = tl.layers.LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') + net = tl.layers.LocalResponseNormLayer( + net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1' + ) + net = tl.layers.BinaryConv2d( + net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2' + ) + net = tl.layers.LocalResponseNormLayer( + net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2' + ) net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) net = tl.layers.SignLayer(net) - net = tl.layers.BinaryDenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) + net = tl.layers.BinaryDenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) net = tl.layers.SignLayer(net) - net = tl.layers.BinaryDenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity, W_init=W_init2, name='output') # output: (batch_size, 10) + net = tl.layers.BinaryDenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = tl.layers.DenseLayer( + net, n_units=10, act=tf.identity, W_init=W_init2, name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -201,9 +219,15 @@ def model_batch_norm(x_crop, y_, reuse, is_train): net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.DenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = tl.layers.DenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = 
tl.layers.DenseLayer(net, n_units=10, act=tf.identity, W_init=W_init2, name='output') # output: (batch_size, 10) + net = tl.layers.DenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) + net = tl.layers.DenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = tl.layers.DenseLayer( + net, n_units=10, act=tf.identity, W_init=W_init2, name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -273,7 +297,10 @@ def model_batch_norm(x_crop, y_, reuse, is_train): n_batch += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - print("Epoch %d : Step %d-%d of %d took %fs" % (epoch, step, step + n_step_epoch, n_step, time.time() - start_time)) + print( + "Epoch %d : Step %d-%d of %d took %fs" % + (epoch, step, step + n_step_epoch, n_step, time.time() - start_time) + ) print(" train loss: %f" % (train_loss / n_batch)) print(" train acc: %f" % (train_acc / n_batch)) diff --git a/example/tutorial_bipedalwalker_a3c_continuous_action.py b/example/tutorial_bipedalwalker_a3c_continuous_action.py index d112c3bd4..9c2aac04d 100644 --- a/example/tutorial_bipedalwalker_a3c_continuous_action.py +++ b/example/tutorial_bipedalwalker_a3c_continuous_action.py @@ -67,6 +67,7 @@ class ACNet(object): + def __init__(self, scope, globalAC=None): self.scope = scope if scope == GLOBAL_NET_SCOPE: @@ -144,7 +145,8 @@ def _build_net(self): self.v = v.outputs def update_global(self, feed_dict): # run by a local - _, _, t = sess.run([self.update_a_op, self.update_c_op, self.test], feed_dict) # local grads applies to global net + _, _, t = sess.run([self.update_a_op, self.update_c_op, self.test], + feed_dict) # local grads applies to global net return t def pull_global(self): # run by a local @@ -156,7 +158,10 @@ def choose_action(self, s): # run by a local def save_ckpt(self): tl.files.exists_or_mkdir(self.scope) - tl.files.save_ckpt(sess=sess, mode_name='model.ckpt', var_list=self.a_params + self.c_params, save_dir=self.scope, printable=True) + tl.files.save_ckpt( + sess=sess, mode_name='model.ckpt', var_list=self.a_params + self.c_params, save_dir=self.scope, + printable=True + ) def load_ckpt(self): tl.files.load_ckpt(sess=sess, var_list=self.a_params + self.c_params, save_dir=self.scope, printable=True) @@ -164,6 +169,7 @@ def load_ckpt(self): class Worker(object): + def __init__(self, name, globalAC): self.env = gym.make(GAME) self.name = name @@ -202,7 +208,10 @@ def work(self): buffer_v_target.append(v_s_) buffer_v_target.reverse() - buffer_s, buffer_a, buffer_v_target = np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target) + buffer_s, buffer_a, buffer_v_target = ( + np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target) + ) + feed_dict = { self.AC.s: buffer_s, self.AC.a_his: buffer_a, diff --git a/example/tutorial_cartpole_ac.py b/example/tutorial_cartpole_ac.py index b39cdbd3e..b7fd64ba7 100644 --- a/example/tutorial_cartpole_ac.py +++ b/example/tutorial_cartpole_ac.py @@ -69,6 +69,7 @@ class Actor(object): + def __init__(self, sess, n_features, n_actions, lr=0.001): self.sess = sess self.s = tf.placeholder(tf.float32, [1, n_features], "state") @@ -85,7 +86,9 @@ def __init__(self, sess, n_features, n_actions, lr=0.001): # Hao Dong with tf.variable_scope('loss'): - self.exp_v = tl.rein.cross_entropy_reward_loss(logits=self.acts_logits, actions=self.a, rewards=self.td_error, 
name='actor_weighted_loss') + self.exp_v = tl.rein.cross_entropy_reward_loss( + logits=self.acts_logits, actions=self.a, rewards=self.td_error, name='actor_weighted_loss' + ) with tf.variable_scope('train'): self.train_op = tf.train.AdamOptimizer(lr).minimize(self.exp_v) @@ -113,6 +116,7 @@ def choose_action_greedy(self, s): class Critic(object): + def __init__(self, sess, n_features, lr=0.01): self.sess = sess self.s = tf.placeholder(tf.float32, [1, n_features], "state") @@ -143,7 +147,9 @@ def learn(self, s, r, s_): sess = tf.Session() actor = Actor(sess, n_features=N_F, n_actions=N_A, lr=LR_A) -critic = Critic(sess, n_features=N_F, lr=LR_C) # we need a good teacher, so the teacher should learn faster than the actor +critic = Critic( + sess, n_features=N_F, lr=LR_C +) # we need a good teacher, so the teacher should learn faster than the actor tl.layers.initialize_global_variables(sess) @@ -187,7 +193,10 @@ def learn(self, s, r, s_): running_reward = running_reward * 0.95 + ep_rs_sum * 0.05 # start rendering if running_reward greater than a threshold # if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True - print("Episode: %d reward: %f running_reward %f took: %.5f" % (i_episode, ep_rs_sum, running_reward, time.time() - episode_time)) + print( + "Episode: %d reward: %f running_reward %f took: %.5f" % + (i_episode, ep_rs_sum, running_reward, time.time() - episode_time) + ) # Early Stopping for quick check if t >= MAX_EP_STEPS: diff --git a/example/tutorial_cifar10.py b/example/tutorial_cifar10.py index 5ffd5dfff..8f1c5b80b 100644 --- a/example/tutorial_cifar10.py +++ b/example/tutorial_cifar10.py @@ -44,9 +44,15 @@ def model(x, y_, reuse): # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], # padding='SAME', pool = tf.nn.max_pool, name ='pool2') # output: (batch_size, 6, 6, 64) net = FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = DenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = DenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = DenseLayer(net, n_units=10, act=tf.identity, W_init=tf.truncated_normal_initializer(stddev=1 / 192.0), name='output') # output: (batch_size, 10) + net = DenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) + net = DenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = DenseLayer( + net, n_units=10, act=tf.identity, W_init=tf.truncated_normal_initializer(stddev=1 / 192.0), name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -89,9 +95,15 @@ def model_batch_norm(x, y_, reuse, is_train): # padding='SAME', pool = tf.nn.max_pool, name ='pool2') # output: (batch_size, 6, 6, 64) net = FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = DenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = DenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = DenseLayer(net, n_units=10, act=tf.identity, W_init=tf.truncated_normal_initializer(stddev=1 / 192.0), name='output') # output: (batch_size, 10) + net = DenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size,
384) + net = DenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = DenseLayer( + net, n_units=10, act=tf.identity, W_init=tf.truncated_normal_initializer(stddev=1 / 192.0), name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -136,11 +148,8 @@ def distort_fn(x, is_train=False): return x -x = tf.placeholder(tf.float32, shape=[None, 24, 24, 3], name='x') -y_ = tf.placeholder( - tf.int64, shape=[ - None, - ], name='y_') +x = tf.placeholder(dtype=tf.float32, shape=[None, 24, 24, 3], name='x') +y_ = tf.placeholder(dtype=tf.int64, shape=[None], name='y_') ## using local response normalization # network, cost, _ = model(x, y_, False) @@ -156,7 +165,8 @@ def distort_fn(x, is_train=False): batch_size = 128 train_params = network.all_params -train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params) +train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, + use_locking=False).minimize(cost, var_list=train_params) tl.layers.initialize_global_variables(sess) diff --git a/example/tutorial_cifar10_tfrecord.py b/example/tutorial_cifar10_tfrecord.py index 7f15c2e3a..62be4c5d9 100644 --- a/example/tutorial_cifar10_tfrecord.py +++ b/example/tutorial_cifar10_tfrecord.py @@ -84,7 +84,9 @@ def data_to_tfrecord(images, labels, filename): feature={ "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])), 'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])), - })) + } + ) + ) writer.write(example.SerializeToString()) # Serialize To String writer.close() @@ -98,12 +100,13 @@ def read_and_decode(filename, is_train=None): serialized_example, features={ 'label': tf.FixedLenFeature([], tf.int64), 'img_raw': tf.FixedLenFeature([], tf.string), - }) + } + ) # You can do more image distortion here for training data img = tf.decode_raw(features['img_raw'], tf.float32) img = tf.reshape(img, [32, 32, 3]) # img = tf.cast(img, tf.float32) #* (1. / 255) - 0.5 if is_train == True: # 1. Randomly crop a [height, width] section of the image. img = tf.random_crop(img, [24, 24, 3]) # 2. Randomly flip the image horizontally.
@@ -175,9 +178,12 @@ def read_and_decode(filename, is_train=None): x_test_, y_test_ = read_and_decode("test.cifar10", False) x_train_batch, y_train_batch = tf.train.shuffle_batch( - [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32) # set the number of threads here + [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32 + ) # set the number of threads here # for testing, uses batch instead of shuffle_batch - x_test_batch, y_test_batch = tf.train.batch([x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32) + x_test_batch, y_test_batch = tf.train.batch( + [x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32 + ) def model(x_crop, y_, reuse): """ For more simplified CNN APIs, check tensorlayer.org """ @@ -208,9 +214,15 @@ def model(x_crop, y_, reuse): # net = PoolLayer(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], # padding='SAME', pool = tf.nn.max_pool, name ='pool2') # output: (batch_size, 6, 6, 64) net = FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = DenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = DenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = DenseLayer(net, n_units=10, act=tf.identity, W_init=W_init2, name='output') # output: (batch_size, 10) + net = DenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) + net = DenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = DenseLayer( + net, n_units=10, act=tf.identity, W_init=W_init2, name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -253,9 +265,15 @@ def model_batch_norm(x_crop, y_, reuse, is_train): # padding='SAME', pool = tf.nn.max_pool, name ='pool2') # output: (batch_size, 6, 6, 64) net = FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = DenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = DenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = DenseLayer(net, n_units=10, act=tf.identity, W_init=W_init2, name='output') # output: (batch_size, 10) + net = DenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) + net = DenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = DenseLayer( + net, n_units=10, act=tf.identity, W_init=W_init2, name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -325,7 +343,10 @@ def model_batch_norm(x_crop, y_, reuse, is_train): n_batch += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - print("Epoch %d : Step %d-%d of %d took %fs" % (epoch, step, step + n_step_epoch, n_step, time.time() - start_time)) + print( + "Epoch %d : Step %d-%d of %d took %fs" % + (epoch, step, step + n_step_epoch, n_step, time.time() - start_time) + ) print(" train loss: %f" % (train_loss / n_batch)) print(" train acc: %f" % (train_acc / n_batch)) diff --git a/example/tutorial_dorefanet_cifar10_tfrecord.py 
b/example/tutorial_dorefanet_cifar10_tfrecord.py index 879913c61..63b76d855 100644 --- a/example/tutorial_dorefanet_cifar10_tfrecord.py +++ b/example/tutorial_dorefanet_cifar10_tfrecord.py @@ -83,7 +83,9 @@ def data_to_tfrecord(images, labels, filename): feature={ "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])), 'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])), - })) + } + ) + ) writer.write(example.SerializeToString()) # Serialize To String writer.close() @@ -97,12 +99,13 @@ def read_and_decode(filename, is_train=None): serialized_example, features={ 'label': tf.FixedLenFeature([], tf.int64), 'img_raw': tf.FixedLenFeature([], tf.string), - }) + } + ) # You can do more image distortion here for training data img = tf.decode_raw(features['img_raw'], tf.float32) img = tf.reshape(img, [32, 32, 3]) # img = tf.cast(img, tf.float32) #* (1. / 255) - 0.5 if is_train == True: # 1. Randomly crop a [height, width] section of the image. img = tf.random_crop(img, [24, 24, 3]) # 2. Randomly flip the image horizontally. @@ -147,9 +150,12 @@ def read_and_decode(filename, is_train=None): x_test_, y_test_ = read_and_decode("test.cifar10", False) x_train_batch, y_train_batch = tf.train.shuffle_batch( - [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32) # set the number of threads here + [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32 + ) # set the number of threads here # for testing, uses batch instead of shuffle_batch - x_test_batch, y_test_batch = tf.train.batch([x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32) + x_test_batch, y_test_batch = tf.train.batch( + [x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32 + ) def model(x_crop, y_, reuse): """ For more simplified CNN APIs, check tensorlayer.org """ @@ -160,14 +166,26 @@ def model(x_crop, y_, reuse): net = tl.layers.InputLayer(x_crop, name='input') net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') - net = tl.layers.DorefaConv2d(net, 1, 3, 64, (5, 5), (1, 1), tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2') - net = tl.layers.LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') + net = tl.layers.LocalResponseNormLayer( + net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1' + ) + net = tl.layers.DorefaConv2d( + net, 1, 3, 64, (5, 5), (1, 1), tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2' + ) + net = tl.layers.LocalResponseNormLayer( + net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2' + ) net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.DorefaDenseLayer(net, 1, 3, 384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = tl.layers.DorefaDenseLayer(net, 1, 3, 192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity, W_init=W_init2, name='output') # output: (batch_size, 10) + net = tl.layers.DorefaDenseLayer( + net, 1, 3,
384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) + net = tl.layers.DorefaDenseLayer( + net, 1, 3, 192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = tl.layers.DenseLayer( + net, n_units=10, act=tf.identity, W_init=W_init2, name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -197,9 +215,15 @@ def model_batch_norm(x_crop, y_, reuse, is_train): net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.DenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = tl.layers.DenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity, W_init=W_init2, name='output') # output: (batch_size, 10) + net = tl.layers.DenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) + net = tl.layers.DenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = tl.layers.DenseLayer( + net, n_units=10, act=tf.identity, W_init=W_init2, name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') # L2 for the MLP, without this, the accuracy will be reduced by 15%. @@ -268,7 +292,10 @@ def model_batch_norm(x_crop, y_, reuse, is_train): n_batch += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - print("Epoch %d : Step %d-%d of %d took %fs" % (epoch, step, step + n_step_epoch, n_step, time.time() - start_time)) + print( + "Epoch %d : Step %d-%d of %d took %fs" % + (epoch, step, step + n_step_epoch, n_step, time.time() - start_time) + ) print(" train loss: %f" % (train_loss / n_batch)) print(" train acc: %f" % (train_acc / n_batch)) diff --git a/example/tutorial_frozenlake_dqn.py b/example/tutorial_frozenlake_dqn.py index d564fe6c5..11ebc4d26 100644 --- a/example/tutorial_frozenlake_dqn.py +++ b/example/tutorial_frozenlake_dqn.py @@ -48,9 +48,13 @@ def to_one_hot(i, n_classes=None): # 4x4 grid can be represented by one-hot vector with 16 integers. inputs = tf.placeholder(shape=[1, 16], dtype=tf.float32) net = InputLayer(inputs, name='observation') -net = DenseLayer(net, n_units=4, act=tf.identity, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s') +net = DenseLayer( + net, n_units=4, act=tf.identity, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s' +) y = net.outputs # action-value / rewards of 4 actions -predict = tf.argmax(y, 1) # choose action greedily with reward. in Q-Learning, policy is greedy, so we use "max" to select the next action. +predict = tf.argmax( + y, 1 +) # choose action greedily with reward. in Q-Learning, policy is greedy, so we use "max" to select the next action. ## Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values. nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32) @@ -93,11 +97,13 @@ def to_one_hot(i, n_classes=None): rAll += r s = s1 ## Reduce chance of random action if an episode is done. if d == True: e = 1.
/ ((i / 50) + 10) # reduce e, GLIE: Greedy in the limit with infinite Exploration break ## Note that, the rewards here with random action running_reward = rAll if running_reward is None else running_reward * 0.99 + rAll * 0.01 - print("Episode [%d/%d] sum reward:%f running reward:%f took:%.5fs %s" % (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' - if rAll == 0 else ' !!!!!!!!')) + print( + "Episode [%d/%d] sum reward:%f running reward:%f took:%.5fs %s" % + (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!') + ) diff --git a/example/tutorial_frozenlake_q_table.py b/example/tutorial_frozenlake_q_table.py index 4de85a6a1..f63127e76 100644 --- a/example/tutorial_frozenlake_q_table.py +++ b/example/tutorial_frozenlake_q_table.py @@ -48,11 +48,13 @@ Q[s, a] = Q[s, a] + lr * (r + lambd * np.max(Q[s1, :]) - Q[s, a]) rAll += r s = s1 if d == True: break rList.append(rAll) running_reward = r if running_reward is None else running_reward * 0.99 + r * 0.01 - print("Episode [%d/%d] sum reward:%f running reward:%f took:%.5fs %s" % (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' - if rAll == 0 else ' !!!!!!!!')) + print( + "Episode [%d/%d] sum reward:%f running reward:%f took:%.5fs %s" % + (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!') + ) print("Final Q-Table Values:/n %s" % Q) diff --git a/example/tutorial_generate_text.py b/example/tutorial_generate_text.py index ae8122291..7ff5a5c7c 100644 --- a/example/tutorial_generate_text.py +++ b/example/tutorial_generate_text.py @@ -155,7 +155,9 @@ def main_restore_embedding_layer(): x = tf.placeholder(tf.int32, shape=[batch_size]) - emb_net = tl.layers.EmbeddingInputlayer(inputs=x, vocabulary_size=vocabulary_size, embedding_size=embedding_size, name='embedding_layer') + emb_net = tl.layers.EmbeddingInputlayer( + inputs=x, vocabulary_size=vocabulary_size, embedding_size=embedding_size, name='embedding_layer' + ) # sess.run(tf.initialize_all_variables()) tl.layers.initialize_global_variables(sess) @@ -231,22 +233,20 @@ def inference(x, is_train, sequence_length, reuse=None): print("\nsequence_length: %d, is_train: %s, reuse: %s" % (sequence_length, is_train, reuse)) rnn_init = tf.random_uniform_initializer(-init_scale, init_scale) with tf.variable_scope("model", reuse=reuse): - network = EmbeddingInputlayer(inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=rnn_init, name='embedding') + network = EmbeddingInputlayer( + inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=rnn_init, name='embedding' + ) network = RNNLayer( - network, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - cell_init_args={ + network, cell_fn=tf.contrib.rnn.BasicLSTMCell, cell_init_args={ 'forget_bias': 0.0, 'state_is_tuple': True - }, - n_hidden=hidden_size, - initializer=rnn_init, - n_steps=sequence_length, - return_last=False, - return_seq_2d=True, - name='lstm1') + }, n_hidden=hidden_size, initializer=rnn_init, n_steps=sequence_length, return_last=False, + return_seq_2d=True, name='lstm1' + ) lstm1 = network - network = DenseLayer(network, n_units=vocab_size, W_init=rnn_init, b_init=rnn_init, act=tf.identity, name='output') + network = DenseLayer( + network, n_units=vocab_size, W_init=rnn_init, b_init=rnn_init, act=tf.identity, name='output' + ) return network, lstm1 # Inference for Training @@ -267,7 +267,9 @@ def loss_fn(outputs, targets, batch_size, sequence_length): # n_examples =
batch_size * sequence_length # so # cost is the averaged cost of each mini-batch (concurrent process). - loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([outputs], [tf.reshape(targets, [-1])], [tf.ones([batch_size * sequence_length])]) + loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example( + [outputs], [tf.reshape(targets, [-1])], [tf.ones([batch_size * sequence_length])] + ) cost = tf.reduce_sum(loss) / batch_size return cost @@ -311,13 +313,16 @@ def loss_fn(outputs, targets, batch_size, sequence_length): input_data: x, targets: y, lstm1.initial_state: state1, - }) + } + ) costs += _cost iters += sequence_length if step % (epoch_size // 10) == 1: - print("%.3f perplexity: %.3f speed: %.0f wps" % (step * 1.0 / epoch_size, np.exp(costs / iters), - iters * batch_size / (time.time() - start_time))) + print( + "%.3f perplexity: %.3f speed: %.0f wps" % + (step * 1.0 / epoch_size, np.exp(costs / iters), iters * batch_size / (time.time() - start_time)) + ) train_perplexity = np.exp(costs / iters) # print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity)) print("Epoch: %d/%d Train Perplexity: %.3f" % (i + 1, max_max_epoch, train_perplexity)) @@ -338,7 +343,8 @@ def loss_fn(outputs, targets, batch_size, sequence_length): ], feed_dict={ input_data_test: a_id, lstm1_test.initial_state: state1, - }) + } + ) # feed the last word in seed, and start to generate sentence. a_id = outs_id[-1] for _ in range(print_length): @@ -347,7 +353,8 @@ def loss_fn(outputs, targets, batch_size, sequence_length): [y_soft, lstm1_test.final_state], feed_dict={ input_data_test: a_id, lstm1_test.initial_state: state1, - }) + } + ) ## Without sampling # a_id = np.argmax(out[0]) ## Sample from all words, if vocab_size is large, diff --git a/example/tutorial_imagenet_inceptionV3_distributed.py b/example/tutorial_imagenet_inceptionV3_distributed.py index 37b548ed4..fc9495e74 100644 --- a/example/tutorial_imagenet_inceptionV3_distributed.py +++ b/example/tutorial_imagenet_inceptionV3_distributed.py @@ -131,7 +131,9 @@ def _parse_example_fn(line): return image_bytes, one_hot_labels def _map_fn(example_serialized): - image_bytes, one_hot_labels = tf.py_func(_parse_example_fn, [example_serialized], [tf.string, tf.float32], stateful=False) + image_bytes, one_hot_labels = tf.py_func( + _parse_example_fn, [example_serialized], [tf.string, tf.float32], stateful=False + ) image = tf.image.decode_jpeg(image_bytes, channels=3) image = tf.image.resize_images(image, size=[image_size, image_size]) @@ -161,7 +163,8 @@ def build_network(image_input, num_classes=1001, is_training=False): prev_layer=net_in, slim_layer=inception_v3, slim_args={ 'num_classes': num_classes, 'is_training': is_training - }, name='InceptionV3') + }, name='InceptionV3' + ) predictions = tf.nn.sigmoid(network.outputs, name='Predictions') return network, predictions @@ -171,11 +174,13 @@ def build_network(image_input, num_classes=1001, is_training=False): class EvaluatorStops(Exception): + def __init__(self, message): super(EvaluatorStops, self).__init__(message) class EvaluatorHook(session_run_hook.SessionRunHook): + def __init__(self, checkpoints_path, saver): self.checkpoints_path = checkpoints_path self.summary_writer = tf.summary.FileWriter(os.path.join(checkpoints_path, 'validation')) @@ -350,7 +355,8 @@ def run_worker(task_spec, checkpoints_path, batch_size=32, epochs=10): decay_steps=steps_per_epoch * 2, # 2 epochs decay_rate=0.94, staircase=True, - name='learning_rate') + name='learning_rate' + ) optimizer = 
tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=0.9, epsilon=1.0) # clip and apply gradients gvs = optimizer.compute_gradients(loss=loss, var_list=network.all_params) @@ -363,15 +369,17 @@ def run_worker(task_spec, checkpoints_path, batch_size=32, epochs=10): # metrics tf.summary.scalar('learning_rate/value', learning_rate) tf.summary.scalar('loss/logits', loss) - _, metrics_average_ops, metrics_ops = calculate_metrics(predicted_batch=predictions, real_batch=one_hot_classes, is_training=True) + _, metrics_average_ops, metrics_ops = calculate_metrics( + predicted_batch=predictions, real_batch=one_hot_classes, is_training=True + ) with tf.control_dependencies([train_op]): train_op = tf.group(metrics_average_ops) # start training hooks = [StopAtStepHook(last_step=steps_per_epoch * epochs)] - with tl.distributed.DistributedSession( - task_spec=task_spec, hooks=hooks, checkpoint_dir=checkpoints_path, save_summaries_secs=None, save_summaries_steps=300, - save_checkpoint_secs=60 * 60) as sess: + with tl.distributed.DistributedSession(task_spec=task_spec, hooks=hooks, checkpoint_dir=checkpoints_path, + save_summaries_secs=None, save_summaries_steps=300, + save_checkpoint_secs=60 * 60) as sess: # print network information if task_spec is None or task_spec.is_master(): network.print_params(False, session=sess) @@ -392,7 +400,9 @@ def run_worker(task_spec, checkpoints_path, batch_size=32, epochs=10): current_epoch = '{:.3f}'.format(float(step) / steps_per_epoch) max_steps = epochs * steps_per_epoch m = 'Epoch: {}/{} Steps: {}/{} Loss: {} Learning rate: {} Metrics: {}' - logging.info(m.format(current_epoch, epochs, step, max_steps, loss_val, learning_rate_val, metrics)) + logging.info( + m.format(current_epoch, epochs, step, max_steps, loss_val, learning_rate_val, metrics) + ) except OutOfRangeError: pass diff --git a/example/tutorial_imdb_fasttext.py b/example/tutorial_imdb_fasttext.py index 182e063e3..eecc6c825 100644 --- a/example/tutorial_imdb_fasttext.py +++ b/example/tutorial_imdb_fasttext.py @@ -139,7 +139,8 @@ def train_test_and_save_model(): classifier.train_op, feed_dict={ classifier.inputs: tl.prepro.pad_sequences(X_batch), classifier.labels: y_batch, - }) + } + ) print(" took %.5fs" % (time.time() - start_time)) @@ -147,7 +148,8 @@ def train_test_and_save_model(): classifier.accuracy, feed_dict={ classifier.inputs: tl.prepro.pad_sequences(X_test), classifier.labels: y_test, - }) + } + ) print('Test accuracy: %.5f' % test_accuracy) classifier.save(sess, MODEL_FILE_PATH) diff --git a/example/tutorial_inceptionV3_tfslim.py b/example/tutorial_inceptionV3_tfslim.py index d704776e7..69a45769c 100644 --- a/example/tutorial_inceptionV3_tfslim.py +++ b/example/tutorial_inceptionV3_tfslim.py @@ -41,7 +41,9 @@ try: from data.imagenet_classes import * except Exception as e: - raise Exception("{} / download the file from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data".format(e)) + raise Exception( + "{} / download the file from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data".format(e) + ) def load_image(path): @@ -122,7 +124,9 @@ def print_prob(prob): saver = tf.train.Saver() if not os.path.isfile("inception_v3.ckpt"): - raise Exception("Please download inception_v3 ckpt from : https://github.com/tensorflow/models/tree/master/research/slim") + raise Exception( + "Please download inception_v3 ckpt from : https://github.com/tensorflow/models/tree/master/research/slim" + ) try: # TF12+ saver.restore(sess, "./inception_v3.ckpt") @@ -132,7 +136,9 @@ def 
print_prob(prob): y = network.outputs probs = tf.nn.softmax(y) -img1 = load_image("data/puzzle.jpeg") # test data in github: https://github.com/zsdonghao/tensorlayer/tree/master/example/data +img1 = load_image( + "data/puzzle.jpeg" +) # test data in github: https://github.com/zsdonghao/tensorlayer/tree/master/example/data img1 = img1.reshape((1, 299, 299, 3)) prob = sess.run(probs, feed_dict={x: img1}) # the 1st time need time to compile diff --git a/example/tutorial_keras.py b/example/tutorial_keras.py index 481d211a8..e3eefaee5 100644 --- a/example/tutorial_keras.py +++ b/example/tutorial_keras.py @@ -43,7 +43,8 @@ def keras_block(x): learning_rate = 0.0001 train_params = network.all_params -train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params) +train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, + use_locking=False).minimize(cost, var_list=train_params) tl.layers.initialize_global_variables(sess) diff --git a/example/tutorial_mlp_dropout2.py b/example/tutorial_mlp_dropout2.py index 5ed028cf2..0f70e2225 100644 --- a/example/tutorial_mlp_dropout2.py +++ b/example/tutorial_mlp_dropout2.py @@ -41,7 +41,8 @@ def mlp(x, is_train=True, reuse=False): # define the optimizer train_params = tl.layers.get_variables_with_name('MLP', train_only=True, printable=False) -train_op = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params) +train_op = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999, epsilon=1e-08, + use_locking=False).minimize(cost, var_list=train_params) # initialize all variables in the session tl.layers.initialize_global_variables(sess) diff --git a/example/tutorial_mnist.py b/example/tutorial_mnist.py index c5420320b..174068074 100644 --- a/example/tutorial_mnist.py +++ b/example/tutorial_mnist.py @@ -49,8 +49,12 @@ def main_test_layers(model='relu'): network = tl.layers.DenseLayer(network, n_units=10, act=tf.identity, name='output') elif model == 'dropconnect': network = tl.layers.InputLayer(x, name='input') - network = tl.layers.DropconnectDenseLayer(network, keep=0.8, n_units=800, act=tf.nn.relu, name='dropconnect_relu1') - network = tl.layers.DropconnectDenseLayer(network, keep=0.5, n_units=800, act=tf.nn.relu, name='dropconnect_relu2') + network = tl.layers.DropconnectDenseLayer( + network, keep=0.8, n_units=800, act=tf.nn.relu, name='dropconnect_relu1' + ) + network = tl.layers.DropconnectDenseLayer( + network, keep=0.5, n_units=800, act=tf.nn.relu, name='dropconnect_relu2' + ) network = tl.layers.DropconnectDenseLayer(network, keep=0.5, n_units=10, act=tf.identity, name='output') # To print all attributes of a Layer. @@ -189,7 +193,9 @@ def main_test_denoise_AE(model='relu'): ## pretrain print("Pre-train Layer 1") recon_layer1.pretrain( - sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=200, batch_size=128, print_freq=10, save=True, save_name='w1pre_') + sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=200, batch_size=128, print_freq=10, + save=True, save_name='w1pre_' + ) # You can also disable denoising by setting denoise_name=None.
# recon_layer1.pretrain(sess, x=x, X_train=X_train, X_val=X_val, # denoise_name=None, n_epoch=500, batch_size=128, @@ -260,9 +266,14 @@ def main_test_stacked_denoise_AE(model='relu'): network.print_params() print("\nPre-train Layer 1") recon_layer1.pretrain( - sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_') + sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=100, batch_size=128, print_freq=10, + save=True, save_name='w1pre_' + ) print("\nPre-train Layer 2") - recon_layer2.pretrain(sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=100, batch_size=128, print_freq=10, save=False) + recon_layer2.pretrain( + sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=100, batch_size=128, print_freq=10, + save=False + ) print("\nAll Network Params after pre-train") network.print_params() diff --git a/example/tutorial_mnist_distributed.py b/example/tutorial_mnist_distributed.py index 0a5ad307e..d76f90daf 100644 --- a/example/tutorial_mnist_distributed.py +++ b/example/tutorial_mnist_distributed.py @@ -70,21 +70,9 @@ # train the network tl.utils.fit( - sess, - network, - train_op, - cost, - X_train, - y_train, - x, - y_, - acc=acc, - batch_size=500, - n_epoch=500, - print_freq=print_freq, - X_val=X_val, - y_val=y_val, - eval_train=eval_train) + sess, network, train_op, cost, X_train, y_train, x, y_, acc=acc, batch_size=500, n_epoch=500, + print_freq=print_freq, X_val=X_val, y_val=y_val, eval_train=eval_train + ) if task_spec.is_master(): # evaluation diff --git a/example/tutorial_mnist_float16.py b/example/tutorial_mnist_float16.py index be77ae620..b7b5d66cc 100644 --- a/example/tutorial_mnist_float16.py +++ b/example/tutorial_mnist_float16.py @@ -62,8 +62,8 @@ def model(x, is_train=True, reuse=False): beta2=0.999, # epsilon=1e-08, # for float32 as default epsilon=1e-4, # for float16, see https://stackoverflow.com/questions/42064941/tensorflow-float16-support-is-broken - use_locking=False).minimize( - cost, var_list=train_params) + use_locking=False +).minimize(cost, var_list=train_params) # initialize all variables in the session tl.layers.initialize_global_variables(sess) diff --git a/example/tutorial_mnist_simple.py b/example/tutorial_mnist_simple.py index 4bc8d6a1b..457774b68 100644 --- a/example/tutorial_mnist_simple.py +++ b/example/tutorial_mnist_simple.py @@ -45,7 +45,9 @@ # train the network tl.utils.fit( - sess, network, train_op, cost, X_train, y_train, x, y_, acc=acc, batch_size=500, n_epoch=500, print_freq=5, X_val=X_val, y_val=y_val, eval_train=False) + sess, network, train_op, cost, X_train, y_train, x, y_, acc=acc, batch_size=500, n_epoch=500, print_freq=5, + X_val=X_val, y_val=y_val, eval_train=False +) # evaluation tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost) diff --git a/example/tutorial_mobilenet.py b/example/tutorial_mobilenet.py index 3cc34c81e..3301cb620 100644 --- a/example/tutorial_mobilenet.py +++ b/example/tutorial_mobilenet.py @@ -13,7 +13,9 @@ import tensorflow as tf import tensorlayer as tl -from tensorlayer.layers import (BatchNormLayer, Conv2d, DepthwiseConv2d, FlattenLayer, GlobalMeanPool2d, InputLayer, ReshapeLayer) +from tensorlayer.layers import ( + BatchNormLayer, Conv2d, DepthwiseConv2d, FlattenLayer, GlobalMeanPool2d, InputLayer, ReshapeLayer +) def conv_block(n, n_filter, filter_size=(3, 3), strides=(1, 1), is_train=False, name='conv_block'): @@ -36,14 +38,18 
@@ def depthwise_conv_block(n, n_filter, strides=(1, 1), is_train=False, name="dept def decode_predictions(preds, top=5): # keras.applications.resnet50 fpath = os.path.join("data", "imagenet_class_index.json") if tl.files.file_exists(fpath) is False: - raise Exception("{} / download imagenet_class_index.json from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data") + raise Exception( + "{} / download imagenet_class_index.json from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data" + ) if isinstance(preds, np.ndarray) is False: preds = np.asarray(preds) if len(preds.shape) != 2 or preds.shape[1] != 1000: - raise ValueError('`decode_predictions` expects ' - 'a batch of predictions ' - '(i.e. a 2D array of shape (samples, 1000)). ' - 'Found array with shape: ' + str(preds.shape)) + raise ValueError( + '`decode_predictions` expects ' + 'a batch of predictions ' + '(i.e. a 2D array of shape (samples, 1000)). ' + 'Found array with shape: ' + str(preds.shape) + ) with open(fpath) as f: CLASS_INDEX = json.load(f) results = [] diff --git a/example/tutorial_ptb_lstm.py b/example/tutorial_ptb_lstm.py index b91eb432a..418bd587a 100644 --- a/example/tutorial_ptb_lstm.py +++ b/example/tutorial_ptb_lstm.py @@ -194,7 +194,9 @@ def inference(x, is_training, num_steps, reuse=None): print("\nnum_steps : %d, is_training : %s, reuse : %s" % (num_steps, is_training, reuse)) initializer = tf.random_uniform_initializer(-init_scale, init_scale) with tf.variable_scope("model", reuse=reuse): - network = tl.layers.EmbeddingInputlayer(inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=initializer, name='embedding') + network = tl.layers.EmbeddingInputlayer( + inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=initializer, name='embedding' + ) network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop1') network = tl.layers.RNNLayer( network, @@ -204,7 +206,8 @@ def inference(x, is_training, num_steps, reuse=None): initializer=initializer, n_steps=num_steps, return_last=False, - name='basic_lstm_layer1') + name='basic_lstm_layer1' + ) lstm1 = network network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop2') network = tl.layers.RNNLayer( @@ -216,14 +219,17 @@ def inference(x, is_training, num_steps, reuse=None): n_steps=num_steps, return_last=False, return_seq_2d=True, - name='basic_lstm_layer2') + name='basic_lstm_layer2' + ) lstm2 = network # Alternatively, if return_seq_2d=False, in the above RNN layer, # you can reshape the outputs as follow: # network = tl.layers.ReshapeLayer(network, # shape=[-1, int(network.outputs._shape[-1])], name='reshape') network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop3') - network = tl.layers.DenseLayer(network, n_units=vocab_size, W_init=initializer, b_init=initializer, act=tf.identity, name='output') + network = tl.layers.DenseLayer( + network, n_units=vocab_size, W_init=initializer, b_init=initializer, act=tf.identity, name='output' + ) return network, lstm1, lstm2 # Inference for Training @@ -299,13 +305,17 @@ def loss_fn(outputs, targets): #, batch_size, num_steps): } # For training, enable dropout feed_dict.update(network.all_drop) - _cost, state1, state2, _ = sess.run([cost, lstm1.final_state, lstm2.final_state, train_op], feed_dict=feed_dict) + _cost, state1, state2, _ = sess.run( + [cost, lstm1.final_state, lstm2.final_state, train_op], feed_dict=feed_dict + 
) costs += _cost iters += num_steps if step % (epoch_size // 10) == 10: - print("%.3f perplexity: %.3f speed: %.0f wps" % (step * 1.0 / epoch_size, np.exp(costs / iters), - iters * batch_size / (time.time() - start_time))) + print( + "%.3f perplexity: %.3f speed: %.0f wps" % + (step * 1.0 / epoch_size, np.exp(costs / iters), iters * batch_size / (time.time() - start_time)) + ) train_perplexity = np.exp(costs / iters) print("Epoch: %d/%d Train Perplexity: %.3f" % (i + 1, max_max_epoch, train_perplexity)) @@ -323,7 +333,10 @@ def loss_fn(outputs, targets): #, batch_size, num_steps): lstm1_val.initial_state: state1, lstm2_val.initial_state: state2, } - _cost, state1, state2, _ = sess.run([cost_val, lstm1_val.final_state, lstm2_val.final_state, tf.no_op()], feed_dict=feed_dict) + _cost, state1, state2, _ = sess.run( + [cost_val, lstm1_val.final_state, lstm2_val.final_state, + tf.no_op()], feed_dict=feed_dict + ) costs += _cost iters += num_steps valid_perplexity = np.exp(costs / iters) @@ -345,7 +358,9 @@ def loss_fn(outputs, targets): #, batch_size, num_steps): lstm1_test.initial_state: state1, lstm2_test.initial_state: state2, } - _cost, state1, state2 = sess.run([cost_test, lstm1_test.final_state, lstm2_test.final_state], feed_dict=feed_dict) + _cost, state1, state2 = sess.run( + [cost_test, lstm1_test.final_state, lstm2_test.final_state], feed_dict=feed_dict + ) costs += _cost iters += 1 test_perplexity = np.exp(costs / iters) diff --git a/example/tutorial_ptb_lstm_state_is_tuple.py b/example/tutorial_ptb_lstm_state_is_tuple.py index 045fa7e39..1202a4524 100644 --- a/example/tutorial_ptb_lstm_state_is_tuple.py +++ b/example/tutorial_ptb_lstm_state_is_tuple.py @@ -194,7 +194,9 @@ def inference(x, is_training, num_steps, reuse=None): print("\nnum_steps : %d, is_training : %s, reuse : %s" % (num_steps, is_training, reuse)) initializer = tf.random_uniform_initializer(-init_scale, init_scale) with tf.variable_scope("model", reuse=reuse): - network = tl.layers.EmbeddingInputlayer(inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=initializer, name='embedding') + network = tl.layers.EmbeddingInputlayer( + inputs=x, vocabulary_size=vocab_size, embedding_size=hidden_size, E_init=initializer, name='embedding' + ) network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop1') network = tl.layers.RNNLayer( network, @@ -207,7 +209,8 @@ def inference(x, is_training, num_steps, reuse=None): initializer=initializer, n_steps=num_steps, return_last=False, - name='basic_lstm1') + name='basic_lstm1' + ) lstm1 = network network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop2') network = tl.layers.RNNLayer( @@ -222,14 +225,17 @@ def inference(x, is_training, num_steps, reuse=None): n_steps=num_steps, return_last=False, return_seq_2d=True, - name='basic_lstm2') + name='basic_lstm2' + ) lstm2 = network # Alternatively, if return_seq_2d=False, in the above RNN layer, # you can reshape the outputs as follow: # network = tl.layers.ReshapeLayer(network, # shape=[-1, int(network.outputs._shape[-1])], name='reshape') network = tl.layers.DropoutLayer(network, keep=keep_prob, is_fix=True, is_train=is_training, name='drop3') - network = tl.layers.DenseLayer(network, n_units=vocab_size, W_init=initializer, b_init=initializer, act=tf.identity, name='output') + network = tl.layers.DenseLayer( + network, n_units=vocab_size, W_init=initializer, b_init=initializer, act=tf.identity, name='output' + ) return network, 
lstm1, lstm2 # Inference for Training @@ -323,8 +329,10 @@ def loss_fn(outputs, targets, batch_size): iters += num_steps if step % (epoch_size // 10) == 10: - print("%.3f perplexity: %.3f speed: %.0f wps" % (step * 1.0 / epoch_size, np.exp(costs / iters), - iters * batch_size / (time.time() - start_time))) + print( + "%.3f perplexity: %.3f speed: %.0f wps" % + (step * 1.0 / epoch_size, np.exp(costs / iters), iters * batch_size / (time.time() - start_time)) + ) train_perplexity = np.exp(costs / iters) print("Epoch: %d/%d Train Perplexity: %.3f" % (i + 1, max_max_epoch, train_perplexity)) diff --git a/example/tutorial_squeezenet.py b/example/tutorial_squeezenet.py index d3950e2ee..238174633 100644 --- a/example/tutorial_squeezenet.py +++ b/example/tutorial_squeezenet.py @@ -15,14 +15,18 @@ def decode_predictions(preds, top=5): # keras.applications.resnet50 fpath = os.path.join("data", "imagenet_class_index.json") if tl.files.file_exists(fpath) is False: - raise Exception("{} / download imagenet_class_index.json from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data") + raise Exception( + "{} / download imagenet_class_index.json from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data" + ) if isinstance(preds, np.ndarray) is False: preds = np.asarray(preds) if len(preds.shape) != 2 or preds.shape[1] != 1000: - raise ValueError('`decode_predictions` expects ' - 'a batch of predictions ' - '(i.e. a 2D array of shape (samples, 1000)). ' - 'Found array with shape: ' + str(preds.shape)) + raise ValueError( + '`decode_predictions` expects ' + 'a batch of predictions ' + '(i.e. a 2D array of shape (samples, 1000)). ' + 'Found array with shape: ' + str(preds.shape) + ) with open(fpath) as f: CLASS_INDEX = json.load(f) results = [] @@ -113,7 +117,9 @@ def squeezenet(x, is_train=True, reuse=False): if tl.files.file_exists('squeezenet.npz'): tl.files.load_and_assign_npz(sess=sess, name='squeezenet.npz', network=n) else: - raise Exception("please download the pre-trained squeezenet.npz from https://github.com/tensorlayer/pretrained-models") + raise Exception( + "please download the pre-trained squeezenet.npz from https://github.com/tensorlayer/pretrained-models" + ) img = tl.vis.read_image('data/tiger.jpeg', '') img = tl.prepro.imresize(img, (224, 224)) diff --git a/example/tutorial_ternaryweight_cifar10_tfrecord.py b/example/tutorial_ternaryweight_cifar10_tfrecord.py index 84399d8eb..1f6f30357 100644 --- a/example/tutorial_ternaryweight_cifar10_tfrecord.py +++ b/example/tutorial_ternaryweight_cifar10_tfrecord.py @@ -82,7 +82,9 @@ def data_to_tfrecord(images, labels, filename): feature={ "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])), 'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])), - })) + } + ) + ) writer.write(example.SerializeToString()) # Serialize To String writer.close() @@ -96,12 +98,13 @@ def read_and_decode(filename, is_train=None): serialized_example, features={ 'label': tf.FixedLenFeature([], tf.int64), 'img_raw': tf.FixedLenFeature([], tf.string), - }) + } + ) # You can do more image distortion here for training data img = tf.decode_raw(features['img_raw'], tf.float32) img = tf.reshape(img, [32, 32, 3]) # img = tf.cast(img, tf.float32) #* (1. / 255) - 0.5 if is_train == True: # 1. Randomly crop a [height, width] section of the image. img = tf.random_crop(img, [24, 24, 3]) # 2. Randomly flip the image horizontally.
@@ -146,9 +149,12 @@ def read_and_decode(filename, is_train=None): x_test_, y_test_ = read_and_decode("test.cifar10", False) x_train_batch, y_train_batch = tf.train.shuffle_batch( - [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32) # set the number of threads here + [x_train_, y_train_], batch_size=batch_size, capacity=2000, min_after_dequeue=1000, num_threads=32 + ) # set the number of threads here # for testing, uses batch instead of shuffle_batch - x_test_batch, y_test_batch = tf.train.batch([x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32) + x_test_batch, y_test_batch = tf.train.batch( + [x_test_, y_test_], batch_size=batch_size, capacity=50000, num_threads=32 + ) def model(x_crop, y_, reuse): """ For more simplified CNN APIs, check tensorlayer.org """ @@ -159,14 +165,26 @@ def model(x_crop, y_, reuse): net = tl.layers.InputLayer(x_crop, name='input') net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn1') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - net = tl.layers.LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') - net = tl.layers.TernaryConv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2') - net = tl.layers.LocalResponseNormLayer(net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') + net = tl.layers.LocalResponseNormLayer( + net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1' + ) + net = tl.layers.TernaryConv2d( + net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', W_init=W_init, name='cnn2' + ) + net = tl.layers.LocalResponseNormLayer( + net, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2' + ) net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.TernaryDenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = tl.layers.TernaryDenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity, W_init=W_init2, name='output') # output: (batch_size, 10) + net = tl.layers.TernaryDenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) + net = tl.layers.TernaryDenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = tl.layers.DenseLayer( + net, n_units=10, act=tf.identity, W_init=W_init2, name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') @@ -196,9 +214,15 @@ def model_batch_norm(x_crop, y_, reuse, is_train): net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu, name='batch2') net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') net = tl.layers.FlattenLayer(net, name='flatten') # output: (batch_size, 2304) - net = tl.layers.DenseLayer(net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu') # output: (batch_size, 384) - net = tl.layers.DenseLayer(net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu') # output: (batch_size, 192) - net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity, 
W_init=W_init2, name='output') # output: (batch_size, 10) + net = tl.layers.DenseLayer( + net, n_units=384, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d1relu' + ) # output: (batch_size, 384) + net = tl.layers.DenseLayer( + net, n_units=192, act=tf.nn.relu, W_init=W_init2, b_init=b_init2, name='d2relu' + ) # output: (batch_size, 192) + net = tl.layers.DenseLayer( + net, n_units=10, act=tf.identity, W_init=W_init2, name='output' + ) # output: (batch_size, 10) y = net.outputs ce = tl.cost.cross_entropy(y, y_, name='cost') # L2 for the MLP, without this, the accuracy will be reduced by 15%. @@ -267,7 +291,10 @@ def model_batch_norm(x_crop, y_, reuse, is_train): n_batch += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - print("Epoch %d : Step %d-%d of %d took %fs" % (epoch, step, step + n_step_epoch, n_step, time.time() - start_time)) + print( + "Epoch %d : Step %d-%d of %d took %fs" % + (epoch, step, step + n_step_epoch, n_step, time.time() - start_time) + ) print(" train loss: %f" % (train_loss / n_batch)) print(" train acc: %f" % (train_acc / n_batch)) diff --git a/example/tutorial_tf_dataset_voc.py b/example/tutorial_tf_dataset_voc.py index 4c2580fe2..43e405547 100644 --- a/example/tutorial_tf_dataset_voc.py +++ b/example/tutorial_tf_dataset_voc.py @@ -50,8 +50,12 @@ def _data_aug_fn(im, ann): ## random resize and crop tmp0 = random.randint(1, int(im_size[0] * jitter)) tmp1 = random.randint(1, int(im_size[1] * jitter)) - im, coords = tl.prepro.obj_box_imresize(im, coords, [im_size[0] + tmp0, im_size[1] + tmp1], is_rescale=True, interp='bicubic') - im, clas, coords = tl.prepro.obj_box_crop(im, clas, coords, wrg=im_size[1], hrg=im_size[0], is_rescale=True, is_center=True, is_random=True) + im, coords = tl.prepro.obj_box_imresize( + im, coords, [im_size[0] + tmp0, im_size[1] + tmp1], is_rescale=True, interp='bicubic' + ) + im, clas, coords = tl.prepro.obj_box_crop( + im, clas, coords, wrg=im_size[1], hrg=im_size[0], is_rescale=True, is_center=True, is_random=True + ) ## value [0, 255] to [-1, 1] (optional) # im = im / 127.5 - 1 ## value [0, 255] to [0, 1] (optional) @@ -97,4 +101,6 @@ def _map_fn(filename, annotation): ## save all images for i in range(len(im)): - tl.vis.draw_boxes_and_labels_to_image(im[i] * 255, ann[i][0], ann[i][1], [], classes, True, save_name='_bbox_vis_%d.png' % i) + tl.vis.draw_boxes_and_labels_to_image( + im[i] * 255, ann[i][0], ann[i][1], [], classes, True, save_name='_bbox_vis_%d.png' % i + ) diff --git a/example/tutorial_tfrecord.py b/example/tutorial_tfrecord.py index f77cc6ade..0224e55c2 100644 --- a/example/tutorial_tfrecord.py +++ b/example/tutorial_tfrecord.py @@ -81,7 +81,8 @@ def read_and_decode(filename): features={ 'label': tf.FixedLenFeature([], tf.int64), 'img_raw': tf.FixedLenFeature([], tf.string), - }) + } + ) # You can do more image distortion here for training data img = tf.decode_raw(features['img_raw'], tf.uint8) img = tf.reshape(img, [224, 224, 3]) @@ -94,7 +95,9 @@ def read_and_decode(filename): ## Use shuffle_batch or batch # see https://www.tensorflow.org/versions/master/api_docs/python/io_ops.html#shuffle_batch -img_batch, label_batch = tf.train.shuffle_batch([img, label], batch_size=4, capacity=2000, min_after_dequeue=1000, num_threads=16) +img_batch, label_batch = tf.train.shuffle_batch( + [img, label], batch_size=4, capacity=2000, min_after_dequeue=1000, num_threads=16 +) print("img_batch : %s" % img_batch._shape) print("label_batch : %s" % label_batch._shape) # init = tf.initialize_all_variables() diff --git 
a/example/tutorial_tfrecord2.py b/example/tutorial_tfrecord2.py index 189b24167..eb210d9da 100755 --- a/example/tutorial_tfrecord2.py +++ b/example/tutorial_tfrecord2.py @@ -48,10 +48,13 @@ # image = image.reshape([32, 32, 3]) # tl.visualize.frame(np.asarray(image, dtype=np.uint8), second=1, saveable=False, name='frame', fig_idx=1236) example = tf.train.Example( - features=tf.train.Features(feature={ - "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])), - 'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])), - })) + features=tf.train.Features( + feature={ + "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])), + 'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])), + } + ) + ) writer.write(example.SerializeToString()) # Serialize To String writer.close() @@ -65,7 +68,8 @@ def read_and_decode(filename): serialized_example, features={ 'label': tf.FixedLenFeature([], tf.int64), 'img_raw': tf.FixedLenFeature([], tf.string), - }) + } + ) # You can do more image distortion here for training data img = tf.decode_raw(features['img_raw'], tf.float32) img = tf.reshape(img, [32, 32, 3]) @@ -78,7 +82,9 @@ def read_and_decode(filename): ## Use shuffle_batch or batch # see https://www.tensorflow.org/versions/master/api_docs/python/io_ops.html#shuffle_batch -img_batch, label_batch = tf.train.shuffle_batch([img, label], batch_size=4, capacity=50000, min_after_dequeue=10000, num_threads=1) +img_batch, label_batch = tf.train.shuffle_batch( + [img, label], batch_size=4, capacity=50000, min_after_dequeue=10000, num_threads=1 +) print("img_batch : %s" % img_batch._shape) print("label_batch : %s" % label_batch._shape) diff --git a/example/tutorial_tfrecord3.py b/example/tutorial_tfrecord3.py index 936ab343f..6626476ec 100644 --- a/example/tutorial_tfrecord3.py +++ b/example/tutorial_tfrecord3.py @@ -90,10 +90,11 @@ def _bytes_feature_list(values): context = tf.train.Features(feature={ # Non-serial data uses Feature "image/img_raw": _bytes_feature(img_raw), }) - feature_lists = tf.train.FeatureLists(feature_list={ # Serial data uses FeatureLists - "image/caption": _bytes_feature_list(img_capt_b), - "image/caption_ids": _int64_feature_list(img_capt_ids) - }) + feature_lists = tf.train.FeatureLists( + feature_list={ # Serial data uses FeatureLists + "image/caption": _bytes_feature_list(img_capt_b), + "image/caption_ids": _int64_feature_list(img_capt_ids) + }) sequence_example = tf.train.SequenceExample(context=context, feature_lists=feature_lists) writer.write(sequence_example.SerializeToString()) # Serialize To String writer.close() @@ -104,14 +105,13 @@ def _bytes_feature_list(values): _, serialized_example = reader.read(filename_queue) # return the file and the name of file # features, sequence_features = tf.parse_single_example(serialized_example, # see parse_single_sequence_example for sequence example features, sequence_features = tf.parse_single_sequence_example( - serialized_example, - context_features={ + serialized_example, context_features={ 'image/img_raw': tf.FixedLenFeature([], tf.string), - }, - sequence_features={ + }, sequence_features={ "image/caption": tf.FixedLenSequenceFeature([], dtype=tf.string), "image/caption_ids": tf.FixedLenSequenceFeature([], dtype=tf.int64), - }) + } +) c = tf.contrib.learn.run_n(features, n=1, feed_dict=None) im = Image.frombytes('RGB', (299, 299), c[0]['image/img_raw']) tl.visualize.frame(np.asarray(im), second=1, saveable=False, name='frame', fig_idx=1236) @@ -230,15 +230,10 @@ def 
distort_image(image, thread_id): # return image -def prefetch_input_data(reader, - file_pattern, - is_training, - batch_size, - values_per_shard, - input_queue_capacity_factor=16, - num_reader_threads=1, - shard_queue_name="filename_queue", - value_queue_name="input_queue"): +def prefetch_input_data( + reader, file_pattern, is_training, batch_size, values_per_shard, input_queue_capacity_factor=16, + num_reader_threads=1, shard_queue_name="filename_queue", value_queue_name="input_queue" +): """Prefetches string values from disk into an input queue. In training the capacity of the queue is important because a larger queue @@ -276,7 +271,10 @@ def prefetch_input_data(reader, filename_queue = tf.train.string_input_producer(data_files, shuffle=True, capacity=16, name=shard_queue_name) min_queue_examples = values_per_shard * input_queue_capacity_factor capacity = min_queue_examples + 100 * batch_size - values_queue = tf.RandomShuffleQueue(capacity=capacity, min_after_dequeue=min_queue_examples, dtypes=[tf.string], name="random_" + value_queue_name) + values_queue = tf.RandomShuffleQueue( + capacity=capacity, min_after_dequeue=min_queue_examples, dtypes=[tf.string], + name="random_" + value_queue_name + ) else: print(" is_training == False : FIFOQueue") filename_queue = tf.train.string_input_producer(data_files, shuffle=False, capacity=1, name=shard_queue_name) @@ -289,7 +287,10 @@ def prefetch_input_data(reader, enqueue_ops.append(values_queue.enqueue([value])) tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops)) - tf.summary.scalar("queue/%s/fraction_of_%d_full" % (values_queue.name, capacity), tf.cast(values_queue.size(), tf.float32) * (1. / capacity)) + tf.summary.scalar( + "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity), + tf.cast(values_queue.size(), tf.float32) * (1. / capacity) + ) return values_queue @@ -311,12 +312,12 @@ def prefetch_input_data(reader, serialized_sequence_example = input_queue.dequeue() # serialized_sequence_example = tf.train.string_input_producer(["train.cat_caption"]) # don't work context, sequence = tf.parse_single_sequence_example( - serialized=serialized_sequence_example, - context_features={"image/img_raw": tf.FixedLenFeature([], dtype=tf.string)}, + serialized=serialized_sequence_example, context_features={"image/img_raw": tf.FixedLenFeature([], dtype=tf.string)}, sequence_features={ "image/caption": tf.FixedLenSequenceFeature([], dtype=tf.string), "image/caption_ids": tf.FixedLenSequenceFeature([], dtype=tf.int64), - }) + } +) img = tf.decode_raw(context["image/img_raw"], tf.uint8) img = tf.reshape(img, [height, width, 3]) @@ -327,7 +328,9 @@ def prefetch_input_data(reader, img = tf.image.resize_images(img, size=(resize_height, resize_width), method=tf.image.ResizeMethod.BILINEAR) except Exception: # for TensorFlow 0.10 - img = tf.image.resize_images(img, new_height=resize_height, new_width=resize_width, method=tf.image.ResizeMethod.BILINEAR) + img = tf.image.resize_images( + img, new_height=resize_height, new_width=resize_width, method=tf.image.ResizeMethod.BILINEAR + ) # Crop to final dimensions. 
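The `prefetch_input_data` docstring above stresses that queue capacity matters during training; the arithmetic in the hunk is easy to check by hand. A worked example with assumed inputs (`input_queue_capacity_factor=16` and the `100 * batch_size` headroom come from the code shown above; `values_per_shard` and `batch_size` are illustrative):

    values_per_shard = 2000           # assumed: examples per TFRecord shard
    batch_size = 32                   # assumed
    input_queue_capacity_factor = 16  # default from the signature above

    min_queue_examples = values_per_shard * input_queue_capacity_factor  # 32000
    capacity = min_queue_examples + 100 * batch_size                     # 32000 + 3200 = 35200

    # The RandomShuffleQueue keeps at least min_queue_examples values buffered
    # after each dequeue, so a batch is shuffled across roughly 16 shards' worth
    # of examples rather than reflecting the order of a single input file.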
if is_training: img = tf.random_crop(img, [height, width, 3]) @@ -347,7 +350,8 @@ def prefetch_input_data(reader, batch_size=4, capacity=50000, dynamic_pad=True, # string list pad with '', int list pad with 0 - num_threads=4) + num_threads=4 +) sess = tf.Session() # sess.run(tf.initialize_all_variables()) tl.layers.initialize_global_variables(sess) @@ -429,7 +433,8 @@ def batch_with_dynamic_pad(images_and_captions, batch_size, queue_capacity, add_ enqueue_list.append([image, input_seq, target_seq, indicator]) images, input_seqs, target_seqs, mask = tf.train.batch_join( - enqueue_list, batch_size=batch_size, capacity=queue_capacity, dynamic_pad=True, name="batch_and_pad") + enqueue_list, batch_size=batch_size, capacity=queue_capacity, dynamic_pad=True, name="batch_and_pad" + ) if add_summaries: lengths = tf.add(tf.reduce_sum(mask, 1), 1) @@ -440,7 +445,9 @@ def batch_with_dynamic_pad(images_and_captions, batch_size, queue_capacity, add_ return images, input_seqs, target_seqs, mask -images, input_seqs, target_seqs, input_mask = (batch_with_dynamic_pad(images_and_captions=[[img, img_cap]], batch_size=4, queue_capacity=50000)) +images, input_seqs, target_seqs, input_mask = ( + batch_with_dynamic_pad(images_and_captions=[[img, img_cap]], batch_size=4, queue_capacity=50000) +) sess = tf.Session() sess.run(tf.initialize_all_variables()) coord = tf.train.Coordinator() diff --git a/example/tutorial_vgg16.py b/example/tutorial_vgg16.py index 606497f32..faab073b4 100644 --- a/example/tutorial_vgg16.py +++ b/example/tutorial_vgg16.py @@ -45,7 +45,9 @@ try: from data.imagenet_classes import * except Exception as e: - raise Exception("{} / download the file from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data".format(e)) + raise Exception( + "{} / download the file from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data".format(e) + ) def conv_layers(net_in): @@ -63,15 +65,19 @@ def conv_layers(net_in): shape=[3, 3, 3, 64], # 64 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv1_1') + name='conv1_1' + ) network = Conv2dLayer( network, act=tf.nn.relu, shape=[3, 3, 64, 64], # 64 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv1_2') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') + name='conv1_2' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1' + ) # conv2 network = Conv2dLayer( @@ -80,15 +86,19 @@ def conv_layers(net_in): shape=[3, 3, 64, 128], # 128 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv2_1') + name='conv2_1' + ) network = Conv2dLayer( network, act=tf.nn.relu, shape=[3, 3, 128, 128], # 128 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv2_2') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') + name='conv2_2' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2' + ) # conv3 network = Conv2dLayer( @@ -97,22 +107,27 @@ def conv_layers(net_in): shape=[3, 3, 128, 256], # 256 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv3_1') + name='conv3_1' + ) network = Conv2dLayer( network, act=tf.nn.relu, shape=[3, 3, 256, 256], # 256 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv3_2') + 
name='conv3_2' + ) network = Conv2dLayer( network, act=tf.nn.relu, shape=[3, 3, 256, 256], # 256 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv3_3') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') + name='conv3_3' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3' + ) # conv4 network = Conv2dLayer( @@ -121,22 +136,27 @@ def conv_layers(net_in): shape=[3, 3, 256, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv4_1') + name='conv4_1' + ) network = Conv2dLayer( network, act=tf.nn.relu, shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv4_2') + name='conv4_2' + ) network = Conv2dLayer( network, act=tf.nn.relu, shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv4_3') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') + name='conv4_3' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4' + ) # conv5 network = Conv2dLayer( @@ -145,22 +165,27 @@ def conv_layers(net_in): shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv5_1') + name='conv5_1' + ) network = Conv2dLayer( network, act=tf.nn.relu, shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv5_2') + name='conv5_2' + ) network = Conv2dLayer( network, act=tf.nn.relu, shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv5_3') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') + name='conv5_3' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5' + ) return network @@ -173,31 +198,57 @@ def conv_layers_simple_api(net_in): net_in.outputs = net_in.outputs - mean # conv1 - network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1') - network = Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2') + network = Conv2d( + net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1' + ) + network = Conv2d( + network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') # conv2 - network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1') - network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2') + network = Conv2d( + network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1' + ) + network = Conv2d( + network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') # conv3 - network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), 
act=tf.nn.relu, padding='SAME', name='conv3_1') - network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2') - network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3') + network = Conv2d( + network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1' + ) + network = Conv2d( + network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2' + ) + network = Conv2d( + network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') # conv4 - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3') + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') # conv5 - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3') + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5') return network diff --git a/example/tutorial_vgg19.py b/example/tutorial_vgg19.py index af1bd034f..f484f3dc8 100755 --- a/example/tutorial_vgg19.py +++ b/example/tutorial_vgg19.py @@ -27,7 +27,9 @@ try: from data.imagenet_classes import * except Exception as e: - raise Exception("{} / download the file from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data".format(e)) + raise Exception( + "{} / download the file from: https://github.com/zsdonghao/tensorlayer/tree/master/example/data".format(e) + ) VGG_MEAN = [103.939, 116.779, 123.68] @@ -92,42 +94,83 @@ def Vgg19(rgb): red - VGG_MEAN[2], ]) else: - bgr = tf.concat( - [ - blue - VGG_MEAN[0], - green - VGG_MEAN[1], - red - VGG_MEAN[2], - ], axis=3) + bgr = tf.concat([ + blue - VGG_MEAN[0], + green - VGG_MEAN[1], + red - VGG_MEAN[2], + ], axis=3) if bgr.get_shape().as_list()[1:] != [224, 224, 3]: raise Exception("image size unmatch") # input layer net_in = InputLayer(bgr, name='input') # conv1 - network = Conv2dLayer(net_in, 
act=tf.nn.relu, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_1') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_2') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') + network = Conv2dLayer( + net_in, act=tf.nn.relu, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_1' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_2' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1' + ) # conv2 - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_1') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_2') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_1' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_2' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2' + ) # conv3 - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 128, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_1') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_2') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_3') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_4') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 128, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_1' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_2' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_3' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_4' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3' + ) # conv4 - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_1') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_2') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_3') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_4') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1], 
padding='SAME', name='conv4_1' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_2' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_3' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_4' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4' + ) # conv5 - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_1') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_2') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_3') - network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_4') - network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_1' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_2' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_3' + ) + network = Conv2dLayer( + network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_4' + ) + network = PoolLayer( + network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5' + ) # fc 6~8 network = FlattenLayer(network, name='flatten') network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc6') @@ -167,41 +210,72 @@ def Vgg19_simple_api(rgb): red - VGG_MEAN[2], ]) else: - bgr = tf.concat( - [ - blue - VGG_MEAN[0], - green - VGG_MEAN[1], - red - VGG_MEAN[2], - ], axis=3) + bgr = tf.concat([ + blue - VGG_MEAN[0], + green - VGG_MEAN[1], + red - VGG_MEAN[2], + ], axis=3) if bgr.get_shape().as_list()[1:] != [224, 224, 3]: raise Exception("image size unmatch") # input layer net_in = InputLayer(bgr, name='input') # conv1 - network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1') - network = Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2') + network = Conv2d( + net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1' + ) + network = Conv2d( + network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') # conv2 - network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1') - network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2') + network = Conv2d( + network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1' + ) + network = Conv2d( + network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2' + ) 
network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') # conv3 - network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1') - network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2') - network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3') - network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_4') + network = Conv2d( + network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1' + ) + network = Conv2d( + network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2' + ) + network = Conv2d( + network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3' + ) + network = Conv2d( + network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_4' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') # conv4 - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_4') + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_4' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') # conv5 - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_4') + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3' + ) + network = Conv2d( + network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_4' + ) network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5') # fc 6~8 network = FlattenLayer(network, 
name='flatten') diff --git a/example/tutorial_word2vec_basic.py b/example/tutorial_word2vec_basic.py index 0efad97ab..ad275c0ac 100644 --- a/example/tutorial_word2vec_basic.py +++ b/example/tutorial_word2vec_basic.py @@ -136,21 +136,26 @@ def main_word2vec_basic(): data, count, dictionary, reverse_dictionary = \ tl.nlp.build_words_dataset(words, vocabulary_size, True, _UNK) - print('Most 5 common words (+UNK)', count[:5]) # [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] - print('Sample data', data[:10], [ - reverse_dictionary[i] for i in data[:10] - ]) # [5243, 3081, 12, 6, 195, 2, 3135, 46, 59, 156] [b'anarchism', b'originated', b'as', b'a', b'term', b'of', b'abuse', b'first', b'used', b'against'] + print('Most 5 common words (+UNK)', + count[:5]) # [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] + print( + 'Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]] + ) # [5243, 3081, 12, 6, 195, 2, 3135, 46, 59, 156] [b'anarchism', b'originated', b'as', b'a', b'term', b'of', b'abuse', b'first', b'used', b'against'] del words # Hint to reduce memory. # Step 3: Function to generate a training batch for the Skip-Gram model. print() - batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, batch_size=8, num_skips=4, skip_window=2, data_index=0) + batch, labels, data_index = tl.nlp.generate_skip_gram_batch( + data=data, batch_size=8, num_skips=4, skip_window=2, data_index=0 + ) for i in range(8): print(batch[i], reverse_dictionary[batch[i]], '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) - batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0) + batch, labels, data_index = tl.nlp.generate_skip_gram_batch( + data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0 + ) for i in range(8): print(batch[i], reverse_dictionary[batch[i]], '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) @@ -195,7 +200,8 @@ def main_word2vec_basic(): cost = emb_net.nce_cost train_params = emb_net.all_params # train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, var_list=train_params) - train_op = tf.train.AdagradOptimizer(learning_rate, initial_accumulator_value=0.1, use_locking=False).minimize(cost, var_list=train_params) + train_op = tf.train.AdagradOptimizer(learning_rate, initial_accumulator_value=0.1, + use_locking=False).minimize(cost, var_list=train_params) # Compute the cosine similarity between minibatch examples and all embeddings. # For simple visualization of validation set. @@ -229,7 +235,8 @@ def main_word2vec_basic(): while step < num_steps: start_time = time.time() batch_inputs, batch_labels, data_index = tl.nlp.generate_skip_gram_batch( - data=data, batch_size=batch_size, num_skips=num_skips, skip_window=skip_window, data_index=data_index) + data=data, batch_size=batch_size, num_skips=num_skips, skip_window=skip_window, data_index=data_index + ) feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels} # We perform one update step by evaluating the train_op (including it # in the list of returned values for sess.run() @@ -239,7 +246,10 @@ def main_word2vec_basic(): if step % print_freq == 0: if step > 0: average_loss /= print_freq - print("Average loss at step %d/%d. loss:%f took:%fs" % (step, num_steps, average_loss, time.time() - start_time)) + print( + "Average loss at step %d/%d. 
loss:%f took:%fs" % + (step, num_steps, average_loss, time.time() - start_time) + ) average_loss = 0 # Prints out nearby words given a list of words. # Note that this is expensive (~20% slowdown if computed every 500 steps) @@ -267,7 +277,8 @@ def main_word2vec_basic(): 'count': count, 'dictionary': dictionary, 'reverse_dictionary': reverse_dictionary - }, name=model_file_name + '.npy') + }, name=model_file_name + '.npy' + ) # if step == num_steps-1: # keeptrain = input("Training %d finished enter 1 to keep training: " % num_steps) @@ -281,7 +292,9 @@ def main_word2vec_basic(): print() final_embeddings = sess.run(normalized_embeddings) #.eval() - tl.visualize.tsne_embedding(final_embeddings, reverse_dictionary, plot_only=500, second=5, saveable=False, name='word2vec_basic') + tl.visualize.tsne_embedding( + final_embeddings, reverse_dictionary, plot_only=500, second=5, saveable=False, name='word2vec_basic' + ) # Step 7: Evaluate by analogy questions. see tensorflow/models/embedding/word2vec_optimized.py print() diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..96f68c83d --- /dev/null +++ b/setup.cfg @@ -0,0 +1,77 @@ +[flake8] +max-line-length = 120 +ignore = + D301 + E221 # Space before equal sign + E251 # Space after equal sign +exclude = + .git, + venv, + __pycache__, + .pytest_cache, + tensorlayer.egg-info, + build, + dist, + img + +[yapf] +based_on_style = pep8 + +# The number of columns to use for indentation. +indent_width = 4 + +# The column limit (or max line-length) +column_limit = 120 + +# Do not split consecutive brackets. Only relevant when DEDENT_CLOSING_BRACKETS is set +coalesce_brackets = False + +# Put closing brackets on a separate line, dedented, +# if the bracketed expression can't fit in a single line. +dedent_closing_brackets = True + +# Place each dictionary entry onto its own line. +each_dict_entry_on_separate_line = True + +# For list comprehensions and generator expressions with multiple clauses +# (e.g multiple "for" calls, "if" filter expressions) +# and which need to be reflowed, split each clause onto its own line +split_complex_comprehension = True + +# Split before arguments if the argument list is terminated by a comma. +split_arguments_when_comma_terminated = True + +# Split named assignments onto individual lines. +split_before_named_assigns = True + +# If an argument / parameter list is going to be split, then split before the first argument. +split_before_first_argument = False + +# Split after the opening paren which surrounds an expression if it doesn't fit on a single line. +split_before_expression_after_opening_paren = True + +# Split before the closing bracket if a list or dict literal doesn't fit on a single line. +# split_before_closing_bracket = True + +# Allow lambdas to be formatted on more than one line. +allow_multiline_lambdas = True + +# Allow splits before the dictionary value. +allow_split_before_dict_value = False + +# Join short lines into one line. E.g., single line if statements. +join_multiple_lines = True + +# Do not include spaces around selected binary operators. 
+# Example: 1 + 2 * 3 - 4 / 5 => 1 + 2*3 - 4/5 +no_spaces_around_selected_binary_operators = True + + +SPLIT_PENALTY_AFTER_OPENING_BRACKET = -30 +#SPLIT_PENALTY_AFTER_UNARY_OPERATOR = -5000 +#SPLIT_PENALTY_BEFORE_IF_EXPR = -5000 +SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = -8 +#SPLIT_PENALTY_IMPORT_NAMES = -5000 +#SPLIT_PENALTY_LOGICAL_OPERATOR = -5000 + + diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py index 2bf741b97..ac3d23db8 100644 --- a/tensorlayer/activation.py +++ b/tensorlayer/activation.py @@ -1,5 +1,6 @@ #! /usr/bin/python # -*- coding: utf-8 -*- +"""A file containing various activation functions.""" import tensorflow as tf from tensorflow.python.util.deprecation import deprecated @@ -18,7 +19,8 @@ @deprecated("2018-06-30", "This API will be deprecated soon as tf.identity can do the same thing.") def identity(x): - """The identity activation function. + """Identity activation function. + Shortcut is ``linear``. Parameters @@ -36,7 +38,7 @@ def identity(x): def ramp(x, v_min=0, v_max=1, name=None): - """The ramp activation function. + """Ramp activation function. Parameters ---------- @@ -59,15 +61,14 @@ def ramp(x, v_min=0, v_max=1, name=None): def leaky_relu(x, alpha=0.1, name="lrelu"): - """The LeakyReLU, Shortcut is ``lrelu``. + """LeakyReLU, Shortcut is ``lrelu``. Modified version of ReLU, introducing a nonzero gradient for negative input. Parameters ---------- x : Tensor - Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, - ``int16``, or ``int8``. + Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, ``int16``, or ``int8``. alpha : float Slope. name : str @@ -83,8 +84,9 @@ def leaky_relu(x, alpha=0.1, name="lrelu"): A ``Tensor`` in the same type as ``x``. References ------------ - - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) `__ + ---------- + - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013)` + http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf """ # with tf.name_scope(name) as scope: @@ -96,7 +98,8 @@ def leaky_relu(x, alpha=0.1, name="lrelu"): def swish(x, name='swish'): - """The Swish function. + """Swish function. + See `Swish: a Self-Gated Activation Function `__. Parameters @@ -122,24 +125,33 @@ def _sign_grad(unused_op, grad): return tf.clip_by_value(tf.identity(grad), -1, 1) -def sign(x): # https://github.com/AngusG/tensorflow-xnor-bnn/blob/master/models/binary_net.py#L36 +def sign(x): """Sign function. - Clip and binarize tensor using the straight through estimator (STE) for the gradient, usually be used for quantizing values in `Binarized Neural Networks `__. + Clip and binarize tensor using the straight through estimator (STE) for the gradient, usually be used for + quantizing values in `Binarized Neural Networks`: https://arxiv.org/abs/1602.02830. Parameters ---------- x : Tensor input. + Examples + -------- + >>> net = tl.layers.DenseLayer(net, 100, act=tl.act.sign, name='dense') + Returns ------- Tensor A ``Tensor`` in the same type as ``x``. References ----------- - - `AngusG/tensorflow-xnor-bnn `__ + ---------- + - `BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, \ + Courbariaux et al. 
(2016)` https://arxiv.org/abs/1602.02830 """ with tf.get_default_graph().gradient_override_map({"sign": "QuantizeGrad"}): @@ -149,7 +161,9 @@ def sign(x): # https://github.com/AngusG/tensorflow-xnor-bnn/blob/master/models # if tf.__version__ > "1.7": # @tf.custom_gradient # def sign(x): # https://www.tensorflow.org/versions/master/api_docs/python/tf/custom_gradient?hl=ES#top_of_page -# """Differentiable sign function using sigmoid as the derivation function, see `tf.sign `__ and `tf.custom_gradient `__. +# """Differentiable sign function using sigmoid as the derivation function, +# see `tf.sign `__ and `tf.custom_gradient +# `__. # # Parameters # ---------- @@ -171,7 +185,7 @@ def sign(x): # https://github.com/AngusG/tensorflow-xnor-bnn/blob/master/models def hard_tanh(x, name='htanh'): """Hard tanh activation function. - Which is a ramp function with low bound of -1 and upper bound of 1, shortcut is ``htanh`. + A ramp function with lower bound of -1 and upper bound of 1; shortcut is ``htanh``. Parameters ---------- @@ -193,6 +207,7 @@ def hard_tanh(x, name='htanh'): @deprecated("2018-06-30", "This API will be deprecated soon as tf.nn.softmax can do the same thing.") def pixel_wise_softmax(x, name='pixel_wise_softmax'): """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1. + Usually be used for image segmentation. Parameters diff --git a/tensorlayer/cli/train.py b/tensorlayer/cli/train.py index 24744a479..f5524af41 100755 --- a/tensorlayer/cli/train.py +++ b/tensorlayer/cli/train.py @@ -108,10 +108,12 @@ def create_tf_jobs(cluster_spec, prog, args): for job_type in cluster_spec: for task_index in range(len(cluster_spec[job_type])): new_env = os.environ.copy() - new_env.update({ - 'CUDA_VISIBLE_DEVICES': str(gpu_assignment.get((job_type, task_index), '')), - 'TF_CONFIG': json.dumps(create_tf_config(cluster_spec, job_type, task_index)), - }) + new_env.update( + { + 'CUDA_VISIBLE_DEVICES': str(gpu_assignment.get((job_type, task_index), '')), + 'TF_CONFIG': json.dumps(create_tf_config(cluster_spec, job_type, task_index)), + } + ) yield subprocess.Popen(['python3', prog] + args, env=new_env) diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py index 4a121b6ea..01ba38d94 100644 --- a/tensorlayer/cost.py +++ b/tensorlayer/cost.py @@ -99,7 +99,9 @@ def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'): # output = ops.convert_to_tensor(output, name="preds") # target = ops.convert_to_tensor(targets, name="target") with tf.name_scope(name): - return tf.reduce_mean(tf.reduce_sum(-(target * tf.log(output + epsilon) + (1. - target) * tf.log(1. - output + epsilon)), axis=1)) + return tf.reduce_mean( + tf.reduce_sum(-(target * tf.log(output + epsilon) + (1. - target) * tf.log(1. - output + epsilon)), axis=1) + ) # For brevity, let `x = output`, `z = target`. 
The binary cross entropy loss is # @@ -397,7 +399,9 @@ def cross_entropy_seq(logits, target_seqs, batch_size=None): #, batch_size=1, n # except: # sequence_loss_by_example_fn = tf.nn.seq2seq.sequence_loss_by_example - loss = sequence_loss_by_example_fn([logits], [tf.reshape(target_seqs, [-1])], [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)]) + loss = sequence_loss_by_example_fn( + [logits], [tf.reshape(target_seqs, [-1])], [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)] + ) # [tf.ones([batch_size * num_steps])]) cost = tf.reduce_sum(loss) #/ batch_size if batch_size is not None: @@ -460,7 +464,8 @@ def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details= loss = tf.divide( tf.reduce_sum(losses), # loss from mask. reduce_sum before element-wise mul with mask !! tf.reduce_sum(weights), - name="seq_loss_with_mask") + name="seq_loss_with_mask" + ) # except: ## TF0.12 # loss = tf.div(tf.reduce_sum(losses), # loss from mask. reduce_sum before element-wise mul with mask !! # tf.reduce_sum(weights), @@ -490,7 +495,9 @@ def cosine_similarity(v1, v2): """ # try: ## TF1.0 - cost = tf.reduce_sum(tf.multiply(v1, v2), 1) / (tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) * tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1))) + cost = tf.reduce_sum(tf.multiply(v1, v2), 1) / ( + tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) * tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1)) + ) # except: ## TF0.12 # cost = tf.reduce_sum(tf.mul(v1, v2), reduction_indices=1) / (tf.sqrt(tf.reduce_sum(tf.mul(v1, v1), reduction_indices=1)) * tf.sqrt(tf.reduce_sum(tf.mul(v2, v2), reduction_indices=1))) return cost @@ -542,7 +549,10 @@ def li(weights): # standard_ops_fn = standard_ops.mul # else: standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))), name=scope) + return standard_ops_fn( + my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))), + name=scope + ) return li @@ -590,7 +600,10 @@ def lo(weights, name='lo_regularizer'): # standard_ops_fn = standard_ops.mul # else: standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))), name=scope) + return standard_ops_fn( + my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))), + name=scope + ) return lo @@ -687,7 +700,9 @@ def mn_o(weights, name='maxnorm_o_regularizer'): standard_ops_fn = standard_ops.mul else: standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope) + return standard_ops_fn( + my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope + ) return mn_o @@ -735,6 +750,8 @@ def mn_i(weights, name='maxnorm_i_regularizer'): standard_ops_fn = standard_ops.mul else: standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope) + return standard_ops_fn( + my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope + ) return mn_i diff --git a/tensorlayer/db.py b/tensorlayer/db.py index 2499d7d46..27749459d 100644 --- a/tensorlayer/db.py +++ b/tensorlayer/db.py @@ -16,6 +16,7 @@ def AutoFill(func): + def func_wrapper(self, 
*args, **kwargs): d = inspect.getcallargs(func, self, *args, **kwargs) d['args'].update({"studyID": self.studyID}) @@ -59,7 +60,9 @@ class TensorDB(object): - You may like to install MongoChef or Mongo Management Studo APP for visualizing or testing your MongoDB. """ - def __init__(self, ip='localhost', port=27017, db_name='db_name', user_name=None, password='password', studyID=None): + def __init__( + self, ip='localhost', port=27017, db_name='db_name', user_name=None, password='password', studyID=None + ): ## connect mongodb client = MongoClient(ip, port) self.db = client[db_name] @@ -417,7 +420,14 @@ def push_job(self, margs, wargs, dargs, epoch): _ms, mid = self.load_model_architecture(margs) _weight, wid = self.find_one_params(wargs) - args = {"weight": wid, "model": mid, "dargs": dargs, "epoch": epoch, "time": datetime.utcnow(), "Running": False} + args = { + "weight": wid, + "model": mid, + "dargs": dargs, + "epoch": epoch, + "time": datetime.utcnow(), + "Running": False + } self.__autofill(args) self.db.JOBS.insert_one(args) diff --git a/tensorlayer/deprecation.py b/tensorlayer/deprecation.py index 61a153db6..7b0e78c8a 100644 --- a/tensorlayer/deprecation.py +++ b/tensorlayer/deprecation.py @@ -8,7 +8,9 @@ def deprecated_alias(end_support_version, **aliases): + def deco(f): + @functools.wraps(f) def wrapper(*args, **kwargs): @@ -36,7 +38,9 @@ def rename_kwargs(kwargs, aliases, end_support_version, func_name): raise TypeError('{}() received both {} and {}'.format(func_name, alias, new)) warnings.warn('{}() - {} is deprecated; use {}'.format(func_name, alias, new), DeprecationWarning) - logging.warning("DeprecationWarning: {}(): " - "`{}` argument is deprecated and will be removed in version {}, " - "please change for `{}.`".format(func_name, alias, end_support_version, new)) + logging.warning( + "DeprecationWarning: {}(): " + "`{}` argument is deprecated and will be removed in version {}, " + "please change for `{}.`".format(func_name, alias, end_support_version, new) + ) kwargs[new] = kwargs.pop(alias) diff --git a/tensorlayer/distributed.py b/tensorlayer/distributed.py index 53c1b407d..d808e50d2 100644 --- a/tensorlayer/distributed.py +++ b/tensorlayer/distributed.py @@ -71,7 +71,13 @@ def __init__(self, task_type='master', index=0, trial=None, ps_hosts=None, worke self.num_ps = len(self.ps_hosts) self.worker_hosts = worker_hosts if isinstance(worker_hosts, list) else worker_hosts.split(',') if master is not None and len(master) > 0: - self._cluster_spec = tf.train.ClusterSpec({'ps': self.ps_hosts, 'worker': self.worker_hosts, 'master': master}) + self._cluster_spec = tf.train.ClusterSpec( + { + 'ps': self.ps_hosts, + 'worker': self.worker_hosts, + 'master': master + } + ) # master is a worker too self.num_workers = len(self.worker_hosts) + 1 if self.type == 'worker': @@ -104,7 +110,9 @@ def device_fn(self): """Returns the function with the specification to create the graph in this server""" current_device = '/job:{}/task:{}'.format(self.type, self._index) ps_devices = '/job:ps' - return tf.train.replica_device_setter(ps_device=ps_devices, worker_device=current_device, cluster=self._cluster_spec) + return tf.train.replica_device_setter( + ps_device=ps_devices, worker_device=current_device, cluster=self._cluster_spec + ) def create_server(self): if self._server is None and self.ps_hosts and self.worker_hosts and not self.is_evaluator(): @@ -131,8 +139,11 @@ def use_last_worker_as_evaluator(self): """ if self.num_workers <= 1: raise Exception('You need more than one worker instance 
to use one as evaluator') + return TaskSpecDef( - task_type=self.type, index=self._index, trial=self.trial, ps_hosts=self.ps_hosts, worker_hosts=self.worker_hosts[:-1], master=self.master) + task_type=self.type, index=self._index, trial=self.trial, ps_hosts=self.ps_hosts, + worker_hosts=self.worker_hosts[:-1], master=self.master + ) def create_task_spec_def(): @@ -150,35 +161,25 @@ def create_task_spec_def(): task_data = env.get('task', None) or {'type': 'master', 'index': 0} cluster_data = env.get('cluster', None) or {'ps': None, 'worker': None, 'master': None} return TaskSpecDef( - task_type=task_data['type'], - index=task_data['index'], - trial=task_data['trial'] if 'trial' in task_data else None, - ps_hosts=cluster_data['ps'], - worker_hosts=cluster_data['worker'], - master=cluster_data['master'] if 'master' in cluster_data else None) + task_type=task_data['type'], index=task_data['index'], trial=task_data['trial'] if 'trial' in task_data else + None, ps_hosts=cluster_data['ps'], worker_hosts=cluster_data['worker'], master=cluster_data['master'] + if 'master' in cluster_data else None + ) elif 'JOB_NAME' in os.environ: # JOB_NAME, TASK_INDEX, PS_HOSTS, WORKER_HOSTS and MASTER_HOST are used in TensorPort return TaskSpecDef( - task_type=os.environ['JOB_NAME'], - index=os.environ['TASK_INDEX'], - ps_hosts=os.environ.get('PS_HOSTS', None), - worker_hosts=os.environ.get('WORKER_HOSTS', None), - master=os.environ.get('MASTER_HOST', None)) + task_type=os.environ['JOB_NAME'], index=os.environ['TASK_INDEX'], ps_hosts=os.environ.get('PS_HOSTS', None), + worker_hosts=os.environ.get('WORKER_HOSTS', None), master=os.environ.get('MASTER_HOST', None) + ) else: raise Exception('You need to setup TF_CONFIG or JOB_NAME to define the task.') -def create_distributed_session(task_spec=None, - checkpoint_dir=None, - scaffold=None, - hooks=None, - chief_only_hooks=None, - save_checkpoint_secs=600, - save_summaries_steps=object(), - save_summaries_secs=object(), - config=None, - stop_grace_period_secs=120, - log_step_count_steps=100): +def create_distributed_session( + task_spec=None, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, + save_summaries_steps=object(), save_summaries_secs=object(), config=None, stop_grace_period_secs=120, + log_step_count_steps=100 +): """Creates a distributed session. It calls `MonitoredTrainingSession` to create a :class:`MonitoredSession` for distributed training. 
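For orientation, the distributed helpers reformatted in this file compose into a single flow: parse the cluster layout from the environment, start the server, build the graph under the task's device function, then train inside the monitored session. A rough usage sketch under those assumptions (checkpoint path illustrative; assumes the module is exposed as `tl.distributed`):

    import tensorflow as tf
    import tensorlayer as tl

    task_spec = tl.distributed.create_task_spec_def()  # reads TF_CONFIG, or the TensorPort JOB_NAME variables
    task_spec.create_server()  # starts the TensorFlow server for this ps/worker task
    with tf.device(task_spec.device_fn()):  # a replica_device_setter, per the hunk above
        pass  # build the model, loss and train_op here
    with tl.distributed.create_distributed_session(task_spec=task_spec, checkpoint_dir='/tmp/ckpt') as sess:
        pass  # behaves like a MonitoredSession; run training steps until it stops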
@@ -263,18 +264,11 @@ def create_distributed_session(task_spec=None, target = task_spec.target() if task_spec is not None else None is_chief = task_spec.is_master() if task_spec is not None else True return tf.train.MonitoredTrainingSession( - master=target, - is_chief=is_chief, - checkpoint_dir=checkpoint_dir, - scaffold=scaffold, - save_checkpoint_secs=save_checkpoint_secs, - save_summaries_steps=save_summaries_steps, - save_summaries_secs=save_summaries_secs, - log_step_count_steps=log_step_count_steps, - stop_grace_period_secs=stop_grace_period_secs, - config=config, - hooks=hooks, - chief_only_hooks=chief_only_hooks) + master=target, is_chief=is_chief, checkpoint_dir=checkpoint_dir, scaffold=scaffold, + save_checkpoint_secs=save_checkpoint_secs, save_summaries_steps=save_summaries_steps, + save_summaries_secs=save_summaries_secs, log_step_count_steps=log_step_count_steps, + stop_grace_period_secs=stop_grace_period_secs, config=config, hooks=hooks, chief_only_hooks=chief_only_hooks + ) class StopAtTimeHook(session_run_hook.SessionRunHook): diff --git a/tensorlayer/files.py b/tensorlayer/files.py index 2300618da..25c38dcf4 100644 --- a/tensorlayer/files.py +++ b/tensorlayer/files.py @@ -107,7 +107,9 @@ def load_fashion_mnist_dataset(shape=(-1, 784), path='data'): >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_fashion_mnist_dataset(shape=(-1,784), path='datasets') >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_fashion_mnist_dataset(shape=(-1, 28, 28, 1)) """ - return _load_mnist_dataset(shape, path, name='fashion_mnist', url='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/') + return _load_mnist_dataset( + shape, path, name='fashion_mnist', url='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/' + ) def _load_mnist_dataset(shape, path, name='mnist', url='http://yann.lecun.com/exdb/mnist/'): @@ -493,7 +495,10 @@ def load_matt_mahoney_text8_dataset(path='data'): return word_list -def load_imdb_dataset(path='data', nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3): +def load_imdb_dataset( + path='data', nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, + index_from=3 +): """Load IMDB dataset. Parameters @@ -563,7 +568,10 @@ def load_imdb_dataset(path='data', nb_words=None, skip_top=0, maxlen=None, test_ X = new_X labels = new_labels if not X: - raise Exception('After filtering for sequences shorter than maxlen=' + str(maxlen) + ', no sequence was kept. ' 'Increase maxlen.') + raise Exception( + 'After filtering for sequences shorter than maxlen=' + str(maxlen) + ', no sequence was kept. ' + 'Increase maxlen.' 
+ ) if not nb_words: nb_words = max([max(x) for x in X]) @@ -799,7 +807,8 @@ def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printab path = os.path.join(path, 'flickr1M') logging.info("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000)) images_zip = [ - 'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip', 'images7.zip', 'images8.zip', 'images9.zip' + 'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip', + 'images7.zip', 'images8.zip', 'images9.zip' ] tag_zip = 'tags.zip' url = 'http://press.liacs.nl/mirflickr/mirflickr1m/' @@ -930,7 +939,8 @@ def download_file_from_google_drive(ID, destination): def save_response_content(response, destination, chunk_size=32 * 1024): total_size = int(response.headers.get('content-length', 0)) with open(destination, "wb") as f: - for chunk in tqdm(response.iter_content(chunk_size), total=total_size, unit='B', unit_scale=True, desc=destination): + for chunk in tqdm(response.iter_content(chunk_size), total=total_size, unit='B', unit_scale=True, + desc=destination): if chunk: # filter out keep-alive new chunks f.write(chunk) @@ -1097,7 +1107,9 @@ def _recursive_parse_xml_to_dict(xml): elif dataset == "2012test": extracted_filename = "VOC2012test" #"VOCdevkit/VOC2012" logging.info(" [============= VOC 2012 Test Set =============]") - logging.info(" \nAuthor: 2012test only have person annotation, so 2007test is highly recommended for testing !\n") + logging.info( + " \nAuthor: 2012test only has person annotations, so 2007test is highly recommended for testing!\n" + ) import time time.sleep(3) if os.path.isdir(os.path.join(path, extracted_filename)) is False: @@ -1150,8 +1162,8 @@ def _recursive_parse_xml_to_dict(xml): del_folder(os.path.join(path, 'VOCdevkit')) # object classes(labels) NOTE: YOU CAN CUSTOMIZE THIS LIST classes = [ - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", - "pottedplant", "sheep", "sofa", "train", "tvmonitor" + "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", + "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor" ] if contain_classes_in_person: classes_in_person = ["head", "hand", "foot"] @@ -1168,7 +1180,10 @@ def _recursive_parse_xml_to_dict(xml): folder_imgs = os.path.join(path, extracted_filename, "JPEGImages") imgs_file_list = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False) logging.info("[VOC] {} images found".format(len(imgs_file_list))) - imgs_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000027.jpg --> 2007000027 + + imgs_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) + ) # 2007_000027.jpg --> 2007000027 + imgs_file_list = [os.path.join(folder_imgs, s) for s in imgs_file_list] # logging.info('IM',imgs_file_list[0::3333], imgs_file_list[-1]) if dataset != "2012test": @@ -1177,7 +1192,8 @@ def _recursive_parse_xml_to_dict(xml): folder_semseg = os.path.join(path, extracted_filename, "SegmentationClass") imgs_semseg_file_list = load_file_list(path=folder_semseg, regx='\\.png', printable=False) logging.info("[VOC] {} maps for semantic segmentation found".format(len(imgs_semseg_file_list))) - imgs_semseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000032.png 
--> 2007000032 + imgs_semseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) + ) # 2007_000032.png --> 2007000032 imgs_semseg_file_list = [os.path.join(folder_semseg, s) for s in imgs_semseg_file_list] # logging.info('Semantic Seg IM',imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1]) ##======== 3. instance segmentation maps path list @@ -1185,7 +1201,8 @@ def _recursive_parse_xml_to_dict(xml): folder_insseg = os.path.join(path, extracted_filename, "SegmentationObject") imgs_insseg_file_list = load_file_list(path=folder_insseg, regx='\\.png', printable=False) logging.info("[VOC] {} maps for instance segmentation found".format(len(imgs_semseg_file_list))) - imgs_insseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032 + imgs_insseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) + ) # 2007_000032.png --> 2007000032 imgs_insseg_file_list = [os.path.join(folder_insseg, s) for s in imgs_insseg_file_list] # logging.info('Instance Seg IM',imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1]) else: @@ -1195,8 +1212,11 @@ def _recursive_parse_xml_to_dict(xml): # folder_ann = path+"/"+extracted_filename+"/Annotations/" folder_ann = os.path.join(path, extracted_filename, "Annotations") imgs_ann_file_list = load_file_list(path=folder_ann, regx='\\.xml', printable=False) - logging.info("[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list))) - imgs_ann_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000027.xml --> 2007000027 + logging.info( + "[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list)) + ) + imgs_ann_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) + ) # 2007_000027.xml --> 2007000027 imgs_ann_file_list = [os.path.join(folder_ann, s) for s in imgs_ann_file_list] # logging.info('ANN',imgs_ann_file_list[0::3333], imgs_ann_file_list[-1]) @@ -1248,7 +1268,10 @@ def convert_annotation(file_name): continue cls_id = classes.index(cls) xmlbox = obj.find('bndbox') - b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) + b = ( + float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), + float(xmlbox.find('ymax').text) + ) bb = convert((w, h), b) out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' @@ -1260,7 +1283,10 @@ def convert_annotation(file_name): continue cls_id = classes.index(cls) xmlbox = part.find('bndbox') - b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) + b = ( + float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), + float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text) + ) bb = convert((w, h), b) # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' @@ -1387,8 +1413,10 @@ def save_joints(): head_rect = [] if 'x1' in str(anno['annorect'].dtype): - head_rect = zip([x1[0, 0] for x1 in anno['annorect']['x1'][0]], [y1[0, 0] for y1 in anno['annorect']['y1'][0]], - [x2[0, 0] for x2 in anno['annorect']['x2'][0]], [y2[0, 0] for y2 in anno['annorect']['y2'][0]]) + head_rect = zip( + [x1[0, 0] for x1 in 
anno['annorect']['x1'][0]], [y1[0, 0] for y1 in anno['annorect']['y1'][0]], + [x2[0, 0] for x2 in anno['annorect']['x2'][0]], [y2[0, 0] for y2 in anno['annorect']['y2'][0]] + ) else: head_rect = [] # TODO @@ -1398,11 +1426,18 @@ def save_joints(): head_y1s = anno['annorect']['y1'][0] head_x2s = anno['annorect']['x2'][0] head_y2s = anno['annorect']['y2'][0] - for annopoint, head_x1, head_y1, head_x2, head_y2 in zip(annopoints, head_x1s, head_y1s, head_x2s, head_y2s): + + for annopoint, head_x1, head_y1, head_x2, head_y2 in zip(annopoints, head_x1s, head_y1s, head_x2s, + head_y2s): # if annopoint != []: # if len(annopoint) != 0: if annopoint.size: - head_rect = [float(head_x1[0, 0]), float(head_y1[0, 0]), float(head_x2[0, 0]), float(head_y2[0, 0])] + head_rect = [ + float(head_x1[0, 0]), + float(head_y1[0, 0]), + float(head_x2[0, 0]), + float(head_y2[0, 0]) + ] # joint coordinates annopoint = annopoint['point'][0, 0] @@ -1422,9 +1457,15 @@ def save_joints(): vis = None # if len(joint_pos) == 16: - if ((is_16_pos_only == True) and (len(joint_pos) == 16)) or (is_16_pos_only == False): + if ((is_16_pos_only == True) and (len(joint_pos) == 16)) or (is_16_pos_only == False): # only use image with 16 key points / or use all - data = {'filename': img_fn, 'train': train_flag, 'head_rect': head_rect, 'is_visible': vis, 'joint_pos': joint_pos} + data = { + 'filename': img_fn, + 'train': train_flag, + 'head_rect': head_rect, + 'is_visible': vis, + 'joint_pos': joint_pos + } # print(json.dumps(data), file=fp) # py3 if train_flag: ann_train_list[-1].append(data) @@ -1561,7 +1602,9 @@ def save_npz(save_list=None, name='model.npz', sess=None): try: save_list_var.extend([v.eval() for v in save_list]) except Exception: - logging.info(" Fail to save model, Hint: pass the session into this function, tl.files.save_npz(network.all_params, name='model.npz', sess=sess)") + logging.info( + " Fail to save model, Hint: pass the session into this function, tl.files.save_npz(network.all_params, name='model.npz', sess=sess)" + ) np.savez(name, params=save_list_var) save_list_var = None del save_list_var @@ -1738,7 +1781,9 @@ def load_and_assign_npz_dict(name='model.npz', sess=None): logging.info("[*] Model restored from npz_dict %s" % name) -def save_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=None, global_step=None, printable=False): +def save_ckpt( + sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=None, global_step=None, printable=False +): """Save parameters into `ckpt` file.
Parameters diff --git a/tensorlayer/iterate.py b/tensorlayer/iterate.py index dc3051c7a..76d756c3a 100644 --- a/tensorlayer/iterate.py +++ b/tensorlayer/iterate.py @@ -59,8 +59,9 @@ def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): excerpt = indices[start_idx:start_idx + batch_size] else: excerpt = slice(start_idx, start_idx + batch_size) - if (isinstance(inputs, list) or isinstance(targets, list)) and (shuffle == True): - yield [inputs[i] for i in excerpt], [targets[i] for i in excerpt] # zsdonghao: for list indexing when shuffle==True + if (isinstance(inputs, list) or isinstance(targets, list)) and (shuffle == True): + # zsdonghao: for list indexing when shuffle==True + yield [inputs[i] for i in excerpt], [targets[i] for i in excerpt] else: yield inputs[excerpt], targets[excerpt] diff --git a/tensorlayer/layers/binary.py b/tensorlayer/layers/binary.py index 4987fe767..ec22a5c5a 100644 --- a/tensorlayer/layers/binary.py +++ b/tensorlayer/layers/binary.py @@ -80,7 +80,10 @@ def _compute_alpha(x): alpha1_temp2 = tf.where(tf.less(x, -threshold), x, tf.zeros_like(x, tf.float32)) alpha_array = tf.add(alpha1_temp1, alpha1_temp2, name=None) alpha_array_abs = tf.abs(alpha_array) - alpha_array_abs1 = tf.where(tf.greater(alpha_array_abs, 0), tf.ones_like(alpha_array_abs, tf.float32), tf.zeros_like(alpha_array_abs, tf.float32)) + alpha_array_abs1 = tf.where( + tf.greater(alpha_array_abs, 0), tf.ones_like(alpha_array_abs, tf.float32), + tf.zeros_like(alpha_array_abs, tf.float32) + ) alpha_sum = tf.reduce_sum(alpha_array_abs) n = tf.reduce_sum(alpha_array_abs1) alpha = tf.div(alpha_sum, n) @@ -159,14 +162,18 @@ def __init__( self.n_units = n_units with tf.variable_scope(name): - W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) # W = tl.act.sign(W) # dont update ... W = quantize(W) # W = tf.Variable(W) # print(W) if b_init is not None: try: - b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) except Exception: # If initializer is a constant, do not specify shape.
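A quick aside on the try/except pattern that closes this hunk and recurs under every layer below: in TF 1.x, tf.get_variable accepts either an initializer object plus an explicit shape, or a constant Tensor whose shape is inferred, and supplying both raises a ValueError ("If initializer is a constant, do not specify shape."), which is exactly what the fallback on the next line handles. A minimal sketch of that behaviour, with illustrative names:

    import numpy as np
    import tensorflow as tf

    def make_bias(n_units, b_init):
        try:
            # works when b_init is an initializer object, e.g. tf.constant_initializer(0.0)
            return tf.get_variable(name='b', shape=(n_units, ), initializer=b_init)
        except Exception:
            # works when b_init is a constant Tensor that already carries its shape
            return tf.get_variable(name='b', initializer=b_init)

    with tf.variable_scope('demo_a'):
        b1 = make_bias(8, tf.constant_initializer(0.0))
    with tf.variable_scope('demo_b'):
        b2 = make_bias(8, tf.constant(np.zeros(8, dtype=np.float32)))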
b = tf.get_variable(name='b', initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) self.outputs = act(tf.matmul(self.inputs, W) + b) @@ -263,8 +270,10 @@ def __init__( name='binary_cnn2d', ): super(BinaryConv2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("BinaryConv2d %s: n_filter:%d filter_size:%s strides:%s pad:%s act:%s" % (name, n_filter, str(filter_size), str(strides), padding, - act.__name__)) + logging.info( + "BinaryConv2d %s: n_filter:%d filter_size:%s strides:%s pad:%s act:%s" % + (name, n_filter, str(filter_size), str(strides), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -287,14 +296,27 @@ def __init__( shape = (filter_size[0], filter_size[1], pre_channel, n_filter) strides = (1, strides[0], strides[1], 1) with tf.variable_scope(name): - W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) W = quantize(W) if b_init: - b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) self.outputs = act( - tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b) + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + b + ) else: - self.outputs = act(tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format)) + self.outputs = act( + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + ) self.all_layers.append(self.outputs) if b_init: @@ -363,7 +385,9 @@ def __init__( self.n_units = n_units with tf.variable_scope(name): - W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) # W = tl.act.sign(W) # dont update ... alpha = _compute_alpha(W) W = _ternary_operation(W) @@ -372,7 +396,9 @@ def __init__( # print(W) if b_init is not None: try: - b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) except Exception: # If initializer is a constant, do not specify shape. 
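For context on the _compute_alpha and _ternary_operation helpers that the Ternary* layers rely on: weights are mapped to {-alpha, 0, +alpha}, where alpha is the mean absolute value of the weights that survive thresholding (which is what the _compute_alpha hunk earlier in this file computes). A NumPy sketch follows; the 0.7 * mean(|W|) threshold is an assumption taken from the usual ternary-weight-network recipe, since _compute_threshold itself is not shown in this diff:

    import numpy as np

    def ternarize(w):
        delta = 0.7 * np.abs(w).mean()  # assumed threshold rule, not shown in this PR
        w_t = np.where(w > delta, 1.0, np.where(w < -delta, -1.0, 0.0))
        survivors = np.abs(w) > delta
        # mirrors _compute_alpha: mean |w| over the entries that stay non-zero
        alpha = np.abs(w[survivors]).mean() if survivors.any() else 0.0
        return alpha * w_t

    w = np.random.randn(4, 4).astype(np.float32)
    print(ternarize(w))  # every entry is -alpha, 0, or +alpha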
b = tf.get_variable(name='b', initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) self.outputs = act(tf.matmul(self.inputs, W) + b) @@ -469,8 +495,10 @@ def __init__( name='ternary_cnn2d', ): super(TernaryConv2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("TernaryConv2d %s: n_filter:%d filter_size:%s strides:%s pad:%s act:%s" % (name, n_filter, str(filter_size), str(strides), padding, - act.__name__)) + logging.info( + "TernaryConv2d %s: n_filter:%d filter_size:%s strides:%s pad:%s act:%s" % + (name, n_filter, str(filter_size), str(strides), padding, act.__name__) + ) if W_init_args is None: W_init_args = {} @@ -491,16 +519,29 @@ def __init__( shape = (filter_size[0], filter_size[1], pre_channel, n_filter) strides = (1, strides[0], strides[1], 1) with tf.variable_scope(name): - W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) alpha = _compute_alpha(W) W = _ternary_operation(W) W = tf.multiply(alpha, W) if b_init: - b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) self.outputs = act( - tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b) + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + b + ) else: - self.outputs = act(tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format)) + self.outputs = act( + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + ) self.all_layers.append(self.outputs) if b_init: @@ -576,7 +617,9 @@ def __init__( self.n_units = n_units with tf.variable_scope(name): - W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) # W = tl.act.sign(W) # dont update ... W = _quantize_weight(W, bitW) self.inputs = _quantize_active(_cabs(self.inputs), bitA) @@ -584,7 +627,9 @@ def __init__( # print(W) if b_init is not None: try: - b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) except Exception: # If initializer is a constant, do not specify shape. 
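The Dorefa* layers in the next hunks quantize weights to bitW bits and activations to bitA bits. The helper names (_quantize_weight, _quantize_active, _cabs) come from this diff, but their bodies are not shown; the sketch below follows the standard DoReFa-Net formulas, which these helpers presumably implement:

    import numpy as np

    def quantize_k(x, k):
        # uniform k-bit quantization of values in [0, 1]
        n = float(2 ** k - 1)
        return np.round(x * n) / n

    def quantize_weight(w, bitW):
        # squash with tanh, rescale into [0, 1], quantize, then map back to [-1, 1]
        t = np.tanh(w)
        w01 = t / (2 * np.abs(t).max()) + 0.5
        return 2 * quantize_k(w01, bitW) - 1

    def quantize_active(a, bitA):
        # activations are clipped into [0, 1] first (the role of _cabs), then quantized
        return quantize_k(np.clip(a, 0.0, 1.0), bitA)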
b = tf.get_variable(name='b', initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) self.outputs = act(tf.matmul(self.inputs, W) + b) @@ -687,8 +732,10 @@ def __init__( name='dorefa_cnn2d', ): super(DorefaConv2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("DorefaConv2d %s: n_filter:%d filter_size:%s strides:%s pad:%s act:%s" % (name, n_filter, str(filter_size), str(strides), padding, - act.__name__)) + logging.info( + "DorefaConv2d %s: n_filter:%d filter_size:%s strides:%s pad:%s act:%s" % + (name, n_filter, str(filter_size), str(strides), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -712,15 +759,28 @@ def __init__( shape = (filter_size[0], filter_size[1], pre_channel, n_filter) strides = (1, strides[0], strides[1], 1) with tf.variable_scope(name): - W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) W = _quantize_weight(W, bitW) self.inputs = _quantize_active(_cabs(self.inputs), bitA) if b_init: - b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) self.outputs = act( - tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b) + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + b + ) else: - self.outputs = act(tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format)) + self.outputs = act( + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + ) self.all_layers.append(self.outputs) if b_init: diff --git a/tensorlayer/layers/convolution.py b/tensorlayer/layers/convolution.py index 468b161d5..0d47f861b 100644 --- a/tensorlayer/layers/convolution.py +++ b/tensorlayer/layers/convolution.py @@ -79,7 +79,9 @@ def __init__( name='cnn1d', ): super(Conv1dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("Conv1dLayer %s: shape:%s stride:%s pad:%s act:%s" % (name, str(shape), str(stride), padding, act.__name__)) + logging.info( + "Conv1dLayer %s: shape:%s stride:%s pad:%s act:%s" % (name, str(shape), str(stride), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -92,10 +94,16 @@ def __init__( b_init_args = {} with tf.variable_scope(name): - W = tf.get_variable(name='W_conv1d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - self.outputs = tf.nn.convolution(self.inputs, W, strides=(stride, ), padding=padding, dilation_rate=(dilation_rate, )) # 1.2 + W = tf.get_variable( + name='W_conv1d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) + self.outputs = tf.nn.convolution( + self.inputs, W, strides=(stride, ), padding=padding, dilation_rate=(dilation_rate, ) + ) # 1.2 if b_init: - b = tf.get_variable(name='b_conv1d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b_conv1d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) self.outputs = self.outputs + b self.outputs = act(self.outputs) @@ 
-192,7 +200,10 @@ def __init__( name='cnn_layer', ): super(Conv2dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("Conv2dLayer %s: shape:%s strides:%s pad:%s act:%s" % (name, str(shape), str(strides), padding, act.__name__)) + logging.info( + "Conv2dLayer %s: shape:%s strides:%s pad:%s act:%s" % + (name, str(shape), str(strides), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -204,13 +215,26 @@ def __init__( act = tf.identity with tf.variable_scope(name): - W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) if b_init: - b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) self.outputs = act( - tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b) + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + b + ) else: - self.outputs = act(tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format)) + self.outputs = act( + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + ) self.all_layers.append(self.outputs) if b_init: @@ -312,8 +336,10 @@ def __init__( name='decnn2d_layer', ): super(DeConv2dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("DeConv2dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % (name, str(shape), str(output_shape), str(strides), padding, - act.__name__)) + logging.info( + "DeConv2dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % + (name, str(shape), str(output_shape), str(strides), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -326,12 +352,21 @@ def __init__( # logging.info(" DeConv2dLayer: Untested") with tf.variable_scope(name): - W = tf.get_variable(name='W_deconv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W_deconv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) if b_init: - b = tf.get_variable(name='b_deconv2d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act(tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b) + b = tf.get_variable( + name='b_deconv2d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) + self.outputs = act( + tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + + b + ) else: - self.outputs = act(tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding)) + self.outputs = act( + tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + ) # self.all_layers = list(layer.all_layers) # self.all_params = list(layer.all_params) @@ -394,7 +429,10 @@ def __init__( name='cnn3d_layer', ): super(Conv3dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("Conv3dLayer %s: shape:%s strides:%s pad:%s act:%s" % 
(name, str(shape), str(strides), padding, act.__name__)) + logging.info( + "Conv3dLayer %s: shape:%s strides:%s pad:%s act:%s" % + (name, str(shape), str(strides), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -408,9 +446,13 @@ def __init__( with tf.variable_scope(name): # W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv') # b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv') - W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W_conv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) if b_init: - b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b) else: self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None)) @@ -474,8 +516,10 @@ def __init__( name='decnn3d_layer', ): super(DeConv3dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("DeConv3dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % (name, str(shape), str(output_shape), str(strides), padding, - act.__name__)) + logging.info( + "DeConv3dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % + (name, str(shape), str(output_shape), str(strides), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -487,12 +531,21 @@ def __init__( act = tf.identity with tf.variable_scope(name): - W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W_deconv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) if b_init: - b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b) + b = tf.get_variable( + name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) + self.outputs = act( + tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + + b + ) else: - self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding)) + self.outputs = act( + tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + ) # self.all_layers = list(layer.all_layers) # self.all_params = list(layer.all_params) @@ -539,7 +592,10 @@ def __init__( name='upsample2d_layer', ): super(UpSampling2dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("UpSampling2dLayer %s: is_scale:%s size:%s method:%d align_corners:%s" % (name, is_scale, size, method, align_corners)) + logging.info( + "UpSampling2dLayer %s: is_scale:%s size:%s method:%d align_corners:%s" % + (name, is_scale, size, method, align_corners) + ) self.inputs = prev_layer.outputs @@ -563,9 +619,13 @@ def __init__( with tf.variable_scope(name): try: - self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners) + self.outputs = tf.image.resize_images( + self.inputs, size=size, method=method, align_corners=align_corners + 
) except Exception: # for TF 0.10 - self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners) + self.outputs = tf.image.resize_images( + self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners + ) # self.all_layers = list(layer.all_layers) # self.all_params = list(layer.all_params) @@ -608,7 +668,10 @@ def __init__( name='downsample2d_layer', ): super(DownSampling2dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("DownSampling2dLayer %s: is_scale:%s size:%s method:%d, align_corners:%s" % (name, is_scale, size, method, align_corners)) + logging.info( + "DownSampling2dLayer %s: is_scale:%s size:%s method:%d, align_corners:%s" % + (name, is_scale, size, method, align_corners) + ) self.inputs = prev_layer.outputs @@ -630,9 +693,13 @@ def __init__( with tf.variable_scope(name): try: - self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners) + self.outputs = tf.image.resize_images( + self.inputs, size=size, method=method, align_corners=align_corners + ) except Exception: # for TF 0.10 - self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners) + self.outputs = tf.image.resize_images( + self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners + ) # self.all_layers = list(layer.all_layers) # self.all_params = list(layer.all_params) @@ -701,10 +768,13 @@ def __init__( W_init=tf.truncated_normal_initializer(stddev=0.02), b_init=tf.constant_initializer(value=0.0), W_init_args=None, - b_init_args=None): + b_init_args=None + ): if tf.__version__ < "1.4": - raise Exception("Deformable CNN layer requires tensrflow 1.4 or higher version | current version %s" % tf.__version__) + raise Exception( + "Deformable CNN layer requires tensorflow 1.4 or higher version | current version %s" % tf.__version__ + ) if W_init_args is None: W_init_args = {} @@ -830,8 +900,8 @@ def _tf_batch_map_offsets(inputs, offsets, grid_offset): [ tf.clip_by_value(coords[:, :, :, :, 0], 0.0, tf.cast(input_h - 1, 'float32')), tf.clip_by_value(coords[:, :, :, :, 1], 0.0, tf.cast(input_w - 1, 'float32')) - ], - axis=-1) + ], axis=-1 + ) coords = tf.tile(coords, [channel, 1, 1, 1, 1]) mapped_vals = _tf_batch_map_coordinates(inputs, coords) @@ -841,7 +911,10 @@ def _tf_batch_map_offsets(inputs, offsets, grid_offset): return mapped_vals super(DeformableConv2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("DeformableConv2d %s: n_filter: %d, filter_size: %s act:%s" % (name, n_filter, str(filter_size), act.__name__)) + logging.info( + "DeformableConv2d %s: n_filter: %d, filter_size: %s act:%s" % + (name, n_filter, str(filter_size), act.__name__) + ) self.inputs = prev_layer.outputs @@ -864,7 +937,8 @@ def _tf_batch_map_offsets(inputs, offsets, grid_offset): input_h = int(self.inputs.get_shape()[1]) input_w = int(self.inputs.get_shape()[2]) kernel_n = shape[0] * shape[1] - initial_offsets = tf.stack(tf.meshgrid(tf.range(shape[0]), tf.range(shape[1]), indexing='ij')) # initial_offsets --> (kh, kw, 2) + initial_offsets = tf.stack(tf.meshgrid(tf.range(shape[0]), tf.range(shape[1]), + indexing='ij')) # initial_offsets --> (kh, kw, 2) initial_offsets = tf.reshape(initial_offsets, (-1, 2)) # initial_offsets --> (n, 2) initial_offsets = tf.expand_dims(initial_offsets, 0) # initial_offsets --> (1, n, 2) initial_offsets =
tf.expand_dims(initial_offsets, 0) # initial_offsets --> (1, 1, n, 2) @@ -872,8 +946,8 @@ def _tf_batch_map_offsets(inputs, offsets, grid_offset): initial_offsets = tf.cast(initial_offsets, 'float32') grid = tf.meshgrid( tf.range(-int((shape[0] - 1) / 2.0), int(input_h - int((shape[0] - 1) / 2.0)), 1), - tf.range(-int((shape[1] - 1) / 2.0), int(input_w - int((shape[1] - 1) / 2.0)), 1), - indexing='ij') + tf.range(-int((shape[1] - 1) / 2.0), int(input_w - int((shape[1] - 1) / 2.0)), 1), indexing='ij' + ) grid = tf.stack(grid, axis=-1) grid = tf.cast(grid, 'float32') # grid --> (h, w, 2) @@ -884,21 +958,25 @@ def _tf_batch_map_offsets(inputs, offsets, grid_offset): input_deform = _tf_batch_map_offsets(self.inputs, offset, grid_offset) W = tf.get_variable( - name='W_deformableconv2d', - shape=[1, 1, shape[0] * shape[1], shape[-2], shape[-1]], - initializer=W_init, - dtype=LayersConfig.tf_dtype, - **W_init_args) + name='W_deformableconv2d', shape=[1, 1, shape[0] * shape[1], shape[-2], shape[-1]], initializer=W_init, + dtype=LayersConfig.tf_dtype, **W_init_args + ) if b_init: - b = tf.get_variable(name='b_deformableconv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b_deformableconv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, + **b_init_args + ) self.outputs = tf.reshape( - act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None) + b), - (tf.shape(self.inputs)[0], input_h, input_w, shape[-1])) + tensor=act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None) + b), + shape=(tf.shape(self.inputs)[0], input_h, input_w, shape[-1]) + ) else: self.outputs = tf.reshape( - act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None)), - (tf.shape(self.inputs)[0], input_h, input_w, shape[-1])) + tensor=act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None)), + shape=[tf.shape(self.inputs)[0], input_h, input_w, shape[-1]] + ) # fixed # self.all_layers = list(layer.all_layers) @@ -1030,21 +1108,17 @@ class AtrousConv2dLayer(Layer): """ @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, - prev_layer, - n_filter=32, - filter_size=(3, 3), - rate=2, - act=tf.identity, - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='atrou2d'): + def __init__( + self, prev_layer, n_filter=32, filter_size=(3, 3), rate=2, act=tf.identity, padding='SAME', + W_init=tf.truncated_normal_initializer(stddev=0.02), b_init=tf.constant_initializer(value=0.0), + W_init_args=None, b_init_args=None, name='atrou2d' + ): super(AtrousConv2dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("AtrousConv2dLayer %s: n_filter:%d filter_size:%s rate:%d pad:%s act:%s" % (name, n_filter, filter_size, rate, padding, act.__name__)) + logging.info( + "AtrousConv2dLayer %s: n_filter:%d filter_size:%s rate:%d pad:%s act:%s" % + (name, n_filter, filter_size, rate, padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -1057,9 +1131,13 @@ def __init__(self, with tf.variable_scope(name): shape = [filter_size[0], filter_size[1], int(self.inputs.get_shape()[-1]), n_filter] - filters = tf.get_variable(name='filter', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + filters =
tf.get_variable( + name='filter', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) if b_init: - b = tf.get_variable(name='b', shape=(n_filter), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b', shape=(n_filter), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding) + b) else: self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding)) @@ -1129,29 +1207,21 @@ class _SeparableConv2dLayer(Layer): # TODO """ @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, - prev_layer, - n_filter, - filter_size=5, - strides=(1, 1), - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1), - depth_multiplier=1, - act=tf.identity, - use_bias=True, - depthwise_initializer=None, - pointwise_initializer=None, - bias_initializer=tf.zeros_initializer, - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - name='atrou2d'): + def __init__( + self, prev_layer, n_filter, filter_size=5, strides=(1, 1), padding='valid', data_format='channels_last', + dilation_rate=(1, 1), depth_multiplier=1, act=tf.identity, use_bias=True, depthwise_initializer=None, + pointwise_initializer=None, bias_initializer=tf.zeros_initializer, depthwise_regularizer=None, + pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, name='atrou2d' + ): super(_SeparableConv2dLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("SeparableConv2dLayer %s: n_filter:%d filter_size:%s strides:%s padding:%s dilation_rate:%s depth_multiplier:%s act:%s" % - (name, n_filter, filter_size, str(strides), padding, str(dilation_rate), str(depth_multiplier), act.__name__)) + logging.info( + "SeparableConv2dLayer %s: n_filter:%d filter_size:%s strides:%s padding:%s dilation_rate:%s depth_multiplier:%s act:%s" + % ( + name, n_filter, filter_size, str(strides), padding, str(dilation_rate), str(depth_multiplier), + act.__name__ + ) + ) self.inputs = prev_layer.outputs @@ -1231,7 +1301,9 @@ def deconv2d_bilinear_upsampling_initializer(shape): if shape[0] != shape[1]: raise Exception('deconv2d_bilinear_upsampling_initializer only supports symmetrical filter sizes') if shape[3] < shape[2]: - raise Exception('deconv2d_bilinear_upsampling_initializer behaviour is not defined for num_in_channels < num_out_channels ') + raise Exception( + 'deconv2d_bilinear_upsampling_initializer behaviour is not defined for num_in_channels < num_out_channels ' + ) filter_size = shape[0] num_out_channels = shape[2] @@ -1309,39 +1381,25 @@ class Conv1d(Layer): """ @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, - prev_layer, - n_filter=32, - filter_size=5, - stride=1, - dilation_rate=1, - act=tf.identity, - padding='SAME', - data_format="channels_last", - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='conv1d'): + def __init__( + self, prev_layer, n_filter=32, filter_size=5, stride=1, dilation_rate=1, act=tf.identity, padding='SAME', + data_format="channels_last", W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), W_init_args=None, b_init_args=None, name='conv1d' + ): super(Conv1d, 
self).__init__(prev_layer=prev_layer, name=name) - logging.info("Conv1d %s: n_filter:%d filter_size:%s stride:%d pad:%s act:%s dilation_rate:%d" % (name, n_filter, filter_size, stride, padding, - act.__name__, dilation_rate)) + logging.info( + "Conv1d %s: n_filter:%d filter_size:%s stride:%d pad:%s act:%s dilation_rate:%d" % + (name, n_filter, filter_size, stride, padding, act.__name__, dilation_rate) + ) self.inputs = prev_layer.outputs if tf.__version__ > '1.3': con1d = tf.layers.Conv1D( - filters=n_filter, - kernel_size=filter_size, - strides=stride, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=act, - use_bias=(True if b_init else False), - kernel_initializer=W_init, - bias_initializer=b_init, - name=name) + filters=n_filter, kernel_size=filter_size, strides=stride, padding=padding, data_format=data_format, + dilation_rate=dilation_rate, activation=act, use_bias=(True if b_init else False), + kernel_initializer=W_init, bias_initializer=b_init, name=name + ) # con1d.dtype = LayersConfig.tf_dtype # unsupport, it will use the same dtype of inputs self.outputs = con1d(self.inputs) new_variables = con1d.weights # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) @@ -1482,8 +1540,10 @@ def __init__( act = tf.identity if tf.__version__ > '1.5': - logging.info("Conv2d %s: n_filter:%d filter_size:%s strides:%s pad:%s act:%s" % (self.name, n_filter, str(filter_size), str(strides), padding, - act.__name__)) + logging.info( + "Conv2d %s: n_filter:%d filter_size:%s strides:%s pad:%s act:%s" % + (self.name, n_filter, str(filter_size), str(strides), padding, act.__name__) + ) # with tf.variable_scope(name) as vs: conv2d = tf.layers.Conv2D( # inputs=self.inputs, @@ -1521,17 +1581,33 @@ def __init__( shape = (filter_size[0], filter_size[1], pre_channel, n_filter) # 32 features for each 5x5 patch strides = (1, strides[0], strides[1], 1) - logging.info("Conv2d %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__)) + logging.info( + "Conv2d %s: shape:%s strides:%s pad:%s act:%s" % + (self.name, str(shape), str(strides), padding, act.__name__) + ) with tf.variable_scope(name): - W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) if b_init: - b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, + **b_init_args + ) self.outputs = act( - tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b) + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + b + ) else: self.outputs = act( - tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format)) + tf.nn.conv2d( + self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, + data_format=data_format + ) + ) self.all_layers.append(self.outputs) if b_init: @@ -1575,7 +1651,9 @@ class DeConv2d(Layer): """ - @deprecated_alias(layer='prev_layer', n_out_channel='n_filter', end_support_version=1.9) # TODO remove this line for the 1.9 release + @deprecated_alias( + 
layer='prev_layer', n_out_channel='n_filter', end_support_version=1.9 + ) # TODO remove this line for the 1.9 release def __init__( self, prev_layer, @@ -1590,9 +1668,13 @@ def __init__( b_init=tf.constant_initializer(value=0.0), W_init_args=None, # remove b_init_args=None, # remove - name='decnn2d'): + name='decnn2d' + ): super(DeConv2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("DeConv2d %s: n_filters:%s strides:%s pad:%s act:%s" % (name, str(n_filter), str(strides), padding, act.__name__)) + logging.info( + "DeConv2d %s: n_filters:%s strides:%s pad:%s act:%s" % + (name, str(n_filter), str(strides), padding, act.__name__) + ) if W_init_args is None: W_init_args = {} @@ -1608,14 +1690,9 @@ def __init__( self.inputs = prev_layer.outputs # scope_name = tf.get_variable_scope().name conv2d_transpose = tf.layers.Conv2DTranspose( - filters=n_filter, - kernel_size=filter_size, - strides=strides, - padding=padding, - activation=act, - kernel_initializer=W_init, - bias_initializer=b_init, - name=name) + filters=n_filter, kernel_size=filter_size, strides=strides, padding=padding, activation=act, + kernel_initializer=W_init, bias_initializer=b_init, name=name + ) self.outputs = conv2d_transpose(self.inputs) new_variables = conv2d_transpose.weights # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) self.all_layers.append(self.outputs) @@ -1671,19 +1748,17 @@ class DeConv3d(Layer): """ @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, - prev_layer, - n_filter=32, - filter_size=(3, 3, 3), - strides=(2, 2, 2), - padding='SAME', - act=tf.identity, - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - name='decnn3d'): + def __init__( + self, prev_layer, n_filter=32, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='SAME', act=tf.identity, + W_init=tf.truncated_normal_initializer(stddev=0.02), b_init=tf.constant_initializer(value=0.0), + name='decnn3d' + ): super(DeConv3d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("DeConv3d %s: n_filters:%s strides:%s pad:%s act:%s" % (name, str(n_filter), str(strides), padding, act.__name__)) + logging.info( + "DeConv3d %s: n_filters:%s strides:%s pad:%s act:%s" % + (name, str(n_filter), str(strides), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -1780,7 +1855,10 @@ def __init__( name='depthwise_conv2d', ): super(DepthwiseConv2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("DepthwiseConv2d %s: shape:%s strides:%s pad:%s act:%s" % (name, str(shape), str(strides), padding, act.__name__)) + logging.info( + "DepthwiseConv2d %s: shape:%s strides:%s pad:%s act:%s" % + (name, str(shape), str(strides), padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -1806,14 +1884,20 @@ def __init__( with tf.variable_scope(name): W = tf.get_variable( - name='W_depthwise2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, - **W_init_args) # [filter_height, filter_width, in_channels, depth_multiplier] + name='W_depthwise2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) # [filter_height, filter_width, in_channels, depth_multiplier] if b_init: b = tf.get_variable( - name='b_depthwise2d', shape=(pre_channel * depth_multiplier), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act(tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding, rate=dilation_rate) + b) +
name='b_depthwise2d', shape=(pre_channel * depth_multiplier), initializer=b_init, + dtype=LayersConfig.tf_dtype, **b_init_args + ) + self.outputs = act( + tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding, rate=dilation_rate) + b + ) else: - self.outputs = act(tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding, rate=dilation_rate)) + self.outputs = act( + tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding, rate=dilation_rate) + ) # self.all_layers = list(layer.all_layers) # self.all_params = list(layer.all_params) @@ -1895,9 +1979,10 @@ def __init__( # b_init_args = {} super(SeparableConv2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("SeparableConv2d %s: n_filter:%d filter_size:%s filter_size:%s depth_multiplier:%d act:%s" % (self.name, n_filter, str(filter_size), - str(strides), depth_multiplier, - act.__name__)) + logging.info( + "SeparableConv2d %s: n_filter:%d filter_size:%s filter_size:%s depth_multiplier:%d act:%s" % + (self.name, n_filter, str(filter_size), str(strides), depth_multiplier, act.__name__) + ) self.inputs = prev_layer.outputs @@ -1924,7 +2009,8 @@ def __init__( # pointwise_constraint=None, # bias_constraint=None, trainable=True, - name=None) + name=None + ) new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) self.all_layers.append(self.outputs) @@ -1980,8 +2066,10 @@ def __init__( ): # Windaway super(GroupConv2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("GroupConv2d %s: n_filter:%d size:%s strides:%s n_group:%d pad:%s act:%s" % (name, n_filter, str(filter_size), str(strides), n_group, - padding, act.__name__)) + logging.info( + "GroupConv2d %s: n_filter:%d size:%s strides:%s n_group:%d pad:%s act:%s" % + (name, n_filter, str(filter_size), str(strides), n_group, padding, act.__name__) + ) self.inputs = prev_layer.outputs @@ -1995,14 +2083,14 @@ def __init__( with tf.variable_scope(name): We = tf.get_variable( - name='W', - shape=[filter_size[0], filter_size[1], channels / n_group, n_filter], - initializer=W_init, - dtype=LayersConfig.tf_dtype, - trainable=True, - **W_init_args) + name='W', shape=[filter_size[0], filter_size[1], channels / n_group, n_filter], initializer=W_init, + dtype=LayersConfig.tf_dtype, trainable=True, **W_init_args + ) if b_init: - bi = tf.get_variable(name='b', shape=n_filter, initializer=b_init, dtype=LayersConfig.tf_dtype, trainable=True, **b_init_args) + bi = tf.get_variable( + name='b', shape=n_filter, initializer=b_init, dtype=LayersConfig.tf_dtype, trainable=True, + **b_init_args + ) if n_group == 1: conv = groupConv(self.inputs, We) else: diff --git a/tensorlayer/layers/core.py b/tensorlayer/layers/core.py index 73b7f1919..0bbb7188f 100644 --- a/tensorlayer/layers/core.py +++ b/tensorlayer/layers/core.py @@ -428,11 +428,16 @@ def print_params(self, details=True, session=None): try: # logging.info(" param {:3}: {:15} (mean: {:<18}, median: {:<18}, std: {:<18}) {}".format(i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name)) val = p.eval(session=session) - logging.info(" param {:3}: {:20} {:15} {} (mean: {:<18}, median: {:<18}, std: {:<18}) ".format( - i, p.name, str(val.shape), p.dtype.name, val.mean(), np.median(val), val.std())) + logging.info( + " param {:3}: {:20} {:15} {} (mean: {:<18}, median: {:<18}, std: {:<18}) ".format( + i, p.name, str(val.shape), p.dtype.name, val.mean(), np.median(val), val.std() + ) + ) except Exception as e: logging.info(str(e)) - raise 
Exception("Hint: print params details after tl.layers.initialize_global_variables(sess) or use network.print_params(False).") + raise Exception( + "Hint: print params details after tl.layers.initialize_global_variables(sess) or use network.print_params(False)." + ) else: logging.info(" param {:3}: {:20} {:15} {}".format(i, p.name, str(p.get_shape()), p.dtype.name)) logging.info(" num of params: %d" % self.count_params()) @@ -441,7 +446,9 @@ def print_layers(self): """Print all info of layers in the network""" for i, layer in enumerate(self.all_layers): # logging.info(" layer %d: %s" % (i, str(layer))) - logging.info(" layer {:3}: {:20} {:15} {}".format(i, layer.name, str(layer.get_shape()), layer.dtype.name)) + logging.info( + " layer {:3}: {:20} {:15} {}".format(i, layer.name, str(layer.get_shape()), layer.dtype.name) + ) def count_params(self): """Return the number of parameters in the network""" @@ -680,25 +687,29 @@ def __init__( # row vector with 'embedding_size' values. with tf.variable_scope(name): embeddings = tf.get_variable( - name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, dtype=LayersConfig.tf_dtype, **E_init_args) + name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, + dtype=LayersConfig.tf_dtype, **E_init_args + ) embed = tf.nn.embedding_lookup(embeddings, self.inputs) # Construct the variables for the NCE loss (i.e. negative sampling) nce_weights = tf.get_variable( - name='nce_weights', shape=(vocabulary_size, embedding_size), initializer=nce_W_init, dtype=LayersConfig.tf_dtype, **nce_W_init_args) - nce_biases = tf.get_variable(name='nce_biases', shape=(vocabulary_size), initializer=nce_b_init, dtype=LayersConfig.tf_dtype, **nce_b_init_args) + name='nce_weights', shape=(vocabulary_size, embedding_size), initializer=nce_W_init, + dtype=LayersConfig.tf_dtype, **nce_W_init_args + ) + nce_biases = tf.get_variable( + name='nce_biases', shape=(vocabulary_size), initializer=nce_b_init, dtype=LayersConfig.tf_dtype, + **nce_b_init_args + ) # Compute the average NCE loss for the batch. # tf.nce_loss automatically draws a new sample of the negative labels # each time we evaluate the loss. 
self.nce_cost = tf.reduce_mean( tf.nn.nce_loss( - weights=nce_weights, - biases=nce_biases, - inputs=embed, - labels=train_labels, - num_sampled=num_sampled, - num_classes=vocabulary_size, - **nce_loss_args)) + weights=nce_weights, biases=nce_biases, inputs=embed, labels=train_labels, num_sampled=num_sampled, + num_classes=vocabulary_size, **nce_loss_args + ) + ) self.outputs = embed self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1) @@ -765,7 +776,9 @@ def __init__( with tf.variable_scope(name): embeddings = tf.get_variable( - name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, dtype=LayersConfig.tf_dtype, **E_init_args) + name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, + dtype=LayersConfig.tf_dtype, **E_init_args + ) embed = tf.nn.embedding_lookup(embeddings, self.inputs) self.outputs = embed @@ -836,9 +849,7 @@ def __init__( with tf.variable_scope(name): self.embeddings = tf.get_variable( - name='embeddings', - shape=(vocabulary_size, embedding_size), - initializer=embeddings_initializer, + name='embeddings', shape=(vocabulary_size, embedding_size), initializer=embeddings_initializer, dtype=LayersConfig.tf_dtype, **(embeddings_kwargs or {}) # **embeddings_kwargs @@ -871,7 +882,8 @@ def __init__( sentence_embeddings = tf.divide( sum_word_embeddings, sentence_lengths + 1e-8, # Add epsilon to avoid dividing by 0 - name='sentence_embeddings') + name='sentence_embeddings' + ) self.outputs = sentence_embeddings self.all_layers = [self.outputs] @@ -951,10 +963,14 @@ def __init__( n_in = int(self.inputs.get_shape()[-1]) with tf.variable_scope(name): - W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) + W = tf.get_variable( + name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) if b_init is not None: try: - b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + b = tf.get_variable( + name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) except Exception: # If initializer is a constant, do not specify shape. 
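Stepping back to the AverageEmbeddingInputlayer hunk above: the arithmetic it reformats (sum the word vectors of a sentence, divide by the true sentence length, with 1e-8 guarding against empty sentences) is compact enough to mirror in NumPy. A sketch assuming padding ids of 0, which matches the layer's usual pad_value default but is not shown in this diff:

    import numpy as np

    def average_embeddings(word_ids, table, pad_value=0):
        # word_ids: (batch, max_len) int array, padded with pad_value
        embed = table[word_ids]                                   # (batch, max_len, dim)
        mask = (word_ids != pad_value)[..., None].astype(embed.dtype)
        summed = (embed * mask).sum(axis=1)                       # zero out padding, then sum
        lengths = mask.sum(axis=1)                                # (batch, 1) true lengths
        return summed / (lengths + 1e-8)                          # epsilon avoids divide-by-zero

    table = np.random.randn(100, 16).astype(np.float32)
    ids = np.array([[3, 7, 0, 0], [5, 0, 0, 0]])
    print(average_embeddings(ids, table).shape)                   # (2, 16)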
b = tf.get_variable(name='b', initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) self.outputs = act(tf.matmul(self.inputs, W) + b) @@ -1064,7 +1080,9 @@ def __init__( # L1 of activation outputs activation_out = self.all_layers[-2] - L1_a = 0.001 * tf.reduce_mean(activation_out) # : theano: T.mean( self.a[i] ) # some neuron are broken, white and black + L1_a = 0.001 * tf.reduce_mean( + activation_out + ) # : theano: T.mean( self.a[i] ) # some neuron are broken, white and black # L1_a = 0.001 * tf.reduce_mean( tf.reduce_sum(activation_out, 0) ) # : some neuron are broken, white and black # L1_a = 0.001 * 100 * tf.reduce_mean( tf.reduce_sum(activation_out, 1) ) # : some neuron are broken, white and black # KL Divergence @@ -1072,9 +1090,13 @@ def __init__( rho = 0.15 p_hat = tf.reduce_mean(activation_out, 0) # theano: p_hat = T.mean( self.a[i], axis=0 ) try: # TF1.0 - KLD = beta * tf.reduce_sum(rho * tf.log(tf.divide(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.subtract(float(1), p_hat)))) + KLD = beta * tf.reduce_sum( + rho * tf.log(tf.divide(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.subtract(float(1), p_hat))) + ) except Exception: # TF0.12 - KLD = beta * tf.reduce_sum(rho * tf.log(tf.div(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.sub(float(1), p_hat)))) + KLD = beta * tf.reduce_sum( + rho * tf.log(tf.div(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.sub(float(1), p_hat))) + ) # KLD = beta * tf.reduce_sum( rho * tf.log(rho/ p_hat) + (1- rho) * tf.log((1- rho)/(1- p_hat)) ) # theano: L1_a = l1_a[i] * T.sum( rho[i] * T.log(rho[i]/ p_hat) + (1- rho[i]) * T.log((1- rho[i])/(1- p_hat)) ) # Total cost @@ -1102,12 +1124,14 @@ def __init__( else: raise Exception("Don't support the given reconstruct activation function") - self.train_op = tf.train.AdamOptimizer( - learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False).minimize( - self.cost, var_list=self.train_params) + self.train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, + use_locking=False).minimize(self.cost, var_list=self.train_params) # self.train_op = tf.train.GradientDescentOptimizer(1.0).minimize(self.cost, var_list=self.train_params) - def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_'): + def pretrain( + self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, + save_name='w1pre_' + ): # ==================================================== # # You need to modify the cost function in __init__() so as to @@ -1155,11 +1179,14 @@ def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batc if save: try: visualize.draw_weights( - self.train_params[0].eval(), second=10, saveable=True, shape=[28, 28], name=save_name + str(epoch + 1), fig_idx=2012) + self.train_params[0].eval(), second=10, saveable=True, shape=[28, 28], + name=save_name + str(epoch + 1), fig_idx=2012 + ) files.save_npz([self.all_params[0]], name=save_name + str(epoch + 1) + '.npz') except Exception: raise Exception( - "You should change the visualize.W() in ReconLayer.pretrain(), if you want to save the feature images for different dataset") + "You should change the visualize.W() in ReconLayer.pretrain(), if you want to save the feature images for different dataset" + ) class DropoutLayer(Layer): @@ -1409,8 +1436,12 @@ def __init__( self.n_units = n_units with tf.variable_scope(name): - W = tf.get_variable(name='W', shape=(n_in, 
n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) + W = tf.get_variable( + name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args + ) + b = tf.get_variable( + name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args + ) # self.outputs = act(tf.matmul(self.inputs, W) + b) LayersConfig.set_keep[name] = tf.placeholder(tf.float32) diff --git a/tensorlayer/layers/importer.py b/tensorlayer/layers/importer.py index 3951b01ed..632b59d99 100644 --- a/tensorlayer/layers/importer.py +++ b/tensorlayer/layers/importer.py @@ -138,7 +138,8 @@ def __init__( if slim_variables == []: logging.info( "No variables found under %s : the name of SlimNetsLayer should be matched with the begining of the ckpt file, see tutorial_inceptionV3_tfslim.py for more details" - % name) + % name + ) self.outputs = net diff --git a/tensorlayer/layers/merge.py b/tensorlayer/layers/merge.py index 66fe05f98..9837d4b0f 100644 --- a/tensorlayer/layers/merge.py +++ b/tensorlayer/layers/merge.py @@ -124,7 +124,9 @@ def __init__( ): super(ElementwiseLayer, self).__init__(prev_layer=layers, name=name) - logging.info("ElementwiseLayer %s: size:%s fn:%s" % (self.name, layers[0].outputs.get_shape(), combine_fn.__name__)) + logging.info( + "ElementwiseLayer %s: size:%s fn:%s" % (self.name, layers[0].outputs.get_shape(), combine_fn.__name__) + ) self.outputs = layers[0].outputs diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py index febc70270..da2e8be3f 100644 --- a/tensorlayer/layers/normalization.py +++ b/tensorlayer/layers/normalization.py @@ -49,7 +49,10 @@ def __init__( name='lrn_layer', ): super(LocalResponseNormLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("LocalResponseNormLayer %s: depth_radius: %s, bias: %s, alpha: %s, beta: %s" % (name, str(depth_radius), str(bias), str(alpha), str(beta))) + logging.info( + "LocalResponseNormLayer %s: depth_radius: %s, bias: %s, alpha: %s, beta: %s" % + (name, str(depth_radius), str(bias), str(alpha), str(beta)) + ) self.inputs = prev_layer.outputs @@ -110,7 +113,10 @@ def __init__( name='batchnorm_layer', ): super(BatchNormLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("BatchNormLayer %s: decay:%f epsilon:%f act:%s is_train:%s" % (name, decay, epsilon, act.__name__, is_train)) + logging.info( + "BatchNormLayer %s: decay:%f epsilon:%f act:%s is_train:%s" % + (name, decay, epsilon, act.__name__, is_train) + ) self.inputs = prev_layer.outputs @@ -126,7 +132,9 @@ def __init__( if beta_init: if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer: beta_init = beta_init() - beta = tf.get_variable('beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train) + beta = tf.get_variable( + 'beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train + ) variables.append(beta) else: beta = None @@ -148,7 +156,9 @@ def __init__( moving_mean_init = tf.zeros_initializer() else: moving_mean_init = tf.zeros_initializer - moving_mean = tf.get_variable('moving_mean', params_shape, initializer=moving_mean_init, dtype=LayersConfig.tf_dtype, trainable=False) + moving_mean = tf.get_variable( + 'moving_mean', params_shape, initializer=moving_mean_init, dtype=LayersConfig.tf_dtype, trainable=False + ) moving_variance = tf.get_variable( 
'moving_variance', params_shape, @@ -161,9 +171,12 @@ def __init__( # These ops will only be preformed when training. mean, variance = tf.nn.moments(self.inputs, axis) try: # TF12 - update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay, zero_debias=False) # if zero_debias=True, has bias + update_moving_mean = moving_averages.assign_moving_average( + moving_mean, mean, decay, zero_debias=False + ) # if zero_debias=True, has bias update_moving_variance = moving_averages.assign_moving_average( - moving_variance, variance, decay, zero_debias=False) # if zero_debias=True, has bias + moving_variance, variance, decay, zero_debias=False + ) # if zero_debias=True, has bias # logging.info("TF12 moving") except Exception: # TF11 update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay) @@ -178,7 +191,9 @@ def mean_var_with_update(): mean, var = mean_var_with_update() self.outputs = act(tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon)) else: - self.outputs = act(tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon)) + self.outputs = act( + tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon) + ) variables.extend([moving_mean, moving_variance]) @@ -226,8 +241,13 @@ def __init__( with tf.variable_scope(name) as vs: mean, var = tf.nn.moments(self.inputs, [1, 2], keep_dims=True) scale = tf.get_variable( - 'scale', [self.inputs.get_shape()[-1]], initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02), dtype=LayersConfig.tf_dtype) - offset = tf.get_variable('offset', [self.inputs.get_shape()[-1]], initializer=tf.constant_initializer(0.0), dtype=LayersConfig.tf_dtype) + 'scale', [self.inputs.get_shape()[-1]], + initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02), dtype=LayersConfig.tf_dtype + ) + offset = tf.get_variable( + 'offset', [self.inputs.get_shape()[-1]], initializer=tf.constant_initializer(0.0), + dtype=LayersConfig.tf_dtype + ) self.outputs = scale * tf.div(self.inputs - mean, tf.sqrt(var + epsilon)) + offset self.outputs = act(self.outputs) variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) @@ -255,18 +275,10 @@ class LayerNormLayer(Layer): """ @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, - prev_layer, - center=True, - scale=True, - act=tf.identity, - reuse=None, - variables_collections=None, - outputs_collections=None, - trainable=True, - begin_norm_axis=1, - begin_params_axis=-1, - name='layernorm'): + def __init__( + self, prev_layer, center=True, scale=True, act=tf.identity, reuse=None, variables_collections=None, + outputs_collections=None, trainable=True, begin_norm_axis=1, begin_params_axis=-1, name='layernorm' + ): super(LayerNormLayer, self).__init__(prev_layer=prev_layer, name=name) logging.info("LayerNormLayer %s: act:%s" % (name, act.__name__)) diff --git a/tensorlayer/layers/padding.py b/tensorlayer/layers/padding.py index 716f9da10..64314c589 100644 --- a/tensorlayer/layers/padding.py +++ b/tensorlayer/layers/padding.py @@ -51,7 +51,9 @@ def __init__( self.inputs = prev_layer.outputs if padding is None: - raise Exception("padding should be a Tensor of type int32. see https://www.tensorflow.org/api_docs/python/tf/pad") + raise Exception( + "padding should be a Tensor of type int32. 
see https://www.tensorflow.org/api_docs/python/tf/pad" + ) self.outputs = tf.pad(self.inputs, paddings=padding, mode=mode, name=name) self.all_layers.append(self.outputs) diff --git a/tensorlayer/layers/pooling.py b/tensorlayer/layers/pooling.py index 6ad2eb842..c3475a0ae 100644 --- a/tensorlayer/layers/pooling.py +++ b/tensorlayer/layers/pooling.py @@ -67,7 +67,10 @@ def __init__( name='pool_layer', ): super(PoolLayer, self).__init__(prev_layer=prev_layer, name=name) - logging.info("PoolLayer %s: ksize:%s strides:%s padding:%s pool:%s" % (name, str(ksize), str(strides), padding, pool.__name__)) + logging.info( + "PoolLayer %s: ksize:%s strides:%s padding:%s pool:%s" % + (name, str(ksize), str(strides), padding, pool.__name__) + ) self.inputs = prev_layer.outputs # operation (customized) @@ -100,12 +103,18 @@ class MaxPool1d(Layer): """ @deprecated_alias(net='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, prev_layer, filter_size=3, strides=2, padding='valid', data_format='channels_last', name='maxpool1d'): + def __init__( + self, prev_layer, filter_size=3, strides=2, padding='valid', data_format='channels_last', name='maxpool1d' + ): super(MaxPool1d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("MaxPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + logging.info( + "MaxPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding)) + ) self.inputs = prev_layer.outputs # operation (customized) - self.outputs = tf.layers.max_pooling1d(self.inputs, filter_size, strides, padding=padding, data_format=data_format, name=name) + self.outputs = tf.layers.max_pooling1d( + self.inputs, filter_size, strides, padding=padding, data_format=data_format, name=name + ) # update layer (customized) self.all_layers.append(self.outputs) @@ -140,12 +149,19 @@ class MeanPool1d(Layer): # net_new.all_layers.extend([outputs]) # return net_new @deprecated_alias(net='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, prev_layer, filter_size=3, strides=2, padding='valid', data_format='channels_last', name='meanpool1d'): + def __init__( + self, prev_layer, filter_size=3, strides=2, padding='valid', data_format='channels_last', name='meanpool1d' + ): super(MeanPool1d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("MeanPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + logging.info( + "MeanPool1d %s: filter_size:%s strides:%s padding:%s" % + (name, str(filter_size), str(strides), str(padding)) + ) # operation (customized) - self.outputs = tf.layers.average_pooling1d(prev_layer.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) + self.outputs = tf.layers.average_pooling1d( + prev_layer.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name + ) # update layer (customized) self.all_layers.append(self.outputs) @@ -175,11 +191,15 @@ def __init__(self, prev_layer, filter_size=(3, 3), strides=(2, 2), padding='SAME strides = filter_size super(MaxPool2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("MaxPool2d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + logging.info( + "MaxPool2d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding)) + ) self.inputs = 
prev_layer.outputs # operation (customized) if tf.__version__ > '1.5': - self.outputs = tf.layers.max_pooling2d(self.inputs, filter_size, strides, padding=padding, data_format='channels_last', name=name) + self.outputs = tf.layers.max_pooling2d( + self.inputs, filter_size, strides, padding=padding, data_format='channels_last', name=name + ) else: if len(strides) == 2: raise Exception("len(strides) should be 2.") @@ -214,11 +234,16 @@ def __init__(self, prev_layer, filter_size=(3, 3), strides=(2, 2), padding='SAME strides = filter_size super(MeanPool2d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("MeanPool2d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + logging.info( + "MeanPool2d %s: filter_size:%s strides:%s padding:%s" % + (name, str(filter_size), str(strides), str(padding)) + ) self.inputs = prev_layer.outputs # operation (customized) if tf.__version__ > '1.5': - self.outputs = tf.layers.average_pooling2d(self.inputs, filter_size, strides, padding=padding, data_format='channels_last', name=name) + self.outputs = tf.layers.average_pooling2d( + self.inputs, filter_size, strides, padding=padding, data_format='channels_last', name=name + ) else: if len(strides) == 2: raise Exception("len(strides) should be 2.") @@ -259,12 +284,20 @@ class MaxPool3d(Layer): """ @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, prev_layer, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', name='maxpool3d'): + def __init__( + self, prev_layer, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', + name='maxpool3d' + ): super(MaxPool3d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("MaxPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + logging.info( + "MaxPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding)) + ) # operation (customized) self.inputs = prev_layer.outputs - self.outputs = tf.layers.max_pooling3d(self.inputs, filter_size, strides, padding=padding, data_format=data_format, name=name) + self.outputs = tf.layers.max_pooling3d( + self.inputs, filter_size, strides, padding=padding, data_format=data_format, name=name + ) + # update layer (customized) self.all_layers.append(self.outputs) @@ -299,15 +332,23 @@ class MeanPool3d(Layer): """ @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__(self, prev_layer, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', name='meanpool3d'): + def __init__( + self, prev_layer, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', + name='meanpool3d' + ): super(MeanPool3d, self).__init__(prev_layer=prev_layer, name=name) - logging.info("MeanPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + logging.info( + "MeanPool3d %s: filter_size:%s strides:%s padding:%s" % + (name, str(filter_size), str(strides), str(padding)) + ) self.inputs = prev_layer.outputs # operation (customized) - self.outputs = tf.layers.average_pooling3d(prev_layer.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) + self.outputs = tf.layers.average_pooling3d( + prev_layer.outputs, filter_size, strides, padding=padding, 
data_format=data_format, name=name + ) # update layer (customized) self.all_layers.append(self.outputs) diff --git a/tensorlayer/layers/recurrent.py b/tensorlayer/layers/recurrent.py index 09affd4c7..fb32005d9 100644 --- a/tensorlayer/layers/recurrent.py +++ b/tensorlayer/layers/recurrent.py @@ -160,8 +160,10 @@ def __init__( except Exception: logging.warning('pop state_is_tuple fails.') - logging.info("RNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s " % (self.name, n_hidden, n_steps, self.inputs.get_shape().ndims, - self.inputs.get_shape(), cell_fn.__name__)) + logging.info( + "RNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s " % + (self.name, n_hidden, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__) + ) # You can get the dimension by .get_shape() or ._shape, and check the # dimension by .with_rank() as follow. # self.inputs.get_shape().with_rank(2) @@ -351,10 +353,12 @@ def __init__( except Exception: logging.warning("pop state_is_tuple fails.") - logging.info("BiRNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d " % (self.name, n_hidden, n_steps, - self.inputs.get_shape().ndims, - self.inputs.get_shape(), - cell_fn.__name__, dropout, n_layer)) + logging.info( + "BiRNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d " % ( + self.name, n_hidden, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, + dropout, n_layer + ) + ) fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] @@ -404,8 +408,12 @@ def __init__( MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell if dropout: try: - self.fw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True) - self.bw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True) + self.fw_cell = MultiRNNCell_fn( + [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True + ) + self.bw_cell = MultiRNNCell_fn( + [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True + ) except Exception: self.fw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) self.bw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) @@ -419,11 +427,15 @@ def __init__( # Initial state of RNN if fw_initial_state is None: - self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=LayersConfig.tf_dtype) # dtype=tf.float32) + self.fw_initial_state = self.fw_cell.zero_state( + self.batch_size, dtype=LayersConfig.tf_dtype + ) # dtype=tf.float32) else: self.fw_initial_state = fw_initial_state if bw_initial_state is None: - self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=LayersConfig.tf_dtype) # dtype=tf.float32) + self.bw_initial_state = self.bw_cell.zero_state( + self.batch_size, dtype=LayersConfig.tf_dtype + ) # dtype=tf.float32) else: self.bw_initial_state = bw_initial_state # exit() @@ -534,7 +546,10 @@ class BasicConvLSTMCell(ConvRNNCell): """ - def __init__(self, shape, filter_size, num_features, forget_bias=1.0, input_size=None, state_is_tuple=False, act=tf.nn.tanh): + def __init__( + self, shape, filter_size, num_features, forget_bias=1.0, input_size=None, state_is_tuple=False, + act=tf.nn.tanh + ): """Initialize the basic Conv LSTM cell.""" # if not state_is_tuple: # logging.warn("%s: Using a concatenated state is slower and 
will soon be " @@ -624,14 +639,18 @@ def _conv_linear(args, filter_size, num_features, bias, bias_start=0.0, scope=No # Now the computation. with tf.variable_scope(scope or "Conv"): - matrix = tf.get_variable("Matrix", [filter_size[0], filter_size[1], total_arg_size_depth, num_features], dtype=dtype) + matrix = tf.get_variable( + "Matrix", [filter_size[0], filter_size[1], total_arg_size_depth, num_features], dtype=dtype + ) if len(args) == 1: res = tf.nn.conv2d(args[0], matrix, strides=[1, 1, 1, 1], padding='SAME') else: res = tf.nn.conv2d(tf.concat(args, 3), matrix, strides=[1, 1, 1, 1], padding='SAME') if not bias: return res - bias_term = tf.get_variable("Bias", [num_features], dtype=dtype, initializer=tf.constant_initializer(bias_start, dtype=dtype)) + bias_term = tf.get_variable( + "Bias", [num_features], dtype=dtype, initializer=tf.constant_initializer(bias_start, dtype=dtype) + ) return res + bias_term @@ -710,8 +729,11 @@ def __init__( self.inputs = prev_layer.outputs - logging.info("ConvLSTMLayer %s: feature_map:%d, n_steps:%d, " - "in_dim:%d %s, cell_fn:%s " % (self.name, feature_map, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__)) + logging.info( + "ConvLSTMLayer %s: feature_map:%d, n_steps:%d, " + "in_dim:%d %s, cell_fn:%s " % + (self.name, feature_map, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__) + ) # You can get the dimension by .get_shape() or ._shape, and check the # dimension by .with_rank() as follow. # self.inputs.get_shape().with_rank(2) @@ -721,7 +743,10 @@ def __init__( try: self.inputs.get_shape().with_rank(5) except Exception: - raise Exception("RNN : Input dimension should be rank 5 : [batch_size, n_steps, input_x, " "input_y, feature_map]") + raise Exception( + "RNN : Input dimension should be rank 5 : [batch_size, n_steps, input_x, " + "input_y, feature_map]" + ) fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] @@ -762,7 +787,9 @@ def __init__( else: # : stack more RNN layer after that # 5D Tensor [n_example/n_steps, n_steps, h, w, c] - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, cell_shape[0], cell_shape[1], feature_map]) + self.outputs = tf.reshape( + tf.concat(outputs, 1), [-1, n_steps, cell_shape[0], cell_shape[1], feature_map] + ) self.final_state = state @@ -905,7 +932,9 @@ def retrieve_seq_length_op3(data, pad_val=0): # HangSheng: return tensor for se elif data_shape_size == 1: raise ValueError("retrieve_seq_length_op3: data has wrong shape!") else: - raise ValueError("retrieve_seq_length_op3: handling data_shape_size %s hasn't been implemented!" % (data_shape_size)) + raise ValueError( + "retrieve_seq_length_op3: handling data_shape_size %s hasn't been implemented!" 
% (data_shape_size) + ) def target_mask_op(data, pad_val=0): # HangSheng: return tensor for mask,if input is tf.string @@ -1058,8 +1087,12 @@ def __init__( except Exception: logging.warning("pop state_is_tuple fails.") - logging.info("DynamicRNNLayer %s: n_hidden:%d, in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % - (self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer)) + logging.info( + "DynamicRNNLayer %s: n_hidden:%d, in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % ( + self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, + n_layer + ) + ) # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] try: @@ -1119,7 +1152,9 @@ def __init__( if dropout: try: # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)], state_is_tuple=True) # HanSheng - self.cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True) + self.cell = MultiRNNCell_fn( + [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True + ) except Exception: # when GRU # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)]) # HanSheng self.cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) @@ -1140,9 +1175,13 @@ def __init__( # Computes sequence_length if sequence_length is None: try: # TF1.0 - sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs)) + sequence_length = retrieve_seq_length_op( + self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs) + ) except Exception: # TF0.12 - sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs)) + sequence_length = retrieve_seq_length_op( + self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs) + ) # Main - Computes outputs and last_states with tf.variable_scope(name, initializer=initializer) as vs: @@ -1153,7 +1192,8 @@ def __init__( # dtype=tf.float64, sequence_length=sequence_length, initial_state=self.initial_state, - **dynamic_rnn_init_args) + **dynamic_rnn_init_args + ) rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) # logging.info(" n_params : %d" % (len(rnn_variables))) @@ -1312,8 +1352,12 @@ def __init__( except Exception: logging.warning("pop state_is_tuple fails.") - logging.info("BiDynamicRNNLayer %s: n_hidden:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % - (self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer)) + logging.info( + "BiDynamicRNNLayer %s: n_hidden:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % ( + self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, + n_layer + ) + ) # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] try: @@ -1376,9 +1420,13 @@ def __init__( # Computes sequence_length if sequence_length is None: try: # TF1.0 - sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs)) + sequence_length = retrieve_seq_length_op( + self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs) + ) except Exception: # TF0.12 - sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else 
tf.pack(self.inputs)) + sequence_length = retrieve_seq_length_op( + self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs) + ) if n_layer > 1: if dropout: @@ -1389,27 +1437,19 @@ def __init__( self.bw_cell = [cell_creator() for _ in range(n_layer)] from tensorflow.contrib.rnn import stack_bidirectional_dynamic_rnn outputs, states_fw, states_bw = stack_bidirectional_dynamic_rnn( - cells_fw=self.fw_cell, - cells_bw=self.bw_cell, - inputs=self.inputs, - sequence_length=sequence_length, - initial_states_fw=self.fw_initial_state, - initial_states_bw=self.bw_initial_state, - dtype=LayersConfig.tf_dtype, - **dynamic_rnn_init_args) + cells_fw=self.fw_cell, cells_bw=self.bw_cell, inputs=self.inputs, sequence_length=sequence_length, + initial_states_fw=self.fw_initial_state, initial_states_bw=self.bw_initial_state, + dtype=LayersConfig.tf_dtype, **dynamic_rnn_init_args + ) else: self.fw_cell = cell_creator() self.bw_cell = cell_creator() outputs, (states_fw, states_bw) = tf.nn.bidirectional_dynamic_rnn( - cell_fw=self.fw_cell, - cell_bw=self.bw_cell, - inputs=self.inputs, - sequence_length=sequence_length, - initial_state_fw=self.fw_initial_state, - initial_state_bw=self.bw_initial_state, - dtype=LayersConfig.tf_dtype, - **dynamic_rnn_init_args) + cell_fw=self.fw_cell, cell_bw=self.bw_cell, inputs=self.inputs, sequence_length=sequence_length, + initial_state_fw=self.fw_initial_state, initial_state_bw=self.bw_initial_state, + dtype=LayersConfig.tf_dtype, **dynamic_rnn_init_args + ) rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) @@ -1597,39 +1637,28 @@ def __init__( except Exception: logging.warning("pop state_is_tuple fails.") # self.inputs = layer.outputs - logging.info("[*] Seq2Seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" % (self.name, n_hidden, cell_fn.__name__, dropout, n_layer)) + logging.info( + "[*] Seq2Seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" % + (self.name, n_hidden, cell_fn.__name__, dropout, n_layer) + ) with tf.variable_scope(name): # tl.layers.set_name_reuse(reuse) # network = InputLayer(self.inputs, name=name+'/input') network_encode = DynamicRNNLayer( - net_encode_in, - cell_fn=cell_fn, - cell_init_args=cell_init_args, - n_hidden=n_hidden, - initializer=initializer, - initial_state=initial_state_encode, - dropout=dropout, - n_layer=n_layer, - sequence_length=encode_sequence_length, - return_last=False, - return_seq_2d=True, - name='encode') + net_encode_in, cell_fn=cell_fn, cell_init_args=cell_init_args, n_hidden=n_hidden, + initializer=initializer, initial_state=initial_state_encode, dropout=dropout, n_layer=n_layer, + sequence_length=encode_sequence_length, return_last=False, return_seq_2d=True, name='encode' + ) # vs.reuse_variables() # tl.layers.set_name_reuse(True) network_decode = DynamicRNNLayer( - net_decode_in, - cell_fn=cell_fn, - cell_init_args=cell_init_args, - n_hidden=n_hidden, + net_decode_in, cell_fn=cell_fn, cell_init_args=cell_init_args, n_hidden=n_hidden, initializer=initializer, - initial_state=(network_encode.final_state if initial_state_decode is None else initial_state_decode), - dropout=dropout, - n_layer=n_layer, - sequence_length=decode_sequence_length, - return_last=False, - return_seq_2d=return_seq_2d, - name='decode') + initial_state=(network_encode.final_state if initial_state_decode is None else + initial_state_decode), dropout=dropout, n_layer=n_layer, + sequence_length=decode_sequence_length, return_last=False, return_seq_2d=return_seq_2d, name='decode' + ) self.outputs = 
network_decode.outputs # rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) diff --git a/tensorlayer/layers/spatial_transformer.py b/tensorlayer/layers/spatial_transformer.py index 3ab4fb3f4..68be2f773 100644 --- a/tensorlayer/layers/spatial_transformer.py +++ b/tensorlayer/layers/spatial_transformer.py @@ -133,7 +133,10 @@ def _meshgrid(height, width): # np.linspace(-1, 1, height)) # ones = np.ones(np.prod(x_t.shape)) # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) - x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) + x_t = tf.matmul( + tf.ones(shape=tf.stack([height, 1])), + tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]) + ) y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), tf.ones(shape=tf.stack([1, width]))) x_t_flat = tf.reshape(x_t, (1, -1)) @@ -243,7 +246,10 @@ def __init__( if out_size is None: out_size = [40, 40] - logging.info("SpatialTransformer2dAffineLayer %s: in_size:%s out_size:%s" % (name, self.inputs.get_shape().as_list(), out_size)) + logging.info( + "SpatialTransformer2dAffineLayer %s: in_size:%s out_size:%s" % + (name, self.inputs.get_shape().as_list(), out_size) + ) with tf.variable_scope(name) as vs: # 1. make the localisation network to [batch, 6] via Flatten and Dense. diff --git a/tensorlayer/layers/special_activation.py b/tensorlayer/layers/special_activation.py index 4d5a472dd..9502bd4ac 100644 --- a/tensorlayer/layers/special_activation.py +++ b/tensorlayer/layers/special_activation.py @@ -43,7 +43,8 @@ def __init__( a_init=tf.constant_initializer(value=0.0), a_init_args=None, # restore = True, - name="prelu_layer"): + name="prelu_layer" + ): if a_init_args is None: a_init_args = {} @@ -60,7 +61,9 @@ def __init__( # with tf.name_scope(name) as scope: with tf.variable_scope(name): - alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=a_init, dtype=LayersConfig.tf_dtype, **a_init_args) + alphas = tf.get_variable( + name='alphas', shape=w_shape, initializer=a_init, dtype=LayersConfig.tf_dtype, **a_init_args + ) try: # TF 1.0 self.outputs = tf.nn.relu(self.inputs) + tf.multiply(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5 except Exception: # TF 0.12 diff --git a/tensorlayer/models/mobilenetv1.py b/tensorlayer/models/mobilenetv1.py index 2f496f94e..968b699b4 100644 --- a/tensorlayer/models/mobilenetv1.py +++ b/tensorlayer/models/mobilenetv1.py @@ -7,7 +7,9 @@ # import numpy as np import tensorflow as tf from .. 
import _logging as logging -from ..layers import (Layer, BatchNormLayer, Conv2d, DepthwiseConv2d, FlattenLayer, GlobalMeanPool2d, InputLayer, ReshapeLayer) +from ..layers import ( + Layer, BatchNormLayer, Conv2d, DepthwiseConv2d, FlattenLayer, GlobalMeanPool2d, InputLayer, ReshapeLayer +) from ..files import maybe_download_and_extract, assign_params, load_npz __all__ = [ @@ -151,7 +153,9 @@ def depthwise_conv_block(cls, n, n_filter, strides=(1, 1), is_train=False, name= def restore_params(self, sess, path='models'): logging.info("Restore pre-trained parameters") maybe_download_and_extract( - 'mobilenet.npz', path, 'https://github.com/tensorlayer/pretrained-models/raw/master/models/', expected_bytes=25600116) # ls -al + 'mobilenet.npz', path, 'https://github.com/tensorlayer/pretrained-models/raw/master/models/', + expected_bytes=25600116 + ) # ls -al params = load_npz(name=os.path.join(path, 'mobilenet.npz')) assign_params(sess, params[:len(self.net.all_params)], self.net) del params diff --git a/tensorlayer/models/squeezenetv1.py b/tensorlayer/models/squeezenetv1.py index faf8f295e..0884d3c47 100644 --- a/tensorlayer/models/squeezenetv1.py +++ b/tensorlayer/models/squeezenetv1.py @@ -161,7 +161,9 @@ def squeezenetv1(cls, x, end_with='output', is_train=False, reuse=None): def restore_params(self, sess, path='models'): logging.info("Restore pre-trained parameters") maybe_download_and_extract( - 'squeezenet.npz', path, 'https://github.com/tensorlayer/pretrained-models/raw/master/models/', expected_bytes=7405613) # ls -al + 'squeezenet.npz', path, 'https://github.com/tensorlayer/pretrained-models/raw/master/models/', + expected_bytes=7405613 + ) # ls -al params = load_npz(name=os.path.join(path, 'squeezenet.npz')) assign_params(sess, params[:len(self.net.all_params)], self.net) del params diff --git a/tensorlayer/models/vgg16.py b/tensorlayer/models/vgg16.py index 52dd090c1..8df2f394c 100644 --- a/tensorlayer/models/vgg16.py +++ b/tensorlayer/models/vgg16.py @@ -55,15 +55,19 @@ def conv_layers(net_in): shape=[3, 3, 3, 64], # 64 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv1_1') + name='conv1_1' + ) net = Conv2dLayer( net, act=tf.nn.relu, shape=[3, 3, 64, 64], # 64 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv1_2') - net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') + name='conv1_2' + ) + net = PoolLayer( + net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1' + ) # conv2 net = Conv2dLayer( @@ -72,15 +76,19 @@ def conv_layers(net_in): shape=[3, 3, 64, 128], # 128 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv2_1') + name='conv2_1' + ) net = Conv2dLayer( net, act=tf.nn.relu, shape=[3, 3, 128, 128], # 128 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv2_2') - net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') + name='conv2_2' + ) + net = PoolLayer( + net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2' + ) # conv3 net = Conv2dLayer( @@ -89,22 +97,27 @@ def conv_layers(net_in): shape=[3, 3, 128, 256], # 256 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv3_1') + name='conv3_1' + ) net = Conv2dLayer( net, act=tf.nn.relu, shape=[3, 3, 256, 256], # 256 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv3_2') 
+ name='conv3_2' + ) net = Conv2dLayer( net, act=tf.nn.relu, shape=[3, 3, 256, 256], # 256 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv3_3') - net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') + name='conv3_3' + ) + net = PoolLayer( + net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3' + ) # conv4 net = Conv2dLayer( @@ -113,22 +126,27 @@ def conv_layers(net_in): shape=[3, 3, 256, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv4_1') + name='conv4_1' + ) net = Conv2dLayer( net, act=tf.nn.relu, shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv4_2') + name='conv4_2' + ) net = Conv2dLayer( net, act=tf.nn.relu, shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv4_3') - net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') + name='conv4_3' + ) + net = PoolLayer( + net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4' + ) # conv5 net = Conv2dLayer( @@ -137,22 +155,27 @@ def conv_layers(net_in): shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv5_1') + name='conv5_1' + ) net = Conv2dLayer( net, act=tf.nn.relu, shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv5_2') + name='conv5_2' + ) net = Conv2dLayer( net, act=tf.nn.relu, shape=[3, 3, 512, 512], # 512 features for each 3x3 patch strides=[1, 1, 1, 1], padding='SAME', - name='conv5_3') - net = PoolLayer(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') + name='conv5_3' + ) + net = PoolLayer( + net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5' + ) return net @staticmethod @@ -174,31 +197,57 @@ def vgg16_simple_api(net_in, end_with): layers = [ # conv1 - lambda net: Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1'), - lambda net: Conv2d(net, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2'), + lambda net: Conv2d( + net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1' + ), + lambda net: Conv2d( + net, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2' + ), lambda net: MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1'), # conv2 - lambda net: Conv2d(net, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1'), - lambda net: Conv2d(net, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2'), + lambda net: Conv2d( + net, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1' + ), + lambda net: Conv2d( + net, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2' + ), lambda net: MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2'), # conv3 - lambda net: Conv2d(net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1'), - lambda net: Conv2d(net, n_filter=256, filter_size=(3, 3), 
strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2'), - lambda net: Conv2d(net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3'), + lambda net: Conv2d( + net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1' + ), + lambda net: Conv2d( + net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2' + ), + lambda net: Conv2d( + net, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3' + ), lambda net: MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3'), # conv4 - lambda net: Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1'), - lambda net: Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2'), - lambda net: Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3'), + lambda net: Conv2d( + net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1' + ), + lambda net: Conv2d( + net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2' + ), + lambda net: Conv2d( + net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3' + ), lambda net: MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4'), # conv5 - lambda net: Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1'), - lambda net: Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2'), - lambda net: Conv2d(net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3'), + lambda net: Conv2d( + net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1' + ), + lambda net: Conv2d( + net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2' + ), + lambda net: Conv2d( + net, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3' + ), lambda net: MaxPool2d(net, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5'), lambda net: FlattenLayer(net, name='flatten'), lambda net: DenseLayer(net, n_units=4096, act=tf.nn.relu, name='fc1_relu'), @@ -216,7 +265,9 @@ def vgg16_simple_api(net_in, end_with): def restore_params(self, sess): logging.info("Restore pre-trained parameters") - maybe_download_and_extract('vgg16_weights.npz', 'models', 'http://www.cs.toronto.edu/~frossard/vgg16/', expected_bytes=553436134) + maybe_download_and_extract( + 'vgg16_weights.npz', 'models', 'http://www.cs.toronto.edu/~frossard/vgg16/', expected_bytes=553436134 + ) npz = np.load(os.path.join('models', 'vgg16_weights.npz')) params = [] diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py index 7d3df60ca..a86e7ca2d 100755 --- a/tensorlayer/nlp.py +++ b/tensorlayer/nlp.py @@ -693,7 +693,8 @@ def build_words_dataset(words=None, vocabulary_size=50000, printable=True, unk_k logging.info('Limited vocabulary size {}'.format(vocabulary_size)) if len(collections.Counter(words).keys()) < vocabulary_size: raise Exception( - "len(collections.Counter(words).keys()) >= vocabulary_size , the limited vocabulary_size must be less than or equal to the read vocabulary_size") + 
"len(collections.Counter(words).keys()) >= vocabulary_size , the limited vocabulary_size must be less than or equal to the read vocabulary_size" + ) return data, count, dictionary, reverse_dictionary @@ -863,7 +864,10 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): return [w for w in words if w] -def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size, tokenizer=None, normalize_digits=True, _DIGIT_RE=re.compile(br"\d"), _START_VOCAB=None): +def create_vocabulary( + vocabulary_path, data_path, max_vocabulary_size, tokenizer=None, normalize_digits=True, + _DIGIT_RE=re.compile(br"\d"), _START_VOCAB=None +): """Create vocabulary file (if it does not exist yet) from data file. Data file is assumed to contain one sentence per line. Each sentence is @@ -968,7 +972,9 @@ def initialize_vocabulary(vocabulary_path): raise ValueError("Vocabulary file %s not found.", vocabulary_path) -def sentence_to_token_ids(sentence, vocabulary, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): +def sentence_to_token_ids( + sentence, vocabulary, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d") +): """Convert a string to list of integers representing token-ids. For example, a sentence "I have a dog" may become tokenized into @@ -1002,7 +1008,10 @@ def sentence_to_token_ids(sentence, vocabulary, tokenizer=None, normalize_digits return [vocabulary.get(re.sub(_DIGIT_RE, b"0", w), UNK_ID) for w in words] -def data_to_token_ids(data_path, target_path, vocabulary_path, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): +def data_to_token_ids( + data_path, target_path, vocabulary_path, tokenizer=None, normalize_digits=True, UNK_ID=3, + _DIGIT_RE=re.compile(br"\d") +): """Tokenize data file and turn into token-ids using given vocabulary file. 
This function loads data line-by-line from data_path, calls the above @@ -1037,7 +1046,9 @@ def data_to_token_ids(data_path, target_path, vocabulary_path, tokenizer=None, n counter += 1 if counter % 100000 == 0: logging.info(" tokenizing line %d" % counter) - token_ids = sentence_to_token_ids(line, vocab, tokenizer, normalize_digits, UNK_ID=UNK_ID, _DIGIT_RE=_DIGIT_RE) + token_ids = sentence_to_token_ids( + line, vocab, tokenizer, normalize_digits, UNK_ID=UNK_ID, _DIGIT_RE=_DIGIT_RE + ) tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n") else: logging.info("Target path %s exists" % target_path) @@ -1077,7 +1088,10 @@ def moses_multi_bleu(hypotheses, references, lowercase=False): # Get MOSES multi-bleu script try: - multi_bleu_path, _ = urllib.request.urlretrieve("https://raw.githubusercontent.com/moses-smt/mosesdecoder/" "master/scripts/generic/multi-bleu.perl") + multi_bleu_path, _ = urllib.request.urlretrieve( + "https://raw.githubusercontent.com/moses-smt/mosesdecoder/" + "master/scripts/generic/multi-bleu.perl" + ) os.chmod(multi_bleu_path, 0o755) except Exception: # pylint: disable=W0702 tf.logging.info("Unable to fetch multi-bleu.perl script, using local.") diff --git a/tensorlayer/prepro.py b/tensorlayer/prepro.py index 0105bf15e..b293b1e17 100644 --- a/tensorlayer/prepro.py +++ b/tensorlayer/prepro.py @@ -180,7 +180,9 @@ def apply_fn(results, i, data, kwargs): results = [None] * thread_count threads = [] for i in range(thread_count): - t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, data[divs[i]:divs[i + 1]], kwargs)) + t = threading.Thread( + name='threading_and_return', target=apply_fn, args=(results, i, data[divs[i]:divs[i + 1]], kwargs) + ) t.start() threads.append(t) @@ -196,7 +198,9 @@ def apply_fn(results, i, data, kwargs): return np.concatenate(results) -def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def rotation( + x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1 +): """Rotate an image randomly or non-randomly. Parameters @@ -240,7 +244,9 @@ def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index= return x -def rotation_multi(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def rotation_multi( + x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1 +): """Rotate multiple images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. 
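The prepro hunks above and below only re-wrap the signatures of these augmentation functions; their behaviour and call patterns are unchanged. For reference, a minimal usage sketch of the reformatted helpers (the random NumPy batch is a hypothetical stand-in for real image data, not part of the patch):

```python
# Minimal usage sketch for the tl.prepro augmentation helpers touched by this
# patch; `images` is a synthetic stand-in batch of HWC float images.
import numpy as np
import tensorlayer as tl

images = np.random.uniform(0, 1, size=(8, 64, 64, 3)).astype(np.float32)

# Rotate a single image by a random angle drawn from [-20, 20] degrees.
x = tl.prepro.rotation(images[0], rg=20, is_random=True, fill_mode='nearest')

# threading_data applies the same function to every image in worker threads.
batch = tl.prepro.threading_data(images, fn=tl.prepro.rotation, rg=20, is_random=True)
print(x.shape, np.asarray(batch).shape)  # (64, 64, 3) (8, 64, 64, 3)
```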
@@ -302,7 +308,10 @@ def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1): """ h, w = x.shape[row_index], x.shape[col_index] - assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" + + if (h <= hrg) or (w <= wrg): + raise AssertionError("The size of cropping should be smaller than the original image") + if is_random: h_offset = int(np.random.uniform(0, h - hrg) - 1) w_offset = int(np.random.uniform(0, w - wrg) - 1) @@ -339,7 +348,10 @@ def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1): """ h, w = x[0].shape[row_index], x[0].shape[col_index] - assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" + + if (h <= hrg) or (w <= wrg): + raise AssertionError("The size of cropping should be smaller than the original image") + if is_random: h_offset = int(np.random.uniform(0, h - hrg) - 1) w_offset = int(np.random.uniform(0, w - wrg) - 1) @@ -442,7 +454,10 @@ def flip_axis_multi(x, axis, is_random=False): # shift -def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def shift( + x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., + order=1 +): """Shift an image randomly or non-randomly. Parameters @@ -483,7 +498,10 @@ def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channe return x -def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def shift_multi( + x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., + order=1 +): """Shift images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. @@ -516,7 +534,10 @@ def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, # shear -def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def shear( + x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., + order=1 +): """Shear an image randomly or non-randomly. Parameters @@ -559,7 +580,10 @@ def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_i return x -def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def shear_multi( + x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., + order=1 +): """Shear images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. @@ -590,7 +614,10 @@ def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, cha return np.asarray(results) -def shear2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def shear2( + x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., + order=1 +): """Shear an image randomly or non-randomly.
Parameters @@ -620,7 +647,11 @@ def shear2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, chann - `Affine transformation `__ """ - assert len(shear) == 2, "shear should be tuple of 2 floats, or you want to use tl.prepro.shear rather than tl.prepro.shear2 ?" + if len(shear) != 2: + raise AssertionError( + "shear should be a tuple of 2 floats; do you want to use tl.prepro.shear rather than tl.prepro.shear2?" + ) + if is_random: shear[0] = np.random.uniform(-shear[0], shear[0]) shear[1] = np.random.uniform(-shear[1], shear[1]) @@ -633,7 +664,10 @@ def shear2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, chann return x -def shear_multi2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def shear_multi2( + x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., + order=1 +): """Shear images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. @@ -650,7 +684,11 @@ def shear_multi2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, A list of processed images. """ - assert len(shear) == 2, "shear should be tuple of 2 floats, or you want to use tl.prepro.shear_multi rather than tl.prepro.shear_multi2 ?" + if len(shear) != 2: + raise AssertionError( + "shear should be a tuple of 2 floats; do you want to use tl.prepro.shear_multi rather than tl.prepro.shear_multi2?" + ) + if is_random: shear[0] = np.random.uniform(-shear[0], shear[0]) shear[1] = np.random.uniform(-shear[1], shear[1]) @@ -666,18 +704,10 @@ def shear_multi2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, # swirl -def swirl(x, - center=None, - strength=1, - radius=100, - rotation=0, - output_shape=None, - order=1, - mode='constant', - cval=0, - clip=True, - preserve_range=False, - is_random=False): +def swirl( + x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, + clip=True, preserve_range=False, is_random=False +): """Swirl an image randomly or non-randomly, see `scikit-image swirl API `__ and `example `__. @@ -724,7 +754,9 @@ def swirl(x, >>> x = tl.prepro.swirl(x, strength=4, radius=100) """ - assert radius != 0, Exception("Invalid radius value") + if radius == 0: + raise AssertionError("Invalid radius value") + rotation = np.pi / 180 * rotation if is_random: center_h = int(np.random.uniform(0, x.shape[0])) @@ -738,34 +770,18 @@ def swirl(x, if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required.
x = x / max_v swirled = skimage.transform.swirl( - x, - center=center, - strength=strength, - radius=radius, - rotation=rotation, - output_shape=output_shape, - order=order, - mode=mode, - cval=cval, - clip=clip, - preserve_range=preserve_range) + x, center=center, strength=strength, radius=radius, rotation=rotation, output_shape=output_shape, order=order, + mode=mode, cval=cval, clip=clip, preserve_range=preserve_range + ) if max_v > 1: swirled = swirled * max_v return swirled -def swirl_multi(x, - center=None, - strength=1, - radius=100, - rotation=0, - output_shape=None, - order=1, - mode='constant', - cval=0, - clip=True, - preserve_range=False, - is_random=False): +def swirl_multi( + x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, + clip=True, preserve_range=False, is_random=False +): """Swirl multiple images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. @@ -782,7 +798,9 @@ def swirl_multi(x, A list of processed images. """ - assert radius != 0, Exception("Invalid radius value") + if radius == 0: + raise AssertionError("Invalid radius value") + rotation = np.pi / 180 * rotation if is_random: center_h = int(np.random.uniform(0, x[0].shape[0])) @@ -798,17 +816,9 @@ def swirl_multi(x, if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. data = data / max_v swirled = skimage.transform.swirl( - data, - center=center, - strength=strength, - radius=radius, - rotation=rotation, - output_shape=output_shape, - order=order, - mode=mode, - cval=cval, - clip=clip, - preserve_range=preserve_range) + data, center=center, strength=strength, radius=radius, rotation=rotation, output_shape=output_shape, + order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range + ) if max_v > 1: swirled = swirled * max_v results.append(swirled) @@ -860,7 +870,9 @@ def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False) is_3d = True elif len(x.shape) == 3 and x.shape[-1] != 1: raise Exception("Only support greyscale image") - assert len(x.shape) == 2, "input should be grey-scale image" + + if len(x.shape) != 2: + raise AssertionError("input should be grey-scale image") shape = x.shape @@ -909,7 +921,9 @@ def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random= is_3d = True elif len(data.shape) == 3 and data.shape[-1] != 1: raise Exception("Only support greyscale image") - assert len(data.shape) == 2, "input should be grey-scale image" + + if len(data.shape) != 2: + raise AssertionError("input should be grey-scale image") dx = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha dy = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha @@ -925,7 +939,10 @@ def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random= # zoom -def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def zoom( + x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', + cval=0., order=1 +): """Zoom in and out of a single image, randomly or non-randomly. 
Parameters @@ -972,7 +989,10 @@ def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, ch return x -def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): +def zoom_multi( + x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', + cval=0., order=1 +): """Zoom in and out of images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. @@ -1119,10 +1139,10 @@ def illumination(x, gamma=1., contrast=1., saturation=1., is_random=False): from PIL import Image, ImageEnhance if is_random: - try: - assert len(gamma) == len(contrast) == len(saturation) == 2, "if is_random = True, the arguments are (min, max)" - except: - raise Exception("if is_random = True, the arguments are (min, max)") + + if not (len(gamma) == len(contrast) == len(saturation) == 2): + raise AssertionError("if is_random = True, the arguments are (min, max)") + ## random change brightness # small --> brighter illum_settings = np.random.randint(0, 3) # 0-brighter, 1-darker, 2 keep normal @@ -1383,7 +1403,9 @@ def pixel_value_scale(im, val=0.9, clip=(-np.inf, np.inf), is_random=False): # normailization -def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_normalization=False, channel_index=2, epsilon=1e-7): +def samplewise_norm( + x, rescale=None, samplewise_center=False, samplewise_std_normalization=False, channel_index=2, epsilon=1e-7 +): """Normalize an image by rescale, samplewise centering and samplewise centering in order. Parameters @@ -1717,14 +1739,19 @@ def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', c final_affine_matrix = transform_matrix[:2, :2] final_offset = transform_matrix[:2, 2] channel_images = [ - ndi.interpolation.affine_transform(x_channel, final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval) for x_channel in x + ndi.interpolation.affine_transform( + x_channel, final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval + ) for x_channel in x ] x = np.stack(channel_images, axis=0) x = np.rollaxis(x, 0, channel_index + 1) return x -def projective_transform_by_points(x, src, dst, map_args=None, output_shape=None, order=1, mode='constant', cval=0.0, clip=True, preserve_range=False): +def projective_transform_by_points( + x, src, dst, map_args=None, output_shape=None, order=1, mode='constant', cval=0.0, clip=True, + preserve_range=False +): """Projective transform by given coordinates, usually 4 coordinates. see `scikit-image `__. @@ -1791,7 +1818,10 @@ def projective_transform_by_points(x, src, dst, map_args=None, output_shape=None m = transform.ProjectiveTransform() m.estimate(dst, src) - warped = transform.warp(x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range) + warped = transform.warp( + x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, + preserve_range=preserve_range + ) return warped @@ -1863,7 +1893,9 @@ def find_contours(x, level=0.8, fully_connected='low', positive_orientation='low Each contour is an ndarray of shape (n, 2), consisting of n (row, column) coordinates along the contour. 
""" - return skimage.measure.find_contours(x, level, fully_connected=fully_connected, positive_orientation=positive_orientation) + return skimage.measure.find_contours( + x, level, fully_connected=fully_connected, positive_orientation=positive_orientation + ) def pt2map(list_points=None, size=(100, 100), val=1): @@ -2032,7 +2064,10 @@ def obj_box_coords_rescale(coords=None, shape=None): imw = imw * 1.0 coords_new = list() for coord in coords: - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + x = coord[0] / imw y = coord[1] / imh w = coord[2] / imw @@ -2137,7 +2172,10 @@ def obj_box_coord_centroid_to_upleft_butright(coord, to_int=False): ... [20, 30, 40, 50] """ - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + x_center, y_center, w, h = coord x = x_center - w / 2. y = y_center - h / 2. @@ -2169,7 +2207,8 @@ def obj_box_coord_upleft_butright_to_centroid(coord): New bounding box. """ - assert len(coord) == 4, "coordinate should be 4 values : [x1, y1, x2, y2]" + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x1, y1, x2, y2]") x1, y1, x2, y2 = coord w = x2 - x1 h = y2 - y1 @@ -2193,7 +2232,9 @@ def obj_box_coord_centroid_to_upleft(coord): New bounding box. """ - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + x_center, y_center, w, h = coord x = x_center - w / 2. y = y_center - h / 2. @@ -2215,7 +2256,9 @@ def obj_box_coord_upleft_to_centroid(coord): New bounding box. """ - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + x, y, w, h = coord x_center = x + w / 2. y_center = y + h / 2. @@ -2326,7 +2369,10 @@ def _flip(im, coords): coords_new = list() for coord in coords: - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + if is_rescale: if is_center: # x_center' = 1 - x @@ -2422,8 +2468,12 @@ def obj_box_imresize(im, coords=None, size=None, interp='bicubic', mode=None, is if is_rescale is False: coords_new = list() + for coord in coords: - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + # x' = x * (imw'/imw) x = int(coord[0] * (size[1] / imw)) # y' = y * (imh'/imh) @@ -2455,7 +2505,10 @@ def obj_box_imresize(im, coords=None, size=None, interp='bicubic', mode=None, is # exit() -def obj_box_crop(im, classes=None, coords=None, wrg=100, hrg=100, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02, thresh_wh2=12.): +def obj_box_crop( + im, classes=None, coords=None, wrg=100, hrg=100, is_rescale=False, is_center=False, is_random=False, + thresh_wh=0.02, thresh_wh2=12. +): """Randomly or centrally crop an image, and compute the new bounding box coordinates. Objects outside the cropped image will be removed. 
@@ -2494,7 +2547,10 @@ def obj_box_crop(im, classes=None, coords=None, wrg=100, hrg=100, is_rescale=Fal coords = [] h, w = im.shape[0], im.shape[1] - assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" + + if (h <= hrg) or (w <= wrg): + raise AssertionError("The size of cropping should be smaller than the original image") + if is_random: h_offset = int(np.random.uniform(0, h - hrg) - 1) w_offset = int(np.random.uniform(0, w - wrg) - 1) @@ -2565,7 +2621,8 @@ def _get_coord(coord): # logging.info('xx', w, h) return None - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow + if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < + thresh_wh): # object shape strange: too narrow # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) return None @@ -2581,7 +2638,10 @@ def _get_coord(coord): classes_new = list() for i, _ in enumerate(coords): coord = coords[i] - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + if is_rescale: # for scaled coord, upscaled before process and scale back in the end. coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) @@ -2598,22 +2658,10 @@ def _get_coord(coord): return im_new, classes_new, coords_new -def obj_box_shift(im, - classes=None, - coords=None, - wrg=0.1, - hrg=0.1, - row_index=0, - col_index=1, - channel_index=2, - fill_mode='nearest', - cval=0., - order=1, - is_rescale=False, - is_center=False, - is_random=False, - thresh_wh=0.02, - thresh_wh2=12.): +def obj_box_shift( + im, classes=None, coords=None, wrg=0.1, hrg=0.1, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', + cval=0., order=1, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02, thresh_wh2=12. +): """Shift an image randomly or non-randomly, and compute the new bounding box coordinates. Objects outside the cropped image will be removed. @@ -2652,7 +2700,10 @@ def obj_box_shift(im, coords = [] imh, imw = im.shape[row_index], im.shape[col_index] - assert (hrg < 1.0) and (hrg > 0.) and (wrg < 1.0) and (wrg > 0.), "shift range should be (0, 1)" + + if (hrg >= 1.0) or (hrg <= 0.) or (wrg >= 1.0) or (wrg <= 0.): + raise AssertionError("shift range should be (0, 1)") + if is_random: tx = np.random.uniform(-hrg, hrg) * imh ty = np.random.uniform(-wrg, wrg) * imw @@ -2705,7 +2756,8 @@ def _get_coord(coord): # logging.info('xx', w, h) return None - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow + if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < + thresh_wh): # object shape strange: too narrow # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) return None @@ -2721,7 +2773,10 @@ def _get_coord(coord): classes_new = list() for i, _ in enumerate(coords): coord = coords[i] - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + if is_rescale: # for scaled coord, upscaled before process and scale back in the end.
coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) @@ -2738,21 +2793,11 @@ def _get_coord(coord): return im_new, classes_new, coords_new -def obj_box_zoom(im, - classes=None, - coords=None, - zoom_range=(0.9, 1.1), - row_index=0, - col_index=1, - channel_index=2, - fill_mode='nearest', - cval=0., - order=1, - is_rescale=False, - is_center=False, - is_random=False, - thresh_wh=0.02, - thresh_wh2=12.): +def obj_box_zoom( + im, classes=None, coords=None, zoom_range=(0.9, 1.1), row_index=0, col_index=1, channel_index=2, + fill_mode='nearest', cval=0., order=1, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02, + thresh_wh2=12. +): """Zoom in and out of a single image, randomly or non-randomly, and compute the new bounding box coordinates. Objects outside the cropped image will be removed. @@ -2848,7 +2893,8 @@ def _get_coord(coord): # logging.info('xx', w, h) return None - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow + if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < + thresh_wh): # object shape strange: too narrow # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) return None @@ -2864,7 +2910,10 @@ def _get_coord(coord): classes_new = list() for i, _ in enumerate(coords): coord = coords[i] - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + + if len(coord) != 4: + raise AssertionError("coordinate should be 4 values : [x, y, w, h]") + if is_rescale: # for scaled coord, upscaled before process and scale back in the end. coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) @@ -2948,7 +2997,10 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncat # check `trunc` has expected shape trunc = np.asarray(trunc, dtype=dtype) if trunc.shape[1:] != sample_shape: - raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' % (trunc.shape[1:], idx, sample_shape)) + raise ValueError( + 'Shape of sample %s of sequence at position %s is different from expected shape %s' % + (trunc.shape[1:], idx, sample_shape) + ) if padding == 'post': x[idx, :len(trunc)] = trunc @@ -3034,7 +3086,7 @@ def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_en max_length = i_w if remain_end_id is False: seq[i_w] = pad_val # set end_id to pad_val - elif is_end == True: + elif is_end is True: seq[i_w] = pad_val if remain_end_id is True: diff --git a/tensorlayer/utils.py b/tensorlayer/utils.py index d26ff75d3..e316ca0d7 100644 --- a/tensorlayer/utils.py +++ b/tensorlayer/utils.py @@ -32,25 +32,11 @@ ] -def fit(sess, - network, - train_op, - cost, - X_train, - y_train, - x, - y_, - acc=None, - batch_size=100, - n_epoch=100, - print_freq=5, - X_val=None, - y_val=None, - eval_train=True, - tensorboard=False, - tensorboard_epoch_freq=5, - tensorboard_weight_histograms=True, - tensorboard_graph_vis=True): +def fit( + sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100, n_epoch=100, print_freq=5, + X_val=None, y_val=None, eval_train=True, tensorboard=False, tensorboard_epoch_freq=5, + tensorboard_weight_histograms=True, tensorboard_graph_vis=True +): """Training a given non time-series network by the given cost function, training data, batch_size, n_epoch etc. - MNIST example click `here `_.
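A recurring edit across the prepro hunks above replaces bare assert statements with explicit if ... raise AssertionError(...) blocks. One practical reason for preferring the explicit form (editorial context, not stated in the PR itself) is that assert statements are stripped when Python runs with the -O flag, whereas an explicit raise always fires. A minimal sketch with a hypothetical helper:

    def check_coord(coord):
        # Hypothetical example of the pattern used throughout this PR:
        # the explicit raise survives `python -O`; a bare assert would not.
        if len(coord) != 4:
            raise AssertionError("coordinate should be 4 values : [x, y, w, h]")
        return coord

    check_coord([20, 30, 40, 50])  # valid coordinate, passes silently
    try:
        check_coord([20, 30, 40])  # raises, with or without -O
    except AssertionError as err:
        print(err)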
@@ -218,7 +204,9 @@ def fit(sess, if acc is not None: logging.info(" val acc: %f" % (val_acc / n_batch)) else: - logging.info("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep)) + logging.info( + "Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep) + ) logging.info("Total training time: %fs" % (time.time() - start_time_begin)) @@ -568,7 +556,8 @@ def exit_tensorflow(sess=None, port=6006): _exit() elif _platform == "darwin": logging.info('OS X: %s' % text) - subprocess.Popen("lsof -i tcp:" + str(port) + " | grep -v PID | awk '{print $2}' | xargs kill", shell=True) # kill tensorboard + subprocess.Popen("lsof -i tcp:" + str(port) + " | grep -v PID | awk '{print $2}' | xargs kill", + shell=True) # kill tensorboard elif _platform == "win32": raise NotImplementedError("this function is not supported on the Windows platform") else: @@ -597,8 +586,8 @@ def open_tensorboard(log_dir='/tmp/tensorflow', port=6006): elif _platform == "darwin": logging.info('OS X: %s' % text) subprocess.Popen( - sys.prefix + " | python -m tensorflow.tensorboard --logdir=" + log_dir + " --port=" + str(port), - shell=True) # open tensorboard in localhost:6006/ or whatever port you chose + sys.prefix + " | python -m tensorflow.tensorboard --logdir=" + log_dir + " --port=" + str(port), shell=True + ) # open tensorboard in localhost:6006/ or whatever port you chose elif _platform == "win32": raise NotImplementedError("this function is not supported on the Windows platform") else: diff --git a/tensorlayer/visualize.py b/tensorlayer/visualize.py index d868dba10..4e7f44133 100644 --- a/tensorlayer/visualize.py +++ b/tensorlayer/visualize.py @@ -134,11 +134,15 @@ def merge(images, size): def imsave(images, size, path): return scipy.misc.imsave(path, merge(images, size)) - assert len(images) <= size[0] * size[1], "number of images should be equal or less than size[0] * size[1] {}".format(len(images)) + if len(images) > size[0] * size[1]: + raise AssertionError("number of images should be equal or less than size[0] * size[1] {}".format(len(images))) + return imsave(images, size, image_path) -def draw_boxes_and_labels_to_image(image, classes, coords, scores, classes_list, is_center=True, is_rescale=True, save_name=None): +def draw_boxes_and_labels_to_image( + image, classes, coords, scores, classes_list, is_center=True, is_rescale=True, save_name=None +): """Draw bboxes and class labels on image. Return or save the image with bboxes, example in the docs of ``tl.prepro``. Parameters @@ -177,11 +181,13 @@ def draw_boxes_and_labels_to_image(image, classes, coords, scores, classes_list, - `scikit-image `__. """ - assert len(coords) == len(classes), "number of coordinates and classes are equal" - if len(scores) > 0: - assert len(scores) == len(classes), "number of scores and classes are equal" + if len(coords) != len(classes): + raise AssertionError("number of coordinates and classes must be equal") - import cv2 + if len(scores) > 0 and len(scores) != len(classes): + raise AssertionError("number of scores and classes must be equal") + + import cv2 # TODO: OpenCV is not in the requirements.
# don't change the original image, and avoid error https://stackoverflow.com/questions/30249053/python-opencv-drawing-errors-after-manipulating-array-with-numpy image = image.copy() @@ -203,7 +209,8 @@ def draw_boxes_and_labels_to_image(image, classes, coords, scores, classes_list, (int(x), int(y)), (int(x2), int(y2)), # up-left and botton-right [0, 255, 0], - thick) + thick + ) cv2.putText( image, @@ -212,7 +219,8 @@ def draw_boxes_and_labels_to_image(image, classes, coords, scores, classes_list, 0, 1.5e-3 * imh, # bigger = larger font [0, 0, 256], # self.meta['colors'][max_indx], - int(thick / 2) + 1) # bold + int(thick / 2) + 1 + ) # bold if save_name is not None: # cv2.imwrite('_my.png', image) @@ -313,7 +321,8 @@ def draw_mpii_pose_to_image(image, poses, save_name='image.png'): (int(joint_pos[start][0]), int(joint_pos[start][1])), (int(joint_pos[end][0]), int(joint_pos[end][1])), # up-left and botton-right line[1], - thick) + thick + ) # rr, cc, val = skimage.draw.line_aa(int(joint_pos[start][1]), int(joint_pos[start][0]), int(joint_pos[end][1]), int(joint_pos[end][0])) # image[rr, cc] = line[1] # draw circles @@ -332,7 +341,8 @@ def draw_mpii_pose_to_image(image, poses, save_name='image.png'): (int(head_rect[0]), int(head_rect[1])), (int(head_rect[2]), int(head_rect[3])), # up-left and botton-right [0, 180, 0], - thick) + thick + ) if save_name is not None: # cv2.imwrite(save_name, image) @@ -434,7 +444,9 @@ def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362): if n_color == 1: plt.imshow(np.reshape(CNN[:, :, :, count - 1], (n_row, n_col)), cmap='gray', interpolation="nearest") elif n_color == 3: - plt.imshow(np.reshape(CNN[:, :, :, count - 1], (n_row, n_col, n_color)), cmap='gray', interpolation="nearest") + plt.imshow( + np.reshape(CNN[:, :, :, count - 1], (n_row, n_col, n_color)), cmap='gray', interpolation="nearest" + ) else: raise Exception("Unknown n_color") plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick @@ -543,15 +555,21 @@ def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, second=5, save import matplotlib.pyplot as plt def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5, saveable=True, name='tsne', fig_idx=9862): - assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" + + if low_dim_embs.shape[0] < len(labels): + raise AssertionError("More labels than embeddings") + if saveable is False: plt.ion() plt.figure(fig_idx) + plt.figure(figsize=figsize) #in inches + for i, label in enumerate(labels): x, y = low_dim_embs[i, :] plt.scatter(x, y) plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom') + if saveable: plt.savefig(name + '.pdf', format='pdf') else: @@ -623,7 +641,8 @@ def draw_weights(W=None, second=10, saveable=True, shape=None, name='mnist', fig # feature = np.zeros_like(feature) # if np.mean(feature) < -0.015: # condition threshold # feature = np.zeros_like(feature) - plt.imshow(np.reshape(feature, (shape[0], shape[1])), cmap='gray', interpolation="nearest") #, vmin=np.min(feature), vmax=np.max(feature)) + plt.imshow(np.reshape(feature, (shape[0], shape[1])), cmap='gray', + interpolation="nearest") #, vmin=np.min(feature), vmax=np.max(feature)) # plt.title(name) # ------------------------------------------------------------ # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest") diff --git a/tests/requirements.txt b/tests/requirements.txt index 30ea1b241..43144cf40 100644 --- 
a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,5 @@ keras>=2.1,<2.2 +pycodestyle<2.4.0,>=2.0.0 pydocstyle>=2.1,<2.2 pytest>=3.4,<3.5 pytest-cache>=1.0,<1.1 diff --git a/tests/test_layers_basic.py b/tests/test_layers_basic.py index c42952744..46fc9a434 100644 --- a/tests/test_layers_basic.py +++ b/tests/test_layers_basic.py @@ -7,6 +7,7 @@ class Layer_Basic_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_convolution.py b/tests/test_layers_convolution.py index d408ec101..e49c2b046 100644 --- a/tests/test_layers_convolution.py +++ b/tests/test_layers_convolution.py @@ -7,6 +7,7 @@ class Layer_Convolution_Test(unittest.TestCase): + @classmethod def setUpClass(cls): ############ @@ -30,38 +31,42 @@ def setUpClass(cls): nin2 = tl.layers.InputLayer(x2, name='in2') n3 = tl.layers.Conv2dLayer( - nin2, - act=tf.nn.relu, - shape=(5, 5, 3, 32), - strides=(1, 2, 2, 1), - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=5e-2), - b_init=tf.constant_initializer(value=0.0), - name='conv2dlayer') + nin2, act=tf.nn.relu, shape=(5, 5, 3, 32), strides=(1, 2, 2, 1), padding='SAME', + W_init=tf.truncated_normal_initializer(stddev=5e-2), b_init=tf.constant_initializer(value=0.0), + name='conv2dlayer' + ) cls.shape_n3 = n3.outputs.get_shape().as_list() n4 = tl.layers.Conv2d(nin2, n_filter=32, filter_size=(3, 3), strides=(2, 2), act=None, name='conv2d') cls.shape_n4 = n4.outputs.get_shape().as_list() cls.n4_params = n4.all_params - n5 = tl.layers.Conv2d(nin2, n_filter=32, filter_size=(3, 3), strides=(2, 2), act=tf.nn.relu, b_init=None, name='conv2d_no_bias') + n5 = tl.layers.Conv2d( + nin2, n_filter=32, filter_size=(3, 3), strides=(2, 2), act=tf.nn.relu, b_init=None, name='conv2d_no_bias' + ) cls.shape_n5 = n5.outputs.get_shape().as_list() cls.n5_params = n5.all_params - n6 = tl.layers.DeConv2dLayer(nin2, shape=(5, 5, 32, 3), output_shape=(100, 200, 200, 32), strides=(1, 2, 2, 1), name='deconv2dlayer') + n6 = tl.layers.DeConv2dLayer( + nin2, shape=(5, 5, 32, 3), output_shape=(100, 200, 200, 32), strides=(1, 2, 2, 1), name='deconv2dlayer' + ) cls.shape_n6 = n6.outputs.get_shape().as_list() n7 = tl.layers.DeConv2d(nin2, n_filter=32, filter_size=(3, 3), strides=(2, 2), name='DeConv2d') cls.shape_n7 = n7.outputs.get_shape().as_list() - n8 = tl.layers.DepthwiseConv2d(nin2, shape=(3, 3), strides=(2, 2), act=tf.nn.relu, depth_multiplier=2, name='depthwise') + n8 = tl.layers.DepthwiseConv2d( + nin2, shape=(3, 3), strides=(2, 2), act=tf.nn.relu, depth_multiplier=2, name='depthwise' + ) cls.shape_n8 = n8.outputs.get_shape().as_list() n9 = tl.layers.Conv2d(nin2, n_filter=32, filter_size=(3, 3), strides=(2, 2), act=tf.nn.relu, name='conv2d2') n9 = tl.layers.GroupConv2d(n9, n_filter=32, filter_size=(3, 3), strides=(2, 2), name='group') cls.shape_n9 = n9.outputs.get_shape().as_list() - n10 = tl.layers.SeparableConv2d(nin2, n_filter=32, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, name='seperable1') + n10 = tl.layers.SeparableConv2d( + nin2, n_filter=32, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, name='seperable1' + ) cls.shape_n10 = n10.outputs.get_shape().as_list() cls.n10_all_layers = n10.all_layers cls.n10_params = n10.all_params diff --git a/tests/test_layers_core.py b/tests/test_layers_core.py index c83d14b4f..fb0cfb956 100644 --- a/tests/test_layers_core.py +++ b/tests/test_layers_core.py @@ -7,6 +7,7 @@ class Layer_Core_Test(unittest.TestCase): + @classmethod def setUpClass(cls): @@ -44,7 +45,9 @@ def setUpClass(cls): train_inputs 
= tf.placeholder(tf.int32, shape=cls.batch_size) train_labels = tf.placeholder(tf.int32, shape=(cls.batch_size, 1)) net3 = tl.layers.Word2vecEmbeddingInputlayer( - inputs=train_inputs, train_labels=train_labels, vocabulary_size=1000, embedding_size=200, num_sampled=64, name='word2vec') + inputs=train_inputs, train_labels=train_labels, vocabulary_size=1000, embedding_size=200, num_sampled=64, + name='word2vec' + ) net3.print_layers() net3.print_params(False) diff --git a/tests/test_layers_extend.py b/tests/test_layers_extend.py index 240612f87..6b8847744 100644 --- a/tests/test_layers_extend.py +++ b/tests/test_layers_extend.py @@ -7,6 +7,7 @@ class Layer_Extend_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_flow_control.py b/tests/test_layers_flow_control.py index 9ff900b1d..2cfce2790 100644 --- a/tests/test_layers_flow_control.py +++ b/tests/test_layers_flow_control.py @@ -7,6 +7,7 @@ class Layer_Flow_Control_Test(unittest.TestCase): + @classmethod def setUpClass(cls): x = tf.placeholder(tf.float32, shape=(None, 784), name='x') diff --git a/tests/test_layers_importer.py b/tests/test_layers_importer.py index 985a3211b..d998a4cf8 100644 --- a/tests/test_layers_importer.py +++ b/tests/test_layers_importer.py @@ -19,6 +19,7 @@ class Layer_Importer_Test(CustomTestCase): + @classmethod def setUpClass(cls): @@ -41,6 +42,7 @@ def tearDownClass(cls): tf.reset_default_graph() def test_lambda_layer(self): + def keras_block(x): x = k.layers.Dropout(0.8)(x) x = k.layers.Dense(100, activation='relu')(x) diff --git a/tests/test_layers_merge.py b/tests/test_layers_merge.py index 42c19607a..551bf9fc6 100644 --- a/tests/test_layers_merge.py +++ b/tests/test_layers_merge.py @@ -7,6 +7,7 @@ class Layer_Merge_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_normalization.py b/tests/test_layers_normalization.py index 4900f211c..79b3148d8 100644 --- a/tests/test_layers_normalization.py +++ b/tests/test_layers_normalization.py @@ -19,6 +19,7 @@ def model(x, is_train=True, reuse=False): class Layer_Normalization_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_padding.py b/tests/test_layers_padding.py index 984e388b7..29a8b0062 100644 --- a/tests/test_layers_padding.py +++ b/tests/test_layers_padding.py @@ -7,6 +7,7 @@ class Layer_Padding_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_pooling.py b/tests/test_layers_pooling.py index 2f987e7cc..c6560ea17 100644 --- a/tests/test_layers_pooling.py +++ b/tests/test_layers_pooling.py @@ -7,6 +7,7 @@ class Layer_Pooling_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_recurrent.py b/tests/test_layers_recurrent.py index feb19dfd5..4cc13bd73 100644 --- a/tests/test_layers_recurrent.py +++ b/tests/test_layers_recurrent.py @@ -7,6 +7,7 @@ class Layer_Recurrent_Test(unittest.TestCase): + @classmethod def setUpClass(cls): @@ -30,14 +31,22 @@ def setUpClass(cls): input_data = tf.placeholder(tf.int32, [cls.net1_batch_size, cls.num_steps]) - net1 = tl.layers.EmbeddingInputlayer(inputs=input_data, vocabulary_size=cls.vocab_size, embedding_size=cls.hidden_size, name='embed') + net1 = tl.layers.EmbeddingInputlayer( + inputs=input_data, vocabulary_size=cls.vocab_size, embedding_size=cls.hidden_size, name='embed' + ) net1 = tl.layers.DropoutLayer(net1, keep=cls.keep_prob, is_fix=True, is_train=cls.is_train, name='drop1') - net1 = tl.layers.RNNLayer(net1, 
cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.hidden_size, n_steps=cls.num_steps, return_last=False, name='lstm1') + net1 = tl.layers.RNNLayer( + net1, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.hidden_size, n_steps=cls.num_steps, + return_last=False, name='lstm1' + ) # lstm1 = net1 net1 = tl.layers.DropoutLayer(net1, keep=cls.keep_prob, is_fix=True, is_train=cls.is_train, name='drop2') - net1 = tl.layers.RNNLayer(net1, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.hidden_size, n_steps=cls.num_steps, return_last=True, name='lstm2') + net1 = tl.layers.RNNLayer( + net1, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.hidden_size, n_steps=cls.num_steps, + return_last=True, name='lstm2' + ) # lstm2 = net1 @@ -66,7 +75,9 @@ def setUpClass(cls): net2 = tl.layers.ReshapeLayer(net2, shape=(-1, cls.num_steps, int(net2.outputs._shape[-1]))) rnn = tl.layers.RNNLayer( - net2, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=200, n_steps=cls.num_steps, return_last=False, return_seq_2d=True, name='rnn') + net2, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=200, n_steps=cls.num_steps, return_last=False, + return_seq_2d=True, name='rnn' + ) net2 = tl.layers.DenseLayer(rnn, n_units=3, name='out') @@ -82,9 +93,13 @@ def setUpClass(cls): x3 = tf.placeholder(tf.int32, [cls.net3_batch_size, cls.num_steps]) - net3 = tl.layers.EmbeddingInputlayer(inputs=x3, vocabulary_size=cls.vocab_size, embedding_size=cls.hidden_size, name='emb') + net3 = tl.layers.EmbeddingInputlayer( + inputs=x3, vocabulary_size=cls.vocab_size, embedding_size=cls.hidden_size, name='emb' + ) net3 = tl.layers.BiRNNLayer( - net3, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.hidden_size, n_steps=cls.num_steps, return_last=False, return_seq_2d=False, name='birnn') + net3, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.hidden_size, n_steps=cls.num_steps, + return_last=False, return_seq_2d=False, name='birnn' + ) net3.print_layers() net3.print_params(False) @@ -95,16 +110,13 @@ def setUpClass(cls): cls.net3_n_params = net3.count_params() # n_layer=2 - net4 = tl.layers.EmbeddingInputlayer(inputs=x3, vocabulary_size=cls.vocab_size, embedding_size=cls.hidden_size, name='emb2') + net4 = tl.layers.EmbeddingInputlayer( + inputs=x3, vocabulary_size=cls.vocab_size, embedding_size=cls.hidden_size, name='emb2' + ) net4 = tl.layers.BiRNNLayer( - net4, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=cls.hidden_size, - n_steps=cls.num_steps, - n_layer=2, - return_last=False, - return_seq_2d=False, - name='birnn2') + net4, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.hidden_size, n_steps=cls.num_steps, n_layer=2, + return_last=False, return_seq_2d=False, name='birnn2' + ) net4.print_layers() net4.print_params(False) @@ -134,17 +146,16 @@ def setUpClass(cls): # =============================== Dynamic Synced input and output =============================== input_seqs = tf.placeholder(dtype=tf.int64, shape=[cls.net5_batch_size, None], name="input") - nin = tl.layers.EmbeddingInputlayer(inputs=input_seqs, vocabulary_size=cls.vocab_size, embedding_size=cls.embedding_size, name='seq_embedding') + nin = tl.layers.EmbeddingInputlayer( + inputs=input_seqs, vocabulary_size=cls.vocab_size, embedding_size=cls.embedding_size, name='seq_embedding' + ) rnn = tl.layers.DynamicRNNLayer( - nin, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=cls.embedding_size, - dropout=(cls.keep_prob if cls.is_train else None), - sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), - return_last=False, - return_seq_2d=True, - 
name='dynamicrnn') + nin, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.embedding_size, + dropout=(cls.keep_prob + if cls.is_train else None), sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), + return_last=False, return_seq_2d=True, name='dynamicrnn' + ) net5 = tl.layers.DenseLayer(rnn, n_units=cls.vocab_size, name="o") @@ -158,30 +169,23 @@ def setUpClass(cls): cls.net5_n_params = net5.count_params() # n_layer=3 - nin = tl.layers.EmbeddingInputlayer(inputs=input_seqs, vocabulary_size=cls.vocab_size, embedding_size=cls.embedding_size, name='seq_embedding2') + nin = tl.layers.EmbeddingInputlayer( + inputs=input_seqs, vocabulary_size=cls.vocab_size, embedding_size=cls.embedding_size, name='seq_embedding2' + ) rnn = tl.layers.DynamicRNNLayer( - nin, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=cls.embedding_size, - dropout=(cls.keep_prob if cls.is_train else None), - sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), - n_layer=3, - return_last=False, - return_seq_2d=True, - name='dynamicrnn2') + nin, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.embedding_size, + dropout=(cls.keep_prob + if cls.is_train else None), sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), + n_layer=3, return_last=False, return_seq_2d=True, name='dynamicrnn2' + ) # net6 = tl.layers.DenseLayer(rnn, n_units=cls.vocab_size, name="o2") net6 = tl.layers.DynamicRNNLayer( - nin, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=cls.embedding_size, - dropout=None, - sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), - n_layer=3, - return_last=False, - return_seq_2d=False, - name='dynamicrnn3') + nin, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.embedding_size, dropout=None, + sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), n_layer=3, return_last=False, + return_seq_2d=False, name='dynamicrnn3' + ) # net6 = tl.layers.DenseLayer(rnn, n_units=vocab_size, name="o3") @@ -192,15 +196,10 @@ def setUpClass(cls): cls.net6_rnn_shape = rnn.outputs.get_shape().as_list() net7 = tl.layers.DynamicRNNLayer( - nin, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=cls.embedding_size, - dropout=None, - sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), - n_layer=1, - return_last=True, - return_seq_2d=False, - name='dynamicrnn4') + nin, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.embedding_size, dropout=None, + sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), n_layer=1, return_last=True, + return_seq_2d=False, name='dynamicrnn4' + ) net7.print_layers() net7.print_params(False) @@ -208,15 +207,10 @@ def setUpClass(cls): cls.net7_shape = net7.outputs.get_shape().as_list() net8 = tl.layers.DynamicRNNLayer( - nin, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=cls.embedding_size, - dropout=None, - sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), - n_layer=1, - return_last=True, - return_seq_2d=True, - name='dynamicrnn5') + nin, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.embedding_size, dropout=None, + sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), n_layer=1, return_last=True, + return_seq_2d=True, name='dynamicrnn5' + ) net8.print_layers() net8.print_params(False) @@ -226,14 +220,11 @@ def setUpClass(cls): # =============================== BiDynamic Synced input and output =============================== rnn = tl.layers.BiDynamicRNNLayer( - nin, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=cls.embedding_size, - dropout=(cls.keep_prob if cls.is_train else None), - 
sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), - return_last=False, - return_seq_2d=True, - name='bidynamicrnn') + nin, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.embedding_size, + dropout=(cls.keep_prob + if cls.is_train else None), sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), + return_last=False, return_seq_2d=True, name='bidynamicrnn' + ) net9 = tl.layers.DenseLayer(rnn, n_units=cls.vocab_size, name="o4") @@ -248,15 +239,11 @@ def setUpClass(cls): # n_layer=2 rnn = tl.layers.BiDynamicRNNLayer( - nin, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=cls.embedding_size, - dropout=(cls.keep_prob if cls.is_train else None), - sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), - n_layer=2, - return_last=False, - return_seq_2d=True, - name='bidynamicrnn2') + nin, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=cls.embedding_size, + dropout=(cls.keep_prob + if cls.is_train else None), sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), + n_layer=2, return_last=False, return_seq_2d=True, name='bidynamicrnn2' + ) net10 = tl.layers.DenseLayer(rnn, n_units=cls.vocab_size, name="o5") @@ -281,24 +268,22 @@ def setUpClass(cls): # for translation, you may want to use 2 seperated embedding layers with tf.variable_scope("embedding") as vs: - net_encode = tl.layers.EmbeddingInputlayer(inputs=encode_seqs, vocabulary_size=10000, embedding_size=200, name='seq_embed') + net_encode = tl.layers.EmbeddingInputlayer( + inputs=encode_seqs, vocabulary_size=10000, embedding_size=200, name='seq_embed' + ) vs.reuse_variables() # tl.layers.set_name_reuse(True) - net_decode = tl.layers.EmbeddingInputlayer(inputs=decode_seqs, vocabulary_size=10000, embedding_size=200, name='seq_embed') + net_decode = tl.layers.EmbeddingInputlayer( + inputs=decode_seqs, vocabulary_size=10000, embedding_size=200, name='seq_embed' + ) net11 = tl.layers.Seq2Seq( - net_encode, - net_decode, - cell_fn=tf.contrib.rnn.BasicLSTMCell, - n_hidden=200, + net_encode, net_decode, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=200, initializer=tf.random_uniform_initializer(-0.1, 0.1), encode_sequence_length=tl.layers.retrieve_seq_length_op2(encode_seqs), - decode_sequence_length=tl.layers.retrieve_seq_length_op2(decode_seqs), - initial_state_encode=None, - dropout=None, - n_layer=2, - return_seq_2d=True, - name='Seq2seq') + decode_sequence_length=tl.layers.retrieve_seq_length_op2(decode_seqs), initial_state_encode=None, + dropout=None, n_layer=2, return_seq_2d=True, name='Seq2seq' + ) net11 = tl.layers.DenseLayer(net11, n_units=10000, act=tf.identity, name='oo') diff --git a/tests/test_layers_shape.py b/tests/test_layers_shape.py index 22eb87952..501946397 100644 --- a/tests/test_layers_shape.py +++ b/tests/test_layers_shape.py @@ -7,6 +7,7 @@ class Layer_Shape_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_spatial_transformer.py b/tests/test_layers_spatial_transformer.py index 0976d0f49..b415b5a04 100644 --- a/tests/test_layers_spatial_transformer.py +++ b/tests/test_layers_spatial_transformer.py @@ -26,8 +26,12 @@ def model(x, is_train, reuse): n = tl.layers.SpatialTransformer2dAffineLayer(nin, theta_layer=nt, out_size=[40, 40], name='spatial') s = n ## 3. 
Classifier - n = tl.layers.Conv2d(n, n_filter=16, filter_size=(3, 3), strides=(2, 2), act=tf.nn.relu, padding='SAME', name='conv1') - n = tl.layers.Conv2d(n, n_filter=16, filter_size=(3, 3), strides=(2, 2), act=tf.nn.relu, padding='SAME', name='conv2') + n = tl.layers.Conv2d( + n, n_filter=16, filter_size=(3, 3), strides=(2, 2), act=tf.nn.relu, padding='SAME', name='conv1' + ) + n = tl.layers.Conv2d( + n, n_filter=16, filter_size=(3, 3), strides=(2, 2), act=tf.nn.relu, padding='SAME', name='conv2' + ) n = tl.layers.FlattenLayer(n, name='flatten2') n = tl.layers.DenseLayer(n, n_units=1024, act=tf.nn.relu, name='out1') n = tl.layers.DenseLayer(n, n_units=10, act=tf.identity, name='out2') @@ -35,6 +39,7 @@ def model(x, is_train, reuse): class Layer_Spatial_Transformer_Test(CustomTestCase): + @classmethod def setUpClass(cls): cls.x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) diff --git a/tests/test_layers_special_activation.py b/tests/test_layers_special_activation.py index 9a087c96c..7527dfb3e 100644 --- a/tests/test_layers_special_activation.py +++ b/tests/test_layers_special_activation.py @@ -7,6 +7,7 @@ class Layer_Special_Activation_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_stack.py b/tests/test_layers_stack.py index fe10113c1..5ff8cb784 100644 --- a/tests/test_layers_stack.py +++ b/tests/test_layers_stack.py @@ -7,6 +7,7 @@ class Layer_Stack_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_layers_super_resolution.py b/tests/test_layers_super_resolution.py index b2427195d..71d2faba8 100644 --- a/tests/test_layers_super_resolution.py +++ b/tests/test_layers_super_resolution.py @@ -7,6 +7,7 @@ class Layer_Super_Resolution_Test(unittest.TestCase): + @classmethod def setUpClass(cls): t_signal = tf.placeholder('float32', [10, 100, 4], name='x') diff --git a/tests/test_layers_time_distributed.py b/tests/test_layers_time_distributed.py index bac25201a..d2618c9ac 100644 --- a/tests/test_layers_time_distributed.py +++ b/tests/test_layers_time_distributed.py @@ -14,11 +14,17 @@ def model(x, is_train=True, reuse=False, name_scope="env1"): with tf.variable_scope(name_scope, reuse=reuse): net = tl.layers.InputLayer(x, name='input') - net = tl.layers.TimeDistributedLayer(net, layer_class=tl.layers.DenseLayer, args={'n_units': 50, 'name': 'dense'}, name='time_dense') + net = tl.layers.TimeDistributedLayer( + net, layer_class=tl.layers.DenseLayer, args={ + 'n_units': 50, + 'name': 'dense' + }, name='time_dense' + ) return net class Layer_Time_Distributed_Test(CustomTestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_mnist_simple.py b/tests/test_mnist_simple.py index 5829d0017..eec717f64 100644 --- a/tests/test_mnist_simple.py +++ b/tests/test_mnist_simple.py @@ -12,6 +12,7 @@ class Simple_MNIST_Test(CustomTestCase): + @classmethod def setUpClass(cls): @@ -69,24 +70,14 @@ def test_reuse_vgg(self): # train the network tl.utils.fit( - sess, - self.network, - self.train_op, - self.cost, - X_train, - y_train, - self.x, - self.y_, - acc=self.acc, - batch_size=500, - n_epoch=1, - print_freq=1, - X_val=X_val, - y_val=y_val, - eval_train=False) + sess, self.network, self.train_op, self.cost, X_train, y_train, self.x, self.y_, acc=self.acc, + batch_size=500, n_epoch=1, print_freq=1, X_val=X_val, y_val=y_val, eval_train=False + ) # evaluation - tl.utils.test(sess, self.network, self.acc, X_test, y_test, self.x, self.y_, batch_size=None, cost=self.cost) + tl.utils.test( + sess, self.network, self.acc, 
X_test, y_test, self.x, self.y_, batch_size=None, cost=self.cost + ) # save the network to .npz file tl.files.save_npz(self.network.all_params, name='model.npz') diff --git a/tests/test_models.py b/tests/test_models.py index 206e93807..a6dbb3a34 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -12,6 +12,7 @@ class VGG_Model_Test(CustomTestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_pydocstyle.py b/tests/test_pydocstyle.py index 0d6f0059b..bd2d6efe9 100644 --- a/tests/test_pydocstyle.py +++ b/tests/test_pydocstyle.py @@ -22,6 +22,7 @@ def lookup_error_params(code): class PyDOC_Style_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_reuse_mlp.py b/tests/test_reuse_mlp.py index bb346eed8..7a2d9eab2 100644 --- a/tests/test_reuse_mlp.py +++ b/tests/test_reuse_mlp.py @@ -21,6 +21,7 @@ def mlp(x, is_train=True, reuse=False): class MLP_Reuse_Test(unittest.TestCase): + @classmethod def setUpClass(cls): diff --git a/tests/test_yapf_format.py b/tests/test_yapf_format.py index c2569ca51..1eb6cc497 100644 --- a/tests/test_yapf_format.py +++ b/tests/test_yapf_format.py @@ -6,8 +6,10 @@ try: import tests.testing as testing + from tests.unittests_helper import CustomTestCase except ImportError: import testing + from unittests_helper import CustomTestCase from yapf.yapflib.yapf_api import FormatCode @@ -21,32 +23,42 @@ def _read_utf_8_file(filename): return f.read() -class YAPF_Style_Test(unittest.TestCase): +class YAPF_Style_Test(CustomTestCase): + @classmethod def setUpClass(cls): - cls.files_badly_formated = list() + cls.badly_formatted_files = list() + cls.files_2_test = testing.list_all_py_files() + + def test_files_format(self): - for filename in testing.list_all_py_files(): + for file in self.files_2_test: - print(filename) - code = _read_utf_8_file(filename) + print(file) + code = _read_utf_8_file(file) # https://pypi.python.org/pypi/yapf/0.20.2#example-as-a-module - diff, changed = FormatCode(code, filename=filename, style_config='.style.yapf', print_diff=True) + diff, changed = FormatCode(code, filename=file, style_config='.style.yapf', print_diff=True) if changed: print(diff) - cls.files_badly_formated.append(filename) + self.badly_formatted_files.append(file) + + with self.assertNotRaises(Exception): + + str_err = "" - def test_unformated_files(self): - if self.files_badly_formated: - print() + if self.badly_formatted_files: + for filename in self.badly_formatted_files: + str_err += 'yapf -i --style=.style.yapf %s\n' % filename - for filename in self.files_badly_formated: - print('yapf -i %s' % filename) + str_err = "\n======================================================================================\n" \ + "Bad Coding Style: %d file(s) need to be formatted, run the following commands to fix: \n%s" \ + "======================================================================================" % ( + len(self.badly_formatted_files), str_err) - raise Exception("Bad Coding Style: %d files need to be formatted, run the following commands to fix" % len(self.files_badly_formated)) + raise Exception(str_err) if __name__ == '__main__': diff --git a/tests/unittests_helper.py b/tests/unittests_helper.py index b4137c9b0..71e49ffdf 100644 --- a/tests/unittests_helper.py +++ b/tests/unittests_helper.py @@ -5,6 +5,7 @@ class CustomTestCase(unittest.TestCase): + @contextmanager def assertNotRaises(self, exc_type): try:
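The reworked tests/test_yapf_format.py above is the enforcement point for the whole reformat: any file that yapf would still change fails the suite and prints the exact fix commands. A condensed sketch of the same check; FormatCode and its keyword arguments follow the yapf API already used in the diff, while the file list below is a stand-in for testing.list_all_py_files():

    from yapf.yapflib.yapf_api import FormatCode

    FILES = ['tensorlayer/utils.py']  # stand-in for testing.list_all_py_files()

    badly_formatted = []
    for path in FILES:
        with open(path, encoding='utf-8') as f:  # plays the role of _read_utf_8_file
            code = f.read()
        # With print_diff=True, FormatCode returns a unified diff and a
        # `changed` flag instead of the reformatted source itself.
        diff, changed = FormatCode(code, filename=path, style_config='.style.yapf', print_diff=True)
        if changed:
            badly_formatted.append(path)

    if badly_formatted:
        fixes = ''.join('yapf -i --style=.style.yapf %s\n' % p for p in badly_formatted)
        raise Exception('Bad Coding Style: %d file(s) need to be formatted:\n%s' % (len(badly_formatted), fixes))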