From f2f87b0466fbe34e519dfa88536fd550fb0229b1 Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Mon, 13 May 2019 19:04:13 +0100 Subject: [PATCH 1/4] modify RL examples to TF2 TL2 --- .../tutorial_cifar10_cnn_static.py | 2 +- .../tutorial_mnist_mlp_dynamic.py | 2 +- .../tutorial_mnist_mlp_dynamic_2.py | 2 +- .../tutorial_mnist_mlp_static.py | 2 +- .../tutorial_mnist_mlp_static_2.py | 2 +- .../basic_tutorials/tutorial_mnist_siamese.py | 2 +- .../basic_tutorials/tutorial_mnist_simple.py | 3 +- .../tutorial_fast_affine_transform.py | 4 +- .../data_process/tutorial_tf_dataset_voc.py | 2 +- examples/data_process/tutorial_tfrecord.py | 3 +- examples/data_process/tutorial_tfrecord2.py | 2 +- examples/data_process/tutorial_tfrecord3.py | 2 +- examples/database/dispatch_tasks.py | 1 - examples/database/task_script.py | 1 - ...torial_imagenet_inceptionV3_distributed.py | 4 +- .../tutorial_mnist_distributed.py | 1 - .../tutorial_cifar10_distributed_trainer.py | 2 +- .../tutorial_mnist_distributed_trainer.py | 2 +- examples/keras_tfslim/tutorial_keras.py | 2 +- .../tutorial_models_mobilenetv1.py | 2 +- .../tutorial_models_squeezenetv1.py | 2 +- .../pretrained_cnn/tutorial_models_vgg16.py | 2 +- .../pretrained_cnn/tutorial_models_vgg19.py | 2 +- .../tutorial_models_vgg_static.py | 2 +- .../tutorial_binarynet_cifar10_tfrecord.py | 1 - .../tutorial_binarynet_mnist_cnn.py | 1 - .../tutorial_dorefanet_cifar10_tfrecord.py | 1 - .../tutorial_dorefanet_mnist_cnn.py | 1 - .../tutorial_quanconv_cifar10.py | 2 +- .../quantized_net/tutorial_quanconv_mnist.py | 1 - ...tutorial_ternaryweight_cifar10_tfrecord.py | 1 - .../tutorial_ternaryweight_mnist_cnn.py | 1 - .../tutorial_atari_pong.py | 14 +- ...ial_bipedalwalker_a3c_continuous_action.py | 390 ++++++++++++------ .../tutorial_cartpole_ac.py | 31 +- .../tutorial_frozenlake_dqn.py | 16 +- ...ial_spatial_transformer_network_dynamic.py | 2 + ...rial_spatial_transformer_network_static.py | 2 + .../tutorial_imdb_fasttext.py | 2 +- .../text_generation/tutorial_generate_text.py | 2 +- examples/text_ptb/tutorial_ptb_lstm.py | 2 +- .../tutorial_ptb_lstm_state_is_tuple.py | 2 +- .../tutorial_word2vec_basic.py | 2 +- examples/tutorial_work_with_onnx.py | 4 +- tensorlayer/activation.py | 1 - tensorlayer/cost.py | 3 +- tensorlayer/db.py | 13 +- tensorlayer/distributed.py | 1 - .../files/dataset_loaders/celebA_dataset.py | 3 +- .../files/dataset_loaders/cyclegan_dataset.py | 3 +- .../dataset_loaders/flickr_1M_dataset.py | 6 +- .../dataset_loaders/flickr_25k_dataset.py | 6 +- .../files/dataset_loaders/mpii_dataset.py | 3 +- .../files/dataset_loaders/voc_dataset.py | 5 +- .../dataset_loaders/wmt_en_fr_dataset.py | 1 - tensorlayer/files/utils.py | 18 +- tensorlayer/initializers.py | 1 + tensorlayer/layers/activation.py | 1 - tensorlayer/layers/convolution/binary_conv.py | 1 - .../layers/convolution/deformable_conv.py | 1 - .../layers/convolution/depthwise_conv.py | 1 - tensorlayer/layers/convolution/dorefa_conv.py | 1 - tensorlayer/layers/convolution/expert_conv.py | 1 - .../layers/convolution/expert_deconv.py | 1 - tensorlayer/layers/convolution/group_conv.py | 1 - tensorlayer/layers/convolution/quan_conv.py | 4 +- .../layers/convolution/quan_conv_bn.py | 4 +- .../layers/convolution/separable_conv.py | 2 +- .../layers/convolution/simplified_conv.py | 1 - .../layers/convolution/simplified_deconv.py | 2 +- .../layers/convolution/super_resolution.py | 1 - .../layers/convolution/ternary_conv.py | 1 - tensorlayer/layers/core.py | 6 +- 
tensorlayer/layers/dense/base_dense.py | 2 +- tensorlayer/layers/dense/binary_dense.py | 1 - tensorlayer/layers/dense/dorefa_dense.py | 1 - tensorlayer/layers/dense/dropconnect.py | 4 +- tensorlayer/layers/dense/quan_dense.py | 4 +- tensorlayer/layers/dense/quan_dense_bn.py | 4 +- tensorlayer/layers/dense/ternary_dense.py | 1 - tensorlayer/layers/dropout.py | 1 - tensorlayer/layers/embedding.py | 2 +- tensorlayer/layers/extend.py | 1 - tensorlayer/layers/image_resampling.py | 1 - tensorlayer/layers/inputs.py | 2 +- tensorlayer/layers/lambda_layers.py | 3 +- tensorlayer/layers/merge.py | 1 - tensorlayer/layers/noise.py | 1 - tensorlayer/layers/normalization.py | 3 +- tensorlayer/layers/padding.py | 1 - tensorlayer/layers/pooling.py | 1 - tensorlayer/layers/quantize.py | 1 - tensorlayer/layers/recurrent.py | 1 - tensorlayer/layers/scale.py | 1 - tensorlayer/layers/shape.py | 1 - tensorlayer/layers/spatial_transformer.py | 4 +- tensorlayer/layers/stack.py | 1 - tensorlayer/layers/utils.py | 2 +- tensorlayer/logging/contrib/hyperdash.py | 1 - tensorlayer/models/core.py | 3 +- tensorlayer/models/mobilenetv1.py | 1 - tensorlayer/models/squeezenetv1.py | 3 +- tensorlayer/models/vgg.py | 4 +- tensorlayer/nlp.py | 4 +- tensorlayer/rein.py | 3 +- tensorlayer/utils.py | 2 +- 106 files changed, 389 insertions(+), 299 deletions(-) diff --git a/examples/basic_tutorials/tutorial_cifar10_cnn_static.py b/examples/basic_tutorials/tutorial_cifar10_cnn_static.py index c12c791a1..93794c414 100644 --- a/examples/basic_tutorials/tutorial_cifar10_cnn_static.py +++ b/examples/basic_tutorials/tutorial_cifar10_cnn_static.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input, LocalResponseNorm, MaxPool2d) diff --git a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py index 1ffa7fbe0..13db1abae 100644 --- a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py +++ b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py @@ -1,8 +1,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Input from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_2.py b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_2.py index b752012b0..0d94b1dfa 100644 --- a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_2.py +++ b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_2.py @@ -1,8 +1,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Input, LayerList from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_mlp_static.py b/examples/basic_tutorials/tutorial_mnist_mlp_static.py index c9c15f911..de811a8d8 100644 --- a/examples/basic_tutorials/tutorial_mnist_mlp_static.py +++ b/examples/basic_tutorials/tutorial_mnist_mlp_static.py @@ -1,8 +1,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Input from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_mlp_static_2.py b/examples/basic_tutorials/tutorial_mnist_mlp_static_2.py index f0836c528..a9a2c7d48 100644 --- 
a/examples/basic_tutorials/tutorial_mnist_mlp_static_2.py +++ b/examples/basic_tutorials/tutorial_mnist_mlp_static_2.py @@ -1,8 +1,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Input from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_siamese.py b/examples/basic_tutorials/tutorial_mnist_siamese.py index db43f1163..fe4abdc52 100644 --- a/examples/basic_tutorials/tutorial_mnist_siamese.py +++ b/examples/basic_tutorials/tutorial_mnist_siamese.py @@ -14,8 +14,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Flatten, Input from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_simple.py b/examples/basic_tutorials/tutorial_mnist_simple.py index 04e233819..ceaee0c48 100644 --- a/examples/basic_tutorials/tutorial_mnist_simple.py +++ b/examples/basic_tutorials/tutorial_mnist_simple.py @@ -1,9 +1,10 @@ #! /usr/bin/python # -*- coding: utf-8 -*- +import numpy as np + import tensorflow as tf import tensorlayer as tl -import numpy as np tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/examples/data_process/tutorial_fast_affine_transform.py b/examples/data_process/tutorial_fast_affine_transform.py index 52452ffd5..71890f5bd 100644 --- a/examples/data_process/tutorial_fast_affine_transform.py +++ b/examples/data_process/tutorial_fast_affine_transform.py @@ -8,10 +8,10 @@ import multiprocessing import time -import cv2 import numpy as np -import tensorflow as tf +import cv2 +import tensorflow as tf import tensorlayer as tl # tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/examples/data_process/tutorial_tf_dataset_voc.py b/examples/data_process/tutorial_tf_dataset_voc.py index fab1612f7..9779b1f60 100644 --- a/examples/data_process/tutorial_tf_dataset_voc.py +++ b/examples/data_process/tutorial_tf_dataset_voc.py @@ -13,8 +13,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl # tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/data_process/tutorial_tfrecord.py b/examples/data_process/tutorial_tfrecord.py index 4cb832c1d..bcf3fe46a 100644 --- a/examples/data_process/tutorial_tfrecord.py +++ b/examples/data_process/tutorial_tfrecord.py @@ -22,9 +22,9 @@ import os import numpy as np -import tensorflow as tf from PIL import Image +import tensorflow as tf import tensorlayer as tl ## Save data ================================================================== @@ -97,4 +97,3 @@ def read_and_decode(filename): print("img_batch : %s" % img_batch.shape) print("label_batch : %s" % label_batch.shape) tl.visualize.images2d(img_batch, second=1, saveable=False, name='batch', dtype=None, fig_idx=2020121) - diff --git a/examples/data_process/tutorial_tfrecord2.py b/examples/data_process/tutorial_tfrecord2.py index be41b697f..22b3d7757 100755 --- a/examples/data_process/tutorial_tfrecord2.py +++ b/examples/data_process/tutorial_tfrecord2.py @@ -14,10 +14,10 @@ import os import numpy as np + # import matplotlib # matplotlib.use('GTK') import tensorflow as tf - import tensorlayer as tl # Download data, and convert to TFRecord format, see ```tutorial_tfrecord.py``` diff --git a/examples/data_process/tutorial_tfrecord3.py b/examples/data_process/tutorial_tfrecord3.py index 9e5751a25..bc8752f2a 100644 --- a/examples/data_process/tutorial_tfrecord3.py +++ 
b/examples/data_process/tutorial_tfrecord3.py @@ -19,9 +19,9 @@ import os import numpy as np -import tensorflow as tf from PIL import Image +import tensorflow as tf import tensorlayer as tl diff --git a/examples/database/dispatch_tasks.py b/examples/database/dispatch_tasks.py index d1204bcd4..260257e77 100644 --- a/examples/database/dispatch_tasks.py +++ b/examples/database/dispatch_tasks.py @@ -6,7 +6,6 @@ import time import tensorflow as tf - import tensorlayer as tl tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/examples/database/task_script.py b/examples/database/task_script.py index ad51dd3ed..58ef60d1a 100644 --- a/examples/database/task_script.py +++ b/examples/database/task_script.py @@ -1,7 +1,6 @@ """Sample task script.""" import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py b/examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py index 936ae9702..15c0a3f3c 100644 --- a/examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py +++ b/examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py @@ -19,7 +19,9 @@ from xml.etree import ElementTree import numpy as np + import tensorflow as tf +import tensorlayer as tl from tensorflow.contrib import slim from tensorflow.contrib.slim.python.slim.nets.inception_v3 import (inception_v3, inception_v3_arg_scope) @@ -29,8 +31,6 @@ from tensorflow.python.training.monitored_session import \ SingularMonitoredSession -import tensorlayer as tl - tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/examples/deprecated_tutorials/tutorial_mnist_distributed.py b/examples/deprecated_tutorials/tutorial_mnist_distributed.py index 29d291ba4..18f7cdb92 100644 --- a/examples/deprecated_tutorials/tutorial_mnist_distributed.py +++ b/examples/deprecated_tutorials/tutorial_mnist_distributed.py @@ -13,7 +13,6 @@ """ import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/distributed_training/tutorial_cifar10_distributed_trainer.py b/examples/distributed_training/tutorial_cifar10_distributed_trainer.py index 1ddc2d937..ce3aec007 100644 --- a/examples/distributed_training/tutorial_cifar10_distributed_trainer.py +++ b/examples/distributed_training/tutorial_cifar10_distributed_trainer.py @@ -15,8 +15,8 @@ import multiprocessing import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import (BatchNormLayer, Conv2d, DenseLayer, FlattenLayer, InputLayer, MaxPool2d) diff --git a/examples/distributed_training/tutorial_mnist_distributed_trainer.py b/examples/distributed_training/tutorial_mnist_distributed_trainer.py index 0f1b8b6dd..0cf916370 100755 --- a/examples/distributed_training/tutorial_mnist_distributed_trainer.py +++ b/examples/distributed_training/tutorial_mnist_distributed_trainer.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/keras_tfslim/tutorial_keras.py b/examples/keras_tfslim/tutorial_keras.py index 0622bc745..33a9ca860 100644 --- a/examples/keras_tfslim/tutorial_keras.py +++ b/examples/keras_tfslim/tutorial_keras.py @@ -4,8 +4,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from 
tensorlayer.layers import Input, Lambda diff --git a/examples/pretrained_cnn/tutorial_models_mobilenetv1.py b/examples/pretrained_cnn/tutorial_models_mobilenetv1.py index 8d7b35a6b..6b797a075 100644 --- a/examples/pretrained_cnn/tutorial_models_mobilenetv1.py +++ b/examples/pretrained_cnn/tutorial_models_mobilenetv1.py @@ -10,8 +10,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/pretrained_cnn/tutorial_models_squeezenetv1.py b/examples/pretrained_cnn/tutorial_models_squeezenetv1.py index 9b6ee4e7f..755d6c28b 100644 --- a/examples/pretrained_cnn/tutorial_models_squeezenetv1.py +++ b/examples/pretrained_cnn/tutorial_models_squeezenetv1.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/pretrained_cnn/tutorial_models_vgg16.py b/examples/pretrained_cnn/tutorial_models_vgg16.py index e6bb1c22e..b1bd3823f 100644 --- a/examples/pretrained_cnn/tutorial_models_vgg16.py +++ b/examples/pretrained_cnn/tutorial_models_vgg16.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/pretrained_cnn/tutorial_models_vgg19.py b/examples/pretrained_cnn/tutorial_models_vgg19.py index 850412c38..922c3bdf5 100644 --- a/examples/pretrained_cnn/tutorial_models_vgg19.py +++ b/examples/pretrained_cnn/tutorial_models_vgg19.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/pretrained_cnn/tutorial_models_vgg_static.py b/examples/pretrained_cnn/tutorial_models_vgg_static.py index 40a3ed865..a0e056e4d 100644 --- a/examples/pretrained_cnn/tutorial_models_vgg_static.py +++ b/examples/pretrained_cnn/tutorial_models_vgg_static.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py b/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py index 98532debb..d3205045a 100644 --- a/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py +++ b/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py @@ -43,7 +43,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_binarynet_mnist_cnn.py b/examples/quantized_net/tutorial_binarynet_mnist_cnn.py index 248812e23..84fbf7fc9 100644 --- a/examples/quantized_net/tutorial_binarynet_mnist_cnn.py +++ b/examples/quantized_net/tutorial_binarynet_mnist_cnn.py @@ -4,7 +4,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_dorefanet_cifar10_tfrecord.py b/examples/quantized_net/tutorial_dorefanet_cifar10_tfrecord.py index 9c8ab1239..fe7666bab 100644 --- a/examples/quantized_net/tutorial_dorefanet_cifar10_tfrecord.py +++ b/examples/quantized_net/tutorial_dorefanet_cifar10_tfrecord.py @@ -43,7 +43,6 @@ import time import tensorflow as tf - import tensorlayer as tl 
tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_dorefanet_mnist_cnn.py b/examples/quantized_net/tutorial_dorefanet_mnist_cnn.py index 90d7b0893..d8cab9bc8 100644 --- a/examples/quantized_net/tutorial_dorefanet_mnist_cnn.py +++ b/examples/quantized_net/tutorial_dorefanet_mnist_cnn.py @@ -4,7 +4,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_quanconv_cifar10.py b/examples/quantized_net/tutorial_quanconv_cifar10.py index 6eb35ed67..f93368467 100644 --- a/examples/quantized_net/tutorial_quanconv_cifar10.py +++ b/examples/quantized_net/tutorial_quanconv_cifar10.py @@ -41,8 +41,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl bitW = 8 diff --git a/examples/quantized_net/tutorial_quanconv_mnist.py b/examples/quantized_net/tutorial_quanconv_mnist.py index 4060c6137..66d52d13c 100644 --- a/examples/quantized_net/tutorial_quanconv_mnist.py +++ b/examples/quantized_net/tutorial_quanconv_mnist.py @@ -4,7 +4,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_ternaryweight_cifar10_tfrecord.py b/examples/quantized_net/tutorial_ternaryweight_cifar10_tfrecord.py index f1ee7b4bb..b695fa88a 100644 --- a/examples/quantized_net/tutorial_ternaryweight_cifar10_tfrecord.py +++ b/examples/quantized_net/tutorial_ternaryweight_cifar10_tfrecord.py @@ -42,7 +42,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_ternaryweight_mnist_cnn.py b/examples/quantized_net/tutorial_ternaryweight_mnist_cnn.py index e1c305db6..6850b9591 100644 --- a/examples/quantized_net/tutorial_ternaryweight_mnist_cnn.py +++ b/examples/quantized_net/tutorial_ternaryweight_mnist_cnn.py @@ -4,7 +4,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/reinforcement_learning/tutorial_atari_pong.py b/examples/reinforcement_learning/tutorial_atari_pong.py index ad8e264df..0e8c62009 100644 --- a/examples/reinforcement_learning/tutorial_atari_pong.py +++ b/examples/reinforcement_learning/tutorial_atari_pong.py @@ -29,16 +29,16 @@ import time import numpy as np -import tensorflow as tf import gym +import tensorflow as tf import tensorlayer as tl ## enable eager mode -tf.enable_eager_execution() +# tf.enable_eager_execution() -tf.logging.set_verbosity(tf.logging.DEBUG) # enable logging +# tf.logging.set_verbosity(tf.logging.DEBUG) # enable logging tl.logging.set_verbosity(tl.logging.DEBUG) # hyper-parameters @@ -52,7 +52,7 @@ render = False # display the game environment # resume = True # load existing policy network model_file_name = "model_pong" -np.set_printoptions(threshold=np.nan) +np.set_printoptions(threshold=np.inf) def prepro(I): @@ -91,7 +91,7 @@ def get_model(inputs_shape): # t_actions = tf.placeholder(tf.int32, shape=[None]) # t_discount_rewards = tf.placeholder(tf.float32, shape=[None]) # loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards) -optimizer = tf.train.RMSPropOptimizer(learning_rate, decay_rate)#.minimize(loss) +optimizer = tf.optimizers.RMSprop(lr=learning_rate, decay=decay_rate)#.minimize(loss) # with tf.Session() as sess: # sess.run(tf.global_variables_initializer()) @@ -115,7 +115,7 @@ def 
get_model(inputs_shape): prev_x = cur_x # prob = sess.run(sampling_prob, feed_dict={t_states: x}) - _prob = model(x).outputs + _prob = model(x) prob = tf.nn.softmax(_prob) # action. 1: STOP 2: UP 3: DOWN @@ -150,7 +150,7 @@ def get_model(inputs_shape): # t_discount_rewards = tf.placeholder(tf.float32, shape=[None]) # loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards) with tf.GradientTape() as tape: - _prob = model(epx).outputs + _prob = model(epx) _loss = tl.rein.cross_entropy_reward_loss(_prob, epy, disR) grad = tape.gradient(_loss, train_weights) optimizer.apply_gradients(zip(grad, train_weights)) diff --git a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py index 2f1f96d67..145024ba9 100644 --- a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py +++ b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py @@ -29,32 +29,46 @@ and joints angular speed, legs contact with ground, and 10 lidar rangefinder measurements. There's no coordinates in the state vector. +tensorflow 2.0.0a0 +tensorflow-probability 0.6.0 +tensorlayer 2.0.0 + +&& +pip install box2d box2d-kengz --user + """ import multiprocessing import threading import numpy as np -import tensorflow as tf import gym +import tensorflow as tf +import tensorflow_probability as tfp import tensorlayer as tl from tensorlayer.layers import DenseLayer, InputLayer -tf.logging.set_verbosity(tf.logging.DEBUG) +tfd = tfp.distributions + + +# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) -GAME = 'BipedalWalker-v2' # BipedalWalkerHardcore-v2 +# np.random.seed(2) +# tf.random.set_seed(2) # reproducible + +GAME = 'BipedalWalker-v2' # BipedalWalkerHardcore-v2 BipedalWalker-v2 LunarLanderContinuous-v2 OUTPUT_GRAPH = False LOG_DIR = './log' -N_WORKERS = multiprocessing.cpu_count() -# N_WORKERS = 4 -MAX_GLOBAL_EP = 20000 # 8000 +# N_WORKERS = multiprocessing.cpu_count() +N_WORKERS = 2 +MAX_GLOBAL_EP = 8000 # 8000 GLOBAL_NET_SCOPE = 'Global_Net' UPDATE_GLOBAL_ITER = 10 -GAMMA = 0.999 +GAMMA = 0.99 ENTROPY_BETA = 0.005 -LR_A = 0.00002 # learning rate for actor +LR_A = 0.00005 # learning rate for actor LR_C = 0.0001 # learning rate for critic GLOBAL_RUNNING_R = [] GLOBAL_EP = 0 # will increase during training, stop training when it >= MAX_GLOBAL_EP @@ -63,10 +77,13 @@ N_S = env.observation_space.shape[0] N_A = env.action_space.shape[0] +# N_A = env.action_space.n + # A_BOUND = [env.action_space.low, env.action_space.high] A_BOUND = [env.action_space.low, env.action_space.high] -A_BOUND[0] = A_BOUND[0].reshape(1, 4) -A_BOUND[1] = A_BOUND[1].reshape(1, 4) +# A_BOUND[0] = A_BOUND[0].reshape(1, N_A) +# A_BOUND[1] = A_BOUND[1].reshape(1, N_A) +# print(A_BOUND) # print(env.unwrapped.hull.position[0]) # exit() @@ -74,106 +91,196 @@ class ACNet(object): - def __init__(self, scope, globalAC=None): + def __init__(self, scope, globalAC=None): # no need for scope self.scope = scope - if scope == GLOBAL_NET_SCOPE: - ## global network only do inference - with tf.variable_scope(scope): - self.s = tf.placeholder(tf.float32, [None, N_S], 'S') - self._build_net() - self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) - self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) - - normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space - - with 
tf.name_scope('choose_a'): # use local params to choose action - self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - - else: - ## worker network calculate gradient locally, update on global network - with tf.variable_scope(scope): - self.s = tf.placeholder(tf.float32, [None, N_S], 'S') - self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A') - self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget') - - self._build_net() - - td = tf.subtract(self.v_target, self.v, name='TD_error') - with tf.name_scope('c_loss'): - self.c_loss = tf.reduce_mean(tf.square(td)) - - with tf.name_scope('wrap_a_out'): - self.test = self.sigma[0] - self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - - normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space - - with tf.name_scope('a_loss'): - log_prob = normal_dist.log_prob(self.a_his) - exp_v = log_prob * td - entropy = normal_dist.entropy() # encourage exploration - self.exp_v = ENTROPY_BETA * entropy + exp_v - self.a_loss = tf.reduce_mean(-self.exp_v) - - with tf.name_scope('choose_a'): # use local params to choose action - self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - - with tf.name_scope('local_grad'): - self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) - self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) - self.a_grads = tf.gradients(self.a_loss, self.a_params) - self.c_grads = tf.gradients(self.c_loss, self.c_params) - - with tf.name_scope('sync'): - with tf.name_scope('pull'): - self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params)] - self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params)] - with tf.name_scope('push'): - self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params)) - self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params)) - - def _build_net(self): - w_init = tf.contrib.layers.xavier_initializer() - with tf.variable_scope('actor'): # Policy network - nn = InputLayer(self.s, name='in') - nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='la') - nn = DenseLayer(nn, n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2') - mu = DenseLayer(nn, n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu') - sigma = DenseLayer(nn, n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma') - self.mu = mu.outputs - self.sigma = sigma.outputs - - with tf.variable_scope('critic'): # we use Value-function here, but not Q-function. 
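# Illustrative sketch (not part of this patch): the removed TF1 code above builds the
# continuous-action policy with tf.contrib.distributions.Normal behind placeholders and
# sess.run; the TF2 rewrite further below does the same step eagerly with
# tensorflow_probability. A minimal standalone version of that sampling step, assuming a
# 4-dimensional action space with bounds [-1, 1]; mu, sigma and the bounds here are dummy
# values, not taken from the tutorial:
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

def sample_action(mu, sigma, a_low, a_high):
    # scale the mean to the action range and keep sigma strictly positive,
    # mirroring the tutorial's 'wrap_a_out' step
    dist = tfd.Normal(mu * a_high, sigma + 1e-5)
    a = tf.squeeze(dist.sample(1), axis=0)      # one sample, drop the sample axis
    return tf.clip_by_value(a, a_low, a_high)   # clip to the legal action range

mu = tf.constant([[0.1, -0.2, 0.0, 0.3]])
sigma = tf.constant([[0.5, 0.5, 0.5, 0.5]])
print(sample_action(mu, sigma, -1.0, 1.0))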
- nn = InputLayer(self.s, name='in') - nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc') - nn = DenseLayer(nn, n_units=200, act=tf.nn.relu6, W_init=w_init, name='lc2') - v = DenseLayer(nn, n_units=1, W_init=w_init, name='v') - self.v = v.outputs - - def update_global(self, feed_dict): # run by a local - _, _, t = sess.run( - [self.update_a_op, self.update_c_op, self.test], feed_dict - ) # local grads applies to global net - return t - - def pull_global(self): # run by a local - sess.run([self.pull_a_params_op, self.pull_c_params_op]) + self.save_path = './model' + # if scope == GLOBAL_NET_SCOPE: + # ## global network only do inference + # with tf.variable_scope(scope): + # self.s = tf.placeholder(tf.float32, [None, N_S], 'S') + # self._build_net() + + + # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space + + # with tf.name_scope('choose_a'): # use local params to choose action + # self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) + + # else: + # ## worker network calculate gradient locally, update on global network + # # with tf.variable_scope(scope): + # # self.s = tf.placeholder(tf.float32, [None, N_S], 'S') + # # self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A') + # # self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget') + + + + # self._build_net() + + # td = tf.subtract(self.v_target, self.v, name='TD_error') + # with tf.name_scope('c_loss'): + # self.c_loss = tf.reduce_mean(tf.square(td)) + + # with tf.name_scope('wrap_a_out'): + # self.test = self.sigma[0] + # self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 + + # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space + + # with tf.name_scope('a_loss'): + # log_prob = normal_dist.log_prob(self.a_his) + # exp_v = log_prob * td + # entropy = normal_dist.entropy() # encourage exploration + # self.exp_v = ENTROPY_BETA * entropy + exp_v + # self.a_loss = tf.reduce_mean(-self.exp_v) + + # with tf.name_scope('choose_a'): # use local params to choose action + # self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) + + # with tf.name_scope('local_grad'): + # self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) + # self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) + # self.a_grads = tf.gradients(self.a_loss, self.a_params) + # self.c_grads = tf.gradients(self.c_loss, self.c_params) + + # with tf.name_scope('sync'): + # with tf.name_scope('pull'): + # self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params)] + # self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params)] + # with tf.name_scope('push'): + # self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params)) + # self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params)) + + # def _build_net(self): + # w_init = tf.contrib.layers.xavier_initializer() + # w_init = tf.random_uniform_initializer(0, 0.01) + w_init = tf.keras.initializers.glorot_normal(seed=None) + # with tf.variable_scope('actor'): # Policy network + # nn = InputLayer(self.s, name='in') + # nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='la') + # nn = DenseLayer(nn, n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2') + # mu = DenseLayer(nn, n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu') + # sigma = DenseLayer(nn, 
n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma') + # self.mu = mu.outputs + # self.sigma = sigma.outputs + def get_actor(input_shape): + with tf.name_scope(self.scope): + ni = tl.layers.Input(input_shape, name='in') + nn = tl.layers.Dense(n_units=50, act=tf.nn.relu6, W_init=w_init, name='la')(ni) + nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=w_init, name='la2')(nn) + mu = tl.layers.Dense(n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu')(nn) + sigma = tl.layers.Dense(n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma')(nn) + return tl.models.Model(inputs=ni, outputs=[mu, sigma], name=scope+'/Actor') + self.actor = get_actor( [None, N_S]) + self.actor.train() # set training mode, also for workers? + + def get_critic(input_shape): + with tf.name_scope(self.scope): + ni = tl.layers.Input(input_shape, name='in') + nn = tl.layers.Dense(n_units=50, act=tf.nn.relu6, W_init=w_init, name='lc')(ni) + nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=w_init, name='lc2')(nn) + v = tl.layers.Dense(n_units=1, W_init=w_init, name='v')(nn) + return tl.models.Model(inputs=ni, outputs=v, name=scope+'/Critic') + self.critic = get_critic( [None, N_S]) + self.critic.train() + + # self.a_params = tl.layers.get_variables_with_name(scope + '/Actor', True, False) + # self.c_params = tl.layers.get_variables_with_name(scope + '/Critic', True, False) + + # with tf.variable_scope('critic'): # we use Value-function here, but not Q-function. + # nn = InputLayer(self.s, name='in') + # nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc') + # nn = DenseLayer(nn, n_units=200, act=tf.nn.relu6, W_init=w_init, name='lc2') + # v = DenseLayer(nn, n_units=1, W_init=w_init, name='v') + # self.v = v.outputs + + # def update_global(self, feed_dict): # run by a local + def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): + # _, _, t = sess.run( + # [self.update_a_op, self.update_c_op, self.test], feed_dict + # ) # local grads applies to global net + with tf.GradientTape() as tape: + self.v = self.critic(buffer_s) + self.v_target = buffer_v_target # tensor float? + td = tf.subtract(self.v_target, self.v, name='TD_error') + # with tf.name_scope('c_loss'): + self.c_loss = tf.reduce_mean(tf.square(td)) + self.c_grads = tape.gradient(self.c_loss, self.critic.trainable_weights) + OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) + del tape # Drop the reference to the tape + + + with tf.GradientTape() as tape: + self.mu, self.sigma = self.actor(buffer_s) + # print('mu: ', self.mu) + # print('sigma: ', self.sigma) + # with tf.name_scope('wrap_a_out'): + self.test = self.sigma[0] + self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 + + + # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space + normal_dist = tfd.Normal(self.mu, self.sigma) + # with tf.name_scope('a_loss'): + self.a_his = buffer_a # tensor float? 
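# Illustrative annotation (not part of this patch): the next few added lines compute the
# standard A3C actor objective inside the GradientTape -- log pi(a|s) weighted by the TD
# error as the advantage, plus an ENTROPY_BETA-scaled entropy bonus to encourage
# exploration; the loss is the negative mean of that expression, so minimizing it
# maximizes the expected advantage-weighted log-likelihood.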
+ log_prob = normal_dist.log_prob(self.a_his) + exp_v = log_prob * td + entropy = normal_dist.entropy() # encourage exploration + self.exp_v = ENTROPY_BETA * entropy + exp_v + self.a_loss = tf.reduce_mean(-self.exp_v) + # with tf.name_scope('local_grad'): + # self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) + # self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) + # self.a_grads = tf.gradients(self.a_loss, self.a_params) + # self.c_grads = tf.gradients(self.c_loss, self.c_params) + self.a_grads = tape.gradient(self.a_loss, self.actor.trainable_weights) + OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) + + + # return t + return self.test.numpy() + + def pull_global(self, globalAC): # run by a local + # sess.run([self.pull_a_params_op, self.pull_c_params_op]) + # with tf.name_scope('sync'): + # with tf.name_scope('pull'): + # self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.actor.trainable_weights, globalAC.a_params)] + # self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.critic.trainable_weights, globalAC.c_params)] + # print(self.scope, 'before: ', self.actor.trainable_weights[0][5][2]) + # print(self.scope, 'global: ', globalAC.actor.trainable_weights[0][5][2]) + for l_p, g_p in zip(self.actor.trainable_weights, globalAC.actor.trainable_weights): + l_p.assign(g_p) + for l_p, g_p in zip(self.critic.trainable_weights, globalAC.critic.trainable_weights): + l_p.assign(g_p) + # print(self.scope, 'after: ', self.actor.trainable_weights[0][5][2]) def choose_action(self, s): # run by a local s = s[np.newaxis, :] - return sess.run(self.A, {self.s: s})[0] - - def save_ckpt(self): - tl.files.exists_or_mkdir(self.scope) - tl.files.save_ckpt( - sess=sess, mode_name='model.ckpt', var_list=self.a_params + self.c_params, save_dir=self.scope, - printable=True - ) + self.mu, self.sigma = self.actor(s) + # print('mu1: ', self.mu) + # print('sigma1: ', self.sigma) + with tf.name_scope('wrap_a_out'): + # self.test = self.sigma[0] + self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 + # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) + normal_dist = tfd.Normal(self.mu, self.sigma) + self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) + # return sess.run(self.A, {self.s: s})[0] + return self.A.numpy()[0] + + def save_ckpt(self): + tl.files.save_npz(self.actor.trainable_weights, name='model_actor.npz') + tl.files.save_npz(self.critic.trainable_weights, name='model_critic.npz') + # tl.files.save_ckpt( + # sess=sess, mode_name='model.ckpt', var_list=self.a_params + self.c_params, save_dir=self.scope, + # printable=True + # ) def load_ckpt(self): - tl.files.load_ckpt(sess=sess, var_list=self.a_params + self.c_params, save_dir=self.scope, printable=True) - # tl.files.load_ckpt(sess=sess, mode_name='model.ckpt', var_list=self.a_params+self.c_params, save_dir=self.scope, is_latest=False, printable=True) - + # tl.files.load_hdf5_to_trainable_weights(self.save_path+'/actor', self.actor) + # tl.files.load_hdf5_to_trainable_weights(self.save_path+'/critic', self.critic) + tl.files.load_and_assign_npz(name='model_actor.npz', network=self.actor) + tl.files.load_and_assign_npz(name='model_critic.npz', network=self.critic) class Worker(object): @@ -182,7 +289,8 @@ def __init__(self, name, globalAC): self.name = name self.AC = ACNet(name, globalAC) - def work(self): + # def work(self): + def work(self, globalAC): global GLOBAL_RUNNING_R, GLOBAL_EP 
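# Illustrative sketch (not part of this patch): the body of work() that follows collects
# (s, a, r) tuples and, every UPDATE_GLOBAL_ITER steps or at episode end, converts the
# rewards into bootstrapped discounted targets for the critic before calling
# update_global(). A standalone version of that target computation with dummy numbers
# (gamma mirrors GAMMA = 0.99 defined above):
def discounted_targets(rewards, v_bootstrap, gamma=0.99):
    # walk the rewards backwards, bootstrapping from the critic's value of the
    # last state, exactly as the buffer_v_target loop in the hunk below does
    targets, v = [], v_bootstrap
    for r in reversed(rewards):
        v = r + gamma * v
        targets.append(v)
    targets.reverse()
    return targets

print(discounted_targets([1.0, 0.0, -2.0], v_bootstrap=0.5))
# -> approx. [-0.475, -1.490, -1.505]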
total_step = 1 buffer_s, buffer_a, buffer_r = [], [], [] @@ -193,8 +301,13 @@ def work(self): # visualize Worker_0 during training if self.name == 'Worker_0' and total_step % 30 == 0: self.env.render() - a = self.AC.choose_action(s) + s = s.astype('float32') # double to float + a = self.AC.choose_action(s) s_, r, done, _info = self.env.step(a) + s_ = s_.astype('float32') # double to float + # print('s:',s) + # print('a:', a) + # print('r:',r) # set robot falls reward to -2 instead of -100 if r == -100: r = -2 @@ -209,7 +322,8 @@ def work(self): if done: v_s_ = 0 # terminal else: - v_s_ = sess.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0] + # v_s_ = sess.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0] + v_s_ = self.AC.critic(s_[np.newaxis, :])[0,0] # reduce dim from 2 to 0 buffer_v_target = [] @@ -218,46 +332,49 @@ def work(self): buffer_v_target.append(v_s_) buffer_v_target.reverse() - + buffer_s, buffer_a, buffer_v_target = ( np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target) ) - feed_dict = {self.AC.s: buffer_s, self.AC.a_his: buffer_a, self.AC.v_target: buffer_v_target} + # print(buffer_s, buffer_a, buffer_v_target) + # feed_dict = {self.AC.s: buffer_s, self.AC.a_his: buffer_a, self.AC.v_target: buffer_v_target} # update gradients on global network - self.AC.update_global(feed_dict) + # self.AC.update_global(feed_dict) + self.AC.update_global(buffer_s, buffer_a, buffer_v_target, globalAC) buffer_s, buffer_a, buffer_r = [], [], [] # update local network from global network - self.AC.pull_global() + self.AC.pull_global(globalAC) s = s_ total_step += 1 if done: if len(GLOBAL_RUNNING_R) == 0: # record running episode reward GLOBAL_RUNNING_R.append(ep_r) - else: + else: # moving average GLOBAL_RUNNING_R.append(0.95 * GLOBAL_RUNNING_R[-1] + 0.05 * ep_r) print( self.name, "episode:", GLOBAL_EP, - "| pos: %i" % self.env.unwrapped.hull.position[0], # number of move + # "| pos: %i" % self.env.unwrapped.hull.position[0], # number of move '| reward: %.1f' % ep_r, "| running_reward: %.1f" % GLOBAL_RUNNING_R[-1], # '| sigma:', test, # debug - 'WIN ' * 5 if self.env.unwrapped.hull.position[0] >= 88 else '', + # 'WIN ' * 5 if self.env.unwrapped.hull.position[0] >= 88 else '', ) GLOBAL_EP += 1 break if __name__ == "__main__": - sess = tf.Session() - + # sess = tf.Session() # ============================= TRAINING =============================== with tf.device("/cpu:0"): - OPT_A = tf.train.RMSPropOptimizer(LR_A, name='RMSPropA') - OPT_C = tf.train.RMSPropOptimizer(LR_C, name='RMSPropC') + + OPT_A = tf.optimizers.RMSprop(LR_A, name='RMSPropA') + OPT_C = tf.optimizers.RMSprop(LR_C, name='RMSPropC') + GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE) # we only need its params workers = [] # Create worker @@ -266,15 +383,23 @@ def work(self): workers.append(Worker(i_name, GLOBAL_AC)) COORD = tf.train.Coordinator() - sess.run(tf.global_variables_initializer()) + # sess.run(tf.global_variables_initializer()) # start TF threading worker_threads = [] for worker in workers: - t = threading.Thread(target=worker.work) + # t = threading.Thread(target=worker.work) + job = lambda: worker.work(GLOBAL_AC) + t = threading.Thread(target=job) t.start() worker_threads.append(t) COORD.join(worker_threads) + import matplotlib.pyplot as plt + plt.plot(GLOBAL_RUNNING_R) + plt.xlabel('episode') + plt.ylabel('global running reward') + plt.savefig('a3c.png') + plt.show() GLOBAL_AC.save_ckpt() @@ -282,15 +407,16 @@ def work(self): # env = gym.make(GAME) # GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE) # 
sess.run(tf.global_variables_initializer()) - # GLOBAL_AC.load_ckpt() - # while True: - # s = env.reset() - # rall = 0 - # while True: - # env.render() - # a = GLOBAL_AC.choose_action(s) - # s, r, d, _ = env.step(a) - # rall += r - # if d: - # print("reward", rall) - # break + GLOBAL_AC.load_ckpt() + while True: + s = env.reset() + rall = 0 + while True: + env.render() + s = s.astype('float32') # double to float + a = GLOBAL_AC.choose_action(s) + s, r, d, _ = env.step(a) + rall += r + if d: + print("reward", rall) + break diff --git a/examples/reinforcement_learning/tutorial_cartpole_ac.py b/examples/reinforcement_learning/tutorial_cartpole_ac.py index 4d8b6f8ea..ecdf68084 100644 --- a/examples/reinforcement_learning/tutorial_cartpole_ac.py +++ b/examples/reinforcement_learning/tutorial_cartpole_ac.py @@ -34,20 +34,20 @@ import time import numpy as np -import tensorflow as tf import gym +import tensorflow as tf import tensorlayer as tl ## enable eager mode -tf.enable_eager_execution() +# tf.enable_eager_execution() -tf.logging.set_verbosity(tf.logging.DEBUG) +# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) np.random.seed(2) -tf.set_random_seed(2) # reproducible +tf.random.set_seed(2) # reproducible # hyper-parameters OUTPUT_GRAPH = False @@ -93,7 +93,7 @@ def get_model(inputs_shape): nn = tl.layers.Dense(n_units=10, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2')(nn) nn = tl.layers.Dense(n_units=n_actions, name='actions')(nn) return tl.models.Model(inputs=ni, outputs=nn, name="Actor") - self.model = get_model([1, n_features]) + self.model = get_model([None, n_features]) self.model.train() # self.acts_logits = n.outputs # self.acts_prob = tf.nn.softmax(self.acts_logits) @@ -106,12 +106,12 @@ def get_model(inputs_shape): # with tf.variable_scope('train'): # self.train_op = tf.train.AdamOptimizer(lr).minimize(self.exp_v) - self.optimizer = tf.train.AdamOptimizer(lr) + self.optimizer = tf.optimizers.Adam(lr) # Morvan Zhou (the same) # with tf.variable_scope('exp_v'): # # log_prob = tf.log(self.acts_prob[0, self.a[0]]) # # self.exp_v = tf.reduce_mean(log_prob * self.td_error[0]) # advantage (TD_error) guided loss - # self.exp_v = tl.rein.log_weight(probs=self.acts_prob[0, self.a[0]], weights=self.td_error) + # self.exp_v = tl.rein.log_weight(probs=self.acts_prob[0, self.a[0]], trainable_weights=self.td_error) # # with tf.variable_scope('train'): # self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v) # minimize(-exp_v) = maximize(exp_v) @@ -119,22 +119,25 @@ def get_model(inputs_shape): def learn(self, s, a, td): # _, exp_v = self.sess.run([self.train_op, self.exp_v], {self.s: [s], self.a: [a], self.td_error: td[0]}) with tf.GradientTape() as tape: - _logits = self.model([s]).outputs + + # _logits = self.model([s]).outputs + _logits = self.model(np.array([s])) # _probs = tf.nn.softmax(_logits) - _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) + _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) # cross-entropy loss weighted by rewards grad = tape.gradient(_exp_v, self.model.trainable_weights) self.optimizer.apply_gradients(zip(grad, self.model.trainable_weights)) return _exp_v def choose_action(self, s): # probs = self.sess.run(self.acts_prob, {self.s: [s]}) # get probabilities of all actions - _logits = self.model([s]).outputs + # _logits = self.model(np.array([s])).outputs + _logits = self.model(np.array([s])) _probs = 
tf.nn.softmax(_logits).numpy() return tl.rein.choice_action_by_probs(_probs.ravel()) def choose_action_greedy(self, s): # probs = self.sess.run(self.acts_prob, {self.s: [s]}) # get probabilities of all actions - _logits = self.model([s]).outputs + _logits = self.model(np.array([s])) _probs = tf.nn.softmax(_logits).numpy() return np.argmax(_probs.ravel()) @@ -167,14 +170,14 @@ def get_model(inputs_shape): # self.loss = tf.square(self.td_error) # with tf.variable_scope('train'): # self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss) - self.optimizer = tf.train.AdamOptimizer(lr) + self.optimizer = tf.optimizers.Adam(lr) def learn(self, s, r, s_): # v_ = self.sess.run(self.v, {self.s: [s_]}) - v_ = self.model([s_]).outputs + v_ = self.model(np.array([s_])) # td_error, _ = self.sess.run([self.td_error, self.train_op], {self.s: [s], self.v_: v_, self.r: r}) with tf.GradientTape() as tape: - v = self.model([s]).outputs + v = self.model(np.array([s])) # TD_error = r + lambd * V(newS) - V(S) td_error = r + LAMBDA * v_ - v loss = tf.square(td_error) diff --git a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py index 9411da423..8e96a279e 100644 --- a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py +++ b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py @@ -28,16 +28,16 @@ import time import numpy as np -import tensorflow as tf import gym +import tensorflow as tf import tensorlayer as tl ## enable eager mode -tf.enable_eager_execution() +# tf.enable_eager_execution() -tf.logging.set_verbosity(tf.logging.DEBUG) +# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) env = gym.make('FrozenLake-v0') @@ -61,7 +61,7 @@ def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='observation') nn = tl.layers.Dense(4, act=None, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s')(ni) return tl.models.Model(inputs=ni, outputs=nn, name="Q-Network") -qnetwork = get_model([1, 16]) +qnetwork = get_model([None, 16]) qnetwork.train() train_weights = qnetwork.trainable_weights @@ -72,7 +72,7 @@ def get_model(inputs_shape): # nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32) # loss = tl.cost.mean_squared_error(nextQ, y, is_mean=False) # tf.reduce_sum(tf.square(nextQ - y)) # train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss) -optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1) +optimizer = tf.optimizers.SGD(learning_rate=0.1) ## Set learning parameters lambd = .99 # decay factor @@ -90,7 +90,7 @@ def get_model(inputs_shape): if render: env.render() ## Choose an action by greedily (with e chance of random action) from the Q-network # a, allQ = sess.run([predict, y], feed_dict={inputs: [to_one_hot(s, 16)]}) - allQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).outputs.numpy() + allQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).numpy() a = np.argmax(allQ, 1) ## e-Greedy Exploration !!! sample random action @@ -100,7 +100,7 @@ def get_model(inputs_shape): s1, r, d, _ = env.step(a[0]) ## Obtain the Q' values by feeding the new state through our network # Q1 = sess.run(y, feed_dict={inputs: [to_one_hot(s1, 16)]}) - Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).outputs.numpy() + Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).numpy() ## Obtain maxQ' and set our target value for chosen action. 
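# Illustrative sketch (not part of this patch): the following lines implement the
# Q-learning target Q(s,a) <- r + lambd * max_a' Q(s',a') and fit the network to it with
# a GradientTape step in place of the old sess.run train_op. A condensed standalone
# version of that update, reusing qnetwork, optimizer, to_one_hot, lambd and the imports
# already defined in this tutorial:
def dqn_update(s, a, r, s1):
    s_vec = np.asarray([to_one_hot(s, 16)], dtype=np.float32)
    s1_vec = np.asarray([to_one_hot(s1, 16)], dtype=np.float32)
    target_q = qnetwork(s_vec).numpy()          # current estimates, shape (1, 4)
    target_q[0, a] = r + lambd * np.max(qnetwork(s1_vec).numpy())
    with tf.GradientTape() as tape:
        q = qnetwork(s_vec)
        loss = tl.cost.mean_squared_error(target_q, q, is_mean=False)
    grads = tape.gradient(loss, qnetwork.trainable_weights)
    optimizer.apply_gradients(zip(grads, qnetwork.trainable_weights))
    return float(loss)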
maxQ1 = np.max(Q1) # in Q-Learning, policy is greedy, so we use "max" to select the next action. @@ -114,7 +114,7 @@ def get_model(inputs_shape): # Q'(s,a) ≈ Q(s,a) # _ = sess.run(train_op, {inputs: [to_one_hot(s, 16)], nextQ: targetQ}) with tf.GradientTape() as tape: - _qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).outputs + _qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)) _loss = tl.cost.mean_squared_error(targetQ, _qvalues, is_mean=False) grad = tape.gradient(_loss, train_weights) optimizer.apply_gradients(zip(grad, train_weights)) diff --git a/examples/spatial_transformer_network/tutorial_spatial_transformer_network_dynamic.py b/examples/spatial_transformer_network/tutorial_spatial_transformer_network_dynamic.py index aecc69f61..3170585e4 100644 --- a/examples/spatial_transformer_network/tutorial_spatial_transformer_network_dynamic.py +++ b/examples/spatial_transformer_network/tutorial_spatial_transformer_network_dynamic.py @@ -1,7 +1,9 @@ #! /usr/bin/python # -*- coding: utf8 -*- import time + import numpy as np + import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import * diff --git a/examples/spatial_transformer_network/tutorial_spatial_transformer_network_static.py b/examples/spatial_transformer_network/tutorial_spatial_transformer_network_static.py index c9a93629f..5f09db68b 100644 --- a/examples/spatial_transformer_network/tutorial_spatial_transformer_network_static.py +++ b/examples/spatial_transformer_network/tutorial_spatial_transformer_network_static.py @@ -1,7 +1,9 @@ #! /usr/bin/python # -*- coding: utf8 -*- import time + import numpy as np + import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import * diff --git a/examples/text_classification/tutorial_imdb_fasttext.py b/examples/text_classification/tutorial_imdb_fasttext.py index 2c2c7aed0..731d2fce4 100644 --- a/examples/text_classification/tutorial_imdb_fasttext.py +++ b/examples/text_classification/tutorial_imdb_fasttext.py @@ -31,8 +31,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import * from tensorlayer.models import * diff --git a/examples/text_generation/tutorial_generate_text.py b/examples/text_generation/tutorial_generate_text.py index 22a17ea37..4c42d0b12 100644 --- a/examples/text_generation/tutorial_generate_text.py +++ b/examples/text_generation/tutorial_generate_text.py @@ -28,8 +28,8 @@ import nltk import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import * diff --git a/examples/text_ptb/tutorial_ptb_lstm.py b/examples/text_ptb/tutorial_ptb_lstm.py index de08399c9..77c7c3425 100644 --- a/examples/text_ptb/tutorial_ptb_lstm.py +++ b/examples/text_ptb/tutorial_ptb_lstm.py @@ -104,8 +104,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py b/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py index 0021a7bfc..9fccca66a 100644 --- a/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py +++ b/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py @@ -105,8 +105,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/text_word_embedding/tutorial_word2vec_basic.py 
b/examples/text_word_embedding/tutorial_word2vec_basic.py index 6310699ad..5a1dc842c 100644 --- a/examples/text_word_embedding/tutorial_word2vec_basic.py +++ b/examples/text_word_embedding/tutorial_word2vec_basic.py @@ -44,9 +44,9 @@ import time import numpy as np -import tensorflow as tf from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf import tensorlayer as tl import wget diff --git a/examples/tutorial_work_with_onnx.py b/examples/tutorial_work_with_onnx.py index 522f2ad8c..46fd0cb42 100644 --- a/examples/tutorial_work_with_onnx.py +++ b/examples/tutorial_work_with_onnx.py @@ -117,13 +117,13 @@ import time import numpy as np -import tensorflow as tf -from tensorflow.python.tools.freeze_graph import freeze_graph as _freeze_graph import onnx +import tensorflow as tf import tensorlayer as tl from onnx_tf.backend import prepare from onnx_tf.frontend import tensorflow_graph_to_onnx_model +from tensorflow.python.tools.freeze_graph import freeze_graph as _freeze_graph tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py index 7c7b833c3..4aef4a429 100644 --- a/tensorlayer/activation.py +++ b/tensorlayer/activation.py @@ -3,7 +3,6 @@ """A file containing various activation functions.""" import tensorflow as tf - from tensorlayer.decorators import deprecated __all__ = [ diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py index 252178502..2664d8d72 100644 --- a/tensorlayer/cost.py +++ b/tensorlayer/cost.py @@ -5,8 +5,7 @@ import tensorflow as tf from tensorflow.python.framework import ops -from tensorflow.python.ops import standard_ops, math_ops, nn_ops, array_ops - +from tensorflow.python.ops import array_ops, math_ops, nn_ops, standard_ops from tensorlayer import logging __all__ = [ diff --git a/tensorlayer/db.py b/tensorlayer/db.py index cb8db8e10..1de73bf6a 100644 --- a/tensorlayer/db.py +++ b/tensorlayer/db.py @@ -7,15 +7,16 @@ import time from datetime import datetime -import gridfs import numpy as np + +import gridfs import pymongo import tensorflow as tf - from tensorlayer import logging -from tensorlayer.files import net2static_graph, static_graph2net, assign_weights -from tensorlayer.files import save_weights_to_hdf5, load_hdf5_to_weights -from tensorlayer.files import del_folder, exists_or_mkdir +from tensorlayer.files import ( + assign_weights, del_folder, exists_or_mkdir, load_hdf5_to_weights, net2static_graph, save_weights_to_hdf5, + static_graph2net +) class TensorHub(object): @@ -640,7 +641,7 @@ def run_top_task(self, task_name=None, sort=None, **kwargs): logging.info("[Database] Start Task: key: {} sort: {} push time: {}".format(task_name, sort, _datetime)) _script = _script.decode('utf-8') with tf.Graph().as_default(): # # as graph: # clear all TF graphs - exec(_script, globals()) + exec (_script, globals()) # set status to finished _ = self.db.Task.find_one_and_update({'_id': _id}, {'$set': {'status': 'finished'}}) diff --git a/tensorlayer/distributed.py b/tensorlayer/distributed.py index 544aac87e..d3fbdd38f 100644 --- a/tensorlayer/distributed.py +++ b/tensorlayer/distributed.py @@ -6,7 +6,6 @@ import tensorflow as tf from tensorflow.python.training import session_run_hook - from tensorlayer import logging from tensorlayer.decorators import deprecated from tensorlayer.lazy_imports import LazyImport diff --git a/tensorlayer/files/dataset_loaders/celebA_dataset.py b/tensorlayer/files/dataset_loaders/celebA_dataset.py index 
d5dc5755f..3563d58f9 100644 --- a/tensorlayer/files/dataset_loaders/celebA_dataset.py +++ b/tensorlayer/files/dataset_loaders/celebA_dataset.py @@ -5,7 +5,8 @@ import zipfile from tensorlayer import logging -from tensorlayer.files.utils import (download_file_from_google_drive, exists_or_mkdir, load_file_list) +from tensorlayer.files.utils import (download_file_from_google_drive, + exists_or_mkdir, load_file_list) __all__ = ['load_celebA_dataset'] diff --git a/tensorlayer/files/dataset_loaders/cyclegan_dataset.py b/tensorlayer/files/dataset_loaders/cyclegan_dataset.py index e327b3b4c..6c465f6c5 100644 --- a/tensorlayer/files/dataset_loaders/cyclegan_dataset.py +++ b/tensorlayer/files/dataset_loaders/cyclegan_dataset.py @@ -6,7 +6,8 @@ import numpy as np from tensorlayer import logging, visualize -from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, maybe_download_and_extract) +from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, + maybe_download_and_extract) __all__ = ['load_cyclegan_dataset'] diff --git a/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py b/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py index f2e582ae5..9f466c0eb 100644 --- a/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py +++ b/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py @@ -4,9 +4,9 @@ import os from tensorlayer import logging, visualize -from tensorlayer.files.utils import ( - del_file, folder_exists, load_file_list, load_folder_list, maybe_download_and_extract, read_file -) +from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, + load_folder_list, + maybe_download_and_extract, read_file) __all__ = ['load_flickr1M_dataset'] diff --git a/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py b/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py index 8049a0653..0492371b0 100644 --- a/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py +++ b/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py @@ -4,9 +4,9 @@ import os from tensorlayer import logging, visualize -from tensorlayer.files.utils import ( - del_file, folder_exists, load_file_list, maybe_download_and_extract, natural_keys, read_file -) +from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, + maybe_download_and_extract, natural_keys, + read_file) __all__ = ['load_flickr25k_dataset'] diff --git a/tensorlayer/files/dataset_loaders/mpii_dataset.py b/tensorlayer/files/dataset_loaders/mpii_dataset.py index a6f88f609..8b90dcdec 100644 --- a/tensorlayer/files/dataset_loaders/mpii_dataset.py +++ b/tensorlayer/files/dataset_loaders/mpii_dataset.py @@ -4,7 +4,8 @@ import os from tensorlayer import logging -from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, maybe_download_and_extract) +from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, + maybe_download_and_extract) __all__ = ['load_mpii_pose_dataset'] diff --git a/tensorlayer/files/dataset_loaders/voc_dataset.py b/tensorlayer/files/dataset_loaders/voc_dataset.py index 5584864ae..c5ccadbcf 100644 --- a/tensorlayer/files/dataset_loaders/voc_dataset.py +++ b/tensorlayer/files/dataset_loaders/voc_dataset.py @@ -4,9 +4,10 @@ import os import tensorflow as tf - from tensorlayer import logging, utils -from tensorlayer.files.utils import (del_file, del_folder, folder_exists, load_file_list, maybe_download_and_extract) +from tensorlayer.files.utils import (del_file, del_folder, folder_exists, + load_file_list, + maybe_download_and_extract) 
__all__ = ['load_voc_dataset'] diff --git a/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py b/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py index 0261a8581..77c1f93f9 100644 --- a/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py +++ b/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py @@ -6,7 +6,6 @@ import tarfile from tensorflow.python.platform import gfile - from tensorlayer import logging from tensorlayer.files.utils import maybe_download_and_extract diff --git a/tensorlayer/files/utils.py b/tensorlayer/files/utils.py index 72fcb1824..e4b0f6f8e 100644 --- a/tensorlayer/files/utils.py +++ b/tensorlayer/files/utils.py @@ -1,8 +1,9 @@ #! /usr/bin/python # -*- coding: utf-8 -*- +import base64 import gzip -import importlib +import json import math import os import pickle @@ -14,23 +15,20 @@ import time import zipfile +import cloudpickle import h5py import numpy as np -import progressbar import scipy.io as sio -import tensorflow as tf from six.moves import cPickle -from tensorflow.python.platform import gfile +import progressbar +import tensorflow as tf import tensorlayer as tl -from tensorlayer import logging, nlp, utils, visualize - -import cloudpickle -import base64 from tensorflow.python.keras.saving import model_config as model_config_lib -from tensorflow.python.util.tf_export import keras_export +from tensorflow.python.platform import gfile from tensorflow.python.util import serialization -import json +from tensorflow.python.util.tf_export import keras_export +from tensorlayer import logging, nlp, utils, visualize # from six.moves import zip diff --git a/tensorlayer/initializers.py b/tensorlayer/initializers.py index 666777824..f68c05c1d 100644 --- a/tensorlayer/initializers.py +++ b/tensorlayer/initializers.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import numpy as np + import tensorflow as tf __all__ = [ diff --git a/tensorlayer/layers/activation.py b/tensorlayer/layers/activation.py index 44fcc47a9..9abb19ce7 100644 --- a/tensorlayer/layers/activation.py +++ b/tensorlayer/layers/activation.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.activation import leaky_relu6, leaky_twice_relu6 from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/binary_conv.py b/tensorlayer/layers/convolution/binary_conv.py index 23448cf6f..14e5a8721 100644 --- a/tensorlayer/layers/convolution/binary_conv.py +++ b/tensorlayer/layers/convolution/binary_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/deformable_conv.py b/tensorlayer/layers/convolution/deformable_conv.py index 5f75bbe15..b9a8224db 100644 --- a/tensorlayer/layers/convolution/deformable_conv.py +++ b/tensorlayer/layers/convolution/deformable_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias, private_method diff --git a/tensorlayer/layers/convolution/depthwise_conv.py b/tensorlayer/layers/convolution/depthwise_conv.py index d6136ede3..4fe4dc34c 100644 --- a/tensorlayer/layers/convolution/depthwise_conv.py +++ b/tensorlayer/layers/convolution/depthwise_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from 
tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/dorefa_conv.py b/tensorlayer/layers/convolution/dorefa_conv.py index ed9b32dd8..1f8944382 100644 --- a/tensorlayer/layers/convolution/dorefa_conv.py +++ b/tensorlayer/layers/convolution/dorefa_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/expert_conv.py b/tensorlayer/layers/convolution/expert_conv.py index d7e59a0e8..fb27b9df6 100644 --- a/tensorlayer/layers/convolution/expert_conv.py +++ b/tensorlayer/layers/convolution/expert_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/expert_deconv.py b/tensorlayer/layers/convolution/expert_deconv.py index cb5cd6773..a1571b2cb 100644 --- a/tensorlayer/layers/convolution/expert_deconv.py +++ b/tensorlayer/layers/convolution/expert_deconv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/group_conv.py b/tensorlayer/layers/convolution/group_conv.py index 34d8c10e6..2923a10ae 100644 --- a/tensorlayer/layers/convolution/group_conv.py +++ b/tensorlayer/layers/convolution/group_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/quan_conv.py b/tensorlayer/layers/convolution/quan_conv.py index e235dfeb4..662df2661 100644 --- a/tensorlayer/layers/convolution/quan_conv.py +++ b/tensorlayer/layers/convolution/quan_conv.py @@ -2,12 +2,12 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import (quantize_active_overflow, quantize_weight_overflow) +from tensorlayer.layers.utils import (quantize_active_overflow, + quantize_weight_overflow) __all__ = ['QuanConv2d'] diff --git a/tensorlayer/layers/convolution/quan_conv_bn.py b/tensorlayer/layers/convolution/quan_conv_bn.py index ef0f9bfda..1c1593373 100644 --- a/tensorlayer/layers/convolution/quan_conv_bn.py +++ b/tensorlayer/layers/convolution/quan_conv_bn.py @@ -3,11 +3,11 @@ import tensorflow as tf from tensorflow.python.training import moving_averages - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import (quantize_active_overflow, quantize_weight_overflow) +from tensorlayer.layers.utils import (quantize_active_overflow, + quantize_weight_overflow) # from tensorlayer.layers.core import LayersConfig diff --git a/tensorlayer/layers/convolution/separable_conv.py b/tensorlayer/layers/convolution/separable_conv.py index b6ae62446..ff67672ba 100644 --- a/tensorlayer/layers/convolution/separable_conv.py +++ b/tensorlayer/layers/convolution/separable_conv.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from 
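The quan_conv and quan_conv_bn hunks above only rewrap the import of quantize_active_overflow and quantize_weight_overflow, but it may help to recall what those helpers are for. A rough sketch of a quantization-aware convolution forward pass follows; the argument order and defaults are assumptions for illustration, not TensorLayer's verbatim implementation:

import tensorflow as tf
from tensorlayer.layers.utils import (quantize_active_overflow,
                                      quantize_weight_overflow)

def quan_conv2d_forward(inputs, filters, bitA=8, bitW=8):
    x = quantize_active_overflow(inputs, bitA)   # quantize activations to bitA bits (assumed signature)
    w = quantize_weight_overflow(filters, bitW)  # quantize weights to bitW bits (assumed signature)
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')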
tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/simplified_conv.py b/tensorlayer/layers/convolution/simplified_conv.py index c00ff8fe7..8c8eebece 100644 --- a/tensorlayer/layers/convolution/simplified_conv.py +++ b/tensorlayer/layers/convolution/simplified_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/simplified_deconv.py b/tensorlayer/layers/convolution/simplified_deconv.py index 847062859..569fe0810 100644 --- a/tensorlayer/layers/convolution/simplified_deconv.py +++ b/tensorlayer/layers/convolution/simplified_deconv.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/super_resolution.py b/tensorlayer/layers/convolution/super_resolution.py index 35fee8722..a3f51e2a8 100644 --- a/tensorlayer/layers/convolution/super_resolution.py +++ b/tensorlayer/layers/convolution/super_resolution.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias, private_method diff --git a/tensorlayer/layers/convolution/ternary_conv.py b/tensorlayer/layers/convolution/ternary_conv.py index 9a97c7bec..512350ba5 100644 --- a/tensorlayer/layers/convolution/ternary_conv.py +++ b/tensorlayer/layers/convolution/ternary_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/core.py b/tensorlayer/layers/core.py index ce98f156c..8e13631b7 100644 --- a/tensorlayer/layers/core.py +++ b/tensorlayer/layers/core.py @@ -1,17 +1,15 @@ #! 
/usr/bin/python # -*- coding: utf-8 -*- +import inspect from abc import abstractmethod import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import (deprecated_alias, private_method, protected_method) -from tensorlayer.layers.utils import (get_variable_with_initializer, list_remove_repeat) from tensorlayer.files import utils - -import inspect +from tensorlayer.layers.utils import (get_variable_with_initializer, list_remove_repeat) __all__ = ['Layer', 'ModelLayer', 'LayerList'] diff --git a/tensorlayer/layers/dense/base_dense.py b/tensorlayer/layers/dense/base_dense.py index a5b800f04..bec9d3f6f 100644 --- a/tensorlayer/layers/dense/base_dense.py +++ b/tensorlayer/layers/dense/base_dense.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/dense/binary_dense.py b/tensorlayer/layers/dense/binary_dense.py index 4067ac4c3..74d5208cd 100644 --- a/tensorlayer/layers/dense/binary_dense.py +++ b/tensorlayer/layers/dense/binary_dense.py @@ -3,7 +3,6 @@ import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/dense/dorefa_dense.py b/tensorlayer/layers/dense/dorefa_dense.py index 80ae3365c..73069d478 100644 --- a/tensorlayer/layers/dense/dorefa_dense.py +++ b/tensorlayer/layers/dense/dorefa_dense.py @@ -3,7 +3,6 @@ import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/dense/dropconnect.py b/tensorlayer/layers/dense/dropconnect.py index d68e6c762..371ed2e6b 100644 --- a/tensorlayer/layers/dense/dropconnect.py +++ b/tensorlayer/layers/dense/dropconnect.py @@ -1,13 +1,13 @@ #! 
/usr/bin/python # -*- coding: utf-8 -*- +import numbers + import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -import numbers __all__ = [ 'DropconnectDense', diff --git a/tensorlayer/layers/dense/quan_dense.py b/tensorlayer/layers/dense/quan_dense.py index 5a2513259..8d5c594c7 100644 --- a/tensorlayer/layers/dense/quan_dense.py +++ b/tensorlayer/layers/dense/quan_dense.py @@ -3,11 +3,11 @@ import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import (quantize_active_overflow, quantize_weight_overflow) +from tensorlayer.layers.utils import (quantize_active_overflow, + quantize_weight_overflow) __all__ = [ 'QuanDense', diff --git a/tensorlayer/layers/dense/quan_dense_bn.py b/tensorlayer/layers/dense/quan_dense_bn.py index e647a7e6e..bcbd70950 100644 --- a/tensorlayer/layers/dense/quan_dense_bn.py +++ b/tensorlayer/layers/dense/quan_dense_bn.py @@ -4,11 +4,11 @@ import tensorflow as tf # from tensorlayer.layers.core import LayersConfig from tensorflow.python.training import moving_averages - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import (quantize_active_overflow, quantize_weight_overflow) +from tensorlayer.layers.utils import (quantize_active_overflow, + quantize_weight_overflow) __all__ = [ 'QuanDenseLayerWithBN', diff --git a/tensorlayer/layers/dense/ternary_dense.py b/tensorlayer/layers/dense/ternary_dense.py index 27efb9090..28d84297e 100644 --- a/tensorlayer/layers/dense/ternary_dense.py +++ b/tensorlayer/layers/dense/ternary_dense.py @@ -3,7 +3,6 @@ import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/dropout.py b/tensorlayer/layers/dropout.py index 3724d8b43..25fe80a36 100644 --- a/tensorlayer/layers/dropout.py +++ b/tensorlayer/layers/dropout.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/embedding.py b/tensorlayer/layers/embedding.py index 80c5cadfa..a82c1a93b 100644 --- a/tensorlayer/layers/embedding.py +++ b/tensorlayer/layers/embedding.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/extend.py b/tensorlayer/layers/extend.py index 42395a537..09d5508db 100644 --- a/tensorlayer/layers/extend.py +++ b/tensorlayer/layers/extend.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/image_resampling.py b/tensorlayer/layers/image_resampling.py index 3b2a2825a..4713200d3 100644 --- a/tensorlayer/layers/image_resampling.py +++ b/tensorlayer/layers/image_resampling.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import 
deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/inputs.py b/tensorlayer/layers/inputs.py index 0330347fe..4f2544b06 100644 --- a/tensorlayer/layers/inputs.py +++ b/tensorlayer/layers/inputs.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.layers.core import Layer, LayerNode diff --git a/tensorlayer/layers/lambda_layers.py b/tensorlayer/layers/lambda_layers.py index 13bc3ecbe..9b82ad603 100644 --- a/tensorlayer/layers/lambda_layers.py +++ b/tensorlayer/layers/lambda_layers.py @@ -2,11 +2,10 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias -from tensorlayer.layers.core import Layer from tensorlayer.files import utils +from tensorlayer.layers.core import Layer # from tensorlayer.layers.core import TF_GRAPHKEYS_VARIABLES diff --git a/tensorlayer/layers/merge.py b/tensorlayer/layers/merge.py index 346a65962..2509d35a6 100644 --- a/tensorlayer/layers/merge.py +++ b/tensorlayer/layers/merge.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/noise.py b/tensorlayer/layers/noise.py index bd9c2df9c..c658f8e19 100644 --- a/tensorlayer/layers/noise.py +++ b/tensorlayer/layers/noise.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py index d8cec274c..0de0e8ed1 100644 --- a/tensorlayer/layers/normalization.py +++ b/tensorlayer/layers/normalization.py @@ -2,11 +2,10 @@ # -*- coding: utf-8 -*- import tensorflow as tf +import tensorlayer as tl from tensorflow.python.framework import ops from tensorflow.python.ops import math_ops from tensorflow.python.training import moving_averages - -import tensorlayer as tl from tensorlayer import logging from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/padding.py b/tensorlayer/layers/padding.py index db1bbb304..edcb720a5 100644 --- a/tensorlayer/layers/padding.py +++ b/tensorlayer/layers/padding.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/pooling.py b/tensorlayer/layers/pooling.py index 2046de6c5..a22cea358 100644 --- a/tensorlayer/layers/pooling.py +++ b/tensorlayer/layers/pooling.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/quantize.py b/tensorlayer/layers/quantize.py index 3b5b19635..47ad2a088 100644 --- a/tensorlayer/layers/quantize.py +++ b/tensorlayer/layers/quantize.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/recurrent.py b/tensorlayer/layers/recurrent.py index 16b7208d0..2364c6a7d 100644 --- a/tensorlayer/layers/recurrent.py +++ b/tensorlayer/layers/recurrent.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import 
tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/scale.py b/tensorlayer/layers/scale.py index ac1800529..6546d70af 100644 --- a/tensorlayer/layers/scale.py +++ b/tensorlayer/layers/scale.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.initializers import constant from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/shape.py b/tensorlayer/layers/shape.py index f8e7b47db..e308eb0c4 100644 --- a/tensorlayer/layers/shape.py +++ b/tensorlayer/layers/shape.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/spatial_transformer.py b/tensorlayer/layers/spatial_transformer.py index e456625a7..262108a68 100644 --- a/tensorlayer/layers/spatial_transformer.py +++ b/tensorlayer/layers/spatial_transformer.py @@ -2,11 +2,11 @@ # -*- coding: utf-8 -*- import numpy as np +from six.moves import xrange + import tensorflow as tf import tensorlayer as tl -from six.moves import xrange from tensorflow.python.ops import array_ops - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/stack.py b/tensorlayer/layers/stack.py index c31327989..c35e3837f 100644 --- a/tensorlayer/layers/stack.py +++ b/tensorlayer/layers/stack.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/utils.py b/tensorlayer/layers/utils.py index 10cc1fc18..6d411589f 100644 --- a/tensorlayer/layers/utils.py +++ b/tensorlayer/layers/utils.py @@ -2,10 +2,10 @@ # -*- coding: utf-8 -*- import numpy as np + import tensorflow as tf import tensorlayer as tl from tensorflow.python.ops.rnn_cell import LSTMStateTuple - from tensorlayer import logging from tensorlayer.decorators import deprecated, deprecated_alias diff --git a/tensorlayer/logging/contrib/hyperdash.py b/tensorlayer/logging/contrib/hyperdash.py index 122a8c7e5..6e19c8e9b 100644 --- a/tensorlayer/logging/contrib/hyperdash.py +++ b/tensorlayer/logging/contrib/hyperdash.py @@ -4,7 +4,6 @@ from __future__ import absolute_import import hyperdash as hd - import tensorlayer as tl __all__ = ["HyperDashHandler", "monitor", "Experiment", "IPythonMagicsWrapper"] diff --git a/tensorlayer/models/core.py b/tensorlayer/models/core.py index c811b9648..cbcff4bf3 100644 --- a/tensorlayer/models/core.py +++ b/tensorlayer/models/core.py @@ -3,9 +3,8 @@ from queue import Queue import tensorflow as tf -from tensorflow.python.framework import ops as tf_ops - import tensorlayer as tl +from tensorflow.python.framework import ops as tf_ops from tensorlayer import logging from tensorlayer.files import utils from tensorlayer.layers import Layer, ModelLayer diff --git a/tensorlayer/models/mobilenetv1.py b/tensorlayer/models/mobilenetv1.py index 8065eeef3..4908b3d89 100644 --- a/tensorlayer/models/mobilenetv1.py +++ b/tensorlayer/models/mobilenetv1.py @@ -5,7 +5,6 @@ import os import tensorflow as tf - from tensorlayer import logging from tensorlayer.files import (assign_weights, load_npz, maybe_download_and_extract) from tensorlayer.layers import (BatchNorm, Conv2d, DepthwiseConv2d, Flatten, 
GlobalMeanPool2d, Input, Reshape) diff --git a/tensorlayer/models/squeezenetv1.py b/tensorlayer/models/squeezenetv1.py index 6d6a70535..a2d7e4304 100644 --- a/tensorlayer/models/squeezenetv1.py +++ b/tensorlayer/models/squeezenetv1.py @@ -5,10 +5,9 @@ import os import tensorflow as tf - from tensorlayer import logging from tensorlayer.files import (assign_weights, load_npz, maybe_download_and_extract) -from tensorlayer.layers import (Concat, Conv2d, Dropout, GlobalMeanPool2d, Input, MaxPool2d, Lambda) +from tensorlayer.layers import (Concat, Conv2d, Dropout, GlobalMeanPool2d, Input, Lambda, MaxPool2d) from tensorlayer.models import Model __all__ = [ diff --git a/tensorlayer/models/vgg.py b/tensorlayer/models/vgg.py index 391878c61..06648cb53 100644 --- a/tensorlayer/models/vgg.py +++ b/tensorlayer/models/vgg.py @@ -30,12 +30,12 @@ import os import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.files import assign_weights, maybe_download_and_extract -from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input, LayerList, MaxPool2d, Lambda) +from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input, Lambda, LayerList, MaxPool2d) from tensorlayer.models import Model __all__ = [ diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py index d96a7acf1..ed1ce975d 100755 --- a/tensorlayer/nlp.py +++ b/tensorlayer/nlp.py @@ -11,11 +11,11 @@ from collections import Counter import numpy as np -import tensorflow as tf from six.moves import urllib, xrange -from tensorflow.python.platform import gfile +import tensorflow as tf import tensorlayer as tl +from tensorflow.python.platform import gfile from tensorlayer.lazy_imports import LazyImport nltk = LazyImport("nltk") diff --git a/tensorlayer/rein.py b/tensorlayer/rein.py index e5cbe6bd4..8ddce7316 100644 --- a/tensorlayer/rein.py +++ b/tensorlayer/rein.py @@ -2,9 +2,10 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf from six.moves import xrange +import tensorflow as tf + __all__ = [ 'discount_episode_rewards', 'cross_entropy_reward_loss', diff --git a/tensorlayer/utils.py b/tensorlayer/utils.py index d6b8e6d78..35e054afb 100644 --- a/tensorlayer/utils.py +++ b/tensorlayer/utils.py @@ -11,9 +11,9 @@ from sys import platform as _platform import numpy as np -import tensorflow as tf from sklearn.metrics import accuracy_score, confusion_matrix, f1_score +import tensorflow as tf import tensorlayer as tl __all__ = [ From c6e4ab3f0277ee50ae3f1eb0d50af0c72b76fa36 Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Tue, 14 May 2019 12:24:37 +0100 Subject: [PATCH 2/4] rl examples clean code, add notation, add tf.function --- CHANGELOG.md | 3 + .../tutorial_atari_pong.py | 33 +-- ...ial_bipedalwalker_a3c_continuous_action.py | 208 ++++-------------- .../tutorial_cartpole_ac.py | 76 +------ .../tutorial_frozenlake_dqn.py | 35 +-- .../tutorial_frozenlake_q_table.py | 7 +- 6 files changed, 69 insertions(+), 293 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e932d4a6e..176b91013 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ To release a new version, please update the changelog as followed: - remove `tl.layers.initialize_global_variables(sess)` (PR #931) - change `tl.layers.core`, `tl.models.core` (PR #966) - change `weights` into `all_weights`, `trainable_weights`, `nontrainable_weights` +- change reinforcement learning examples from TL1 to TL2 (PR #970) ### Dependencies Update - 
nltk>=3.3,<3.4 => nltk>=3.3,<3.5 (PR #892) @@ -319,8 +320,10 @@ To release a new version, please update the changelog as followed: - AtrousDeConv2dLayer added (PR #662) - Fix bugs of using `tf.layers` in CNN (PR #686) - Optimizer: + - AMSGrad Optimizer added based on `On the Convergence of Adam and Beyond (ICLR 2018)` (PR #636) - Setup: + - Creation of installation flaggs `all`, `all_cpu`, and `all_gpu` (PR #660) - Test: - `test_utils_predict.py` added to reproduce and fix issue #288 (PR #566) diff --git a/examples/reinforcement_learning/tutorial_atari_pong.py b/examples/reinforcement_learning/tutorial_atari_pong.py index 0e8c62009..7e1b28822 100644 --- a/examples/reinforcement_learning/tutorial_atari_pong.py +++ b/examples/reinforcement_learning/tutorial_atari_pong.py @@ -34,11 +34,6 @@ import tensorflow as tf import tensorlayer as tl -## enable eager mode -# tf.enable_eager_execution() - - -# tf.logging.set_verbosity(tf.logging.DEBUG) # enable logging tl.logging.set_verbosity(tl.logging.DEBUG) # hyper-parameters @@ -73,10 +68,7 @@ def prepro(I): episode_number = 0 xs, ys, rs = [], [], [] -# observation for training and inference -# t_states = tf.placeholder(tf.float32, shape=[None, D]) # policy network - def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape) nn = tl.layers.Dense(n_units=H, act=tf.nn.relu, name='hidden')(ni) @@ -85,22 +77,9 @@ def get_model(inputs_shape): return M model = get_model([None, D]) train_weights = model.trainable_weights -# probs = model(t_states, is_train=True).outputs -# sampling_prob = tf.nn.softmax(probs) - -# t_actions = tf.placeholder(tf.int32, shape=[None]) -# t_discount_rewards = tf.placeholder(tf.float32, shape=[None]) -# loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards) -optimizer = tf.optimizers.RMSprop(lr=learning_rate, decay=decay_rate)#.minimize(loss) - -# with tf.Session() as sess: -# sess.run(tf.global_variables_initializer()) - # if resume: TODO - # load_params = tl.files.load_npz(name=model_file_name+'.npz') - # tl.files.assign_params(sess, load_params, network) - # tl.files.load_and_assign_npz(sess, model_file_name + '.npz', network) - # network.print_params() - # network.print_layers() + +optimizer = tf.optimizers.RMSprop(lr=learning_rate, decay=decay_rate) + model.train() # set model to train mode (in case you add dropout into the model) start_time = time.time() @@ -114,14 +93,12 @@ def get_model(inputs_shape): x = x.reshape(1, D) prev_x = cur_x - # prob = sess.run(sampling_prob, feed_dict={t_states: x}) _prob = model(x) prob = tf.nn.softmax(_prob) # action. 
1: STOP 2: UP 3: DOWN # action = np.random.choice([1,2,3], p=prob.flatten()) # action = tl.rein.choice_action_by_probs(prob.flatten(), [1, 2, 3]) - # action = np.random.choice([1,2,3], p=prob.numpy()) action = tl.rein.choice_action_by_probs(prob[0].numpy(), [1, 2, 3]) observation, reward, done, _ = env.step(action) @@ -145,10 +122,6 @@ def get_model(inputs_shape): xs, ys, rs = [], [], [] - # sess.run(train_op, feed_dict={t_states: epx, t_actions: epy, t_discount_rewards: disR}) - # t_actions = tf.placeholder(tf.int32, shape=[None]) - # t_discount_rewards = tf.placeholder(tf.float32, shape=[None]) - # loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards) with tf.GradientTape() as tape: _prob = model(epx) _loss = tl.rein.cross_entropy_reward_loss(_prob, epy, disR) diff --git a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py index 145024ba9..796ef9d74 100644 --- a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py +++ b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py @@ -52,17 +52,16 @@ tfd = tfp.distributions -# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) -# np.random.seed(2) -# tf.random.set_seed(2) # reproducible +np.random.seed(2) +tf.random.set_seed(2) # reproducible GAME = 'BipedalWalker-v2' # BipedalWalkerHardcore-v2 BipedalWalker-v2 LunarLanderContinuous-v2 OUTPUT_GRAPH = False LOG_DIR = './log' -# N_WORKERS = multiprocessing.cpu_count() -N_WORKERS = 2 +N_WORKERS = multiprocessing.cpu_count() +# N_WORKERS = 2 MAX_GLOBAL_EP = 8000 # 8000 GLOBAL_NET_SCOPE = 'Global_Net' UPDATE_GLOBAL_ITER = 10 @@ -77,208 +76,91 @@ N_S = env.observation_space.shape[0] N_A = env.action_space.shape[0] -# N_A = env.action_space.n -# A_BOUND = [env.action_space.low, env.action_space.high] A_BOUND = [env.action_space.low, env.action_space.high] -# A_BOUND[0] = A_BOUND[0].reshape(1, N_A) -# A_BOUND[1] = A_BOUND[1].reshape(1, N_A) +A_BOUND[0] = A_BOUND[0].reshape(1, N_A) +A_BOUND[1] = A_BOUND[1].reshape(1, N_A) # print(A_BOUND) -# print(env.unwrapped.hull.position[0]) -# exit() - class ACNet(object): - def __init__(self, scope, globalAC=None): # no need for scope + def __init__(self, scope, globalAC=None): self.scope = scope self.save_path = './model' - # if scope == GLOBAL_NET_SCOPE: - # ## global network only do inference - # with tf.variable_scope(scope): - # self.s = tf.placeholder(tf.float32, [None, N_S], 'S') - # self._build_net() - - - # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space - - # with tf.name_scope('choose_a'): # use local params to choose action - # self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - - # else: - # ## worker network calculate gradient locally, update on global network - # # with tf.variable_scope(scope): - # # self.s = tf.placeholder(tf.float32, [None, N_S], 'S') - # # self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A') - # # self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget') - - - - # self._build_net() - - # td = tf.subtract(self.v_target, self.v, name='TD_error') - # with tf.name_scope('c_loss'): - # self.c_loss = tf.reduce_mean(tf.square(td)) - - # with tf.name_scope('wrap_a_out'): - # self.test = self.sigma[0] - # self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - - # normal_dist = tf.contrib.distributions.Normal(self.mu, 
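Before the A3C example: the Pong hunk above replaces the TF1 placeholder/session plumbing with a single eager update. Written as a standalone function (an illustrative wrapper; epx, epy and disR are the stacked observations, actions and discounted rewards built in the episode loop):

import tensorflow as tf
import tensorlayer as tl

def pong_train_step(model, optimizer, epx, epy, disR):
    with tf.GradientTape() as tape:
        logits = model(epx)  # forward pass through the tl.models.Model policy network
        loss = tl.rein.cross_entropy_reward_loss(logits, epy, disR)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss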
self.sigma) # for continuous action space - - # with tf.name_scope('a_loss'): - # log_prob = normal_dist.log_prob(self.a_his) - # exp_v = log_prob * td - # entropy = normal_dist.entropy() # encourage exploration - # self.exp_v = ENTROPY_BETA * entropy + exp_v - # self.a_loss = tf.reduce_mean(-self.exp_v) - - # with tf.name_scope('choose_a'): # use local params to choose action - # self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - - # with tf.name_scope('local_grad'): - # self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) - # self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) - # self.a_grads = tf.gradients(self.a_loss, self.a_params) - # self.c_grads = tf.gradients(self.c_loss, self.c_params) - - # with tf.name_scope('sync'): - # with tf.name_scope('pull'): - # self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params)] - # self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params)] - # with tf.name_scope('push'): - # self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params)) - # self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params)) - - # def _build_net(self): - # w_init = tf.contrib.layers.xavier_initializer() - # w_init = tf.random_uniform_initializer(0, 0.01) - w_init = tf.keras.initializers.glorot_normal(seed=None) - # with tf.variable_scope('actor'): # Policy network - # nn = InputLayer(self.s, name='in') - # nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='la') - # nn = DenseLayer(nn, n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2') - # mu = DenseLayer(nn, n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu') - # sigma = DenseLayer(nn, n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma') - # self.mu = mu.outputs - # self.sigma = sigma.outputs - def get_actor(input_shape): + + w_init = tf.keras.initializers.glorot_normal(seed=None) # initializer, glorot=xavier + def get_actor(input_shape): # policy network with tf.name_scope(self.scope): ni = tl.layers.Input(input_shape, name='in') - nn = tl.layers.Dense(n_units=50, act=tf.nn.relu6, W_init=w_init, name='la')(ni) - nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=w_init, name='la2')(nn) + nn = tl.layers.Dense(n_units=500, act=tf.nn.relu6, W_init=w_init, name='la')(ni) + nn = tl.layers.Dense(n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2')(nn) mu = tl.layers.Dense(n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu')(nn) sigma = tl.layers.Dense(n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma')(nn) return tl.models.Model(inputs=ni, outputs=[mu, sigma], name=scope+'/Actor') self.actor = get_actor( [None, N_S]) - self.actor.train() # set training mode, also for workers? - - def get_critic(input_shape): + self.actor.train() # train mode for Dropout, BatchNorm + def get_critic(input_shape): # we use Value-function here, but not Q-function. 
with tf.name_scope(self.scope): ni = tl.layers.Input(input_shape, name='in') - nn = tl.layers.Dense(n_units=50, act=tf.nn.relu6, W_init=w_init, name='lc')(ni) - nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=w_init, name='lc2')(nn) + nn = tl.layers.Dense(n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc')(ni) + nn = tl.layers.Dense(n_units=300, act=tf.nn.relu6, W_init=w_init, name='lc2')(nn) v = tl.layers.Dense(n_units=1, W_init=w_init, name='v')(nn) return tl.models.Model(inputs=ni, outputs=v, name=scope+'/Critic') self.critic = get_critic( [None, N_S]) - self.critic.train() - - # self.a_params = tl.layers.get_variables_with_name(scope + '/Actor', True, False) - # self.c_params = tl.layers.get_variables_with_name(scope + '/Critic', True, False) - - # with tf.variable_scope('critic'): # we use Value-function here, but not Q-function. - # nn = InputLayer(self.s, name='in') - # nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc') - # nn = DenseLayer(nn, n_units=200, act=tf.nn.relu6, W_init=w_init, name='lc2') - # v = DenseLayer(nn, n_units=1, W_init=w_init, name='v') - # self.v = v.outputs - - # def update_global(self, feed_dict): # run by a local - def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): - # _, _, t = sess.run( - # [self.update_a_op, self.update_c_op, self.test], feed_dict - # ) # local grads applies to global net + self.critic.train() # train mode for Dropout, BatchNorm + + @tf.function # convert numpy functions to tf.Operations in the TFgraph, return tensor + def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer to the global Actor-Crtic network for updating it with samples + ''' update the global critic ''' with tf.GradientTape() as tape: self.v = self.critic(buffer_s) - self.v_target = buffer_v_target # tensor float? + self.v_target = buffer_v_target td = tf.subtract(self.v_target, self.v, name='TD_error') - # with tf.name_scope('c_loss'): self.c_loss = tf.reduce_mean(tf.square(td)) self.c_grads = tape.gradient(self.c_loss, self.critic.trainable_weights) - OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) - del tape # Drop the reference to the tape - + OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) # local grads applies to global net + # del tape # Drop the reference to the tape + ''' update the global actor ''' with tf.GradientTape() as tape: self.mu, self.sigma = self.actor(buffer_s) - # print('mu: ', self.mu) - # print('sigma: ', self.sigma) - # with tf.name_scope('wrap_a_out'): self.test = self.sigma[0] self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - - # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space - normal_dist = tfd.Normal(self.mu, self.sigma) - # with tf.name_scope('a_loss'): - self.a_his = buffer_a # tensor float? 
+ normal_dist = tfd.Normal(self.mu, self.sigma) # no tf.contrib for tf2.0 + self.a_his = buffer_a # float32 log_prob = normal_dist.log_prob(self.a_his) - exp_v = log_prob * td + exp_v = log_prob * td # td is from the critic part, no gradients for it entropy = normal_dist.entropy() # encourage exploration self.exp_v = ENTROPY_BETA * entropy + exp_v self.a_loss = tf.reduce_mean(-self.exp_v) - # with tf.name_scope('local_grad'): - # self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) - # self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) - # self.a_grads = tf.gradients(self.a_loss, self.a_params) - # self.c_grads = tf.gradients(self.c_loss, self.c_params) self.a_grads = tape.gradient(self.a_loss, self.actor.trainable_weights) - OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) - - - # return t - return self.test.numpy() - - def pull_global(self, globalAC): # run by a local - # sess.run([self.pull_a_params_op, self.pull_c_params_op]) - # with tf.name_scope('sync'): - # with tf.name_scope('pull'): - # self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.actor.trainable_weights, globalAC.a_params)] - # self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.critic.trainable_weights, globalAC.c_params)] - # print(self.scope, 'before: ', self.actor.trainable_weights[0][5][2]) - # print(self.scope, 'global: ', globalAC.actor.trainable_weights[0][5][2]) + OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) # local grads applies to global net + return self.test # for test purpose + + @tf.function + def pull_global(self, globalAC): # run by a local, pull weights from the global nets for l_p, g_p in zip(self.actor.trainable_weights, globalAC.actor.trainable_weights): l_p.assign(g_p) for l_p, g_p in zip(self.critic.trainable_weights, globalAC.critic.trainable_weights): l_p.assign(g_p) - # print(self.scope, 'after: ', self.actor.trainable_weights[0][5][2]) def choose_action(self, s): # run by a local s = s[np.newaxis, :] self.mu, self.sigma = self.actor(s) - # print('mu1: ', self.mu) - # print('sigma1: ', self.sigma) + with tf.name_scope('wrap_a_out'): - # self.test = self.sigma[0] self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) - normal_dist = tfd.Normal(self.mu, self.sigma) + normal_dist = tfd.Normal(self.mu, self.sigma) # for continuous action space self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - # return sess.run(self.A, {self.s: s})[0] return self.A.numpy()[0] - def save_ckpt(self): + def save_ckpt(self): # save trained weights tl.files.save_npz(self.actor.trainable_weights, name='model_actor.npz') tl.files.save_npz(self.critic.trainable_weights, name='model_critic.npz') - # tl.files.save_ckpt( - # sess=sess, mode_name='model.ckpt', var_list=self.a_params + self.c_params, save_dir=self.scope, - # printable=True - # ) - - def load_ckpt(self): - # tl.files.load_hdf5_to_trainable_weights(self.save_path+'/actor', self.actor) - # tl.files.load_hdf5_to_trainable_weights(self.save_path+'/critic', self.critic) + + def load_ckpt(self): # load trained weights tl.files.load_and_assign_npz(name='model_actor.npz', network=self.actor) tl.files.load_and_assign_npz(name='model_critic.npz', network=self.critic) @@ -304,11 +186,8 @@ def work(self, globalAC): s = s.astype('float32') # double to float a = self.AC.choose_action(s) s_, r, done, _info = self.env.step(a) + s_ = 
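The update_global and choose_action methods above carry the two ideas that make this an A3C worker: gradients are computed against the local network but applied to the global one (and later pulled back with pull_global), and the continuous action comes from a Normal distribution whose mean is rescaled to the action bounds. A condensed sketch of both, with illustrative names and an assumed default for the entropy coefficient:

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

def update_global_sketch(local_ac, global_ac, opt_a, opt_c,
                         buffer_s, buffer_a, buffer_v_target, a_bound, entropy_beta=0.005):
    # critic: squared TD error, with the worker's gradients applied to the GLOBAL critic
    with tf.GradientTape() as tape:
        v = local_ac.critic(buffer_s)
        td = buffer_v_target - v
        c_loss = tf.reduce_mean(tf.square(td))
    c_grads = tape.gradient(c_loss, local_ac.critic.trainable_weights)
    opt_c.apply_gradients(zip(c_grads, global_ac.critic.trainable_weights))

    # actor: log-probability weighted by the TD error (advantage) plus an entropy bonus,
    # again with the worker's gradients applied to the GLOBAL actor
    with tf.GradientTape() as tape:
        mu, sigma = local_ac.actor(buffer_s)
        mu, sigma = mu * a_bound[1], sigma + 1e-5
        dist = tfd.Normal(mu, sigma)
        exp_v = dist.log_prob(buffer_a) * td + entropy_beta * dist.entropy()
        a_loss = tf.reduce_mean(-exp_v)
    a_grads = tape.gradient(a_loss, local_ac.actor.trainable_weights)
    opt_a.apply_gradients(zip(a_grads, global_ac.actor.trainable_weights))

def sample_action(actor, s, a_bound):
    # mirrors choose_action(): rescale the mean, keep sigma strictly positive, sample, clip
    mu, sigma = actor(s[np.newaxis, :].astype(np.float32))
    mu, sigma = mu * a_bound[1], sigma + 1e-5
    dist = tfd.Normal(mu, sigma)
    a = tf.clip_by_value(tf.squeeze(dist.sample(1), axis=0), *a_bound)
    return a.numpy()[0]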
s_.astype('float32') # double to float - # print('s:',s) - # print('a:', a) - # print('r:',r) - # set robot falls reward to -2 instead of -100 if r == -100: r = -2 @@ -322,7 +201,6 @@ def work(self, globalAC): if done: v_s_ = 0 # terminal else: - # v_s_ = sess.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0] v_s_ = self.AC.critic(s_[np.newaxis, :])[0,0] # reduce dim from 2 to 0 buffer_v_target = [] @@ -336,11 +214,8 @@ def work(self, globalAC): buffer_s, buffer_a, buffer_v_target = ( np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target) ) - # print(buffer_s, buffer_a, buffer_v_target) - # feed_dict = {self.AC.s: buffer_s, self.AC.a_his: buffer_a, self.AC.v_target: buffer_v_target} # update gradients on global network - # self.AC.update_global(feed_dict) - self.AC.update_global(buffer_s, buffer_a, buffer_v_target, globalAC) + self.AC.update_global(buffer_s, buffer_a, buffer_v_target.astype('float32'), globalAC) buffer_s, buffer_a, buffer_r = [], [], [] # update local network from global network @@ -368,7 +243,6 @@ def work(self, globalAC): if __name__ == "__main__": - # sess = tf.Session() # ============================= TRAINING =============================== with tf.device("/cpu:0"): @@ -383,7 +257,6 @@ def work(self, globalAC): workers.append(Worker(i_name, GLOBAL_AC)) COORD = tf.train.Coordinator() - # sess.run(tf.global_variables_initializer()) # start TF threading worker_threads = [] @@ -406,7 +279,6 @@ def work(self, globalAC): # ============================= EVALUATION ============================= # env = gym.make(GAME) # GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE) - # sess.run(tf.global_variables_initializer()) GLOBAL_AC.load_ckpt() while True: s = env.reset() diff --git a/examples/reinforcement_learning/tutorial_cartpole_ac.py b/examples/reinforcement_learning/tutorial_cartpole_ac.py index ecdf68084..19faf05eb 100644 --- a/examples/reinforcement_learning/tutorial_cartpole_ac.py +++ b/examples/reinforcement_learning/tutorial_cartpole_ac.py @@ -39,11 +39,6 @@ import tensorflow as tf import tensorlayer as tl -## enable eager mode -# tf.enable_eager_execution() - - -# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) np.random.seed(2) @@ -65,7 +60,6 @@ N_F = env.observation_space.shape[0] N_A = env.action_space.n -# env.action_space.sample() random sample print("observation dimension: %d" % N_F) # 4 print("observation high: %s" % env.observation_space.high) # [ 2.4 , inf , 0.41887902 , inf] @@ -76,16 +70,6 @@ class Actor(object): def __init__(self, n_features, n_actions, lr=0.001): - # self.sess = sess - # self.s = tf.placeholder(tf.float32, [1, n_features], "state") - # self.a = tf.placeholder(tf.int32, [None], "act") - # self.td_error = tf.placeholder(tf.float32, [None], "td_error") # TD_error - - # with tf.variable_scope('Actor'): # Policy network - # n = InputLayer(self.s, name='in') - # n = DenseLayer(n, n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden') - # # n = DenseLayer(n, n_units=10, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2') - # n = DenseLayer(n, n_units=n_actions, name='Pi') def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='state') @@ -95,49 +79,26 @@ def get_model(inputs_shape): return tl.models.Model(inputs=ni, outputs=nn, name="Actor") self.model = get_model([None, n_features]) self.model.train() - # self.acts_logits = n.outputs - # self.acts_prob = tf.nn.softmax(self.acts_logits) - - # Hao Dong - # with 
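Back in the A3C worker loop above, buffer_v_target holds bootstrapped discounted returns: starting from the critic's value of the last state (or 0 on a terminal state), the reward buffer is walked backwards. The exact loop sits in the elided part of the hunk; its standard form is sketched here:

def discounted_targets(buffer_r, v_s_, gamma):
    # v_s_ is the bootstrap value of the state reached after the last stored reward
    targets = []
    for r in reversed(buffer_r):
        v_s_ = r + gamma * v_s_
        targets.append(v_s_)
    targets.reverse()
    return targets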
tf.variable_scope('loss'): - # self.exp_v = tl.rein.cross_entropy_reward_loss( - # logits=self.acts_logits, actions=self.a, rewards=self.td_error, name='actor_weighted_loss' - # ) - - # with tf.variable_scope('train'): - # self.train_op = tf.train.AdamOptimizer(lr).minimize(self.exp_v) self.optimizer = tf.optimizers.Adam(lr) - # Morvan Zhou (the same) - # with tf.variable_scope('exp_v'): - # # log_prob = tf.log(self.acts_prob[0, self.a[0]]) - # # self.exp_v = tf.reduce_mean(log_prob * self.td_error[0]) # advantage (TD_error) guided loss - # self.exp_v = tl.rein.log_weight(probs=self.acts_prob[0, self.a[0]], trainable_weights=self.td_error) - # - # with tf.variable_scope('train'): - # self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v) # minimize(-exp_v) = maximize(exp_v) def learn(self, s, a, td): - # _, exp_v = self.sess.run([self.train_op, self.exp_v], {self.s: [s], self.a: [a], self.td_error: td[0]}) with tf.GradientTape() as tape: - - # _logits = self.model([s]).outputs _logits = self.model(np.array([s])) - # _probs = tf.nn.softmax(_logits) - _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) # cross-entropy loss weighted by rewards + ## cross-entropy loss weighted by td-error (advantage), + # the cross-entropy mearsures the difference of two probability distributions: the predicted logits and sampled action distribution, + # then weighted by the td-error: small difference of real and predict actions for large td-error (advantage); and vice versa. + _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) grad = tape.gradient(_exp_v, self.model.trainable_weights) self.optimizer.apply_gradients(zip(grad, self.model.trainable_weights)) return _exp_v def choose_action(self, s): - # probs = self.sess.run(self.acts_prob, {self.s: [s]}) # get probabilities of all actions - # _logits = self.model(np.array([s])).outputs _logits = self.model(np.array([s])) _probs = tf.nn.softmax(_logits).numpy() - return tl.rein.choice_action_by_probs(_probs.ravel()) + return tl.rein.choice_action_by_probs(_probs.ravel()) # sample according to probability distribution def choose_action_greedy(self, s): - # probs = self.sess.run(self.acts_prob, {self.s: [s]}) # get probabilities of all actions - _logits = self.model(np.array([s])) + _logits = self.model(np.array([s])) # logits: probability distribution of actions _probs = tf.nn.softmax(_logits).numpy() return np.argmax(_probs.ravel()) @@ -145,17 +106,7 @@ def choose_action_greedy(self, s): class Critic(object): def __init__(self, n_features, lr=0.01): - # self.sess = sess - # self.s = tf.placeholder(tf.float32, [1, n_features], "state") - # self.v_ = tf.placeholder(tf.float32, [1, 1], "v_next") - # self.r = tf.placeholder(tf.float32, None, 'r') - - # with tf.variable_scope('Critic'): # we use Value-function here, not Action-Value-function - # n = InputLayer(self.s, name='in') - # n = DenseLayer(n, n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden') - # # n = DenseLayer(n, n_units=5, act=tf.nn.relu, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2') - # n = DenseLayer(n, n_units=1, act=None, name='V') - # self.v = n.outputs + def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='state') nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden')(ni) @@ -164,21 +115,14 @@ def get_model(inputs_shape): return tl.models.Model(inputs=ni, outputs=nn, name="Critic") 
self.model = get_model([1, n_features]) self.model.train() - # with tf.variable_scope('squared_TD_error'): - # # TD_error = r + lambd * V(newS) - V(S) - # self.td_error = self.r + LAMBDA * self.v_ - self.v - # self.loss = tf.square(self.td_error) - # with tf.variable_scope('train'): - # self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss) + self.optimizer = tf.optimizers.Adam(lr) def learn(self, s, r, s_): - # v_ = self.sess.run(self.v, {self.s: [s_]}) v_ = self.model(np.array([s_])) - # td_error, _ = self.sess.run([self.td_error, self.train_op], {self.s: [s], self.v_: v_, self.r: r}) with tf.GradientTape() as tape: v = self.model(np.array([s])) - # TD_error = r + lambd * V(newS) - V(S) + ## TD_error = r + lambd * V(newS) - V(S) td_error = r + LAMBDA * v_ - v loss = tf.square(td_error) grad = tape.gradient(loss, self.model.trainable_weights) @@ -215,7 +159,7 @@ def learn(self, s, r, s_): all_r.append(r) td_error = critic.learn(s, r, s_new) # learn Value-function : gradient = grad[r + lambda * V(s_new) - V(s)] - actor.learn(s, a, td_error) # learn Policy : true_gradient = grad[logPi(s, a) * td_error] + actor.learn(s, a, td_error) # learn Policy : true_gradient = grad[logPi(s, a) * td_error] s = s_new t += 1 diff --git a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py index 8e96a279e..935e3e04b 100644 --- a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py +++ b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py @@ -24,6 +24,10 @@ The episode ends when you reach the goal or fall in a hole. You receive a reward of 1 if you reach the goal, and zero otherwise. + +tensorflow==2.0.0a0 +tensorlayer==2.0.0 + """ import time @@ -33,11 +37,6 @@ import tensorflow as tf import tensorlayer as tl -## enable eager mode -# tf.enable_eager_execution() - - -# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) env = gym.make('FrozenLake-v0') @@ -50,13 +49,8 @@ def to_one_hot(i, n_classes=None): render = False # display the game environment running_reward = None - # tf.reset_default_graph() ## Define Q-network q(a,s) that ouput the rewards of 4 actions by given state, i.e. Action-Value Function. -# 4x4 grid can be represented by one-hot vector with 16 integers. - # inputs = tf.placeholder(shape=[1, 16], dtype=tf.float32) - # net = InputLayer(inputs, name='observation') - # net = DenseLayer(net, 4, act=None, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s') - # y = net.outputs # action-value / rewards of 4 actions +# encoding for state: 4x4 grid can be represented by one-hot vector with 16 integers. def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='observation') nn = tl.layers.Dense(4, act=None, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s')(ni) @@ -65,13 +59,6 @@ def get_model(inputs_shape): qnetwork.train() train_weights = qnetwork.trainable_weights -# chose action greedily with reward. in Q-Learning, policy is greedy, so we use "max" to select the next action. - # predict = tf.argmax(y, 1) - -## Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values. 
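Stepping back to the CartPole hunk: Critic.learn above is plain TD(0). The value network regresses towards r + LAMBDA * V(s'), and the resulting TD error doubles as the advantage handed to the actor. As a standalone function with illustrative names:

import numpy as np
import tensorflow as tf

def critic_learn(model, optimizer, s, r, s_next, gamma):
    v_next = model(np.array([s_next], dtype=np.float32))
    with tf.GradientTape() as tape:
        v = model(np.array([s], dtype=np.float32))
        td_error = r + gamma * v_next - v   # TD(0) target minus current estimate
        loss = tf.square(td_error)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return td_error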
- # nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32) - # loss = tl.cost.mean_squared_error(nextQ, y, is_mean=False) # tf.reduce_sum(tf.square(nextQ - y)) - # train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss) optimizer = tf.optimizers.SGD(learning_rate=0.1) ## Set learning parameters @@ -79,8 +66,6 @@ def get_model(inputs_shape): e = 0.1 # e-Greedy Exploration, the larger the more random num_episodes = 10000 -# with tf.Session() as sess: - # tl.layers.initialize_global_variables(sess) for i in range(num_episodes): ## Reset environment and get first new observation episode_time = time.time() @@ -89,7 +74,6 @@ def get_model(inputs_shape): for j in range(99): # step index, maximum step is 99 if render: env.render() ## Choose an action by greedily (with e chance of random action) from the Q-network - # a, allQ = sess.run([predict, y], feed_dict={inputs: [to_one_hot(s, 16)]}) allQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).numpy() a = np.argmax(allQ, 1) @@ -99,7 +83,6 @@ def get_model(inputs_shape): ## Get new state and reward from environment s1, r, d, _ = env.step(a[0]) ## Obtain the Q' values by feeding the new state through our network - # Q1 = sess.run(y, feed_dict={inputs: [to_one_hot(s1, 16)]}) Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).numpy() ## Obtain maxQ' and set our target value for chosen action. @@ -110,9 +93,7 @@ def get_model(inputs_shape): # it is not real target Q value, it is just an estimation, # but check the Q-Learning update formula: # Q'(s,a) <- Q(s,a) + alpha(r + lambd * maxQ(s',a') - Q(s, a)) - # minimizing |r + lambd * maxQ(s',a') - Q(s, a)|^2 equal to force - # Q'(s,a) ≈ Q(s,a) - # _ = sess.run(train_op, {inputs: [to_one_hot(s, 16)], nextQ: targetQ}) + # minimizing |r + lambd * maxQ(s',a') - Q(s, a)|^2 equals to force Q'(s,a) ≈ Q(s,a) with tf.GradientTape() as tape: _qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)) _loss = tl.cost.mean_squared_error(targetQ, _qvalues, is_mean=False) @@ -128,5 +109,5 @@ def get_model(inputs_shape): ## Note that, the rewards here with random action running_reward = rAll if running_reward is None else running_reward * 0.99 + rAll * 0.01 - print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs %s" % \ - (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!')) + print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs " % \ + (i, num_episodes, rAll, running_reward, time.time() - episode_time)) diff --git a/examples/reinforcement_learning/tutorial_frozenlake_q_table.py b/examples/reinforcement_learning/tutorial_frozenlake_q_table.py index a5b44059a..a8decb273 100644 --- a/examples/reinforcement_learning/tutorial_frozenlake_q_table.py +++ b/examples/reinforcement_learning/tutorial_frozenlake_q_table.py @@ -11,6 +11,9 @@ EN: https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-0-q-learning-with-tables-and-neural-networks-d195264329d0#.5m3361vlw CN: https://zhuanlan.zhihu.com/p/25710327 +tensorflow==2.0.0a0 +tensorlayer==2.0.0 + """ import time @@ -52,7 +55,7 @@ break rList.append(rAll) running_reward = r if running_reward is None else running_reward * 0.99 + r * 0.01 - print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs %s" % \ - (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!')) + print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs " % \ + (i, num_episodes, 
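For the FrozenLake DQN above, the TF1 placeholder/feed_dict machinery collapses into one eager update per step: build the target vector by overwriting only the taken action's Q value with r + lambd * maxQ(s'), then regress the network onto it. As a standalone function (illustrative names; to_one_hot and lambd are the tutorial's helper and discount constant):

import numpy as np
import tensorflow as tf
import tensorlayer as tl

def q_update(qnetwork, optimizer, s, a, r, s1, lambd, to_one_hot):
    allQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).numpy()
    Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).numpy()
    targetQ = allQ.copy()
    targetQ[0, a] = r + lambd * np.max(Q1)   # Q-learning target for the chosen action only
    with tf.GradientTape() as tape:
        qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32))
        loss = tl.cost.mean_squared_error(targetQ, qvalues, is_mean=False)
    grads = tape.gradient(loss, qnetwork.trainable_weights)
    optimizer.apply_gradients(zip(grads, qnetwork.trainable_weights))
    return loss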
rAll, running_reward, time.time() - episode_time)) print("Final Q-Table Values:/n %s" % Q) From e0e903b178ef0c5029b17d64fe3d8c3506e6052a Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Tue, 14 May 2019 12:33:39 +0100 Subject: [PATCH 3/4] change log --- CHANGELOG.md | 457 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 457 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..54f3d153f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,457 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + + + + + +## [Unreleased] + +### Added + - Layer + - `InstanceNorm`, `InstanceNorm1d`, `InstanceNorm2d`, `InstanceNorm3d` (PR #963) + +### Changed +- remove `tl.layers.initialize_global_variables(sess)` (PR #931) +- change `tl.layers.core`, `tl.models.core` (PR #966) +- change `weights` into `all_weights`, `trainable_weights`, `nontrainable_weights` +- change reinforcement learning examples from TL1 to TL2 (PR #970) + +### Dependencies Update +- nltk>=3.3,<3.4 => nltk>=3.3,<3.5 (PR #892) +- pytest>=3.6,<3.11 => pytest>=3.6,<4.1 (PR #889) +- yapf>=0.22,<0.25 => yapf==0.25.0 (PR #896) +- imageio==2.5.0 progressbar2==3.39.3 scikit-learn==0.21.0 scikit-image==0.15.0 scipy==1.2.1 wrapt==1.11.1 pymongo==3.8.0 sphinx==2.0.1 wrapt==1.11.1 opencv-python==4.1.0.25 requests==2.21.0 tqdm==4.31.1 lxml==4.3.3 pycodestyle==2.5.0 sphinx==2.0.1 yapf==0.27.0(PR #967) + +### Deprecated + +### Fixed +- fix docs of models @zsdonghao #957 +- In `BatchNorm`, keep dimensions of mean and variance to suit `channels first` (PR #963) + + +### Removed + +### Security + +### Contributors +- @zsdonghao: #931 +- @yd-yin: #963 + + +## [2.0.0-alpha] - 2019-05-04 + +### Changed +* update for TensorLayer 2.0.0 alpha version (PR #952) +* support TensorFlow 2.0.0-alpha +* support both static and dynamic model building + +### Dependencies Update +- tensorflow>=1.6,<1.13 => tensorflow>=2.0.0-alpha (PR #952) +- h5py>=2.9 (PR #952) +- cloudpickle>=0.8.1 (PR #952) +- remove matplotlib + +### Contributors +- @zsdonghao +- @JingqingZ +- @ChrisWu1997 +- @warshallrho + +## [1.11.1] - 2018-11-15 + +### Changed +* guide for pose estimation - flipping (PR #884) +* cv2 transform support 2 modes (PR #885) + +### Dependencies Update +- pytest>=3.6,<3.9 => pytest>=3.6,<3.10 (PR #874) +- requests>=2.19,<2.20 => requests>=2.19,<2.21 (PR #874) +- tqdm>=4.23,<4.28 => tqdm>=4.23,<4.29 (PR #878) +- pytest>=3.6,<3.10 => pytest>=3.6,<3.11 (PR #886) +- pytest-xdist>=1.22,<1.24 => pytest-xdist>=1.22,<1.25 (PR #883) +- tensorflow>=1.6,<1.12 => tensorflow>=1.6,<1.13 (PR #886) + +### Contributors +- @zsdonghao: #884 #885 + +## [1.11.0] - 2018-10-18 + +### Added +- Layer: + - Release `GroupNormLayer` (PR #850) +- Image affine transformation APIs + - `affine_rotation_matrix` (PR #857) + - `affine_horizontal_flip_matrix` (PR #857) + - `affine_vertical_flip_matrix` (PR #857) + - `affine_shift_matrix` (PR #857) + - `affine_shear_matrix` (PR #857) + - `affine_zoom_matrix` (PR #857) + - `affine_transform_cv2` (PR #857) + - `affine_transform_keypoints` (PR #857) +- Affine transformation tutorial + - `examples/data_process/tutorial_fast_affine_transform.py` (PR #857) + +### Changed +- BatchNormLayer: support `data_format` + +### Dependencies Update +- matplotlib>=2.2,<2.3 => 
matplotlib>=2.2,<3.1 (PR #845) +- pydocstyle>=2.1,<2.2 => pydocstyle>=2.1,<3.1 (PR #866) +- scikit-learn>=0.19,<0.20 => scikit-learn>=0.19,<0.21 (PR #851) +- sphinx>=1.7,<1.8 => sphinx>=1.7,<1.9 (PR #842) +- tensorflow>=1.6,<1.11 => tensorflow>=1.6,<1.12 (PR #853) +- tqdm>=4.23,<4.26 => tqdm>=4.23,<4.28 (PR #862 & #868) +- yapf>=0.22,<0.24 => yapf>=0.22,<0.25 (PR #829) + +### Fixed +- Correct offset calculation in `tl.prepro.transform_matrix_offset_center` (PR #855) + +### Contributors +- @2wins: #850 #855 +- @DEKHTIARJonathan: #853 +- @zsdonghao: #857 +- @luomai: #857 + +## [1.10.1] - 2018-09-07 + +### Added +- unittest `tests\test_timeout.py` has been added to ensure the network creation process does not freeze. + +### Changed + - remove 'tensorboard' param, replaced by 'tensorboard_dir' in `tensorlayer/utils.py` with customizable tensorboard directory (PR #819) + +### Removed +- TL Graph API removed. Memory Leaks Issues with this API, will be fixed and integrated in TL 2.0 (PR #818) + +### Fixed +- Issue #817 fixed: TL 1.10.0 - Memory Leaks and very slow network creation. + +### Dependencies Update +- autopep8>=1.3,<1.4 => autopep8>=1.3,<1.5 (PR #815) +- imageio>=2.3,<2.4 => imageio>=2.3,<2.5 (PR #823) +- pytest>=3.6,<3.8 => pytest>=3.6,<3.9 (PR #823) +- pytest-cov>=2.5,<2.6 => pytest-cov>=2.5,<2.7 (PR #820) + +### Contributors +- @DEKHTIARJonathan: #815 #818 #820 #823 +- @ndiy: #819 +- @zsdonghao: #818 + + +## [1.10.0] - 2018-09-02 + +### Added +- API: + - Add `tl.model.vgg19` (PR #698) + - Add `tl.logging.contrib.hyperdash` (PR #739) + - Add `tl.distributed.trainer` (PR #700) + - Add `prefetch_buffer_size` to the `tl.distributed.trainer` (PR #766) + - Add `tl.db.TensorHub` (PR #751) + - Add `tl.files.save_graph` (PR #751) + - Add `tl.files.load_graph_` (PR #751) + - Add `tl.files.save_graph_and_params` (PR #751) + - Add `tl.files.load_graph_and_params` (PR #751) + - Add `tl.prepro.keypoint_random_xxx` (PR #787) +- Documentation: + - Add binary, ternary and dorefa links (PR #711) + - Update input scale of VGG16 and VGG19 to 0~1 (PR #736) + - Update database (PR #751) +- Layer: + - Release SwitchNormLayer (PR #737) + - Release QuanConv2d, QuanConv2dWithBN, QuanDenseLayer, QuanDenseLayerWithBN (PR#735) + - Update Core Layer to support graph (PR #751) + - All Pooling layers support `data_format` (PR #809) +- Setup: + - Creation of installation flaggs `all_dev`, `all_cpu_dev`, and `all_gpu_dev` (PR #739) +- Examples: + - change folder struction (PR #802) + - `tutorial_models_vgg19` has been introduced to show how to use `tl.model.vgg19` (PR #698). 
+ - fix bug of `tutorial_bipedalwalker_a3c_continuous_action.py` (PR #734, Issue #732) + - `tutorial_models_vgg16` and `tutorial_models_vgg19` has been changed the input scale from [0,255] to [0,1](PR #710) + - `tutorial_mnist_distributed_trainer.py` and `tutorial_cifar10_distributed_trainer.py` are added to explain the uses of Distributed Trainer (PR #700) + - add `tutorial_quanconv_cifar10.py` and `tutorial_quanconv_mnist.py` (PR #735) + - add `tutorial_work_with_onnx.py`(PR #775) +- Applications: + - [Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization](https://arxiv.org/abs/1703.06868) (PR #799) + +### Changed + - function minibatches changed to avoid wasting samples.(PR #762) + - all the input scale in both vgg16 and vgg19 has been changed the input scale from [0,255] to [0,1](PR #710) + - Dockerfiles merged and refactored into one file (PR #747) + - LazyImports move to the most **top level** imports as possible (PR #739) + - some new test functions have been added in `test_layers_convolution.py`, `test_layers_normalization.py`, `test_layers_core.py` (PR #735) + - documentation now uses mock imports reducing the number of dependencies to compile the documentation (PR #785) + - fixed and enforced pydocstyle D210, D200, D301, D207, D403, D204, D412, D402, D300, D208 (PR #784) + +### Deprecated + - `tl.logging.warn` has been deprecated in favor of `tl.logging.warning` (PR #739) + +### Removed + - `conv_layers()` has been removed in both vgg16 and vgg19(PR #710) + - graph API (PR #818) + +### Fixed +- import error caused by matplotlib on OSX (PR #705) +- missing import in tl.prepro (PR #712) +- Dockerfiles import error fixed - issue #733 (PR #747) +- Fix a typo in `absolute_difference_error` in file: `tensorlayer/cost.py` - Issue #753 (PR #759) +- Fix the bug of scaling the learning rate of trainer (PR #776) +- log error instead of info when npz file not found. 
(PR #812) + +### Dependencies Update +- numpy>=1.14,<1.15 => numpy>=1.14,<1.16 (PR #754) +- pymongo>=3.6,<3.7 => pymongo>=3.6,<3.8 (PR #750) +- pytest>=3.6,<3.7 => tqdm>=3.6,<3.8 (PR #798) +- pytest-xdist>=1.22,<1.23 => pytest-xdist>=1.22,<1.24 (PR #805 and #806) +- tensorflow>=1.8,<1.9 => tensorflow>=1.6,<1.11 (PR #739 and PR #798) +- tqdm>=4.23,<4.25 => tqdm>=4.23,<4.26 (PR #798) +- yapf>=0.21,<0.22 => yapf>=0.22,<0.24 (PR #798 #808) + +### Contributors +- @DEKHTIARJonathan: #739 #747 #750 #754 +- @lgarithm: #705 #700 +- @OwenLiuzZ: #698 #710 #775 #776 +- @zsdonghao: #711 #712 #734 #736 #737 #700 #751 #809 #818 +- @luomai: #700 #751 #766 #802 +- @XJTUWYD: #735 +- @mutewall: #735 +- @thangvubk: #759 +- @JunbinWang: #796 +- @boldjoel: #787 + +## [1.9.1] - 2018-07-30 + +### Fixed +- Issue with tensorflow 1.10.0 fixed + +## [1.9.0] - 2018-06-16 + +### Added +- API: + - `tl.alphas` and `tl.alphas_like` added following the tf.ones/zeros and tf.zeros_like/ones_like (PR #580) + - `tl.lazy_imports.LazyImport` to import heavy libraries only when necessary (PR #667) + - `tl.act.leaky_relu6` and `tl.layers.PRelu6Layer` have been deprecated (PR #686) + - `tl.act.leaky_twice_relu6` and `tl.layers.PTRelu6Layer` have been deprecated (PR #686) +- CI Tool: + - [Stale Probot](https://github.com/probot/stale) added to clean stale issues (PR #573) + - [Changelog Probot](https://github.com/mikz/probot-changelog) Configuration added (PR #637) + - Travis Builds now handling a matrix of TF Version from TF==1.6.0 to TF==1.8.0 (PR #644) + - CircleCI added to build and upload Docker Containers for each PR merged and tag release (PR #648) +- Decorator: + - `tl.decorators` API created including `deprecated_alias` and `private_method` (PR #660) + - `tl.decorators` API enriched with `protected_method` (PR #675) + - `tl.decorators` API enriched with `deprecated` directly raising warning and modifying documentation (PR #691) +- Docker: + - Containers for each release and for each PR merged on master built (PR #648) + - Containers built in the following configurations (PR #648): + - py2 + cpu + - py2 + gpu + - py3 + cpu + - py3 + gpu +- Documentation: + - Clean README.md (PR #677) + - Release semantic version added on index page (PR #633) + - Optimizers page added (PR #636) + - `AMSGrad` added on Optimizers page added (PR #636) +- Layer: + - ElementwiseLambdaLayer added to use custom function to connect multiple layer inputs (PR #579) + - AtrousDeConv2dLayer added (PR #662) + - Fix bugs of using `tf.layers` in CNN (PR #686) +- Optimizer: + + - AMSGrad Optimizer added based on `On the Convergence of Adam and Beyond (ICLR 2018)` (PR #636) +- Setup: + + - Creation of installation flaggs `all`, `all_cpu`, and `all_gpu` (PR #660) +- Test: + - `test_utils_predict.py` added to reproduce and fix issue #288 (PR #566) + - `Layer_DeformableConvolution_Test` added to reproduce issue #572 with deformable convolution (PR #573) + - `Array_Op_Alphas_Test` and `Array_Op_Alphas_Like_Test` added to test `tensorlayer/array_ops.py` file (PR #580) + - `test_optimizer_amsgrad.py` added to test `AMSGrad` optimizer (PR #636) + - `test_logging.py` added to insure robustness of the logging API (PR #645) + - `test_decorators.py` added (PR #660) + - `test_activations.py` added (PR #686) +- Tutorials: + - `tutorial_tfslim` has been introduced to show how to use `SlimNetsLayer` (PR #560). 
+ - add the following to all tutorials (PR #697): + ```python + tf.logging.set_verbosity(tf.logging.DEBUG) + tl.logging.set_verbosity(tl.logging.DEBUG) + ``` + +### Changed +- Tensorflow CPU & GPU dependencies moved to separated requirement files in order to allow PyUP.io to parse them (PR #573) +- The document of LambdaLayer for linking it with ElementwiseLambdaLayer (PR #587) +- RTD links point to stable documentation instead of latest used for development (PR #633) +- TF Version older than 1.6.0 are officially unsupported and raises an exception (PR #644) +- README.md Badges Updated with Support Python and Tensorflow Versions (PR #644) +- TL logging API has been consistent with TF logging API and thread-safe (PR #645) +- Relative Imports changed for absolute imports (PR #657) +- `tl.files` refactored into a directory with numerous files (PR #657) +- `tl.files.voc_dataset` fixed because of original Pascal VOC website was down (PR #657) +- extra requirements hidden inside the library added in the project requirements (PR #657) +- requirements files refactored in `requirements/` directory (PR #657) +- README.md and other markdown files have been refactored and cleaned. (PR #639) +- Ternary Convolution Layer added in unittest (PR #658) +- Convolution Layers unittests have been cleaned & refactored (PR #658) +- All the tests are now using a DEBUG level verbosity when run individualy (PR #660) +- `tf.identity` as activation is **ignored**, thus reducing the size of the graph by removing useless operation (PR #667) +- argument dictionaries are now checked and saved within the `Layer` Base Class (PR #667) +- `Layer` Base Class now presenting methods to update faultlessly `all_layers`, `all_params`, and `all_drop` (PR #675) +- Input Layers have been removed from `tl.layers.core` and added to `tl.layers.inputs` (PR #675) +- Input Layers are now considered as true layers in the graph (they represent a placeholder), unittests have been updated (PR #675) +- Layer API is simplified, with automatic feeding `prev_layer` into `self.inputs` (PR #675) +- Complete Documentation Refactoring and Reorganization (namely Layer APIs) (PR #691) + +### Deprecated +- `tl.layers.TimeDistributedLayer` argurment `args` is deprecated in favor of `layer_args` (PR #667) +- `tl.act.leaky_relu` have been deprecated in favor of `tf.nn.leaky_relu` (PR #686) + +### Removed +- `assert()` calls remove and replaced by `raise AssertionError()` (PR #667) +- `tl.identity` is removed, not used anymore and deprecated for a long time (PR #667) +- All Code specific to `TF.__version__ < "1.6"` have been removed (PR #675) + +### Fixed +- Issue #498 - Deprecation Warning Fix in `tl.layers.RNNLayer` with `inspect` (PR #574) +- Issue #498 - Deprecation Warning Fix in `tl.files` with truth value of an empty array is ambiguous (PR #575) +- Issue #565 related to `tl.utils.predict` fixed - `np.hstack` problem in which the results for multiple batches are stacked along `axis=1` (PR #566) +- Issue #572 with `tl.layers.DeformableConv2d` fixed (PR #573) +- Issue #664 with `tl.layers.ConvLSTMLayer` fixed (PR #676) +- Typo of the document of ElementwiseLambdaLayer (PR #588) +- Error in `tl.layers.TernaryConv2d` fixed - self.inputs not defined (PR #658) +- Deprecation warning fixed in `tl.layers.binary._compute_threshold()` (PR #658) +- All references to `tf.logging` replaced by `tl.logging` (PR #661) +- Duplicated code removed when bias was used (PR #667) +- `tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops` is now lazy loaded to 
prevent systematic error raised (PR #675) +- Documentation not build in RTD due to old version of theme in docs directory fixed (PR #703) +- Tutorial: + - `tutorial_word2vec_basic.py` saving issue #476 fixed (PR #635) + - All tutorials tested and errors have been fixed (PR #635) + +### Dependencies Update +- Update pytest from 3.5.1 to 3.6.0 (PR #647) +- Update progressbar2 from 3.37.1 to 3.38.0 (PR #651) +- Update scikit-image from 0.13.1 to 0.14.0 (PR #656) +- Update keras from 2.1.6 to 2.2.0 (PR #684) +- Update requests from 2.18.4 to 2.19.0 (PR #695) + +### Contributors +- @lgarithm: #563 +- @DEKHTIARJonathan: #573 #574 #575 #580 #633 #635 #636 #639 #644 #645 #648 #657 #667 #658 #659 #660 #661 #666 #667 #672 #675 #683 #686 #687 #690 #691 #692 #703 +- @2wins: #560 #566 #662 +- @One-sixth: #579 +- @zsdonghao: #587 #588 #639 #685 #697 +- @luomai: #639 #677 +- @dengyueyun666: #676 + +## [1.8.5] - 2018-05-09 + +### Added +- Github Templates added (by @DEKHTIARJonathan) + - New issues Template + - New PR Template +- Travis Deploy Automation on new Tag (by @DEKHTIARJonathan) + - Deploy to PyPI and create a new version. + - Deploy to Github Releases and upload the wheel files +- PyUP.io has been added to ensure we are compatible with the latest libraries (by @DEKHTIARJonathan) +- `deconv2d` now handling dilation_rate (by @zsdonghao) +- Documentation unittest added (by @DEKHTIARJonathan) +- `test_layers_core` has been added to ensure that `LayersConfig` is abstract. + +### Changed +- All Tests Refactored - Now using unittests and runned with PyTest (by @DEKHTIARJonathan) +- Documentation updated (by @zsdonghao) +- Package Setup Refactored (by @DEKHTIARJonathan) +- Dataset Downlaod now using library progressbar2 (by @DEKHTIARJonathan) +- `deconv2d` function transformed into Class (by @zsdonghao) +- `conv1d` function transformed into Class (by @zsdonghao) +- super resolution functions transformed into Class (by @zsdonghao) +- YAPF coding style improved and enforced (by @DEKHTIARJonathan) + +### Fixed +- Backward Compatibility Restored with deprecation warnings (by @DEKHTIARJonathan) +- Tensorflow Deprecation Fix (Issue #498): + - AverageEmbeddingInputlayer (by @zsdonghao) + - load_mpii_pose_dataset (by @zsdonghao) +- maxPool2D initializer issue #551 (by @zsdonghao) +- `LayersConfig` class has been enforced as abstract +- Pooling Layer Issue #557 fixed (by @zsdonghao) + +### Dependencies Update +- scipy>=1.0,<1.1 => scipy>=1.1,<1.2 + +### Contributors +@zsdonghao @luomai @DEKHTIARJonathan + +[Unreleased]: https://github.com/tensorlayer/tensorlayer/compare/1.11....master +[1.11.1]: https://github.com/tensorlayer/tensorlayer/compare/1.11.0...1.11.0 +[1.11.0]: https://github.com/tensorlayer/tensorlayer/compare/1.10.1...1.11.0 +[1.10.1]: https://github.com/tensorlayer/tensorlayer/compare/1.10.0...1.10.1 +[1.10.0]: https://github.com/tensorlayer/tensorlayer/compare/1.9.1...1.10.0 +[1.9.1]: https://github.com/tensorlayer/tensorlayer/compare/1.9.0...1.9.1 +[1.9.0]: https://github.com/tensorlayer/tensorlayer/compare/1.8.5...1.9.0 +[1.8.5]: https://github.com/tensorlayer/tensorlayer/compare/1.8.4...1.8.5 \ No newline at end of file From e5f75932957845b1f50e65427c85e459a44a6f37 Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Tue, 14 May 2019 12:39:20 +0100 Subject: [PATCH 4/4] change log add name --- CHANGELOG.md | 3 +- .../tutorial_atari_pong.py | 10 +++- ...ial_bipedalwalker_a3c_continuous_action.py | 57 ++++++++++--------- .../tutorial_cartpole_ac.py | 28 ++++++--- 
.../tutorial_frozenlake_dqn.py | 5 ++ 5 files changed, 64 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54f3d153f..c6aee1b4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,7 +77,7 @@ To release a new version, please update the changelog as followed: - remove `tl.layers.initialize_global_variables(sess)` (PR #931) - change `tl.layers.core`, `tl.models.core` (PR #966) - change `weights` into `all_weights`, `trainable_weights`, `nontrainable_weights` -- change reinforcement learning examples from TL1 to TL2 (PR #970) +- change reinforcement learning examples from TL1 to TL2 (PR #969) ### Dependencies Update - nltk>=3.3,<3.4 => nltk>=3.3,<3.5 (PR #892) @@ -99,6 +99,7 @@ To release a new version, please update the changelog as followed: ### Contributors - @zsdonghao: #931 - @yd-yin: #963 +- @quantumiracle: #969 ## [2.0.0-alpha] - 2019-05-04 diff --git a/examples/reinforcement_learning/tutorial_atari_pong.py b/examples/reinforcement_learning/tutorial_atari_pong.py index 7e1b28822..0ffee9174 100644 --- a/examples/reinforcement_learning/tutorial_atari_pong.py +++ b/examples/reinforcement_learning/tutorial_atari_pong.py @@ -68,6 +68,8 @@ def prepro(I): episode_number = 0 xs, ys, rs = [], [], [] + + # policy network def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape) @@ -75,12 +77,14 @@ def get_model(inputs_shape): nn = tl.layers.Dense(n_units=3, name='output')(nn) M = tl.models.Model(inputs=ni, outputs=nn, name="mlp") return M + + model = get_model([None, D]) train_weights = model.trainable_weights optimizer = tf.optimizers.RMSprop(lr=learning_rate, decay=decay_rate) -model.train() # set model to train mode (in case you add dropout into the model) +model.train() # set model to train mode (in case you add dropout into the model) start_time = time.time() game_number = 0 @@ -97,8 +101,8 @@ def get_model(inputs_shape): prob = tf.nn.softmax(_prob) # action. 
1: STOP 2: UP 3: DOWN - # action = np.random.choice([1,2,3], p=prob.flatten()) - # action = tl.rein.choice_action_by_probs(prob.flatten(), [1, 2, 3]) + # action = np.random.choice([1,2,3], p=prob.flatten()) + # action = tl.rein.choice_action_by_probs(prob.flatten(), [1, 2, 3]) action = tl.rein.choice_action_by_probs(prob[0].numpy(), [1, 2, 3]) observation, reward, done, _ = env.step(action) diff --git a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py index 796ef9d74..2ad7f229a 100644 --- a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py +++ b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py @@ -51,7 +51,6 @@ tfd = tfp.distributions - tl.logging.set_verbosity(tl.logging.DEBUG) np.random.seed(2) @@ -85,11 +84,12 @@ class ACNet(object): - def __init__(self, scope, globalAC=None): + def __init__(self, scope, globalAC=None): self.scope = scope self.save_path = './model' w_init = tf.keras.initializers.glorot_normal(seed=None) # initializer, glorot=xavier + def get_actor(input_shape): # policy network with tf.name_scope(self.scope): ni = tl.layers.Input(input_shape, name='in') @@ -97,21 +97,26 @@ def get_actor(input_shape): # policy network nn = tl.layers.Dense(n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2')(nn) mu = tl.layers.Dense(n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu')(nn) sigma = tl.layers.Dense(n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma')(nn) - return tl.models.Model(inputs=ni, outputs=[mu, sigma], name=scope+'/Actor') - self.actor = get_actor( [None, N_S]) + return tl.models.Model(inputs=ni, outputs=[mu, sigma], name=scope + '/Actor') + + self.actor = get_actor([None, N_S]) self.actor.train() # train mode for Dropout, BatchNorm - def get_critic(input_shape): # we use Value-function here, but not Q-function. + + def get_critic(input_shape): # we use Value-function here, but not Q-function. 
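            # The critic estimates the state-value function V(s) with a single
            # output unit; in update_global() below it is fitted to the
            # bootstrapped value targets, and the resulting TD error is the
            # advantage that weights the actor's log-probability term.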
with tf.name_scope(self.scope): ni = tl.layers.Input(input_shape, name='in') nn = tl.layers.Dense(n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc')(ni) nn = tl.layers.Dense(n_units=300, act=tf.nn.relu6, W_init=w_init, name='lc2')(nn) v = tl.layers.Dense(n_units=1, W_init=w_init, name='v')(nn) - return tl.models.Model(inputs=ni, outputs=v, name=scope+'/Critic') - self.critic = get_critic( [None, N_S]) - self.critic.train() # train mode for Dropout, BatchNorm - - @tf.function # convert numpy functions to tf.Operations in the TFgraph, return tensor - def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer to the global Actor-Crtic network for updating it with samples + return tl.models.Model(inputs=ni, outputs=v, name=scope + '/Critic') + + self.critic = get_critic([None, N_S]) + self.critic.train() # train mode for Dropout, BatchNorm + + @tf.function # convert numpy functions to tf.Operations in the TFgraph, return tensor + def update_global( + self, buffer_s, buffer_a, buffer_v_target, globalAC + ): # refer to the global Actor-Crtic network for updating it with samples ''' update the global critic ''' with tf.GradientTape() as tape: self.v = self.critic(buffer_s) @@ -121,7 +126,6 @@ def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer self.c_grads = tape.gradient(self.c_loss, self.critic.trainable_weights) OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) # local grads applies to global net # del tape # Drop the reference to the tape - ''' update the global actor ''' with tf.GradientTape() as tape: self.mu, self.sigma = self.actor(buffer_s) @@ -129,7 +133,7 @@ def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 normal_dist = tfd.Normal(self.mu, self.sigma) # no tf.contrib for tf2.0 - self.a_his = buffer_a # float32 + self.a_his = buffer_a # float32 log_prob = normal_dist.log_prob(self.a_his) exp_v = log_prob * td # td is from the critic part, no gradients for it entropy = normal_dist.entropy() # encourage exploration @@ -138,7 +142,7 @@ def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer self.a_grads = tape.gradient(self.a_loss, self.actor.trainable_weights) OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) # local grads applies to global net return self.test # for test purpose - + @tf.function def pull_global(self, globalAC): # run by a local, pull weights from the global nets for l_p, g_p in zip(self.actor.trainable_weights, globalAC.actor.trainable_weights): @@ -152,18 +156,19 @@ def choose_action(self, s): # run by a local with tf.name_scope('wrap_a_out'): self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - normal_dist = tfd.Normal(self.mu, self.sigma) # for continuous action space + normal_dist = tfd.Normal(self.mu, self.sigma) # for continuous action space self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) return self.A.numpy()[0] - def save_ckpt(self): # save trained weights + def save_ckpt(self): # save trained weights tl.files.save_npz(self.actor.trainable_weights, name='model_actor.npz') tl.files.save_npz(self.critic.trainable_weights, name='model_critic.npz') - def load_ckpt(self): # load trained weights + def load_ckpt(self): # load trained weights tl.files.load_and_assign_npz(name='model_actor.npz', network=self.actor) tl.files.load_and_assign_npz(name='model_critic.npz', network=self.critic) + class Worker(object): 
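    # Each Worker runs in its own thread with its own copy of the environment
    # and a local ACNet. Inside work() it acts with the local actor, buffers
    # (state, action, reward) tuples, and periodically (or at episode end)
    # converts the rewards into discounted value targets bootstrapped from the
    # local critic's V(s_), pushes gradients to the shared network through
    # update_global(), then re-syncs its weights with pull_global().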
def __init__(self, name, globalAC): @@ -183,11 +188,11 @@ def work(self, globalAC): # visualize Worker_0 during training if self.name == 'Worker_0' and total_step % 30 == 0: self.env.render() - s = s.astype('float32') # double to float - a = self.AC.choose_action(s) + s = s.astype('float32') # double to float + a = self.AC.choose_action(s) s_, r, done, _info = self.env.step(a) - - s_ = s_.astype('float32') # double to float + + s_ = s_.astype('float32') # double to float # set robot falls reward to -2 instead of -100 if r == -100: r = -2 @@ -201,7 +206,7 @@ def work(self, globalAC): if done: v_s_ = 0 # terminal else: - v_s_ = self.AC.critic(s_[np.newaxis, :])[0,0] # reduce dim from 2 to 0 + v_s_ = self.AC.critic(s_[np.newaxis, :])[0, 0] # reduce dim from 2 to 0 buffer_v_target = [] @@ -210,7 +215,7 @@ def work(self, globalAC): buffer_v_target.append(v_s_) buffer_v_target.reverse() - + buffer_s, buffer_a, buffer_v_target = ( np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target) ) @@ -245,7 +250,7 @@ def work(self, globalAC): if __name__ == "__main__": # ============================= TRAINING =============================== with tf.device("/cpu:0"): - + OPT_A = tf.optimizers.RMSprop(LR_A, name='RMSPropA') OPT_C = tf.optimizers.RMSprop(LR_C, name='RMSPropC') @@ -263,7 +268,7 @@ def work(self, globalAC): for worker in workers: # t = threading.Thread(target=worker.work) job = lambda: worker.work(GLOBAL_AC) - t = threading.Thread(target=job) + t = threading.Thread(target=job) t.start() worker_threads.append(t) COORD.join(worker_threads) @@ -285,7 +290,7 @@ def work(self, globalAC): rall = 0 while True: env.render() - s = s.astype('float32') # double to float + s = s.astype('float32') # double to float a = GLOBAL_AC.choose_action(s) s, r, d, _ = env.step(a) rall += r diff --git a/examples/reinforcement_learning/tutorial_cartpole_ac.py b/examples/reinforcement_learning/tutorial_cartpole_ac.py index 19faf05eb..8b7aee5a5 100644 --- a/examples/reinforcement_learning/tutorial_cartpole_ac.py +++ b/examples/reinforcement_learning/tutorial_cartpole_ac.py @@ -73,10 +73,15 @@ def __init__(self, n_features, n_actions, lr=0.001): def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='state') - nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden')(ni) - nn = tl.layers.Dense(n_units=10, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2')(nn) + nn = tl.layers.Dense( + n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden' + )(ni) + nn = tl.layers.Dense( + n_units=10, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2' + )(nn) nn = tl.layers.Dense(n_units=n_actions, name='actions')(nn) return tl.models.Model(inputs=ni, outputs=nn, name="Actor") + self.model = get_model([None, n_features]) self.model.train() self.optimizer = tf.optimizers.Adam(lr) @@ -84,10 +89,10 @@ def get_model(inputs_shape): def learn(self, s, a, td): with tf.GradientTape() as tape: _logits = self.model(np.array([s])) - ## cross-entropy loss weighted by td-error (advantage), + ## cross-entropy loss weighted by td-error (advantage), # the cross-entropy mearsures the difference of two probability distributions: the predicted logits and sampled action distribution, - # then weighted by the td-error: small difference of real and predict actions for large td-error (advantage); and vice versa. 
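            # The line below relies on tl.rein.cross_entropy_reward_loss, which
            # (roughly, up to reduction details) computes something along the
            # lines of
            #   ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits)
            #   loss = tf.reduce_sum(ce * rewards)
            # so with rewards=td, minimising this loss raises log pi(a|s) when
            # the advantage is positive and lowers it when it is negative.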
- _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) + # then weighted by the td-error: small difference of real and predict actions for large td-error (advantage); and vice versa. + _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) grad = tape.gradient(_exp_v, self.model.trainable_weights) self.optimizer.apply_gradients(zip(grad, self.model.trainable_weights)) return _exp_v @@ -95,7 +100,7 @@ def learn(self, s, a, td): def choose_action(self, s): _logits = self.model(np.array([s])) _probs = tf.nn.softmax(_logits).numpy() - return tl.rein.choice_action_by_probs(_probs.ravel()) # sample according to probability distribution + return tl.rein.choice_action_by_probs(_probs.ravel()) # sample according to probability distribution def choose_action_greedy(self, s): _logits = self.model(np.array([s])) # logits: probability distribution of actions @@ -109,10 +114,15 @@ def __init__(self, n_features, lr=0.01): def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='state') - nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden')(ni) - nn = tl.layers.Dense(n_units=5, act=tf.nn.relu, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2')(nn) + nn = tl.layers.Dense( + n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden' + )(ni) + nn = tl.layers.Dense( + n_units=5, act=tf.nn.relu, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2' + )(nn) nn = tl.layers.Dense(n_units=1, act=None, name='value')(nn) return tl.models.Model(inputs=ni, outputs=nn, name="Critic") + self.model = get_model([1, n_features]) self.model.train() @@ -130,11 +140,11 @@ def learn(self, s, r, s_): return td_error + actor = Actor(n_features=N_F, n_actions=N_A, lr=LR_A) # we need a good teacher, so the teacher should learn faster than the actor critic = Critic(n_features=N_F, lr=LR_C) - for i_episode in range(MAX_EPISODE): episode_time = time.time() s = env.reset().astype(np.float32) diff --git a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py index 935e3e04b..0fb58be9d 100644 --- a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py +++ b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py @@ -41,20 +41,25 @@ env = gym.make('FrozenLake-v0') + def to_one_hot(i, n_classes=None): a = np.zeros(n_classes, 'uint8') a[i] = 1 return a + render = False # display the game environment running_reward = None + ## Define Q-network q(a,s) that ouput the rewards of 4 actions by given state, i.e. Action-Value Function. # encoding for state: 4x4 grid can be represented by one-hot vector with 16 integers. def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='observation') nn = tl.layers.Dense(4, act=None, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s')(ni) return tl.models.Model(inputs=ni, outputs=nn, name="Q-Network") + + qnetwork = get_model([None, 16]) qnetwork.train() train_weights = qnetwork.trainable_weights
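
# A minimal sketch of the eager update step that the rest of this script
# performs with the pieces defined above (qnetwork, train_weights, optimizer,
# to_one_hot); `q_update` and the explicit `discount` argument are illustrative
# names introduced here, not part of the tutorial, which inlines this logic in
# its training loop with its own `lambd` discount factor.
def q_update(s, a, r, s1, discount=0.99):
    ## Bellman target: move Q(s, a) towards r + discount * max_a' Q(s1, a').
    Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).numpy()
    targetQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).numpy()
    targetQ[0, a] = r + discount * np.max(Q1)
    ## Eager gradient step; this replaces the old sess.run(train_op, feed_dict=...).
    with tf.GradientTape() as tape:
        _qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32))
        _loss = tl.cost.mean_squared_error(targetQ, _qvalues, is_mean=False)
    grad = tape.gradient(_loss, train_weights)
    optimizer.apply_gradients(zip(grad, train_weights))
    return _loss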