From f2f87b0466fbe34e519dfa88536fd550fb0229b1 Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Mon, 13 May 2019 19:04:13 +0100 Subject: [PATCH 1/4] modify RL examples to TF2 TL2 --- .../tutorial_cifar10_cnn_static.py | 2 +- .../tutorial_mnist_mlp_dynamic.py | 2 +- .../tutorial_mnist_mlp_dynamic_2.py | 2 +- .../tutorial_mnist_mlp_static.py | 2 +- .../tutorial_mnist_mlp_static_2.py | 2 +- .../basic_tutorials/tutorial_mnist_siamese.py | 2 +- .../basic_tutorials/tutorial_mnist_simple.py | 3 +- .../tutorial_fast_affine_transform.py | 4 +- .../data_process/tutorial_tf_dataset_voc.py | 2 +- examples/data_process/tutorial_tfrecord.py | 3 +- examples/data_process/tutorial_tfrecord2.py | 2 +- examples/data_process/tutorial_tfrecord3.py | 2 +- examples/database/dispatch_tasks.py | 1 - examples/database/task_script.py | 1 - ...torial_imagenet_inceptionV3_distributed.py | 4 +- .../tutorial_mnist_distributed.py | 1 - .../tutorial_cifar10_distributed_trainer.py | 2 +- .../tutorial_mnist_distributed_trainer.py | 2 +- examples/keras_tfslim/tutorial_keras.py | 2 +- .../tutorial_models_mobilenetv1.py | 2 +- .../tutorial_models_squeezenetv1.py | 2 +- .../pretrained_cnn/tutorial_models_vgg16.py | 2 +- .../pretrained_cnn/tutorial_models_vgg19.py | 2 +- .../tutorial_models_vgg_static.py | 2 +- .../tutorial_binarynet_cifar10_tfrecord.py | 1 - .../tutorial_binarynet_mnist_cnn.py | 1 - .../tutorial_dorefanet_cifar10_tfrecord.py | 1 - .../tutorial_dorefanet_mnist_cnn.py | 1 - .../tutorial_quanconv_cifar10.py | 2 +- .../quantized_net/tutorial_quanconv_mnist.py | 1 - ...tutorial_ternaryweight_cifar10_tfrecord.py | 1 - .../tutorial_ternaryweight_mnist_cnn.py | 1 - .../tutorial_atari_pong.py | 14 +- ...ial_bipedalwalker_a3c_continuous_action.py | 390 ++++++++++++------ .../tutorial_cartpole_ac.py | 31 +- .../tutorial_frozenlake_dqn.py | 16 +- ...ial_spatial_transformer_network_dynamic.py | 2 + ...rial_spatial_transformer_network_static.py | 2 + .../tutorial_imdb_fasttext.py | 2 +- .../text_generation/tutorial_generate_text.py | 2 +- examples/text_ptb/tutorial_ptb_lstm.py | 2 +- .../tutorial_ptb_lstm_state_is_tuple.py | 2 +- .../tutorial_word2vec_basic.py | 2 +- examples/tutorial_work_with_onnx.py | 4 +- tensorlayer/activation.py | 1 - tensorlayer/cost.py | 3 +- tensorlayer/db.py | 13 +- tensorlayer/distributed.py | 1 - .../files/dataset_loaders/celebA_dataset.py | 3 +- .../files/dataset_loaders/cyclegan_dataset.py | 3 +- .../dataset_loaders/flickr_1M_dataset.py | 6 +- .../dataset_loaders/flickr_25k_dataset.py | 6 +- .../files/dataset_loaders/mpii_dataset.py | 3 +- .../files/dataset_loaders/voc_dataset.py | 5 +- .../dataset_loaders/wmt_en_fr_dataset.py | 1 - tensorlayer/files/utils.py | 18 +- tensorlayer/initializers.py | 1 + tensorlayer/layers/activation.py | 1 - tensorlayer/layers/convolution/binary_conv.py | 1 - .../layers/convolution/deformable_conv.py | 1 - .../layers/convolution/depthwise_conv.py | 1 - tensorlayer/layers/convolution/dorefa_conv.py | 1 - tensorlayer/layers/convolution/expert_conv.py | 1 - .../layers/convolution/expert_deconv.py | 1 - tensorlayer/layers/convolution/group_conv.py | 1 - tensorlayer/layers/convolution/quan_conv.py | 4 +- .../layers/convolution/quan_conv_bn.py | 4 +- .../layers/convolution/separable_conv.py | 2 +- .../layers/convolution/simplified_conv.py | 1 - .../layers/convolution/simplified_deconv.py | 2 +- .../layers/convolution/super_resolution.py | 1 - .../layers/convolution/ternary_conv.py | 1 - tensorlayer/layers/core.py | 6 +- 
tensorlayer/layers/dense/base_dense.py | 2 +- tensorlayer/layers/dense/binary_dense.py | 1 - tensorlayer/layers/dense/dorefa_dense.py | 1 - tensorlayer/layers/dense/dropconnect.py | 4 +- tensorlayer/layers/dense/quan_dense.py | 4 +- tensorlayer/layers/dense/quan_dense_bn.py | 4 +- tensorlayer/layers/dense/ternary_dense.py | 1 - tensorlayer/layers/dropout.py | 1 - tensorlayer/layers/embedding.py | 2 +- tensorlayer/layers/extend.py | 1 - tensorlayer/layers/image_resampling.py | 1 - tensorlayer/layers/inputs.py | 2 +- tensorlayer/layers/lambda_layers.py | 3 +- tensorlayer/layers/merge.py | 1 - tensorlayer/layers/noise.py | 1 - tensorlayer/layers/normalization.py | 3 +- tensorlayer/layers/padding.py | 1 - tensorlayer/layers/pooling.py | 1 - tensorlayer/layers/quantize.py | 1 - tensorlayer/layers/recurrent.py | 1 - tensorlayer/layers/scale.py | 1 - tensorlayer/layers/shape.py | 1 - tensorlayer/layers/spatial_transformer.py | 4 +- tensorlayer/layers/stack.py | 1 - tensorlayer/layers/utils.py | 2 +- tensorlayer/logging/contrib/hyperdash.py | 1 - tensorlayer/models/core.py | 3 +- tensorlayer/models/mobilenetv1.py | 1 - tensorlayer/models/squeezenetv1.py | 3 +- tensorlayer/models/vgg.py | 4 +- tensorlayer/nlp.py | 4 +- tensorlayer/rein.py | 3 +- tensorlayer/utils.py | 2 +- 106 files changed, 389 insertions(+), 299 deletions(-) diff --git a/examples/basic_tutorials/tutorial_cifar10_cnn_static.py b/examples/basic_tutorials/tutorial_cifar10_cnn_static.py index c12c791a1..93794c414 100644 --- a/examples/basic_tutorials/tutorial_cifar10_cnn_static.py +++ b/examples/basic_tutorials/tutorial_cifar10_cnn_static.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input, LocalResponseNorm, MaxPool2d) diff --git a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py index 1ffa7fbe0..13db1abae 100644 --- a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py +++ b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py @@ -1,8 +1,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Input from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_2.py b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_2.py index b752012b0..0d94b1dfa 100644 --- a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_2.py +++ b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_2.py @@ -1,8 +1,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Input, LayerList from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_mlp_static.py b/examples/basic_tutorials/tutorial_mnist_mlp_static.py index c9c15f911..de811a8d8 100644 --- a/examples/basic_tutorials/tutorial_mnist_mlp_static.py +++ b/examples/basic_tutorials/tutorial_mnist_mlp_static.py @@ -1,8 +1,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Input from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_mlp_static_2.py b/examples/basic_tutorials/tutorial_mnist_mlp_static_2.py index f0836c528..a9a2c7d48 100644 --- 
a/examples/basic_tutorials/tutorial_mnist_mlp_static_2.py +++ b/examples/basic_tutorials/tutorial_mnist_mlp_static_2.py @@ -1,8 +1,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Input from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_siamese.py b/examples/basic_tutorials/tutorial_mnist_siamese.py index db43f1163..fe4abdc52 100644 --- a/examples/basic_tutorials/tutorial_mnist_siamese.py +++ b/examples/basic_tutorials/tutorial_mnist_siamese.py @@ -14,8 +14,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import Dense, Dropout, Flatten, Input from tensorlayer.models import Model diff --git a/examples/basic_tutorials/tutorial_mnist_simple.py b/examples/basic_tutorials/tutorial_mnist_simple.py index 04e233819..ceaee0c48 100644 --- a/examples/basic_tutorials/tutorial_mnist_simple.py +++ b/examples/basic_tutorials/tutorial_mnist_simple.py @@ -1,9 +1,10 @@ #! /usr/bin/python # -*- coding: utf-8 -*- +import numpy as np + import tensorflow as tf import tensorlayer as tl -import numpy as np tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/examples/data_process/tutorial_fast_affine_transform.py b/examples/data_process/tutorial_fast_affine_transform.py index 52452ffd5..71890f5bd 100644 --- a/examples/data_process/tutorial_fast_affine_transform.py +++ b/examples/data_process/tutorial_fast_affine_transform.py @@ -8,10 +8,10 @@ import multiprocessing import time -import cv2 import numpy as np -import tensorflow as tf +import cv2 +import tensorflow as tf import tensorlayer as tl # tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/examples/data_process/tutorial_tf_dataset_voc.py b/examples/data_process/tutorial_tf_dataset_voc.py index fab1612f7..9779b1f60 100644 --- a/examples/data_process/tutorial_tf_dataset_voc.py +++ b/examples/data_process/tutorial_tf_dataset_voc.py @@ -13,8 +13,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl # tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/data_process/tutorial_tfrecord.py b/examples/data_process/tutorial_tfrecord.py index 4cb832c1d..bcf3fe46a 100644 --- a/examples/data_process/tutorial_tfrecord.py +++ b/examples/data_process/tutorial_tfrecord.py @@ -22,9 +22,9 @@ import os import numpy as np -import tensorflow as tf from PIL import Image +import tensorflow as tf import tensorlayer as tl ## Save data ================================================================== @@ -97,4 +97,3 @@ def read_and_decode(filename): print("img_batch : %s" % img_batch.shape) print("label_batch : %s" % label_batch.shape) tl.visualize.images2d(img_batch, second=1, saveable=False, name='batch', dtype=None, fig_idx=2020121) - diff --git a/examples/data_process/tutorial_tfrecord2.py b/examples/data_process/tutorial_tfrecord2.py index be41b697f..22b3d7757 100755 --- a/examples/data_process/tutorial_tfrecord2.py +++ b/examples/data_process/tutorial_tfrecord2.py @@ -14,10 +14,10 @@ import os import numpy as np + # import matplotlib # matplotlib.use('GTK') import tensorflow as tf - import tensorlayer as tl # Download data, and convert to TFRecord format, see ```tutorial_tfrecord.py``` diff --git a/examples/data_process/tutorial_tfrecord3.py b/examples/data_process/tutorial_tfrecord3.py index 9e5751a25..bc8752f2a 100644 --- a/examples/data_process/tutorial_tfrecord3.py +++ 
b/examples/data_process/tutorial_tfrecord3.py @@ -19,9 +19,9 @@ import os import numpy as np -import tensorflow as tf from PIL import Image +import tensorflow as tf import tensorlayer as tl diff --git a/examples/database/dispatch_tasks.py b/examples/database/dispatch_tasks.py index d1204bcd4..260257e77 100644 --- a/examples/database/dispatch_tasks.py +++ b/examples/database/dispatch_tasks.py @@ -6,7 +6,6 @@ import time import tensorflow as tf - import tensorlayer as tl tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/examples/database/task_script.py b/examples/database/task_script.py index ad51dd3ed..58ef60d1a 100644 --- a/examples/database/task_script.py +++ b/examples/database/task_script.py @@ -1,7 +1,6 @@ """Sample task script.""" import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py b/examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py index 936ae9702..15c0a3f3c 100644 --- a/examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py +++ b/examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py @@ -19,7 +19,9 @@ from xml.etree import ElementTree import numpy as np + import tensorflow as tf +import tensorlayer as tl from tensorflow.contrib import slim from tensorflow.contrib.slim.python.slim.nets.inception_v3 import (inception_v3, inception_v3_arg_scope) @@ -29,8 +31,6 @@ from tensorflow.python.training.monitored_session import \ SingularMonitoredSession -import tensorlayer as tl - tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/examples/deprecated_tutorials/tutorial_mnist_distributed.py b/examples/deprecated_tutorials/tutorial_mnist_distributed.py index 29d291ba4..18f7cdb92 100644 --- a/examples/deprecated_tutorials/tutorial_mnist_distributed.py +++ b/examples/deprecated_tutorials/tutorial_mnist_distributed.py @@ -13,7 +13,6 @@ """ import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/distributed_training/tutorial_cifar10_distributed_trainer.py b/examples/distributed_training/tutorial_cifar10_distributed_trainer.py index 1ddc2d937..ce3aec007 100644 --- a/examples/distributed_training/tutorial_cifar10_distributed_trainer.py +++ b/examples/distributed_training/tutorial_cifar10_distributed_trainer.py @@ -15,8 +15,8 @@ import multiprocessing import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import (BatchNormLayer, Conv2d, DenseLayer, FlattenLayer, InputLayer, MaxPool2d) diff --git a/examples/distributed_training/tutorial_mnist_distributed_trainer.py b/examples/distributed_training/tutorial_mnist_distributed_trainer.py index 0f1b8b6dd..0cf916370 100755 --- a/examples/distributed_training/tutorial_mnist_distributed_trainer.py +++ b/examples/distributed_training/tutorial_mnist_distributed_trainer.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/keras_tfslim/tutorial_keras.py b/examples/keras_tfslim/tutorial_keras.py index 0622bc745..33a9ca860 100644 --- a/examples/keras_tfslim/tutorial_keras.py +++ b/examples/keras_tfslim/tutorial_keras.py @@ -4,8 +4,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from 
tensorlayer.layers import Input, Lambda diff --git a/examples/pretrained_cnn/tutorial_models_mobilenetv1.py b/examples/pretrained_cnn/tutorial_models_mobilenetv1.py index 8d7b35a6b..6b797a075 100644 --- a/examples/pretrained_cnn/tutorial_models_mobilenetv1.py +++ b/examples/pretrained_cnn/tutorial_models_mobilenetv1.py @@ -10,8 +10,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/pretrained_cnn/tutorial_models_squeezenetv1.py b/examples/pretrained_cnn/tutorial_models_squeezenetv1.py index 9b6ee4e7f..755d6c28b 100644 --- a/examples/pretrained_cnn/tutorial_models_squeezenetv1.py +++ b/examples/pretrained_cnn/tutorial_models_squeezenetv1.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/pretrained_cnn/tutorial_models_vgg16.py b/examples/pretrained_cnn/tutorial_models_vgg16.py index e6bb1c22e..b1bd3823f 100644 --- a/examples/pretrained_cnn/tutorial_models_vgg16.py +++ b/examples/pretrained_cnn/tutorial_models_vgg16.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/pretrained_cnn/tutorial_models_vgg19.py b/examples/pretrained_cnn/tutorial_models_vgg19.py index 850412c38..922c3bdf5 100644 --- a/examples/pretrained_cnn/tutorial_models_vgg19.py +++ b/examples/pretrained_cnn/tutorial_models_vgg19.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/pretrained_cnn/tutorial_models_vgg_static.py b/examples/pretrained_cnn/tutorial_models_vgg_static.py index 40a3ed865..a0e056e4d 100644 --- a/examples/pretrained_cnn/tutorial_models_vgg_static.py +++ b/examples/pretrained_cnn/tutorial_models_vgg_static.py @@ -5,8 +5,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.models.imagenet_classes import class_names diff --git a/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py b/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py index 98532debb..d3205045a 100644 --- a/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py +++ b/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py @@ -43,7 +43,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_binarynet_mnist_cnn.py b/examples/quantized_net/tutorial_binarynet_mnist_cnn.py index 248812e23..84fbf7fc9 100644 --- a/examples/quantized_net/tutorial_binarynet_mnist_cnn.py +++ b/examples/quantized_net/tutorial_binarynet_mnist_cnn.py @@ -4,7 +4,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_dorefanet_cifar10_tfrecord.py b/examples/quantized_net/tutorial_dorefanet_cifar10_tfrecord.py index 9c8ab1239..fe7666bab 100644 --- a/examples/quantized_net/tutorial_dorefanet_cifar10_tfrecord.py +++ b/examples/quantized_net/tutorial_dorefanet_cifar10_tfrecord.py @@ -43,7 +43,6 @@ import time import tensorflow as tf - import tensorlayer as tl 
tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_dorefanet_mnist_cnn.py b/examples/quantized_net/tutorial_dorefanet_mnist_cnn.py index 90d7b0893..d8cab9bc8 100644 --- a/examples/quantized_net/tutorial_dorefanet_mnist_cnn.py +++ b/examples/quantized_net/tutorial_dorefanet_mnist_cnn.py @@ -4,7 +4,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_quanconv_cifar10.py b/examples/quantized_net/tutorial_quanconv_cifar10.py index 6eb35ed67..f93368467 100644 --- a/examples/quantized_net/tutorial_quanconv_cifar10.py +++ b/examples/quantized_net/tutorial_quanconv_cifar10.py @@ -41,8 +41,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl bitW = 8 diff --git a/examples/quantized_net/tutorial_quanconv_mnist.py b/examples/quantized_net/tutorial_quanconv_mnist.py index 4060c6137..66d52d13c 100644 --- a/examples/quantized_net/tutorial_quanconv_mnist.py +++ b/examples/quantized_net/tutorial_quanconv_mnist.py @@ -4,7 +4,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_ternaryweight_cifar10_tfrecord.py b/examples/quantized_net/tutorial_ternaryweight_cifar10_tfrecord.py index f1ee7b4bb..b695fa88a 100644 --- a/examples/quantized_net/tutorial_ternaryweight_cifar10_tfrecord.py +++ b/examples/quantized_net/tutorial_ternaryweight_cifar10_tfrecord.py @@ -42,7 +42,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/quantized_net/tutorial_ternaryweight_mnist_cnn.py b/examples/quantized_net/tutorial_ternaryweight_mnist_cnn.py index e1c305db6..6850b9591 100644 --- a/examples/quantized_net/tutorial_ternaryweight_mnist_cnn.py +++ b/examples/quantized_net/tutorial_ternaryweight_mnist_cnn.py @@ -4,7 +4,6 @@ import time import tensorflow as tf - import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/reinforcement_learning/tutorial_atari_pong.py b/examples/reinforcement_learning/tutorial_atari_pong.py index ad8e264df..0e8c62009 100644 --- a/examples/reinforcement_learning/tutorial_atari_pong.py +++ b/examples/reinforcement_learning/tutorial_atari_pong.py @@ -29,16 +29,16 @@ import time import numpy as np -import tensorflow as tf import gym +import tensorflow as tf import tensorlayer as tl ## enable eager mode -tf.enable_eager_execution() +# tf.enable_eager_execution() -tf.logging.set_verbosity(tf.logging.DEBUG) # enable logging +# tf.logging.set_verbosity(tf.logging.DEBUG) # enable logging tl.logging.set_verbosity(tl.logging.DEBUG) # hyper-parameters @@ -52,7 +52,7 @@ render = False # display the game environment # resume = True # load existing policy network model_file_name = "model_pong" -np.set_printoptions(threshold=np.nan) +np.set_printoptions(threshold=np.inf) def prepro(I): @@ -91,7 +91,7 @@ def get_model(inputs_shape): # t_actions = tf.placeholder(tf.int32, shape=[None]) # t_discount_rewards = tf.placeholder(tf.float32, shape=[None]) # loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards) -optimizer = tf.train.RMSPropOptimizer(learning_rate, decay_rate)#.minimize(loss) +optimizer = tf.optimizers.RMSprop(lr=learning_rate, decay=decay_rate)#.minimize(loss) # with tf.Session() as sess: # sess.run(tf.global_variables_initializer()) @@ -115,7 +115,7 @@ def 
get_model(inputs_shape): prev_x = cur_x # prob = sess.run(sampling_prob, feed_dict={t_states: x}) - _prob = model(x).outputs + _prob = model(x) prob = tf.nn.softmax(_prob) # action. 1: STOP 2: UP 3: DOWN @@ -150,7 +150,7 @@ def get_model(inputs_shape): # t_discount_rewards = tf.placeholder(tf.float32, shape=[None]) # loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards) with tf.GradientTape() as tape: - _prob = model(epx).outputs + _prob = model(epx) _loss = tl.rein.cross_entropy_reward_loss(_prob, epy, disR) grad = tape.gradient(_loss, train_weights) optimizer.apply_gradients(zip(grad, train_weights)) diff --git a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py index 2f1f96d67..145024ba9 100644 --- a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py +++ b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py @@ -29,32 +29,46 @@ and joints angular speed, legs contact with ground, and 10 lidar rangefinder measurements. There's no coordinates in the state vector. +tensorflow 2.0.0a0 +tensorflow-probability 0.6.0 +tensorlayer 2.0.0 + +&& +pip install box2d box2d-kengz --user + """ import multiprocessing import threading import numpy as np -import tensorflow as tf import gym +import tensorflow as tf +import tensorflow_probability as tfp import tensorlayer as tl from tensorlayer.layers import DenseLayer, InputLayer -tf.logging.set_verbosity(tf.logging.DEBUG) +tfd = tfp.distributions + + +# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) -GAME = 'BipedalWalker-v2' # BipedalWalkerHardcore-v2 +# np.random.seed(2) +# tf.random.set_seed(2) # reproducible + +GAME = 'BipedalWalker-v2' # BipedalWalkerHardcore-v2 BipedalWalker-v2 LunarLanderContinuous-v2 OUTPUT_GRAPH = False LOG_DIR = './log' -N_WORKERS = multiprocessing.cpu_count() -# N_WORKERS = 4 -MAX_GLOBAL_EP = 20000 # 8000 +# N_WORKERS = multiprocessing.cpu_count() +N_WORKERS = 2 +MAX_GLOBAL_EP = 8000 # 8000 GLOBAL_NET_SCOPE = 'Global_Net' UPDATE_GLOBAL_ITER = 10 -GAMMA = 0.999 +GAMMA = 0.99 ENTROPY_BETA = 0.005 -LR_A = 0.00002 # learning rate for actor +LR_A = 0.00005 # learning rate for actor LR_C = 0.0001 # learning rate for critic GLOBAL_RUNNING_R = [] GLOBAL_EP = 0 # will increase during training, stop training when it >= MAX_GLOBAL_EP @@ -63,10 +77,13 @@ N_S = env.observation_space.shape[0] N_A = env.action_space.shape[0] +# N_A = env.action_space.n + # A_BOUND = [env.action_space.low, env.action_space.high] A_BOUND = [env.action_space.low, env.action_space.high] -A_BOUND[0] = A_BOUND[0].reshape(1, 4) -A_BOUND[1] = A_BOUND[1].reshape(1, 4) +# A_BOUND[0] = A_BOUND[0].reshape(1, N_A) +# A_BOUND[1] = A_BOUND[1].reshape(1, N_A) +# print(A_BOUND) # print(env.unwrapped.hull.position[0]) # exit() @@ -74,106 +91,196 @@ class ACNet(object): - def __init__(self, scope, globalAC=None): + def __init__(self, scope, globalAC=None): # no need for scope self.scope = scope - if scope == GLOBAL_NET_SCOPE: - ## global network only do inference - with tf.variable_scope(scope): - self.s = tf.placeholder(tf.float32, [None, N_S], 'S') - self._build_net() - self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) - self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) - - normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space - - with 
tf.name_scope('choose_a'): # use local params to choose action - self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - - else: - ## worker network calculate gradient locally, update on global network - with tf.variable_scope(scope): - self.s = tf.placeholder(tf.float32, [None, N_S], 'S') - self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A') - self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget') - - self._build_net() - - td = tf.subtract(self.v_target, self.v, name='TD_error') - with tf.name_scope('c_loss'): - self.c_loss = tf.reduce_mean(tf.square(td)) - - with tf.name_scope('wrap_a_out'): - self.test = self.sigma[0] - self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - - normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space - - with tf.name_scope('a_loss'): - log_prob = normal_dist.log_prob(self.a_his) - exp_v = log_prob * td - entropy = normal_dist.entropy() # encourage exploration - self.exp_v = ENTROPY_BETA * entropy + exp_v - self.a_loss = tf.reduce_mean(-self.exp_v) - - with tf.name_scope('choose_a'): # use local params to choose action - self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - - with tf.name_scope('local_grad'): - self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) - self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) - self.a_grads = tf.gradients(self.a_loss, self.a_params) - self.c_grads = tf.gradients(self.c_loss, self.c_params) - - with tf.name_scope('sync'): - with tf.name_scope('pull'): - self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params)] - self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params)] - with tf.name_scope('push'): - self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params)) - self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params)) - - def _build_net(self): - w_init = tf.contrib.layers.xavier_initializer() - with tf.variable_scope('actor'): # Policy network - nn = InputLayer(self.s, name='in') - nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='la') - nn = DenseLayer(nn, n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2') - mu = DenseLayer(nn, n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu') - sigma = DenseLayer(nn, n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma') - self.mu = mu.outputs - self.sigma = sigma.outputs - - with tf.variable_scope('critic'): # we use Value-function here, but not Q-function. 
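# Illustrative sketch (not part of this patch): the removed TF1 code above builds the
# continuous-action policy with tf.contrib.distributions.Normal behind placeholders and
# sess.run; the TF2 rewrite further below does the same step eagerly with
# tensorflow_probability. A minimal standalone version of that sampling step, assuming a
# 4-dimensional action space with bounds [-1, 1]; mu, sigma and the bounds here are dummy
# values, not taken from the tutorial:
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

def sample_action(mu, sigma, a_low, a_high):
    # scale the mean to the action range and keep sigma strictly positive,
    # mirroring the tutorial's 'wrap_a_out' step
    dist = tfd.Normal(mu * a_high, sigma + 1e-5)
    a = tf.squeeze(dist.sample(1), axis=0)      # one sample, drop the sample axis
    return tf.clip_by_value(a, a_low, a_high)   # clip to the legal action range

mu = tf.constant([[0.1, -0.2, 0.0, 0.3]])
sigma = tf.constant([[0.5, 0.5, 0.5, 0.5]])
print(sample_action(mu, sigma, -1.0, 1.0))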
- nn = InputLayer(self.s, name='in') - nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc') - nn = DenseLayer(nn, n_units=200, act=tf.nn.relu6, W_init=w_init, name='lc2') - v = DenseLayer(nn, n_units=1, W_init=w_init, name='v') - self.v = v.outputs - - def update_global(self, feed_dict): # run by a local - _, _, t = sess.run( - [self.update_a_op, self.update_c_op, self.test], feed_dict - ) # local grads applies to global net - return t - - def pull_global(self): # run by a local - sess.run([self.pull_a_params_op, self.pull_c_params_op]) + self.save_path = './model' + # if scope == GLOBAL_NET_SCOPE: + # ## global network only do inference + # with tf.variable_scope(scope): + # self.s = tf.placeholder(tf.float32, [None, N_S], 'S') + # self._build_net() + + + # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space + + # with tf.name_scope('choose_a'): # use local params to choose action + # self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) + + # else: + # ## worker network calculate gradient locally, update on global network + # # with tf.variable_scope(scope): + # # self.s = tf.placeholder(tf.float32, [None, N_S], 'S') + # # self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A') + # # self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget') + + + + # self._build_net() + + # td = tf.subtract(self.v_target, self.v, name='TD_error') + # with tf.name_scope('c_loss'): + # self.c_loss = tf.reduce_mean(tf.square(td)) + + # with tf.name_scope('wrap_a_out'): + # self.test = self.sigma[0] + # self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 + + # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space + + # with tf.name_scope('a_loss'): + # log_prob = normal_dist.log_prob(self.a_his) + # exp_v = log_prob * td + # entropy = normal_dist.entropy() # encourage exploration + # self.exp_v = ENTROPY_BETA * entropy + exp_v + # self.a_loss = tf.reduce_mean(-self.exp_v) + + # with tf.name_scope('choose_a'): # use local params to choose action + # self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) + + # with tf.name_scope('local_grad'): + # self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) + # self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) + # self.a_grads = tf.gradients(self.a_loss, self.a_params) + # self.c_grads = tf.gradients(self.c_loss, self.c_params) + + # with tf.name_scope('sync'): + # with tf.name_scope('pull'): + # self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params)] + # self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params)] + # with tf.name_scope('push'): + # self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params)) + # self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params)) + + # def _build_net(self): + # w_init = tf.contrib.layers.xavier_initializer() + # w_init = tf.random_uniform_initializer(0, 0.01) + w_init = tf.keras.initializers.glorot_normal(seed=None) + # with tf.variable_scope('actor'): # Policy network + # nn = InputLayer(self.s, name='in') + # nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='la') + # nn = DenseLayer(nn, n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2') + # mu = DenseLayer(nn, n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu') + # sigma = DenseLayer(nn, 
n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma') + # self.mu = mu.outputs + # self.sigma = sigma.outputs + def get_actor(input_shape): + with tf.name_scope(self.scope): + ni = tl.layers.Input(input_shape, name='in') + nn = tl.layers.Dense(n_units=50, act=tf.nn.relu6, W_init=w_init, name='la')(ni) + nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=w_init, name='la2')(nn) + mu = tl.layers.Dense(n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu')(nn) + sigma = tl.layers.Dense(n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma')(nn) + return tl.models.Model(inputs=ni, outputs=[mu, sigma], name=scope+'/Actor') + self.actor = get_actor( [None, N_S]) + self.actor.train() # set training mode, also for workers? + + def get_critic(input_shape): + with tf.name_scope(self.scope): + ni = tl.layers.Input(input_shape, name='in') + nn = tl.layers.Dense(n_units=50, act=tf.nn.relu6, W_init=w_init, name='lc')(ni) + nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=w_init, name='lc2')(nn) + v = tl.layers.Dense(n_units=1, W_init=w_init, name='v')(nn) + return tl.models.Model(inputs=ni, outputs=v, name=scope+'/Critic') + self.critic = get_critic( [None, N_S]) + self.critic.train() + + # self.a_params = tl.layers.get_variables_with_name(scope + '/Actor', True, False) + # self.c_params = tl.layers.get_variables_with_name(scope + '/Critic', True, False) + + # with tf.variable_scope('critic'): # we use Value-function here, but not Q-function. + # nn = InputLayer(self.s, name='in') + # nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc') + # nn = DenseLayer(nn, n_units=200, act=tf.nn.relu6, W_init=w_init, name='lc2') + # v = DenseLayer(nn, n_units=1, W_init=w_init, name='v') + # self.v = v.outputs + + # def update_global(self, feed_dict): # run by a local + def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): + # _, _, t = sess.run( + # [self.update_a_op, self.update_c_op, self.test], feed_dict + # ) # local grads applies to global net + with tf.GradientTape() as tape: + self.v = self.critic(buffer_s) + self.v_target = buffer_v_target # tensor float? + td = tf.subtract(self.v_target, self.v, name='TD_error') + # with tf.name_scope('c_loss'): + self.c_loss = tf.reduce_mean(tf.square(td)) + self.c_grads = tape.gradient(self.c_loss, self.critic.trainable_weights) + OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) + del tape # Drop the reference to the tape + + + with tf.GradientTape() as tape: + self.mu, self.sigma = self.actor(buffer_s) + # print('mu: ', self.mu) + # print('sigma: ', self.sigma) + # with tf.name_scope('wrap_a_out'): + self.test = self.sigma[0] + self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 + + + # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space + normal_dist = tfd.Normal(self.mu, self.sigma) + # with tf.name_scope('a_loss'): + self.a_his = buffer_a # tensor float? 
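# Illustrative annotation (not part of this patch): the next few added lines compute the
# standard A3C actor objective inside the GradientTape -- log pi(a|s) weighted by the TD
# error as the advantage, plus an ENTROPY_BETA-scaled entropy bonus to encourage
# exploration; the loss is the negative mean of that expression, so minimizing it
# maximizes the expected advantage-weighted log-likelihood.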
+ log_prob = normal_dist.log_prob(self.a_his) + exp_v = log_prob * td + entropy = normal_dist.entropy() # encourage exploration + self.exp_v = ENTROPY_BETA * entropy + exp_v + self.a_loss = tf.reduce_mean(-self.exp_v) + # with tf.name_scope('local_grad'): + # self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) + # self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) + # self.a_grads = tf.gradients(self.a_loss, self.a_params) + # self.c_grads = tf.gradients(self.c_loss, self.c_params) + self.a_grads = tape.gradient(self.a_loss, self.actor.trainable_weights) + OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) + + + # return t + return self.test.numpy() + + def pull_global(self, globalAC): # run by a local + # sess.run([self.pull_a_params_op, self.pull_c_params_op]) + # with tf.name_scope('sync'): + # with tf.name_scope('pull'): + # self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.actor.trainable_weights, globalAC.a_params)] + # self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.critic.trainable_weights, globalAC.c_params)] + # print(self.scope, 'before: ', self.actor.trainable_weights[0][5][2]) + # print(self.scope, 'global: ', globalAC.actor.trainable_weights[0][5][2]) + for l_p, g_p in zip(self.actor.trainable_weights, globalAC.actor.trainable_weights): + l_p.assign(g_p) + for l_p, g_p in zip(self.critic.trainable_weights, globalAC.critic.trainable_weights): + l_p.assign(g_p) + # print(self.scope, 'after: ', self.actor.trainable_weights[0][5][2]) def choose_action(self, s): # run by a local s = s[np.newaxis, :] - return sess.run(self.A, {self.s: s})[0] - - def save_ckpt(self): - tl.files.exists_or_mkdir(self.scope) - tl.files.save_ckpt( - sess=sess, mode_name='model.ckpt', var_list=self.a_params + self.c_params, save_dir=self.scope, - printable=True - ) + self.mu, self.sigma = self.actor(s) + # print('mu1: ', self.mu) + # print('sigma1: ', self.sigma) + with tf.name_scope('wrap_a_out'): + # self.test = self.sigma[0] + self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 + # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) + normal_dist = tfd.Normal(self.mu, self.sigma) + self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) + # return sess.run(self.A, {self.s: s})[0] + return self.A.numpy()[0] + + def save_ckpt(self): + tl.files.save_npz(self.actor.trainable_weights, name='model_actor.npz') + tl.files.save_npz(self.critic.trainable_weights, name='model_critic.npz') + # tl.files.save_ckpt( + # sess=sess, mode_name='model.ckpt', var_list=self.a_params + self.c_params, save_dir=self.scope, + # printable=True + # ) def load_ckpt(self): - tl.files.load_ckpt(sess=sess, var_list=self.a_params + self.c_params, save_dir=self.scope, printable=True) - # tl.files.load_ckpt(sess=sess, mode_name='model.ckpt', var_list=self.a_params+self.c_params, save_dir=self.scope, is_latest=False, printable=True) - + # tl.files.load_hdf5_to_trainable_weights(self.save_path+'/actor', self.actor) + # tl.files.load_hdf5_to_trainable_weights(self.save_path+'/critic', self.critic) + tl.files.load_and_assign_npz(name='model_actor.npz', network=self.actor) + tl.files.load_and_assign_npz(name='model_critic.npz', network=self.critic) class Worker(object): @@ -182,7 +289,8 @@ def __init__(self, name, globalAC): self.name = name self.AC = ACNet(name, globalAC) - def work(self): + # def work(self): + def work(self, globalAC): global GLOBAL_RUNNING_R, GLOBAL_EP 
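# Illustrative sketch (not part of this patch): the body of work() that follows collects
# (s, a, r) tuples and, every UPDATE_GLOBAL_ITER steps or at episode end, converts the
# rewards into bootstrapped discounted targets for the critic before calling
# update_global(). A standalone version of that target computation with dummy numbers
# (gamma mirrors GAMMA = 0.99 defined above):
def discounted_targets(rewards, v_bootstrap, gamma=0.99):
    # walk the rewards backwards, bootstrapping from the critic's value of the
    # last state, exactly as the buffer_v_target loop in the hunk below does
    targets, v = [], v_bootstrap
    for r in reversed(rewards):
        v = r + gamma * v
        targets.append(v)
    targets.reverse()
    return targets

print(discounted_targets([1.0, 0.0, -2.0], v_bootstrap=0.5))
# -> approx. [-0.475, -1.490, -1.505]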
total_step = 1 buffer_s, buffer_a, buffer_r = [], [], [] @@ -193,8 +301,13 @@ def work(self): # visualize Worker_0 during training if self.name == 'Worker_0' and total_step % 30 == 0: self.env.render() - a = self.AC.choose_action(s) + s = s.astype('float32') # double to float + a = self.AC.choose_action(s) s_, r, done, _info = self.env.step(a) + s_ = s_.astype('float32') # double to float + # print('s:',s) + # print('a:', a) + # print('r:',r) # set robot falls reward to -2 instead of -100 if r == -100: r = -2 @@ -209,7 +322,8 @@ def work(self): if done: v_s_ = 0 # terminal else: - v_s_ = sess.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0] + # v_s_ = sess.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0] + v_s_ = self.AC.critic(s_[np.newaxis, :])[0,0] # reduce dim from 2 to 0 buffer_v_target = [] @@ -218,46 +332,49 @@ def work(self): buffer_v_target.append(v_s_) buffer_v_target.reverse() - + buffer_s, buffer_a, buffer_v_target = ( np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target) ) - feed_dict = {self.AC.s: buffer_s, self.AC.a_his: buffer_a, self.AC.v_target: buffer_v_target} + # print(buffer_s, buffer_a, buffer_v_target) + # feed_dict = {self.AC.s: buffer_s, self.AC.a_his: buffer_a, self.AC.v_target: buffer_v_target} # update gradients on global network - self.AC.update_global(feed_dict) + # self.AC.update_global(feed_dict) + self.AC.update_global(buffer_s, buffer_a, buffer_v_target, globalAC) buffer_s, buffer_a, buffer_r = [], [], [] # update local network from global network - self.AC.pull_global() + self.AC.pull_global(globalAC) s = s_ total_step += 1 if done: if len(GLOBAL_RUNNING_R) == 0: # record running episode reward GLOBAL_RUNNING_R.append(ep_r) - else: + else: # moving average GLOBAL_RUNNING_R.append(0.95 * GLOBAL_RUNNING_R[-1] + 0.05 * ep_r) print( self.name, "episode:", GLOBAL_EP, - "| pos: %i" % self.env.unwrapped.hull.position[0], # number of move + # "| pos: %i" % self.env.unwrapped.hull.position[0], # number of move '| reward: %.1f' % ep_r, "| running_reward: %.1f" % GLOBAL_RUNNING_R[-1], # '| sigma:', test, # debug - 'WIN ' * 5 if self.env.unwrapped.hull.position[0] >= 88 else '', + # 'WIN ' * 5 if self.env.unwrapped.hull.position[0] >= 88 else '', ) GLOBAL_EP += 1 break if __name__ == "__main__": - sess = tf.Session() - + # sess = tf.Session() # ============================= TRAINING =============================== with tf.device("/cpu:0"): - OPT_A = tf.train.RMSPropOptimizer(LR_A, name='RMSPropA') - OPT_C = tf.train.RMSPropOptimizer(LR_C, name='RMSPropC') + + OPT_A = tf.optimizers.RMSprop(LR_A, name='RMSPropA') + OPT_C = tf.optimizers.RMSprop(LR_C, name='RMSPropC') + GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE) # we only need its params workers = [] # Create worker @@ -266,15 +383,23 @@ def work(self): workers.append(Worker(i_name, GLOBAL_AC)) COORD = tf.train.Coordinator() - sess.run(tf.global_variables_initializer()) + # sess.run(tf.global_variables_initializer()) # start TF threading worker_threads = [] for worker in workers: - t = threading.Thread(target=worker.work) + # t = threading.Thread(target=worker.work) + job = lambda: worker.work(GLOBAL_AC) + t = threading.Thread(target=job) t.start() worker_threads.append(t) COORD.join(worker_threads) + import matplotlib.pyplot as plt + plt.plot(GLOBAL_RUNNING_R) + plt.xlabel('episode') + plt.ylabel('global running reward') + plt.savefig('a3c.png') + plt.show() GLOBAL_AC.save_ckpt() @@ -282,15 +407,16 @@ def work(self): # env = gym.make(GAME) # GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE) # 
sess.run(tf.global_variables_initializer()) - # GLOBAL_AC.load_ckpt() - # while True: - # s = env.reset() - # rall = 0 - # while True: - # env.render() - # a = GLOBAL_AC.choose_action(s) - # s, r, d, _ = env.step(a) - # rall += r - # if d: - # print("reward", rall) - # break + GLOBAL_AC.load_ckpt() + while True: + s = env.reset() + rall = 0 + while True: + env.render() + s = s.astype('float32') # double to float + a = GLOBAL_AC.choose_action(s) + s, r, d, _ = env.step(a) + rall += r + if d: + print("reward", rall) + break diff --git a/examples/reinforcement_learning/tutorial_cartpole_ac.py b/examples/reinforcement_learning/tutorial_cartpole_ac.py index 4d8b6f8ea..ecdf68084 100644 --- a/examples/reinforcement_learning/tutorial_cartpole_ac.py +++ b/examples/reinforcement_learning/tutorial_cartpole_ac.py @@ -34,20 +34,20 @@ import time import numpy as np -import tensorflow as tf import gym +import tensorflow as tf import tensorlayer as tl ## enable eager mode -tf.enable_eager_execution() +# tf.enable_eager_execution() -tf.logging.set_verbosity(tf.logging.DEBUG) +# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) np.random.seed(2) -tf.set_random_seed(2) # reproducible +tf.random.set_seed(2) # reproducible # hyper-parameters OUTPUT_GRAPH = False @@ -93,7 +93,7 @@ def get_model(inputs_shape): nn = tl.layers.Dense(n_units=10, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2')(nn) nn = tl.layers.Dense(n_units=n_actions, name='actions')(nn) return tl.models.Model(inputs=ni, outputs=nn, name="Actor") - self.model = get_model([1, n_features]) + self.model = get_model([None, n_features]) self.model.train() # self.acts_logits = n.outputs # self.acts_prob = tf.nn.softmax(self.acts_logits) @@ -106,12 +106,12 @@ def get_model(inputs_shape): # with tf.variable_scope('train'): # self.train_op = tf.train.AdamOptimizer(lr).minimize(self.exp_v) - self.optimizer = tf.train.AdamOptimizer(lr) + self.optimizer = tf.optimizers.Adam(lr) # Morvan Zhou (the same) # with tf.variable_scope('exp_v'): # # log_prob = tf.log(self.acts_prob[0, self.a[0]]) # # self.exp_v = tf.reduce_mean(log_prob * self.td_error[0]) # advantage (TD_error) guided loss - # self.exp_v = tl.rein.log_weight(probs=self.acts_prob[0, self.a[0]], weights=self.td_error) + # self.exp_v = tl.rein.log_weight(probs=self.acts_prob[0, self.a[0]], trainable_weights=self.td_error) # # with tf.variable_scope('train'): # self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v) # minimize(-exp_v) = maximize(exp_v) @@ -119,22 +119,25 @@ def get_model(inputs_shape): def learn(self, s, a, td): # _, exp_v = self.sess.run([self.train_op, self.exp_v], {self.s: [s], self.a: [a], self.td_error: td[0]}) with tf.GradientTape() as tape: - _logits = self.model([s]).outputs + + # _logits = self.model([s]).outputs + _logits = self.model(np.array([s])) # _probs = tf.nn.softmax(_logits) - _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) + _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) # cross-entropy loss weighted by rewards grad = tape.gradient(_exp_v, self.model.trainable_weights) self.optimizer.apply_gradients(zip(grad, self.model.trainable_weights)) return _exp_v def choose_action(self, s): # probs = self.sess.run(self.acts_prob, {self.s: [s]}) # get probabilities of all actions - _logits = self.model([s]).outputs + # _logits = self.model(np.array([s])).outputs + _logits = self.model(np.array([s])) _probs = 
tf.nn.softmax(_logits).numpy() return tl.rein.choice_action_by_probs(_probs.ravel()) def choose_action_greedy(self, s): # probs = self.sess.run(self.acts_prob, {self.s: [s]}) # get probabilities of all actions - _logits = self.model([s]).outputs + _logits = self.model(np.array([s])) _probs = tf.nn.softmax(_logits).numpy() return np.argmax(_probs.ravel()) @@ -167,14 +170,14 @@ def get_model(inputs_shape): # self.loss = tf.square(self.td_error) # with tf.variable_scope('train'): # self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss) - self.optimizer = tf.train.AdamOptimizer(lr) + self.optimizer = tf.optimizers.Adam(lr) def learn(self, s, r, s_): # v_ = self.sess.run(self.v, {self.s: [s_]}) - v_ = self.model([s_]).outputs + v_ = self.model(np.array([s_])) # td_error, _ = self.sess.run([self.td_error, self.train_op], {self.s: [s], self.v_: v_, self.r: r}) with tf.GradientTape() as tape: - v = self.model([s]).outputs + v = self.model(np.array([s])) # TD_error = r + lambd * V(newS) - V(S) td_error = r + LAMBDA * v_ - v loss = tf.square(td_error) diff --git a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py index 9411da423..8e96a279e 100644 --- a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py +++ b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py @@ -28,16 +28,16 @@ import time import numpy as np -import tensorflow as tf import gym +import tensorflow as tf import tensorlayer as tl ## enable eager mode -tf.enable_eager_execution() +# tf.enable_eager_execution() -tf.logging.set_verbosity(tf.logging.DEBUG) +# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) env = gym.make('FrozenLake-v0') @@ -61,7 +61,7 @@ def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='observation') nn = tl.layers.Dense(4, act=None, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s')(ni) return tl.models.Model(inputs=ni, outputs=nn, name="Q-Network") -qnetwork = get_model([1, 16]) +qnetwork = get_model([None, 16]) qnetwork.train() train_weights = qnetwork.trainable_weights @@ -72,7 +72,7 @@ def get_model(inputs_shape): # nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32) # loss = tl.cost.mean_squared_error(nextQ, y, is_mean=False) # tf.reduce_sum(tf.square(nextQ - y)) # train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss) -optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1) +optimizer = tf.optimizers.SGD(learning_rate=0.1) ## Set learning parameters lambd = .99 # decay factor @@ -90,7 +90,7 @@ def get_model(inputs_shape): if render: env.render() ## Choose an action by greedily (with e chance of random action) from the Q-network # a, allQ = sess.run([predict, y], feed_dict={inputs: [to_one_hot(s, 16)]}) - allQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).outputs.numpy() + allQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).numpy() a = np.argmax(allQ, 1) ## e-Greedy Exploration !!! sample random action @@ -100,7 +100,7 @@ def get_model(inputs_shape): s1, r, d, _ = env.step(a[0]) ## Obtain the Q' values by feeding the new state through our network # Q1 = sess.run(y, feed_dict={inputs: [to_one_hot(s1, 16)]}) - Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).outputs.numpy() + Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).numpy() ## Obtain maxQ' and set our target value for chosen action. 
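# Illustrative sketch (not part of this patch): the following lines implement the
# Q-learning target Q(s,a) <- r + lambd * max_a' Q(s',a') and fit the network to it with
# a GradientTape step in place of the old sess.run train_op. A condensed standalone
# version of that update, reusing qnetwork, optimizer, to_one_hot, lambd and the imports
# already defined in this tutorial:
def dqn_update(s, a, r, s1):
    s_vec = np.asarray([to_one_hot(s, 16)], dtype=np.float32)
    s1_vec = np.asarray([to_one_hot(s1, 16)], dtype=np.float32)
    target_q = qnetwork(s_vec).numpy()          # current estimates, shape (1, 4)
    target_q[0, a] = r + lambd * np.max(qnetwork(s1_vec).numpy())
    with tf.GradientTape() as tape:
        q = qnetwork(s_vec)
        loss = tl.cost.mean_squared_error(target_q, q, is_mean=False)
    grads = tape.gradient(loss, qnetwork.trainable_weights)
    optimizer.apply_gradients(zip(grads, qnetwork.trainable_weights))
    return float(loss)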
maxQ1 = np.max(Q1) # in Q-Learning, policy is greedy, so we use "max" to select the next action. @@ -114,7 +114,7 @@ def get_model(inputs_shape): # Q'(s,a) ≈ Q(s,a) # _ = sess.run(train_op, {inputs: [to_one_hot(s, 16)], nextQ: targetQ}) with tf.GradientTape() as tape: - _qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).outputs + _qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)) _loss = tl.cost.mean_squared_error(targetQ, _qvalues, is_mean=False) grad = tape.gradient(_loss, train_weights) optimizer.apply_gradients(zip(grad, train_weights)) diff --git a/examples/spatial_transformer_network/tutorial_spatial_transformer_network_dynamic.py b/examples/spatial_transformer_network/tutorial_spatial_transformer_network_dynamic.py index aecc69f61..3170585e4 100644 --- a/examples/spatial_transformer_network/tutorial_spatial_transformer_network_dynamic.py +++ b/examples/spatial_transformer_network/tutorial_spatial_transformer_network_dynamic.py @@ -1,7 +1,9 @@ #! /usr/bin/python # -*- coding: utf8 -*- import time + import numpy as np + import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import * diff --git a/examples/spatial_transformer_network/tutorial_spatial_transformer_network_static.py b/examples/spatial_transformer_network/tutorial_spatial_transformer_network_static.py index c9a93629f..5f09db68b 100644 --- a/examples/spatial_transformer_network/tutorial_spatial_transformer_network_static.py +++ b/examples/spatial_transformer_network/tutorial_spatial_transformer_network_static.py @@ -1,7 +1,9 @@ #! /usr/bin/python # -*- coding: utf8 -*- import time + import numpy as np + import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import * diff --git a/examples/text_classification/tutorial_imdb_fasttext.py b/examples/text_classification/tutorial_imdb_fasttext.py index 2c2c7aed0..731d2fce4 100644 --- a/examples/text_classification/tutorial_imdb_fasttext.py +++ b/examples/text_classification/tutorial_imdb_fasttext.py @@ -31,8 +31,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import * from tensorlayer.models import * diff --git a/examples/text_generation/tutorial_generate_text.py b/examples/text_generation/tutorial_generate_text.py index 22a17ea37..4c42d0b12 100644 --- a/examples/text_generation/tutorial_generate_text.py +++ b/examples/text_generation/tutorial_generate_text.py @@ -28,8 +28,8 @@ import nltk import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import * diff --git a/examples/text_ptb/tutorial_ptb_lstm.py b/examples/text_ptb/tutorial_ptb_lstm.py index de08399c9..77c7c3425 100644 --- a/examples/text_ptb/tutorial_ptb_lstm.py +++ b/examples/text_ptb/tutorial_ptb_lstm.py @@ -104,8 +104,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py b/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py index 0021a7bfc..9fccca66a 100644 --- a/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py +++ b/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py @@ -105,8 +105,8 @@ import time import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl tf.logging.set_verbosity(tf.logging.DEBUG) diff --git a/examples/text_word_embedding/tutorial_word2vec_basic.py 
b/examples/text_word_embedding/tutorial_word2vec_basic.py index 6310699ad..5a1dc842c 100644 --- a/examples/text_word_embedding/tutorial_word2vec_basic.py +++ b/examples/text_word_embedding/tutorial_word2vec_basic.py @@ -44,9 +44,9 @@ import time import numpy as np -import tensorflow as tf from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf import tensorlayer as tl import wget diff --git a/examples/tutorial_work_with_onnx.py b/examples/tutorial_work_with_onnx.py index 522f2ad8c..46fd0cb42 100644 --- a/examples/tutorial_work_with_onnx.py +++ b/examples/tutorial_work_with_onnx.py @@ -117,13 +117,13 @@ import time import numpy as np -import tensorflow as tf -from tensorflow.python.tools.freeze_graph import freeze_graph as _freeze_graph import onnx +import tensorflow as tf import tensorlayer as tl from onnx_tf.backend import prepare from onnx_tf.frontend import tensorflow_graph_to_onnx_model +from tensorflow.python.tools.freeze_graph import freeze_graph as _freeze_graph tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py index 7c7b833c3..4aef4a429 100644 --- a/tensorlayer/activation.py +++ b/tensorlayer/activation.py @@ -3,7 +3,6 @@ """A file containing various activation functions.""" import tensorflow as tf - from tensorlayer.decorators import deprecated __all__ = [ diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py index 252178502..2664d8d72 100644 --- a/tensorlayer/cost.py +++ b/tensorlayer/cost.py @@ -5,8 +5,7 @@ import tensorflow as tf from tensorflow.python.framework import ops -from tensorflow.python.ops import standard_ops, math_ops, nn_ops, array_ops - +from tensorflow.python.ops import array_ops, math_ops, nn_ops, standard_ops from tensorlayer import logging __all__ = [ diff --git a/tensorlayer/db.py b/tensorlayer/db.py index cb8db8e10..1de73bf6a 100644 --- a/tensorlayer/db.py +++ b/tensorlayer/db.py @@ -7,15 +7,16 @@ import time from datetime import datetime -import gridfs import numpy as np + +import gridfs import pymongo import tensorflow as tf - from tensorlayer import logging -from tensorlayer.files import net2static_graph, static_graph2net, assign_weights -from tensorlayer.files import save_weights_to_hdf5, load_hdf5_to_weights -from tensorlayer.files import del_folder, exists_or_mkdir +from tensorlayer.files import ( + assign_weights, del_folder, exists_or_mkdir, load_hdf5_to_weights, net2static_graph, save_weights_to_hdf5, + static_graph2net +) class TensorHub(object): @@ -640,7 +641,7 @@ def run_top_task(self, task_name=None, sort=None, **kwargs): logging.info("[Database] Start Task: key: {} sort: {} push time: {}".format(task_name, sort, _datetime)) _script = _script.decode('utf-8') with tf.Graph().as_default(): # # as graph: # clear all TF graphs - exec(_script, globals()) + exec (_script, globals()) # set status to finished _ = self.db.Task.find_one_and_update({'_id': _id}, {'$set': {'status': 'finished'}}) diff --git a/tensorlayer/distributed.py b/tensorlayer/distributed.py index 544aac87e..d3fbdd38f 100644 --- a/tensorlayer/distributed.py +++ b/tensorlayer/distributed.py @@ -6,7 +6,6 @@ import tensorflow as tf from tensorflow.python.training import session_run_hook - from tensorlayer import logging from tensorlayer.decorators import deprecated from tensorlayer.lazy_imports import LazyImport diff --git a/tensorlayer/files/dataset_loaders/celebA_dataset.py b/tensorlayer/files/dataset_loaders/celebA_dataset.py index 
d5dc5755f..3563d58f9 100644 --- a/tensorlayer/files/dataset_loaders/celebA_dataset.py +++ b/tensorlayer/files/dataset_loaders/celebA_dataset.py @@ -5,7 +5,8 @@ import zipfile from tensorlayer import logging -from tensorlayer.files.utils import (download_file_from_google_drive, exists_or_mkdir, load_file_list) +from tensorlayer.files.utils import (download_file_from_google_drive, + exists_or_mkdir, load_file_list) __all__ = ['load_celebA_dataset'] diff --git a/tensorlayer/files/dataset_loaders/cyclegan_dataset.py b/tensorlayer/files/dataset_loaders/cyclegan_dataset.py index e327b3b4c..6c465f6c5 100644 --- a/tensorlayer/files/dataset_loaders/cyclegan_dataset.py +++ b/tensorlayer/files/dataset_loaders/cyclegan_dataset.py @@ -6,7 +6,8 @@ import numpy as np from tensorlayer import logging, visualize -from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, maybe_download_and_extract) +from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, + maybe_download_and_extract) __all__ = ['load_cyclegan_dataset'] diff --git a/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py b/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py index f2e582ae5..9f466c0eb 100644 --- a/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py +++ b/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py @@ -4,9 +4,9 @@ import os from tensorlayer import logging, visualize -from tensorlayer.files.utils import ( - del_file, folder_exists, load_file_list, load_folder_list, maybe_download_and_extract, read_file -) +from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, + load_folder_list, + maybe_download_and_extract, read_file) __all__ = ['load_flickr1M_dataset'] diff --git a/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py b/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py index 8049a0653..0492371b0 100644 --- a/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py +++ b/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py @@ -4,9 +4,9 @@ import os from tensorlayer import logging, visualize -from tensorlayer.files.utils import ( - del_file, folder_exists, load_file_list, maybe_download_and_extract, natural_keys, read_file -) +from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, + maybe_download_and_extract, natural_keys, + read_file) __all__ = ['load_flickr25k_dataset'] diff --git a/tensorlayer/files/dataset_loaders/mpii_dataset.py b/tensorlayer/files/dataset_loaders/mpii_dataset.py index a6f88f609..8b90dcdec 100644 --- a/tensorlayer/files/dataset_loaders/mpii_dataset.py +++ b/tensorlayer/files/dataset_loaders/mpii_dataset.py @@ -4,7 +4,8 @@ import os from tensorlayer import logging -from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, maybe_download_and_extract) +from tensorlayer.files.utils import (del_file, folder_exists, load_file_list, + maybe_download_and_extract) __all__ = ['load_mpii_pose_dataset'] diff --git a/tensorlayer/files/dataset_loaders/voc_dataset.py b/tensorlayer/files/dataset_loaders/voc_dataset.py index 5584864ae..c5ccadbcf 100644 --- a/tensorlayer/files/dataset_loaders/voc_dataset.py +++ b/tensorlayer/files/dataset_loaders/voc_dataset.py @@ -4,9 +4,10 @@ import os import tensorflow as tf - from tensorlayer import logging, utils -from tensorlayer.files.utils import (del_file, del_folder, folder_exists, load_file_list, maybe_download_and_extract) +from tensorlayer.files.utils import (del_file, del_folder, folder_exists, + load_file_list, + maybe_download_and_extract) 
__all__ = ['load_voc_dataset'] diff --git a/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py b/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py index 0261a8581..77c1f93f9 100644 --- a/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py +++ b/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py @@ -6,7 +6,6 @@ import tarfile from tensorflow.python.platform import gfile - from tensorlayer import logging from tensorlayer.files.utils import maybe_download_and_extract diff --git a/tensorlayer/files/utils.py b/tensorlayer/files/utils.py index 72fcb1824..e4b0f6f8e 100644 --- a/tensorlayer/files/utils.py +++ b/tensorlayer/files/utils.py @@ -1,8 +1,9 @@ #! /usr/bin/python # -*- coding: utf-8 -*- +import base64 import gzip -import importlib +import json import math import os import pickle @@ -14,23 +15,20 @@ import time import zipfile +import cloudpickle import h5py import numpy as np -import progressbar import scipy.io as sio -import tensorflow as tf from six.moves import cPickle -from tensorflow.python.platform import gfile +import progressbar +import tensorflow as tf import tensorlayer as tl -from tensorlayer import logging, nlp, utils, visualize - -import cloudpickle -import base64 from tensorflow.python.keras.saving import model_config as model_config_lib -from tensorflow.python.util.tf_export import keras_export +from tensorflow.python.platform import gfile from tensorflow.python.util import serialization -import json +from tensorflow.python.util.tf_export import keras_export +from tensorlayer import logging, nlp, utils, visualize # from six.moves import zip diff --git a/tensorlayer/initializers.py b/tensorlayer/initializers.py index 666777824..f68c05c1d 100644 --- a/tensorlayer/initializers.py +++ b/tensorlayer/initializers.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import numpy as np + import tensorflow as tf __all__ = [ diff --git a/tensorlayer/layers/activation.py b/tensorlayer/layers/activation.py index 44fcc47a9..9abb19ce7 100644 --- a/tensorlayer/layers/activation.py +++ b/tensorlayer/layers/activation.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.activation import leaky_relu6, leaky_twice_relu6 from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/binary_conv.py b/tensorlayer/layers/convolution/binary_conv.py index 23448cf6f..14e5a8721 100644 --- a/tensorlayer/layers/convolution/binary_conv.py +++ b/tensorlayer/layers/convolution/binary_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/deformable_conv.py b/tensorlayer/layers/convolution/deformable_conv.py index 5f75bbe15..b9a8224db 100644 --- a/tensorlayer/layers/convolution/deformable_conv.py +++ b/tensorlayer/layers/convolution/deformable_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias, private_method diff --git a/tensorlayer/layers/convolution/depthwise_conv.py b/tensorlayer/layers/convolution/depthwise_conv.py index d6136ede3..4fe4dc34c 100644 --- a/tensorlayer/layers/convolution/depthwise_conv.py +++ b/tensorlayer/layers/convolution/depthwise_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from 
tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/dorefa_conv.py b/tensorlayer/layers/convolution/dorefa_conv.py index ed9b32dd8..1f8944382 100644 --- a/tensorlayer/layers/convolution/dorefa_conv.py +++ b/tensorlayer/layers/convolution/dorefa_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/expert_conv.py b/tensorlayer/layers/convolution/expert_conv.py index d7e59a0e8..fb27b9df6 100644 --- a/tensorlayer/layers/convolution/expert_conv.py +++ b/tensorlayer/layers/convolution/expert_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/expert_deconv.py b/tensorlayer/layers/convolution/expert_deconv.py index cb5cd6773..a1571b2cb 100644 --- a/tensorlayer/layers/convolution/expert_deconv.py +++ b/tensorlayer/layers/convolution/expert_deconv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/group_conv.py b/tensorlayer/layers/convolution/group_conv.py index 34d8c10e6..2923a10ae 100644 --- a/tensorlayer/layers/convolution/group_conv.py +++ b/tensorlayer/layers/convolution/group_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/quan_conv.py b/tensorlayer/layers/convolution/quan_conv.py index e235dfeb4..662df2661 100644 --- a/tensorlayer/layers/convolution/quan_conv.py +++ b/tensorlayer/layers/convolution/quan_conv.py @@ -2,12 +2,12 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import (quantize_active_overflow, quantize_weight_overflow) +from tensorlayer.layers.utils import (quantize_active_overflow, + quantize_weight_overflow) __all__ = ['QuanConv2d'] diff --git a/tensorlayer/layers/convolution/quan_conv_bn.py b/tensorlayer/layers/convolution/quan_conv_bn.py index ef0f9bfda..1c1593373 100644 --- a/tensorlayer/layers/convolution/quan_conv_bn.py +++ b/tensorlayer/layers/convolution/quan_conv_bn.py @@ -3,11 +3,11 @@ import tensorflow as tf from tensorflow.python.training import moving_averages - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import (quantize_active_overflow, quantize_weight_overflow) +from tensorlayer.layers.utils import (quantize_active_overflow, + quantize_weight_overflow) # from tensorlayer.layers.core import LayersConfig diff --git a/tensorlayer/layers/convolution/separable_conv.py b/tensorlayer/layers/convolution/separable_conv.py index b6ae62446..ff67672ba 100644 --- a/tensorlayer/layers/convolution/separable_conv.py +++ b/tensorlayer/layers/convolution/separable_conv.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from 
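The quan_conv and quan_conv_bn hunks above only rewrap the import of quantize_active_overflow and quantize_weight_overflow, but it may help to recall what those helpers are for. A rough sketch of a quantization-aware convolution forward pass follows; the argument order and defaults are assumptions for illustration, not TensorLayer's verbatim implementation:

import tensorflow as tf
from tensorlayer.layers.utils import (quantize_active_overflow,
                                      quantize_weight_overflow)

def quan_conv2d_forward(inputs, filters, bitA=8, bitW=8):
    x = quantize_active_overflow(inputs, bitA)   # quantize activations to bitA bits (assumed signature)
    w = quantize_weight_overflow(filters, bitW)  # quantize weights to bitW bits (assumed signature)
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')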
tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/simplified_conv.py b/tensorlayer/layers/convolution/simplified_conv.py index c00ff8fe7..8c8eebece 100644 --- a/tensorlayer/layers/convolution/simplified_conv.py +++ b/tensorlayer/layers/convolution/simplified_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/simplified_deconv.py b/tensorlayer/layers/convolution/simplified_deconv.py index 847062859..569fe0810 100644 --- a/tensorlayer/layers/convolution/simplified_deconv.py +++ b/tensorlayer/layers/convolution/simplified_deconv.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/convolution/super_resolution.py b/tensorlayer/layers/convolution/super_resolution.py index 35fee8722..a3f51e2a8 100644 --- a/tensorlayer/layers/convolution/super_resolution.py +++ b/tensorlayer/layers/convolution/super_resolution.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias, private_method diff --git a/tensorlayer/layers/convolution/ternary_conv.py b/tensorlayer/layers/convolution/ternary_conv.py index 9a97c7bec..512350ba5 100644 --- a/tensorlayer/layers/convolution/ternary_conv.py +++ b/tensorlayer/layers/convolution/ternary_conv.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/core.py b/tensorlayer/layers/core.py index ce98f156c..8e13631b7 100644 --- a/tensorlayer/layers/core.py +++ b/tensorlayer/layers/core.py @@ -1,17 +1,15 @@ #! 
/usr/bin/python # -*- coding: utf-8 -*- +import inspect from abc import abstractmethod import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import (deprecated_alias, private_method, protected_method) -from tensorlayer.layers.utils import (get_variable_with_initializer, list_remove_repeat) from tensorlayer.files import utils - -import inspect +from tensorlayer.layers.utils import (get_variable_with_initializer, list_remove_repeat) __all__ = ['Layer', 'ModelLayer', 'LayerList'] diff --git a/tensorlayer/layers/dense/base_dense.py b/tensorlayer/layers/dense/base_dense.py index a5b800f04..bec9d3f6f 100644 --- a/tensorlayer/layers/dense/base_dense.py +++ b/tensorlayer/layers/dense/base_dense.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/dense/binary_dense.py b/tensorlayer/layers/dense/binary_dense.py index 4067ac4c3..74d5208cd 100644 --- a/tensorlayer/layers/dense/binary_dense.py +++ b/tensorlayer/layers/dense/binary_dense.py @@ -3,7 +3,6 @@ import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/dense/dorefa_dense.py b/tensorlayer/layers/dense/dorefa_dense.py index 80ae3365c..73069d478 100644 --- a/tensorlayer/layers/dense/dorefa_dense.py +++ b/tensorlayer/layers/dense/dorefa_dense.py @@ -3,7 +3,6 @@ import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/dense/dropconnect.py b/tensorlayer/layers/dense/dropconnect.py index d68e6c762..371ed2e6b 100644 --- a/tensorlayer/layers/dense/dropconnect.py +++ b/tensorlayer/layers/dense/dropconnect.py @@ -1,13 +1,13 @@ #! 
/usr/bin/python # -*- coding: utf-8 -*- +import numbers + import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -import numbers __all__ = [ 'DropconnectDense', diff --git a/tensorlayer/layers/dense/quan_dense.py b/tensorlayer/layers/dense/quan_dense.py index 5a2513259..8d5c594c7 100644 --- a/tensorlayer/layers/dense/quan_dense.py +++ b/tensorlayer/layers/dense/quan_dense.py @@ -3,11 +3,11 @@ import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import (quantize_active_overflow, quantize_weight_overflow) +from tensorlayer.layers.utils import (quantize_active_overflow, + quantize_weight_overflow) __all__ = [ 'QuanDense', diff --git a/tensorlayer/layers/dense/quan_dense_bn.py b/tensorlayer/layers/dense/quan_dense_bn.py index e647a7e6e..bcbd70950 100644 --- a/tensorlayer/layers/dense/quan_dense_bn.py +++ b/tensorlayer/layers/dense/quan_dense_bn.py @@ -4,11 +4,11 @@ import tensorflow as tf # from tensorlayer.layers.core import LayersConfig from tensorflow.python.training import moving_averages - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import (quantize_active_overflow, quantize_weight_overflow) +from tensorlayer.layers.utils import (quantize_active_overflow, + quantize_weight_overflow) __all__ = [ 'QuanDenseLayerWithBN', diff --git a/tensorlayer/layers/dense/ternary_dense.py b/tensorlayer/layers/dense/ternary_dense.py index 27efb9090..28d84297e 100644 --- a/tensorlayer/layers/dense/ternary_dense.py +++ b/tensorlayer/layers/dense/ternary_dense.py @@ -3,7 +3,6 @@ import tensorflow as tf import tensorlayer as tl - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/dropout.py b/tensorlayer/layers/dropout.py index 3724d8b43..25fe80a36 100644 --- a/tensorlayer/layers/dropout.py +++ b/tensorlayer/layers/dropout.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/embedding.py b/tensorlayer/layers/embedding.py index 80c5cadfa..a82c1a93b 100644 --- a/tensorlayer/layers/embedding.py +++ b/tensorlayer/layers/embedding.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/extend.py b/tensorlayer/layers/extend.py index 42395a537..09d5508db 100644 --- a/tensorlayer/layers/extend.py +++ b/tensorlayer/layers/extend.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/image_resampling.py b/tensorlayer/layers/image_resampling.py index 3b2a2825a..4713200d3 100644 --- a/tensorlayer/layers/image_resampling.py +++ b/tensorlayer/layers/image_resampling.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import 
deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/inputs.py b/tensorlayer/layers/inputs.py index 0330347fe..4f2544b06 100644 --- a/tensorlayer/layers/inputs.py +++ b/tensorlayer/layers/inputs.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.layers.core import Layer, LayerNode diff --git a/tensorlayer/layers/lambda_layers.py b/tensorlayer/layers/lambda_layers.py index 13bc3ecbe..9b82ad603 100644 --- a/tensorlayer/layers/lambda_layers.py +++ b/tensorlayer/layers/lambda_layers.py @@ -2,11 +2,10 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias -from tensorlayer.layers.core import Layer from tensorlayer.files import utils +from tensorlayer.layers.core import Layer # from tensorlayer.layers.core import TF_GRAPHKEYS_VARIABLES diff --git a/tensorlayer/layers/merge.py b/tensorlayer/layers/merge.py index 346a65962..2509d35a6 100644 --- a/tensorlayer/layers/merge.py +++ b/tensorlayer/layers/merge.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/noise.py b/tensorlayer/layers/noise.py index bd9c2df9c..c658f8e19 100644 --- a/tensorlayer/layers/noise.py +++ b/tensorlayer/layers/noise.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py index d8cec274c..0de0e8ed1 100644 --- a/tensorlayer/layers/normalization.py +++ b/tensorlayer/layers/normalization.py @@ -2,11 +2,10 @@ # -*- coding: utf-8 -*- import tensorflow as tf +import tensorlayer as tl from tensorflow.python.framework import ops from tensorflow.python.ops import math_ops from tensorflow.python.training import moving_averages - -import tensorlayer as tl from tensorlayer import logging from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/padding.py b/tensorlayer/layers/padding.py index db1bbb304..edcb720a5 100644 --- a/tensorlayer/layers/padding.py +++ b/tensorlayer/layers/padding.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/pooling.py b/tensorlayer/layers/pooling.py index 2046de6c5..a22cea358 100644 --- a/tensorlayer/layers/pooling.py +++ b/tensorlayer/layers/pooling.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/quantize.py b/tensorlayer/layers/quantize.py index 3b5b19635..47ad2a088 100644 --- a/tensorlayer/layers/quantize.py +++ b/tensorlayer/layers/quantize.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/recurrent.py b/tensorlayer/layers/recurrent.py index 16b7208d0..2364c6a7d 100644 --- a/tensorlayer/layers/recurrent.py +++ b/tensorlayer/layers/recurrent.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - import 
tensorlayer as tl from tensorlayer import logging from tensorlayer.decorators import deprecated_alias diff --git a/tensorlayer/layers/scale.py b/tensorlayer/layers/scale.py index ac1800529..6546d70af 100644 --- a/tensorlayer/layers/scale.py +++ b/tensorlayer/layers/scale.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.initializers import constant from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/shape.py b/tensorlayer/layers/shape.py index f8e7b47db..e308eb0c4 100644 --- a/tensorlayer/layers/shape.py +++ b/tensorlayer/layers/shape.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/spatial_transformer.py b/tensorlayer/layers/spatial_transformer.py index e456625a7..262108a68 100644 --- a/tensorlayer/layers/spatial_transformer.py +++ b/tensorlayer/layers/spatial_transformer.py @@ -2,11 +2,11 @@ # -*- coding: utf-8 -*- import numpy as np +from six.moves import xrange + import tensorflow as tf import tensorlayer as tl -from six.moves import xrange from tensorflow.python.ops import array_ops - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/stack.py b/tensorlayer/layers/stack.py index c31327989..c35e3837f 100644 --- a/tensorlayer/layers/stack.py +++ b/tensorlayer/layers/stack.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import tensorflow as tf - from tensorlayer import logging from tensorlayer.decorators import deprecated_alias from tensorlayer.layers.core import Layer diff --git a/tensorlayer/layers/utils.py b/tensorlayer/layers/utils.py index 10cc1fc18..6d411589f 100644 --- a/tensorlayer/layers/utils.py +++ b/tensorlayer/layers/utils.py @@ -2,10 +2,10 @@ # -*- coding: utf-8 -*- import numpy as np + import tensorflow as tf import tensorlayer as tl from tensorflow.python.ops.rnn_cell import LSTMStateTuple - from tensorlayer import logging from tensorlayer.decorators import deprecated, deprecated_alias diff --git a/tensorlayer/logging/contrib/hyperdash.py b/tensorlayer/logging/contrib/hyperdash.py index 122a8c7e5..6e19c8e9b 100644 --- a/tensorlayer/logging/contrib/hyperdash.py +++ b/tensorlayer/logging/contrib/hyperdash.py @@ -4,7 +4,6 @@ from __future__ import absolute_import import hyperdash as hd - import tensorlayer as tl __all__ = ["HyperDashHandler", "monitor", "Experiment", "IPythonMagicsWrapper"] diff --git a/tensorlayer/models/core.py b/tensorlayer/models/core.py index c811b9648..cbcff4bf3 100644 --- a/tensorlayer/models/core.py +++ b/tensorlayer/models/core.py @@ -3,9 +3,8 @@ from queue import Queue import tensorflow as tf -from tensorflow.python.framework import ops as tf_ops - import tensorlayer as tl +from tensorflow.python.framework import ops as tf_ops from tensorlayer import logging from tensorlayer.files import utils from tensorlayer.layers import Layer, ModelLayer diff --git a/tensorlayer/models/mobilenetv1.py b/tensorlayer/models/mobilenetv1.py index 8065eeef3..4908b3d89 100644 --- a/tensorlayer/models/mobilenetv1.py +++ b/tensorlayer/models/mobilenetv1.py @@ -5,7 +5,6 @@ import os import tensorflow as tf - from tensorlayer import logging from tensorlayer.files import (assign_weights, load_npz, maybe_download_and_extract) from tensorlayer.layers import (BatchNorm, Conv2d, DepthwiseConv2d, Flatten, 
GlobalMeanPool2d, Input, Reshape) diff --git a/tensorlayer/models/squeezenetv1.py b/tensorlayer/models/squeezenetv1.py index 6d6a70535..a2d7e4304 100644 --- a/tensorlayer/models/squeezenetv1.py +++ b/tensorlayer/models/squeezenetv1.py @@ -5,10 +5,9 @@ import os import tensorflow as tf - from tensorlayer import logging from tensorlayer.files import (assign_weights, load_npz, maybe_download_and_extract) -from tensorlayer.layers import (Concat, Conv2d, Dropout, GlobalMeanPool2d, Input, MaxPool2d, Lambda) +from tensorlayer.layers import (Concat, Conv2d, Dropout, GlobalMeanPool2d, Input, Lambda, MaxPool2d) from tensorlayer.models import Model __all__ = [ diff --git a/tensorlayer/models/vgg.py b/tensorlayer/models/vgg.py index 391878c61..06648cb53 100644 --- a/tensorlayer/models/vgg.py +++ b/tensorlayer/models/vgg.py @@ -30,12 +30,12 @@ import os import numpy as np -import tensorflow as tf +import tensorflow as tf import tensorlayer as tl from tensorlayer import logging from tensorlayer.files import assign_weights, maybe_download_and_extract -from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input, LayerList, MaxPool2d, Lambda) +from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input, Lambda, LayerList, MaxPool2d) from tensorlayer.models import Model __all__ = [ diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py index d96a7acf1..ed1ce975d 100755 --- a/tensorlayer/nlp.py +++ b/tensorlayer/nlp.py @@ -11,11 +11,11 @@ from collections import Counter import numpy as np -import tensorflow as tf from six.moves import urllib, xrange -from tensorflow.python.platform import gfile +import tensorflow as tf import tensorlayer as tl +from tensorflow.python.platform import gfile from tensorlayer.lazy_imports import LazyImport nltk = LazyImport("nltk") diff --git a/tensorlayer/rein.py b/tensorlayer/rein.py index e5cbe6bd4..8ddce7316 100644 --- a/tensorlayer/rein.py +++ b/tensorlayer/rein.py @@ -2,9 +2,10 @@ # -*- coding: utf-8 -*- import numpy as np -import tensorflow as tf from six.moves import xrange +import tensorflow as tf + __all__ = [ 'discount_episode_rewards', 'cross_entropy_reward_loss', diff --git a/tensorlayer/utils.py b/tensorlayer/utils.py index d6b8e6d78..35e054afb 100644 --- a/tensorlayer/utils.py +++ b/tensorlayer/utils.py @@ -11,9 +11,9 @@ from sys import platform as _platform import numpy as np -import tensorflow as tf from sklearn.metrics import accuracy_score, confusion_matrix, f1_score +import tensorflow as tf import tensorlayer as tl __all__ = [ From c6e4ab3f0277ee50ae3f1eb0d50af0c72b76fa36 Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Tue, 14 May 2019 12:24:37 +0100 Subject: [PATCH 2/4] rl examples clean code, add notation, add tf.function --- CHANGELOG.md | 3 + .../tutorial_atari_pong.py | 33 +-- ...ial_bipedalwalker_a3c_continuous_action.py | 208 ++++-------------- .../tutorial_cartpole_ac.py | 76 +------ .../tutorial_frozenlake_dqn.py | 35 +-- .../tutorial_frozenlake_q_table.py | 7 +- 6 files changed, 69 insertions(+), 293 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e932d4a6e..176b91013 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ To release a new version, please update the changelog as followed: - remove `tl.layers.initialize_global_variables(sess)` (PR #931) - change `tl.layers.core`, `tl.models.core` (PR #966) - change `weights` into `all_weights`, `trainable_weights`, `nontrainable_weights` +- change reinforcement learning examples from TL1 to TL2 (PR #970) ### Dependencies Update - 
nltk>=3.3,<3.4 => nltk>=3.3,<3.5 (PR #892) @@ -319,8 +320,10 @@ To release a new version, please update the changelog as followed: - AtrousDeConv2dLayer added (PR #662) - Fix bugs of using `tf.layers` in CNN (PR #686) - Optimizer: + - AMSGrad Optimizer added based on `On the Convergence of Adam and Beyond (ICLR 2018)` (PR #636) - Setup: + - Creation of installation flaggs `all`, `all_cpu`, and `all_gpu` (PR #660) - Test: - `test_utils_predict.py` added to reproduce and fix issue #288 (PR #566) diff --git a/examples/reinforcement_learning/tutorial_atari_pong.py b/examples/reinforcement_learning/tutorial_atari_pong.py index 0e8c62009..7e1b28822 100644 --- a/examples/reinforcement_learning/tutorial_atari_pong.py +++ b/examples/reinforcement_learning/tutorial_atari_pong.py @@ -34,11 +34,6 @@ import tensorflow as tf import tensorlayer as tl -## enable eager mode -# tf.enable_eager_execution() - - -# tf.logging.set_verbosity(tf.logging.DEBUG) # enable logging tl.logging.set_verbosity(tl.logging.DEBUG) # hyper-parameters @@ -73,10 +68,7 @@ def prepro(I): episode_number = 0 xs, ys, rs = [], [], [] -# observation for training and inference -# t_states = tf.placeholder(tf.float32, shape=[None, D]) # policy network - def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape) nn = tl.layers.Dense(n_units=H, act=tf.nn.relu, name='hidden')(ni) @@ -85,22 +77,9 @@ def get_model(inputs_shape): return M model = get_model([None, D]) train_weights = model.trainable_weights -# probs = model(t_states, is_train=True).outputs -# sampling_prob = tf.nn.softmax(probs) - -# t_actions = tf.placeholder(tf.int32, shape=[None]) -# t_discount_rewards = tf.placeholder(tf.float32, shape=[None]) -# loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards) -optimizer = tf.optimizers.RMSprop(lr=learning_rate, decay=decay_rate)#.minimize(loss) - -# with tf.Session() as sess: -# sess.run(tf.global_variables_initializer()) - # if resume: TODO - # load_params = tl.files.load_npz(name=model_file_name+'.npz') - # tl.files.assign_params(sess, load_params, network) - # tl.files.load_and_assign_npz(sess, model_file_name + '.npz', network) - # network.print_params() - # network.print_layers() + +optimizer = tf.optimizers.RMSprop(lr=learning_rate, decay=decay_rate) + model.train() # set model to train mode (in case you add dropout into the model) start_time = time.time() @@ -114,14 +93,12 @@ def get_model(inputs_shape): x = x.reshape(1, D) prev_x = cur_x - # prob = sess.run(sampling_prob, feed_dict={t_states: x}) _prob = model(x) prob = tf.nn.softmax(_prob) # action. 
1: STOP 2: UP 3: DOWN # action = np.random.choice([1,2,3], p=prob.flatten()) # action = tl.rein.choice_action_by_probs(prob.flatten(), [1, 2, 3]) - # action = np.random.choice([1,2,3], p=prob.numpy()) action = tl.rein.choice_action_by_probs(prob[0].numpy(), [1, 2, 3]) observation, reward, done, _ = env.step(action) @@ -145,10 +122,6 @@ def get_model(inputs_shape): xs, ys, rs = [], [], [] - # sess.run(train_op, feed_dict={t_states: epx, t_actions: epy, t_discount_rewards: disR}) - # t_actions = tf.placeholder(tf.int32, shape=[None]) - # t_discount_rewards = tf.placeholder(tf.float32, shape=[None]) - # loss = tl.rein.cross_entropy_reward_loss(probs, t_actions, t_discount_rewards) with tf.GradientTape() as tape: _prob = model(epx) _loss = tl.rein.cross_entropy_reward_loss(_prob, epy, disR) diff --git a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py index 145024ba9..796ef9d74 100644 --- a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py +++ b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py @@ -52,17 +52,16 @@ tfd = tfp.distributions -# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) -# np.random.seed(2) -# tf.random.set_seed(2) # reproducible +np.random.seed(2) +tf.random.set_seed(2) # reproducible GAME = 'BipedalWalker-v2' # BipedalWalkerHardcore-v2 BipedalWalker-v2 LunarLanderContinuous-v2 OUTPUT_GRAPH = False LOG_DIR = './log' -# N_WORKERS = multiprocessing.cpu_count() -N_WORKERS = 2 +N_WORKERS = multiprocessing.cpu_count() +# N_WORKERS = 2 MAX_GLOBAL_EP = 8000 # 8000 GLOBAL_NET_SCOPE = 'Global_Net' UPDATE_GLOBAL_ITER = 10 @@ -77,208 +76,91 @@ N_S = env.observation_space.shape[0] N_A = env.action_space.shape[0] -# N_A = env.action_space.n -# A_BOUND = [env.action_space.low, env.action_space.high] A_BOUND = [env.action_space.low, env.action_space.high] -# A_BOUND[0] = A_BOUND[0].reshape(1, N_A) -# A_BOUND[1] = A_BOUND[1].reshape(1, N_A) +A_BOUND[0] = A_BOUND[0].reshape(1, N_A) +A_BOUND[1] = A_BOUND[1].reshape(1, N_A) # print(A_BOUND) -# print(env.unwrapped.hull.position[0]) -# exit() - class ACNet(object): - def __init__(self, scope, globalAC=None): # no need for scope + def __init__(self, scope, globalAC=None): self.scope = scope self.save_path = './model' - # if scope == GLOBAL_NET_SCOPE: - # ## global network only do inference - # with tf.variable_scope(scope): - # self.s = tf.placeholder(tf.float32, [None, N_S], 'S') - # self._build_net() - - - # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space - - # with tf.name_scope('choose_a'): # use local params to choose action - # self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - - # else: - # ## worker network calculate gradient locally, update on global network - # # with tf.variable_scope(scope): - # # self.s = tf.placeholder(tf.float32, [None, N_S], 'S') - # # self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A') - # # self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget') - - - - # self._build_net() - - # td = tf.subtract(self.v_target, self.v, name='TD_error') - # with tf.name_scope('c_loss'): - # self.c_loss = tf.reduce_mean(tf.square(td)) - - # with tf.name_scope('wrap_a_out'): - # self.test = self.sigma[0] - # self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - - # normal_dist = tf.contrib.distributions.Normal(self.mu, 
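Before the A3C example: the Pong hunk above replaces the TF1 placeholder/session plumbing with a single eager update. Written as a standalone function (an illustrative wrapper; epx, epy and disR are the stacked observations, actions and discounted rewards built in the episode loop):

import tensorflow as tf
import tensorlayer as tl

def pong_train_step(model, optimizer, epx, epy, disR):
    with tf.GradientTape() as tape:
        logits = model(epx)  # forward pass through the tl.models.Model policy network
        loss = tl.rein.cross_entropy_reward_loss(logits, epy, disR)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss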
self.sigma) # for continuous action space - - # with tf.name_scope('a_loss'): - # log_prob = normal_dist.log_prob(self.a_his) - # exp_v = log_prob * td - # entropy = normal_dist.entropy() # encourage exploration - # self.exp_v = ENTROPY_BETA * entropy + exp_v - # self.a_loss = tf.reduce_mean(-self.exp_v) - - # with tf.name_scope('choose_a'): # use local params to choose action - # self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - - # with tf.name_scope('local_grad'): - # self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) - # self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) - # self.a_grads = tf.gradients(self.a_loss, self.a_params) - # self.c_grads = tf.gradients(self.c_loss, self.c_params) - - # with tf.name_scope('sync'): - # with tf.name_scope('pull'): - # self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params)] - # self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params)] - # with tf.name_scope('push'): - # self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params)) - # self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params)) - - # def _build_net(self): - # w_init = tf.contrib.layers.xavier_initializer() - # w_init = tf.random_uniform_initializer(0, 0.01) - w_init = tf.keras.initializers.glorot_normal(seed=None) - # with tf.variable_scope('actor'): # Policy network - # nn = InputLayer(self.s, name='in') - # nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='la') - # nn = DenseLayer(nn, n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2') - # mu = DenseLayer(nn, n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu') - # sigma = DenseLayer(nn, n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma') - # self.mu = mu.outputs - # self.sigma = sigma.outputs - def get_actor(input_shape): + + w_init = tf.keras.initializers.glorot_normal(seed=None) # initializer, glorot=xavier + def get_actor(input_shape): # policy network with tf.name_scope(self.scope): ni = tl.layers.Input(input_shape, name='in') - nn = tl.layers.Dense(n_units=50, act=tf.nn.relu6, W_init=w_init, name='la')(ni) - nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=w_init, name='la2')(nn) + nn = tl.layers.Dense(n_units=500, act=tf.nn.relu6, W_init=w_init, name='la')(ni) + nn = tl.layers.Dense(n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2')(nn) mu = tl.layers.Dense(n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu')(nn) sigma = tl.layers.Dense(n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma')(nn) return tl.models.Model(inputs=ni, outputs=[mu, sigma], name=scope+'/Actor') self.actor = get_actor( [None, N_S]) - self.actor.train() # set training mode, also for workers? - - def get_critic(input_shape): + self.actor.train() # train mode for Dropout, BatchNorm + def get_critic(input_shape): # we use Value-function here, but not Q-function. 
with tf.name_scope(self.scope): ni = tl.layers.Input(input_shape, name='in') - nn = tl.layers.Dense(n_units=50, act=tf.nn.relu6, W_init=w_init, name='lc')(ni) - nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=w_init, name='lc2')(nn) + nn = tl.layers.Dense(n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc')(ni) + nn = tl.layers.Dense(n_units=300, act=tf.nn.relu6, W_init=w_init, name='lc2')(nn) v = tl.layers.Dense(n_units=1, W_init=w_init, name='v')(nn) return tl.models.Model(inputs=ni, outputs=v, name=scope+'/Critic') self.critic = get_critic( [None, N_S]) - self.critic.train() - - # self.a_params = tl.layers.get_variables_with_name(scope + '/Actor', True, False) - # self.c_params = tl.layers.get_variables_with_name(scope + '/Critic', True, False) - - # with tf.variable_scope('critic'): # we use Value-function here, but not Q-function. - # nn = InputLayer(self.s, name='in') - # nn = DenseLayer(nn, n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc') - # nn = DenseLayer(nn, n_units=200, act=tf.nn.relu6, W_init=w_init, name='lc2') - # v = DenseLayer(nn, n_units=1, W_init=w_init, name='v') - # self.v = v.outputs - - # def update_global(self, feed_dict): # run by a local - def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): - # _, _, t = sess.run( - # [self.update_a_op, self.update_c_op, self.test], feed_dict - # ) # local grads applies to global net + self.critic.train() # train mode for Dropout, BatchNorm + + @tf.function # convert numpy functions to tf.Operations in the TFgraph, return tensor + def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer to the global Actor-Crtic network for updating it with samples + ''' update the global critic ''' with tf.GradientTape() as tape: self.v = self.critic(buffer_s) - self.v_target = buffer_v_target # tensor float? + self.v_target = buffer_v_target td = tf.subtract(self.v_target, self.v, name='TD_error') - # with tf.name_scope('c_loss'): self.c_loss = tf.reduce_mean(tf.square(td)) self.c_grads = tape.gradient(self.c_loss, self.critic.trainable_weights) - OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) - del tape # Drop the reference to the tape - + OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) # local grads applies to global net + # del tape # Drop the reference to the tape + ''' update the global actor ''' with tf.GradientTape() as tape: self.mu, self.sigma = self.actor(buffer_s) - # print('mu: ', self.mu) - # print('sigma: ', self.sigma) - # with tf.name_scope('wrap_a_out'): self.test = self.sigma[0] self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - - # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space - normal_dist = tfd.Normal(self.mu, self.sigma) - # with tf.name_scope('a_loss'): - self.a_his = buffer_a # tensor float? 
+ normal_dist = tfd.Normal(self.mu, self.sigma) # no tf.contrib for tf2.0 + self.a_his = buffer_a # float32 log_prob = normal_dist.log_prob(self.a_his) - exp_v = log_prob * td + exp_v = log_prob * td # td is from the critic part, no gradients for it entropy = normal_dist.entropy() # encourage exploration self.exp_v = ENTROPY_BETA * entropy + exp_v self.a_loss = tf.reduce_mean(-self.exp_v) - # with tf.name_scope('local_grad'): - # self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False) - # self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False) - # self.a_grads = tf.gradients(self.a_loss, self.a_params) - # self.c_grads = tf.gradients(self.c_loss, self.c_params) self.a_grads = tape.gradient(self.a_loss, self.actor.trainable_weights) - OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) - - - # return t - return self.test.numpy() - - def pull_global(self, globalAC): # run by a local - # sess.run([self.pull_a_params_op, self.pull_c_params_op]) - # with tf.name_scope('sync'): - # with tf.name_scope('pull'): - # self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.actor.trainable_weights, globalAC.a_params)] - # self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.critic.trainable_weights, globalAC.c_params)] - # print(self.scope, 'before: ', self.actor.trainable_weights[0][5][2]) - # print(self.scope, 'global: ', globalAC.actor.trainable_weights[0][5][2]) + OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) # local grads applies to global net + return self.test # for test purpose + + @tf.function + def pull_global(self, globalAC): # run by a local, pull weights from the global nets for l_p, g_p in zip(self.actor.trainable_weights, globalAC.actor.trainable_weights): l_p.assign(g_p) for l_p, g_p in zip(self.critic.trainable_weights, globalAC.critic.trainable_weights): l_p.assign(g_p) - # print(self.scope, 'after: ', self.actor.trainable_weights[0][5][2]) def choose_action(self, s): # run by a local s = s[np.newaxis, :] self.mu, self.sigma = self.actor(s) - # print('mu1: ', self.mu) - # print('sigma1: ', self.sigma) + with tf.name_scope('wrap_a_out'): - # self.test = self.sigma[0] self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - # normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) - normal_dist = tfd.Normal(self.mu, self.sigma) + normal_dist = tfd.Normal(self.mu, self.sigma) # for continuous action space self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) - # return sess.run(self.A, {self.s: s})[0] return self.A.numpy()[0] - def save_ckpt(self): + def save_ckpt(self): # save trained weights tl.files.save_npz(self.actor.trainable_weights, name='model_actor.npz') tl.files.save_npz(self.critic.trainable_weights, name='model_critic.npz') - # tl.files.save_ckpt( - # sess=sess, mode_name='model.ckpt', var_list=self.a_params + self.c_params, save_dir=self.scope, - # printable=True - # ) - - def load_ckpt(self): - # tl.files.load_hdf5_to_trainable_weights(self.save_path+'/actor', self.actor) - # tl.files.load_hdf5_to_trainable_weights(self.save_path+'/critic', self.critic) + + def load_ckpt(self): # load trained weights tl.files.load_and_assign_npz(name='model_actor.npz', network=self.actor) tl.files.load_and_assign_npz(name='model_critic.npz', network=self.critic) @@ -304,11 +186,8 @@ def work(self, globalAC): s = s.astype('float32') # double to float a = self.AC.choose_action(s) s_, r, done, _info = self.env.step(a) + s_ = 
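The update_global and choose_action methods above carry the two ideas that make this an A3C worker: gradients are computed against the local network but applied to the global one (and later pulled back with pull_global), and the continuous action comes from a Normal distribution whose mean is rescaled to the action bounds. A condensed sketch of both, with illustrative names and an assumed default for the entropy coefficient:

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

def update_global_sketch(local_ac, global_ac, opt_a, opt_c,
                         buffer_s, buffer_a, buffer_v_target, a_bound, entropy_beta=0.005):
    # critic: squared TD error, with the worker's gradients applied to the GLOBAL critic
    with tf.GradientTape() as tape:
        v = local_ac.critic(buffer_s)
        td = buffer_v_target - v
        c_loss = tf.reduce_mean(tf.square(td))
    c_grads = tape.gradient(c_loss, local_ac.critic.trainable_weights)
    opt_c.apply_gradients(zip(c_grads, global_ac.critic.trainable_weights))

    # actor: log-probability weighted by the TD error (advantage) plus an entropy bonus,
    # again with the worker's gradients applied to the GLOBAL actor
    with tf.GradientTape() as tape:
        mu, sigma = local_ac.actor(buffer_s)
        mu, sigma = mu * a_bound[1], sigma + 1e-5
        dist = tfd.Normal(mu, sigma)
        exp_v = dist.log_prob(buffer_a) * td + entropy_beta * dist.entropy()
        a_loss = tf.reduce_mean(-exp_v)
    a_grads = tape.gradient(a_loss, local_ac.actor.trainable_weights)
    opt_a.apply_gradients(zip(a_grads, global_ac.actor.trainable_weights))

def sample_action(actor, s, a_bound):
    # mirrors choose_action(): rescale the mean, keep sigma strictly positive, sample, clip
    mu, sigma = actor(s[np.newaxis, :].astype(np.float32))
    mu, sigma = mu * a_bound[1], sigma + 1e-5
    dist = tfd.Normal(mu, sigma)
    a = tf.clip_by_value(tf.squeeze(dist.sample(1), axis=0), *a_bound)
    return a.numpy()[0]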
s_.astype('float32') # double to float - # print('s:',s) - # print('a:', a) - # print('r:',r) - # set robot falls reward to -2 instead of -100 if r == -100: r = -2 @@ -322,7 +201,6 @@ def work(self, globalAC): if done: v_s_ = 0 # terminal else: - # v_s_ = sess.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0] v_s_ = self.AC.critic(s_[np.newaxis, :])[0,0] # reduce dim from 2 to 0 buffer_v_target = [] @@ -336,11 +214,8 @@ def work(self, globalAC): buffer_s, buffer_a, buffer_v_target = ( np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target) ) - # print(buffer_s, buffer_a, buffer_v_target) - # feed_dict = {self.AC.s: buffer_s, self.AC.a_his: buffer_a, self.AC.v_target: buffer_v_target} # update gradients on global network - # self.AC.update_global(feed_dict) - self.AC.update_global(buffer_s, buffer_a, buffer_v_target, globalAC) + self.AC.update_global(buffer_s, buffer_a, buffer_v_target.astype('float32'), globalAC) buffer_s, buffer_a, buffer_r = [], [], [] # update local network from global network @@ -368,7 +243,6 @@ def work(self, globalAC): if __name__ == "__main__": - # sess = tf.Session() # ============================= TRAINING =============================== with tf.device("/cpu:0"): @@ -383,7 +257,6 @@ def work(self, globalAC): workers.append(Worker(i_name, GLOBAL_AC)) COORD = tf.train.Coordinator() - # sess.run(tf.global_variables_initializer()) # start TF threading worker_threads = [] @@ -406,7 +279,6 @@ def work(self, globalAC): # ============================= EVALUATION ============================= # env = gym.make(GAME) # GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE) - # sess.run(tf.global_variables_initializer()) GLOBAL_AC.load_ckpt() while True: s = env.reset() diff --git a/examples/reinforcement_learning/tutorial_cartpole_ac.py b/examples/reinforcement_learning/tutorial_cartpole_ac.py index ecdf68084..19faf05eb 100644 --- a/examples/reinforcement_learning/tutorial_cartpole_ac.py +++ b/examples/reinforcement_learning/tutorial_cartpole_ac.py @@ -39,11 +39,6 @@ import tensorflow as tf import tensorlayer as tl -## enable eager mode -# tf.enable_eager_execution() - - -# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) np.random.seed(2) @@ -65,7 +60,6 @@ N_F = env.observation_space.shape[0] N_A = env.action_space.n -# env.action_space.sample() random sample print("observation dimension: %d" % N_F) # 4 print("observation high: %s" % env.observation_space.high) # [ 2.4 , inf , 0.41887902 , inf] @@ -76,16 +70,6 @@ class Actor(object): def __init__(self, n_features, n_actions, lr=0.001): - # self.sess = sess - # self.s = tf.placeholder(tf.float32, [1, n_features], "state") - # self.a = tf.placeholder(tf.int32, [None], "act") - # self.td_error = tf.placeholder(tf.float32, [None], "td_error") # TD_error - - # with tf.variable_scope('Actor'): # Policy network - # n = InputLayer(self.s, name='in') - # n = DenseLayer(n, n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden') - # # n = DenseLayer(n, n_units=10, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2') - # n = DenseLayer(n, n_units=n_actions, name='Pi') def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='state') @@ -95,49 +79,26 @@ def get_model(inputs_shape): return tl.models.Model(inputs=ni, outputs=nn, name="Actor") self.model = get_model([None, n_features]) self.model.train() - # self.acts_logits = n.outputs - # self.acts_prob = tf.nn.softmax(self.acts_logits) - - # Hao Dong - # with 
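Back in the A3C worker loop above, buffer_v_target holds bootstrapped discounted returns: starting from the critic's value of the last state (or 0 on a terminal state), the reward buffer is walked backwards. The exact loop sits in the elided part of the hunk; its standard form is sketched here:

def discounted_targets(buffer_r, v_s_, gamma):
    # v_s_ is the bootstrap value of the state reached after the last stored reward
    targets = []
    for r in reversed(buffer_r):
        v_s_ = r + gamma * v_s_
        targets.append(v_s_)
    targets.reverse()
    return targets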
tf.variable_scope('loss'): - # self.exp_v = tl.rein.cross_entropy_reward_loss( - # logits=self.acts_logits, actions=self.a, rewards=self.td_error, name='actor_weighted_loss' - # ) - - # with tf.variable_scope('train'): - # self.train_op = tf.train.AdamOptimizer(lr).minimize(self.exp_v) self.optimizer = tf.optimizers.Adam(lr) - # Morvan Zhou (the same) - # with tf.variable_scope('exp_v'): - # # log_prob = tf.log(self.acts_prob[0, self.a[0]]) - # # self.exp_v = tf.reduce_mean(log_prob * self.td_error[0]) # advantage (TD_error) guided loss - # self.exp_v = tl.rein.log_weight(probs=self.acts_prob[0, self.a[0]], trainable_weights=self.td_error) - # - # with tf.variable_scope('train'): - # self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v) # minimize(-exp_v) = maximize(exp_v) def learn(self, s, a, td): - # _, exp_v = self.sess.run([self.train_op, self.exp_v], {self.s: [s], self.a: [a], self.td_error: td[0]}) with tf.GradientTape() as tape: - - # _logits = self.model([s]).outputs _logits = self.model(np.array([s])) - # _probs = tf.nn.softmax(_logits) - _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) # cross-entropy loss weighted by rewards + ## cross-entropy loss weighted by td-error (advantage), + # the cross-entropy mearsures the difference of two probability distributions: the predicted logits and sampled action distribution, + # then weighted by the td-error: small difference of real and predict actions for large td-error (advantage); and vice versa. + _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) grad = tape.gradient(_exp_v, self.model.trainable_weights) self.optimizer.apply_gradients(zip(grad, self.model.trainable_weights)) return _exp_v def choose_action(self, s): - # probs = self.sess.run(self.acts_prob, {self.s: [s]}) # get probabilities of all actions - # _logits = self.model(np.array([s])).outputs _logits = self.model(np.array([s])) _probs = tf.nn.softmax(_logits).numpy() - return tl.rein.choice_action_by_probs(_probs.ravel()) + return tl.rein.choice_action_by_probs(_probs.ravel()) # sample according to probability distribution def choose_action_greedy(self, s): - # probs = self.sess.run(self.acts_prob, {self.s: [s]}) # get probabilities of all actions - _logits = self.model(np.array([s])) + _logits = self.model(np.array([s])) # logits: probability distribution of actions _probs = tf.nn.softmax(_logits).numpy() return np.argmax(_probs.ravel()) @@ -145,17 +106,7 @@ def choose_action_greedy(self, s): class Critic(object): def __init__(self, n_features, lr=0.01): - # self.sess = sess - # self.s = tf.placeholder(tf.float32, [1, n_features], "state") - # self.v_ = tf.placeholder(tf.float32, [1, 1], "v_next") - # self.r = tf.placeholder(tf.float32, None, 'r') - - # with tf.variable_scope('Critic'): # we use Value-function here, not Action-Value-function - # n = InputLayer(self.s, name='in') - # n = DenseLayer(n, n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden') - # # n = DenseLayer(n, n_units=5, act=tf.nn.relu, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2') - # n = DenseLayer(n, n_units=1, act=None, name='V') - # self.v = n.outputs + def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='state') nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden')(ni) @@ -164,21 +115,14 @@ def get_model(inputs_shape): return tl.models.Model(inputs=ni, outputs=nn, name="Critic") 
self.model = get_model([1, n_features]) self.model.train() - # with tf.variable_scope('squared_TD_error'): - # # TD_error = r + lambd * V(newS) - V(S) - # self.td_error = self.r + LAMBDA * self.v_ - self.v - # self.loss = tf.square(self.td_error) - # with tf.variable_scope('train'): - # self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss) + self.optimizer = tf.optimizers.Adam(lr) def learn(self, s, r, s_): - # v_ = self.sess.run(self.v, {self.s: [s_]}) v_ = self.model(np.array([s_])) - # td_error, _ = self.sess.run([self.td_error, self.train_op], {self.s: [s], self.v_: v_, self.r: r}) with tf.GradientTape() as tape: v = self.model(np.array([s])) - # TD_error = r + lambd * V(newS) - V(S) + ## TD_error = r + lambd * V(newS) - V(S) td_error = r + LAMBDA * v_ - v loss = tf.square(td_error) grad = tape.gradient(loss, self.model.trainable_weights) @@ -215,7 +159,7 @@ def learn(self, s, r, s_): all_r.append(r) td_error = critic.learn(s, r, s_new) # learn Value-function : gradient = grad[r + lambda * V(s_new) - V(s)] - actor.learn(s, a, td_error) # learn Policy : true_gradient = grad[logPi(s, a) * td_error] + actor.learn(s, a, td_error) # learn Policy : true_gradient = grad[logPi(s, a) * td_error] s = s_new t += 1 diff --git a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py index 8e96a279e..935e3e04b 100644 --- a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py +++ b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py @@ -24,6 +24,10 @@ The episode ends when you reach the goal or fall in a hole. You receive a reward of 1 if you reach the goal, and zero otherwise. + +tensorflow==2.0.0a0 +tensorlayer==2.0.0 + """ import time @@ -33,11 +37,6 @@ import tensorflow as tf import tensorlayer as tl -## enable eager mode -# tf.enable_eager_execution() - - -# tf.logging.set_verbosity(tf.logging.DEBUG) tl.logging.set_verbosity(tl.logging.DEBUG) env = gym.make('FrozenLake-v0') @@ -50,13 +49,8 @@ def to_one_hot(i, n_classes=None): render = False # display the game environment running_reward = None - # tf.reset_default_graph() ## Define Q-network q(a,s) that ouput the rewards of 4 actions by given state, i.e. Action-Value Function. -# 4x4 grid can be represented by one-hot vector with 16 integers. - # inputs = tf.placeholder(shape=[1, 16], dtype=tf.float32) - # net = InputLayer(inputs, name='observation') - # net = DenseLayer(net, 4, act=None, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s') - # y = net.outputs # action-value / rewards of 4 actions +# encoding for state: 4x4 grid can be represented by one-hot vector with 16 integers. def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='observation') nn = tl.layers.Dense(4, act=None, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s')(ni) @@ -65,13 +59,6 @@ def get_model(inputs_shape): qnetwork.train() train_weights = qnetwork.trainable_weights -# chose action greedily with reward. in Q-Learning, policy is greedy, so we use "max" to select the next action. - # predict = tf.argmax(y, 1) - -## Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values. 
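Stepping back to the CartPole hunk: Critic.learn above is plain TD(0). The value network regresses towards r + LAMBDA * V(s'), and the resulting TD error doubles as the advantage handed to the actor. As a standalone function with illustrative names:

import numpy as np
import tensorflow as tf

def critic_learn(model, optimizer, s, r, s_next, gamma):
    v_next = model(np.array([s_next], dtype=np.float32))
    with tf.GradientTape() as tape:
        v = model(np.array([s], dtype=np.float32))
        td_error = r + gamma * v_next - v   # TD(0) target minus current estimate
        loss = tf.square(td_error)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return td_error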
- # nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32) - # loss = tl.cost.mean_squared_error(nextQ, y, is_mean=False) # tf.reduce_sum(tf.square(nextQ - y)) - # train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss) optimizer = tf.optimizers.SGD(learning_rate=0.1) ## Set learning parameters @@ -79,8 +66,6 @@ def get_model(inputs_shape): e = 0.1 # e-Greedy Exploration, the larger the more random num_episodes = 10000 -# with tf.Session() as sess: - # tl.layers.initialize_global_variables(sess) for i in range(num_episodes): ## Reset environment and get first new observation episode_time = time.time() @@ -89,7 +74,6 @@ def get_model(inputs_shape): for j in range(99): # step index, maximum step is 99 if render: env.render() ## Choose an action by greedily (with e chance of random action) from the Q-network - # a, allQ = sess.run([predict, y], feed_dict={inputs: [to_one_hot(s, 16)]}) allQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).numpy() a = np.argmax(allQ, 1) @@ -99,7 +83,6 @@ def get_model(inputs_shape): ## Get new state and reward from environment s1, r, d, _ = env.step(a[0]) ## Obtain the Q' values by feeding the new state through our network - # Q1 = sess.run(y, feed_dict={inputs: [to_one_hot(s1, 16)]}) Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).numpy() ## Obtain maxQ' and set our target value for chosen action. @@ -110,9 +93,7 @@ def get_model(inputs_shape): # it is not real target Q value, it is just an estimation, # but check the Q-Learning update formula: # Q'(s,a) <- Q(s,a) + alpha(r + lambd * maxQ(s',a') - Q(s, a)) - # minimizing |r + lambd * maxQ(s',a') - Q(s, a)|^2 equal to force - # Q'(s,a) ≈ Q(s,a) - # _ = sess.run(train_op, {inputs: [to_one_hot(s, 16)], nextQ: targetQ}) + # minimizing |r + lambd * maxQ(s',a') - Q(s, a)|^2 equals to force Q'(s,a) ≈ Q(s,a) with tf.GradientTape() as tape: _qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)) _loss = tl.cost.mean_squared_error(targetQ, _qvalues, is_mean=False) @@ -128,5 +109,5 @@ def get_model(inputs_shape): ## Note that, the rewards here with random action running_reward = rAll if running_reward is None else running_reward * 0.99 + rAll * 0.01 - print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs %s" % \ - (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!')) + print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs " % \ + (i, num_episodes, rAll, running_reward, time.time() - episode_time)) diff --git a/examples/reinforcement_learning/tutorial_frozenlake_q_table.py b/examples/reinforcement_learning/tutorial_frozenlake_q_table.py index a5b44059a..a8decb273 100644 --- a/examples/reinforcement_learning/tutorial_frozenlake_q_table.py +++ b/examples/reinforcement_learning/tutorial_frozenlake_q_table.py @@ -11,6 +11,9 @@ EN: https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-0-q-learning-with-tables-and-neural-networks-d195264329d0#.5m3361vlw CN: https://zhuanlan.zhihu.com/p/25710327 +tensorflow==2.0.0a0 +tensorlayer==2.0.0 + """ import time @@ -52,7 +55,7 @@ break rList.append(rAll) running_reward = r if running_reward is None else running_reward * 0.99 + r * 0.01 - print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs %s" % \ - (i, num_episodes, rAll, running_reward, time.time() - episode_time, '' if rAll == 0 else ' !!!!!!!!')) + print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs " % \ + (i, num_episodes, 
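For the FrozenLake DQN above, the TF1 placeholder/feed_dict machinery collapses into one eager update per step: build the target vector by overwriting only the taken action's Q value with r + lambd * maxQ(s'), then regress the network onto it. As a standalone function (illustrative names; to_one_hot and lambd are the tutorial's helper and discount constant):

import numpy as np
import tensorflow as tf
import tensorlayer as tl

def q_update(qnetwork, optimizer, s, a, r, s1, lambd, to_one_hot):
    allQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).numpy()
    Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).numpy()
    targetQ = allQ.copy()
    targetQ[0, a] = r + lambd * np.max(Q1)   # Q-learning target for the chosen action only
    with tf.GradientTape() as tape:
        qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32))
        loss = tl.cost.mean_squared_error(targetQ, qvalues, is_mean=False)
    grads = tape.gradient(loss, qnetwork.trainable_weights)
    optimizer.apply_gradients(zip(grads, qnetwork.trainable_weights))
    return loss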
rAll, running_reward, time.time() - episode_time)) print("Final Q-Table Values:/n %s" % Q) From e0e903b178ef0c5029b17d64fe3d8c3506e6052a Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Tue, 14 May 2019 12:33:39 +0100 Subject: [PATCH 3/4] change log --- CHANGELOG.md | 457 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 457 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..54f3d153f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,457 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + + + + + +## [Unreleased] + +### Added + - Layer + - `InstanceNorm`, `InstanceNorm1d`, `InstanceNorm2d`, `InstanceNorm3d` (PR #963) + +### Changed +- remove `tl.layers.initialize_global_variables(sess)` (PR #931) +- change `tl.layers.core`, `tl.models.core` (PR #966) +- change `weights` into `all_weights`, `trainable_weights`, `nontrainable_weights` +- change reinforcement learning examples from TL1 to TL2 (PR #970) + +### Dependencies Update +- nltk>=3.3,<3.4 => nltk>=3.3,<3.5 (PR #892) +- pytest>=3.6,<3.11 => pytest>=3.6,<4.1 (PR #889) +- yapf>=0.22,<0.25 => yapf==0.25.0 (PR #896) +- imageio==2.5.0 progressbar2==3.39.3 scikit-learn==0.21.0 scikit-image==0.15.0 scipy==1.2.1 wrapt==1.11.1 pymongo==3.8.0 sphinx==2.0.1 wrapt==1.11.1 opencv-python==4.1.0.25 requests==2.21.0 tqdm==4.31.1 lxml==4.3.3 pycodestyle==2.5.0 sphinx==2.0.1 yapf==0.27.0(PR #967) + +### Deprecated + +### Fixed +- fix docs of models @zsdonghao #957 +- In `BatchNorm`, keep dimensions of mean and variance to suit `channels first` (PR #963) + + +### Removed + +### Security + +### Contributors +- @zsdonghao: #931 +- @yd-yin: #963 + + +## [2.0.0-alpha] - 2019-05-04 + +### Changed +* update for TensorLayer 2.0.0 alpha version (PR #952) +* support TensorFlow 2.0.0-alpha +* support both static and dynamic model building + +### Dependencies Update +- tensorflow>=1.6,<1.13 => tensorflow>=2.0.0-alpha (PR #952) +- h5py>=2.9 (PR #952) +- cloudpickle>=0.8.1 (PR #952) +- remove matplotlib + +### Contributors +- @zsdonghao +- @JingqingZ +- @ChrisWu1997 +- @warshallrho + +## [1.11.1] - 2018-11-15 + +### Changed +* guide for pose estimation - flipping (PR #884) +* cv2 transform support 2 modes (PR #885) + +### Dependencies Update +- pytest>=3.6,<3.9 => pytest>=3.6,<3.10 (PR #874) +- requests>=2.19,<2.20 => requests>=2.19,<2.21 (PR #874) +- tqdm>=4.23,<4.28 => tqdm>=4.23,<4.29 (PR #878) +- pytest>=3.6,<3.10 => pytest>=3.6,<3.11 (PR #886) +- pytest-xdist>=1.22,<1.24 => pytest-xdist>=1.22,<1.25 (PR #883) +- tensorflow>=1.6,<1.12 => tensorflow>=1.6,<1.13 (PR #886) + +### Contributors +- @zsdonghao: #884 #885 + +## [1.11.0] - 2018-10-18 + +### Added +- Layer: + - Release `GroupNormLayer` (PR #850) +- Image affine transformation APIs + - `affine_rotation_matrix` (PR #857) + - `affine_horizontal_flip_matrix` (PR #857) + - `affine_vertical_flip_matrix` (PR #857) + - `affine_shift_matrix` (PR #857) + - `affine_shear_matrix` (PR #857) + - `affine_zoom_matrix` (PR #857) + - `affine_transform_cv2` (PR #857) + - `affine_transform_keypoints` (PR #857) +- Affine transformation tutorial + - `examples/data_process/tutorial_fast_affine_transform.py` (PR #857) + +### Changed +- BatchNormLayer: support `data_format` + +### Dependencies Update +- matplotlib>=2.2,<2.3 => 
matplotlib>=2.2,<3.1 (PR #845) +- pydocstyle>=2.1,<2.2 => pydocstyle>=2.1,<3.1 (PR #866) +- scikit-learn>=0.19,<0.20 => scikit-learn>=0.19,<0.21 (PR #851) +- sphinx>=1.7,<1.8 => sphinx>=1.7,<1.9 (PR #842) +- tensorflow>=1.6,<1.11 => tensorflow>=1.6,<1.12 (PR #853) +- tqdm>=4.23,<4.26 => tqdm>=4.23,<4.28 (PR #862 & #868) +- yapf>=0.22,<0.24 => yapf>=0.22,<0.25 (PR #829) + +### Fixed +- Correct offset calculation in `tl.prepro.transform_matrix_offset_center` (PR #855) + +### Contributors +- @2wins: #850 #855 +- @DEKHTIARJonathan: #853 +- @zsdonghao: #857 +- @luomai: #857 + +## [1.10.1] - 2018-09-07 + +### Added +- unittest `tests\test_timeout.py` has been added to ensure the network creation process does not freeze. + +### Changed + - remove 'tensorboard' param, replaced by 'tensorboard_dir' in `tensorlayer/utils.py` with customizable tensorboard directory (PR #819) + +### Removed +- TL Graph API removed. Memory Leaks Issues with this API, will be fixed and integrated in TL 2.0 (PR #818) + +### Fixed +- Issue #817 fixed: TL 1.10.0 - Memory Leaks and very slow network creation. + +### Dependencies Update +- autopep8>=1.3,<1.4 => autopep8>=1.3,<1.5 (PR #815) +- imageio>=2.3,<2.4 => imageio>=2.3,<2.5 (PR #823) +- pytest>=3.6,<3.8 => pytest>=3.6,<3.9 (PR #823) +- pytest-cov>=2.5,<2.6 => pytest-cov>=2.5,<2.7 (PR #820) + +### Contributors +- @DEKHTIARJonathan: #815 #818 #820 #823 +- @ndiy: #819 +- @zsdonghao: #818 + + +## [1.10.0] - 2018-09-02 + +### Added +- API: + - Add `tl.model.vgg19` (PR #698) + - Add `tl.logging.contrib.hyperdash` (PR #739) + - Add `tl.distributed.trainer` (PR #700) + - Add `prefetch_buffer_size` to the `tl.distributed.trainer` (PR #766) + - Add `tl.db.TensorHub` (PR #751) + - Add `tl.files.save_graph` (PR #751) + - Add `tl.files.load_graph_` (PR #751) + - Add `tl.files.save_graph_and_params` (PR #751) + - Add `tl.files.load_graph_and_params` (PR #751) + - Add `tl.prepro.keypoint_random_xxx` (PR #787) +- Documentation: + - Add binary, ternary and dorefa links (PR #711) + - Update input scale of VGG16 and VGG19 to 0~1 (PR #736) + - Update database (PR #751) +- Layer: + - Release SwitchNormLayer (PR #737) + - Release QuanConv2d, QuanConv2dWithBN, QuanDenseLayer, QuanDenseLayerWithBN (PR#735) + - Update Core Layer to support graph (PR #751) + - All Pooling layers support `data_format` (PR #809) +- Setup: + - Creation of installation flaggs `all_dev`, `all_cpu_dev`, and `all_gpu_dev` (PR #739) +- Examples: + - change folder struction (PR #802) + - `tutorial_models_vgg19` has been introduced to show how to use `tl.model.vgg19` (PR #698). 
+ - fix bug of `tutorial_bipedalwalker_a3c_continuous_action.py` (PR #734, Issue #732) + - `tutorial_models_vgg16` and `tutorial_models_vgg19` has been changed the input scale from [0,255] to [0,1](PR #710) + - `tutorial_mnist_distributed_trainer.py` and `tutorial_cifar10_distributed_trainer.py` are added to explain the uses of Distributed Trainer (PR #700) + - add `tutorial_quanconv_cifar10.py` and `tutorial_quanconv_mnist.py` (PR #735) + - add `tutorial_work_with_onnx.py`(PR #775) +- Applications: + - [Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization](https://arxiv.org/abs/1703.06868) (PR #799) + +### Changed + - function minibatches changed to avoid wasting samples.(PR #762) + - all the input scale in both vgg16 and vgg19 has been changed the input scale from [0,255] to [0,1](PR #710) + - Dockerfiles merged and refactored into one file (PR #747) + - LazyImports move to the most **top level** imports as possible (PR #739) + - some new test functions have been added in `test_layers_convolution.py`, `test_layers_normalization.py`, `test_layers_core.py` (PR #735) + - documentation now uses mock imports reducing the number of dependencies to compile the documentation (PR #785) + - fixed and enforced pydocstyle D210, D200, D301, D207, D403, D204, D412, D402, D300, D208 (PR #784) + +### Deprecated + - `tl.logging.warn` has been deprecated in favor of `tl.logging.warning` (PR #739) + +### Removed + - `conv_layers()` has been removed in both vgg16 and vgg19(PR #710) + - graph API (PR #818) + +### Fixed +- import error caused by matplotlib on OSX (PR #705) +- missing import in tl.prepro (PR #712) +- Dockerfiles import error fixed - issue #733 (PR #747) +- Fix a typo in `absolute_difference_error` in file: `tensorlayer/cost.py` - Issue #753 (PR #759) +- Fix the bug of scaling the learning rate of trainer (PR #776) +- log error instead of info when npz file not found. 
(PR #812) + +### Dependencies Update +- numpy>=1.14,<1.15 => numpy>=1.14,<1.16 (PR #754) +- pymongo>=3.6,<3.7 => pymongo>=3.6,<3.8 (PR #750) +- pytest>=3.6,<3.7 => tqdm>=3.6,<3.8 (PR #798) +- pytest-xdist>=1.22,<1.23 => pytest-xdist>=1.22,<1.24 (PR #805 and #806) +- tensorflow>=1.8,<1.9 => tensorflow>=1.6,<1.11 (PR #739 and PR #798) +- tqdm>=4.23,<4.25 => tqdm>=4.23,<4.26 (PR #798) +- yapf>=0.21,<0.22 => yapf>=0.22,<0.24 (PR #798 #808) + +### Contributors +- @DEKHTIARJonathan: #739 #747 #750 #754 +- @lgarithm: #705 #700 +- @OwenLiuzZ: #698 #710 #775 #776 +- @zsdonghao: #711 #712 #734 #736 #737 #700 #751 #809 #818 +- @luomai: #700 #751 #766 #802 +- @XJTUWYD: #735 +- @mutewall: #735 +- @thangvubk: #759 +- @JunbinWang: #796 +- @boldjoel: #787 + +## [1.9.1] - 2018-07-30 + +### Fixed +- Issue with tensorflow 1.10.0 fixed + +## [1.9.0] - 2018-06-16 + +### Added +- API: + - `tl.alphas` and `tl.alphas_like` added following the tf.ones/zeros and tf.zeros_like/ones_like (PR #580) + - `tl.lazy_imports.LazyImport` to import heavy libraries only when necessary (PR #667) + - `tl.act.leaky_relu6` and `tl.layers.PRelu6Layer` have been deprecated (PR #686) + - `tl.act.leaky_twice_relu6` and `tl.layers.PTRelu6Layer` have been deprecated (PR #686) +- CI Tool: + - [Stale Probot](https://github.com/probot/stale) added to clean stale issues (PR #573) + - [Changelog Probot](https://github.com/mikz/probot-changelog) Configuration added (PR #637) + - Travis Builds now handling a matrix of TF Version from TF==1.6.0 to TF==1.8.0 (PR #644) + - CircleCI added to build and upload Docker Containers for each PR merged and tag release (PR #648) +- Decorator: + - `tl.decorators` API created including `deprecated_alias` and `private_method` (PR #660) + - `tl.decorators` API enriched with `protected_method` (PR #675) + - `tl.decorators` API enriched with `deprecated` directly raising warning and modifying documentation (PR #691) +- Docker: + - Containers for each release and for each PR merged on master built (PR #648) + - Containers built in the following configurations (PR #648): + - py2 + cpu + - py2 + gpu + - py3 + cpu + - py3 + gpu +- Documentation: + - Clean README.md (PR #677) + - Release semantic version added on index page (PR #633) + - Optimizers page added (PR #636) + - `AMSGrad` added on Optimizers page added (PR #636) +- Layer: + - ElementwiseLambdaLayer added to use custom function to connect multiple layer inputs (PR #579) + - AtrousDeConv2dLayer added (PR #662) + - Fix bugs of using `tf.layers` in CNN (PR #686) +- Optimizer: + + - AMSGrad Optimizer added based on `On the Convergence of Adam and Beyond (ICLR 2018)` (PR #636) +- Setup: + + - Creation of installation flaggs `all`, `all_cpu`, and `all_gpu` (PR #660) +- Test: + - `test_utils_predict.py` added to reproduce and fix issue #288 (PR #566) + - `Layer_DeformableConvolution_Test` added to reproduce issue #572 with deformable convolution (PR #573) + - `Array_Op_Alphas_Test` and `Array_Op_Alphas_Like_Test` added to test `tensorlayer/array_ops.py` file (PR #580) + - `test_optimizer_amsgrad.py` added to test `AMSGrad` optimizer (PR #636) + - `test_logging.py` added to insure robustness of the logging API (PR #645) + - `test_decorators.py` added (PR #660) + - `test_activations.py` added (PR #686) +- Tutorials: + - `tutorial_tfslim` has been introduced to show how to use `SlimNetsLayer` (PR #560). 
+ - add the following to all tutorials (PR #697): + ```python + tf.logging.set_verbosity(tf.logging.DEBUG) + tl.logging.set_verbosity(tl.logging.DEBUG) + ``` + +### Changed +- Tensorflow CPU & GPU dependencies moved to separated requirement files in order to allow PyUP.io to parse them (PR #573) +- The document of LambdaLayer for linking it with ElementwiseLambdaLayer (PR #587) +- RTD links point to stable documentation instead of latest used for development (PR #633) +- TF Version older than 1.6.0 are officially unsupported and raises an exception (PR #644) +- README.md Badges Updated with Support Python and Tensorflow Versions (PR #644) +- TL logging API has been consistent with TF logging API and thread-safe (PR #645) +- Relative Imports changed for absolute imports (PR #657) +- `tl.files` refactored into a directory with numerous files (PR #657) +- `tl.files.voc_dataset` fixed because of original Pascal VOC website was down (PR #657) +- extra requirements hidden inside the library added in the project requirements (PR #657) +- requirements files refactored in `requirements/` directory (PR #657) +- README.md and other markdown files have been refactored and cleaned. (PR #639) +- Ternary Convolution Layer added in unittest (PR #658) +- Convolution Layers unittests have been cleaned & refactored (PR #658) +- All the tests are now using a DEBUG level verbosity when run individualy (PR #660) +- `tf.identity` as activation is **ignored**, thus reducing the size of the graph by removing useless operation (PR #667) +- argument dictionaries are now checked and saved within the `Layer` Base Class (PR #667) +- `Layer` Base Class now presenting methods to update faultlessly `all_layers`, `all_params`, and `all_drop` (PR #675) +- Input Layers have been removed from `tl.layers.core` and added to `tl.layers.inputs` (PR #675) +- Input Layers are now considered as true layers in the graph (they represent a placeholder), unittests have been updated (PR #675) +- Layer API is simplified, with automatic feeding `prev_layer` into `self.inputs` (PR #675) +- Complete Documentation Refactoring and Reorganization (namely Layer APIs) (PR #691) + +### Deprecated +- `tl.layers.TimeDistributedLayer` argurment `args` is deprecated in favor of `layer_args` (PR #667) +- `tl.act.leaky_relu` have been deprecated in favor of `tf.nn.leaky_relu` (PR #686) + +### Removed +- `assert()` calls remove and replaced by `raise AssertionError()` (PR #667) +- `tl.identity` is removed, not used anymore and deprecated for a long time (PR #667) +- All Code specific to `TF.__version__ < "1.6"` have been removed (PR #675) + +### Fixed +- Issue #498 - Deprecation Warning Fix in `tl.layers.RNNLayer` with `inspect` (PR #574) +- Issue #498 - Deprecation Warning Fix in `tl.files` with truth value of an empty array is ambiguous (PR #575) +- Issue #565 related to `tl.utils.predict` fixed - `np.hstack` problem in which the results for multiple batches are stacked along `axis=1` (PR #566) +- Issue #572 with `tl.layers.DeformableConv2d` fixed (PR #573) +- Issue #664 with `tl.layers.ConvLSTMLayer` fixed (PR #676) +- Typo of the document of ElementwiseLambdaLayer (PR #588) +- Error in `tl.layers.TernaryConv2d` fixed - self.inputs not defined (PR #658) +- Deprecation warning fixed in `tl.layers.binary._compute_threshold()` (PR #658) +- All references to `tf.logging` replaced by `tl.logging` (PR #661) +- Duplicated code removed when bias was used (PR #667) +- `tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops` is now lazy loaded to 
prevent systematic error raised (PR #675) +- Documentation not build in RTD due to old version of theme in docs directory fixed (PR #703) +- Tutorial: + - `tutorial_word2vec_basic.py` saving issue #476 fixed (PR #635) + - All tutorials tested and errors have been fixed (PR #635) + +### Dependencies Update +- Update pytest from 3.5.1 to 3.6.0 (PR #647) +- Update progressbar2 from 3.37.1 to 3.38.0 (PR #651) +- Update scikit-image from 0.13.1 to 0.14.0 (PR #656) +- Update keras from 2.1.6 to 2.2.0 (PR #684) +- Update requests from 2.18.4 to 2.19.0 (PR #695) + +### Contributors +- @lgarithm: #563 +- @DEKHTIARJonathan: #573 #574 #575 #580 #633 #635 #636 #639 #644 #645 #648 #657 #667 #658 #659 #660 #661 #666 #667 #672 #675 #683 #686 #687 #690 #691 #692 #703 +- @2wins: #560 #566 #662 +- @One-sixth: #579 +- @zsdonghao: #587 #588 #639 #685 #697 +- @luomai: #639 #677 +- @dengyueyun666: #676 + +## [1.8.5] - 2018-05-09 + +### Added +- Github Templates added (by @DEKHTIARJonathan) + - New issues Template + - New PR Template +- Travis Deploy Automation on new Tag (by @DEKHTIARJonathan) + - Deploy to PyPI and create a new version. + - Deploy to Github Releases and upload the wheel files +- PyUP.io has been added to ensure we are compatible with the latest libraries (by @DEKHTIARJonathan) +- `deconv2d` now handling dilation_rate (by @zsdonghao) +- Documentation unittest added (by @DEKHTIARJonathan) +- `test_layers_core` has been added to ensure that `LayersConfig` is abstract. + +### Changed +- All Tests Refactored - Now using unittests and runned with PyTest (by @DEKHTIARJonathan) +- Documentation updated (by @zsdonghao) +- Package Setup Refactored (by @DEKHTIARJonathan) +- Dataset Downlaod now using library progressbar2 (by @DEKHTIARJonathan) +- `deconv2d` function transformed into Class (by @zsdonghao) +- `conv1d` function transformed into Class (by @zsdonghao) +- super resolution functions transformed into Class (by @zsdonghao) +- YAPF coding style improved and enforced (by @DEKHTIARJonathan) + +### Fixed +- Backward Compatibility Restored with deprecation warnings (by @DEKHTIARJonathan) +- Tensorflow Deprecation Fix (Issue #498): + - AverageEmbeddingInputlayer (by @zsdonghao) + - load_mpii_pose_dataset (by @zsdonghao) +- maxPool2D initializer issue #551 (by @zsdonghao) +- `LayersConfig` class has been enforced as abstract +- Pooling Layer Issue #557 fixed (by @zsdonghao) + +### Dependencies Update +- scipy>=1.0,<1.1 => scipy>=1.1,<1.2 + +### Contributors +@zsdonghao @luomai @DEKHTIARJonathan + +[Unreleased]: https://github.com/tensorlayer/tensorlayer/compare/1.11....master +[1.11.1]: https://github.com/tensorlayer/tensorlayer/compare/1.11.0...1.11.0 +[1.11.0]: https://github.com/tensorlayer/tensorlayer/compare/1.10.1...1.11.0 +[1.10.1]: https://github.com/tensorlayer/tensorlayer/compare/1.10.0...1.10.1 +[1.10.0]: https://github.com/tensorlayer/tensorlayer/compare/1.9.1...1.10.0 +[1.9.1]: https://github.com/tensorlayer/tensorlayer/compare/1.9.0...1.9.1 +[1.9.0]: https://github.com/tensorlayer/tensorlayer/compare/1.8.5...1.9.0 +[1.8.5]: https://github.com/tensorlayer/tensorlayer/compare/1.8.4...1.8.5 \ No newline at end of file From e5f75932957845b1f50e65427c85e459a44a6f37 Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Tue, 14 May 2019 12:39:20 +0100 Subject: [PATCH 4/4] change log add name --- CHANGELOG.md | 3 +- .../tutorial_atari_pong.py | 10 +++- ...ial_bipedalwalker_a3c_continuous_action.py | 57 ++++++++++--------- .../tutorial_cartpole_ac.py | 28 ++++++--- 
.../tutorial_frozenlake_dqn.py | 5 ++ 5 files changed, 64 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54f3d153f..c6aee1b4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,7 +77,7 @@ To release a new version, please update the changelog as followed: - remove `tl.layers.initialize_global_variables(sess)` (PR #931) - change `tl.layers.core`, `tl.models.core` (PR #966) - change `weights` into `all_weights`, `trainable_weights`, `nontrainable_weights` -- change reinforcement learning examples from TL1 to TL2 (PR #970) +- change reinforcement learning examples from TL1 to TL2 (PR #969) ### Dependencies Update - nltk>=3.3,<3.4 => nltk>=3.3,<3.5 (PR #892) @@ -99,6 +99,7 @@ To release a new version, please update the changelog as followed: ### Contributors - @zsdonghao: #931 - @yd-yin: #963 +- @quantumiracle: #969 ## [2.0.0-alpha] - 2019-05-04 diff --git a/examples/reinforcement_learning/tutorial_atari_pong.py b/examples/reinforcement_learning/tutorial_atari_pong.py index 7e1b28822..0ffee9174 100644 --- a/examples/reinforcement_learning/tutorial_atari_pong.py +++ b/examples/reinforcement_learning/tutorial_atari_pong.py @@ -68,6 +68,8 @@ def prepro(I): episode_number = 0 xs, ys, rs = [], [], [] + + # policy network def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape) @@ -75,12 +77,14 @@ def get_model(inputs_shape): nn = tl.layers.Dense(n_units=3, name='output')(nn) M = tl.models.Model(inputs=ni, outputs=nn, name="mlp") return M + + model = get_model([None, D]) train_weights = model.trainable_weights optimizer = tf.optimizers.RMSprop(lr=learning_rate, decay=decay_rate) -model.train() # set model to train mode (in case you add dropout into the model) +model.train() # set model to train mode (in case you add dropout into the model) start_time = time.time() game_number = 0 @@ -97,8 +101,8 @@ def get_model(inputs_shape): prob = tf.nn.softmax(_prob) # action. 
1: STOP 2: UP 3: DOWN - # action = np.random.choice([1,2,3], p=prob.flatten()) - # action = tl.rein.choice_action_by_probs(prob.flatten(), [1, 2, 3]) + # action = np.random.choice([1,2,3], p=prob.flatten()) + # action = tl.rein.choice_action_by_probs(prob.flatten(), [1, 2, 3]) action = tl.rein.choice_action_by_probs(prob[0].numpy(), [1, 2, 3]) observation, reward, done, _ = env.step(action) diff --git a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py index 796ef9d74..2ad7f229a 100644 --- a/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py +++ b/examples/reinforcement_learning/tutorial_bipedalwalker_a3c_continuous_action.py @@ -51,7 +51,6 @@ tfd = tfp.distributions - tl.logging.set_verbosity(tl.logging.DEBUG) np.random.seed(2) @@ -85,11 +84,12 @@ class ACNet(object): - def __init__(self, scope, globalAC=None): + def __init__(self, scope, globalAC=None): self.scope = scope self.save_path = './model' w_init = tf.keras.initializers.glorot_normal(seed=None) # initializer, glorot=xavier + def get_actor(input_shape): # policy network with tf.name_scope(self.scope): ni = tl.layers.Input(input_shape, name='in') @@ -97,21 +97,26 @@ def get_actor(input_shape): # policy network nn = tl.layers.Dense(n_units=300, act=tf.nn.relu6, W_init=w_init, name='la2')(nn) mu = tl.layers.Dense(n_units=N_A, act=tf.nn.tanh, W_init=w_init, name='mu')(nn) sigma = tl.layers.Dense(n_units=N_A, act=tf.nn.softplus, W_init=w_init, name='sigma')(nn) - return tl.models.Model(inputs=ni, outputs=[mu, sigma], name=scope+'/Actor') - self.actor = get_actor( [None, N_S]) + return tl.models.Model(inputs=ni, outputs=[mu, sigma], name=scope + '/Actor') + + self.actor = get_actor([None, N_S]) self.actor.train() # train mode for Dropout, BatchNorm - def get_critic(input_shape): # we use Value-function here, but not Q-function. + + def get_critic(input_shape): # we use Value-function here, but not Q-function. 
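            # The critic estimates the state-value function V(s) with a single
            # output unit; in update_global() below it is fitted to the
            # bootstrapped value targets, and the resulting TD error is the
            # advantage that weights the actor's log-probability term.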
with tf.name_scope(self.scope): ni = tl.layers.Input(input_shape, name='in') nn = tl.layers.Dense(n_units=500, act=tf.nn.relu6, W_init=w_init, name='lc')(ni) nn = tl.layers.Dense(n_units=300, act=tf.nn.relu6, W_init=w_init, name='lc2')(nn) v = tl.layers.Dense(n_units=1, W_init=w_init, name='v')(nn) - return tl.models.Model(inputs=ni, outputs=v, name=scope+'/Critic') - self.critic = get_critic( [None, N_S]) - self.critic.train() # train mode for Dropout, BatchNorm - - @tf.function # convert numpy functions to tf.Operations in the TFgraph, return tensor - def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer to the global Actor-Crtic network for updating it with samples + return tl.models.Model(inputs=ni, outputs=v, name=scope + '/Critic') + + self.critic = get_critic([None, N_S]) + self.critic.train() # train mode for Dropout, BatchNorm + + @tf.function # convert numpy functions to tf.Operations in the TFgraph, return tensor + def update_global( + self, buffer_s, buffer_a, buffer_v_target, globalAC + ): # refer to the global Actor-Crtic network for updating it with samples ''' update the global critic ''' with tf.GradientTape() as tape: self.v = self.critic(buffer_s) @@ -121,7 +126,6 @@ def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer self.c_grads = tape.gradient(self.c_loss, self.critic.trainable_weights) OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) # local grads applies to global net # del tape # Drop the reference to the tape - ''' update the global actor ''' with tf.GradientTape() as tape: self.mu, self.sigma = self.actor(buffer_s) @@ -129,7 +133,7 @@ def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 normal_dist = tfd.Normal(self.mu, self.sigma) # no tf.contrib for tf2.0 - self.a_his = buffer_a # float32 + self.a_his = buffer_a # float32 log_prob = normal_dist.log_prob(self.a_his) exp_v = log_prob * td # td is from the critic part, no gradients for it entropy = normal_dist.entropy() # encourage exploration @@ -138,7 +142,7 @@ def update_global(self, buffer_s, buffer_a, buffer_v_target, globalAC): # refer self.a_grads = tape.gradient(self.a_loss, self.actor.trainable_weights) OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) # local grads applies to global net return self.test # for test purpose - + @tf.function def pull_global(self, globalAC): # run by a local, pull weights from the global nets for l_p, g_p in zip(self.actor.trainable_weights, globalAC.actor.trainable_weights): @@ -152,18 +156,19 @@ def choose_action(self, s): # run by a local with tf.name_scope('wrap_a_out'): self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5 - normal_dist = tfd.Normal(self.mu, self.sigma) # for continuous action space + normal_dist = tfd.Normal(self.mu, self.sigma) # for continuous action space self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND) return self.A.numpy()[0] - def save_ckpt(self): # save trained weights + def save_ckpt(self): # save trained weights tl.files.save_npz(self.actor.trainable_weights, name='model_actor.npz') tl.files.save_npz(self.critic.trainable_weights, name='model_critic.npz') - def load_ckpt(self): # load trained weights + def load_ckpt(self): # load trained weights tl.files.load_and_assign_npz(name='model_actor.npz', network=self.actor) tl.files.load_and_assign_npz(name='model_critic.npz', network=self.critic) + class Worker(object): 
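    # Each Worker runs in its own thread with its own copy of the environment
    # and a local ACNet. Inside work() it acts with the local actor, buffers
    # (state, action, reward) tuples, and periodically (or at episode end)
    # converts the rewards into discounted value targets bootstrapped from the
    # local critic's V(s_), pushes gradients to the shared network through
    # update_global(), then re-syncs its weights with pull_global().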
def __init__(self, name, globalAC): @@ -183,11 +188,11 @@ def work(self, globalAC): # visualize Worker_0 during training if self.name == 'Worker_0' and total_step % 30 == 0: self.env.render() - s = s.astype('float32') # double to float - a = self.AC.choose_action(s) + s = s.astype('float32') # double to float + a = self.AC.choose_action(s) s_, r, done, _info = self.env.step(a) - - s_ = s_.astype('float32') # double to float + + s_ = s_.astype('float32') # double to float # set robot falls reward to -2 instead of -100 if r == -100: r = -2 @@ -201,7 +206,7 @@ def work(self, globalAC): if done: v_s_ = 0 # terminal else: - v_s_ = self.AC.critic(s_[np.newaxis, :])[0,0] # reduce dim from 2 to 0 + v_s_ = self.AC.critic(s_[np.newaxis, :])[0, 0] # reduce dim from 2 to 0 buffer_v_target = [] @@ -210,7 +215,7 @@ def work(self, globalAC): buffer_v_target.append(v_s_) buffer_v_target.reverse() - + buffer_s, buffer_a, buffer_v_target = ( np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target) ) @@ -245,7 +250,7 @@ def work(self, globalAC): if __name__ == "__main__": # ============================= TRAINING =============================== with tf.device("/cpu:0"): - + OPT_A = tf.optimizers.RMSprop(LR_A, name='RMSPropA') OPT_C = tf.optimizers.RMSprop(LR_C, name='RMSPropC') @@ -263,7 +268,7 @@ def work(self, globalAC): for worker in workers: # t = threading.Thread(target=worker.work) job = lambda: worker.work(GLOBAL_AC) - t = threading.Thread(target=job) + t = threading.Thread(target=job) t.start() worker_threads.append(t) COORD.join(worker_threads) @@ -285,7 +290,7 @@ def work(self, globalAC): rall = 0 while True: env.render() - s = s.astype('float32') # double to float + s = s.astype('float32') # double to float a = GLOBAL_AC.choose_action(s) s, r, d, _ = env.step(a) rall += r diff --git a/examples/reinforcement_learning/tutorial_cartpole_ac.py b/examples/reinforcement_learning/tutorial_cartpole_ac.py index 19faf05eb..8b7aee5a5 100644 --- a/examples/reinforcement_learning/tutorial_cartpole_ac.py +++ b/examples/reinforcement_learning/tutorial_cartpole_ac.py @@ -73,10 +73,15 @@ def __init__(self, n_features, n_actions, lr=0.001): def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='state') - nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden')(ni) - nn = tl.layers.Dense(n_units=10, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2')(nn) + nn = tl.layers.Dense( + n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden' + )(ni) + nn = tl.layers.Dense( + n_units=10, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2' + )(nn) nn = tl.layers.Dense(n_units=n_actions, name='actions')(nn) return tl.models.Model(inputs=ni, outputs=nn, name="Actor") + self.model = get_model([None, n_features]) self.model.train() self.optimizer = tf.optimizers.Adam(lr) @@ -84,10 +89,10 @@ def get_model(inputs_shape): def learn(self, s, a, td): with tf.GradientTape() as tape: _logits = self.model(np.array([s])) - ## cross-entropy loss weighted by td-error (advantage), + ## cross-entropy loss weighted by td-error (advantage), # the cross-entropy mearsures the difference of two probability distributions: the predicted logits and sampled action distribution, - # then weighted by the td-error: small difference of real and predict actions for large td-error (advantage); and vice versa. 
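            # The line below relies on tl.rein.cross_entropy_reward_loss, which
            # (roughly, up to reduction details) computes something along the
            # lines of
            #   ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits)
            #   loss = tf.reduce_sum(ce * rewards)
            # so with rewards=td, minimising this loss raises log pi(a|s) when
            # the advantage is positive and lowers it when it is negative.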
- _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) + # then weighted by the td-error: small difference of real and predict actions for large td-error (advantage); and vice versa. + _exp_v = tl.rein.cross_entropy_reward_loss(logits=_logits, actions=[a], rewards=td[0]) grad = tape.gradient(_exp_v, self.model.trainable_weights) self.optimizer.apply_gradients(zip(grad, self.model.trainable_weights)) return _exp_v @@ -95,7 +100,7 @@ def learn(self, s, a, td): def choose_action(self, s): _logits = self.model(np.array([s])) _probs = tf.nn.softmax(_logits).numpy() - return tl.rein.choice_action_by_probs(_probs.ravel()) # sample according to probability distribution + return tl.rein.choice_action_by_probs(_probs.ravel()) # sample according to probability distribution def choose_action_greedy(self, s): _logits = self.model(np.array([s])) # logits: probability distribution of actions @@ -109,10 +114,15 @@ def __init__(self, n_features, lr=0.01): def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='state') - nn = tl.layers.Dense(n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden')(ni) - nn = tl.layers.Dense(n_units=5, act=tf.nn.relu, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2')(nn) + nn = tl.layers.Dense( + n_units=30, act=tf.nn.relu6, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden' + )(ni) + nn = tl.layers.Dense( + n_units=5, act=tf.nn.relu, W_init=tf.random_uniform_initializer(0, 0.01), name='hidden2' + )(nn) nn = tl.layers.Dense(n_units=1, act=None, name='value')(nn) return tl.models.Model(inputs=ni, outputs=nn, name="Critic") + self.model = get_model([1, n_features]) self.model.train() @@ -130,11 +140,11 @@ def learn(self, s, r, s_): return td_error + actor = Actor(n_features=N_F, n_actions=N_A, lr=LR_A) # we need a good teacher, so the teacher should learn faster than the actor critic = Critic(n_features=N_F, lr=LR_C) - for i_episode in range(MAX_EPISODE): episode_time = time.time() s = env.reset().astype(np.float32) diff --git a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py index 935e3e04b..0fb58be9d 100644 --- a/examples/reinforcement_learning/tutorial_frozenlake_dqn.py +++ b/examples/reinforcement_learning/tutorial_frozenlake_dqn.py @@ -41,20 +41,25 @@ env = gym.make('FrozenLake-v0') + def to_one_hot(i, n_classes=None): a = np.zeros(n_classes, 'uint8') a[i] = 1 return a + render = False # display the game environment running_reward = None + ## Define Q-network q(a,s) that ouput the rewards of 4 actions by given state, i.e. Action-Value Function. # encoding for state: 4x4 grid can be represented by one-hot vector with 16 integers. def get_model(inputs_shape): ni = tl.layers.Input(inputs_shape, name='observation') nn = tl.layers.Dense(4, act=None, W_init=tf.random_uniform_initializer(0, 0.01), b_init=None, name='q_a_s')(ni) return tl.models.Model(inputs=ni, outputs=nn, name="Q-Network") + + qnetwork = get_model([None, 16]) qnetwork.train() train_weights = qnetwork.trainable_weights
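
# A minimal sketch of the eager update step that the rest of this script
# performs with the pieces defined above (qnetwork, train_weights, optimizer,
# to_one_hot); `q_update` and the explicit `discount` argument are illustrative
# names introduced here, not part of the tutorial, which inlines this logic in
# its training loop with its own `lambd` discount factor.
def q_update(s, a, r, s1, discount=0.99):
    ## Bellman target: move Q(s, a) towards r + discount * max_a' Q(s1, a').
    Q1 = qnetwork(np.asarray([to_one_hot(s1, 16)], dtype=np.float32)).numpy()
    targetQ = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32)).numpy()
    targetQ[0, a] = r + discount * np.max(Q1)
    ## Eager gradient step; this replaces the old sess.run(train_op, feed_dict=...).
    with tf.GradientTape() as tape:
        _qvalues = qnetwork(np.asarray([to_one_hot(s, 16)], dtype=np.float32))
        _loss = tl.cost.mean_squared_error(targetQ, _qvalues, is_mean=False)
    grad = tape.gradient(_loss, train_weights)
    optimizer.apply_gradients(zip(grad, train_weights))
    return _loss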