Move time_step, policy_step and trajectory to trajectories/
PiperOrigin-RevId: 243322818
Change-Id: I30fb233282c3a45a946fe6ec155b5a49220661ed
kbanoop authored and Copybara-Service committed Apr 12, 2019
1 parent 184e87f commit 4abdace
Showing 101 changed files with 148 additions and 187 deletions.
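In short, this commit relocates the time_step, policy_step and trajectory modules from tf_agents.environments (and, for policy_step, tf_agents.policies) into the new tf_agents.trajectories package, updating every import site. A minimal sketch of the migration, summarized from the diffs below:

    # Old import paths (before this commit):
    from tf_agents.environments import time_step
    from tf_agents.environments import trajectory
    from tf_agents.policies import policy_step

    # New import paths (after this commit):
    from tf_agents.trajectories import time_step
    from tf_agents.trajectories import trajectory
    from tf_agents.trajectories import policy_step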
@@ -24,13 +24,13 @@

from tf_agents.agents.behavioral_cloning import behavioral_cloning_agent
from tf_agents.drivers import test_utils as driver_test_utils
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.environments import trajectory_replay
from tf_agents.networks import network
from tf_agents.networks import q_network
from tf_agents.networks import q_rnn_network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common

# Number of times to train in test loops.
3 changes: 1 addition & 2 deletions tf_agents/agents/ddpg/actor_rnn_network.py
@@ -18,12 +18,11 @@
import functools
import gin
import tensorflow as tf
-
-from tf_agents.environments import time_step
from tf_agents.networks import dynamic_unroll_layer
from tf_agents.networks import network
from tf_agents.networks import utils
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step
from tf_agents.utils import common
from tf_agents.utils import nest_utils

3 changes: 1 addition & 2 deletions tf_agents/agents/ddpg/actor_rnn_network_test.py
@@ -23,9 +23,8 @@
import tensorflow as tf

from tf_agents.agents.ddpg import actor_rnn_network
-from tf_agents.environments import time_step as ts
from tf_agents.specs import tensor_spec
-
+from tf_agents.trajectories import time_step as ts
from tensorflow.python.framework import test_util # TF internal


3 changes: 1 addition & 2 deletions tf_agents/agents/ddpg/critic_rnn_network.py
@@ -18,12 +18,11 @@
import functools
import gin
import tensorflow as tf
-
-from tf_agents.environments import time_step
from tf_agents.networks import dynamic_unroll_layer
from tf_agents.networks import network
from tf_agents.networks import utils
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step
from tf_agents.utils import nest_utils


3 changes: 1 addition & 2 deletions tf_agents/agents/ddpg/critic_rnn_network_test.py
@@ -22,9 +22,8 @@
import tensorflow as tf

from tf_agents.agents.ddpg import critic_rnn_network
-from tf_agents.environments import time_step as ts
from tf_agents.specs import tensor_spec
-
+from tf_agents.trajectories import time_step as ts
from tensorflow.python.framework import test_util # TF internal


2 changes: 1 addition & 1 deletion tf_agents/agents/ddpg/ddpg_agent.py
@@ -28,9 +28,9 @@
import tensorflow as tf

from tf_agents.agents import tf_agent
-from tf_agents.environments import trajectory
from tf_agents.policies import actor_policy
from tf_agents.policies import ou_noise_policy
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import nest_utils
2 changes: 1 addition & 1 deletion tf_agents/agents/ddpg/ddpg_agent_test.py
@@ -22,9 +22,9 @@
import tensorflow as tf

from tf_agents.agents.ddpg import ddpg_agent
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import common
from tf_agents.utils import test_utils

2 changes: 1 addition & 1 deletion tf_agents/agents/dqn/dqn_agent.py
@@ -30,11 +30,11 @@
import tensorflow as tf

from tf_agents.agents import tf_agent
-from tf_agents.environments import trajectory
from tf_agents.policies import boltzmann_policy
from tf_agents.policies import epsilon_greedy_policy
from tf_agents.policies import greedy_policy
from tf_agents.policies import q_policy
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import nest_utils
2 changes: 1 addition & 1 deletion tf_agents/agents/dqn/dqn_agent_test.py
@@ -23,9 +23,9 @@
import tensorflow as tf

from tf_agents.agents.dqn import dqn_agent
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import common

from tensorflow.python.eager import context # pylint:disable=g-direct-tensorflow-import # TF internal
4 changes: 2 additions & 2 deletions tf_agents/agents/dqn/examples/v1/oog_train_eval.py
@@ -44,15 +44,15 @@
from tf_agents.agents.dqn import dqn_agent
from tf_agents.environments import batched_py_environment
from tf_agents.environments import suite_gym
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.eval import metric_utils
from tf_agents.metrics import py_metrics
from tf_agents.networks import q_network
from tf_agents.policies import py_tf_policy
from tf_agents.policies import random_py_policy
from tf_agents.replay_buffers import py_uniform_replay_buffer
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common

flags.DEFINE_string('root_dir', os.getenv('TEST_UNDECLARED_OUTPUTS_DIR'),
6 changes: 3 additions & 3 deletions tf_agents/agents/dqn/examples/v1/train_eval_atari.py
@@ -59,18 +59,18 @@
from tf_agents.agents.dqn import dqn_agent
from tf_agents.environments import batched_py_environment
from tf_agents.environments import suite_atari
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.eval import metric_utils
from tf_agents.metrics import py_metric
from tf_agents.metrics import py_metrics
from tf_agents.networks import q_network
from tf_agents.policies import epsilon_greedy_policy
-from tf_agents.policies import policy_step
from tf_agents.policies import py_tf_policy
from tf_agents.policies import random_py_policy
from tf_agents.replay_buffers import py_hashed_replay_buffer
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import timer
import gin.tf
2 changes: 1 addition & 1 deletion tf_agents/agents/ppo/ppo_agent.py
@@ -65,11 +65,11 @@
from tf_agents.agents import tf_agent
from tf_agents.agents.ppo import ppo_policy
from tf_agents.agents.ppo import ppo_utils
-from tf_agents.environments import trajectory
from tf_agents.networks import network
from tf_agents.policies import greedy_policy
from tf_agents.specs import distribution_spec
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import nest_utils
4 changes: 2 additions & 2 deletions tf_agents/agents/ppo/ppo_agent_test.py
@@ -28,14 +28,14 @@
import tensorflow_probability as tfp

from tf_agents.agents.ppo import ppo_agent
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.networks import actor_distribution_network
from tf_agents.networks import network
from tf_agents.networks import utils as network_utils
from tf_agents.networks import value_network
from tf_agents.specs import distribution_spec
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import nest_utils
from tf_agents.utils import test_utils
4 changes: 2 additions & 2 deletions tf_agents/agents/ppo/ppo_policy.py
@@ -23,11 +23,11 @@
import tensorflow_probability as tfp

from tf_agents.agents.ppo import ppo_utils
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.policies import actor_policy
-from tf_agents.policies import policy_step
from tf_agents.specs import distribution_spec
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import time_step as ts

tfd = tfp.distributions

2 changes: 1 addition & 1 deletion tf_agents/agents/ppo/ppo_policy_test.py
@@ -24,10 +24,10 @@
import tensorflow_probability as tfp

from tf_agents.agents.ppo import ppo_policy
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.specs import distribution_spec
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import test_utils


5 changes: 2 additions & 3 deletions tf_agents/agents/ppo/ppo_utils.py
@@ -20,9 +20,8 @@
from __future__ import print_function

import tensorflow as tf
-
-from tf_agents.environments import trajectory
-from tf_agents.policies import policy_step
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import trajectory


def make_timestep_mask(batched_next_time_step):
2 changes: 1 addition & 1 deletion tf_agents/agents/ppo/ppo_utils_test.py
@@ -25,7 +25,7 @@
import tensorflow_probability as tfp

from tf_agents.agents.ppo import ppo_utils
-from tf_agents.environments import time_step as ts
+from tf_agents.trajectories import time_step as ts


class PPOUtilsTest(parameterized.TestCase, tf.test.TestCase):
2 changes: 1 addition & 1 deletion tf_agents/agents/reinforce/reinforce_agent.py
@@ -26,9 +26,9 @@
import tensorflow as tf

from tf_agents.agents import tf_agent
-from tf_agents.environments import time_step as ts
from tf_agents.policies import actor_policy
from tf_agents.policies import greedy_policy
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import value_ops
4 changes: 2 additions & 2 deletions tf_agents/agents/reinforce/reinforce_agent_test.py
@@ -24,11 +24,11 @@
import tensorflow_probability as tfp

from tf_agents.agents.reinforce import reinforce_agent
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.networks import actor_distribution_rnn_network
from tf_agents.networks import network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common

from tensorflow.python.util import nest # pylint:disable=g-direct-tensorflow-import # TF internal
2 changes: 1 addition & 1 deletion tf_agents/agents/sac/sac_agent.py
@@ -28,8 +28,8 @@
import tensorflow as tf

from tf_agents.agents import tf_agent
-from tf_agents.environments import trajectory
from tf_agents.policies import actor_policy
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils

4 changes: 2 additions & 2 deletions tf_agents/agents/sac/sac_agent_test.py
@@ -23,9 +23,9 @@
import tensorflow as tf

from tf_agents.agents.sac import sac_agent
-from tf_agents.environments import time_step as ts
-from tf_agents.policies.policy_step import PolicyStep
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories.policy_step import PolicyStep


class _MockDistribution(object):
2 changes: 1 addition & 1 deletion tf_agents/agents/td3/td3_agent.py
@@ -33,9 +33,9 @@
import tensorflow_probability as tfp

from tf_agents.agents import tf_agent
-from tf_agents.environments import trajectory
from tf_agents.policies import actor_policy
from tf_agents.policies import gaussian_policy
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import nest_utils
2 changes: 1 addition & 1 deletion tf_agents/agents/td3/td3_agent_test.py
@@ -21,9 +21,9 @@

import tensorflow as tf
from tf_agents.agents.td3 import td3_agent
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import common
from tf_agents.utils import test_utils

2 changes: 1 addition & 1 deletion tf_agents/agents/tf_agent.py
@@ -23,7 +23,7 @@
import collections
import tensorflow as tf

-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import nest_utils

2 changes: 1 addition & 1 deletion tf_agents/colabs/1_dqn_tutorial.ipynb
@@ -111,12 +111,12 @@
"from tf_agents.drivers import dynamic_step_driver\n",
"from tf_agents.environments import suite_gym\n",
"from tf_agents.environments import tf_py_environment\n",
"from tf_agents.environments import trajectory\n",
"from tf_agents.eval import metric_utils\n",
"from tf_agents.metrics import tf_metrics\n",
"from tf_agents.networks import q_network\n",
"from tf_agents.policies import random_tf_policy\n",
"from tf_agents.replay_buffers import tf_uniform_replay_buffer\n",
"from tf_agents.trajectories import trajectory\n",
"from tf_agents.utils import common\n",
"\n",
"tf.compat.v1.enable_v2_behavior()\n",
2 changes: 1 addition & 1 deletion tf_agents/colabs/2_environments_tutorial.ipynb
@@ -77,9 +77,9 @@
"from tf_agents.environments import tf_py_environment\n",
"from tf_agents.environments import utils\n",
"from tf_agents.specs import array_spec\n",
"from tf_agents.environments import time_step as ts\n",
"from tf_agents.environments import wrappers\n",
"from tf_agents.environments import suite_gym\n",
"from tf_agents.trajectories import time_step as ts\n",
"\n",
"tf.compat.v1.enable_v2_behavior()"
]
3 changes: 2 additions & 1 deletion tf_agents/colabs/3_policies_tutorial.ipynb
@@ -75,7 +75,6 @@
"\n",
"from tf_agents.specs import array_spec\n",
"from tf_agents.specs import tensor_spec\n",
"from tf_agents.environments import time_step as ts\n",
"from tf_agents.networks import network\n",
"\n",
"from tf_agents.policies import py_policy\n",
@@ -88,6 +87,8 @@
"from tf_agents.policies import q_policy\n",
"from tf_agents.policies import greedy_policy\n",
"\n",
"from tf_agents.trajectories import time_step as ts\n",
"\n",
"tf.compat.v1.enable_v2_behavior()"
]
},
2 changes: 1 addition & 1 deletion tf_agents/colabs/5_replay_buffers_tutorial.ipynb
@@ -76,11 +76,11 @@
"from tf_agents.drivers import dynamic_step_driver\n",
"from tf_agents.environments import suite_gym\n",
"from tf_agents.environments import tf_py_environment\n",
"from tf_agents.environments import time_step\n",
"from tf_agents.networks import q_network\n",
"from tf_agents.replay_buffers import py_uniform_replay_buffer\n",
"from tf_agents.replay_buffers import tf_uniform_replay_buffer\n",
"from tf_agents.specs import tensor_spec\n",
"from tf_agents.trajectories import time_step\n",
"\n",
"tf.compat.v1.enable_v2_behavior()"
]
2 changes: 1 addition & 1 deletion tf_agents/drivers/dynamic_episode_driver.py
@@ -21,7 +21,7 @@

import tensorflow as tf
from tf_agents.drivers import driver
-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import nest_utils
import gin.tf
2 changes: 1 addition & 1 deletion tf_agents/drivers/dynamic_step_driver.py
@@ -21,7 +21,7 @@

import tensorflow as tf
from tf_agents.drivers import driver
-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import nest_utils
import gin.tf
2 changes: 1 addition & 1 deletion tf_agents/drivers/py_driver.py
@@ -21,7 +21,7 @@

import numpy as np
from tf_agents.drivers import driver
-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory


class PyDriver(driver.Driver):
2 changes: 1 addition & 1 deletion tf_agents/drivers/py_driver_test.py
@@ -26,7 +26,7 @@
from tf_agents.drivers import py_driver
from tf_agents.drivers import test_utils as driver_test_utils
from tf_agents.environments import batched_py_environment
-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory


class MockReplayBufferObserver(object):
