Move time_step, policy_step and trajectory to trajectories/
PiperOrigin-RevId: 243322818
Change-Id: I30fb233282c3a45a946fe6ec155b5a49220661ed
kbanoop authored and Copybara-Service committed Apr 12, 2019
1 parent 184e87f commit 4abdace
Showing 101 changed files with 148 additions and 187 deletions.
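In short, this commit relocates the time_step, policy_step and trajectory modules from tf_agents.environments (and, for policy_step, tf_agents.policies) into the new tf_agents.trajectories package, updating every import site. A minimal sketch of the migration, summarized from the diffs below:

    # Old import paths (before this commit):
    from tf_agents.environments import time_step
    from tf_agents.environments import trajectory
    from tf_agents.policies import policy_step

    # New import paths (after this commit):
    from tf_agents.trajectories import time_step
    from tf_agents.trajectories import trajectory
    from tf_agents.trajectories import policy_step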
@@ -24,13 +24,13 @@

from tf_agents.agents.behavioral_cloning import behavioral_cloning_agent
from tf_agents.drivers import test_utils as driver_test_utils
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.environments import trajectory_replay
from tf_agents.networks import network
from tf_agents.networks import q_network
from tf_agents.networks import q_rnn_network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common

# Number of times to train in test loops.
3 changes: 1 addition & 2 deletions tf_agents/agents/ddpg/actor_rnn_network.py
@@ -18,12 +18,11 @@
import functools
import gin
import tensorflow as tf
-
-from tf_agents.environments import time_step
from tf_agents.networks import dynamic_unroll_layer
from tf_agents.networks import network
from tf_agents.networks import utils
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step
from tf_agents.utils import common
from tf_agents.utils import nest_utils

3 changes: 1 addition & 2 deletions tf_agents/agents/ddpg/actor_rnn_network_test.py
@@ -23,9 +23,8 @@
import tensorflow as tf

from tf_agents.agents.ddpg import actor_rnn_network
-from tf_agents.environments import time_step as ts
from tf_agents.specs import tensor_spec
-
+from tf_agents.trajectories import time_step as ts
from tensorflow.python.framework import test_util # TF internal


3 changes: 1 addition & 2 deletions tf_agents/agents/ddpg/critic_rnn_network.py
@@ -18,12 +18,11 @@
import functools
import gin
import tensorflow as tf
-
-from tf_agents.environments import time_step
from tf_agents.networks import dynamic_unroll_layer
from tf_agents.networks import network
from tf_agents.networks import utils
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step
from tf_agents.utils import nest_utils


3 changes: 1 addition & 2 deletions tf_agents/agents/ddpg/critic_rnn_network_test.py
@@ -22,9 +22,8 @@
import tensorflow as tf

from tf_agents.agents.ddpg import critic_rnn_network
-from tf_agents.environments import time_step as ts
from tf_agents.specs import tensor_spec
-
+from tf_agents.trajectories import time_step as ts
from tensorflow.python.framework import test_util # TF internal


2 changes: 1 addition & 1 deletion tf_agents/agents/ddpg/ddpg_agent.py
@@ -28,9 +28,9 @@
import tensorflow as tf

from tf_agents.agents import tf_agent
-from tf_agents.environments import trajectory
from tf_agents.policies import actor_policy
from tf_agents.policies import ou_noise_policy
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import nest_utils
2 changes: 1 addition & 1 deletion tf_agents/agents/ddpg/ddpg_agent_test.py
@@ -22,9 +22,9 @@
import tensorflow as tf

from tf_agents.agents.ddpg import ddpg_agent
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import common
from tf_agents.utils import test_utils

2 changes: 1 addition & 1 deletion tf_agents/agents/dqn/dqn_agent.py
@@ -30,11 +30,11 @@
import tensorflow as tf

from tf_agents.agents import tf_agent
-from tf_agents.environments import trajectory
from tf_agents.policies import boltzmann_policy
from tf_agents.policies import epsilon_greedy_policy
from tf_agents.policies import greedy_policy
from tf_agents.policies import q_policy
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import nest_utils
2 changes: 1 addition & 1 deletion tf_agents/agents/dqn/dqn_agent_test.py
@@ -23,9 +23,9 @@
import tensorflow as tf

from tf_agents.agents.dqn import dqn_agent
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import common

from tensorflow.python.eager import context # pylint:disable=g-direct-tensorflow-import # TF internal
4 changes: 2 additions & 2 deletions tf_agents/agents/dqn/examples/v1/oog_train_eval.py
@@ -44,15 +44,15 @@
from tf_agents.agents.dqn import dqn_agent
from tf_agents.environments import batched_py_environment
from tf_agents.environments import suite_gym
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.eval import metric_utils
from tf_agents.metrics import py_metrics
from tf_agents.networks import q_network
from tf_agents.policies import py_tf_policy
from tf_agents.policies import random_py_policy
from tf_agents.replay_buffers import py_uniform_replay_buffer
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common

flags.DEFINE_string('root_dir', os.getenv('TEST_UNDECLARED_OUTPUTS_DIR'),
6 changes: 3 additions & 3 deletions tf_agents/agents/dqn/examples/v1/train_eval_atari.py
@@ -59,18 +59,18 @@
from tf_agents.agents.dqn import dqn_agent
from tf_agents.environments import batched_py_environment
from tf_agents.environments import suite_atari
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.eval import metric_utils
from tf_agents.metrics import py_metric
from tf_agents.metrics import py_metrics
from tf_agents.networks import q_network
from tf_agents.policies import epsilon_greedy_policy
-from tf_agents.policies import policy_step
from tf_agents.policies import py_tf_policy
from tf_agents.policies import random_py_policy
from tf_agents.replay_buffers import py_hashed_replay_buffer
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import timer
import gin.tf
2 changes: 1 addition & 1 deletion tf_agents/agents/ppo/ppo_agent.py
@@ -65,11 +65,11 @@
from tf_agents.agents import tf_agent
from tf_agents.agents.ppo import ppo_policy
from tf_agents.agents.ppo import ppo_utils
-from tf_agents.environments import trajectory
from tf_agents.networks import network
from tf_agents.policies import greedy_policy
from tf_agents.specs import distribution_spec
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import nest_utils
4 changes: 2 additions & 2 deletions tf_agents/agents/ppo/ppo_agent_test.py
@@ -28,14 +28,14 @@
import tensorflow_probability as tfp

from tf_agents.agents.ppo import ppo_agent
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.networks import actor_distribution_network
from tf_agents.networks import network
from tf_agents.networks import utils as network_utils
from tf_agents.networks import value_network
from tf_agents.specs import distribution_spec
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import nest_utils
from tf_agents.utils import test_utils
4 changes: 2 additions & 2 deletions tf_agents/agents/ppo/ppo_policy.py
@@ -23,11 +23,11 @@
import tensorflow_probability as tfp

from tf_agents.agents.ppo import ppo_utils
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.policies import actor_policy
-from tf_agents.policies import policy_step
from tf_agents.specs import distribution_spec
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import time_step as ts

tfd = tfp.distributions

2 changes: 1 addition & 1 deletion tf_agents/agents/ppo/ppo_policy_test.py
@@ -24,10 +24,10 @@
import tensorflow_probability as tfp

from tf_agents.agents.ppo import ppo_policy
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.specs import distribution_spec
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import test_utils


5 changes: 2 additions & 3 deletions tf_agents/agents/ppo/ppo_utils.py
@@ -20,9 +20,8 @@
from __future__ import print_function

import tensorflow as tf
-
-from tf_agents.environments import trajectory
-from tf_agents.policies import policy_step
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import trajectory


def make_timestep_mask(batched_next_time_step):
2 changes: 1 addition & 1 deletion tf_agents/agents/ppo/ppo_utils_test.py
@@ -25,7 +25,7 @@
import tensorflow_probability as tfp

from tf_agents.agents.ppo import ppo_utils
-from tf_agents.environments import time_step as ts
+from tf_agents.trajectories import time_step as ts


class PPOUtilsTest(parameterized.TestCase, tf.test.TestCase):
2 changes: 1 addition & 1 deletion tf_agents/agents/reinforce/reinforce_agent.py
@@ -26,9 +26,9 @@
import tensorflow as tf

from tf_agents.agents import tf_agent
-from tf_agents.environments import time_step as ts
from tf_agents.policies import actor_policy
from tf_agents.policies import greedy_policy
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import value_ops
4 changes: 2 additions & 2 deletions tf_agents/agents/reinforce/reinforce_agent_test.py
@@ -24,11 +24,11 @@
import tensorflow_probability as tfp

from tf_agents.agents.reinforce import reinforce_agent
-from tf_agents.environments import time_step as ts
-from tf_agents.environments import trajectory
from tf_agents.networks import actor_distribution_rnn_network
from tf_agents.networks import network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common

from tensorflow.python.util import nest # pylint:disable=g-direct-tensorflow-import # TF internal
2 changes: 1 addition & 1 deletion tf_agents/agents/sac/sac_agent.py
@@ -28,8 +28,8 @@
import tensorflow as tf

from tf_agents.agents import tf_agent
-from tf_agents.environments import trajectory
from tf_agents.policies import actor_policy
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils

4 changes: 2 additions & 2 deletions tf_agents/agents/sac/sac_agent_test.py
@@ -23,9 +23,9 @@
import tensorflow as tf

from tf_agents.agents.sac import sac_agent
-from tf_agents.environments import time_step as ts
-from tf_agents.policies.policy_step import PolicyStep
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
+from tf_agents.trajectories.policy_step import PolicyStep


class _MockDistribution(object):
2 changes: 1 addition & 1 deletion tf_agents/agents/td3/td3_agent.py
@@ -33,9 +33,9 @@
import tensorflow_probability as tfp

from tf_agents.agents import tf_agent
-from tf_agents.environments import trajectory
from tf_agents.policies import actor_policy
from tf_agents.policies import gaussian_policy
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import eager_utils
from tf_agents.utils import nest_utils
2 changes: 1 addition & 1 deletion tf_agents/agents/td3/td3_agent_test.py
@@ -21,9 +21,9 @@

import tensorflow as tf
from tf_agents.agents.td3 import td3_agent
-from tf_agents.environments import time_step as ts
from tf_agents.networks import network
from tf_agents.specs import tensor_spec
+from tf_agents.trajectories import time_step as ts
from tf_agents.utils import common
from tf_agents.utils import test_utils

2 changes: 1 addition & 1 deletion tf_agents/agents/tf_agent.py
@@ -23,7 +23,7 @@
import collections
import tensorflow as tf

-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import nest_utils

2 changes: 1 addition & 1 deletion tf_agents/colabs/1_dqn_tutorial.ipynb
@@ -111,12 +111,12 @@
"from tf_agents.drivers import dynamic_step_driver\n",
"from tf_agents.environments import suite_gym\n",
"from tf_agents.environments import tf_py_environment\n",
"from tf_agents.environments import trajectory\n",
"from tf_agents.eval import metric_utils\n",
"from tf_agents.metrics import tf_metrics\n",
"from tf_agents.networks import q_network\n",
"from tf_agents.policies import random_tf_policy\n",
"from tf_agents.replay_buffers import tf_uniform_replay_buffer\n",
"from tf_agents.trajectories import trajectory\n",
"from tf_agents.utils import common\n",
"\n",
"tf.compat.v1.enable_v2_behavior()\n",
2 changes: 1 addition & 1 deletion tf_agents/colabs/2_environments_tutorial.ipynb
@@ -77,9 +77,9 @@
"from tf_agents.environments import tf_py_environment\n",
"from tf_agents.environments import utils\n",
"from tf_agents.specs import array_spec\n",
"from tf_agents.environments import time_step as ts\n",
"from tf_agents.environments import wrappers\n",
"from tf_agents.environments import suite_gym\n",
"from tf_agents.trajectories import time_step as ts\n",
"\n",
"tf.compat.v1.enable_v2_behavior()"
]
3 changes: 2 additions & 1 deletion tf_agents/colabs/3_policies_tutorial.ipynb
@@ -75,7 +75,6 @@
"\n",
"from tf_agents.specs import array_spec\n",
"from tf_agents.specs import tensor_spec\n",
"from tf_agents.environments import time_step as ts\n",
"from tf_agents.networks import network\n",
"\n",
"from tf_agents.policies import py_policy\n",
@@ -88,6 +87,8 @@
"from tf_agents.policies import q_policy\n",
"from tf_agents.policies import greedy_policy\n",
"\n",
"from tf_agents.trajectories import time_step as ts\n",
"\n",
"tf.compat.v1.enable_v2_behavior()"
]
},
2 changes: 1 addition & 1 deletion tf_agents/colabs/5_replay_buffers_tutorial.ipynb
@@ -76,11 +76,11 @@
"from tf_agents.drivers import dynamic_step_driver\n",
"from tf_agents.environments import suite_gym\n",
"from tf_agents.environments import tf_py_environment\n",
"from tf_agents.environments import time_step\n",
"from tf_agents.networks import q_network\n",
"from tf_agents.replay_buffers import py_uniform_replay_buffer\n",
"from tf_agents.replay_buffers import tf_uniform_replay_buffer\n",
"from tf_agents.specs import tensor_spec\n",
"from tf_agents.trajectories import time_step\n",
"\n",
"tf.compat.v1.enable_v2_behavior()"
]
2 changes: 1 addition & 1 deletion tf_agents/drivers/dynamic_episode_driver.py
@@ -21,7 +21,7 @@

import tensorflow as tf
from tf_agents.drivers import driver
-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import nest_utils
import gin.tf
2 changes: 1 addition & 1 deletion tf_agents/drivers/dynamic_step_driver.py
@@ -21,7 +21,7 @@

import tensorflow as tf
from tf_agents.drivers import driver
-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory
from tf_agents.utils import common
from tf_agents.utils import nest_utils
import gin.tf
2 changes: 1 addition & 1 deletion tf_agents/drivers/py_driver.py
@@ -21,7 +21,7 @@

import numpy as np
from tf_agents.drivers import driver
-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory


class PyDriver(driver.Driver):
2 changes: 1 addition & 1 deletion tf_agents/drivers/py_driver_test.py
@@ -26,7 +26,7 @@
from tf_agents.drivers import py_driver
from tf_agents.drivers import test_utils as driver_test_utils
from tf_agents.environments import batched_py_environment
-from tf_agents.environments import trajectory
+from tf_agents.trajectories import trajectory


class MockReplayBufferObserver(object):
