Upgrade akro to version 0.0.5 (#740)

* Upgrade akro to version 0.0.5
* Update akro imports to match guidelines
* Update API calls to point to new methods
* Fix tests broken by akro upgrade
* Make some tests large to pass CI
rlworkgroup · Jul 4, 2019 · 452c9be · 452c9be
1 parent e7d2fc0
commit 452c9be
Show file tree

Hide file tree

Showing 38 changed files with 106 additions and 149 deletions.
diff --git a/docs/user/implement_algo_advanced.rst b/docs/user/implement_algo_advanced.rst
@@ -84,13 +84,13 @@ extra diagnostic information as well as supporting recurrent policies):
     # ...
 
     def init_opt(self):
-        obs_var = self.env.observation_space.new_tensor_variable(
+        obs_var = self.env.observation_space.to_tf_placeholder(
             'obs',
-            extra_dims=1,
+            batch_dims=1,
         )
-        action_var = self.env.action_space.new_tensor_variable(
+        action_var = self.env.action_space.to_tf_placeholder(
             'action',
-            extra_dims=1,
+            batch_dims=1,
         )
         advantage_var = tf.placeholder('advantage')
         dist = self.policy.distribution

diff --git a/docs/user/implement_algo_basic.rst b/docs/user/implement_algo_basic.rst
@@ -198,14 +198,14 @@ First, we construct symbolic variables for the input data:
     # doing it in a slightly more abstract way allows us to delegate to the environment for handling the correct data
     # type for the variable. For instance, for an environment with discrete observations, we might want to use integer
     # types if the observations are represented as one-hot vectors.
-    observations_var = env.observation_space.new_tensor_variable(
+    observations_var = env.observation_space.to_tf_placeholder(
         name='observations',
         # It should have 1 extra dimension since we want to represent a list of observations
-        extra_dims=1
+        batch_dims=1
     )
-    actions_var = env.action_space.new_tensor_variable(
+    actions_var = env.action_space.to_tf_placeholder(
         name='actions',
-        extra_dims=1
+        batch_dims=1
     )
     returns_var = tf.placeholder(name='returns')
 

diff --git a/docs/user/implement_env.rst b/docs/user/implement_env.rst
@@ -68,10 +68,11 @@ the base environment and add some imports:
 
 .. code-block:: python
 
+    import akro
+    import numpy as np
+
     from garage.envs.base import Env
     from garage.envs.base import Step
-    from akro import Box
-    import numpy as np
 
 
     class PointEnv(Env):
@@ -90,11 +91,11 @@ property methods:
 
         @property
         def observation_space(self):
-            return Box(low=-np.inf, high=np.inf, shape=(2,))
+            return akro.Box(low=-np.inf, high=np.inf, shape=(2,))
 
         @property
         def action_space(self):
-            return Box(low=-0.1, high=0.1, shape=(2,))
+            return akro.Box(low=-0.1, high=0.1, shape=(2,))
 
 The :code:`Box` space means that the observations and actions are 2D vectors
 with continuous values. The observations can have arbitrary values, while the

diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 # Required dependencies
 required = [
     # Please keep alphabetized
-    'akro==0.0.2',
+    'akro==0.0.5',
     'cached_property',
     'click',
     'cloudpickle',

diff --git a/src/garage/envs/base.py b/src/garage/envs/base.py
@@ -1,18 +1,11 @@
 """Wrapper class that converts gym.Env into GarageEnv."""
 import collections
 
-from akro import Box
-from akro import Dict
-from akro import Discrete
-from akro import Tuple
+import akro
 import glfw
 import gym
 from gym.envs.classic_control.rendering import SimpleImageViewer
 from gym.envs.classic_control.rendering import Viewer
-from gym.spaces import Box as GymBox
-from gym.spaces import Dict as GymDict
-from gym.spaces import Discrete as GymDiscrete
-from gym.spaces import Tuple as GymTuple
 
 from garage.core import Serializable
 from garage.envs.env_spec import EnvSpec
@@ -55,9 +48,8 @@ def __init__(self, env=None, env_name=''):
         else:
             super().__init__(env)
 
-        self.action_space = self._to_akro_space(self.env.action_space)
-        self.observation_space = self._to_akro_space(
-            self.env.observation_space)
+        self.action_space = akro.from_gym(self.env.action_space)
+        self.observation_space = akro.from_gym(self.env.observation_space)
         if self.spec:
             self.spec.action_space = self.action_space
             self.spec.observation_space = self.observation_space
@@ -125,27 +117,6 @@ def step(self, action):
         """
         return self.env.step(action)
 
-    def _to_akro_space(self, space):
-        """
-        Converts a gym.space into an akro.space.
-
-        Args:
-            space (gym.spaces)
-
-        Returns:
-            space (akro.spaces)
-        """
-        if isinstance(space, GymBox):
-            return Box(low=space.low, high=space.high, dtype=space.dtype)
-        elif isinstance(space, GymDict):
-            return Dict(space.spaces)
-        elif isinstance(space, GymDiscrete):
-            return Discrete(space.n)
-        elif isinstance(space, GymTuple):
-            return Tuple(list(map(self._to_akro_space, space.spaces)))
-        else:
-            raise NotImplementedError
-
 
 def Step(observation, reward, done, **kwargs):  # noqa: N802
     """

diff --git a/src/garage/misc/tensor_utils.py b/src/garage/misc/tensor_utils.py
@@ -1,4 +1,4 @@
-from akro import Box
+import gym.spaces
 import numpy as np
 
 
@@ -166,7 +166,7 @@ def normalize_pixel_batch(env_spec, observations):
         env_spec (garage.envs.EnvSpec): Environment specification.
         observations (numpy.ndarray): Observations from environment.
     """
-    if isinstance(env_spec.observation_space, Box):
+    if isinstance(env_spec.observation_space, gym.spaces.Box):
         if len(env_spec.observation_space.shape) == 3:
             return [obs.astype(np.float32) / 255.0 for obs in observations]
     return observations
diff --git a/src/garage/tf/algos/npo.py b/src/garage/tf/algos/npo.py
@@ -192,10 +192,10 @@ def _build_inputs(self):
         policy_dist = self.policy.distribution
 
         with tf.name_scope('inputs'):
-            obs_var = observation_space.new_tensor_variable(
-                name='obs', extra_dims=2)
-            action_var = action_space.new_tensor_variable(
-                name='action', extra_dims=2)
+            obs_var = observation_space.to_tf_placeholder(
+                name='obs', batch_dims=2)
+            action_var = action_space.to_tf_placeholder(
+                name='action', batch_dims=2)
             reward_var = tensor_utils.new_tensor(
                 name='reward', ndim=2, dtype=tf.float32)
             valid_var = tf.placeholder(

diff --git a/src/garage/tf/algos/reps.py b/src/garage/tf/algos/reps.py
@@ -206,12 +206,12 @@ def _build_inputs(self):
         policy_dist = self.policy.distribution
 
         with tf.name_scope('inputs'):
-            obs_var = observation_space.new_tensor_variable(
+            obs_var = observation_space.to_tf_placeholder(
                 name='obs',
-                extra_dims=2)   # yapf: disable
-            action_var = action_space.new_tensor_variable(
+                batch_dims=2)   # yapf: disable
+            action_var = action_space.to_tf_placeholder(
                 name='action',
-                extra_dims=2)   # yapf: disable
+                batch_dims=2)   # yapf: disable
             reward_var = tensor_utils.new_tensor(
                 name='reward',
                 ndim=2,

diff --git a/src/garage/tf/envs/base.py b/src/garage/tf/envs/base.py
@@ -1,12 +1,5 @@
-from akro.tf import Box
-from akro.tf import Dict
-from akro.tf import Discrete
-from akro.tf import Tuple
+import akro
 from cached_property import cached_property
-from gym.spaces import Box as GymBox
-from gym.spaces import Dict as GymDict
-from gym.spaces import Discrete as GymDiscrete
-from gym.spaces import Tuple as GymTuple
 
 from garage.envs import GarageEnv
 from garage.misc.overrides import overrides
@@ -20,31 +13,17 @@ class TfEnv(GarageEnv):
         env (gym.Env): the env that will be wrapped
     """
 
+    def __init__(self, env=None, env_name=''):
+        super().__init__(env, env_name)
+        self.action_space = akro.from_gym(self.env.action_space)
+        self.observation_space = akro.from_gym(self.env.observation_space)
+
     @classmethod
     def wrap(cls, env_cls, **extra_kwargs):
         # Use a class wrapper rather than a lambda method for smoother
         # serialization
         return WrappedCls(cls, env_cls, extra_kwargs)
 
-    @overrides
-    def _to_akro_space(self, space):
-        """
-        Converts a gym.space to a akro.tf space.
-
-        Returns:
-            space (akro.tf space)
-        """
-        if isinstance(space, GymBox):
-            return Box(low=space.low, high=space.high, dtype=space.dtype)
-        elif isinstance(space, GymDict):
-            return Dict(space.spaces)
-        elif isinstance(space, GymDiscrete):
-            return Discrete(space.n)
-        elif isinstance(space, GymTuple):
-            return Tuple(list(map(self._to_akro_space, space.spaces)))
-        else:
-            raise NotImplementedError
-
     @cached_property
     @overrides
     def max_episode_steps(self):

diff --git a/src/garage/tf/policies/categorical_conv_policy.py b/src/garage/tf/policies/categorical_conv_policy.py
@@ -1,4 +1,4 @@
-from akro.tf import Discrete
+import akro
 import tensorflow as tf
 
 from garage.core import Serializable
@@ -35,7 +35,7 @@ def __init__(
         are ignored
         :return:
         """
-        assert isinstance(env_spec.action_space, Discrete)
+        assert isinstance(env_spec.action_space, akro.Discrete)
 
         Serializable.quick_init(self, locals())
 

diff --git a/src/garage/tf/policies/categorical_conv_policy_with_model.py b/src/garage/tf/policies/categorical_conv_policy_with_model.py
@@ -1,5 +1,5 @@
 """CategoricalConvPolicy with model."""
-from akro.tf import Discrete
+import akro
 import tensorflow as tf
 
 from garage.misc.overrides import overrides
@@ -17,7 +17,7 @@ class CategoricalConvPolicyWithModel(StochasticPolicy2):
     A policy that contains a CNN and a MLP to make prediction based on
     a categorical distribution.
 
-    It only works with akro.tf.Discrete action space.
+    It only works with akro.Discrete action space.
 
     Args:
         env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
@@ -75,9 +75,9 @@ def __init__(self,
                  output_w_init=tf.glorot_uniform_initializer(),
                  output_b_init=tf.zeros_initializer(),
                  layer_normalization=False):
-        assert isinstance(env_spec.action_space, Discrete), (
-            'CategoricalConvPolicy only works with akro.tf.Discrete'
-            'action space.')
+        assert isinstance(env_spec.action_space, akro.Discrete), (
+            'CategoricalConvPolicy only works with akro.Discrete action '
+            'space.')
         super().__init__(name, env_spec)
         self.obs_dim = env_spec.observation_space.shape
         self.action_dim = env_spec.action_space.n

diff --git a/src/garage/tf/policies/categorical_gru_policy.py b/src/garage/tf/policies/categorical_gru_policy.py
@@ -1,4 +1,4 @@
-from akro.tf import Discrete
+import akro
 import numpy as np
 import tensorflow as tf
 
@@ -32,7 +32,7 @@ def __init__(self,
         :param hidden_nonlinearity: nonlinearity used for each hidden layer
         :return:
         """
-        assert isinstance(env_spec.action_space, Discrete)
+        assert isinstance(env_spec.action_space, akro.Discrete)
 
         self._prob_network_name = 'prob_network'
         with tf.variable_scope(name, 'CategoricalGRUPolicy'):

diff --git a/src/garage/tf/policies/categorical_gru_policy_with_model.py b/src/garage/tf/policies/categorical_gru_policy_with_model.py
@@ -1,5 +1,5 @@
 """CategoricalGRUPolicy with model."""
-from akro.tf import Discrete
+import akro
 import numpy as np
 import tensorflow as tf
 
@@ -15,7 +15,7 @@ class CategoricalGRUPolicyWithModel(StochasticPolicy2):
     A policy that contains a GRU to make prediction based on
     a categorical distribution.
 
-    It only works with akro.tf.Discrete action space.
+    It only works with akro.Discrete action space.
 
     Args:
         env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
@@ -71,9 +71,9 @@ def __init__(self,
                  hidden_state_init_trainable=False,
                  state_include_action=True,
                  layer_normalization=False):
-        if not isinstance(env_spec.action_space, Discrete):
+        if not isinstance(env_spec.action_space, akro.Discrete):
             raise ValueError('CategoricalGRUPolicy only works'
-                             'with akro.tf.Discrete action space.')
+                             'with akro.Discrete action space.')
 
         super().__init__(name, env_spec)
         self._obs_dim = env_spec.observation_space.flat_dim

diff --git a/src/garage/tf/policies/categorical_lstm_policy.py b/src/garage/tf/policies/categorical_lstm_policy.py
@@ -1,4 +1,4 @@
-from akro.tf import Discrete
+import akro
 import numpy as np
 import tensorflow as tf
 
@@ -35,7 +35,7 @@ def __init__(self,
         :param hidden_nonlinearity: nonlinearity used for each hidden layer
         :return:
         """
-        assert isinstance(env_spec.action_space, Discrete)
+        assert isinstance(env_spec.action_space, akro.Discrete)
 
         self._prob_network_name = 'prob_network'
         with tf.variable_scope(name, 'CategoricalLSTMPolicy'):

diff --git a/src/garage/tf/policies/categorical_lstm_policy_with_model.py b/src/garage/tf/policies/categorical_lstm_policy_with_model.py
@@ -1,5 +1,5 @@
 """CategoricalLSTMPolicy with model."""
-from akro.tf import Discrete
+import akro
 import numpy as np
 import tensorflow as tf
 
@@ -15,7 +15,7 @@ class CategoricalLSTMPolicyWithModel(StochasticPolicy2):
     A policy that contains a LSTM to make prediction based on
     a categorical distribution.
 
-    It only works with akro.tf.Discrete action space.
+    It only works with akro.Discrete action space.
 
     Args:
         env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
@@ -81,9 +81,9 @@ def __init__(self,
                  state_include_action=True,
                  forget_bias=True,
                  layer_normalization=False):
-        if not isinstance(env_spec.action_space, Discrete):
+        if not isinstance(env_spec.action_space, akro.Discrete):
             raise ValueError('CategoricalLSTMPolicy only works'
-                             'with akro.tf.Discrete action space.')
+                             'with akro.Discrete action space.')
 
         super().__init__(name, env_spec)
         self._obs_dim = env_spec.observation_space.flat_dim

diff --git a/src/garage/tf/policies/categorical_mlp_policy.py b/src/garage/tf/policies/categorical_mlp_policy.py
@@ -1,4 +1,4 @@
-from akro.tf import Discrete
+import akro
 import tensorflow as tf
 
 from garage.core import Serializable
@@ -36,7 +36,7 @@ def __init__(
                 policy. If None, a MLP with the network parameters will be
                 created. If not None, other network params are ignored.
         """
-        assert isinstance(env_spec.action_space, Discrete)
+        assert isinstance(env_spec.action_space, akro.Discrete)
 
         Serializable.quick_init(self, locals())