This repository has been archived by the owner. It is now read-only.
Permalink
Browse files

Support gym without configure (#162)

* Major surgery to avoid gym.Env.configure

* track flashgames

* Forward configure through Timer-Render-Throttle

Logger now takes print_frequency in constructor

* More configure fixes. py.test passes locally

* Upgrade to gym 0.8

* Require 0.8.1

* configure bugfixes

* Record env metadata & spec. Use semantics.async instead of spec.vnc

* Bump tox version of gym for travis

* Fix race condition when recording from multiple threads on non-async envs

* sub-bump

* Bump
  • Loading branch information...
tlbtlbtlb committed Mar 28, 2017
1 parent f6a005b commit a4f78eb928ef2dc26247b16a75a4e862c2552f00
View
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages
setup(name='universe',
version='0.21.4-dev',
version='0.21.5',
description="Universe: a software platform for measuring and training an AI's general intelligence across the world's supply of games, websites and other applications.",
url='https://github.com/openai/universe',
author='OpenAI',
@@ -14,7 +14,7 @@
'docker-pycreds==0.2.1',
'fastzbarlight>=0.0.13',
'go-vncdriver>=0.4.8',
'gym>=0.7,<0.8', # gym v0.8 removes env.configure, which required for this version of universe
'gym>=0.8.1',
'Pillow>=3.3.0',
'PyYAML>=3.12',
'six>=1.10.0',
@@ -90,9 +90,10 @@ def test_nice_vnc_semantics_match(spec, matcher, wrapper):
spaces.seed(0)
vnc_env = spec.make()
if vnc_env.metadata.get('configure.required', False):
vnc_env.configure(remotes=1)
vnc_env = wrapper(vnc_env)
vnc_env = wrappers.Unvectorize(vnc_env)
vnc_env.configure(remotes=1)
env = gym.make(spec._kwargs['gym_core_id'])
@@ -26,12 +26,13 @@ def test_smoke(env_id):
logging.getLogger().setLevel(logging.INFO)
env = gym.make(env_id)
env = wrappers.Unvectorize(env)
if env.metadata.get('configure.required', False):
if os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES'): # Used to test universe-envs in CI
configure_with_latest_docker_runtime_tag(env)
else:
env.configure(remotes=1)
if os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES'): # Used to test universe-envs in CI
configure_with_latest_docker_runtime_tag(env)
else:
env.configure(remotes=1)
env = wrappers.Unvectorize(env)
env.reset()
_rollout(env, timestep_limit=60*30) # Check a rollout
View
@@ -11,7 +11,7 @@ skipsdist=True
passenv=DISPLAY DOCKER_USERNAME DOCKER_PASSWORD FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES TRAVIS*
deps =
pytest
gym[atari]<0.8
gym[atari]<0.9
docker-py==1.10.3
Pillow
autobahn
@@ -36,7 +36,7 @@ def __init__(self):
self.observation_space = spaces.VNCObservationSpace()
self.action_space = spaces.VNCActionSpace()
def _configure(self, remotes=None,
def configure(self, remotes=None,
client_id=None,
start_timeout=None, docker_image=None,
ignore_clock_skew=False, disable_action_probes=False,
@@ -45,7 +45,6 @@ def _configure(self, remotes=None,
observer=False,
_n=3,
):
super(DummyVNCEnv, self)._configure()
self.n = _n
self._reward_buffers = [rewarder.RewardBuffer('dummy:{}'.format(i)) for i in range(self.n)]
self._started = True
View
@@ -135,18 +135,18 @@ def _seed(self, seed):
self._seed_value = seed
return [seed]
def _configure(self, remotes=None,
client_id=None,
start_timeout=None, docker_image=None,
ignore_clock_skew=False, disable_action_probes=False,
vnc_driver=None, vnc_kwargs=None,
rewarder_driver=None,
replace_on_crash=False, allocate_sync=True,
observer=False, api_key=None,
record=False,
sample_env_ids=None,
def configure(self, remotes=None,
client_id=None,
start_timeout=None, docker_image=None,
ignore_clock_skew=False, disable_action_probes=False,
vnc_driver=None, vnc_kwargs=None,
rewarder_driver=None,
replace_on_crash=False, allocate_sync=True,
observer=False, api_key=None,
record=False,
sample_env_ids=None,
):
"""Standard Gym hook to configure the environment.
"""Universe method to configure the environment.
Args:
@@ -552,7 +552,11 @@ def _render(self, mode='human', close=False):
self.vnc_session.render(self.connection_names[0])
def __str__(self):
return 'VNCEnv<{}>'.format(self.spec.id)
if self.spec:
return '<VNCEnv{}>'.format(self.spec.id)
else:
return 'VNCEnv'
class Mask(object):
"""Blocks the agent from interacting with the environment while the
@@ -15,11 +15,12 @@ class Env(gym.Env):
# Number of remotes. User should set this.
n = None
class Wrapper(Env, gym.Wrapper):
"""Use this instead of gym.Wrapper iff you're wrapping a vectorized env,
(or a vanilla env you wish to be vectorized).
"""
# If True and this is instantiated with a non-vectorized environment,
# If True and this is instantiated with a non-vectorized environment,
# automatically wrap it with the Vectorize wrapper.
autovectorize = True
@@ -35,10 +36,12 @@ def __init__(self, env):
self.env = env
def _configure(self, **kwargs):
super(Wrapper, self)._configure(**kwargs)
assert self.env.n is not None, "Did not set self.env.n: self.n={} self.env={} self={}".format(self.env.n, self.env, self)
self.n = self.env.n
@property
def n(self):
return self.env.n
def configure(self, **kwargs):
self.env.configure(**kwargs)
class ObservationWrapper(Wrapper, gym.ObservationWrapper):
pass
@@ -48,4 +51,3 @@ class RewardWrapper(Wrapper, gym.RewardWrapper):
class ActionWrapper(Wrapper, gym.ActionWrapper):
pass
@@ -261,6 +261,7 @@ def close_n(worker_n):
class MultiprocessingEnv(core.Env):
metadata = {
'runtime.vectorized': True,
'configure.required': True,
}
def __init__(self, env_id):
@@ -278,8 +279,7 @@ def __init__(self, env_id):
self.observation_space = env.observation_space
self.reward_range = env.reward_range
def _configure(self, n=1, pool_size=None, episode_limit=None):
super(MultiprocessingEnv, self)._configure()
def configure(self, n=1, pool_size=None, episode_limit=None):
self.n = n
self.envs = [self.spec.make() for _ in range(self.n)]
@@ -8,8 +8,8 @@
def test_multiprocessing_env_monitoring():
with helpers.tempdir() as temp:
env = wrappers.WrappedMultiprocessingEnv('Pong-v3')
env = wrappers.Monitor(env, temp)
env.configure(n=2)
env = wrappers.Monitor(env, temp)
env.reset()
for i in range(2):
env.step([0, 0])
@@ -23,10 +23,10 @@ def test_multiprocessing_env_monitoring():
def test_vnc_monitoring():
with helpers.tempdir() as temp:
env = gym.make('gym-core.Pong-v3')
env.configure(remotes=2)
env = wrappers.GymCoreAction(env)
env = wrappers.Monitor(env, temp)
env.configure(remotes=2)
env.reset()
for i in range(2):
env.step([0, 0])
@@ -18,14 +18,13 @@ class VectorizeFilter(core.Wrapper):
def __init__(self, env, filter_factory, *args, **kwargs):
super(VectorizeFilter, self).__init__(env)
self.filter_factory = filter_factory
self.filter_n = None
self._args = args
self._kwargs = kwargs
def _configure(self, **kwargs):
super(VectorizeFilter, self)._configure(**kwargs)
self.filter_n = [self.filter_factory(*self._args, **self._kwargs) for _ in range(self.n)]
def _reset(self):
if self.filter_n is None:
self.filter_n = [self.filter_factory(*self._args, **self._kwargs) for _ in range(self.n)]
observation_n = self.env.reset()
observation_n = [filter._after_reset(observation) for filter, observation in zip(self.filter_n, observation_n)]
return observation_n
@@ -6,15 +6,12 @@
# Not used in core; but used in play_flashgames
class Diagnostics(vectorized.Wrapper):
def _configure(self, **kwargs):
super(Diagnostics, self)._configure(**kwargs)
self.diagnostics = self.unwrapped.diagnostics
def _step(self, action_n):
observation_n, reward_n, done_n, info = self.env.step(action_n)
# We want this to be above Mask, so we know whether or not a
# particular index is resetting.
if self.diagnostics:
if self.unwrapped.diagnostics:
with pyprofile.push('vnc_env.diagnostics.add_metadata'):
self.diagnostics.add_metadata(observation_n, info['n'])
self.unwrapped.diagnostics.add_metadata(observation_n, info['n'])
return observation_n, reward_n, done_n, info
@@ -14,8 +14,8 @@ def __init__(self, env, env_ids):
super(RandomEnv, self).__init__(env)
self.env_ids = env_ids
def _configure(self, **kwargs):
super(RandomEnv, self)._configure(sample_env_ids=self.env_ids, **kwargs)
def configure(self, **kwargs):
super(RandomEnv, self).configure(sample_env_ids=self.env_ids, **kwargs)
def _reset(self):
observation_n = self.env.reset()
View
@@ -4,9 +4,6 @@
class Joint(vectorized.Wrapper):
def __init__(self, env_m):
self.env_m = env_m
for env in self.env_m:
if not env._configured:
raise error.Error('Joint env should have been initialized: {}'.format(env))
# TODO: generalize this. Doing so requires adding a vectorized
# space mode.
@@ -15,26 +12,21 @@ def __init__(self, env_m):
self.pool = pool.ThreadPool(min(len(env_m), 5))
def _close(self):
if hasattr(self, 'pool'):
self.pool.close()
self._n = sum(env.n for env in self.env_m)
self.metadata = self.metadata.copy()
self.metadata['render.modes'] = self.env_m[0].metadata['render.modes']
@property
def spec(self):
return None
def n(self):
return self._n
@spec.setter
def spec(self, value):
pass
def _close(self):
if hasattr(self, 'pool'):
self.pool.close()
def _render(self, mode='human', close=False):
return self.env_m[0]._render(mode=mode, close=close)
def _configure(self, **kwargs):
self.n = sum(env.n for env in self.env_m)
self.metadata = self.metadata.copy()
self.metadata['render.modes'] = self.env_m[0].metadata['render.modes']
def _reset(self):
# Keep all env[0] action on the main thread, in case we ever
# need to render. Otherwise we get segfaults from the
@@ -23,18 +23,21 @@ def stats(count):
return s
class Logger(vectorized.Wrapper):
def __init__(self, env):
metadata = {
'configure.required': True
}
def __init__(self, env, print_frequency=5):
super(Logger, self).__init__(env)
def _configure(self, print_frequency=5, **kwargs):
self.print_frequency = print_frequency
extra_logger.info('Running VNC environments with Logger set to print_frequency=%s. To change this, pass "print_frequency=k" or "print_frequency=None" to "env.configure".', self.print_frequency)
super(Logger, self)._configure(**kwargs)
self._clear_step_state()
self.metadata['render.modes'] = self.env.metadata['render.modes']
if self.n is not None:
self._clear_step_state()
self._last_step_time = None
def configure(self, **kwargs):
self.env.configure(**kwargs)
self._clear_step_state()
def _clear_step_state(self):
self.frames = 0
self.last_print = time.time()
@@ -30,13 +30,15 @@ def _step(self, action_n):
return observation_n, reward_n, done_n, info
class EpisodeID(vectorized.Wrapper):
metadata = {
'configure.required': True
}
"""
For each episode, return its id, and also return the total number of contiguous
episodes that are now done.
For each episode, return its id, and also return the total number of contiguous
episodes that are now done.
"""
def _configure(self, episode_limit=None, **kwargs):
super(EpisodeID, self)._configure(**kwargs)
assert self.metadata.get('runtime.vectorized')
def configure(self, episode_limit=None, **kwargs):
self.env.configure(**kwargs)
self.episode_limit = episode_limit
self._clear_state()
Oops, something went wrong.

0 comments on commit a4f78eb

Please sign in to comment.