Skip to content
This repository has been archived by the owner on Apr 6, 2018. It is now read-only.

Commit

Permalink
Support gym without configure (#162)
Browse files Browse the repository at this point in the history
* Major surgery to avoid gym.Env.configure

* track flashgames

* Forward configure through Timer-Render-Throttle

Logger now takes print_frequency in constructor

* More configure fixes. py.test passes locally

* Upgrade to gym 0.8

* Require 0.8.1

* configure bugfixes

* Record env metadata & spec. Use semantics.async instead of spec.vnc

* Bump tox version of gym for travis

* Fix race condition when recording from multiple threads on non-async envs

* sub-bump

* Bump
  • Loading branch information
tlbtlbtlb committed Mar 28, 2017
1 parent f6a005b commit a4f78eb
Show file tree
Hide file tree
Showing 23 changed files with 127 additions and 130 deletions.
4 changes: 2 additions & 2 deletions setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages

setup(name='universe',
version='0.21.4-dev',
version='0.21.5',
description="Universe: a software platform for measuring and training an AI's general intelligence across the world's supply of games, websites and other applications.",
url='https://github.com/openai/universe',
author='OpenAI',
Expand All @@ -14,7 +14,7 @@
'docker-pycreds==0.2.1',
'fastzbarlight>=0.0.13',
'go-vncdriver>=0.4.8',
'gym>=0.7,<0.8', # gym v0.8 removes env.configure, which required for this version of universe
'gym>=0.8.1',
'Pillow>=3.3.0',
'PyYAML>=3.12',
'six>=1.10.0',
Expand Down
3 changes: 2 additions & 1 deletion tests/functional/test_core_envs_semantics.py
Expand Up @@ -90,9 +90,10 @@ def test_nice_vnc_semantics_match(spec, matcher, wrapper):
spaces.seed(0)

vnc_env = spec.make()
if vnc_env.metadata.get('configure.required', False):
vnc_env.configure(remotes=1)
vnc_env = wrapper(vnc_env)
vnc_env = wrappers.Unvectorize(vnc_env)
vnc_env.configure(remotes=1)

env = gym.make(spec._kwargs['gym_core_id'])

Expand Down
11 changes: 6 additions & 5 deletions tests/functional/test_envs.py
Expand Up @@ -26,12 +26,13 @@ def test_smoke(env_id):
logging.getLogger().setLevel(logging.INFO)

env = gym.make(env_id)
env = wrappers.Unvectorize(env)
if env.metadata.get('configure.required', False):
if os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES'): # Used to test universe-envs in CI
configure_with_latest_docker_runtime_tag(env)
else:
env.configure(remotes=1)

if os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES'): # Used to test universe-envs in CI
configure_with_latest_docker_runtime_tag(env)
else:
env.configure(remotes=1)
env = wrappers.Unvectorize(env)

env.reset()
_rollout(env, timestep_limit=60*30) # Check a rollout
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Expand Up @@ -11,7 +11,7 @@ skipsdist=True
passenv=DISPLAY DOCKER_USERNAME DOCKER_PASSWORD FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES TRAVIS*
deps =
pytest
gym[atari]<0.8
gym[atari]<0.9
docker-py==1.10.3
Pillow
autobahn
Expand Down
3 changes: 1 addition & 2 deletions universe/envs/dummy_vnc_env.py
Expand Up @@ -36,7 +36,7 @@ def __init__(self):
self.observation_space = spaces.VNCObservationSpace()
self.action_space = spaces.VNCActionSpace()

def _configure(self, remotes=None,
def configure(self, remotes=None,
client_id=None,
start_timeout=None, docker_image=None,
ignore_clock_skew=False, disable_action_probes=False,
Expand All @@ -45,7 +45,6 @@ def _configure(self, remotes=None,
observer=False,
_n=3,
):
super(DummyVNCEnv, self)._configure()
self.n = _n
self._reward_buffers = [rewarder.RewardBuffer('dummy:{}'.format(i)) for i in range(self.n)]
self._started = True
Expand Down
28 changes: 16 additions & 12 deletions universe/envs/vnc_env.py
Expand Up @@ -135,18 +135,18 @@ def _seed(self, seed):
self._seed_value = seed
return [seed]

def _configure(self, remotes=None,
client_id=None,
start_timeout=None, docker_image=None,
ignore_clock_skew=False, disable_action_probes=False,
vnc_driver=None, vnc_kwargs=None,
rewarder_driver=None,
replace_on_crash=False, allocate_sync=True,
observer=False, api_key=None,
record=False,
sample_env_ids=None,
def configure(self, remotes=None,
client_id=None,
start_timeout=None, docker_image=None,
ignore_clock_skew=False, disable_action_probes=False,
vnc_driver=None, vnc_kwargs=None,
rewarder_driver=None,
replace_on_crash=False, allocate_sync=True,
observer=False, api_key=None,
record=False,
sample_env_ids=None,
):
"""Standard Gym hook to configure the environment.
"""Universe method to configure the environment.
Args:
Expand Down Expand Up @@ -552,7 +552,11 @@ def _render(self, mode='human', close=False):
self.vnc_session.render(self.connection_names[0])

def __str__(self):
return 'VNCEnv<{}>'.format(self.spec.id)
if self.spec:
return '<VNCEnv{}>'.format(self.spec.id)
else:
return 'VNCEnv'


class Mask(object):
"""Blocks the agent from interacting with the environment while the
Expand Down
14 changes: 8 additions & 6 deletions universe/vectorized/core.py
Expand Up @@ -15,11 +15,12 @@ class Env(gym.Env):
# Number of remotes. User should set this.
n = None


class Wrapper(Env, gym.Wrapper):
"""Use this instead of gym.Wrapper iff you're wrapping a vectorized env,
(or a vanilla env you wish to be vectorized).
"""
# If True and this is instantiated with a non-vectorized environment,
# If True and this is instantiated with a non-vectorized environment,
# automatically wrap it with the Vectorize wrapper.
autovectorize = True

Expand All @@ -35,10 +36,12 @@ def __init__(self, env):

self.env = env

def _configure(self, **kwargs):
super(Wrapper, self)._configure(**kwargs)
assert self.env.n is not None, "Did not set self.env.n: self.n={} self.env={} self={}".format(self.env.n, self.env, self)
self.n = self.env.n
@property
def n(self):
return self.env.n

def configure(self, **kwargs):
self.env.configure(**kwargs)

class ObservationWrapper(Wrapper, gym.ObservationWrapper):
pass
Expand All @@ -48,4 +51,3 @@ class RewardWrapper(Wrapper, gym.RewardWrapper):

class ActionWrapper(Wrapper, gym.ActionWrapper):
pass

4 changes: 2 additions & 2 deletions universe/vectorized/multiprocessing_env.py
Expand Up @@ -261,6 +261,7 @@ def close_n(worker_n):
class MultiprocessingEnv(core.Env):
metadata = {
'runtime.vectorized': True,
'configure.required': True,
}

def __init__(self, env_id):
Expand All @@ -278,8 +279,7 @@ def __init__(self, env_id):
self.observation_space = env.observation_space
self.reward_range = env.reward_range

def _configure(self, n=1, pool_size=None, episode_limit=None):
super(MultiprocessingEnv, self)._configure()
def configure(self, n=1, pool_size=None, episode_limit=None):
self.n = n
self.envs = [self.spec.make() for _ in range(self.n)]

Expand Down
4 changes: 2 additions & 2 deletions universe/vectorized/tests/test_monitoring.py
Expand Up @@ -8,8 +8,8 @@
def test_multiprocessing_env_monitoring():
with helpers.tempdir() as temp:
env = wrappers.WrappedMultiprocessingEnv('Pong-v3')
env = wrappers.Monitor(env, temp)
env.configure(n=2)
env = wrappers.Monitor(env, temp)
env.reset()
for i in range(2):
env.step([0, 0])
Expand All @@ -23,10 +23,10 @@ def test_multiprocessing_env_monitoring():
def test_vnc_monitoring():
with helpers.tempdir() as temp:
env = gym.make('gym-core.Pong-v3')
env.configure(remotes=2)
env = wrappers.GymCoreAction(env)
env = wrappers.Monitor(env, temp)

env.configure(remotes=2)
env.reset()
for i in range(2):
env.step([0, 0])
Expand Down
7 changes: 3 additions & 4 deletions universe/vectorized/vectorize_filter.py
Expand Up @@ -18,14 +18,13 @@ class VectorizeFilter(core.Wrapper):
def __init__(self, env, filter_factory, *args, **kwargs):
super(VectorizeFilter, self).__init__(env)
self.filter_factory = filter_factory
self.filter_n = None
self._args = args
self._kwargs = kwargs

def _configure(self, **kwargs):
super(VectorizeFilter, self)._configure(**kwargs)
self.filter_n = [self.filter_factory(*self._args, **self._kwargs) for _ in range(self.n)]

def _reset(self):
if self.filter_n is None:
self.filter_n = [self.filter_factory(*self._args, **self._kwargs) for _ in range(self.n)]
observation_n = self.env.reset()
observation_n = [filter._after_reset(observation) for filter, observation in zip(self.filter_n, observation_n)]
return observation_n
Expand Down
7 changes: 2 additions & 5 deletions universe/wrappers/diagnostics.py
Expand Up @@ -6,15 +6,12 @@

# Not used in core; but used in play_flashgames
class Diagnostics(vectorized.Wrapper):
def _configure(self, **kwargs):
super(Diagnostics, self)._configure(**kwargs)
self.diagnostics = self.unwrapped.diagnostics

def _step(self, action_n):
observation_n, reward_n, done_n, info = self.env.step(action_n)
# We want this to be above Mask, so we know whether or not a
# particular index is resetting.
if self.diagnostics:
if self.unwrapped.diagnostics:
with pyprofile.push('vnc_env.diagnostics.add_metadata'):
self.diagnostics.add_metadata(observation_n, info['n'])
self.unwrapped.diagnostics.add_metadata(observation_n, info['n'])
return observation_n, reward_n, done_n, info
4 changes: 2 additions & 2 deletions universe/wrappers/experimental/random_env.py
Expand Up @@ -14,8 +14,8 @@ def __init__(self, env, env_ids):
super(RandomEnv, self).__init__(env)
self.env_ids = env_ids

def _configure(self, **kwargs):
super(RandomEnv, self)._configure(sample_env_ids=self.env_ids, **kwargs)
def configure(self, **kwargs):
super(RandomEnv, self).configure(sample_env_ids=self.env_ids, **kwargs)

def _reset(self):
observation_n = self.env.reset()
Expand Down
24 changes: 8 additions & 16 deletions universe/wrappers/joint.py
Expand Up @@ -4,9 +4,6 @@
class Joint(vectorized.Wrapper):
def __init__(self, env_m):
self.env_m = env_m
for env in self.env_m:
if not env._configured:
raise error.Error('Joint env should have been initialized: {}'.format(env))

# TODO: generalize this. Doing so requires adding a vectorized
# space mode.
Expand All @@ -15,26 +12,21 @@ def __init__(self, env_m):

self.pool = pool.ThreadPool(min(len(env_m), 5))

def _close(self):
if hasattr(self, 'pool'):
self.pool.close()
self._n = sum(env.n for env in self.env_m)
self.metadata = self.metadata.copy()
self.metadata['render.modes'] = self.env_m[0].metadata['render.modes']

@property
def spec(self):
return None
def n(self):
return self._n

@spec.setter
def spec(self, value):
pass
def _close(self):
if hasattr(self, 'pool'):
self.pool.close()

def _render(self, mode='human', close=False):
return self.env_m[0]._render(mode=mode, close=close)

def _configure(self, **kwargs):
self.n = sum(env.n for env in self.env_m)
self.metadata = self.metadata.copy()
self.metadata['render.modes'] = self.env_m[0].metadata['render.modes']

def _reset(self):
# Keep all env[0] action on the main thread, in case we ever
# need to render. Otherwise we get segfaults from the
Expand Down
17 changes: 10 additions & 7 deletions universe/wrappers/logger.py
Expand Up @@ -23,18 +23,21 @@ def stats(count):
return s

class Logger(vectorized.Wrapper):
def __init__(self, env):
metadata = {
'configure.required': True
}
def __init__(self, env, print_frequency=5):
super(Logger, self).__init__(env)

def _configure(self, print_frequency=5, **kwargs):
self.print_frequency = print_frequency
extra_logger.info('Running VNC environments with Logger set to print_frequency=%s. To change this, pass "print_frequency=k" or "print_frequency=None" to "env.configure".', self.print_frequency)
super(Logger, self)._configure(**kwargs)
self._clear_step_state()
self.metadata['render.modes'] = self.env.metadata['render.modes']

if self.n is not None:
self._clear_step_state()
self._last_step_time = None

def configure(self, **kwargs):
self.env.configure(**kwargs)
self._clear_step_state()

def _clear_step_state(self):
self.frames = 0
self.last_print = time.time()
Expand Down
12 changes: 7 additions & 5 deletions universe/wrappers/multiprocessing_env.py
Expand Up @@ -30,13 +30,15 @@ def _step(self, action_n):
return observation_n, reward_n, done_n, info

class EpisodeID(vectorized.Wrapper):
metadata = {
'configure.required': True
}
"""
For each episode, return its id, and also return the total number of contiguous
episodes that are now done.
For each episode, return its id, and also return the total number of contiguous
episodes that are now done.
"""
def _configure(self, episode_limit=None, **kwargs):
super(EpisodeID, self)._configure(**kwargs)
assert self.metadata.get('runtime.vectorized')
def configure(self, episode_limit=None, **kwargs):
self.env.configure(**kwargs)
self.episode_limit = episode_limit
self._clear_state()

Expand Down

0 comments on commit a4f78eb

Please sign in to comment.