From 92d37caed188478fa11ae59c6d1e7fcb53922d30 Mon Sep 17 00:00:00 2001 From: seba-1511 Date: Sat, 31 Aug 2019 09:05:10 -0700 Subject: [PATCH 01/10] Improve environment compatibility of ActionDistribution. --- cherry/distributions.py | 2 +- cherry/envs/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cherry/distributions.py b/cherry/distributions.py index 4cb67b2..f1a7070 100644 --- a/cherry/distributions.py +++ b/cherry/distributions.py @@ -104,7 +104,7 @@ def __init__(self, env, logstd=None, use_probs=False, reparam=False): super(ActionDistribution, self).__init__() self.use_probs = use_probs self.reparam = reparam - self.is_discrete = env.discrete_action + self.is_discrete = ch.envs.is_discrete(env.action_space) if not self.is_discrete: if logstd is None: action_size = ch.envs.get_space_dimension(env.action_space) diff --git a/cherry/envs/__init__.py b/cherry/envs/__init__.py index ffda10c..f44d385 100644 --- a/cherry/envs/__init__.py +++ b/cherry/envs/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from .utils import get_space_dimension +from .utils import * from .base import Wrapper from .runner_wrapper import Runner from .logger_wrapper import Logger From addead9f4ec27d4f6d9773002fcb80aba07c2b9c Mon Sep 17 00:00:00 2001 From: seba-1511 Date: Sat, 31 Aug 2019 10:01:06 -0700 Subject: [PATCH 02/10] Fix is_vectorized when only 1 env. --- cherry/envs/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cherry/envs/utils.py b/cherry/envs/utils.py index 37ff147..11966b8 100644 --- a/cherry/envs/utils.py +++ b/cherry/envs/utils.py @@ -16,7 +16,7 @@ def is_vectorized(env): - return hasattr(env, 'num_envs') + return hasattr(env, 'num_envs') and env.num_envs > 1 def is_discrete(space, vectorized=False): From 5f86d7f0e0b1fe188824b760afcf230689a56611 Mon Sep 17 00:00:00 2001 From: seba-1511 Date: Mon, 2 Sep 2019 12:14:53 -0700 Subject: [PATCH 03/10] Add VecEnv compatibility to ActionScaler. 
--- cherry/envs/action_space_scaler_wrapper.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cherry/envs/action_space_scaler_wrapper.py b/cherry/envs/action_space_scaler_wrapper.py index fe35fd5..a6f2d5c 100644 --- a/cherry/envs/action_space_scaler_wrapper.py +++ b/cherry/envs/action_space_scaler_wrapper.py @@ -25,9 +25,16 @@ def __init__(self, env, clip=1.0): def reset(self, *args, **kwargs): return self.env.reset(*args, **kwargs) - def step(self, action): + def _normalize(self, action): lb = self.env.action_space.low ub = self.env.action_space.high scaled_action = lb + (action + self.clip) * 0.5 * (ub - lb) scaled_action = np.clip(scaled_action, lb, ub) - return self.env.step(scaled_action) + return scaled_action + + def step(self, action): + if self.is_vectorized: + action = [self._normalize(a) for a in action] + else: + action = self._normalize(action) + return self.env.step(action) From 10f0203740a4e8f8126e2e196eb2bf4bda20d3d5 Mon Sep 17 00:00:00 2001 From: seba-1511 Date: Tue, 3 Sep 2019 16:03:50 -0700 Subject: [PATCH 04/10] Add Reward and State normalizer wrappers. 
---
 cherry/envs/__init__.py                  |  2 +
 cherry/envs/reward_normalizer_wrapper.py | 69 ++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 cherry/envs/reward_normalizer_wrapper.py

diff --git a/cherry/envs/__init__.py b/cherry/envs/__init__.py
index f44d385..86363df 100644
--- a/cherry/envs/__init__.py
+++ b/cherry/envs/__init__.py
@@ -11,6 +11,8 @@
 from .monitor_wrapper import Monitor
 from .recorder_wrapper import Recorder
 from .normalizer_wrapper import Normalizer
+from .state_normalizer_wrapper import StateNormalizer
+from .reward_normalizer_wrapper import RewardNormalizer
 from .state_lambda_wrapper import StateLambda
 from .action_lambda_wrapper import ActionLambda
 from .action_space_scaler_wrapper import ActionSpaceScaler
diff --git a/cherry/envs/reward_normalizer_wrapper.py b/cherry/envs/reward_normalizer_wrapper.py
new file mode 100644
index 0000000..47f5b29
--- /dev/null
+++ b/cherry/envs/reward_normalizer_wrapper.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+import numpy as np
+from .base import Wrapper
+
+
+class RewardNormalizer(Wrapper):
+
+    """
+    [[Source]](https://github.com/seba-1511/cherry/blob/master/cherry/envs/reward_normalizer_wrapper.py)
+
+    **Description**
+
+    Normalizes the rewards with a running average.
+
+    **Arguments**
+
+    * **env** (Environment) - Environment to normalize.
+    * **statistics** (dict, *optional*, default=None) - Dictionary used to
+      bootstrap the normalizing statistics.
+    * **beta** (float, *optional*, default=0.99) - Moving average weight.
+    * **eps** (float, *optional*, default=1e-8) - Numerical stability.
+
+    **Credit**
+
+    Adapted from Tristan Deleu's implementation.
+
+    **Example**
+    ~~~python
+    env = gym.make('CartPole-v0')
+    env = cherry.envs.RewardNormalizer(env)
+    env2 = gym.make('CartPole-v0')
+    env2 = cherry.envs.RewardNormalizer(env2,
+                                        statistics=env.statistics)
+    ~~~
+    """
+
+    def __init__(self, env, statistics=None, beta=0.99, eps=1e-8):
+        super(RewardNormalizer, self).__init__(env)
+        self.beta = beta
+        self.eps = eps
+        if statistics is not None and 'mean' in statistics:
+            self._reward_mean = np.copy(statistics['mean'])
+        else:
+            self._reward_mean = np.zeros(1)
+
+        if statistics is not None and 'var' in statistics:
+            self._reward_var = np.copy(statistics['var'])
+        else:
+            self._reward_var = np.ones(1)
+
+    @property
+    def statistics(self):
+        return {
+            'mean': self._reward_mean,
+            'var': self._reward_var,
+        }
+
+    def _reward_normalize(self, reward):
+        self._reward_mean = self.beta * self._reward_mean + (1.0 - self.beta) * reward
+        self._reward_var = self.beta * self._reward_var + (1.0 - self.beta) * np.square(reward - self._reward_mean)
+        return reward / np.sqrt(self._reward_var + self.eps)
+
+    def reset(self, *args, **kwargs):
+        return self.env.reset(*args, **kwargs)
+
+    def step(self, *args, **kwargs):
+        state, reward, done, infos = self.env.step(*args, **kwargs)
+        return state, self._reward_normalize(reward), done, infos

From 5ddf134ef6fc416849ca08f6223a8f05db47ab0d Mon Sep 17 00:00:00 2001
From: seba-1511
Date: Tue, 3 Sep 2019 16:15:43 -0700
Subject: [PATCH 05/10] Add state normalizer wrapper.
---
 cherry/envs/state_normalizer_wrapper.py | 70 +++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 cherry/envs/state_normalizer_wrapper.py

diff --git a/cherry/envs/state_normalizer_wrapper.py b/cherry/envs/state_normalizer_wrapper.py
new file mode 100644
index 0000000..e9ffbd1
--- /dev/null
+++ b/cherry/envs/state_normalizer_wrapper.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+import numpy as np
+from .base import Wrapper
+
+
+class StateNormalizer(Wrapper):
+
+    """
+    [[Source]](https://github.com/seba-1511/cherry/blob/master/cherry/envs/state_normalizer_wrapper.py)
+
+    **Description**
+
+    Normalizes the states with a running average.
+
+    **Arguments**
+
+    * **env** (Environment) - Environment to normalize.
+    * **statistics** (dict, *optional*, default=None) - Dictionary used to
+      bootstrap the normalizing statistics.
+    * **beta** (float, *optional*, default=0.99) - Moving average weight.
+    * **eps** (float, *optional*, default=1e-8) - Numerical stability.
+
+    **Credit**
+
+    Adapted from Tristan Deleu's implementation.
+
+    **Example**
+    ~~~python
+    env = gym.make('CartPole-v0')
+    env = cherry.envs.StateNormalizer(env)
+    env2 = gym.make('CartPole-v0')
+    env2 = cherry.envs.StateNormalizer(env2,
+                                       statistics=env.statistics)
+    ~~~
+    """
+
+    def __init__(self, env, statistics=None, beta=0.99, eps=1e-8):
+        super(StateNormalizer, self).__init__(env)
+        self.beta = beta
+        self.eps = eps
+        if statistics is not None and 'mean' in statistics:
+            self._state_mean = np.copy(statistics['mean'])
+        else:
+            self._state_mean = np.zeros(self.observation_space.shape)
+
+        if statistics is not None and 'var' in statistics:
+            self._state_var = np.copy(statistics['var'])
+        else:
+            self._state_var = np.ones(self.observation_space.shape)
+
+    @property
+    def statistics(self):
+        return {
+            'mean': self._state_mean,
+            'var': self._state_var,
+        }
+
+    def _state_normalize(self, state):
+        self._state_mean = self.beta * self._state_mean + (1.0 - self.beta) * state
+        self._state_var = self.beta * self._state_var + (1.0 - self.beta) * np.square(state - self._state_mean)
+        return (state - self._state_mean) / np.sqrt(self._state_var + self.eps)
+
+    def reset(self, *args, **kwargs):
+        state = self.env.reset(*args, **kwargs)
+        return self._state_normalize(state)
+
+    def step(self, *args, **kwargs):
+        state, reward, done, infos = self.env.step(*args, **kwargs)
+        return self._state_normalize(state), reward, done, infos

From 8df33a1093ab7f49b8926173443ab69a668f59a2 Mon Sep 17 00:00:00 2001
From: seba-1511
Date: Tue, 3 Sep 2019 16:21:52 -0700
Subject: [PATCH 06/10] Add new normalizers to docs.
--- docs/pydocmd.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/pydocmd.yml b/docs/pydocmd.yml index 4a9815e..04d408a 100644 --- a/docs/pydocmd.yml +++ b/docs/pydocmd.yml @@ -28,6 +28,8 @@ generate: - cherry.envs.visdom_logger_wrapper.VisdomLogger++ - cherry.envs.torch_wrapper.Torch++ - cherry.envs.normalizer_wrapper.Normalizer++ + - cherry.envs.state_normalizer_wrapper.StateNormalizer++ + - cherry.envs.reward_normalizer_wrapper.RewardNormalizer++ - cherry.envs.reward_clipper_wrapper.RewardClipper++ - cherry.envs.monitor_wrapper.Monitor++ - cherry.envs.openai_atari_wrapper.OpenAIAtari++ From af49e283176036b96cd6398fe00761dea1704941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9b=20Arnold?= Date: Sat, 7 Sep 2019 09:37:00 -0700 Subject: [PATCH 07/10] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 250ab88..9ec588f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

+

-------------------------------------------------------------------------------- From e5238577b276ef0c20388397cf94ac3390c3404f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9b=20Arnold?= Date: Sat, 7 Sep 2019 09:38:53 -0700 Subject: [PATCH 08/10] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ec588f..7882adb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

+

-------------------------------------------------------------------------------- From 0d52345cd9979335bc31cd1449f6a4be85827703 Mon Sep 17 00:00:00 2001 From: seba-1511 Date: Sat, 7 Sep 2019 09:57:17 -0700 Subject: [PATCH 09/10] Update URLs. --- README.md | 2 +- docs/pydocmd.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7882adb..a7e51a8 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ -------------------------------------------------------------------------------- -[![Build Status](https://travis-ci.org/seba-1511/cherry.svg?branch=master)](https://travis-ci.org/seba-1511/cherry) +[![Build Status](https://travis-ci.org/learnables/cherry.svg?branch=master)](https://travis-ci.org/learnables/cherry) Cherry is a reinforcement learning framework for researchers built on top of PyTorch. diff --git a/docs/pydocmd.yml b/docs/pydocmd.yml index 04d408a..c307eea 100644 --- a/docs/pydocmd.yml +++ b/docs/pydocmd.yml @@ -79,8 +79,8 @@ pages: - cherry.pg: docs/cherry.pg.md - cherry.plot: docs/cherry.plot.md - cherry.td: docs/cherry.td.md -- Examples: https://github.com/seba-1511/cherry/tree/master/examples -- GitHub: https://github.com/seba-1511/cherry/ +- Examples: https://github.com/learnables/cherry/tree/master/examples +- GitHub: https://github.com/learnables/cherry/ # These options all show off their default values. You don't have to add # them to your configuration if you're fine with the default. @@ -89,7 +89,7 @@ gens_dir: _build/pydocmd # This will end up as the MkDocs 'docs_dir' site_dir: _build/site site_url: http://cherry-rl.net site_author: Seb Arnold -google_analytics: ['UA-68693545-3', 'seba-1511.github.com'] +google_analytics: ['UA-68693545-3', 'learnables.github.com'] theme: name: mkdocs custom_dir: 'cherry_theme/' From 7ef3868e1bc94a2ed37e7ee43d9d2a2fec0873a1 Mon Sep 17 00:00:00 2001 From: seba-1511 Date: Sat, 7 Sep 2019 10:06:14 -0700 Subject: [PATCH 10/10] Update more URLs. 
--- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 86792c1..a7f8aa6 100644 --- a/setup.py +++ b/setup.py @@ -26,8 +26,8 @@ long_description_content_type='text/markdown', author='Seb Arnold', author_email='smr.arnold@gmail.com', - url='https://seba-1511.github.com/cherry', - download_url='https://github.com/seba-1511/cherry/archive/' + str(VERSION) + '.zip', + url='https://learnables.github.com/cherry', + download_url='https://github.com/learnables/cherry/archive/' + str(VERSION) + '.zip', license='License :: OSI Approved :: Apache Software License', classifiers=[], scripts=[],