diff --git a/tensorforce/core/explorations/__init__.py b/tensorforce/core/explorations/__init__.py
index f1668623..420f8d4a 100755
--- a/tensorforce/core/explorations/__init__.py
+++ b/tensorforce/core/explorations/__init__.py
@@ -17,6 +17,7 @@
 from tensorforce.core.explorations.exploration import Exploration
 from tensorforce.core.explorations.constant import Constant
 from tensorforce.core.explorations.linear_decay import LinearDecay
+from tensorforce.core.explorations.epsilon_anneal import EpsilonAnneal
 from tensorforce.core.explorations.epsilon_decay import EpsilonDecay
 from tensorforce.core.explorations.ornstein_uhlenbeck_process import OrnsteinUhlenbeckProcess
 
@@ -24,6 +25,7 @@
 explorations = dict(
     constant=Constant,
     linear_decay=LinearDecay,
+    epsilon_anneal=EpsilonAnneal,
     epsilon_decay=EpsilonDecay,
     ornstein_uhlenbeck=OrnsteinUhlenbeckProcess
 )
diff --git a/tensorforce/core/explorations/epsilon_anneal.py b/tensorforce/core/explorations/epsilon_anneal.py
new file mode 100644
index 00000000..e8af713e
--- /dev/null
+++ b/tensorforce/core/explorations/epsilon_anneal.py
@@ -0,0 +1,55 @@
+# Copyright 2017 reinforce.io. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from tensorforce.core.explorations import Exploration
+
+
+class EpsilonAnneal(Exploration):
+    """
+    Annealing epsilon parameter based on ratio of current timestep to total timesteps.
+
+    Each call recomputes epsilon as `1.0 - timestep / epsilon_timesteps` and
+    clamps the result to the range `[epsilon_final, 1.0]`.
+    """
+
+    def __init__(self, epsilon=1.0, epsilon_final=0.1, epsilon_timesteps=10000):
+        """
+        Args:
+            epsilon: Initial value of `self.epsilon`; overwritten by every call.
+            epsilon_final: Lower bound that the annealed epsilon is clamped to.
+            epsilon_timesteps: Number of timesteps used as the ratio's denominator.
+        """
+        self.epsilon = epsilon
+        self.epsilon_final = epsilon_final
+        self.epsilon_timesteps = epsilon_timesteps
+
+    def __call__(self, episode=0, timestep=0):
+        """
+        Updates and returns epsilon for the given timestep (`episode` is unused).
+        """
+        # TODO: Trim by length of `first_update`, removing steps with no learning.
+        offset = 0  # self.first_update
+        horizon = self.epsilon_timesteps - offset
+        if horizon <= 0:
+            # Nothing to anneal over; also avoids a ZeroDivisionError.
+            self.epsilon = min(1.0, self.epsilon_final)
+            return self.epsilon
+
+        # Force true division: under Python 2, int / int floors, which would
+        # hold epsilon at 1.0 until the horizon and then drop it in one step.
+        completed = float(timestep - offset) / horizon
+        self.epsilon = min(1.0, max(self.epsilon_final, 1.0 - completed))
+
+        return self.epsilon
diff --git a/tensorforce/core/explorations/epsilon_decay.py b/tensorforce/core/explorations/epsilon_decay.py
index fb70bd5b..5d184251 100755
--- a/tensorforce/core/explorations/epsilon_decay.py
+++ b/tensorforce/core/explorations/epsilon_decay.py
@@ -18,8 +18,8 @@
 
 class EpsilonDecay(Exploration):
     """
-    Linearly decaying epsilon parameter based on number of states,
-    an initial random epsilon and a final random epsilon.
+    Exponentially decaying epsilon parameter based on ratio of
+    difference between current and final epsilon to total timesteps.
     """
 
     def __init__(self, epsilon=1.0, epsilon_final=0.1, epsilon_timesteps=10000):