Skip to content

Commit

Permalink
Add Max-Boltzmann policy, combination of eps-greedy and Boltzmann (#122)
Browse files Browse the repository at this point in the history
* Add Max-Boltzmann policy, combination of eps-greedy and Boltzmann

Wiering, M.: Explorations in Efficient Reinforcement Learning. PhD
thesis, University of Amsterdam, Amsterdam (1999)

* add reference
  • Loading branch information
RyanHope authored and matthiasplappert committed Nov 30, 2017
1 parent 09ca76a commit bb79f78
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions rl/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,42 @@ def get_config(self):
return config


class MaxBoltzmannQPolicy(Policy):
    """Max-Boltzmann exploration: a blend of the eps-greedy and Boltzmann Q-policies.

    With probability `eps` the action is sampled from a Boltzmann (softmax)
    distribution over the Q-values; otherwise the greedy (arg-max) action is
    returned.

    Reference:
        Wiering, M.: Explorations in Efficient Reinforcement Learning.
        PhD thesis, University of Amsterdam, Amsterdam (1999)
        https://pure.uva.nl/ws/files/3153478/8461_UBA003000033.pdf
    """

    def __init__(self, eps=.1, tau=1., clip=(-500., 500.)):
        """Store the exploration parameters.

        # Arguments
            eps: Probability of sampling from the Boltzmann distribution
                instead of acting greedily.
            tau: Divisor applied to the Q-values before exponentiation.
            clip: (low, high) bounds applied to `q_values / tau` before
                exponentiation.
        """
        super(MaxBoltzmannQPolicy, self).__init__()
        self.eps, self.tau, self.clip = eps, tau, clip

    def select_action(self, q_values):
        """Return the selected action for the given Q-values.

        # Arguments
            q_values: 1-dimensional array holding one Q-value per action.

        # Returns
            Index of the selected action.
        """
        assert q_values.ndim == 1
        q_values = q_values.astype('float64')

        # Draw first so the random stream is consumed in the same order
        # regardless of which branch is taken afterwards.
        explore = np.random.uniform() < self.eps
        if not explore:
            return np.argmax(q_values)

        # Boltzmann branch: clip the scaled values so np.exp cannot overflow.
        low, high = self.clip[0], self.clip[1]
        scaled = np.clip(q_values / self.tau, low, high)
        weights = np.exp(scaled)
        probs = weights / np.sum(weights)
        nb_actions = q_values.shape[0]
        return np.random.choice(range(nb_actions), p=probs)

    def get_config(self):
        """Return the base policy configuration extended with `eps`, `tau` and `clip`."""
        config = super(MaxBoltzmannQPolicy, self).get_config()
        for key in ('eps', 'tau', 'clip'):
            config[key] = getattr(self, key)
        return config


class BoltzmannGumbelQPolicy(Policy):
"""Implements Boltzmann-Gumbel exploration (BGE) adapted for Q learning
based on the paper Boltzmann Exploration Done Right
Expand Down

0 comments on commit bb79f78

Please sign in to comment.