Skip to content

Commit 16f86ff

Browse files
fix(a3c): remove root noise from action selector
- The idea was to add noise at the beginning of episodes, similar to how AlphaGo Zero-style agents apply noise at the root node.
- Instead, just use epsilon-greedy selection, as the selector's name says.
1 parent 39f17ab commit 16f86ff

File tree

1 file changed

+0
-13
lines changed

1 file changed

+0
-13
lines changed

libraries/mathy_python/mathy/agents/action_selectors.py

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -48,9 +48,6 @@ class A3CEpsilonGreedyActionSelector(ActionSelector):
4848
def __init__(self, *, epsilon: float, **kwargs):
4949
super(A3CEpsilonGreedyActionSelector, self).__init__(**kwargs)
5050
self.epsilon = epsilon
51-
self.noise_alpha = 0.3
52-
self.use_noise = True
53-
self.first_step = True
5451

5552
def select(
5653
self,
@@ -63,16 +60,6 @@ def select(
6360

6461
probs, value = self.model.predict_next(last_window.to_inputs())
6562
last_move_mask = last_window.mask[-1]
66-
# Apply noise to the root node (like AlphaGoZero MCTS)
67-
if self.use_noise is True and self.first_step is True:
68-
noise = np.random.dirichlet([self.noise_alpha] * len(probs))
69-
noise = noise * np.array(last_move_mask)
70-
probs += noise
71-
pi_sum = np.sum(probs)
72-
if pi_sum > 0:
73-
probs /= pi_sum
74-
self.first_step = False
75-
7663
no_random = bool(self.worker_id == 0)
7764
if not no_random and np.random.random() < self.epsilon:
7865
# Select a random action

0 commit comments

Comments
 (0)