We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0849e3c, commit 3d2d78b (Copy full SHA for 3d2d78b)
libraries/mathy_python/mathy/env.py
@@ -161,8 +161,7 @@ def get_win_signal(self, env_state: MathyEnvState) -> float:
161
# the number of allowed steps, double the bonus signal
162
if total_moves > 10 and current_move < total_moves / 2:
163
bonus *= 2
164
- # Don't let a win go negative
165
- return max(EnvRewards.WIN + bonus, 0.1)
+ return min(2.0, EnvRewards.WIN + bonus)
166
167
def get_lose_signal(self, env_state: MathyEnvState) -> float:
168
"""Calculate the reward value for failing to complete the episode. This is done
0 commit comments