1 file changed: 9 additions (+9), 2 deletions (-2).
Columns: original file line number | diff line number | diff line change
@@ -36,15 +36,22 @@ def play_game(grid, policy):
3636 # but r(t) results from taking action a(t-1) from s(t-1) and landing in s(t)
3737 states_actions_rewards = [(s , a , 0 )]
3838 seen_states = set ()
39+ seen_states .add (grid .current_state ())
40+ num_steps = 0
3941 while True :
40- old_s = grid .current_state ()
4142 r = grid .move (a )
43+ num_steps += 1
4244 s = grid .current_state ()
4345
4446 if s in seen_states :
4547 # hack so that we don't end up in an infinitely long episode
4648 # bumping into the wall repeatedly
47- states_actions_rewards .append ((s , None , - 100 ))
49+ # if num_steps == 1 -> bumped into a wall and hasn't moved anywhere:
50+ # reward = -10
51+ # else:
52+ # reward = -10 / num_steps (the penalty shrinks in proportion to 1 / num_steps)
53+ reward = - 10. / num_steps
54+ states_actions_rewards .append ((s , None , reward ))
4855 break
4956 elif grid .game_over ():
5057 states_actions_rewards .append ((s , None , r ))
You can’t perform that action at this time.
0 commit comments