From 521269e8fc80a82c11166e8dbe8cdb1e420b2f80 Mon Sep 17 00:00:00 2001 From: Xingdong Zuo Date: Tue, 11 Apr 2017 19:51:12 +0200 Subject: [PATCH 1/2] Fix typo --- reinforcement_learning/reinforce.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/reinforcement_learning/reinforce.py b/reinforcement_learning/reinforce.py index 8c8ff485f6..6f5d7042db 100644 --- a/reinforcement_learning/reinforce.py +++ b/reinforcement_learning/reinforce.py @@ -15,10 +15,10 @@ parser.add_argument('--gamma', type=float, default=0.99, metavar='G', help='discount factor (default: 0.99)') parser.add_argument('--seed', type=int, default=543, metavar='N', - help='random seed (default: 1)') + help='random seed (default: 543)') parser.add_argument('--render', action='store_true', help='render the environment') -parser.add_argument('--log-interval', type=int, default=10, metavar='N', +parser.add_argument('--log_interval', type=int, default=10, metavar='N', help='interval between training status logs (default: 10)') args = parser.parse_args() @@ -57,7 +57,6 @@ def select_action(state): def finish_episode(): R = 0 - saved_actions = model.saved_actions rewards = [] for r in model.rewards[::-1]: R = r + args.gamma * R @@ -73,7 +72,7 @@ def finish_episode(): del model.saved_actions[:] -running_reward = 10 +running_reward = 0 for i_episode in count(1): state = env.reset() for t in range(10000): # Don't infinite loop while learning From 19f8d5a5180aef5bd885a03d2a9b85380f7af73c Mon Sep 17 00:00:00 2001 From: Xingdong Zuo Date: Tue, 11 Apr 2017 20:07:45 +0200 Subject: [PATCH 2/2] Update reinforce.py --- reinforcement_learning/reinforce.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reinforcement_learning/reinforce.py b/reinforcement_learning/reinforce.py index 6f5d7042db..77a733c702 100644 --- a/reinforcement_learning/reinforce.py +++ b/reinforcement_learning/reinforce.py @@ -72,7 +72,7 @@ def finish_episode(): del model.saved_actions[:] -running_reward = 0 +running_reward = 10 for i_episode in count(1): state = env.reset() for t in range(10000): # Don't infinite loop while learning