Skip to content

Commit 02b11ee

Browse files
fix(training): use n-step windows during a3c training
- The current model has a bug where it uses the entire observation history during training. This means that predictions progressively get slower as episode length grows.
- Clipping to 3-step windows by default, because that seems to be a popular choice and the A3C agent appears capable of using them to quickly solve easy polynomial problems.
1 parent 92695d6 commit 02b11ee

File tree

2 files changed

+3
-4
lines changed

2 files changed

+3
-4
lines changed

libraries/mathy_python/mathy/agents/episode_memory.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -58,9 +58,9 @@ def clear(self):
         self.grouping_changes = []

     def to_window_observation(
-        self, observation: MathyObservation, window_size: int = 1
+        self, observation: MathyObservation, window_size: int = 3
     ) -> MathyWindowObservation:
-        previous = -(window_size - 1)
+        previous = -(max(window_size - 1, 1))
         window_observations = self.observations[previous:] + [observation]
         return observations_to_window(window_observations)
```

libraries/mathy_python/mathy/agents/policy_value_model.py

Lines changed: 1 addition & 2 deletions

```diff
@@ -4,7 +4,6 @@
 from pathlib import Path
 from shutil import copyfile
 from typing import Any, Callable, Dict, List, Optional, Tuple, cast
-from memory_profiler import profile

 import numpy as np
 import srsly
@@ -275,7 +274,7 @@ def handshake_keras(m: ThincPolicyValueModel):
     # If we're doing transfer, reset optimizer steps
     if is_main and args.init_model_from is not None:
         msg.info("reset optimizer steps to 0 for transfer model")
-        model.optimizer.iterations.assign(0)
+        model.unwrapped.optimizer.iterations.assign(0)
     elif required:
         print_error(
             ValueError("Model Not Found"),
```

0 commit comments

Comments (0)