Skip to content

Commit fc27522

Browse files
fix(mathy_alpha_sm): revert defaults to last known good model
- reduce Grouping Control and Entropy loss signals
- Clip grouping control
1 parent 4d84048 commit fc27522

File tree

4 files changed

+9
-9
lines changed

4 files changed

+9
-9
lines changed

libraries/mathy_python/mathy/agents/a3c/config.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ class A3CConfig(BaseConfig):
3232
# MCTS provides higher quality observations at extra computational cost.
3333
mcts_sims: int = 200
3434

35-
3635
# Whether to use the grouping change aux task
3736
use_grouping_control = True
3837
# Clip signal at 0.0 so it does not optimize into the negatives
@@ -45,19 +44,19 @@ class A3CConfig(BaseConfig):
4544
# other threads time to process. This is useful for
4645
# running more threads than you have processors to
4746
# get a better diversity of experience.
48-
worker_wait: float = 0.01
47+
worker_wait: float = 0.1
4948

5049
# The number of worker agents to create.
5150
num_workers: int = 3
5251

5352
# NOTE: scaling down h_loss is observed to be important to keep it from
5453
# destabilizing the overall loss when it grows very small
55-
entropy_loss_scaling = 1.0
54+
entropy_loss_scaling = 0.05
5655
# Whether to scale entropy loss so it's 0-1
5756
normalize_entropy_loss = True
5857

5958
# How much to scale down loss values from auxiliary tasks
60-
aux_tasks_weight_scale = 1.0
59+
aux_tasks_weight_scale = 0.1
6160
# The lambda value for generalized lambda returns to calculate value loss
6261
# 0.0 = bootstrap values, 1.0 = discounted
6362
td_lambda: float = 0.2

libraries/mathy_python/mathy/agents/base_config.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ class Config:
3737
num_workers: int = 3
3838
# The lambda value for generalized lambda returns to calculate value loss
3939
# 0.0 = bootstrap values, 1.0 = discounted
40-
td_lambda: float = 0.3
40+
td_lambda: float = 0.2
4141
# Verbose setting to print out worker_0 training steps. Useful for trying
4242
# to find problems.
4343
print_training: bool = False
@@ -52,7 +52,7 @@ class Config:
5252
# Whether to use the grouping change aux task
5353
use_grouping_control = False
5454
# Clip signal at 0.0 so it does not optimize into the negatives
55-
clip_grouping_control = False
55+
clip_grouping_control = True
5656

5757
# Include the time/type environment features in the embeddings
5858
use_env_features = False

libraries/mathy_python/mathy/agents/policy_value_model.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,7 @@ def get_or_create_policy_model(
237237

238238

239239
def load_policy_value_model(
240-
model_data_folder: str,
240+
model_data_folder: str, silent: bool = False
241241
) -> Tuple[PolicyValueModel, BaseConfig]:
242242
meta_file = Path(model_data_folder) / "model.config.json"
243243
if not meta_file.exists():
@@ -263,7 +263,7 @@ def load_policy_value_model(
263263
model.build(initial_state.to_input_shapes())
264264
model.predict(init_inputs)
265265
model.predict_next(init_inputs)
266-
if args.verbose:
266+
if not silent:
267267
with msg.loading(f"Loading model: {model_file}..."):
268268
_load_model(model, str(model_file), str(optimizer_file))
269269
msg.good(f"Loaded model: {model_file}")

libraries/mathy_python/mathy/mathy.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,10 @@ def __init__(
1717
model_path: str = None,
1818
model: PolicyValueModel = None,
1919
config: BaseConfig = None,
20+
silent: bool = False,
2021
):
2122
if model_path is not None:
22-
self.model, self.config = load_policy_value_model(model_path)
23+
self.model, self.config = load_policy_value_model(model_path, silent=silent)
2324
elif model is not None and config is not None:
2425
if not isinstance(model, PolicyValueModel):
2526
raise ValueError("model must derive PolicyValueModel for compatibility")

0 commit comments

Comments
 (0)