4 files changed: +9 -9 lines changed
libraries/mathy_python/mathy Expand file tree Collapse file tree 4 files changed +9
-9
lines changed Original file line number Diff line number Diff line change @@ -32,7 +32,6 @@ class A3CConfig(BaseConfig):
3232 # MCTS provides higher quality observations at extra computational cost.
3333 mcts_sims : int = 200
3434
35-
3635 # Whether to use the grouping change aux task
3736 use_grouping_control = True
3837 # Clip signal at 0.0 so it does not optimize into the negatives
@@ -45,19 +44,19 @@ class A3CConfig(BaseConfig):
4544 # other threads time to process. This is useful for
4645 # running more threads than you have processors to
4746 # get a better diversity of experience.
48- worker_wait : float = 0.01
47+ worker_wait : float = 0.1
4948
5049 # The number of worker agents to create.
5150 num_workers : int = 3
5251
5352 # NOTE: scaling down h_loss is observed to be important to keep it from
5453 # destabilizing the overall loss when it grows very small
55- entropy_loss_scaling = 1.0
54+ entropy_loss_scaling = 0.05
5655 # Whether to scale entropy loss so it's 0-1
5756 normalize_entropy_loss = True
5857
5958 # How much to scale down loss values from auxiliary tasks
60- aux_tasks_weight_scale = 1.0
59+ aux_tasks_weight_scale = 0.1
6160 # The lambda value for generalized lambda returns to calculate value loss
6261 # 0.0 = bootstrap values, 1.0 = discounted
6362 td_lambda : float = 0.2
Original file line number Diff line number Diff line change @@ -37,7 +37,7 @@ class Config:
3737 num_workers : int = 3
3838 # The lambda value for generalized lambda returns to calculate value loss
3939 # 0.0 = bootstrap values, 1.0 = discounted
40- td_lambda : float = 0.3
40+ td_lambda : float = 0.2
4141 # Verbose setting to print out worker_0 training steps. Useful for trying
4242 # to find problems.
4343 print_training : bool = False
@@ -52,7 +52,7 @@ class Config:
5252 # Whether to use the grouping change aux task
5353 use_grouping_control = False
5454 # Clip signal at 0.0 so it does not optimize into the negatives
55- clip_grouping_control = False
55+ clip_grouping_control = True
5656
5757 # Include the time/type environment features in the embeddings
5858 use_env_features = False
Original file line number Diff line number Diff line change @@ -237,7 +237,7 @@ def get_or_create_policy_model(
237237
238238
239239def load_policy_value_model (
240- model_data_folder : str ,
240+ model_data_folder : str , silent : bool = False
241241) -> Tuple [PolicyValueModel , BaseConfig ]:
242242 meta_file = Path (model_data_folder ) / "model.config.json"
243243 if not meta_file .exists ():
@@ -263,7 +263,7 @@ def load_policy_value_model(
263263 model .build (initial_state .to_input_shapes ())
264264 model .predict (init_inputs )
265265 model .predict_next (init_inputs )
266- if args . verbose :
266+ if not silent :
267267 with msg .loading (f"Loading model: { model_file } ..." ):
268268 _load_model (model , str (model_file ), str (optimizer_file ))
269269 msg .good (f"Loaded model: { model_file } " )
Original file line number Diff line number Diff line change @@ -17,9 +17,10 @@ def __init__(
1717 model_path : str = None ,
1818 model : PolicyValueModel = None ,
1919 config : BaseConfig = None ,
20+ silent : bool = False ,
2021 ):
2122 if model_path is not None :
22- self .model , self .config = load_policy_value_model (model_path )
23+ self .model , self .config = load_policy_value_model (model_path , silent = silent )
2324 elif model is not None and config is not None :
2425 if not isinstance (model , PolicyValueModel ):
2526 raise ValueError ("model must derive PolicyValueModel for compatibility" )
You can’t perform that action at this time.
0 commit comments