chore(exp): fix CompOscillator learning length (#424)

This commit changed the CompOscillator learning length from 1e5 to 2e5. This was done to deal with an inconsistency in Han et al.'s research.
rickstaa · Mar 7, 2024 · 88e2ff1 · 88e2ff1
1 parent 35a4d69
commit 88e2ff1
Show file tree

Hide file tree

Showing 36 changed files with 330 additions and 42 deletions.
diff --git a/...pha3_tune_experiment_extra_seeds26000.yml → ...lpha3_tune_experiment_extra_seed26000.yml b/...pha3_tune_experiment_extra_seeds26000.yml → ...lpha3_tune_experiment_extra_seed26000.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...ha3_tune_experiment_extra_seeds388389.yml → ...pha3_tune_experiment_extra_seed388389.yml b/...ha3_tune_experiment_extra_seeds388389.yml → ...pha3_tune_experiment_extra_seed388389.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...ha3_tune_experiment_extra_seeds408660.yml → ...pha3_tune_experiment_extra_seed408660.yml b/...ha3_tune_experiment_extra_seeds408660.yml → ...pha3_tune_experiment_extra_seed408660.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...pha3_tune_experiment_extra_seeds49672.yml → ...lpha3_tune_experiment_extra_seed49672.yml b/...pha3_tune_experiment_extra_seeds49672.yml → ...lpha3_tune_experiment_extra_seed49672.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...ha3_tune_experiment_extra_seeds858762.yml → ...pha3_tune_experiment_extra_seed858762.yml b/...ha3_tune_experiment_extra_seeds858762.yml → ...pha3_tune_experiment_extra_seed858762.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...ds/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seeds.yml b/...ds/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seeds.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/..._reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed26000_short.yml b/..._reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed26000_short.yml
@@ -0,0 +1,48 @@
+alg_name: lac
+exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
+env_name: "stable_gym:OscillatorComplicated-v1"
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
+    critic: [256, 256, 16]  # NOTE: Using [256, 256, 16] for consistency with the article.
+  activation:
+    actor: "nn.ReLU"
+    critic: "nn.ReLU"
+  output_activation:
+    actor: "nn.ReLU"
+opt_type: "minimize"
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
+labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
+# gamma: 0.995   # NOTE: Not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: True 
+lr_a: "1e-4"
+lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
+lr_a_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
+lr_decay_ref: "step"
+batch_size: 256
+replay_size: "int(1e6)"
+horizon_length: 5
+seed: 26000 # 49672 858762 388389  # NOTE: Using 5 seeds for tuning.
+save_freq: 10
+use_wandb: True
+wandb_group: "han2020_reproduction_extra_seeds_short"
+device: "gpu:1"
diff --git a/...reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed388389_short.yml b/...reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed388389_short.yml
@@ -0,0 +1,48 @@
+alg_name: lac
+exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
+env_name: "stable_gym:OscillatorComplicated-v1"
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
+    critic: [256, 256, 16]  # NOTE: Using [256, 256, 16] for consistency with the article.
+  activation:
+    actor: "nn.ReLU"
+    critic: "nn.ReLU"
+  output_activation:
+    actor: "nn.ReLU"
+opt_type: "minimize"
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
+labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
+# gamma: 0.995   # NOTE: Not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: True 
+lr_a: "1e-4"
+lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
+lr_a_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
+lr_decay_ref: "step"
+batch_size: 256
+replay_size: "int(1e6)"
+horizon_length: 5
+seed: 388389  # NOTE: Using 5 seeds for tuning.
+save_freq: 10
+use_wandb: True
+wandb_group: "han2020_reproduction_extra_seeds_short"
+device: "gpu:1"
diff --git a/...reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed408660_short.yml b/...reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed408660_short.yml
@@ -0,0 +1,48 @@
+alg_name: lac
+exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
+env_name: "stable_gym:OscillatorComplicated-v1"
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
+    critic: [256, 256, 16]  # NOTE: Using [256, 256, 16] for consistency with the article.
+  activation:
+    actor: "nn.ReLU"
+    critic: "nn.ReLU"
+  output_activation:
+    actor: "nn.ReLU"
+opt_type: "minimize"
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
+labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
+# gamma: 0.995   # NOTE: Not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: True 
+lr_a: "1e-4"
+lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
+lr_a_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
+lr_decay_ref: "step"
+batch_size: 256
+replay_size: "int(1e6)"
+horizon_length: 5
+seed: 408660 # 26000 49672 858762 388389  # NOTE: Using 5 seeds for tuning.
+save_freq: 10
+use_wandb: True
+wandb_group: "han2020_reproduction_extra_seeds_short"
+device: "gpu:1"
diff --git a/..._reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed49672_short.yml b/..._reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed49672_short.yml
@@ -0,0 +1,48 @@
+alg_name: lac
+exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
+env_name: "stable_gym:OscillatorComplicated-v1"
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
+    critic: [256, 256, 16]  # NOTE: Using [256, 256, 16] for consistency with the article.
+  activation:
+    actor: "nn.ReLU"
+    critic: "nn.ReLU"
+  output_activation:
+    actor: "nn.ReLU"
+opt_type: "minimize"
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
+labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
+# gamma: 0.995   # NOTE: Not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: True 
+lr_a: "1e-4"
+lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
+lr_a_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
+lr_decay_ref: "step"
+batch_size: 256
+replay_size: "int(1e6)"
+horizon_length: 5
+seed: 49672 # 858762 388389  # NOTE: Using 5 seeds for tuning.
+save_freq: 10
+use_wandb: True
+wandb_group: "han2020_reproduction_extra_seeds_short"
+device: "gpu:1"
diff --git a/...reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed858762_short.yml b/...reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seed858762_short.yml
@@ -0,0 +1,48 @@
+alg_name: lac
+exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
+env_name: "stable_gym:OscillatorComplicated-v1"
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
+    critic: [256, 256, 16]  # NOTE: Using [256, 256, 16] for consistency with the article.
+  activation:
+    actor: "nn.ReLU"
+    critic: "nn.ReLU"
+  output_activation:
+    actor: "nn.ReLU"
+opt_type: "minimize"
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
+labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
+# gamma: 0.995   # NOTE: Not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: True 
+lr_a: "1e-4"
+lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
+lr_a_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
+lr_decay_ref: "step"
+batch_size: 256
+replay_size: "int(1e6)"
+horizon_length: 5
+seed: 858762 # 388389  # NOTE: Using 5 seeds for tuning.
+save_freq: 10
+use_wandb: True
+wandb_group: "han2020_reproduction_extra_seeds_short"
+device: "gpu:1"
diff --git a/...2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seeds_short.yml b/...2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_extra_seeds_short.yml
@@ -0,0 +1,48 @@
+alg_name: lac
+exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
+env_name: "stable_gym:OscillatorComplicated-v1"
+ac_kwargs:
+  hidden_sizes:
+    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
+    critic: [256, 256, 16]  # NOTE: Using [256, 256, 16] for consistency with the article.
+  activation:
+    actor: "nn.ReLU"
+    critic: "nn.ReLU"
+  output_activation:
+    actor: "nn.ReLU"
+opt_type: "minimize"
+max_ep_len: 400
+epochs: 49
+steps_per_epoch: 2048
+start_steps: 0
+update_every: 100
+update_after: 1000
+steps_per_update: 80
+num_test_episodes: 10
+alpha: 2.0
+alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5  # NOTE: Tuning alpha3.
+labda: 0.99  # NOTE: Decreased from 1.0 to 0.99 for stability.
+# gamma: 0.995   # NOTE: Not used for finite horizon tasks.
+polyak: 0.995
+adaptive_temperature: True 
+lr_a: "1e-4"
+lr_c: "3e-4"
+lr_alpha: "1e-4"
+lr_labda: "3e-4"
+lr_a_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_c_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_alpha_final: "1e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_labda_final: "3e-9"  # NOTE: Not in Han et al. 2020, but aligned with original code.
+lr_a_decay_type: "linear"
+lr_c_decay_type: "linear"
+lr_alpha_decay_type: "linear"
+lr_labda_decay_type: "constant"
+lr_decay_ref: "step"
+batch_size: 256
+replay_size: "int(1e6)"
+horizon_length: 5
+seed: 408660 26000 49672 858762 388389  # NOTE: Using 5 seeds for tuning.
+save_freq: 10
+use_wandb: True
+wandb_group: "han2020_reproduction_extra_seeds_short"
+device: "gpu:1"
diff --git a/...omp_oscillator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml b/...omp_oscillator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...llator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml b/...llator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed234.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...lator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml b/...lator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed3658.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...ator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml b/...ator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed48104.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100

diff --git a/...llator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml b/...llator/han2020_reproduction_lac_oscillator_complicated_alpha3_tune_experiment_seed567.yml
@@ -12,7 +12,7 @@ ac_kwargs:
     actor: "nn.ReLU"
 opt_type: "minimize"
 max_ep_len: 400
-epochs: 49
+epochs: 98
 steps_per_epoch: 2048
 start_steps: 0
 update_every: 100