chore(exp): add extra sac experiments
This commit adds additional SAC hyperparameter experiments that are used in my master's thesis.
rickstaa committed Mar 11, 2024
1 parent 7a7e8c0 commit fad6c28
Showing 29 changed files with 799 additions and 18 deletions.
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 3658 # 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 48104 # 567 3658 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 567 # 3658 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 78456 # 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 78456 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
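
The six diffs above raise the initial entropy temperature from alpha: 1.0 to alpha: 2.0 while keeping adaptive_temperature: True, so alpha is only a starting point that is subsequently tuned with its own learning rate (lr_alpha). For reference, below is a minimal PyTorch sketch of the standard SAC adaptive-temperature update that these fields configure; it illustrates the published SAC formulation rather than this repository's exact code, and the target_entropy value and helper names are placeholders.

import math
import torch

# Standard SAC temperature update (illustrative sketch, not this codebase's code).
initial_alpha = 2.0        # `alpha` in the configs above
lr_alpha = 1e-4            # `lr_alpha` in the configs above
target_entropy = -4.0      # common heuristic: -action_dim (placeholder value)

log_alpha = torch.tensor(math.log(initial_alpha), requires_grad=True)
alpha_optimizer = torch.optim.Adam([log_alpha], lr=lr_alpha)

def update_temperature(log_pi: torch.Tensor) -> float:
    """One gradient step on the temperature, given log-probs of sampled actions."""
    alpha_loss = -(log_alpha * (log_pi + target_entropy).detach()).mean()
    alpha_optimizer.zero_grad()
    alpha_loss.backward()
    alpha_optimizer.step()
    return log_alpha.exp().item()  # alpha used in the actor and critic losses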
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 78456 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
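
Several numeric fields in these configs are stored as strings (e.g. lr_a: "1e-4", replay_size: "int(1e6)"), which the experiment runner presumably evaluates when the file is loaded. As a rough illustration, a config like the one above could be loaded and coerced as in the following Python sketch; the file name and the coerce_numbers helper are hypothetical and not part of this commit.

import yaml

# Hypothetical helper (not part of this commit): load an experiment config and
# coerce the string-typed numeric fields such as "1e-4" and "int(1e6)".
NUMERIC_KEYS = {
    "lr_a", "lr_c", "lr_alpha",
    "lr_a_final", "lr_c_final", "lr_alpha_final",
    "replay_size",
}

def coerce_numbers(cfg: dict) -> dict:
    out = dict(cfg)
    for key in NUMERIC_KEYS & out.keys():
        value = str(out[key])
        if value.startswith("int(") and value.endswith(")"):
            out[key] = int(float(value[4:-1]))  # "int(1e6)" -> 1000000
        else:
            out[key] = float(value)             # "1e-4" -> 0.0001
    return out

# Example usage with a hypothetical file name:
with open("sac_fetch_reach_steps_per_update_80.yaml") as f:
    cfg = coerce_numbers(yaml.safe_load(f))
print(cfg["lr_a"], cfg["replay_size"])  # 0.0001 1000000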
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 3658 # 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 48104 # 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 567 # 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 78456 # 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
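
The lr_*_decay_type: "linear" and lr_decay_ref: "step" fields in the new configs request a linear, step-referenced decay of each learning rate from its initial value (e.g. lr_a: "1e-4") toward the corresponding *_final value. A minimal sketch of such a schedule is given below, assuming the rate is interpolated over the full run of epochs * steps_per_epoch steps; the codebase's actual decay implementation may differ in detail.

# Minimal sketch of a linear, step-referenced learning-rate decay (assumption,
# not this repository's exact implementation).
def linear_lr(step: int, total_steps: int, lr_init: float, lr_final: float) -> float:
    frac = min(max(step / total_steps, 0.0), 1.0)  # progress through training
    return lr_init + frac * (lr_final - lr_init)

total_steps = 147 * 2048  # epochs * steps_per_epoch from the configs above
print(linear_lr(0, total_steps, 1e-4, 3.3333333e-10))            # 1e-4 at the start
print(linear_lr(total_steps, total_steps, 1e-4, 3.3333333e-10))  # ≈3.33e-10 at the end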