chore(exp): add extra sac experiments
This commit adds additional SAC hyperparameter experiments that are used in my master's thesis.
rickstaa committed Mar 11, 2024
1 parent 7a7e8c0 commit fad6c28
Showing 29 changed files with 799 additions and 18 deletions.
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 3658 # 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 48104 # 567 3658 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 567 # 3658 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 78456 # 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
@@ -1,5 +1,5 @@
 alg_name: sac
-exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_infinite_horizon_exp
+exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_bigger_initial_alpha
 env_name: "stable_gym:FetchReachCost-v1"
 ac_kwargs:
   hidden_sizes:
@@ -19,7 +19,7 @@ update_every: 100
 update_after: 1000
 steps_per_update: 50 # NOTE: Decreased to 50 for consistency with the codebase.
 num_test_episodes: 10
-alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
+alpha: 2.0
 gamma: 0.995
 polyak: 0.995
 adaptive_temperature: True
@@ -38,5 +38,5 @@ replay_size: "int(1e6)"
 seed: 78456 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
 save_freq: 10
 use_wandb: True
-wandb_group: "han2020_reproduction_sac"
+wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
 device: "gpu:1"
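
The six diffs above raise the initial entropy temperature from alpha: 1.0 to alpha: 2.0 while keeping adaptive_temperature: True, so alpha is only a starting point that is subsequently tuned with its own learning rate (lr_alpha). For reference, below is a minimal PyTorch sketch of the standard SAC adaptive-temperature update that these fields configure; it illustrates the published SAC formulation rather than this repository's exact code, and the target_entropy value and helper names are placeholders.

import math
import torch

# Standard SAC temperature update (illustrative sketch, not this codebase's code).
initial_alpha = 2.0        # `alpha` in the configs above
lr_alpha = 1e-4            # `lr_alpha` in the configs above
target_entropy = -4.0      # common heuristic: -action_dim (placeholder value)

log_alpha = torch.tensor(math.log(initial_alpha), requires_grad=True)
alpha_optimizer = torch.optim.Adam([log_alpha], lr=lr_alpha)

def update_temperature(log_pi: torch.Tensor) -> float:
    """One gradient step on the temperature, given log-probs of sampled actions."""
    alpha_loss = -(log_alpha * (log_pi + target_entropy).detach()).mean()
    alpha_optimizer.zero_grad()
    alpha_loss.backward()
    alpha_optimizer.step()
    return log_alpha.exp().item()  # alpha used in the actor and critic losses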
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 78456 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
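
Several numeric fields in these configs are stored as strings (e.g. lr_a: "1e-4", replay_size: "int(1e6)"), which the experiment runner presumably evaluates when the file is loaded. As a rough illustration, a config like the one above could be loaded and coerced as in the following Python sketch; the file name and the coerce_numbers helper are hypothetical and not part of this commit.

import yaml

# Hypothetical helper (not part of this commit): load an experiment config and
# coerce the string-typed numeric fields such as "1e-4" and "int(1e6)".
NUMERIC_KEYS = {
    "lr_a", "lr_c", "lr_alpha",
    "lr_a_final", "lr_c_final", "lr_alpha_final",
    "replay_size",
}

def coerce_numbers(cfg: dict) -> dict:
    out = dict(cfg)
    for key in NUMERIC_KEYS & out.keys():
        value = str(out[key])
        if value.startswith("int(") and value.endswith(")"):
            out[key] = int(float(value[4:-1]))  # "int(1e6)" -> 1000000
        else:
            out[key] = float(value)             # "1e-4" -> 0.0001
    return out

# Example usage with a hypothetical file name:
with open("sac_fetch_reach_steps_per_update_80.yaml") as f:
    cfg = coerce_numbers(yaml.safe_load(f))
print(cfg["lr_a"], cfg["replay_size"])  # 0.0001 1000000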
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 3658 # 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 48104 # 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 567 # 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
@@ -0,0 +1,42 @@
alg_name: sac
exp_name: han2020_reproduction_sac_fetch_reach_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:FetchReachCost-v1"
ac_kwargs:
  hidden_sizes:
    actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256] # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 200
epochs: 147
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0 # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "3.3333333e-10" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
seed: 78456 # 48104 567 3658 234 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
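
The lr_*_decay_type: "linear" and lr_decay_ref: "step" fields in the new configs request a linear, step-referenced decay of each learning rate from its initial value (e.g. lr_a: "1e-4") toward the corresponding *_final value. A minimal sketch of such a schedule is given below, assuming the rate is interpolated over the full run of epochs * steps_per_epoch steps; the codebase's actual decay implementation may differ in detail.

# Minimal sketch of a linear, step-referenced learning-rate decay (assumption,
# not this repository's exact implementation).
def linear_lr(step: int, total_steps: int, lr_init: float, lr_final: float) -> float:
    frac = min(max(step / total_steps, 0.0), 1.0)  # progress through training
    return lr_init + frac * (lr_final - lr_init)

total_steps = 147 * 2048  # epochs * steps_per_epoch from the configs above
print(linear_lr(0, total_steps, 1e-4, 3.3333333e-10))            # 1e-4 at the start
print(linear_lr(total_steps, total_steps, 1e-4, 3.3333333e-10))  # ≈3.33e-10 at the end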