Skip to content

Commit

Permalink
chore(exp): add SAC extra experiments (#426)
Browse files Browse the repository at this point in the history
This commit adds the extra experiments for the SAC algorithm in the
cartpole, oscillator and comp oscillator environments.
  • Loading branch information
rickstaa authored Mar 13, 2024
1 parent 15080e3 commit 8ee23ea
Show file tree
Hide file tree
Showing 118 changed files with 3,084 additions and 30 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a bigger initial alpha (2.0).
# This variant lists all 5 tuning seeds in a single config.
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_bigger_initial_alpha
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 50  # NOTE: Decreased to 50 for consistency with the codebase.
num_test_episodes: 10
alpha: 2.0
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Using 5 seeds for tuning. The value is one space-separated string
# (quoted to make that explicit); presumably the experiment runner splits it
# into one run per seed -- TODO confirm against the runner.
seed: "78456 48104 567 3658 234"
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a bigger initial alpha (2.0).
# This variant runs a single seed (234).
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_bigger_initial_alpha
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 50  # NOTE: Decreased to 50 for consistency with the codebase.
num_test_episodes: 10
alpha: 2.0
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Single-seed run; 234 is one of the 5 seeds used for tuning.
seed: 234
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a bigger initial alpha (2.0).
# This variant runs a single seed (3658).
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_bigger_initial_alpha
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 50  # NOTE: Decreased to 50 for consistency with the codebase.
num_test_episodes: 10
alpha: 2.0
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Single-seed run; 3658 is one of the 5 seeds used for tuning.
# Remaining values left commented out in the original seed line: 234
seed: 3658
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a bigger initial alpha (2.0).
# This variant runs a single seed (48104).
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_bigger_initial_alpha
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 50  # NOTE: Decreased to 50 for consistency with the codebase.
num_test_episodes: 10
alpha: 2.0
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Single-seed run; 48104 is one of the 5 seeds used for tuning.
# Remaining values left commented out in the original seed line: 567 3658 234
seed: 48104
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a bigger initial alpha (2.0).
# This variant runs a single seed (567).
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_bigger_initial_alpha
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 50  # NOTE: Decreased to 50 for consistency with the codebase.
num_test_episodes: 10
alpha: 2.0
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Single-seed run; 567 is one of the 5 seeds used for tuning.
# Remaining values left commented out in the original seed line: 3658 234
seed: 567
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a bigger initial alpha (2.0).
# This variant runs a single seed (78456).
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_bigger_initial_alpha
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 50  # NOTE: Decreased to 50 for consistency with the codebase.
num_test_episodes: 10
alpha: 2.0
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Single-seed run; 78456 is one of the 5 seeds used for tuning.
# Remaining values left commented out in the original seed line:
# 48104 567 3658 234
seed: 78456
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_bigger_initial_alpha"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a different steps_per_update (80).
# This variant lists all 5 tuning seeds in a single config.
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0  # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Using 5 seeds for tuning. The value is one space-separated string
# (quoted to make that explicit); presumably the experiment runner splits it
# into one run per seed -- TODO confirm against the runner.
seed: "78456 48104 567 3658 234"
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a different steps_per_update (80).
# This variant runs a single seed (234).
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0  # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Single-seed run; 234 is one of the 5 seeds used for tuning.
seed: 234
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# SAC experiment on CartPoleCost-v1 using a different steps_per_update (80).
# This variant runs a single seed (3658).
alg_name: sac
exp_name: han2020_reproduction_sac_cartpole_cost_alpha3_tune_exp_different_steps_per_update
env_name: "stable_gym:CartPoleCost-v1"
# Actor/critic network settings. The sub-mappings must stay nested under
# ac_kwargs; flattened, the repeated actor/critic keys become duplicates.
ac_kwargs:
  hidden_sizes:
    actor: [256, 256]  # NOTE: Using [256, 256] for consistency with the article.
    critic: [256, 256]  # NOTE: Use [256, 256] for consistency with the codebase.
  activation:
    actor: "nn.ReLU"
    critic: "nn.ReLU"
  output_activation:
    actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 250
epochs: 489
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 1.0  # NOTE: Decreased to 1.0 for consistency with the codebase.
gamma: 0.995
polyak: 0.995
adaptive_temperature: true
# Learning rates are kept as quoted strings, exactly as in the original config.
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_a_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-10"  # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
# NOTE: Single-seed run; 3658 is one of the 5 seeds used for tuning.
# Remaining values left commented out in the original seed line: 234
seed: 3658
save_freq: 10
use_wandb: true
wandb_group: "han2020_reproduction_sac_different_steps_per_update"
device: "gpu:1"
Loading

0 comments on commit 8ee23ea

Please sign in to comment.