Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lunar spec #388

Merged
merged 36 commits into master from lunar-spec on Jul 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
fdba8fc
ddqn lunar search
lgraesser Jul 21, 2019
78a073f
limit search
lgraesser Jul 21, 2019
219af61
limit search
lgraesser Jul 21, 2019
caaf633
a2c lunar spec
lgraesser Jul 21, 2019
356440c
dont normalize state
lgraesser Jul 21, 2019
8413b71
increase a2c lunar frames
lgraesser Jul 21, 2019
5c4c05c
fix boltzmann action pd
lgraesser Jul 21, 2019
05a7f54
a2c gae spec
lgraesser Jul 21, 2019
b24cfbf
a2c nstep lunar
lgraesser Jul 21, 2019
a601f38
rename file
lgraesser Jul 21, 2019
f59af03
ppo lunar spec
lgraesser Jul 21, 2019
2e5234f
fix name
lgraesser Jul 21, 2019
5d1c01d
update lunar specs
lgraesser Jul 21, 2019
1b73524
change nstep a2c lunar
lgraesser Jul 21, 2019
a913bcf
change steps
lgraesser Jul 22, 2019
b17863a
dqn tuning
lgraesser Jul 22, 2019
1735167
normalize state dqn
lgraesser Jul 22, 2019
6389110
increase a2c lr
lgraesser Jul 22, 2019
9354d75
change NormalizeStateEnv method name
lgraesser Jul 22, 2019
e7eb255
tune dqn lunar
lgraesser Jul 22, 2019
ace9171
Merge branch 'master' into lunar-spec
lgraesser Jul 22, 2019
d579c28
change lr
lgraesser Jul 22, 2019
5bdbc33
Merge branch 'lunar-spec' of https://github.com/kengz/SLM-Lab into lunar-spec
lgraesser Jul 22, 2019
7791039
change ac lr
lgraesser Jul 22, 2019
9d2d43c
fix lr
lgraesser Jul 22, 2019
54f2544
change lr
lgraesser Jul 22, 2019
7ee41b4
reduce training frequency
lgraesser Jul 22, 2019
bb23f32
dqn lunar spec
lgraesser Jul 22, 2019
727adfb
increase replace freq
lgraesser Jul 22, 2019
8371084
dont normalize state
lgraesser Jul 22, 2019
2a1d075
move dqn lunar to benchmark
lgraesser Jul 22, 2019
e42e948
update_freq to 100
lgraesser Jul 22, 2019
3171483
update training freq
lgraesser Jul 22, 2019
30d3998
revert method rename
lgraesser Jul 22, 2019
ab0a25a
update dqn lunar test spec
lgraesser Jul 22, 2019
030c19e
Merge branch 'master' into lunar-spec
kengz Jul 22, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
71 changes: 71 additions & 0 deletions slm_lab/spec/benchmark/a2c/a2c_gae_lunar.json
@@ -0,0 +1,71 @@
{
  "a2c_gae_lunar": {
    "agent": [{
      "name": "A2C",
      "algorithm": {
        "name": "ActorCritic",
        "action_pdtype": "default",
        "action_policy": "default",
        "explore_var_spec": null,
        "gamma": 0.99,
        "lam": 0.95,
        "num_step_returns": null,
        "entropy_coef_spec": {
          "name": "no_decay",
          "start_val": 0.01,
          "end_val": 0.01,
          "start_step": 0,
          "end_step": 0
        },
        "val_loss_coef": 1.0,
        "training_frequency": 128
      },
      "memory": {
        "name": "OnPolicyBatchReplay"
      },
      "net": {
        "type": "MLPNet",
        "shared": false,
        "hid_layers": [64, 64, 32],
        "hid_layers_activation": "relu",
        "init_fn": "orthogonal_",
        "batch_norm": false,
        "clip_grad_val": 0.5,
        "use_same_optim": false,
        "loss_spec": {
          "name": "MSELoss"
        },
        "actor_optim_spec": {
          "name": "Adam",
          "lr": 2e-3
        },
        "critic_optim_spec": {
          "name": "Adam",
          "lr": 2e-3
        },
        "lr_scheduler_spec": null,
        "gpu": false
      }
    }],
    "env": [{
      "name": "LunarLander-v2",
      "frame_op": "concat",
      "frame_op_len": 4,
      "max_t": null,
      "max_frame": 300000,
      "num_envs": 8,
      "normalize_state": false
    }],
    "body": {
      "product": "outer",
      "num": 1
    },
    "meta": {
      "distributed": false,
      "log_frequency": 1000,
      "eval_frequency": 1000,
      "max_session": 4,
      "max_trial": 1
    }
  }
}
71 changes: 71 additions & 0 deletions slm_lab/spec/benchmark/a2c/a2c_nstep_lunar.json
@@ -0,0 +1,71 @@
{
  "a2c_nstep_lunar": {
    "agent": [{
      "name": "A2C",
      "algorithm": {
        "name": "ActorCritic",
        "action_pdtype": "default",
        "action_policy": "default",
        "explore_var_spec": null,
        "gamma": 0.99,
        "lam": null,
        "num_step_returns": 5,
        "entropy_coef_spec": {
          "name": "no_decay",
          "start_val": 0.01,
          "end_val": 0.01,
          "start_step": 0,
          "end_step": 0
        },
        "val_loss_coef": 1.0,
        "training_frequency": 64
      },
      "memory": {
        "name": "OnPolicyBatchReplay"
      },
      "net": {
        "type": "MLPNet",
        "shared": false,
        "hid_layers": [64, 64, 32],
        "hid_layers_activation": "relu",
        "init_fn": "orthogonal_",
        "batch_norm": false,
        "clip_grad_val": 0.5,
        "use_same_optim": false,
        "loss_spec": {
          "name": "MSELoss"
        },
        "actor_optim_spec": {
          "name": "Adam",
          "lr": 2e-3
        },
        "critic_optim_spec": {
          "name": "Adam",
          "lr": 2e-3
        },
        "lr_scheduler_spec": null,
        "gpu": false
      }
    }],
    "env": [{
      "name": "LunarLander-v2",
      "frame_op": "concat",
      "frame_op_len": 4,
      "max_t": null,
      "max_frame": 300000,
      "num_envs": 8,
      "normalize_state": false
    }],
    "body": {
      "product": "outer",
      "num": 1
    },
    "meta": {
      "distributed": false,
      "log_frequency": 1000,
      "eval_frequency": 1000,
      "max_session": 4,
      "max_trial": 1
    }
  }
}
69 changes: 69 additions & 0 deletions slm_lab/spec/benchmark/dqn/ddqn_per_lunar.json
@@ -0,0 +1,69 @@
{
  "ddqn_per_concat_lunar": {
    "agent": [{
      "name": "DoubleDQN",
      "algorithm": {
        "name": "DoubleDQN",
        "action_pdtype": "Argmax",
        "action_policy": "epsilon_greedy",
        "explore_var_spec": {
          "name": "linear_decay",
          "start_val": 1.0,
          "end_val": 0.01,
          "start_step": 0,
          "end_step": 50000
        },
        "gamma": 0.99,
        "training_batch_iter": 1,
        "training_iter": 1,
        "training_frequency": 1,
        "training_start_step": 32
      },
      "memory": {
        "name": "PrioritizedReplay",
        "alpha": 0.6,
        "epsilon": 0.0001,
        "batch_size": 32,
        "max_size": 50000,
        "use_cer": false
      },
      "net": {
        "type": "MLPNet",
        "hid_layers": [256, 128],
        "hid_layers_activation": "relu",
        "clip_grad_val": 10.0,
        "loss_spec": {
          "name": "SmoothL1Loss"
        },
        "optim_spec": {
          "name": "Adam",
          "lr": 2.5e-4
        },
        "lr_scheduler_spec": null,
        "update_type": "replace",
        "update_frequency": 100,
        "gpu": false
      }
    }],
    "env": [{
      "name": "LunarLander-v2",
      "frame_op": "concat",
      "frame_op_len": 4,
      "max_t": null,
      "max_frame": 300000,
      "normalize_state": false
    }],
    "body": {
      "product": "outer",
      "num": 1
    },
    "meta": {
      "distributed": false,
      "log_frequency": 1000,
      "eval_frequency": 1000,
      "max_session": 4,
      "max_trial": 1,
      "search": "RandomSearch"
    }
  }
}
67 changes: 67 additions & 0 deletions slm_lab/spec/benchmark/dqn/dqn_lunar.json
@@ -0,0 +1,67 @@
{
  "dqn_concat_lunar": {
    "agent": [{
      "name": "DQN",
      "algorithm": {
        "name": "DQN",
        "action_pdtype": "Argmax",
        "action_policy": "epsilon_greedy",
        "explore_var_spec": {
          "name": "linear_decay",
          "start_val": 1.0,
          "end_val": 0.01,
          "start_step": 0,
          "end_step": 50000
        },
        "gamma": 0.99,
        "training_batch_iter": 1,
        "training_iter": 1,
        "training_frequency": 1,
        "training_start_step": 32
      },
      "memory": {
        "name": "Replay",
        "batch_size": 32,
        "max_size": 50000,
        "use_cer": false
      },
      "net": {
        "type": "MLPNet",
        "hid_layers": [256, 128],
        "hid_layers_activation": "relu",
        "clip_grad_val": 10.0,
        "loss_spec": {
          "name": "SmoothL1Loss"
        },
        "optim_spec": {
          "name": "Adam",
          "lr": 1e-3
        },
        "lr_scheduler_spec": null,
        "update_type": "replace",
        "update_frequency": 100,
        "gpu": false
      }
    }],
    "env": [{
      "name": "LunarLander-v2",
      "frame_op": "concat",
      "frame_op_len": 4,
      "max_t": null,
      "max_frame": 300000,
      "normalize_state": false
    }],
    "body": {
      "product": "outer",
      "num": 1
    },
    "meta": {
      "distributed": false,
      "log_frequency": 1000,
      "eval_frequency": 1000,
      "max_session": 4,
      "max_trial": 1,
      "search": "RandomSearch"
    }
  }
}
79 changes: 79 additions & 0 deletions slm_lab/spec/benchmark/ppo/ppo_lunar.json
@@ -0,0 +1,79 @@
{
  "ppo_lunar": {
    "agent": [{
      "name": "PPO",
      "algorithm": {
        "name": "PPO",
        "action_pdtype": "default",
        "action_policy": "default",
        "explore_var_spec": null,
        "gamma": 0.99,
        "lam": 0.95,
        "clip_eps_spec": {
          "name": "no_decay",
          "start_val": 0.20,
          "end_val": 0.0,
          "start_step": 10000,
          "end_step": 300000
        },
        "entropy_coef_spec": {
          "name": "no_decay",
          "start_val": 0.01,
          "end_val": 0.01,
          "start_step": 0,
          "end_step": 0
        },
        "val_loss_coef": 1.0,
        "time_horizon": 128,
        "minibatch_size": 256,
        "training_epoch": 10
      },
      "memory": {
        "name": "OnPolicyBatchReplay"
      },
      "net": {
        "type": "MLPNet",
        "shared": false,
        "hid_layers": [64, 64, 32],
        "hid_layers_activation": "relu",
        "init_fn": "orthogonal_",
        "batch_norm": false,
        "clip_grad_val": 0.5,
        "use_same_optim": true,
        "loss_spec": {
          "name": "MSELoss"
        },
        "actor_optim_spec": {
          "name": "Adam",
          "lr": 5e-4
        },
        "critic_optim_spec": {
          "name": "Adam",
          "lr": 5e-4
        },
        "lr_scheduler_spec": null,
        "gpu": false
      }
    }],
    "env": [{
      "name": "LunarLander-v2",
      "frame_op": "concat",
      "frame_op_len": 4,
      "max_t": null,
      "max_frame": 300000,
      "num_envs": 8,
      "normalize_state": false
    }],
    "body": {
      "product": "outer",
      "num": 1
    },
    "meta": {
      "distributed": false,
      "log_frequency": 1000,
      "eval_frequency": 1000,
      "max_session": 4,
      "max_trial": 1
    }
  }
}