# Permalink
# e4bea8d Oct 1, 2018
# 1 contributor

# Users who have contributed to this file

# 21 lines (20 sloc) 456 Bytes
# Experiment definition: runs PPO on the Humanoid-v1 environment.
# Nesting is significant: `env`, `run`, `stop` and `config` belong to the
# experiment; the hyperparameters belong to `config`.
humanoid-ppo-gae:
  env: Humanoid-v1
  run: PPO
  # Stopping criterion; presumably the run ends once the mean episode
  # reward reaches this value -- confirm against the consuming tool.
  stop:
    episode_reward_mean: 6000
  # Settings handed to the PPO trainer.
  config:
    gamma: 0.995
    # The experiment name suggests this is the GAE lambda -- confirm.
    lambda: 0.95
    clip_param: 0.2
    kl_coeff: 1.0
    num_sgd_iter: 20
    lr: 0.0001
    sgd_minibatch_size: 32768
    horizon: 5000
    train_batch_size: 320000
    # Model-specific options live in their own sub-mapping.
    model:
      free_log_std: true
    # Resource and sampling settings are trainer-level keys, so they sit
    # under `config`, not under `model`.
    num_workers: 64
    num_gpus: 4
    batch_mode: complete_episodes