# (GitHub page-scrape residue, commented out so the file parses as YAML)
# Permalink
# Fetching contributors…
# Cannot retrieve contributors at this time
# 27 lines (26 sloc) 669 Bytes
# Tuned RLlib example: PPO on Atari Pong.
# On a single GPU, this achieves maximum reward in ~15-20 minutes.
#
# $ python train.py -f tuned_examples/pong-ppo.yaml
#
pong-ppo:
  env: PongNoFrameskip-v4
  run: PPO
  config:
    # GAE lambda for advantage estimation.
    lambda: 0.95
    kl_coeff: 0.5
    # Canonical lowercase boolean (was "True"; lowercase matches
    # vf_share_layers below and yamllint's truthy rule).
    clip_rewards: true
    clip_param: 0.1
    vf_clip_param: 10.0
    entropy_coeff: 0.01
    # Rollout collection / SGD sizing.
    train_batch_size: 5000
    sample_batch_size: 20
    sgd_minibatch_size: 500
    num_sgd_iter: 10
    # Parallel sampling: 32 workers x 5 vectorized envs each.
    num_workers: 32
    num_envs_per_worker: 5
    batch_mode: truncate_episodes
    observation_filter: NoFilter
    vf_share_layers: true
    num_gpus: 1
    model:
      # NOTE(review): presumably the observation downscale size (42x42
      # frames) for the Atari conv net — confirm against RLlib model docs.
      dim: 42