---
# Reaches a reward of ~19-20 in ~30 minutes (~4M steps) on an m4.10xlarge instance.
# TODO(rliaw): this has regressed in performance
# Experiment spec: A3C on PongDeterministic-v4.
# Nesting is restored from the flattened source: `env`, `run` and `config`
# belong to the experiment, and `model` is a sub-section of `config`.
pong-a3c:
  env: PongDeterministic-v4
  run: A3C
  config:
    num_workers: 16
    sample_batch_size: 20
    use_pytorch: false
    vf_loss_coeff: 0.5
    # NOTE(review): the value is negative; whether that rewards or penalizes
    # entropy depends on how the installed A3C loss applies this coefficient.
    # Confirm the sign against the RLlib version in use.
    entropy_coeff: -0.01
    gamma: 0.99
    grad_clip: 40.0
    lambda: 1.0
    lr: 0.0001
    observation_filter: NoFilter
    preprocessor_pref: rllib
    model:
      use_lstm: true
      conv_activation: elu
      dim: 42
      grayscale: true
      zero_mean: false
      # Reduced channel depth and kernel size from default.
      # Four identical layers; each row presumably reads
      # [channels, [kernel_h, kernel_w], stride] - TODO confirm.
      conv_filters:
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]