"""Default PPO2 hyperparameter configurations (forked from openai/baselines)."""
def mujoco():
    """PPO2 defaults for MuJoCo continuous-control benchmarks.

    Returns a dict of hyperparameters; ``lr`` is a schedule callable that
    takes the remaining-training fraction ``f`` and linearly anneals 3e-4.
    """
    config = {
        'nsteps': 2048,
        'nminibatches': 32,
        'lam': 0.95,
        'gamma': 0.99,
        'noptepochs': 10,
        'log_interval': 1,
        'ent_coef': 0.0,
        'lr': lambda f: 3e-4 * f,
        'cliprange': 0.2,
        'value_network': 'copy',
    }
    return config
def atari():
    """PPO2 defaults for Atari environments.

    Returns a dict of hyperparameters; ``lr`` linearly anneals 2.5e-4 with
    the remaining-training fraction ``f``.
    """
    config = {
        'nsteps': 128,
        'nminibatches': 4,
        'lam': 0.95,
        'gamma': 0.99,
        'noptepochs': 4,
        'log_interval': 1,
        'ent_coef': 0.01,
        'lr': lambda f: f * 2.5e-4,
        'cliprange': 0.1,
    }
    return config
def retro():
    """Retro environments reuse the Atari defaults unchanged."""
    return atari()
def mara_mlp():
    """PPO2 defaults for the MARA robot arm using an MLP policy.

    Returns a dict of hyperparameters. ``lr`` is a schedule callable that
    linearly anneals 3e-3 with the remaining-training fraction ``f``.
    Minibatch size works out to nenv * nsteps // nminibatches.
    """
    config = {
        # Policy network architecture.
        'num_layers': 2,
        'num_hidden': 16,
        'layer_norm': False,
        # Rollout / optimization settings.
        'nsteps': 1024,
        'nminibatches': 4,  # batch size = nenv * nsteps // nminibatches
        'lam': 0.95,
        'gamma': 0.99,
        'noptepochs': 10,
        'log_interval': 1,
        'ent_coef': 0.0,
        'lr': lambda f: 3e-3 * f,
        'cliprange': 0.25,
        'vf_coef': 1,
        'max_grad_norm': 0.5,
        'seed': 0,
        'value_network': 'copy',
        'network': 'mlp',
        'total_timesteps': 1e8,
        'save_interval': 10,
        # Other MARA variants: 'MARAReal-v0', 'MARAOrient-v0',
        # 'MARACollision-v0', 'MARACollisionOrient-v0'.
        'env_name': 'MARA-v0',
        # Set to a checkpoint path to warm-start training, e.g.
        # '/tmp/ros2learn/MARA-v0/ppo2_mlp/2019-02-19_12h47min/checkpoints/best'.
        'transfer_path': None,
        'trained_path': '/tmp/ros2learn/MARA-v0/ppo2_mlp/2019-04-02_13h18min/checkpoints/best',
    }
    return config
def phantomx_mlp():
    """PPO2 defaults for the PhantomX hexapod using an MLP policy.

    Returns a dict of hyperparameters. Unlike the MARA defaults, ``lr`` is a
    constant schedule (always 1e-4 regardless of training progress) and a
    ``target_kl`` of -1 is included (negative disables the KL threshold —
    presumably; verify against the training loop).
    """
    config = {
        # Policy network architecture.
        'num_layers': 2,
        'num_hidden': 64,
        'layer_norm': False,
        # Rollout / optimization settings.
        'nsteps': 1024,
        'nminibatches': 32,  # batch size = nenv * nsteps // nminibatches
        'lam': 0.95,
        'gamma': 0.99,
        'noptepochs': 10,
        'log_interval': 1,
        'ent_coef': 0.0,
        'lr': lambda f: 0.0001,  # constant learning rate; ignores f
        'cliprange': 0.2,
        'vf_coef': 0.5,
        'max_grad_norm': 0.5,
        'seed': 13,
        'value_network': 'copy',
        'network': 'mlp',
        'total_timesteps': 1e8,
        'save_interval': 10,
        'target_kl': -1,
        # Other env variants kept from the MARA configs: 'MARAReal-v0',
        # 'MARAOrient-v0', 'MARACollision-v0', 'MARACollisionOrient-v0'.
        'env_name': 'PhantomX-v0',
        # Set to a checkpoint path to warm-start training, e.g.
        # '/tmp/ros2learn/MARA-v0/ppo2_mlp/2019-02-19_12h47min/checkpoints/best'.
        'transfer_path': None,
        'trained_path': '/tmp/ros2learn/PhantomX-v0/ppo2_mlp/2019-04-02_13h18min/checkpoints/best',
    }
    return config
def mara_lstm():
    """PPO2 defaults for the MARA robot arm using an LSTM policy.

    Returns a dict of hyperparameters. For recurrent policies the minibatch
    is formed as: nbatch = nenvs * nsteps, nbatch_train = nbatch //
    nminibatches, with nbatch divisible by nminibatches and nbatch_train >=
    nsteps (otherwise the last minibatch gets a noisy gradient). ``lr``
    linearly anneals 3e-4 with the remaining-training fraction ``f``.
    """
    config = {
        # Recurrent policy architecture.
        'nlstm': 256,
        'layer_norm': False,
        # Rollout / optimization settings.
        'nsteps': 1024,
        'nminibatches': 2,  # batch size = nenv * nsteps // nminibatches
        'lam': 0.95,
        'gamma': 0.99,
        'noptepochs': 10,
        'log_interval': 1,
        'ent_coef': 0.0,
        'lr': lambda f: 3e-4 * f,
        'cliprange': 0.2,
        'vf_coef': 0.5,
        'max_grad_norm': 0.5,
        'seed': 0,
        'value_network': 'shared',
        'network': 'lstm',
        'total_timesteps': 1e8,
        'save_interval': 10,
        'env_name': 'MARA-v0',
        'num_envs': 4,
        # Set to a checkpoint path to warm-start training, e.g.
        # '/tmp/ros2learn/MARACollisionOrientRandomTarget-v0/ppo2_lstm/checkpoints/00090'.
        'transfer_path': None,
        'trained_path': '/tmp/ros2learn/MARACollisionOrientRandomTarget-v0/ppo2_lstm/checkpoints/00090',
    }
    return config