Make Monitor optional #167

Merged on Mar 13, 2022. Showing changes from all 10 commits.
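Every hunk below applies the same mechanical change: the spaces around the power operator are dropped when both operands are simple. This matches the formatting rule Black adopted in its 22.x releases (attributing it to Black is an inference from the March 2022 timing; the diff itself does not name the formatter). A representative before/after:

    # Old style: ** spaced like any other binary operator.
    steps = 5 * 10 ** 7

    # New style: ** hugs simple operands.
    steps = 5 * 10**7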
examples/atari/reproduction/a3c/train_a3c.py (3 additions, 3 deletions)

@@ -33,7 +33,7 @@ def main():
     parser.add_argument("--t-max", type=int, default=5)
     parser.add_argument("--beta", type=float, default=1e-2)
     parser.add_argument("--profile", action="store_true")
-    parser.add_argument("--steps", type=int, default=8 * 10 ** 7)
+    parser.add_argument("--steps", type=int, default=8 * 10**7)
     parser.add_argument(
         "--max-frames",
         type=int,
@@ -84,15 +84,15 @@ def main():
     # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
     # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
     process_seeds = np.arange(args.processes) + args.seed * args.processes
-    assert process_seeds.max() < 2 ** 31
+    assert process_seeds.max() < 2**31

     args.outdir = experiments.prepare_output_dir(args, args.outdir)
     print("Output files are saved in {}".format(args.outdir))

     def make_env(process_idx, test):
         # Use different random seeds for train and test envs
         process_seed = process_seeds[process_idx]
-        env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
+        env_seed = 2**31 - 1 - process_seed if test else process_seed
         env = atari_wrappers.wrap_deepmind(
             atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
             episode_life=not test,
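A note on the seeding logic touched above, since the same pattern recurs in several files below: here is a minimal, runnable sketch of how per-process train and test seeds are derived. The names `processes` and `seed` are hypothetical stand-ins for the parsed `args.processes` and `args.seed`.

    import numpy as np

    processes, seed = 4, 1  # stand-ins for args.processes and args.seed

    # Each process gets a distinct seed, and different --seed values
    # map to disjoint blocks: seed=0 -> [0, 1, 2, 3], seed=1 -> [4, 5, 6, 7].
    process_seeds = np.arange(processes) + seed * processes
    assert process_seeds.max() < 2**31

    for process_idx in range(processes):
        process_seed = int(process_seeds[process_idx])
        train_seed = process_seed
        # Test seeds mirror train seeds into the top of the int32 range,
        # so a train env and a test env never share a seed.
        test_seed = 2**31 - 1 - process_seed
        print(process_idx, train_seed, test_seed)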
examples/atari/reproduction/dqn/train_dqn.py (6 additions, 6 deletions)

@@ -64,13 +64,13 @@ def main():
     parser.add_argument(
         "--steps",
         type=int,
-        default=5 * 10 ** 7,
+        default=5 * 10**7,
         help="Total number of timesteps to train the agent.",
     )
     parser.add_argument(
         "--replay-start-size",
         type=int,
-        default=5 * 10 ** 4,
+        default=5 * 10**4,
         help="Minimum replay buffer size before " + "performing gradient updates.",
     )
     parser.add_argument("--eval-n-steps", type=int, default=125000)
@@ -87,7 +87,7 @@

     # Set different random seeds for train and test envs.
     train_seed = args.seed
-    test_seed = 2 ** 31 - 1 - args.seed
+    test_seed = 2**31 - 1 - args.seed

     args.outdir = experiments.prepare_output_dir(args, args.outdir)
     print("Output files are saved in {}".format(args.outdir))
@@ -133,12 +133,12 @@ def make_env(test):
         centered=True,
     )

-    rbuf = replay_buffers.ReplayBuffer(10 ** 6)
+    rbuf = replay_buffers.ReplayBuffer(10**6)

     explorer = explorers.LinearDecayEpsilonGreedy(
         start_epsilon=1.0,
         end_epsilon=0.1,
-        decay_steps=10 ** 6,
+        decay_steps=10**6,
         random_action_func=lambda: np.random.randint(n_actions),
     )

@@ -155,7 +155,7 @@ def phi(x):
         gamma=0.99,
         explorer=explorer,
         replay_start_size=args.replay_start_size,
-        target_update_interval=10 ** 4,
+        target_update_interval=10**4,
         clip_delta=True,
         update_interval=4,
         batch_accumulator="sum",
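The `LinearDecayEpsilonGreedy(start_epsilon=1.0, end_epsilon=0.1, decay_steps=10**6, ...)` call in the hunk above defines the exploration schedule. As an illustration only (the `epsilon_at` helper below is not part of the PFRL API, just a restatement of a linear decay), the implied epsilon at timestep `t` is:

    def epsilon_at(t, start_epsilon=1.0, end_epsilon=0.1, decay_steps=10**6):
        # Linear interpolation over the first decay_steps timesteps,
        # then epsilon is held constant at end_epsilon.
        if t >= decay_steps:
            return end_epsilon
        return start_epsilon + (end_epsilon - start_epsilon) * t / decay_steps

    assert epsilon_at(0) == 1.0
    assert abs(epsilon_at(500_000) - 0.55) < 1e-9
    assert epsilon_at(2 * 10**6) == 0.1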
examples/atari/reproduction/iqn/train_iqn.py (6 additions, 6 deletions)

@@ -31,18 +31,18 @@ def main():
         "--pretrained-type", type=str, default="best", choices=["best", "final"]
     )
     parser.add_argument("--load", type=str, default=None)
-    parser.add_argument("--final-exploration-frames", type=int, default=10 ** 6)
+    parser.add_argument("--final-exploration-frames", type=int, default=10**6)
     parser.add_argument("--final-epsilon", type=float, default=0.01)
     parser.add_argument("--eval-epsilon", type=float, default=0.001)
-    parser.add_argument("--steps", type=int, default=5 * 10 ** 7)
+    parser.add_argument("--steps", type=int, default=5 * 10**7)
     parser.add_argument(
         "--max-frames",
         type=int,
         default=30 * 60 * 60,  # 30 minutes with 60 fps
         help="Maximum number of frames for each episode.",
     )
-    parser.add_argument("--replay-start-size", type=int, default=5 * 10 ** 4)
-    parser.add_argument("--target-update-interval", type=int, default=10 ** 4)
+    parser.add_argument("--replay-start-size", type=int, default=5 * 10**4)
+    parser.add_argument("--target-update-interval", type=int, default=10**4)
     parser.add_argument("--eval-interval", type=int, default=250000)
     parser.add_argument("--eval-n-steps", type=int, default=125000)
     parser.add_argument("--update-interval", type=int, default=4)
@@ -85,7 +85,7 @@ def main():

     # Set different random seeds for train and test envs.
     train_seed = args.seed
-    test_seed = 2 ** 31 - 1 - args.seed
+    test_seed = 2**31 - 1 - args.seed

     args.outdir = experiments.prepare_output_dir(args, args.outdir)
     print("Output files are saved in {}".format(args.outdir))
@@ -138,7 +138,7 @@ def make_env(test):
     # Use the same hyper parameters as https://arxiv.org/abs/1710.10044
     opt = torch.optim.Adam(q_func.parameters(), lr=5e-5, eps=1e-2 / args.batch_size)

-    rbuf = replay_buffers.ReplayBuffer(10 ** 6)
+    rbuf = replay_buffers.ReplayBuffer(10**6)

     explorer = explorers.LinearDecayEpsilonGreedy(
         1.0,
examples/atari/reproduction/rainbow/train_rainbow.py (5 additions, 5 deletions)

@@ -35,14 +35,14 @@ def main():
     parser.add_argument("--load", type=str, default=None)
     parser.add_argument("--eval-epsilon", type=float, default=0.0)
     parser.add_argument("--noisy-net-sigma", type=float, default=0.5)
-    parser.add_argument("--steps", type=int, default=5 * 10 ** 7)
+    parser.add_argument("--steps", type=int, default=5 * 10**7)
     parser.add_argument(
         "--max-frames",
         type=int,
         default=30 * 60 * 60,  # 30 minutes with 60 fps
         help="Maximum number of frames for each episode.",
     )
-    parser.add_argument("--replay-start-size", type=int, default=2 * 10 ** 4)
+    parser.add_argument("--replay-start-size", type=int, default=2 * 10**4)
     parser.add_argument("--eval-n-steps", type=int, default=125000)
     parser.add_argument("--eval-interval", type=int, default=250000)
     parser.add_argument(
@@ -77,7 +77,7 @@ def main():

     # Set different random seeds for train and test envs.
     train_seed = args.seed
-    test_seed = 2 ** 31 - 1 - args.seed
+    test_seed = 2**31 - 1 - args.seed

     args.outdir = experiments.prepare_output_dir(args, args.outdir)
     print("Output files are saved in {}".format(args.outdir))
@@ -123,14 +123,14 @@ def make_env(test):
     explorer = explorers.Greedy()

     # Use the same hyper parameters as https://arxiv.org/abs/1710.02298
-    opt = torch.optim.Adam(q_func.parameters(), 6.25e-5, eps=1.5 * 10 ** -4)
+    opt = torch.optim.Adam(q_func.parameters(), 6.25e-5, eps=1.5 * 10**-4)

     # Prioritized Replay
     # Anneal beta from beta0 to 1 throughout training
     update_interval = 4
     betasteps = args.steps / update_interval
     rbuf = replay_buffers.PrioritizedReplayBuffer(
-        10 ** 6,
+        10**6,
         alpha=0.5,
         beta0=0.4,
         betasteps=betasteps,
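The `betasteps = args.steps / update_interval` line above sizes the annealing horizon so that, with one buffer update every `update_interval` environment steps, beta reaches 1 at roughly the end of training. Assuming the linear annealing of the prioritized-replay paper (the `beta_at` helper is illustrative, not PFRL code), the implied schedule is:

    def beta_at(update, beta0=0.4, betasteps=5 * 10**7 / 4):
        # Importance-sampling exponent, annealed linearly from beta0 to 1.
        return min(1.0, beta0 + (1.0 - beta0) * update / betasteps)

    assert beta_at(0) == 0.4              # start of training
    assert beta_at(5 * 10**7 / 4) == 1.0  # fully annealed by the final update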
examples/atari/train_a2c_ale.py (4 additions, 4 deletions)

@@ -28,7 +28,7 @@ def main():
         default=30 * 60 * 60,  # 30 minutes with 60 fps
         help="Maximum number of frames for each episode.",
     )
-    parser.add_argument("--steps", type=int, default=8 * 10 ** 7)
+    parser.add_argument("--steps", type=int, default=8 * 10**7)
     parser.add_argument("--update-steps", type=int, default=5)
     parser.add_argument("--lr", type=float, default=7e-4)
     parser.add_argument("--gamma", type=float, default=0.99, help="discount factor")
@@ -43,7 +43,7 @@
     parser.add_argument(
         "--alpha", type=float, default=0.99, help="RMSprop optimizer alpha"
     )
-    parser.add_argument("--eval-interval", type=int, default=10 ** 6)
+    parser.add_argument("--eval-interval", type=int, default=10**6)
     parser.add_argument("--eval-n-runs", type=int, default=10)
     parser.add_argument("--demo", action="store_true", default=False)
     parser.add_argument("--load", type=str, default="")
@@ -92,15 +92,15 @@ def main():
     # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
     # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
     process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
-    assert process_seeds.max() < 2 ** 31
+    assert process_seeds.max() < 2**31

     args.outdir = experiments.prepare_output_dir(args, args.outdir)
     print("Output files are saved in {}".format(args.outdir))

     def make_env(process_idx, test):
         # Use different random seeds for train and test envs
         process_seed = process_seeds[process_idx]
-        env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
+        env_seed = 2**31 - 1 - process_seed if test else process_seed
         env = atari_wrappers.wrap_deepmind(
             atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
             episode_life=not test,
examples/atari/train_acer_ale.py (5 additions, 5 deletions)

@@ -38,15 +38,15 @@ def main():
     parser.add_argument("--n-times-replay", type=int, default=4)
     parser.add_argument("--beta", type=float, default=1e-2)
     parser.add_argument("--profile", action="store_true")
-    parser.add_argument("--steps", type=int, default=10 ** 7)
+    parser.add_argument("--steps", type=int, default=10**7)
     parser.add_argument(
         "--max-frames",
         type=int,
         default=30 * 60 * 60,  # 30 minutes with 60 fps
         help="Maximum number of frames for each episode.",
     )
     parser.add_argument("--lr", type=float, default=7e-4)
-    parser.add_argument("--eval-interval", type=int, default=10 ** 5)
+    parser.add_argument("--eval-interval", type=int, default=10**5)
     parser.add_argument("--eval-n-runs", type=int, default=10)
     parser.add_argument("--use-lstm", action="store_true")
     parser.add_argument("--demo", action="store_true", default=False)
@@ -87,7 +87,7 @@ def main():
     # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
     # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
     process_seeds = np.arange(args.processes) + args.seed * args.processes
-    assert process_seeds.max() < 2 ** 31
+    assert process_seeds.max() < 2**31

     args.outdir = experiments.prepare_output_dir(args, args.outdir)
     print("Output files are saved in {}".format(args.outdir))
@@ -130,7 +130,7 @@ def main():
         model.parameters(), lr=args.lr, eps=4e-3, alpha=0.99
     )

-    replay_buffer = EpisodicReplayBuffer(10 ** 6 // args.processes)
+    replay_buffer = EpisodicReplayBuffer(10**6 // args.processes)

     def phi(x):
         # Feature extractor
@@ -156,7 +156,7 @@ def phi(x):
     def make_env(process_idx, test):
         # Use different random seeds for train and test envs
         process_seed = process_seeds[process_idx]
-        env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
+        env_seed = 2**31 - 1 - process_seed if test else process_seed
         env = atari_wrappers.wrap_deepmind(
             atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
             episode_life=not test,
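One detail worth spelling out in the ACER hunk above: the episodic buffer capacity is divided by `args.processes`, which appears intended to keep the aggregate capacity across all worker processes at about 10**6 transitions. A small arithmetic sketch (the process counts are illustrative):

    total_capacity = 10**6
    for processes in (1, 4, 16):
        per_process = total_capacity // processes
        print(processes, per_process)  # -> 1000000, 250000, 62500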
examples/atari/train_categorical_dqn_ale.py (7 additions, 7 deletions)

@@ -24,19 +24,19 @@ def main():
     parser.add_argument("--gpu", type=int, default=0)
     parser.add_argument("--demo", action="store_true", default=False)
     parser.add_argument("--load", type=str, default=None)
-    parser.add_argument("--final-exploration-frames", type=int, default=10 ** 6)
+    parser.add_argument("--final-exploration-frames", type=int, default=10**6)
     parser.add_argument("--final-epsilon", type=float, default=0.1)
     parser.add_argument("--eval-epsilon", type=float, default=0.05)
-    parser.add_argument("--steps", type=int, default=10 ** 7)
+    parser.add_argument("--steps", type=int, default=10**7)
     parser.add_argument(
         "--max-frames",
         type=int,
         default=30 * 60 * 60,  # 30 minutes with 60 fps
         help="Maximum number of frames for each episode.",
     )
-    parser.add_argument("--replay-start-size", type=int, default=5 * 10 ** 4)
-    parser.add_argument("--target-update-interval", type=int, default=10 ** 4)
-    parser.add_argument("--eval-interval", type=int, default=10 ** 5)
+    parser.add_argument("--replay-start-size", type=int, default=5 * 10**4)
+    parser.add_argument("--target-update-interval", type=int, default=10**4)
+    parser.add_argument("--eval-interval", type=int, default=10**5)
     parser.add_argument("--update-interval", type=int, default=4)
     parser.add_argument("--eval-n-runs", type=int, default=10)
     parser.add_argument("--batch-size", type=int, default=32)
@@ -71,7 +71,7 @@ def main():

     # Set different random seeds for train and test envs.
     train_seed = args.seed
-    test_seed = 2 ** 31 - 1 - args.seed
+    test_seed = 2**31 - 1 - args.seed

     args.outdir = experiments.prepare_output_dir(args, args.outdir)
     print("Output files are saved in {}".format(args.outdir))
@@ -120,7 +120,7 @@ def make_env(test):
     # Use the same hyper parameters as https://arxiv.org/abs/1707.06887
     opt = torch.optim.Adam(q_func.parameters(), 2.5e-4, eps=1e-2 / args.batch_size)

-    rbuf = replay_buffers.ReplayBuffer(10 ** 6)
+    rbuf = replay_buffers.ReplayBuffer(10**6)

     explorer = explorers.LinearDecayEpsilonGreedy(
         1.0,
examples/atari/train_dqn_ale.py (8 additions, 8 deletions)

@@ -86,7 +86,7 @@ def main():
     parser.add_argument(
         "--final-exploration-frames",
         type=int,
-        default=10 ** 6,
+        default=10**6,
         help="Timesteps after which we stop " + "annealing exploration rate",
     )
     parser.add_argument(
@@ -112,7 +112,7 @@
     parser.add_argument(
         "--steps",
         type=int,
-        default=5 * 10 ** 7,
+        default=5 * 10**7,
         help="Total number of timesteps to train the agent.",
     )
     parser.add_argument(
@@ -124,19 +124,19 @@
     parser.add_argument(
         "--replay-start-size",
         type=int,
-        default=5 * 10 ** 4,
+        default=5 * 10**4,
         help="Minimum replay buffer size before " + "performing gradient updates.",
     )
     parser.add_argument(
         "--target-update-interval",
         type=int,
-        default=3 * 10 ** 4,
+        default=3 * 10**4,
         help="Frequency (in timesteps) at which " + "the target network is updated.",
     )
     parser.add_argument(
         "--eval-interval",
         type=int,
-        default=10 ** 5,
+        default=10**5,
         help="Frequency (in timesteps) of evaluation phase.",
     )
     parser.add_argument(
@@ -196,7 +196,7 @@ def main():

     # Set different random seeds for train and test envs.
     train_seed = args.seed
-    test_seed = 2 ** 31 - 1 - args.seed
+    test_seed = 2**31 - 1 - args.seed

     args.outdir = experiments.prepare_output_dir(args, args.outdir)
     print("Output files are saved in {}".format(args.outdir))
@@ -254,14 +254,14 @@ def make_env(test):
         # Anneal beta from beta0 to 1 throughout training
         betasteps = args.steps / args.update_interval
         rbuf = replay_buffers.PrioritizedReplayBuffer(
-            10 ** 6,
+            10**6,
             alpha=0.6,
             beta0=0.4,
             betasteps=betasteps,
             num_steps=args.num_step_return,
         )
     else:
-        rbuf = replay_buffers.ReplayBuffer(10 ** 6, args.num_step_return)
+        rbuf = replay_buffers.ReplayBuffer(10**6, args.num_step_return)

     def phi(x):
         # Feature extractor