Skip to content

Commit

Permalink
[RLlib] Fix DDPG learning/release tests and make MARWIL CI test criterium more difficult. (#39386)
Browse files Browse the repository at this point in the history
  • Loading branch information
sven1977 committed Sep 7, 2023
1 parent f33b8eb commit c324f38
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
-ddpg-hopper-v4:
-env: Hopper-v4
+ddpg-halfcheetah-v4:
+env: HalfCheetah-v4
 run: DDPG
 # Minimum reward and total ts (in given time_total_s) to pass this test.
 pass_criteria:
-sampler_results/episode_reward_mean: 110.0
-timesteps_total: 50000
+sampler_results/episode_reward_mean: 1000.0
+timesteps_total: 100000
 stop:
-time_total_s: 1800
+time_total_s: 3600
 config:
 actor_hiddens: [256, 256]
 critic_hiddens: [256, 256]
Expand Down
7 changes: 4 additions & 3 deletions rllib/algorithms/marwil/tests/test_marwil.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,16 @@ def test_marwil_compilation_and_learning_from_offline_file(self):
 evaluation_parallel_to_training=True,
 evaluation_config=marwil.MARWILConfig.overrides(input_="sampler"),
 off_policy_estimation_methods={},
+always_attach_evaluation_results=True,
 )
 .offline_data(input_=[data_file])
 )

 num_iterations = 350
-min_reward = 70.0
+min_reward = 100.0

 # Test for all frameworks.
-for _ in framework_iterator(config, frameworks=("tf", "torch")):
+for _ in framework_iterator(config, frameworks=("torch", "tf")):
 algo = config.build()
 learnt = False
 for i in range(num_iterations):
Expand Down Expand Up @@ -127,7 +128,7 @@ def test_marwil_cont_actions_from_offline_file(self):
 num_iterations = 3

 # Test for all frameworks.
-for _ in framework_iterator(config, frameworks=("tf", "torch")):
+for _ in framework_iterator(config, frameworks=("torch", "tf")):
 algo = config.build(env="Pendulum-v1")
 for i in range(num_iterations):
 print(algo.train())
Expand Down

0 comments on commit c324f38

Please sign in to comment.