[RLlib] Fix all example scripts to run on GPUs. #11105

Merged
Changes from 27 commits

Commits (29):
74ef8f4  WIP (sven1977, Aug 26, 2020)
c82ae60  Merge branch 'master' of https://github.com/ray-project/ray (sven1977, Aug 29, 2020)
8ac9dd0  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Aug 31, 2020)
a843af7  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 2, 2020)
e1266bb  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 3, 2020)
cebe1d9  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 4, 2020)
86bfeb6  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 5, 2020)
903fae9  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 6, 2020)
13509d2  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 9, 2020)
ecbf30a  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 9, 2020)
8341c6d  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 11, 2020)
07ad4f2  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 16, 2020)
b917d46  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 17, 2020)
1f40e0a  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 17, 2020)
a1b73c5  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 17, 2020)
9fc078b  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 18, 2020)
22d3f43  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 18, 2020)
998f0db  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 19, 2020)
cef2368  Merge branch 'master' of https://github.com/ray-project/ray (sven1977, Sep 20, 2020)
d2f9a52  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 21, 2020)
dfbaafb  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 24, 2020)
52b7ad0  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 27, 2020)
0353bfc  Merge branch 'master' of https://github.com/ray-project/ray (sven1977, Sep 27, 2020)
311fc90  Merge branch 'master' of https://github.com/ray-project/ray (sven1977, Sep 28, 2020)
6525cf1  Merge branch 'master' of https://github.com/ray-project/ray into master (sven1977, Sep 28, 2020)
812f4ae  WIP. (sven1977, Sep 28, 2020)
a9aef70  Merge https://github.com/ray-project/ray into fix_example_scripts_to_… (sven1977, Sep 29, 2020)
ea412f9  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Oct 2, 2020)
700bc8a  LINT. (sven1977, Oct 2, 2020)
6 changes: 0 additions & 6 deletions rllib/agents/ddpg/apex.py
@@ -28,13 +28,7 @@
)


def validate_config(config):
if config.get("framework") == "tfe":
raise ValueError("APEX_DDPG does not support tf-eager yet!")


ApexDDPGTrainer = DDPGTrainer.with_updates(
name="APEX_DDPG",
default_config=APEX_DDPG_DEFAULT_CONFIG,
validate_config=validate_config,
execution_plan=apex_execution_plan)
5 changes: 4 additions & 1 deletion rllib/agents/ppo/ppo_tf_policy.py
@@ -251,7 +251,10 @@ def __init__(self, config):
self.kl_coeff_val = config["kl_coeff"]
# The current KL value (as tf Variable for in-graph operations).
self.kl_coeff = get_variable(
float(self.kl_coeff_val), tf_name="kl_coeff", trainable=False)
float(self.kl_coeff_val),
tf_name="kl_coeff",
trainable=False,
framework=config["framework"])
# Constant target value.
self.kl_target = config["kl_target"]

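For context on the `framework` argument added above: `get_variable` (from `ray.rllib.utils.framework`) needs to know the active backend so the KL coefficient becomes a tf Variable only when a tf graph is actually in use; for other frameworks it can fall back to a plain value (the exact non-tf behavior is an assumption here, not something this diff shows). A sketch of the updated call in isolation:

# Sketch only; mirrors the call in ppo_tf_policy.py above.
from ray.rllib.utils.framework import get_variable

kl_coeff = get_variable(
    0.2,                  # initial KL coefficient value
    tf_name="kl_coeff",   # name used if a tf Variable is created
    trainable=False,      # never updated directly by the optimizer
    framework="tf")       # now passed explicitly, per this PR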
2 changes: 1 addition & 1 deletion rllib/agents/ppo/tests/test_ppo.py
@@ -37,7 +37,7 @@
class TestPPO(unittest.TestCase):
@classmethod
def setUpClass(cls):
ray.init(local_mode=True)
ray.init()

@classmethod
def tearDownClass(cls):
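Several scripts in this PR also drop `local_mode=True` from `ray.init()`, as in the test above. Local mode runs all tasks serially inside the driver process, which is handy for debugging but can hide GPU placement and parallelism issues; the default multi-process mode is closer to how the examples run in CI. A quick reminder of the two forms:

import ray

# Default: workers run as separate processes (what the examples now use).
ray.init()

# Debug-only alternative: run everything serially in the driver process.
# ray.init(local_mode=True)

ray.shutdown()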
3 changes: 2 additions & 1 deletion rllib/agents/sac/tests/test_sac.py
@@ -166,7 +166,8 @@ def test_sac_loss_function(self):

# Set all weights (of all nets) to fixed values.
if weights_dict is None:
assert fw in ["tf", "tfe"] # Start with the tf vars-dict.
# Start with the tf vars-dict.
assert fw in ["tf2", "tf", "tfe"]
weights_dict = policy.get_weights()
if fw == "tfe":
log_alpha = weights_dict[10]
3 changes: 3 additions & 0 deletions rllib/examples/attention_net.py
@@ -1,4 +1,5 @@
import argparse
import os

import ray
from ray import tune
@@ -42,6 +43,8 @@
"repeat_delay": 2,
},
"gamma": 0.99,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", 0)),
"num_workers": 0,
"num_envs_per_worker": 20,
"entropy_coeff": 0.001,
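The `num_gpus` override above is the pattern this PR applies across the example scripts: read the GPU count from the `RLLIB_NUM_GPUS` environment variable (presumably set by the GPU CI job) and default to 0 so the scripts still run on CPU-only machines. A minimal standalone sketch of the idiom, with a hypothetical launch command in the comment:

import os

from ray import tune

config = {
    "env": "CartPole-v0",
    # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
    "framework": "tf",
}

# Hypothetical launch: RLLIB_NUM_GPUS=1 python attention_net.py
tune.run("PPO", config=config, stop={"training_iteration": 1}, verbose=1)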
6 changes: 4 additions & 2 deletions rllib/examples/autoregressive_action_dist.py
@@ -11,6 +11,7 @@
"""

import argparse
import os

import ray
from ray import tune
@@ -44,7 +45,8 @@
config = {
"env": CorrelatedActionsEnv,
"gamma": 0.5,
"num_gpus": 0,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"model": {
"custom_model": "autoregressive_model",
"custom_action_dist": "binary_autoreg_dist",
@@ -58,7 +60,7 @@
"episode_reward_mean": args.stop_reward,
}

results = tune.run(args.run, stop=stop, config=config)
results = tune.run(args.run, stop=stop, config=config, verbose=1)

if args.as_test:
check_learning_achieved(results, args.stop_reward)
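The other recurring tweak is passing `verbose=1` to `tune.run`, as in the line above. In Tune of this vintage, 0 is silent, 1 prints only status updates, and 2 additionally prints per-trial results (treat the exact level semantics as an assumption); level 1 keeps example and CI logs compact. For instance:

from ray import tune

# Compact output: trial status updates only, no full result dumps.
results = tune.run(
    "PG",
    config={"env": "CartPole-v0"},
    stop={"training_iteration": 2},
    verbose=1)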
7 changes: 5 additions & 2 deletions rllib/examples/batch_norm_model.py
@@ -1,6 +1,7 @@
"""Example of using a custom model with batch norm."""

import argparse
import os

import ray
from ray import tune
@@ -22,7 +23,7 @@

if __name__ == "__main__":
args = parser.parse_args()
ray.init(local_mode=True)
ray.init()

ModelCatalog.register_custom_model(
"bn_model", TorchBatchNormModel if args.torch else BatchNormModel)
@@ -32,6 +33,8 @@
"model": {
"custom_model": "bn_model",
},
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"framework": "torch" if args.torch else "tf",
}
@@ -42,7 +45,7 @@
"episode_reward_mean": args.stop_reward,
}

results = tune.run(args.run, stop=stop, config=config)
results = tune.run(args.run, stop=stop, config=config, verbose=1)

if args.as_test:
check_learning_achieved(results, args.stop_reward)
8 changes: 6 additions & 2 deletions rllib/examples/cartpole_lstm.py
@@ -1,4 +1,5 @@
import argparse
import os

from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole
from ray.rllib.utils.test_utils import check_learning_achieved
@@ -35,8 +36,11 @@
}

config = dict(
configs[args.run], **{
configs[args.run],
**{
"env": StatelessCartPole,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"model": {
"use_lstm": True,
"lstm_use_prev_action_reward": args.use_prev_action_reward,
@@ -50,7 +54,7 @@
"episode_reward_mean": args.stop_reward,
}

results = tune.run(args.run, config=config, stop=stop)
results = tune.run(args.run, config=config, stop=stop, verbose=1)

if args.as_test:
check_learning_achieved(results, args.stop_reward)
25 changes: 18 additions & 7 deletions rllib/examples/centralized_critic.py
@@ -16,6 +16,7 @@
import argparse
import numpy as np
from gym.spaces import Discrete
import os

import ray
from ray import tune
@@ -90,7 +91,7 @@ def centralized_critic_postprocessing(policy,
sample_batch[OPPONENT_OBS], policy.device),
convert_to_torch_tensor(
sample_batch[OPPONENT_ACTION], policy.device)) \
.detach().numpy()
.cpu().detach().numpy()
else:
sample_batch[SampleBatch.VF_PREDS] = policy.compute_central_vf(
sample_batch[SampleBatch.CUR_OBS], sample_batch[OPPONENT_OBS],
@@ -137,14 +138,22 @@ def loss_with_central_critic(policy, model, dist_class, train_batch):
return loss


def setup_mixins(policy, obs_space, action_space, config):
# copied from PPO
def setup_tf_mixins(policy, obs_space, action_space, config):
# Copied from PPOTFPolicy (w/o ValueNetworkMixin).
KLCoeffMixin.__init__(policy, config)
EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"],
config["entropy_coeff_schedule"])
LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])


def setup_torch_mixins(policy, obs_space, action_space, config):
# Copied from PPOTorchPolicy (w/o ValueNetworkMixin).
TorchKLCoeffMixin.__init__(policy, config)
TorchEntropyCoeffSchedule.__init__(policy, config["entropy_coeff"],
config["entropy_coeff_schedule"])
TorchLR.__init__(policy, config["lr"], config["lr_schedule"])


def central_vf_stats(policy, train_batch, grads):
# Report the explained variance of the central value function.
return {
@@ -158,7 +167,7 @@ def central_vf_stats(policy, train_batch, grads):
name="CCPPOTFPolicy",
postprocess_fn=centralized_critic_postprocessing,
loss_fn=loss_with_central_critic,
before_loss_init=setup_mixins,
before_loss_init=setup_tf_mixins,
grad_stats_fn=central_vf_stats,
mixins=[
LearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin,
@@ -169,7 +178,7 @@ def central_vf_stats(policy, train_batch, grads):
name="CCPPOTorchPolicy",
postprocess_fn=centralized_critic_postprocessing,
loss_fn=loss_with_central_critic,
before_init=setup_mixins,
before_init=setup_torch_mixins,
mixins=[
TorchLR, TorchEntropyCoeffSchedule, TorchKLCoeffMixin,
CentralizedValueMixin
@@ -188,7 +197,7 @@ def get_policy_class(config):
)

if __name__ == "__main__":
ray.init(local_mode=True)
ray.init()
args = parser.parse_args()

ModelCatalog.register_custom_model(
@@ -198,6 +207,8 @@ def get_policy_class(config):
config = {
"env": TwoStepGame,
"batch_mode": "complete_episodes",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"multiagent": {
"policies": {
@@ -222,7 +233,7 @@ def get_policy_class(config):
"episode_reward_mean": args.stop_reward,
}

results = tune.run(CCTrainer, config=config, stop=stop)
results = tune.run(CCTrainer, config=config, stop=stop, verbose=1)

if args.as_test:
check_learning_achieved(results, args.stop_reward)
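Two GPU-related details in this file are worth spelling out. First, the opponent value-function output is now moved to host memory with `.cpu()` before `.numpy()`, because converting a CUDA tensor (or any tensor that requires grad) straight to NumPy raises an error. Second, the shared `setup_mixins` is split into `setup_tf_mixins` and `setup_torch_mixins` so each policy class initializes its own framework's mixins. A tiny standalone torch illustration of the first point:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
t = torch.randn(4, requires_grad=True, device=device)

# t.numpy() would fail here; copy to CPU and detach from the graph first,
# matching the `.cpu().detach().numpy()` chain in the diff above.
arr = t.cpu().detach().numpy()
print(arr.shape)  # (4,)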
3 changes: 3 additions & 0 deletions rllib/examples/centralized_critic_2.py
@@ -12,6 +12,7 @@
import numpy as np
from gym.spaces import Dict, Discrete
import argparse
import os

from ray import tune
from ray.rllib.agents.callbacks import DefaultCallbacks
@@ -87,6 +88,8 @@ def central_critic_observer(agent_obs, **kw):
"env": TwoStepGame,
"batch_mode": "complete_episodes",
"callbacks": FillInActions,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"multiagent": {
"policies": {
7 changes: 5 additions & 2 deletions rllib/examples/complex_struct_space.py
@@ -8,6 +8,7 @@
"""

import argparse
import os

import ray
from ray import tune
@@ -18,10 +19,10 @@

parser = argparse.ArgumentParser()
parser.add_argument(
"--framework", choices=["tf", "tfe", "torch"], default="tf")
"--framework", choices=["tf2", "tf", "tfe", "torch"], default="tf2")

if __name__ == "__main__":
ray.init(local_mode=True)
ray.init()
args = parser.parse_args()
if args.framework == "torch":
ModelCatalog.register_custom_model("my_model", CustomTorchRPGModel)
@@ -37,6 +38,8 @@
"env": SimpleRPG,
"rollout_fragment_length": 1,
"train_batch_size": 2,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"model": {
"custom_model": "my_model",
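Like the SAC test earlier in this diff, this example now accepts "tf2" and makes it the default. Informally: "tf" is TF1-style static-graph mode, "tfe" is TF1 with eager execution enabled, and "tf2" is native TensorFlow 2.x eager (a rough gloss, not an official definition). The argparse fragment on its own:

import argparse

parser = argparse.ArgumentParser()
# "tf2" added as a choice and made the default for this example.
parser.add_argument(
    "--framework", choices=["tf2", "tf", "tfe", "torch"], default="tf2")
args = parser.parse_args()
print(args.framework)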
3 changes: 3 additions & 0 deletions rllib/examples/custom_env.py
@@ -11,6 +11,7 @@
import gym
from gym.spaces import Discrete, Box
import numpy as np
import os

import ray
from ray import tune
@@ -114,6 +115,8 @@ def value_function(self):
"env_config": {
"corridor_length": 5,
},
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"model": {
"custom_model": "my_model",
},
5 changes: 4 additions & 1 deletion rllib/examples/custom_eval.py
@@ -67,6 +67,7 @@
"""

import argparse
import os

import ray
from ray import tune
@@ -137,7 +138,9 @@ def custom_eval_function(trainer, eval_workers):
"corridor_length": 10,
},
"horizon": 20,
"log_level": "INFO",

# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),

# Training rollouts will be collected using just the learner
# process, but evaluation will be done in parallel with two
8 changes: 5 additions & 3 deletions rllib/examples/custom_fast_model.py
@@ -5,6 +5,7 @@
"""

import argparse
import os

import ray
import ray.tune as tune
@@ -32,15 +33,16 @@
"model": {
"custom_model": "fast_model"
},
"num_gpus": 0,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 2,
"num_envs_per_worker": 10,
"num_data_loader_buffers": 1,
"num_aggregation_workers": 1,
"broadcast_interval": 50,
"rollout_fragment_length": 100,
"train_batch_size": sample_from(
lambda spec: 1000 * max(1, spec.config.num_gpus)),
lambda spec: 1000 * max(1, spec.config.num_gpus or 1)),
"fake_sampler": True,
"framework": "torch" if args.torch else "tf",
}
@@ -50,6 +52,6 @@
"timesteps_total": args.stop_timesteps,
}

tune.run("IMPALA", config=config, stop=stop)
tune.run("IMPALA", config=config, stop=stop, verbose=1)

ray.shutdown()
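The `train_batch_size` change above also guards against `num_gpus` resolving to 0 now that the value comes from the environment: `sample_from` evaluates its lambda against the resolved trial config, and the `or 1` keeps the multiplier at least 1 on CPU-only runs. The same idiom in isolation (a sketch, assuming `tune.sample_from` and attribute-style `spec.config` access as used in the script):

from ray import tune

config = {
    "num_gpus": 0,  # e.g. int(os.environ.get("RLLIB_NUM_GPUS", "0"))
    # Resolved per trial; scales with GPU count, never below 1000.
    "train_batch_size": tune.sample_from(
        lambda spec: 1000 * max(1, spec.config.num_gpus or 1)),
}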
8 changes: 5 additions & 3 deletions rllib/examples/custom_keras_model.py
@@ -1,6 +1,7 @@
"""Example of using a custom ModelV2 Keras-style model."""

import argparse
import os

import ray
from ray import tune
@@ -119,11 +120,12 @@ def check_has_custom_metric(result):
args.run,
stop={"episode_reward_mean": args.stop},
config=dict(
extra_config, **{
"log_level": "INFO",
extra_config,
**{
"env": "BreakoutNoFrameskip-v4"
if args.use_vision_network else "CartPole-v0",
"num_gpus": 0,
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"callbacks": {
"on_train_result": check_has_custom_metric,
},
4 changes: 3 additions & 1 deletion rllib/examples/custom_loss.py
@@ -50,6 +50,8 @@

config = {
"env": "CartPole-v0",
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
"num_workers": 0,
"model": {
"custom_model": "custom_loss",
@@ -64,4 +66,4 @@
"training_iteration": args.stop_iters,
}

tune.run("PG", config=config, stop=stop)
tune.run("PG", config=config, stop=stop, verbose=1)