In [1]:
import copy
import importlib
import json

import ray
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer, DEFAULT_CONFIG
from ray.tune.registry import register_env

from flow.multiagent_envs import MultiWaveAttenuationPOEnv
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

In [2]:
# Start (or reuse) a local Ray instance limited to 3 CPUs. logging_level=40
# is logging.ERROR; ignore_reinit_error=True lets this cell be re-run in the
# same kernel.
ray.init(num_cpus=3, logging_level=40, ignore_reinit_error=True)

# Load the Flow benchmark module by name and pull out its `flow_params`.
# importlib.import_module returns the submodule itself, which is what the
# previous __import__(..., fromlist=[...]) call was used to obtain.
benchmark_name = 'multi_merge'
benchmark = importlib.import_module("flow.benchmarks.%s" % benchmark_name)
flow_params = benchmark.flow_params

In [3]:
# Hyper parameters
# Work on a deep copy: every assignment below (including the nested
# config["model"] and config["env_config"] writes) would otherwise mutate
# RLlib's module-level DEFAULT_CONFIG for the rest of the kernel session.
config = copy.deepcopy(DEFAULT_CONFIG)
num_cpus = 3  # NOTE(review): duplicates the num_cpus passed to ray.init — keep in sync
num_rollouts = 10
horizon = 750
gae_lambda = 0.97
step_size = 5e-4
sample_batch_size = 750
alg_run = 'PPO'
# One rollout worker per available CPU, never more workers than rollouts.
config["num_workers"] = min(num_cpus, num_rollouts)
# Each training batch holds num_rollouts rollouts of `horizon` steps.
config["train_batch_size"] = horizon * num_rollouts
config["sample_batch_size"] = sample_batch_size
config["use_gae"] = True
config["horizon"] = horizon
config["lambda"] = gae_lambda
config["lr"] = step_size
config["vf_clip_param"] = 1e6
config["num_sgd_iter"] = 10
config['clip_actions'] = False  # FIXME(ev) temporary ray bug
config["model"]["fcnet_hiddens"] = [128, 64, 32]
config["observation_filter"] = "NoFilter"
config["entropy_coeff"] = 0.0

# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

In [4]:
# Build the environment factory and its name from the benchmark's
# flow_params, then register that factory in Ray Tune's registry under the
# name so trainers can be constructed with env=env_name.
create_env, env_name = make_create_env(flow_params)
register_env(env_name, create_env)
# NOTE(review): the cells visible here only reference `env_name`, never
# `env` — presumably this instance is a local sanity check; confirm before
# removing it.
env = create_env()

In [5]:
def my_train_fn(config, reporter):
    """Train a PPO agent on the registered Flow environment and checkpoint it.

    Intended to be handed to ``tune.run`` as a function trainable.

    Parameters
    ----------
    config : dict
        Trainer configuration, passed unchanged to ``PPOTrainer``.
    reporter : callable
        Called once per training iteration with the result dict expanded as
        keyword arguments.

    Notes
    -----
    ``agent.stop()`` runs in a ``finally`` clause so the trainer is shut down
    even when training, reporting or checkpointing raises; the exception
    itself still propagates to the caller.
    """
    n_iterations = 1  # number of agent.train() calls per trial
    agent = PPOTrainer(env=env_name, config=config)
    try:
        for _ in range(n_iterations):
            result = agent.train()
            reporter(**result)
        # Write a checkpoint; the returned value is not needed here.
        agent.save()
    finally:
        agent.stop()

In [6]:
# Build a PPO trainer in the notebook kernel for interactive use, on the
# registered Flow environment with the hyper-parameters configured above.
agent = PPOTrainer(env=env_name, config=config)

2019-05-24 07:51:39,534	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
2019-05-24 07:51:40,915	INFO policy_evaluator.py:728 -- Built policy map: {'default_policy': <ray.rllib.agents.ppo.ppo_policy_graph.PPOPolicyGraph object at 0x7ff4a529a5f8>}
2019-05-24 07:51:40,916	INFO policy_evaluator.py:729 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7ff4a529a198>}
2019-05-24 07:51:40,917	INFO policy_evaluator.py:343 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x7ff4a529a1d0>}
2019-05-24 07:51:40,976	INFO multi_gpu_optimizer.py:78 -- LocalMultiGPUOptimizer devices ['/cpu:0']


[2m[36m(pid=778)[0m Loading configuration... done.
[2m[36m(pid=778)[0m Success.
[2m[36m(pid=778)[0m Loading configuration... done.
[2m[36m(pid=775)[0m Loading configuration... done.
[2m[36m(pid=775)[0m Success.
[2m[36m(pid=775)[0m Loading configuration... done.
[2m[36m(pid=777)[0m Loading configuration... done.
[2m[36m(pid=777)[0m Success.
[2m[36m(pid=777)[0m Loading configuration... done.
[2m[36m(pid=778)[0m 2019-05-24 07:51:52,212	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=778)[0m 2019-05-24 07:51:52.214541: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
[2m[36m(pid=775)[0m 2019-05-24 07:51:52,330	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 3 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=775)[0m 2019-05-24 07:51:52.332653: I tens

In [11]:
# Run a single training iteration; the returned result dict is shown as the
# cell output.
agent.train()

[2m[36m(pid=778)[0m Loading configuration... done.
[2m[36m(pid=778)[0m Success.
[2m[36m(pid=778)[0m Loading configuration... done.
[2m[36m(pid=775)[0m Loading configuration... done.
[2m[36m(pid=775)[0m Success.
[2m[36m(pid=775)[0m Loading configuration... done.
[2m[36m(pid=777)[0m Loading configuration... done.
[2m[36m(pid=777)[0m Success.
[2m[36m(pid=777)[0m Loading configuration... done.
[2m[36m(pid=778)[0m Loading configuration... done.
[2m[36m(pid=778)[0m Success.
[2m[36m(pid=778)[0m Loading configuration... done.
[2m[36m(pid=775)[0m Loading configuration... done.
[2m[36m(pid=775)[0m Success.
[2m[36m(pid=775)[0m Loading configuration... done.
[2m[36m(pid=777)[0m Loading configuration... done.
[2m[36m(pid=777)[0m Success.
[2m[36m(pid=777)[0m Loading configuration... done.
[2m[36m(pid=778)[0m Loading configuration... done.
[2m[36m(pid=778)[0m Success.
[2m[36m(pid=778)[0m Loading configuration... done.
[2m[36m(pid=775)[

{'config': {'batch_mode': 'truncate_episodes',
  'callbacks': {'on_episode_end': None,
   'on_episode_start': None,
   'on_episode_step': None,
   'on_postprocess_traj': None,
   'on_sample_end': None,
   'on_train_result': None},
  'clip_actions': False,
  'clip_param': 0.3,
  'clip_rewards': None,
  'collect_metrics_timeout': 180,
  'compress_observations': False,
  'custom_resources_per_worker': {},
  'entropy_coeff': 0.0,
  'env': 'MultiWaveAttenuationMergePOEnv-v0',
   'run': 'PPO'},
  'gamma': 0.99,
  'grad_clip': None,
  'horizon': 750,
  'ignore_worker_failures': False,
  'input': 'sampler',
  'input_evaluation': ['is', 'wis'],
  'kl_coeff': 0.2,
  'kl_target': 0.01,
  'lambda': 0.97,
  'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8,
   'intra_op_parallelism_threads': 8},
  'log_level': 'INFO',
  'lr': 0.0005,
  'lr_schedule': None,
  'metrics_smoothing_episodes': 100,
  'model': {'conv_activation': 'relu',
   'conv_filters': None,
   'custom_model': None

In [11]:
# Launch my_train_fn as a single Tune trial that reserves 3 CPUs.
# NOTE(review): ray.init was given 3 CPUs in total and my_train_fn builds a
# PPOTrainer with config["num_workers"] remote workers inside the trial —
# presumably those workers cannot be scheduled while the trial holds every
# CPU; confirm, and consider sizing the request from the trainer's default
# resource request instead.
tune.run(my_train_fn, config=config, resources_per_trial=dict(cpu=3))

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 1.5/33.4 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 1.5/33.4 GB
Result logdir: /headless/ray_results/my_train_fn
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - my_train_fn_None_0:	RUNNING

[2m[36m(pid=620)[0m Loading configuration... done.
[2m[36m(pid=620)[0m Success.
[2m[36m(pid=620)[0m Loading configuration... done.
[2m[36m(pid=620)[0m 2019-05-24 07:47:46,585	INFO policy_evaluator.py:311 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=620)[0m 2019-05-24 07:47:46.587282: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
[2m[36m(pid=620)[0m   "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[2m[36m(pid=6

Exception in thread ray_print_logs:
Traceback (most recent call last):
  File "/opt/conda/envs/flow-latest/lib/python3.5/site-packages/redis/connection.py", line 177, in _read_from_socket
    raise socket.error(SERVER_CLOSED_CONNECTION_ERROR)
OSError: Connection closed by server.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/envs/flow-latest/lib/python3.5/site-packages/redis/client.py", line 2408, in _execute
    return command(*args)
  File "/opt/conda/envs/flow-latest/lib/python3.5/site-packages/redis/connection.py", line 624, in read_response
    response = self._parser.read_response()
  File "/opt/conda/envs/flow-latest/lib/python3.5/site-packages/redis/connection.py", line 284, in read_response
    response = self._buffer.readline()
  File "/opt/conda/envs/flow-latest/lib/python3.5/site-packages/redis/connection.py", line 216, in readline
    self._read_from_socket()
  File "/opt/conda/envs/flow-latest/l

KeyboardInterrupt: 