In [1]:
import os, sys

# Make the notebook's parent directory importable (presumably so the
# `common` package imported below can be resolved -- TODO confirm layout).
sys.path.append(os.path.join(os.getcwd(), '..'))
# Drop duplicate entries (e.g. from re-running this cell) while keeping the
# FIRST occurrence of each entry in its original position. Going through a
# bare set() would scramble the search order, which changes from run to run
# and can alter which module shadows which.
sys.path = sorted(set(sys.path), key=sys.path.index)

import importlib
import json

from common.evaluate import make_vis_env, test_env

from flow.multiagent_envs import MultiWaveAttenuationMergePOEnv
from flow.scenarios import MergeScenario
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from ray.tune import run_experiments

import gym, ray
from ray.rllib.agents.ppo import PPOAgent, DEFAULT_CONFIG
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

# Name of the module under `flow.benchmarks` that supplies `flow_params`
# for this training run.
benchmark_name = "multi_merge"

In [2]:
# Algorithm identifier; used as the "run" entry of the experiment spec and
# stored in the env_config.
alg_run = "PPO"

# Resource budget: CPUs handed to ray.init, and the rollout count that
# (together with num_cpus) caps the worker count and scales the batch size.
num_cpus = 63
num_rollouts = 50

# Optimiser settings, copied into config["lr"] and config["lambda"].
step_size = 5e-4
gae_lambda = 0.97

In [3]:
# Bring up Ray with the CPU budget configured above and the web UI disabled.
# ignore_reinit_error=True is passed so that re-running this cell against an
# already-initialised Ray should not raise.
ray.init(
    num_cpus=num_cpus,
    include_webui=False,
    ignore_reinit_error=True,
)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-04-17_06-00-49_261/logs.
Waiting for redis server at 127.0.0.1:54675 to respond...
Waiting for redis server at 127.0.0.1:49507 to respond...
Starting the Plasma object store with 20.0 GB memory using /dev/shm.


{'node_ip_address': '10.138.0.2',
 'object_store_addresses': ['/tmp/ray/session_2019-04-17_06-00-49_261/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-04-17_06-00-49_261/sockets/raylet'],
 'redis_address': '10.138.0.2:54675',
 'webui_url': ''}

In [4]:
# Import the benchmark module selected by `benchmark_name`.
# importlib.import_module returns the leaf module directly, which is what the
# former `__import__(..., fromlist=[...])` trick was emulating.
benchmark = importlib.import_module("flow.benchmarks.%s" % benchmark_name)

# Parameter bundle exported by the benchmark module; reused below to build
# the environment and serialised into the trainer's env_config.
flow_params = benchmark.flow_params

# Episode horizon taken from the benchmark's env parameters; it sets both
# config["horizon"] and the training batch size.
horizon = flow_params['env'].horizon

def make_env(create_env):
    """Wrap an environment factory in a zero-argument thunk.

    Parameters
    ----------
    create_env : callable
        Factory invoked with no arguments each time the thunk is called.

    Returns
    -------
    callable
        A function taking no arguments that calls ``create_env()`` and
        returns whatever it produces. Nothing is built until it is called.
    """
    def _thunk():
        # Deferred construction: build the environment only on demand.
        return create_env()

    return _thunk

# Build the environment factory and its name from the benchmark parameters;
# both are passed to register_env further down.
create_env, env_name = make_create_env(
    params=flow_params,
    version=0,
)

In [5]:
# Start from the PPO defaults imported from ray.rllib.agents.ppo.
# NOTE(review): assuming DEFAULT_CONFIG is a plain dict, `.copy()` is shallow,
# so nested values such as config["model"] and config["env_config"] are still
# the same objects as in DEFAULT_CONFIG.
config = DEFAULT_CONFIG.copy()

In [6]:
# Rollout workers; the "- 1" presumably reserves one CPU for the trainer
# process itself (TODO confirm).
config["num_workers"] = min(num_cpus, num_rollouts) - 1
# One training batch holds num_rollouts episodes' worth of steps.
config["train_batch_size"] = horizon * num_rollouts
config["use_gae"] = True
config["horizon"] = horizon
config["lambda"] = gae_lambda
config["lr"] = step_size
config["vf_clip_param"] = 1e6
config["num_sgd_iter"] = 10
config['clip_actions'] = False  # FIXME(ev) temporary ray bug
config["observation_filter"] = "NoFilter"

# `config` is only a shallow copy of DEFAULT_CONFIG, so the nested "model"
# dict is rebuilt and rebound here instead of being edited in place; an
# in-place write would leak into RLlib's shared module-level defaults.
config["model"] = dict(config["model"], fcnet_hiddens=[100, 50, 25])

# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
# Same reasoning as for "model": give this config its own env_config dict
# rather than writing into the one shared with DEFAULT_CONFIG.
config["env_config"] = dict(
    config["env_config"], flow_params=flow_json, run=alg_run)

In [7]:
# Register the environment factory with Ray Tune's registry under `env_name`,
# so the experiment spec below can refer to it by name via its "env" entry.
register_env(env_name, create_env)

In [8]:
# Experiment specification passed to run_experiments.
# Despite the name, this is the full spec dict; the experiment's tag/name
# comes from flow_params["exp_tag"] at launch time.
exp_tag = dict(
    run=alg_run,
    env=env_name,
    # Shallow snapshot of the trainer config at the time this cell runs.
    config=dict(config),
    checkpoint_freq=25,
    max_failures=999,
    # Stop each trial once it reaches 500 training iterations.
    stop=dict(training_iteration=500),
    num_samples=3,
)

In [None]:
# Launch training: a single experiment, keyed by the benchmark's exp_tag,
# described by the spec built above. The return value is kept in `trials`.
trials = run_experiments({flow_params["exp_tag"]: exp_tag})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/63 CPUs, 0/0 GPUs
Memory usage on this node: 3.0/94.8 GB

Created LogSyncer for /headless/ray_results/multi_merge/PPO_MultiWaveAttenuationMergePOEnv-v0_0_2019-04-17_06-00-52g_2xwa_e -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 3.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-03-02
  done: false
  episode_len_mean: 410.88235294117646
  episode_reward_max: 408.2201457660644
  episode_reward_mean: 185.37389003147123
  episode_reward_min: 5.575368265206399
  episodes_this_iter: 102
  episodes_total: 102
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  host

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 418 s, 5 iter, 1017043 ts, 414 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-09-39
  done: false
  episode_len_mean: 322.63677130044846
  episode_reward_max: 1537.8385921315187
  episode_reward_mean: 419.5152319637074
  episode_reward_min: 235.75892884553411
  episodes_this_iter: 223
  episodes_total: 1020
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.025000005960464478
      cur_lr: 0.0005000000237487257
      entropy: 1.3537639379501343
      kl: 0.004741123877465725
      policy_loss: -0.00065

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 822 s, 10 iter, 2000355 ts, 385 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-16-24
  done: false
  episode_len_mean: 289.20216606498195
  episode_reward_max: 1266.370557465504
  episode_reward_mean: 387.63450172982795
  episode_reward_min: 5.541417074973862
  episodes_this_iter: 277
  episodes_total: 2346
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.0031250002793967724
      cur_lr: 0.0005000000237487257
      entropy: 1.3117157220840454
      kl: 0.005692962557077408
      policy_loss: -0.0003

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 1224 s, 15 iter, 2977998 ts, 380 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-23-05
  done: false
  episode_len_mean: 294.42045454545456
  episode_reward_max: 1096.5871848348536
  episode_reward_mean: 399.6196790974582
  episode_reward_min: 5.623138455095354
  episodes_this_iter: 264
  episodes_total: 3680
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.0031250007450580597
      cur_lr: 0.0005000000237487257
      entropy: 1.2588763236999512
      kl: 0.00578535720705986
      policy_loss: -0.0007

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 1626 s, 20 iter, 3952749 ts, 432 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-29-47
  done: false
  episode_len_mean: 306.72289156626505
  episode_reward_max: 1593.3599674826921
  episode_reward_mean: 424.46954841022387
  episode_reward_min: 5.582840816849594
  episodes_this_iter: 249
  episodes_total: 4869
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.001562500256113708
      cur_lr: 0.0005000000237487257
      entropy: 1.1902176141738892
      kl: 0.0057222237810492516
      policy_loss: -0.00

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 2026 s, 25 iter, 4930505 ts, 491 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-36-26
  done: false
  episode_len_mean: 379.4375
  episode_reward_max: 1586.0854740321763
  episode_reward_mean: 561.2249228866693
  episode_reward_min: 265.9206253857991
  episodes_this_iter: 192
  episodes_total: 6043
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.0015625003725290298
      cur_lr: 0.0005000000237487257
      entropy: 1.1717679500579834
      kl: 0.005786319728940725
      policy_loss: -9.7012576588895

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.0/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 2421 s, 30 iter, 5908094 ts, 559 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-43-02
  done: false
  episode_len_mean: 383.8333333333333
  episode_reward_max: 1586.989670831219
  episode_reward_mean: 574.0714454697419
  episode_reward_min: 5.604956558130155
  episodes_this_iter: 198
  episodes_total: 7105
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000781250128056854
      cur_lr: 0.0005000000237487257
      entropy: 1.140788197517395
      kl: 0.00822516717016697
      policy_loss: -0.00042650

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.2/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 2819 s, 35 iter, 6889485 ts, 517 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-49-40
  done: false
  episode_len_mean: 357.3636363636364
  episode_reward_max: 1921.163569034266
  episode_reward_mean: 519.7238144127398
  episode_reward_min: 5.5856702505576585
  episodes_this_iter: 220
  episodes_total: 8147
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000781250128056854
      cur_lr: 0.0005000000237487257
      entropy: 1.1182554960250854
      kl: 0.006332791410386562
      policy_loss: 0.000137

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 3219 s, 40 iter, 7872198 ts, 571 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_06-56-20
  done: false
  episode_len_mean: 392.29787234042556
  episode_reward_max: 1753.9917543493236
  episode_reward_mean: 580.2630496587838
  episode_reward_min: 5.595775278868924
  episodes_this_iter: 188
  episodes_total: 9121
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000781250128056854
      cur_lr: 0.0005000000237487257
      entropy: 1.100580096244812
      kl: 0.00752174761146307
      policy_loss: -0.000391

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
PENDING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_1:	PENDING
 - PPO_MultiWaveAttenuationMergePOEnv-v0_2:	PENDING
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=566], 3613 s, 45 iter, 8859794 ts, 547 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-17_07-02-54
  done: false
  episode_len_mean: 387.10994764397907
  episode_reward_max: 2242.968245377818
  episode_reward_mean: 569.0681197870942
  episode_reward_min: 5.995976163639468
  episodes_this_iter: 191
  episodes_total: 10111
  experiment_id: 8bca9731a4fb412e9ccac6ce0e606964
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.0007812500698491931
      cur_lr: 0.0005000000237487257
      entropy: 1.0360578298568726
      kl: 0.008691069670021534
      policy_loss: -0.000