In [1]:
import copy
import importlib
import json

from common.evaluate import make_vis_env, test_env

from flow.multiagent_envs import MultiWaveAttenuationMergePOEnv
from flow.scenarios import MergeScenario
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from ray.tune import run_experiments

import gym, ray
from ray.rllib.agents.ppo import PPOAgent, DEFAULT_CONFIG
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

# Name of the module under `flow.benchmarks` whose `flow_params` drive this run.
benchmark_name = "multi_merge"

In [2]:
# Experiment-level knobs; the cells below feed them into Ray and the PPO config.
alg_run = 'PPO'       # algorithm name handed to Tune as "run"
num_cpus = 63         # CPU budget passed to ray.init and used to cap workers
num_rollouts = 60     # rollouts per training batch (batch = horizon * num_rollouts)
step_size = 0.0005    # stored as the config's "lr"
gae_lambda = 0.97     # stored as the config's "lambda"

In [3]:
# Bring up Ray with the CPU budget chosen above and the web UI switched off.
# NOTE(review): ignore_reinit_error=True presumably lets this cell be re-run
# in a live kernel without raising — confirm against the installed Ray version.
ray.init(
    ignore_reinit_error=True,
    include_webui=False,
    num_cpus=num_cpus,
)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-04-18_22-06-04_113/logs.
Waiting for redis server at 127.0.0.1:22417 to respond...
Waiting for redis server at 127.0.0.1:20317 to respond...
Starting the Plasma object store with 20.0 GB memory using /dev/shm.


{'node_ip_address': '10.138.0.2',
 'object_store_addresses': ['/tmp/ray/session_2019-04-18_22-06-04_113/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-04-18_22-06-04_113/sockets/raylet'],
 'redis_address': '10.138.0.2:22417',
 'webui_url': ''}

In [4]:
# Load the benchmark module by name. importlib.import_module returns the
# leaf module (flow.benchmarks.<benchmark_name>) directly, which is what the
# previous __import__(..., fromlist=[...]) call was being used to obtain.
benchmark = importlib.import_module(
    "flow.benchmarks.{}".format(benchmark_name))

# The benchmark module exposes its experiment definition as `flow_params`.
flow_params = benchmark.flow_params

# Horizon taken from the benchmark's env params; reused below for both the
# RLlib "horizon" setting and the training batch size.
horizon = flow_params['env'].horizon

def make_env(create_env):
    """Wrap ``create_env`` in a zero-argument factory.

    Parameters
    ----------
    create_env : callable
        Called with no arguments each time the returned factory is invoked.

    Returns
    -------
    callable
        A function taking no arguments that returns ``create_env()``.
        Nothing is constructed until that function is called.
    """
    def _build_env():
        return create_env()

    return _build_env

# Build the environment factory and the name it will be registered under
# from the benchmark's flow_params.
create_env, env_name = make_create_env(
    version=0,
    params=flow_params,
)

In [5]:
# Deep-copy the PPO defaults so that nothing done to `config` (including its
# nested "model" and "env_config" dicts) can leak back into RLlib's shared,
# module-level DEFAULT_CONFIG. A shallow .copy() would share those nested
# dicts with the original.
config = copy.deepcopy(DEFAULT_CONFIG)

In [6]:
# Customise the PPO configuration for this benchmark.

# Worker count is capped by both the CPU budget and the number of rollouts;
# one slot is subtracted (presumably for the driver/trainer process — confirm).
config["num_workers"] = min(num_cpus, num_rollouts) - 1
# One training batch holds `num_rollouts` full-length rollouts.
config["train_batch_size"] = horizon * num_rollouts
config["use_gae"] = True
config["horizon"] = horizon
config["lambda"] = gae_lambda
config["lr"] = step_size
config["vf_clip_param"] = 1e6
config["num_sgd_iter"] = 10
config['clip_actions'] = False  # FIXME(ev) temporary ray bug
# Rebind (rather than mutate) the nested "model" dict: if `config` is only a
# shallow copy of DEFAULT_CONFIG, an in-place edit would alter the shared
# defaults as well.
config["model"] = {**config["model"], "fcnet_hiddens": [100, 50, 25]}
config["observation_filter"] = "NoFilter"

# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
# Same rebinding approach for "env_config", for the same reason as "model".
config["env_config"] = {
    **config["env_config"],
    "flow_params": flow_json,
    "run": alg_run,
}

In [7]:
# Register the environment factory with Tune under `env_name` so the
# experiment spec can refer to the environment by that name.
register_env(
    env_name,
    create_env,
)

In [8]:
# Experiment specification handed to Tune's run_experiments.
exp_tag = dict(
    run=alg_run,
    env=env_name,
    config=dict(config),             # top-level (shallow) copy of the PPO config
    checkpoint_freq=25,
    max_failures=999,
    stop=dict(training_iteration=500),
    num_samples=1,
)

In [9]:
# Launch the experiment, keyed by the benchmark's own "exp_tag" entry, and
# keep whatever run_experiments returns in `trials`.
trials = run_experiments({flow_params["exp_tag"]: exp_tag})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/63 CPUs, 0/0 GPUs
Memory usage on this node: 3.0/94.8 GB

Created LogSyncer for /headless/ray_results/multi_merge/PPO_MultiWaveAttenuationMergePOEnv-v0_0_2019-04-18_22-06-061w6z291m -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 3.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-07-17
  done: false
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.20000001788139343
      cur_lr: 0.0005000000237487257
      entropy: 1.4259556531906128
      kl: 0.0009673548629507422

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 156 s, 6 iter, 429678 ts, 438 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-09-34
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 804.927290589831
  episode_reward_mean: 522.8962128665526
  episode_reward_min: 229.04169320792337
  episodes_this_iter: 21
  episodes_total: 126
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.050000011920928955
      cur_lr: 0.0005000000237487257
      entropy: 1.3389524221420288
      kl: 0.00568394735455513
      policy_loss: -0.0016918210312724113
      total_loss: 4.6704840660095215
      vf_explained_var: 0.2818896472454071
      vf_loss: 4.671890735626221
    grad_ti

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 287 s, 12 iter, 833051 ts, 712 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-11-44
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 867.2541512893827
  episode_reward_mean: 712.5467401418788
  episode_reward_min: 627.6192255625829
  episodes_this_iter: 15
  episodes_total: 257
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.006250000558793545
      cur_lr: 0.0005000000237487257
      entropy: 1.3412318229675293
      kl: 0.003936938010156155
      policy_loss: -0.000680316414218396
      total_loss: 5.847048759460449
      vf_explained_var: 0.29600462317466736
      vf_loss: 5.8477044105529785
    grad_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.2/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 411 s, 18 iter, 1233112 ts, 710 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-13-48
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 859.8649373384592
  episode_reward_mean: 713.9495546168265
  episode_reward_min: 631.9345002521995
  episodes_this_iter: 23
  episodes_total: 393
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.0007812500698491931
      cur_lr: 0.0005000000237487257
      entropy: 1.2977361679077148
      kl: 0.008466006256639957
      policy_loss: -0.0017513647908344865
      total_loss: 8.044719696044922
      vf_explained_var: 0.17817899584770203
      vf_loss: 8.046463966369629
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 541 s, 24 iter, 1631942 ts, 716 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-15-58
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 868.5971910197102
  episode_reward_mean: 721.2379721240586
  episode_reward_min: 632.431694521841
  episodes_this_iter: 21
  episodes_total: 522
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000390625064028427
      cur_lr: 0.0005000000237487257
      entropy: 1.2261481285095215
      kl: 0.007468735333532095
      policy_loss: -0.0009513133554719388
      total_loss: 8.856468200683594
      vf_explained_var: 0.20327399671077728
      vf_loss: 8.857416152954102
    grad_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 669 s, 30 iter, 2034145 ts, 718 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-18-05
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 835.9112233653974
  episode_reward_mean: 721.5047835533755
  episode_reward_min: 635.3811509067392
  episodes_this_iter: 18
  episodes_total: 654
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 1.1538437604904175
      kl: 0.0065783169120550156
      policy_loss: -0.0009154014987871051
      total_loss: 7.0988383293151855
      vf_explained_var: 0.22919858992099762
      vf_loss: 7.099751949310303
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 793 s, 36 iter, 2434794 ts, 720 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-20-10
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 848.3543264967008
  episode_reward_mean: 720.1884417562485
  episode_reward_min: 651.4896970596085
  episodes_this_iter: 22
  episodes_total: 787
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 1.1184953451156616
      kl: 0.0076051256619393826
      policy_loss: -0.0004847039526794106
      total_loss: 8.633380889892578
      vf_explained_var: 0.19851122796535492
      vf_loss: 8.633862495422363
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 922 s, 42 iter, 2836430 ts, 721 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-22-19
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 823.0186520789111
  episode_reward_mean: 722.0948175451937
  episode_reward_min: 633.7272506832443
  episodes_this_iter: 24
  episodes_total: 915
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 1.101391077041626
      kl: 0.00786697119474411
      policy_loss: -0.0003935389104299247
      total_loss: 8.483076095581055
      vf_explained_var: 0.19879426062107086
      vf_loss: 8.483466148376465
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 1054 s, 48 iter, 3239689 ts, 719 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-24-33
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 824.3851115348797
  episode_reward_mean: 717.6475071778805
  episode_reward_min: 636.5331926843066
  episodes_this_iter: 16
  episodes_total: 1044
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 1.040615200996399
      kl: 0.0095206992700696
      policy_loss: -0.0006322042900137603
      total_loss: 6.081418991088867
      vf_explained_var: 0.22405536472797394
      vf_loss: 6.082047462463379
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 1188 s, 54 iter, 3642617 ts, 715 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-26-48
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 803.7381148338976
  episode_reward_mean: 716.1443320843388
  episode_reward_min: 644.6135935325459
  episodes_this_iter: 21
  episodes_total: 1177
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000390625064028427
      cur_lr: 0.0005000000237487257
      entropy: 0.9900717735290527
      kl: 0.009636969305574894
      policy_loss: -0.001138628926128149
      total_loss: 7.49955940246582
      vf_explained_var: 0.13449276983737946
      vf_loss: 7.500694274902344
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 1326 s, 60 iter, 4041860 ts, 715 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-29-05
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 813.6390755616054
  episode_reward_mean: 712.1129148087854
  episode_reward_min: 637.694384076691
  episodes_this_iter: 18
  episodes_total: 1305
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000390625064028427
      cur_lr: 0.0005000000237487257
      entropy: 0.9289893507957458
      kl: 0.00840780045837164
      policy_loss: 0.0003156400634907186
      total_loss: 6.062649250030518
      vf_explained_var: 0.1494905948638916
      vf_loss: 6.06233024597168
    grad_ti

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 1461 s, 66 iter, 4443900 ts, 720 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-31-21
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 813.4532704379969
  episode_reward_mean: 718.5452102409442
  episode_reward_min: 650.3182427732977
  episodes_this_iter: 22
  episodes_total: 1439
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000390625064028427
      cur_lr: 0.0005000000237487257
      entropy: 0.8963829278945923
      kl: 0.008718255907297134
      policy_loss: -0.00024698811466805637
      total_loss: 7.524872779846191
      vf_explained_var: 0.15285784006118774
      vf_loss: 7.525116443634033
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.8/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 1593 s, 72 iter, 4846567 ts, 727 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-33-31
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 810.3208932713303
  episode_reward_mean: 724.8688665004078
  episode_reward_min: 628.0072087266018
  episodes_this_iter: 22
  episodes_total: 1569
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000390625064028427
      cur_lr: 0.0005000000237487257
      entropy: 0.86766517162323
      kl: 0.01269780658185482
      policy_loss: -0.0012995530851185322
      total_loss: 7.488515377044678
      vf_explained_var: 0.11811187863349915
      vf_loss: 7.489809989929199
    grad_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 1720 s, 78 iter, 5248665 ts, 718 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-35-39
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 827.9388391680243
  episode_reward_mean: 717.9932445557258
  episode_reward_min: 637.8546024683811
  episodes_this_iter: 19
  episodes_total: 1698
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 0.8153262138366699
      kl: 0.011043911799788475
      policy_loss: -0.00047467771219089627
      total_loss: 6.340322017669678
      vf_explained_var: 0.13885104656219482
      vf_loss: 6.340792179107666
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 18.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 1848 s, 84 iter, 5652757 ts, 720 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-37-46
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 815.1687042807766
  episode_reward_mean: 718.3161601286731
  episode_reward_min: 650.8586402789601
  episodes_this_iter: 18
  episodes_total: 1828
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 0.8056422472000122
      kl: 0.013444586656987667
      policy_loss: -8.642516331747174e-05
      total_loss: 6.684875965118408
      vf_explained_var: 0.16165313124656677
      vf_loss: 6.6849565505981445
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.0/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 1974 s, 90 iter, 6057679 ts, 710 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-39-52
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 824.5826907814301
  episode_reward_mean: 710.0211774425552
  episode_reward_min: 626.6543724066662
  episodes_this_iter: 20
  episodes_total: 1957
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000390625064028427
      cur_lr: 0.0005000000237487257
      entropy: 0.7876643538475037
      kl: 0.011342479847371578
      policy_loss: -0.0005239949095994234
      total_loss: 5.697430610656738
      vf_explained_var: 0.1428690403699875
      vf_loss: 5.69795036315918
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 2104 s, 96 iter, 6459208 ts, 700 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-42-03
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.8135584995346
  episode_reward_mean: 702.3210353295751
  episode_reward_min: 639.8775620095021
  episodes_this_iter: 17
  episodes_total: 2086
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 0.7509114742279053
      kl: 0.011643586680293083
      policy_loss: -2.038184720731806e-05
      total_loss: 4.720218658447266
      vf_explained_var: 0.21355174481868744
      vf_loss: 4.7202348709106445
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 2233 s, 102 iter, 6863115 ts, 688 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-44-12
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 791.1579654915624
  episode_reward_mean: 690.089073628091
  episode_reward_min: 597.4160587954005
  episodes_this_iter: 22
  episodes_total: 2219
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062509313225746
      cur_lr: 0.0005000000237487257
      entropy: 0.7276197075843811
      kl: 0.01435907930135727
      policy_loss: -0.0015834018122404814
      total_loss: 5.610275745391846
      vf_explained_var: 0.22785277664661407
      vf_loss: 5.61185359954834
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.2/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 2364 s, 108 iter, 7265375 ts, 690 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-46-23
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 785.3578424771343
  episode_reward_mean: 688.1211109645052
  episode_reward_min: 580.6419721637903
  episodes_this_iter: 23
  episodes_total: 2348
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000390625064028427
      cur_lr: 0.0005000000237487257
      entropy: 0.6602105498313904
      kl: 0.012181520462036133
      policy_loss: -4.74163462058641e-05
      total_loss: 4.896727085113525
      vf_explained_var: 0.44289085268974304
      vf_loss: 4.8967695236206055
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 2495 s, 114 iter, 7666869 ts, 676 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-48-34
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 781.392400317557
  episode_reward_mean: 681.9405597625955
  episode_reward_min: 579.6470737442093
  episodes_this_iter: 20
  episodes_total: 2478
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 0.589683473110199
      kl: 0.01173361111432314
      policy_loss: -0.0005883036646991968
      total_loss: 5.158384799957275
      vf_explained_var: 0.5062671899795532
      vf_loss: 5.158968448638916
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 2623 s, 120 iter, 8068080 ts, 685 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-50-43
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 775.0504175904008
  episode_reward_mean: 680.3795205292668
  episode_reward_min: 575.5439462034225
  episodes_this_iter: 23
  episodes_total: 2611
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062509313225746
      cur_lr: 0.0005000000237487257
      entropy: 0.5695179104804993
      kl: 0.013060023076832294
      policy_loss: 0.00012231717118993402
      total_loss: 5.20456075668335
      vf_explained_var: 0.4684646427631378
      vf_loss: 5.204433441162109
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 2754 s, 126 iter, 8473740 ts, 688 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-52-52
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 816.8332484302144
  episode_reward_mean: 681.2091847001483
  episode_reward_min: 577.7435801031666
  episodes_this_iter: 27
  episodes_total: 2742
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 0.5328915119171143
      kl: 0.012779802083969116
      policy_loss: 0.000991680077277124
      total_loss: 5.626291275024414
      vf_explained_var: 0.3818090260028839
      vf_loss: 5.6252946853637695
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 2881 s, 132 iter, 8880272 ts, 687 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-55-00
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 815.4887541315358
  episode_reward_mean: 687.4744534782636
  episode_reward_min: 578.7265278856194
  episodes_this_iter: 28
  episodes_total: 2872
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.000390625064028427
      cur_lr: 0.0005000000237487257
      entropy: 0.47316378355026245
      kl: 0.016089092940092087
      policy_loss: 0.0007120269583538175
      total_loss: 5.7807393074035645
      vf_explained_var: 0.29015201330184937
      vf_loss: 5.780020713806152
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 3012 s, 138 iter, 9284655 ts, 706 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-57-11
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.4028240917247
  episode_reward_mean: 708.076291914575
  episode_reward_min: 575.7675314840427
  episodes_this_iter: 26
  episodes_total: 3002
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 0.4416373670101166
      kl: 0.01571008376777172
      policy_loss: 0.0008541655843146145
      total_loss: 6.126709461212158
      vf_explained_var: 0.2885341942310333
      vf_loss: 6.125848293304443
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 3134 s, 144 iter, 9688092 ts, 706 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_22-59-14
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.345215709566
  episode_reward_mean: 707.4622631780135
  episode_reward_min: 583.4321484708297
  episodes_this_iter: 24
  episodes_total: 3130
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 0.3838486075401306
      kl: 0.017678620293736458
      policy_loss: 0.00018235189781989902
      total_loss: 6.280660629272461
      vf_explained_var: 0.2917807996273041
      vf_loss: 6.280471324920654
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 3262 s, 150 iter, 10091623 ts, 725 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-01-22
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 823.557972278711
  episode_reward_mean: 723.8272399904771
  episode_reward_min: 582.3912883485197
  episodes_this_iter: 19
  episodes_total: 3255
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 0.0005000000237487257
      entropy: 0.37642303109169006
      kl: 0.017290722578763962
      policy_loss: 0.0010190692264586687
      total_loss: 4.907867908477783
      vf_explained_var: 0.35142630338668823
      vf_loss: 4.906842231750488
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 3393 s, 156 iter, 10498823 ts, 708 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-03-32
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.1028824027611
  episode_reward_mean: 704.6573318125
  episode_reward_min: 589.9169241743809
  episodes_this_iter: 23
  episodes_total: 3389
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.0005859374650754035
      cur_lr: 0.0005000000237487257
      entropy: 0.2938481867313385
      kl: 0.01943603716790676
      policy_loss: 0.0012905859621241689
      total_loss: 6.046080112457275
      vf_explained_var: 0.29591211676597595
      vf_loss: 6.044777870178223
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.8/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 3518 s, 162 iter, 10902766 ts, 704 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-05-38
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 813.6029852896851
  episode_reward_mean: 708.8595886895497
  episode_reward_min: 586.5829130502634
  episodes_this_iter: 19
  episodes_total: 3516
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.001977538922801614
      cur_lr: 0.0005000000237487257
      entropy: 0.2362212836742401
      kl: 0.018812885507941246
      policy_loss: 0.0020212691742926836
      total_loss: 5.337469577789307
      vf_explained_var: 0.45703625679016113
      vf_loss: 5.335411071777344
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.8/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 3645 s, 168 iter, 11310156 ts, 706 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-07-44
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.7362373684139
  episode_reward_mean: 709.4707143373371
  episode_reward_min: 618.1768447450834
  episodes_this_iter: 19
  episodes_total: 3642
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.002966308733448386
      cur_lr: 0.0005000000237487257
      entropy: 0.15615913271903992
      kl: 0.023787587881088257
      policy_loss: 0.0038821366615593433
      total_loss: 5.783656597137451
      vf_explained_var: 0.5583696961402893
      vf_loss: 5.779703617095947
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 3771 s, 174 iter, 11719661 ts, 711 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-09-51
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 814.3783250421228
  episode_reward_mean: 709.4568433820995
  episode_reward_min: 611.5917540162394
  episodes_this_iter: 20
  episodes_total: 3775
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.01001129113137722
      cur_lr: 0.0005000000237487257
      entropy: 0.12449343502521515
      kl: 0.019018666818737984
      policy_loss: 0.002105157356709242
      total_loss: 5.519172191619873
      vf_explained_var: 0.5425777435302734
      vf_loss: 5.516877174377441
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 19.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 3897 s, 180 iter, 12126954 ts, 709 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-11-58
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.8602953217976
  episode_reward_mean: 706.2878914072172
  episode_reward_min: 613.4496551490771
  episodes_this_iter: 27
  episodes_total: 3907
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.022525401785969734
      cur_lr: 0.0005000000237487257
      entropy: 0.09539032727479935
      kl: 0.022282084450125694
      policy_loss: 0.0027395414654165506
      total_loss: 6.223771572113037
      vf_explained_var: 0.4761982262134552
      vf_loss: 6.220530033111572
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.0/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 4024 s, 186 iter, 12534612 ts, 718 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-14-05
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 810.0707598791296
  episode_reward_mean: 716.5732021725883
  episode_reward_min: 642.6867284243907
  episodes_this_iter: 25
  episodes_total: 4035
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.05068216845393181
      cur_lr: 0.0005000000237487257
      entropy: 0.08109986782073975
      kl: 0.017222819849848747
      policy_loss: 0.002795787760987878
      total_loss: 6.86796236038208
      vf_explained_var: 0.5064651370048523
      vf_loss: 6.864293575286865
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 4151 s, 192 iter, 12940039 ts, 713 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-16-13
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.1445833637164
  episode_reward_mean: 714.9098868356444
  episode_reward_min: 637.0457747523635
  episodes_this_iter: 20
  episodes_total: 4163
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.07602324336767197
      cur_lr: 0.0005000000237487257
      entropy: 0.06273277848958969
      kl: 0.016410471871495247
      policy_loss: 0.002140437951311469
      total_loss: 5.406099319458008
      vf_explained_var: 0.5752403736114502
      vf_loss: 5.402710914611816
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 4281 s, 198 iter, 13347545 ts, 710 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-18-21
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.6223130877321
  episode_reward_mean: 713.1709947370397
  episode_reward_min: 649.8337763408389
  episodes_this_iter: 22
  episodes_total: 4292
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.07602323591709137
      cur_lr: 0.0005000000237487257
      entropy: 0.02529989182949066
      kl: 0.017662562429904938
      policy_loss: 0.001183154177851975
      total_loss: 5.859461784362793
      vf_explained_var: 0.5352389812469482
      vf_loss: 5.856936454772949
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 4403 s, 204 iter, 13754625 ts, 705 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-20-24
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 803.9596722901039
  episode_reward_mean: 704.7786885347455
  episode_reward_min: 637.0618689250958
  episodes_this_iter: 25
  episodes_total: 4423
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.11403488367795944
      cur_lr: 0.0005000000237487257
      entropy: 0.01844453252851963
      kl: 0.020289894193410873
      policy_loss: 0.005019973497837782
      total_loss: 5.824170112609863
      vf_explained_var: 0.5477977395057678
      vf_loss: 5.816835880279541
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 4531 s, 210 iter, 14160688 ts, 708 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-22-32
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.5754219822354
  episode_reward_mean: 704.8464666921428
  episode_reward_min: 613.9360616746129
  episodes_this_iter: 20
  episodes_total: 4549
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.17105229198932648
      cur_lr: 0.0005000000237487257
      entropy: 0.046108733862638474
      kl: 0.015647614374756813
      policy_loss: 0.00024786408175714314
      total_loss: 4.9173583984375
      vf_explained_var: 0.5070381760597229
      vf_loss: 4.914434432983398
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 4660 s, 216 iter, 14571108 ts, 712 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-24-41
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.4514888163103
  episode_reward_mean: 713.6450204665182
  episode_reward_min: 649.1500789550203
  episodes_this_iter: 18
  episodes_total: 4678
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.17105230689048767
      cur_lr: 0.0005000000237487257
      entropy: 0.00579097680747509
      kl: 0.013257944956421852
      policy_loss: 0.0020603861194103956
      total_loss: 5.4436163902282715
      vf_explained_var: 0.3533237874507904
      vf_loss: 5.439288139343262
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 4787 s, 222 iter, 14977123 ts, 714 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-26-48
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.2302126352844
  episode_reward_mean: 716.7163272368495
  episode_reward_min: 653.6785009194226
  episodes_this_iter: 18
  episodes_total: 4808
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.030169764533638954
      kl: 0.011062887497246265
      policy_loss: 0.0002178520808229223
      total_loss: 5.694538116455078
      vf_explained_var: 0.28849735856056213
      vf_loss: 5.691482067108154
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 4918 s, 228 iter, 15385305 ts, 717 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-28-59
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.8799098803689
  episode_reward_mean: 719.6632655883493
  episode_reward_min: 667.1143406516654
  episodes_this_iter: 22
  episodes_total: 4937
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.005465568974614143
      kl: 0.011800854466855526
      policy_loss: 0.0012942289467900991
      total_loss: 6.996417045593262
      vf_explained_var: 0.2206825315952301
      vf_loss: 6.992094039916992
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 5042 s, 234 iter, 15791002 ts, 723 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-31-04
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 813.2182550758819
  episode_reward_mean: 721.4495446137189
  episode_reward_min: 661.2643487292181
  episodes_this_iter: 21
  episodes_total: 5068
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.01542965043336153
      kl: 0.012326011434197426
      policy_loss: 0.00021387611923273653
      total_loss: 7.086365222930908
      vf_explained_var: 0.20400767028331757
      vf_loss: 7.08298921585083
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 5170 s, 240 iter, 16197894 ts, 725 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-33-14
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 812.3929111925545
  episode_reward_mean: 727.5578846306203
  episode_reward_min: 654.0510810193447
  episodes_this_iter: 17
  episodes_total: 5195
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.018156394362449646
      kl: 0.012762579135596752
      policy_loss: 0.0009888000786304474
      total_loss: 5.813193321228027
      vf_explained_var: 0.24250952899456024
      vf_loss: 5.808930397033691
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.8/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 5301 s, 246 iter, 16603615 ts, 725 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-35-22
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 817.1047325500517
  episode_reward_mean: 727.3097930869409
  episode_reward_min: 672.6911567691834
  episodes_this_iter: 21
  episodes_total: 5327
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.000794524559751153
      kl: 0.012776337563991547
      policy_loss: 0.0023346012458205223
      total_loss: 7.080772876739502
      vf_explained_var: 0.20333242416381836
      vf_loss: 7.075160503387451
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 5424 s, 252 iter, 17010162 ts, 726 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-37-26
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 812.4280565622305
  episode_reward_mean: 726.1140321541761
  episode_reward_min: 651.5713233559079
  episodes_this_iter: 21
  episodes_total: 5455
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784752368927
      cur_lr: 0.0005000000237487257
      entropy: -0.023963499814271927
      kl: 0.0119752436876297
      policy_loss: -0.0005697443266399205
      total_loss: 6.979336261749268
      vf_explained_var: 0.17231632769107819
      vf_loss: 6.976833343505859
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 20.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 5553 s, 258 iter, 17416824 ts, 734 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-39-33
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 816.55585182962
  episode_reward_mean: 735.6288765631158
  episode_reward_min: 672.5947715624578
  episodes_this_iter: 21
  episodes_total: 5586
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.03540658950805664
      kl: 0.013385183177888393
      policy_loss: 0.0012897792039439082
      total_loss: 6.607461452484131
      vf_explained_var: 0.2622574269771576
      vf_loss: 6.602736949920654
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.0/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 5678 s, 264 iter, 17821011 ts, 732 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-41-39
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 810.4109933212259
  episode_reward_mean: 731.9867072072901
  episode_reward_min: 663.0446676302482
  episodes_this_iter: 23
  episodes_total: 5720
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.03491848334670067
      kl: 0.014158603735268116
      policy_loss: 0.0024470260832458735
      total_loss: 6.389280319213867
      vf_explained_var: 0.2710864543914795
      vf_loss: 6.383200168609619
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 5801 s, 270 iter, 18224913 ts, 740 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-43-43
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.5558845229478
  episode_reward_mean: 742.3353796941748
  episode_reward_min: 669.0367079427352
  episodes_this_iter: 26
  episodes_total: 5850
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.007496560458093882
      kl: 0.015211720950901508
      policy_loss: 0.0018449620110914111
      total_loss: 7.50508451461792
      vf_explained_var: 0.27333641052246094
      vf_loss: 7.499335765838623
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 5926 s, 276 iter, 18629714 ts, 729 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-45-47
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 810.047668375701
  episode_reward_mean: 727.8016861599832
  episode_reward_min: 639.3903262796736
  episodes_this_iter: 22
  episodes_total: 5978
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.02126471884548664
      kl: 0.013503136113286018
      policy_loss: -0.00021194317378103733
      total_loss: 7.016974925994873
      vf_explained_var: 0.29172560572624207
      vf_loss: 7.013721942901611
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.2/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 6051 s, 282 iter, 19034008 ts, 728 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-47-54
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 807.8460365207196
  episode_reward_mean: 729.1043400956121
  episode_reward_min: 641.4310495499849
  episodes_this_iter: 18
  episodes_total: 6106
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784752368927
      cur_lr: 0.0005000000237487257
      entropy: -0.03871878236532211
      kl: 0.013153751380741596
      policy_loss: 0.0028958951588720083
      total_loss: 5.6169281005859375
      vf_explained_var: 0.3330724239349365
      vf_loss: 5.61065673828125
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 6181 s, 288 iter, 19438353 ts, 731 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-50-03
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 810.2145020126568
  episode_reward_mean: 729.0661913460744
  episode_reward_min: 648.5824441385746
  episodes_this_iter: 13
  episodes_total: 6232
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.026325950399041176
      kl: 0.012435675598680973
      policy_loss: 0.0009881255682557821
      total_loss: 4.3122968673706055
      vf_explained_var: 0.3773072361946106
      vf_loss: 4.3081183433532715
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 6303 s, 294 iter, 19844766 ts, 729 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-52-06
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.0079155712594
  episode_reward_mean: 728.7255550066284
  episode_reward_min: 669.4595788809133
  episodes_this_iter: 23
  episodes_total: 6367
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -7.545336848124862e-05
      kl: 0.010936446487903595
      policy_loss: 0.0013064068043604493
      total_loss: 7.197805404663086
      vf_explained_var: 0.3084748089313507
      vf_loss: 7.193692684173584
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 6431 s, 300 iter, 20248557 ts, 739 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-54-13
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 810.3983408087878
  episode_reward_mean: 735.458891063209
  episode_reward_min: 642.6288012717248
  episodes_this_iter: 24
  episodes_total: 6497
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784156322479
      cur_lr: 0.0005000000237487257
      entropy: -0.03741341829299927
      kl: 0.012848212383687496
      policy_loss: 7.750965596642345e-05
      total_loss: 7.4443559646606445
      vf_explained_var: 0.3255677819252014
      vf_loss: 7.440981864929199
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 6558 s, 306 iter, 20652244 ts, 733 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-56-21
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.7351426885264
  episode_reward_mean: 728.0728523390524
  episode_reward_min: 671.8807335681857
  episodes_this_iter: 21
  episodes_total: 6625
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.06038561090826988
      kl: 0.014565293677151203
      policy_loss: 0.001581083401106298
      total_loss: 6.405945777893066
      vf_explained_var: 0.2669451832771301
      vf_loss: 6.400628089904785
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 6686 s, 312 iter, 21056153 ts, 734 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-18_23-58-28
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.650577686309
  episode_reward_mean: 731.6836864625203
  episode_reward_min: 664.1031222109771
  episodes_this_iter: 19
  episodes_total: 6754
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.052945565432310104
      kl: 0.01200665719807148
      policy_loss: 0.001151815289631486
      total_loss: 6.028656959533691
      vf_explained_var: 0.30661001801490784
      vf_loss: 6.0244245529174805
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 6812 s, 318 iter, 21460142 ts, 734 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-00-36
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.3790228633895
  episode_reward_mean: 728.8873625335898
  episode_reward_min: 657.7672436867837
  episodes_this_iter: 22
  episodes_total: 6885
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.046822503209114075
      kl: 0.011460646986961365
      policy_loss: 0.0006217348272912204
      total_loss: 6.93433952331543
      vf_explained_var: 0.32013964653015137
      vf_loss: 6.9307780265808105
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 6939 s, 324 iter, 21864244 ts, 730 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-02-41
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 807.887221966622
  episode_reward_mean: 734.7055438147346
  episode_reward_min: 654.0468914519645
  episodes_this_iter: 22
  episodes_total: 7016
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.019512509927153587
      kl: 0.013253548182547092
      policy_loss: 0.0018124933121725917
      total_loss: 6.380687236785889
      vf_explained_var: 0.3366650938987732
      vf_loss: 6.375474452972412
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 7067 s, 330 iter, 22267663 ts, 718 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-04-50
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.5956537671264
  episode_reward_mean: 721.2221460472766
  episode_reward_min: 646.4923282398343
  episodes_this_iter: 21
  episodes_total: 7146
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784156322479
      cur_lr: 0.0005000000237487257
      entropy: 0.024109510704874992
      kl: 0.012477106414735317
      policy_loss: 0.0005933318170718849
      total_loss: 5.68966817855835
      vf_explained_var: 0.35647398233413696
      vf_loss: 5.685873508453369
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 7195 s, 336 iter, 22673233 ts, 724 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-06-59
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.5972360899623
  episode_reward_mean: 728.0246289534949
  episode_reward_min: 654.8635486994317
  episodes_this_iter: 21
  episodes_total: 7276
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.003908282611519098
      kl: 0.011937892064452171
      policy_loss: 0.0013366243802011013
      total_loss: 6.208986282348633
      vf_explained_var: 0.32227471470832825
      vf_loss: 6.204586505889893
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.8/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 7321 s, 342 iter, 23078853 ts, 727 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-09-03
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.1678913145274
  episode_reward_mean: 723.1609548098634
  episode_reward_min: 657.9220446437192
  episodes_this_iter: 24
  episodes_total: 7408
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.018504509702324867
      kl: 0.010882002301514149
      policy_loss: 0.00103132170625031
      total_loss: 6.607147693634033
      vf_explained_var: 0.2298516482114792
      vf_loss: 6.6033244132995605
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 7448 s, 348 iter, 23485304 ts, 730 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-11-12
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.8850869049048
  episode_reward_mean: 734.2472209110356
  episode_reward_min: 663.4580742480791
  episodes_this_iter: 20
  episodes_total: 7534
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.016762524843215942
      kl: 0.012634427286684513
      policy_loss: 0.0006030543590895832
      total_loss: 5.72271728515625
      vf_explained_var: 0.3432413339614868
      vf_loss: 5.718872547149658
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 21.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 7575 s, 354 iter, 23888000 ts, 729 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-13-19
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 807.2649748038267
  episode_reward_mean: 731.2783167361985
  episode_reward_min: 660.6024240126094
  episodes_this_iter: 22
  episodes_total: 7666
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784752368927
      cur_lr: 0.0005000000237487257
      entropy: 0.021117379888892174
      kl: 0.012363498099148273
      policy_loss: 0.00034321745624765754
      total_loss: 6.514639377593994
      vf_explained_var: 0.2730160653591156
      vf_loss: 6.5111236572265625
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.0/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 7701 s, 360 iter, 24293267 ts, 732 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-15-24
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.448460221019
  episode_reward_mean: 733.2378497057566
  episode_reward_min: 657.7785186853815
  episodes_this_iter: 14
  episodes_total: 7790
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.005129855591803789
      kl: 0.012127352878451347
      policy_loss: 0.0009785023285076022
      total_loss: 4.678843021392822
      vf_explained_var: 0.35602572560310364
      vf_loss: 4.674753189086914
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 7826 s, 366 iter, 24696212 ts, 740 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-17-30
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.375279934109
  episode_reward_mean: 742.136816150232
  episode_reward_min: 664.2179156627318
  episodes_this_iter: 16
  episodes_total: 7922
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.01422876212745905
      kl: 0.011602009646594524
      policy_loss: 0.0007490138523280621
      total_loss: 5.200741291046143
      vf_explained_var: 0.3962286114692688
      vf_loss: 5.197015762329102
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 7953 s, 372 iter, 25100286 ts, 735 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-19-38
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.1109367200031
  episode_reward_mean: 734.8640212216446
  episode_reward_min: 666.7291909115231
  episodes_this_iter: 19
  episodes_total: 8053
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: -0.028010768815875053
      kl: 0.012334193103015423
      policy_loss: 0.000997025752440095
      total_loss: 5.9796061515808105
      vf_explained_var: 0.3387068808078766
      vf_loss: 5.975444793701172
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.2/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 8081 s, 378 iter, 25505542 ts, 735 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-21-46
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.4351638765987
  episode_reward_mean: 730.3498363789082
  episode_reward_min: 657.782027348146
  episodes_this_iter: 28
  episodes_total: 8186
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.0066532595083117485
      kl: 0.012670530937612057
      policy_loss: 0.0001846515224315226
      total_loss: 8.264500617980957
      vf_explained_var: 0.21796827018260956
      vf_loss: 8.261065483093262
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.2/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 8208 s, 384 iter, 25910928 ts, 728 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-23-52
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.2183039001118
  episode_reward_mean: 728.2618410081594
  episode_reward_min: 643.3160453330743
  episodes_this_iter: 23
  episodes_total: 8316
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.07393801212310791
      kl: 0.010760649107396603
      policy_loss: 0.0006515102577395737
      total_loss: 7.282170295715332
      vf_explained_var: 0.2276936024427414
      vf_loss: 7.278757095336914
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 8334 s, 390 iter, 26316835 ts, 726 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-25-58
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 807.7294155140313
  episode_reward_mean: 731.9609866552001
  episode_reward_min: 653.1772889144383
  episodes_this_iter: 24
  episodes_total: 8446
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.08545176684856415
      kl: 0.012198175303637981
      policy_loss: 0.0019022846827283502
      total_loss: 7.443707466125488
      vf_explained_var: 0.24434863030910492
      vf_loss: 7.4386749267578125
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 8461 s, 396 iter, 26721144 ts, 724 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-28-05
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.373005688062
  episode_reward_mean: 725.1785862899071
  episode_reward_min: 631.2249082373495
  episodes_this_iter: 21
  episodes_total: 8576
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784156322479
      cur_lr: 0.0005000000237487257
      entropy: 0.08480272442102432
      kl: 0.010460207238793373
      policy_loss: -0.0007181396358646452
      total_loss: 6.176578521728516
      vf_explained_var: 0.26186174154281616
      vf_loss: 6.174612522125244
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 8589 s, 402 iter, 27124737 ts, 734 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-30-14
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 813.1187212312756
  episode_reward_mean: 732.8016630271677
  episode_reward_min: 661.564554760654
  episodes_this_iter: 23
  episodes_total: 8705
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.023725222796201706
      kl: 0.011913557536900043
      policy_loss: 0.001026482554152608
      total_loss: 7.324931621551514
      vf_explained_var: 0.27193310856819153
      vf_loss: 7.32084846496582
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.5/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 8717 s, 408 iter, 27529696 ts, 738 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-32-22
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 813.9159024523032
  episode_reward_mean: 731.3808636495918
  episode_reward_min: 664.7827033572047
  episodes_this_iter: 21
  episodes_total: 8833
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.050684526562690735
      kl: 0.011189179494976997
      policy_loss: -0.000481397524708882
      total_loss: 6.77116584777832
      vf_explained_var: 0.27278900146484375
      vf_loss: 6.768776893615723
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 8846 s, 414 iter, 27935372 ts, 730 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-34-31
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.0412017867268
  episode_reward_mean: 732.5638274801473
  episode_reward_min: 663.4736125132777
  episodes_this_iter: 21
  episodes_total: 8965
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784752368927
      cur_lr: 0.0005000000237487257
      entropy: 0.050354957580566406
      kl: 0.013221390545368195
      policy_loss: 0.0013474458828568459
      total_loss: 6.625591278076172
      vf_explained_var: 0.2571028470993042
      vf_loss: 6.620851993560791
    gr

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.6/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 8977 s, 420 iter, 28342290 ts, 724 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-36-41
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 814.1660520765447
  episode_reward_mean: 723.923323815792
  episode_reward_min: 658.6090635692636
  episodes_this_iter: 29
  episodes_total: 9097
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.04036903753876686
      kl: 0.012153525836765766
      policy_loss: 0.0006958253215998411
      total_loss: 8.07425594329834
      vf_explained_var: 0.18123070895671844
      vf_loss: 8.070441246032715
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 9105 s, 426 iter, 28749683 ts, 719 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-38-49
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 804.9791168516502
  episode_reward_mean: 718.9318885782471
  episode_reward_min: 633.5293048166154
  episodes_this_iter: 27
  episodes_total: 9225
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.08177471905946732
      kl: 0.011865241453051567
      policy_loss: 0.0007445596274919808
      total_loss: 7.817461967468262
      vf_explained_var: 0.21412213146686554
      vf_loss: 7.81367301940918
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.7/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 9229 s, 432 iter, 29155566 ts, 723 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-40-54
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.1857973376972
  episode_reward_mean: 723.6714310313729
  episode_reward_min: 634.2037649687178
  episodes_this_iter: 22
  episodes_total: 9354
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.13100531697273254
      kl: 0.011290013790130615
      policy_loss: 0.00017057012883014977
      total_loss: 6.123204708099365
      vf_explained_var: 0.20486842095851898
      vf_loss: 6.1201372146606445
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.8/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 9355 s, 438 iter, 29561794 ts, 719 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-43-00
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.7597115013219
  episode_reward_mean: 720.550742832408
  episode_reward_min: 635.8294534197626
  episodes_this_iter: 24
  episodes_total: 9484
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.11214374750852585
      kl: 0.011567684821784496
      policy_loss: 0.0005051850457675755
      total_loss: 7.106184959411621
      vf_explained_var: 0.18711742758750916
      vf_loss: 7.102711200714111
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 9481 s, 444 iter, 29968320 ts, 720 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-45-07
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 810.3331368803969
  episode_reward_mean: 717.1766903354904
  episode_reward_min: 651.8971082512164
  episodes_this_iter: 18
  episodes_total: 9609
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.12371230870485306
      kl: 0.011033257469534874
      policy_loss: 0.0001586240978213027
      total_loss: 5.83709192276001
      vf_explained_var: 0.21844010055065155
      vf_loss: 5.834102630615234
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 22.9/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 9611 s, 450 iter, 30373849 ts, 723 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-47-18
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 812.7725428386138
  episode_reward_mean: 728.8076735470221
  episode_reward_min: 663.4872445004611
  episodes_this_iter: 19
  episodes_total: 9741
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.06909379363059998
      kl: 0.012515809386968613
      policy_loss: 8.912624616641551e-05
      total_loss: 5.9367146492004395
      vf_explained_var: 0.22836518287658691
      vf_loss: 5.933413982391357
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 23.0/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 9737 s, 456 iter, 30779025 ts, 724 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-49-22
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 808.3693179714694
  episode_reward_mean: 724.6732488479905
  episode_reward_min: 640.854317616078
  episodes_this_iter: 19
  episodes_total: 9870
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.05522417277097702
      kl: 0.013865754008293152
      policy_loss: -2.611129093565978e-05
      total_loss: 6.080308437347412
      vf_explained_var: 0.22265653312206268
      vf_loss: 6.07677698135376
    gra

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 23.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 9866 s, 462 iter, 31184626 ts, 725 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-51-32
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 806.1672491886447
  episode_reward_mean: 725.4493729302853
  episode_reward_min: 653.5048909255601
  episodes_this_iter: 20
  episodes_total: 10000
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.03608369827270508
      kl: 0.01162627525627613
      policy_loss: 0.0007621067925356328
      total_loss: 7.037741661071777
      vf_explained_var: 0.1993633359670639
      vf_loss: 7.03399658203125
    grad

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 23.1/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 9991 s, 468 iter, 31588727 ts, 729 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-53-38
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 811.1509897695449
  episode_reward_mean: 731.8788037082251
  episode_reward_min: 670.9552225224311
  episodes_this_iter: 17
  episodes_total: 10127
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.0587698258459568
      kl: 0.01174346636980772
      policy_loss: -0.00043941810145042837
      total_loss: 5.8336381912231445
      vf_explained_var: 0.27953165769577026
      vf_loss: 5.831064701080322
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 23.2/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 10117 s, 474 iter, 31994437 ts, 733 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-55-43
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.9162448933068
  episode_reward_mean: 731.8662558670126
  episode_reward_min: 637.3199588014941
  episodes_this_iter: 25
  episodes_total: 10261
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.11439244449138641
      kl: 0.010895511135458946
      policy_loss: -0.00024692100123502314
      total_loss: 8.317546844482422
      vf_explained_var: 0.2017330825328827
      vf_loss: 8.314997673034668
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 23.2/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 10243 s, 480 iter, 32399077 ts, 732 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-57-49
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 807.114498664917
  episode_reward_mean: 731.2282570174661
  episode_reward_min: 669.8361317986262
  episodes_this_iter: 22
  episodes_total: 10388
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.09352132678031921
      kl: 0.010297712869942188
      policy_loss: 0.0005548163899220526
      total_loss: 7.117467403411865
      vf_explained_var: 0.21547037363052368
      vf_loss: 7.114270210266113
    g

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 23.3/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 10369 s, 486 iter, 32803065 ts, 734 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_00-59-55
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 804.0570788190508
  episode_reward_mean: 734.6031547265588
  episode_reward_min: 669.4381874935274
  episodes_this_iter: 20
  episodes_total: 10518
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.09570444375276566
      kl: 0.011193747632205486
      policy_loss: -0.00010967242997139692
      total_loss: 7.019583702087402
      vf_explained_var: 0.18570804595947266
      vf_loss: 7.016821384429932
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 23.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 10498 s, 492 iter, 33207273 ts, 734 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_01-02-03
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 807.9285036023809
  episode_reward_mean: 732.399529665289
  episode_reward_min: 661.4798604808495
  episodes_this_iter: 24
  episodes_total: 10649
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784454345703
      cur_lr: 0.0005000000237487257
      entropy: 0.08584172278642654
      kl: 0.011561415158212185
      policy_loss: -0.00043632651795633137
      total_loss: 7.630176067352295
      vf_explained_var: 0.19686436653137207
      vf_loss: 7.627645492553711
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 50/63 CPUs, 0/0 GPUs
Memory usage on this node: 23.4/94.8 GB
Result logdir: /headless/ray_results/multi_merge
RUNNING trials:
 - PPO_MultiWaveAttenuationMergePOEnv-v0_0:	RUNNING [pid=361], 10622 s, 498 iter, 33612163 ts, 732 rew

Result for PPO_MultiWaveAttenuationMergePOEnv-v0_0:
  custom_metrics: {}
  date: 2019-04-19_01-04-09
  done: false
  episode_len_mean: 750.0
  episode_reward_max: 809.146224441628
  episode_reward_mean: 728.4292390031237
  episode_reward_min: 646.9867940586073
  episodes_this_iter: 24
  episodes_total: 10780
  experiment_id: ca1ade39f0914adcb484936affff8a1a
  hostname: flow-main
  info:
    default:
      cur_kl_coeff: 0.2565784156322479
      cur_lr: 0.0005000000237487257
      entropy: 0.10710359364748001
      kl: 0.010123347863554955
      policy_loss: 9.109894745051861e-05
      total_loss: 7.883522033691406
      vf_explained_var: 0.23232530057430267
      vf_loss: 7.880834102630615
    g