In [11]:
import numpy as np
import matplotlib.pyplot as plt

import gym

from gym import spaces


In [6]:
import ray
from ray import tune
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole
from ray.rllib.examples.models.shared_weights_model import \
    SharedWeightsModel1, SharedWeightsModel2, TF2SharedWeightsModel, \
    TorchSharedWeightsModel
from ray.rllib.models import ModelCatalog
# from ray.rllib.policy import PolicySpec
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.tune.registry import register_env

from ray.rllib.env.multi_agent_env import MultiAgentEnv


import ray
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.ppo import DEFAULT_CONFIG as DEFAULT_CONFIG_PPO

from ray.rllib.agents.dqn import DQNTrainer, DEFAULT_CONFIG 
from ray.rllib.agents.dqn import  DEFAULT_CONFIG as DEFAULT_CONFIG_DQN


from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

ray.init(ignore_reinit_error=True, log_to_driver=False)

2021-07-26 14:06:57,438	INFO worker.py:745 -- Calling ray.init() again after it has already been called.


In [7]:
class MatrixGame():
    
    def __init__(self, RPST=(3,1,0,5)):
        
        self.RPST = RPST
        
        self.payoff_mat = np.empty((2,2), dtype=np.object)
        
        self.payoff_mat[0, 0] = (RPST[0], RPST[0])
        self.payoff_mat[1, 1] = (RPST[1], RPST[1])
        self.payoff_mat[0, 1] = (RPST[2], RPST[3])
        self.payoff_mat[1, 0] = (RPST[3], RPST[2])
        
    def play(self, a_row, a_col):
        # for ease of things 0 is coooperate
#                            1 is defect
        
        
#         if a_row == 'c':
#             row = 0
#         else:
#             row = 1
            
#         if a_col == 'c':
#             col = 0
#         else:
#             col = 1
            
        return self.payoff_mat[a_row, a_col]
        

In [92]:
class TwoAgentMatrixGameEnv(MultiAgentEnv):
    
    def __init__(self, RPST=(3,1,0,5), history_n=100):
        
        self.num_agents = 2
        
        self.RPST = RPST
        self.history_n = history_n
        self.history = np.zeros((2,2,self.history_n))
        
        self._counter = 0
        self._setup_spaces()
        self.game = MatrixGame(RPST=self.RPST)
    
    
    def _setup_spaces(self):
        
        self.action_space = spaces.Discrete(2)
        
        self.observation_space = spaces.Box(0, 1,
                                           shape=(self.history_n * 4,))
        
        
    def history_to_states(self, history=None):
        
        if history is None:
            history = self.history
            
        state1 = history.flatten()
        state2 = history[::-1,:,:].flatten()
        
        states = {0: state1, 1:state2}
        
        return states
            
        
        
    def step(self, action_dict):
        
        print((action_dict[0], action_dict[1]))
        rewards = self.game.play(action_dict[0], action_dict[1])
        rew = {i: rewards[i] for i in [0, 1]}
        
        self.history[0, action_dict[0], self._counter] = 1
        self.history[1, action_dict[1], self._counter] = 1
        
        obs = self.history_to_states(self.history)
        
        self._counter += 1
        
        is_done = self._counter >= self.history_n
        done = {i: is_done for i in [0, 1, "__all__"]}
        
        info = {0:{}, 1:{}}
        
        return obs, rew, done, info
        
        
        
    def reset(self):
        
        self.history = np.zeros((2,2,self.history_n))
        obs = self.history_to_states(self.history)

        self._counter = 0
        
        return obs
        

In [93]:
env = TwoAgentMatrixGameEnv()

In [94]:
aa = (4,3)
{i:aa[i] for i in [0,1]}

{0: 4, 1: 3}

In [95]:
env.step({0:0,1:0})

(0, 0)


({0: array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [79]:
info ={}

In [80]:
info[0] = {}

In [81]:
info[1] = {}

In [82]:
info

{0: {}, 1: {}}

In [96]:
register_env('twoagent_PD', lambda c: TwoAgentMatrixGameEnv())

In [84]:
trainer_config_ppo = DEFAULT_CONFIG_PPO.copy()
trainer_config_ppo['num_workers'] = 1
trainer_config_ppo['num_sgd_iter'] = 20
trainer_config_ppo['sgd_minibatch_size'] = 32
# trainer_config_ppo['model']['fcnet_hiddens'] = [1024, 512,512, 256,256,32,8]
trainer_config_ppo['model']['fcnet_hiddens'] = [256,256,32,8]

trainer_config_ppo['num_cpus_per_worker'] = 0

In [85]:
trainer = PPOTrainer(trainer_config_ppo, env="twoagent_PD");


2021-07-26 15:52:54,124	INFO trainable.py:101 -- Trainable.setup took 13.018 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [86]:
for i in range(2):
    print("Training iteration {}...".format(i))
    result=trainer.train()
    print(pretty_print(result))

Training iteration 0...
agent_timesteps_total: 4000
custom_metrics: {}
date: 2021-07-26_15-53-24
done: false
episode_len_mean: 100.0
episode_media: {}
episode_reward_max: 478.0
episode_reward_mean: 455.8
episode_reward_min: 428.0
episodes_this_iter: 20
episodes_total: 20
experiment_id: 39b34ab8c778431db4c7d7363eca7f2b
hostname: coolo-computer
info:
  learner:
    default_policy:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 4.999999873689376e-05
        entropy: 0.6630105972290039
        entropy_coeff: 0.0
        kl: 0.031176701188087463
        model: {}
        policy_loss: -0.036966897547245026
        total_loss: 8868.671875
        vf_explained_var: -7.157325967455108e-07
        vf_loss: 8868.7041015625
  num_agent_steps_sampled: 4000
  num_agent_steps_trained: 4000
  num_steps_sampled: 4000
  num_steps_trained: 4000
iterations_since_restore: 1
node_ip: 192.168.1.21
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 44.472

In [87]:
env = TwoAgentMatrixGameEnv()

In [90]:

n_samples = 50


defects = []
rewards = []
for i in range(n_samples):
    state = env.reset()
    
    total_defect = 0
    cum_reward = 0
    done = False
    i = 0
    while not done:
        print('===========')
        print(i)
        print('===========')
        
        i+=1
        action = trainer.compute_actions(state)
        print(action)
#         total_defect += action
        state, reward, done, results = env.step(action)
#         cum_reward += reward
    defects.append(total_defect)
    rewards.append(cum_reward)

0
{0: 1, 1: 1}
(1, 1)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 1, 1: 1}
(1, 1)
0
{0: 1, 1: 1}
(1, 1)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 1, 1: 1}
(1, 1)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 1, 1: 0}
(1, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 1, 1: 1}
(1, 1)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 1, 1: 0}
(1, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 1, 1: 1}
(1, 1)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 1, 1: 0}
(1, 0)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 1, 1: 0}
(1, 0)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 1, 1: 1}
(1, 1)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 1, 1: 0}
(1, 0)
0
{0: 0, 1: 1}
(0, 1)
0
{0: 1, 1: 0}
(1, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 1, 1: 0}
(1, 0)
0
{0: 0, 1: 0}
(0, 0)
0
{0: 1, 1

In [97]:

def env_creator(_):
    return TwoAgentMatrixGameEnv()
single_env = TwoAgentMatrixGameEnv()
env_name = "TwoAgent_PD"
register_env(env_name, env_creator)


obs_space = single_env.observation_space
act_space = single_env.action_space
num_agents = single_env.num_agents

def gen_policy():
    return (None, obs_space, act_space, {})
policy_graphs = {}
for i in range(num_agents):
    policy_graphs['agent-' + str(i)] = gen_policy()
def policy_mapping_fn(agent_id):
        return 'agent-' + str(agent_id)

In [98]:
config={
    "log_level": "WARN",
    "num_workers": 3,
    "num_cpus_for_driver": 1,
    "num_cpus_per_worker": 1,
    "lr": 5e-3,
    "model":{"fcnet_hiddens": [8, 8]},
    "multiagent": {
        "policies": policy_graphs,
        "policy_mapping_fn": policy_mapping_fn,
    },
    "env": "TwoAgent_PD"
}

In [100]:
exp_name = 'TA_TEST1'
exp_dict = {
        'name': exp_name,
        'run_or_experiment': 'PPO',
        "stop": {
            "training_iteration": 100
        },
        'checkpoint_freq': 20,
        "config": config,
}
# ray.init()
tune.run(**exp_dict)

Trial name,status,loc
PPO_TwoAgent_PD_8803d_00000,PENDING,


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 15996
  custom_metrics: {}
  date: 2021-07-26_16-18-57
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 496.0
  episode_reward_mean: 451.0897435897436
  episode_reward_min: 406.0
  episodes_this_iter: 78
  episodes_total: 78
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 0.004999999888241291
          entropy: 0.664276659488678
          entropy_coeff: 0.0
          kl: 0.029636424034833908
          model: {}
          policy_loss: -0.060856517404317856
          total_loss: 7095.986328125
          vf_explained_var: -2.1957581338938326e-05
          vf_loss: 7096.04150390625
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 0.004999999888241291
          entropy: 0.6590754389762878
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,1,13.6652,7998,451.09,496,406,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 31992
  custom_metrics: {}
  date: 2021-07-26_16-19-10
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 480.0
  episode_reward_mean: 443.6
  episode_reward_min: 382.0
  episodes_this_iter: 81
  episodes_total: 159
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 0.004999999888241291
          entropy: 0.6360675096511841
          entropy_coeff: 0.0
          kl: 0.028073500841856003
          model: {}
          policy_loss: -0.05279109999537468
          total_loss: 4550.61962890625
          vf_explained_var: 0.072012759745121
          vf_loss: 4550.66455078125
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 0.004999999888241291
          entropy: 0.6255916953086853
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,2,26.5548,15996,443.6,480,382,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 47988
  custom_metrics: {}
  date: 2021-07-26_16-19-23
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 477.0
  episode_reward_mean: 435.44
  episode_reward_min: 360.0
  episodes_this_iter: 78
  episodes_total: 237
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 0.004999999888241291
          entropy: 0.6167100667953491
          entropy_coeff: 0.0
          kl: 0.02464049868285656
          model: {}
          policy_loss: -0.052305497229099274
          total_loss: 3251.7626953125
          vf_explained_var: 0.17814792692661285
          vf_loss: 3251.803955078125
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 0.004999999888241291
          entropy: 0.5898683667182922
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,3,39.7216,23994,435.44,477,360,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 63984
  custom_metrics: {}
  date: 2021-07-26_16-19-37
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 478.0
  episode_reward_mean: 437.54
  episode_reward_min: 379.0
  episodes_this_iter: 81
  episodes_total: 318
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 0.675000011920929
          cur_lr: 0.004999999888241291
          entropy: 0.6062700152397156
          entropy_coeff: 0.0
          kl: 0.02044774405658245
          model: {}
          policy_loss: -0.040002740919589996
          total_loss: 2311.958740234375
          vf_explained_var: 0.2912120521068573
          vf_loss: 2311.985107421875
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.675000011920929
          cur_lr: 0.004999999888241291
          entropy: 0.5761978030204773
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,4,53.878,31992,437.54,478,379,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 79980
  custom_metrics: {}
  date: 2021-07-26_16-19-52
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 488.0
  episode_reward_mean: 429.11
  episode_reward_min: 364.0
  episodes_this_iter: 81
  episodes_total: 399
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.592710554599762
          entropy_coeff: 0.0
          kl: 0.014430426061153412
          model: {}
          policy_loss: -0.03324376791715622
          total_loss: 1766.1417236328125
          vf_explained_var: 0.4310234487056732
          vf_loss: 1766.16015625
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.675000011920929
          cur_lr: 0.004999999888241291
          entropy: 0.5369526743888855
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,5,67.9485,39990,429.11,488,364,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 95976
  custom_metrics: {}
  date: 2021-07-26_16-20-06
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 486.0
  episode_reward_mean: 419.56
  episode_reward_min: 365.0
  episodes_this_iter: 78
  episodes_total: 477
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5845174193382263
          entropy_coeff: 0.0
          kl: 0.01274198293685913
          model: {}
          policy_loss: -0.03116431273519993
          total_loss: 1294.3248291015625
          vf_explained_var: 0.5572563409805298
          vf_loss: 1294.3433837890625
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.675000011920929
          cur_lr: 0.004999999888241291
          entropy: 0.5038086175918579
          entropy_coe

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,6,82.7373,47988,419.56,486,365,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 111972
  custom_metrics: {}
  date: 2021-07-26_16-20-22
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 508.0
  episode_reward_mean: 421.31
  episode_reward_min: 365.0
  episodes_this_iter: 81
  episodes_total: 558
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.572909414768219
          entropy_coeff: 0.0
          kl: 0.012790652923285961
          model: {}
          policy_loss: -0.030833085998892784
          total_loss: 842.5501708984375
          vf_explained_var: 0.6742656826972961
          vf_loss: 842.5680541992188
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.675000011920929
          cur_lr: 0.004999999888241291
          entropy: 0.49706533551216125
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,7,98.3669,55986,421.31,508,365,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 127968
  custom_metrics: {}
  date: 2021-07-26_16-20-38
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 486.0
  episode_reward_mean: 418.86
  episode_reward_min: 359.0
  episodes_this_iter: 81
  episodes_total: 639
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5610260367393494
          entropy_coeff: 0.0
          kl: 0.012288955971598625
          model: {}
          policy_loss: -0.02785569615662098
          total_loss: 828.1436157226562
          vf_explained_var: 0.7296446561813354
          vf_loss: 828.1589965820312
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.675000011920929
          cur_lr: 0.004999999888241291
          entropy: 0.5015184283256531
          entropy_coe

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,8,114.526,63984,418.86,486,359,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 143964
  custom_metrics: {}
  date: 2021-07-26_16-20-56
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 484.0
  episode_reward_mean: 419.74
  episode_reward_min: 359.0
  episodes_this_iter: 78
  episodes_total: 717
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5469088554382324
          entropy_coeff: 0.0
          kl: 0.013633936643600464
          model: {}
          policy_loss: -0.030268583446741104
          total_loss: 545.896240234375
          vf_explained_var: 0.8160889148712158
          vf_loss: 545.9127197265625
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.48665159940719604
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,9,132.573,71982,419.74,484,359,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 159960
  custom_metrics: {}
  date: 2021-07-26_16-21-13
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 490.0
  episode_reward_mean: 421.86
  episode_reward_min: 360.0
  episodes_this_iter: 81
  episodes_total: 798
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5442790389060974
          entropy_coeff: 0.0
          kl: 0.011967494152486324
          model: {}
          policy_loss: -0.0268519576638937
          total_loss: 479.240478515625
          vf_explained_var: 0.8568693995475769
          vf_loss: 479.2552795410156
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5050936937332153
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,10,148.954,79980,421.86,490,360,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 175956
  custom_metrics: {}
  date: 2021-07-26_16-21-30
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 490.0
  episode_reward_mean: 423.97
  episode_reward_min: 352.0
  episodes_this_iter: 81
  episodes_total: 879
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5351572632789612
          entropy_coeff: 0.0
          kl: 0.012634098529815674
          model: {}
          policy_loss: -0.030532993376255035
          total_loss: 463.7942199707031
          vf_explained_var: 0.8818807005882263
          vf_loss: 463.81195068359375
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4981108605861664
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,11,166.57,87978,423.97,490,352,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 191952
  custom_metrics: {}
  date: 2021-07-26_16-21-47
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 512.0
  episode_reward_mean: 425.44
  episode_reward_min: 352.0
  episodes_this_iter: 78
  episodes_total: 957
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5376333594322205
          entropy_coeff: 0.0
          kl: 0.013263986445963383
          model: {}
          policy_loss: -0.027589788660407066
          total_loss: 318.56915283203125
          vf_explained_var: 0.9269373416900635
          vf_loss: 318.58331298828125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4882891774177551
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,12,183.218,95976,425.44,512,352,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 207948
  custom_metrics: {}
  date: 2021-07-26_16-22-04
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 479.0
  episode_reward_mean: 418.02
  episode_reward_min: 349.0
  episodes_this_iter: 81
  episodes_total: 1038
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5311990976333618
          entropy_coeff: 0.0
          kl: 0.012791587971150875
          model: {}
          policy_loss: -0.02650475688278675
          total_loss: 344.9759216308594
          vf_explained_var: 0.9068655967712402
          vf_loss: 344.9894104003906
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4692382216453552
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,13,200.308,103974,418.02,479,349,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 223944
  custom_metrics: {}
  date: 2021-07-26_16-22-20
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 499.0
  episode_reward_mean: 416.49
  episode_reward_min: 360.0
  episodes_this_iter: 81
  episodes_total: 1119
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5157775282859802
          entropy_coeff: 0.0
          kl: 0.012146435678005219
          model: {}
          policy_loss: -0.02660209685564041
          total_loss: 429.18585205078125
          vf_explained_var: 0.919399082660675
          vf_loss: 429.2001953125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4756297171115875
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,14,215.991,111972,416.49,499,360,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 239940
  custom_metrics: {}
  date: 2021-07-26_16-22-36
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 499.0
  episode_reward_mean: 417.26
  episode_reward_min: 350.0
  episodes_this_iter: 78
  episodes_total: 1197
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5083068609237671
          entropy_coeff: 0.0
          kl: 0.010312821716070175
          model: {}
          policy_loss: -0.02481580525636673
          total_loss: 326.3589172363281
          vf_explained_var: 0.9183055758476257
          vf_loss: 326.373291015625
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4445822536945343
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,15,231.917,119970,417.26,499,350,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 255936
  custom_metrics: {}
  date: 2021-07-26_16-22-51
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 495.0
  episode_reward_mean: 422.22
  episode_reward_min: 350.0
  episodes_this_iter: 81
  episodes_total: 1278
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5146774649620056
          entropy_coeff: 0.0
          kl: 0.012435809709131718
          model: {}
          policy_loss: -0.020723560824990273
          total_loss: 406.0738830566406
          vf_explained_var: 0.9189573526382446
          vf_loss: 406.08203125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.44831782579421997
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,16,246.556,127968,422.22,495,350,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 271932
  custom_metrics: {}
  date: 2021-07-26_16-23-06
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 496.0
  episode_reward_mean: 419.54
  episode_reward_min: 363.0
  episodes_this_iter: 81
  episodes_total: 1359
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5043562650680542
          entropy_coeff: 0.0
          kl: 0.011548547074198723
          model: {}
          policy_loss: -0.024400994181632996
          total_loss: 473.79034423828125
          vf_explained_var: 0.9180144667625427
          vf_loss: 473.80303955078125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4373171329498291
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,17,261.776,135966,419.54,496,363,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 287928
  custom_metrics: {}
  date: 2021-07-26_16-23-20
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 496.0
  episode_reward_mean: 417.65
  episode_reward_min: 343.0
  episodes_this_iter: 78
  episodes_total: 1437
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5002204179763794
          entropy_coeff: 0.0
          kl: 0.012781798839569092
          model: {}
          policy_loss: -0.03071506880223751
          total_loss: 379.0797424316406
          vf_explained_var: 0.9072388410568237
          vf_loss: 379.0975341796875
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4248342216014862
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,18,275.99,143964,417.65,496,343,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 303924
  custom_metrics: {}
  date: 2021-07-26_16-23-34
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 492.0
  episode_reward_mean: 420.48
  episode_reward_min: 359.0
  episodes_this_iter: 81
  episodes_total: 1518
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.5033784508705139
          entropy_coeff: 0.0
          kl: 0.010813777334988117
          model: {}
          policy_loss: -0.02617633156478405
          total_loss: 393.9627990722656
          vf_explained_var: 0.9057968258857727
          vf_loss: 393.9781188964844
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.42221349477767944
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,19,290.123,151962,420.48,492,359,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 319920
  custom_metrics: {}
  date: 2021-07-26_16-23-50
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 475.0
  episode_reward_mean: 415.58
  episode_reward_min: 342.0
  episodes_this_iter: 81
  episodes_total: 1599
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.50180983543396
          entropy_coeff: 0.0
          kl: 0.012369298376142979
          model: {}
          policy_loss: -0.02220752462744713
          total_loss: 330.0269775390625
          vf_explained_var: 0.8993403911590576
          vf_loss: 330.0366516113281
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.40930771827697754
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,20,305.318,159960,415.58,475,342,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 335916
  custom_metrics: {}
  date: 2021-07-26_16-24-04
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 487.0
  episode_reward_mean: 422.61
  episode_reward_min: 349.0
  episodes_this_iter: 78
  episodes_total: 1677
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.49518388509750366
          entropy_coeff: 0.0
          kl: 0.010999547317624092
          model: {}
          policy_loss: -0.023102272301912308
          total_loss: 423.2574157714844
          vf_explained_var: 0.9109407663345337
          vf_loss: 423.2694091796875
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.41886720061302185
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,21,319.818,167958,422.61,487,349,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 351912
  custom_metrics: {}
  date: 2021-07-26_16-24-19
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 487.0
  episode_reward_mean: 418.69
  episode_reward_min: 360.0
  episodes_this_iter: 81
  episodes_total: 1758
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4879505932331085
          entropy_coeff: 0.0
          kl: 0.010824897326529026
          model: {}
          policy_loss: -0.02461123652756214
          total_loss: 334.1821594238281
          vf_explained_var: 0.9001011848449707
          vf_loss: 334.1958312988281
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.41551920771598816
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,22,334.048,175956,418.69,487,360,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 367908
  custom_metrics: {}
  date: 2021-07-26_16-24-33
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 476.0
  episode_reward_mean: 416.07
  episode_reward_min: 362.0
  episodes_this_iter: 81
  episodes_total: 1839
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.48041394352912903
          entropy_coeff: 0.0
          kl: 0.011771474964916706
          model: {}
          policy_loss: -0.025291211903095245
          total_loss: 394.431396484375
          vf_explained_var: 0.8900811076164246
          vf_loss: 394.44476318359375
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.41243183612823486
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,23,348.407,183954,416.07,476,362,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 383904
  custom_metrics: {}
  date: 2021-07-26_16-24-48
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 475.0
  episode_reward_mean: 419.75
  episode_reward_min: 361.0
  episodes_this_iter: 78
  episodes_total: 1917
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.49678874015808105
          entropy_coeff: 0.0
          kl: 0.011160513386130333
          model: {}
          policy_loss: -0.026624765247106552
          total_loss: 364.4513854980469
          vf_explained_var: 0.9277566075325012
          vf_loss: 364.46673583984375
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4138137400150299
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,24,363.593,191952,419.75,475,361,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 399900
  custom_metrics: {}
  date: 2021-07-26_16-25-03
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 498.0
  episode_reward_mean: 418.24
  episode_reward_min: 350.0
  episodes_this_iter: 81
  episodes_total: 1998
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.47878965735435486
          entropy_coeff: 0.0
          kl: 0.00977051630616188
          model: {}
          policy_loss: -0.021930230781435966
          total_loss: 366.6459655761719
          vf_explained_var: 0.9023867249488831
          vf_loss: 366.65802001953125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4259341359138489
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,25,377.898,199950,418.24,498,350,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 415896
  custom_metrics: {}
  date: 2021-07-26_16-25-17
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 498.0
  episode_reward_mean: 418.64
  episode_reward_min: 350.0
  episodes_this_iter: 81
  episodes_total: 2079
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4692426323890686
          entropy_coeff: 0.0
          kl: 0.012027472257614136
          model: {}
          policy_loss: -0.027322715148329735
          total_loss: 373.313232421875
          vf_explained_var: 0.8980517983436584
          vf_loss: 373.3283386230469
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.43850985169410706
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,26,392.5,207948,418.64,498,350,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 431892
  custom_metrics: {}
  date: 2021-07-26_16-25-31
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 475.0
  episode_reward_mean: 417.67
  episode_reward_min: 369.0
  episodes_this_iter: 78
  episodes_total: 2157
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4671894311904907
          entropy_coeff: 0.0
          kl: 0.011732826009392738
          model: {}
          policy_loss: -0.02497178502380848
          total_loss: 273.74920654296875
          vf_explained_var: 0.9229036569595337
          vf_loss: 273.7623291015625
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.42537668347358704
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,27,406.535,215946,417.67,475,369,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 447888
  custom_metrics: {}
  date: 2021-07-26_16-25-46
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 475.0
  episode_reward_mean: 416.58
  episode_reward_min: 366.0
  episodes_this_iter: 81
  episodes_total: 2238
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.44279366731643677
          entropy_coeff: 0.0
          kl: 0.01171612087637186
          model: {}
          policy_loss: -0.025281231850385666
          total_loss: 337.9268493652344
          vf_explained_var: 0.9053969979286194
          vf_loss: 337.9402770996094
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4150516986846924
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,28,421.288,223944,416.58,475,366,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 463884
  custom_metrics: {}
  date: 2021-07-26_16-26-01
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 498.0
  episode_reward_mean: 419.03
  episode_reward_min: 358.0
  episodes_this_iter: 81
  episodes_total: 2319
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4306054711341858
          entropy_coeff: 0.0
          kl: 0.010086486116051674
          model: {}
          policy_loss: -0.024644965305924416
          total_loss: 436.92205810546875
          vf_explained_var: 0.9093042612075806
          vf_loss: 436.93646240234375
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.41942790150642395
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,29,436.491,231942,419.03,498,358,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 479880
  custom_metrics: {}
  date: 2021-07-26_16-26-16
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 485.0
  episode_reward_mean: 415.74
  episode_reward_min: 371.0
  episodes_this_iter: 78
  episodes_total: 2397
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4373614192008972
          entropy_coeff: 0.0
          kl: 0.009538330137729645
          model: {}
          policy_loss: -0.021356092765927315
          total_loss: 398.82208251953125
          vf_explained_var: 0.9067662358283997
          vf_loss: 398.833740234375
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.41027626395225525
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,30,450.975,239940,415.74,485,371,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 495876
  custom_metrics: {}
  date: 2021-07-26_16-26-30
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 475.0
  episode_reward_mean: 412.54
  episode_reward_min: 364.0
  episodes_this_iter: 81
  episodes_total: 2478
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.446271151304245
          entropy_coeff: 0.0
          kl: 0.011439556255936623
          model: {}
          policy_loss: -0.023191988468170166
          total_loss: 390.84185791015625
          vf_explained_var: 0.915631890296936
          vf_loss: 390.8534851074219
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4057779014110565
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,31,464.847,247938,412.54,475,364,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 511872
  custom_metrics: {}
  date: 2021-07-26_16-26-44
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 492.0
  episode_reward_mean: 414.36
  episode_reward_min: 359.0
  episodes_this_iter: 81
  episodes_total: 2559
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4214867651462555
          entropy_coeff: 0.0
          kl: 0.012173079885542393
          model: {}
          policy_loss: -0.02358979918062687
          total_loss: 296.13385009765625
          vf_explained_var: 0.9316717386245728
          vf_loss: 296.1451416015625
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.41488760709762573
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,32,479.042,255936,414.36,492,359,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 527868
  custom_metrics: {}
  date: 2021-07-26_16-26-59
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 492.0
  episode_reward_mean: 416.22
  episode_reward_min: 359.0
  episodes_this_iter: 78
  episodes_total: 2637
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4104391038417816
          entropy_coeff: 0.0
          kl: 0.010475213639438152
          model: {}
          policy_loss: -0.02290097065269947
          total_loss: 414.4047546386719
          vf_explained_var: 0.8982279300689697
          vf_loss: 414.4170227050781
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.409149169921875
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,33,493.568,263934,416.22,492,359,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 543864
  custom_metrics: {}
  date: 2021-07-26_16-27-12
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 481.0
  episode_reward_mean: 407.37
  episode_reward_min: 357.0
  episodes_this_iter: 81
  episodes_total: 2718
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3904101550579071
          entropy_coeff: 0.0
          kl: 0.01346401497721672
          model: {}
          policy_loss: -0.02374507673084736
          total_loss: 265.3645935058594
          vf_explained_var: 0.9472312331199646
          vf_loss: 265.3747253417969
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3994404375553131
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,34,507.464,271932,407.37,481,357,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 559860
  custom_metrics: {}
  date: 2021-07-26_16-27-27
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 481.0
  episode_reward_mean: 409.56
  episode_reward_min: 360.0
  episodes_this_iter: 81
  episodes_total: 2799
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3926931917667389
          entropy_coeff: 0.0
          kl: 0.009313439950346947
          model: {}
          policy_loss: -0.01963900588452816
          total_loss: 394.74554443359375
          vf_explained_var: 0.901046872138977
          vf_loss: 394.7557678222656
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.39903539419174194
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,35,522.267,279930,409.56,481,360,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 575856
  custom_metrics: {}
  date: 2021-07-26_16-27-42
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 481.0
  episode_reward_mean: 407.73
  episode_reward_min: 353.0
  episodes_this_iter: 78
  episodes_total: 2877
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.36743873357772827
          entropy_coeff: 0.0
          kl: 0.011000602506101131
          model: {}
          policy_loss: -0.017738547176122665
          total_loss: 478.0013427734375
          vf_explained_var: 0.900433361530304
          vf_loss: 478.0079345703125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3966531455516815
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,36,536.73,287928,407.73,481,353,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 591852
  custom_metrics: {}
  date: 2021-07-26_16-27-56
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 482.0
  episode_reward_mean: 414.65
  episode_reward_min: 355.0
  episodes_this_iter: 81
  episodes_total: 2958
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3565111756324768
          entropy_coeff: 0.0
          kl: 0.009375002235174179
          model: {}
          policy_loss: -0.017311180010437965
          total_loss: 321.8431701660156
          vf_explained_var: 0.9309085607528687
          vf_loss: 321.8509216308594
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.39709779620170593
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,37,551.312,295926,414.65,482,355,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 607848
  custom_metrics: {}
  date: 2021-07-26_16-28-10
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 484.0
  episode_reward_mean: 421.87
  episode_reward_min: 365.0
  episodes_this_iter: 81
  episodes_total: 3039
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3496035039424896
          entropy_coeff: 0.0
          kl: 0.010366411879658699
          model: {}
          policy_loss: -0.02243584208190441
          total_loss: 371.18487548828125
          vf_explained_var: 0.9297465682029724
          vf_loss: 371.1968078613281
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4001935124397278
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,38,565.131,303924,421.87,484,365,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 623844
  custom_metrics: {}
  date: 2021-07-26_16-28-25
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 497.0
  episode_reward_mean: 420.07
  episode_reward_min: 366.0
  episodes_this_iter: 78
  episodes_total: 3117
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.330331414937973
          entropy_coeff: 0.0
          kl: 0.008779103867709637
          model: {}
          policy_loss: -0.01809767447412014
          total_loss: 393.6966247558594
          vf_explained_var: 0.9157066941261292
          vf_loss: 393.7057800292969
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3967960774898529
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,39,579.425,311922,420.07,497,366,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 639840
  custom_metrics: {}
  date: 2021-07-26_16-28-39
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 477.0
  episode_reward_mean: 417.73
  episode_reward_min: 375.0
  episodes_this_iter: 81
  episodes_total: 3198
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.341340035200119
          entropy_coeff: 0.0
          kl: 0.01011302787810564
          model: {}
          policy_loss: -0.019485045224428177
          total_loss: 188.4803924560547
          vf_explained_var: 0.9506176114082336
          vf_loss: 188.48963928222656
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3932532072067261
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,40,594.093,319920,417.73,477,375,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 655836
  custom_metrics: {}
  date: 2021-07-26_16-28-55
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 487.0
  episode_reward_mean: 420.77
  episode_reward_min: 366.0
  episodes_this_iter: 81
  episodes_total: 3279
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3346293866634369
          entropy_coeff: 0.0
          kl: 0.008375790901482105
          model: {}
          policy_loss: -0.01599036529660225
          total_loss: 315.8465881347656
          vf_explained_var: 0.9263412952423096
          vf_loss: 315.8541259765625
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.40252336859703064
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,41,609.679,327918,420.77,487,366,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 671832
  custom_metrics: {}
  date: 2021-07-26_16-29-09
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 487.0
  episode_reward_mean: 418.62
  episode_reward_min: 355.0
  episodes_this_iter: 78
  episodes_total: 3357
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.32570719718933105
          entropy_coeff: 0.0
          kl: 0.009847952052950859
          model: {}
          policy_loss: -0.016284430399537086
          total_loss: 270.1335144042969
          vf_explained_var: 0.9318884611129761
          vf_loss: 270.13983154296875
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4049447774887085
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,42,624.132,335916,418.62,487,355,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 687828
  custom_metrics: {}
  date: 2021-07-26_16-29-24
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 470.0
  episode_reward_mean: 421.04
  episode_reward_min: 355.0
  episodes_this_iter: 81
  episodes_total: 3438
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2989307641983032
          entropy_coeff: 0.0
          kl: 0.010250059887766838
          model: {}
          policy_loss: -0.02106541581451893
          total_loss: 253.9081268310547
          vf_explained_var: 0.9499887824058533
          vf_loss: 253.91879272460938
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.39548975229263306
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,43,638.8,343914,421.04,470,355,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 703824
  custom_metrics: {}
  date: 2021-07-26_16-29-40
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 472.0
  episode_reward_mean: 416.99
  episode_reward_min: 351.0
  episodes_this_iter: 81
  episodes_total: 3519
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.29989901185035706
          entropy_coeff: 0.0
          kl: 0.008046877570450306
          model: {}
          policy_loss: -0.0159334484487772
          total_loss: 266.8089904785156
          vf_explained_var: 0.9465422630310059
          vf_loss: 266.8167724609375
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.4087730646133423
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,44,654.265,351912,416.99,472,351,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 719820
  custom_metrics: {}
  date: 2021-07-26_16-29-54
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 472.0
  episode_reward_mean: 414.5
  episode_reward_min: 368.0
  episodes_this_iter: 78
  episodes_total: 3597
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3137498199939728
          entropy_coeff: 0.0
          kl: 0.009873571805655956
          model: {}
          policy_loss: -0.018065692856907845
          total_loss: 336.83697509765625
          vf_explained_var: 0.9381076097488403
          vf_loss: 336.84503173828125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.39795440435409546
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,45,668.389,359910,414.5,472,368,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 735816
  custom_metrics: {}
  date: 2021-07-26_16-30-10
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 458.0
  episode_reward_mean: 408.04
  episode_reward_min: 346.0
  episodes_this_iter: 81
  episodes_total: 3678
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2990724742412567
          entropy_coeff: 0.0
          kl: 0.007989493198692799
          model: {}
          policy_loss: -0.015127877704799175
          total_loss: 452.8586730957031
          vf_explained_var: 0.9096102118492126
          vf_loss: 452.86578369140625
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.406154066324234
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,46,684.078,367908,408.04,458,346,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 751812
  custom_metrics: {}
  date: 2021-07-26_16-30-25
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 468.0
  episode_reward_mean: 406.94
  episode_reward_min: 339.0
  episodes_this_iter: 81
  episodes_total: 3759
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.312396764755249
          entropy_coeff: 0.0
          kl: 0.010748588480055332
          model: {}
          policy_loss: -0.019549570977687836
          total_loss: 323.150390625
          vf_explained_var: 0.9373660683631897
          vf_loss: 323.1590270996094
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.39887091517448425
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,47,699.452,375906,406.94,468,339,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 767808
  custom_metrics: {}
  date: 2021-07-26_16-30-41
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 455.0
  episode_reward_mean: 402.99
  episode_reward_min: 350.0
  episodes_this_iter: 78
  episodes_total: 3837
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3011467754840851
          entropy_coeff: 0.0
          kl: 0.009151036851108074
          model: {}
          policy_loss: -0.016776341944932938
          total_loss: 279.3138732910156
          vf_explained_var: 0.9430414438247681
          vf_loss: 279.3213806152344
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3889623284339905
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,48,715.509,383904,402.99,455,350,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 783804
  custom_metrics: {}
  date: 2021-07-26_16-30-56
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 455.0
  episode_reward_mean: 400.07
  episode_reward_min: 339.0
  episodes_this_iter: 81
  episodes_total: 3918
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.299337238073349
          entropy_coeff: 0.0
          kl: 0.008621602319180965
          model: {}
          policy_loss: -0.016346028074622154
          total_loss: 463.72894287109375
          vf_explained_var: 0.8967483639717102
          vf_loss: 463.7366027832031
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3878239691257477
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,49,730.611,391902,400.07,455,339,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 799800
  custom_metrics: {}
  date: 2021-07-26_16-31-11
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 462.0
  episode_reward_mean: 402.46
  episode_reward_min: 345.0
  episodes_this_iter: 81
  episodes_total: 3999
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.32766851782798767
          entropy_coeff: 0.0
          kl: 0.01038407813757658
          model: {}
          policy_loss: -0.02032681368291378
          total_loss: 254.59320068359375
          vf_explained_var: 0.9377588033676147
          vf_loss: 254.60299682617188
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.38677361607551575
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,50,745.596,399900,402.46,462,345,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 815796
  custom_metrics: {}
  date: 2021-07-26_16-31-25
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 462.0
  episode_reward_mean: 399.77
  episode_reward_min: 345.0
  episodes_this_iter: 78
  episodes_total: 4077
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3049357235431671
          entropy_coeff: 0.0
          kl: 0.009027469903230667
          model: {}
          policy_loss: -0.01716648042201996
          total_loss: 278.42376708984375
          vf_explained_var: 0.9303034543991089
          vf_loss: 278.4317932128906
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3829578757286072
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,51,759.647,407898,399.77,462,345,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 831792
  custom_metrics: {}
  date: 2021-07-26_16-31-41
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 446.0
  episode_reward_mean: 400.55
  episode_reward_min: 350.0
  episodes_this_iter: 81
  episodes_total: 4158
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.327518492937088
          entropy_coeff: 0.0
          kl: 0.01096846628934145
          model: {}
          policy_loss: -0.022159064188599586
          total_loss: 318.7226257324219
          vf_explained_var: 0.9332990050315857
          vf_loss: 318.733642578125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.38038894534111023
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,52,774.928,415896,400.55,446,350,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 847788
  custom_metrics: {}
  date: 2021-07-26_16-31-56
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 447.0
  episode_reward_mean: 402.33
  episode_reward_min: 329.0
  episodes_this_iter: 78
  episodes_total: 4236
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.29000550508499146
          entropy_coeff: 0.0
          kl: 0.009304358623921871
          model: {}
          policy_loss: -0.018292805179953575
          total_loss: 374.8450927734375
          vf_explained_var: 0.9332983493804932
          vf_loss: 374.8539733886719
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3940551280975342
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,53,789.934,423894,402.33,447,329,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 863784
  custom_metrics: {}
  date: 2021-07-26_16-32-12
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 444.0
  episode_reward_mean: 406.59
  episode_reward_min: 355.0
  episodes_this_iter: 81
  episodes_total: 4317
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2732475996017456
          entropy_coeff: 0.0
          kl: 0.0092241782695055
          model: {}
          policy_loss: -0.016067108139395714
          total_loss: 241.9945068359375
          vf_explained_var: 0.9417319297790527
          vf_loss: 242.00120544433594
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3954707682132721
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,54,805.837,431892,406.59,444,355,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 879780
  custom_metrics: {}
  date: 2021-07-26_16-32-27
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 456.0
  episode_reward_mean: 403.96
  episode_reward_min: 327.0
  episodes_this_iter: 81
  episodes_total: 4398
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2661144733428955
          entropy_coeff: 0.0
          kl: 0.010548556223511696
          model: {}
          policy_loss: -0.01840236410498619
          total_loss: 281.97808837890625
          vf_explained_var: 0.932467520236969
          vf_loss: 281.9858703613281
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3869972825050354
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,55,821.373,439890,403.96,456,327,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 895776
  custom_metrics: {}
  date: 2021-07-26_16-32-42
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 456.0
  episode_reward_mean: 401.35
  episode_reward_min: 327.0
  episodes_this_iter: 78
  episodes_total: 4476
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.24233928322792053
          entropy_coeff: 0.0
          kl: 0.009087618440389633
          model: {}
          policy_loss: -0.016299139708280563
          total_loss: 284.0053405761719
          vf_explained_var: 0.9306807518005371
          vf_loss: 284.0124206542969
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3919508457183838
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,56,836.013,447888,401.35,456,327,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 911772
  custom_metrics: {}
  date: 2021-07-26_16-32-57
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 462.0
  episode_reward_mean: 400.64
  episode_reward_min: 331.0
  episodes_this_iter: 81
  episodes_total: 4557
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2442186325788498
          entropy_coeff: 0.0
          kl: 0.010107491165399551
          model: {}
          policy_loss: -0.019572613760828972
          total_loss: 353.8544921875
          vf_explained_var: 0.9216757416725159
          vf_loss: 353.8638916015625
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.38319534063339233
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,57,850.746,455886,400.64,462,331,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 927768
  custom_metrics: {}
  date: 2021-07-26_16-33-13
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 455.0
  episode_reward_mean: 399.87
  episode_reward_min: 349.0
  episodes_this_iter: 81
  episodes_total: 4638
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2337770015001297
          entropy_coeff: 0.0
          kl: 0.010617941617965698
          model: {}
          policy_loss: -0.014266278594732285
          total_loss: 242.62081909179688
          vf_explained_var: 0.9518111944198608
          vf_loss: 242.6243438720703
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3906693756580353
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,58,866.592,463884,399.87,455,349,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 943764
  custom_metrics: {}
  date: 2021-07-26_16-33-28
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 481.0
  episode_reward_mean: 403.79
  episode_reward_min: 335.0
  episodes_this_iter: 78
  episodes_total: 4716
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2568591833114624
          entropy_coeff: 0.0
          kl: 0.007509557064622641
          model: {}
          policy_loss: -0.014792942441999912
          total_loss: 322.1475830078125
          vf_explained_var: 0.9176406860351562
          vf_loss: 322.1546936035156
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.38141345977783203
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,59,882.039,471882,403.79,481,335,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 959760
  custom_metrics: {}
  date: 2021-07-26_16-33-48
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 476.0
  episode_reward_mean: 404.67
  episode_reward_min: 335.0
  episodes_this_iter: 81
  episodes_total: 4797
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.25212061405181885
          entropy_coeff: 0.0
          kl: 0.009571892209351063
          model: {}
          policy_loss: -0.015501260757446289
          total_loss: 284.307373046875
          vf_explained_var: 0.9222297668457031
          vf_loss: 284.3131408691406
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.37355515360832214
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,60,901.573,479880,404.67,476,335,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 975756
  custom_metrics: {}
  date: 2021-07-26_16-34-12
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 477.0
  episode_reward_mean: 411.35
  episode_reward_min: 355.0
  episodes_this_iter: 81
  episodes_total: 4878
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.24294498562812805
          entropy_coeff: 0.0
          kl: 0.008959508500993252
          model: {}
          policy_loss: -0.014439842663705349
          total_loss: 319.3625793457031
          vf_explained_var: 0.9256961941719055
          vf_loss: 319.3678894042969
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.39172205328941345
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,61,925.517,487878,411.35,477,355,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 991752
  custom_metrics: {}
  date: 2021-07-26_16-34-30
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 473.0
  episode_reward_mean: 412.39
  episode_reward_min: 307.0
  episodes_this_iter: 78
  episodes_total: 4956
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.254840224981308
          entropy_coeff: 0.0
          kl: 0.009856765158474445
          model: {}
          policy_loss: -0.012761233374476433
          total_loss: 325.7589111328125
          vf_explained_var: 0.9239456057548523
          vf_loss: 325.7616882324219
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3834461569786072
          entropy_c

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,62,943.794,495876,412.39,473,307,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1007748
  custom_metrics: {}
  date: 2021-07-26_16-34-47
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 481.0
  episode_reward_mean: 412.94
  episode_reward_min: 307.0
  episodes_this_iter: 81
  episodes_total: 5037
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.256808340549469
          entropy_coeff: 0.0
          kl: 0.00848357006907463
          model: {}
          policy_loss: -0.013282516039907932
          total_loss: 334.9039611816406
          vf_explained_var: 0.9145994782447815
          vf_loss: 334.90863037109375
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3774399161338806
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,63,960.876,503874,412.94,481,307,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1023744
  custom_metrics: {}
  date: 2021-07-26_16-35-04
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 481.0
  episode_reward_mean: 408.25
  episode_reward_min: 358.0
  episodes_this_iter: 81
  episodes_total: 5118
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.21913447976112366
          entropy_coeff: 0.0
          kl: 0.00814506784081459
          model: {}
          policy_loss: -0.016855478286743164
          total_loss: 281.4005432128906
          vf_explained_var: 0.9407631158828735
          vf_loss: 281.40911865234375
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3942965269088745
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,64,977.25,511872,408.25,481,358,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1039740
  custom_metrics: {}
  date: 2021-07-26_16-35-19
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 451.0
  episode_reward_mean: 407.35
  episode_reward_min: 344.0
  episodes_this_iter: 78
  episodes_total: 5196
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.22450049221515656
          entropy_coeff: 0.0
          kl: 0.007593050599098206
          model: {}
          policy_loss: -0.01342164445668459
          total_loss: 322.3479919433594
          vf_explained_var: 0.9346719980239868
          vf_loss: 322.3537902832031
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3883882761001587
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,65,993.084,519870,407.35,451,344,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1055736
  custom_metrics: {}
  date: 2021-07-26_16-35-35
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 456.0
  episode_reward_mean: 410.78
  episode_reward_min: 366.0
  episodes_this_iter: 81
  episodes_total: 5277
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.23773528635501862
          entropy_coeff: 0.0
          kl: 0.008679226972162724
          model: {}
          policy_loss: -0.01379489153623581
          total_loss: 239.9990234375
          vf_explained_var: 0.940788984298706
          vf_loss: 240.00401306152344
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.3772125840187073
          entropy_co

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,66,1008.33,527868,410.78,456,366,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1071732
  custom_metrics: {}
  date: 2021-07-26_16-35-52
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 459.0
  episode_reward_mean: 409.1
  episode_reward_min: 343.0
  episodes_this_iter: 81
  episodes_total: 5358
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.22605714201927185
          entropy_coeff: 0.0
          kl: 0.009058385156095028
          model: {}
          policy_loss: -0.010979539714753628
          total_loss: 272.61749267578125
          vf_explained_var: 0.9374305605888367
          vf_loss: 272.6192932128906
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.38988062739372253
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,67,1025.59,535866,409.1,459,343,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1087728
  custom_metrics: {}
  date: 2021-07-26_16-36-13
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 454.0
  episode_reward_mean: 403.63
  episode_reward_min: 343.0
  episodes_this_iter: 78
  episodes_total: 5436
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.21080978214740753
          entropy_coeff: 0.0
          kl: 0.008737009949982166
          model: {}
          policy_loss: -0.012997649610042572
          total_loss: 255.27430725097656
          vf_explained_var: 0.9406144022941589
          vf_loss: 255.2784423828125
      agent-1:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.38731491565704346
          entr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,68,1046.7,543864,403.63,454,343,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1103724
  custom_metrics: {}
  date: 2021-07-26_16-36-30
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 458.0
  episode_reward_mean: 404.19
  episode_reward_min: 348.0
  episodes_this_iter: 81
  episodes_total: 5517
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2237769514322281
          entropy_coeff: 0.0
          kl: 0.008343319408595562
          model: {}
          policy_loss: -0.012482939288020134
          total_loss: 165.97698974609375
          vf_explained_var: 0.9636139869689941
          vf_loss: 165.98098754882812
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.37503042817115784
          entr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,69,1063.29,551862,404.19,458,348,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1119720
  custom_metrics: {}
  date: 2021-07-26_16-36-45
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 453.0
  episode_reward_mean: 403.53
  episode_reward_min: 353.0
  episodes_this_iter: 81
  episodes_total: 5598
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.22130627930164337
          entropy_coeff: 0.0
          kl: 0.008648939430713654
          model: {}
          policy_loss: -0.01712965965270996
          total_loss: 223.8227996826172
          vf_explained_var: 0.9505885243415833
          vf_loss: 223.83116149902344
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3570132851600647
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,70,1077.98,559860,403.53,453,353,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1135716
  custom_metrics: {}
  date: 2021-07-26_16-37-00
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 453.0
  episode_reward_mean: 397.21
  episode_reward_min: 353.0
  episodes_this_iter: 78
  episodes_total: 5676
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2261674553155899
          entropy_coeff: 0.0
          kl: 0.008258604444563389
          model: {}
          policy_loss: -0.013557943515479565
          total_loss: 188.52090454101562
          vf_explained_var: 0.9470757246017456
          vf_loss: 188.52606201171875
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.36757364869117737
          entr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,71,1092.88,567858,397.21,453,353,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1151712
  custom_metrics: {}
  date: 2021-07-26_16-37-15
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 453.0
  episode_reward_mean: 393.87
  episode_reward_min: 348.0
  episodes_this_iter: 81
  episodes_total: 5757
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.21824930608272552
          entropy_coeff: 0.0
          kl: 0.0074771917425096035
          model: {}
          policy_loss: -0.013683434575796127
          total_loss: 137.37155151367188
          vf_explained_var: 0.9616237878799438
          vf_loss: 137.37765502929688
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3603613078594208
          ent

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,72,1108.72,575856,393.87,453,348,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1167708
  custom_metrics: {}
  date: 2021-07-26_16-37-30
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 462.0
  episode_reward_mean: 395.47
  episode_reward_min: 357.0
  episodes_this_iter: 81
  episodes_total: 5838
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.21521374583244324
          entropy_coeff: 0.0
          kl: 0.008109299466013908
          model: {}
          policy_loss: -0.011786166578531265
          total_loss: 177.97830200195312
          vf_explained_var: 0.9496250748634338
          vf_loss: 177.98187255859375
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3555956482887268
          entr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,73,1123.61,583854,395.47,462,357,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1183704
  custom_metrics: {}
  date: 2021-07-26_16-37-46
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 462.0
  episode_reward_mean: 393.38
  episode_reward_min: 356.0
  episodes_this_iter: 78
  episodes_total: 5916
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2174830436706543
          entropy_coeff: 0.0
          kl: 0.007178018800914288
          model: {}
          policy_loss: -0.014257552102208138
          total_loss: 153.66136169433594
          vf_explained_var: 0.9524946808815002
          vf_loss: 153.6683349609375
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.34816214442253113
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,74,1139.02,591852,393.38,462,356,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1199700
  custom_metrics: {}
  date: 2021-07-26_16-38-01
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 438.0
  episode_reward_mean: 391.16
  episode_reward_min: 335.0
  episodes_this_iter: 81
  episodes_total: 5997
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.19314740598201752
          entropy_coeff: 0.0
          kl: 0.007340146228671074
          model: {}
          policy_loss: -0.008881542831659317
          total_loss: 162.77044677734375
          vf_explained_var: 0.9631467461585999
          vf_loss: 162.77191162109375
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.34314361214637756
          ent

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,75,1154.66,599850,391.16,438,335,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1215696
  custom_metrics: {}
  date: 2021-07-26_16-38-17
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 456.0
  episode_reward_mean: 389.69
  episode_reward_min: 332.0
  episodes_this_iter: 81
  episodes_total: 6078
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.18508246541023254
          entropy_coeff: 0.0
          kl: 0.007466468960046768
          model: {}
          policy_loss: -0.014211460016667843
          total_loss: 234.74261474609375
          vf_explained_var: 0.9341297149658203
          vf_loss: 234.74925231933594
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3438134789466858
          entr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,76,1169.83,607848,389.69,456,332,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1231692
  custom_metrics: {}
  date: 2021-07-26_16-38-31
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 456.0
  episode_reward_mean: 388.06
  episode_reward_min: 347.0
  episodes_this_iter: 78
  episodes_total: 6156
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.18638938665390015
          entropy_coeff: 0.0
          kl: 0.007208842318505049
          model: {}
          policy_loss: -0.011877807788550854
          total_loss: 131.11129760742188
          vf_explained_var: 0.9580504894256592
          vf_loss: 131.1158905029297
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.32913312315940857
          entr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,77,1184.56,615846,388.06,456,347,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1247688
  custom_metrics: {}
  date: 2021-07-26_16-38-46
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 458.0
  episode_reward_mean: 385.54
  episode_reward_min: 333.0
  episodes_this_iter: 81
  episodes_total: 6237
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.19268780946731567
          entropy_coeff: 0.0
          kl: 0.006365378387272358
          model: {}
          policy_loss: -0.011625024490058422
          total_loss: 228.68162536621094
          vf_explained_var: 0.9424388408660889
          vf_loss: 228.68678283691406
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.30645880103111267
          ent

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,78,1199.11,623844,385.54,458,333,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1263684
  custom_metrics: {}
  date: 2021-07-26_16-39-00
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 441.0
  episode_reward_mean: 384.48
  episode_reward_min: 340.0
  episodes_this_iter: 81
  episodes_total: 6318
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.18874792754650116
          entropy_coeff: 0.0
          kl: 0.0072004045359790325
          model: {}
          policy_loss: -0.012590460479259491
          total_loss: 161.13934326171875
          vf_explained_var: 0.9491716623306274
          vf_loss: 161.1446533203125
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.33306923508644104
          ent

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,79,1212.91,631842,384.48,441,340,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1279680
  custom_metrics: {}
  date: 2021-07-26_16-39-14
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 438.0
  episode_reward_mean: 385.72
  episode_reward_min: 340.0
  episodes_this_iter: 78
  episodes_total: 6396
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.18967969715595245
          entropy_coeff: 0.0
          kl: 0.006678653880953789
          model: {}
          policy_loss: -0.008573032915592194
          total_loss: 105.11003875732422
          vf_explained_var: 0.9676216244697571
          vf_loss: 105.11185455322266
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.328370064496994
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,80,1227.44,639840,385.72,438,340,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1295676
  custom_metrics: {}
  date: 2021-07-26_16-39-29
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 427.0
  episode_reward_mean: 384.0
  episode_reward_min: 347.0
  episodes_this_iter: 81
  episodes_total: 6477
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.21083444356918335
          entropy_coeff: 0.0
          kl: 0.007866072468459606
          model: {}
          policy_loss: -0.014118032529950142
          total_loss: 199.45018005371094
          vf_explained_var: 0.9504311680793762
          vf_loss: 199.45632934570312
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3014853298664093
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,81,1241.62,647838,384,427,347,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1311672
  custom_metrics: {}
  date: 2021-07-26_16-39-43
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 438.0
  episode_reward_mean: 385.19
  episode_reward_min: 348.0
  episodes_this_iter: 81
  episodes_total: 6558
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.20569045841693878
          entropy_coeff: 0.0
          kl: 0.005740252323448658
          model: {}
          policy_loss: -0.009977834299206734
          total_loss: 166.03468322753906
          vf_explained_var: 0.9478086233139038
          vf_loss: 166.03883361816406
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.31988704204559326
          ent

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,82,1255.67,655836,385.19,438,348,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1327668
  custom_metrics: {}
  date: 2021-07-26_16-39-56
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 435.0
  episode_reward_mean: 391.38
  episode_reward_min: 340.0
  episodes_this_iter: 78
  episodes_total: 6636
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.18983130156993866
          entropy_coeff: 0.0
          kl: 0.015707677230238914
          model: {}
          policy_loss: -0.0040581487119197845
          total_loss: 199.2979736328125
          vf_explained_var: 0.9425944089889526
          vf_loss: 199.28611755371094
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.34248262643814087
          ent

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,83,1269.26,663834,391.38,435,340,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1343664
  custom_metrics: {}
  date: 2021-07-26_16-40-11
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 453.0
  episode_reward_mean: 400.26
  episode_reward_min: 350.0
  episodes_this_iter: 81
  episodes_total: 6717
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.18342237174510956
          entropy_coeff: 0.0
          kl: 0.005932702217251062
          model: {}
          policy_loss: -0.011731968261301517
          total_loss: 359.8079528808594
          vf_explained_var: 0.9245317578315735
          vf_loss: 359.8136901855469
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3376181721687317
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,84,1283.63,671832,400.26,453,350,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1359660
  custom_metrics: {}
  date: 2021-07-26_16-40-24
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 452.0
  episode_reward_mean: 400.37
  episode_reward_min: 336.0
  episodes_this_iter: 81
  episodes_total: 6798
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.19256429374217987
          entropy_coeff: 0.0
          kl: 0.006227440666407347
          model: {}
          policy_loss: -0.01044674962759018
          total_loss: 310.50701904296875
          vf_explained_var: 0.9226920008659363
          vf_loss: 310.51116943359375
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3428267240524292
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,85,1297.15,679830,400.37,452,336,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1375656
  custom_metrics: {}
  date: 2021-07-26_16-40-39
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 456.0
  episode_reward_mean: 404.39
  episode_reward_min: 343.0
  episodes_this_iter: 78
  episodes_total: 6876
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.1745738983154297
          entropy_coeff: 0.0
          kl: 0.006919703911989927
          model: {}
          policy_loss: -0.006926167756319046
          total_loss: 538.5416870117188
          vf_explained_var: 0.8760116100311279
          vf_loss: 538.5415649414062
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.34402745962142944
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,86,1311.45,687828,404.39,456,343,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1391652
  custom_metrics: {}
  date: 2021-07-26_16-40-53
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 456.0
  episode_reward_mean: 412.04
  episode_reward_min: 362.0
  episodes_this_iter: 81
  episodes_total: 6957
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.17002327740192413
          entropy_coeff: 0.0
          kl: 0.007400567643344402
          model: {}
          policy_loss: -0.009287199005484581
          total_loss: 471.2423400878906
          vf_explained_var: 0.909981906414032
          vf_loss: 471.24407958984375
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.36152204871177673
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,87,1325.36,695826,412.04,456,362,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1407648
  custom_metrics: {}
  date: 2021-07-26_16-41-06
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 456.0
  episode_reward_mean: 414.57
  episode_reward_min: 353.0
  episodes_this_iter: 81
  episodes_total: 7038
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.16374143958091736
          entropy_coeff: 0.0
          kl: 0.005894342437386513
          model: {}
          policy_loss: -0.011010376736521721
          total_loss: 389.39202880859375
          vf_explained_var: 0.9089522957801819
          vf_loss: 389.3970947265625
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.36553773283958435
          entr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,88,1339,703824,414.57,456,353,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1423644
  custom_metrics: {}
  date: 2021-07-26_16-41-20
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 465.0
  episode_reward_mean: 414.7
  episode_reward_min: 353.0
  episodes_this_iter: 78
  episodes_total: 7116
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.1809963881969452
          entropy_coeff: 0.0
          kl: 0.007176894228905439
          model: {}
          policy_loss: -0.008660020306706429
          total_loss: 427.41583251953125
          vf_explained_var: 0.8932651281356812
          vf_loss: 427.417236328125
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.36056041717529297
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,89,1352.63,711822,414.7,465,353,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1439640
  custom_metrics: {}
  date: 2021-07-26_16-41-35
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 465.0
  episode_reward_mean: 409.84
  episode_reward_min: 358.0
  episodes_this_iter: 81
  episodes_total: 7197
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.18436583876609802
          entropy_coeff: 0.0
          kl: 0.007575497962534428
          model: {}
          policy_loss: -0.009899790398776531
          total_loss: 288.3080749511719
          vf_explained_var: 0.9315858483314514
          vf_loss: 288.310302734375
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.34809359908103943
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,90,1367.17,719820,409.84,465,358,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1455636
  custom_metrics: {}
  date: 2021-07-26_16-41-49
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 448.0
  episode_reward_mean: 402.36
  episode_reward_min: 356.0
  episodes_this_iter: 81
  episodes_total: 7278
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2055835872888565
          entropy_coeff: 0.0
          kl: 0.007257877849042416
          model: {}
          policy_loss: -0.008871902711689472
          total_loss: 214.63816833496094
          vf_explained_var: 0.9412497878074646
          vf_loss: 214.6396942138672
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.35778453946113586
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,91,1381.78,727818,402.36,448,356,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1471632
  custom_metrics: {}
  date: 2021-07-26_16-42-04
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 430.0
  episode_reward_mean: 396.75
  episode_reward_min: 351.0
  episodes_this_iter: 78
  episodes_total: 7356
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.1995619237422943
          entropy_coeff: 0.0
          kl: 0.0073877316899597645
          model: {}
          policy_loss: -0.008442332036793232
          total_loss: 144.68338012695312
          vf_explained_var: 0.9536446332931519
          vf_loss: 144.68434143066406
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3557206988334656
          entr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,92,1396.19,735816,396.75,430,351,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1487628
  custom_metrics: {}
  date: 2021-07-26_16-42-18
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 430.0
  episode_reward_mean: 391.6
  episode_reward_min: 342.0
  episodes_this_iter: 81
  episodes_total: 7437
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.20529185235500336
          entropy_coeff: 0.0
          kl: 0.009585930965840816
          model: {}
          policy_loss: -0.010495685040950775
          total_loss: 118.1724624633789
          vf_explained_var: 0.9617153406143188
          vf_loss: 118.17328643798828
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.33744725584983826
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,93,1410.53,743814,391.6,430,342,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1503624
  custom_metrics: {}
  date: 2021-07-26_16-42-33
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 431.0
  episode_reward_mean: 388.56
  episode_reward_min: 348.0
  episodes_this_iter: 81
  episodes_total: 7518
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2193996012210846
          entropy_coeff: 0.0
          kl: 0.006257872562855482
          model: {}
          policy_loss: -0.011065023951232433
          total_loss: 94.89165496826172
          vf_explained_var: 0.9652630090713501
          vf_loss: 94.89637756347656
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.33917683362960815
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,94,1425.15,751812,388.56,431,348,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1519620
  custom_metrics: {}
  date: 2021-07-26_16-42-47
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 426.0
  episode_reward_mean: 388.11
  episode_reward_min: 355.0
  episodes_this_iter: 78
  episodes_total: 7596
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2096399962902069
          entropy_coeff: 0.0
          kl: 0.005589097272604704
          model: {}
          policy_loss: -0.005787092726677656
          total_loss: 83.94373321533203
          vf_explained_var: 0.9679002165794373
          vf_loss: 83.9438705444336
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.32894742488861084
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,95,1439.27,759810,388.11,426,355,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1535616
  custom_metrics: {}
  date: 2021-07-26_16-43-00
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 436.0
  episode_reward_mean: 392.01
  episode_reward_min: 336.0
  episodes_this_iter: 81
  episodes_total: 7677
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.21648798882961273
          entropy_coeff: 0.0
          kl: 0.006992046255618334
          model: {}
          policy_loss: -0.006999028380960226
          total_loss: 70.91690063476562
          vf_explained_var: 0.9743664860725403
          vf_loss: 70.91681671142578
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.34174323081970215
          entro

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,96,1452.81,767808,392.01,436,336,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1551612
  custom_metrics: {}
  date: 2021-07-26_16-43-14
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 434.0
  episode_reward_mean: 394.93
  episode_reward_min: 359.0
  episodes_this_iter: 81
  episodes_total: 7758
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2158336192369461
          entropy_coeff: 0.0
          kl: 0.005273258313536644
          model: {}
          policy_loss: -0.009327440522611141
          total_loss: 97.15096282958984
          vf_explained_var: 0.9663954377174377
          vf_loss: 97.15496063232422
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3455866277217865
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,97,1466.78,775806,394.93,434,359,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1567608
  custom_metrics: {}
  date: 2021-07-26_16-43-29
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 438.0
  episode_reward_mean: 390.59
  episode_reward_min: 355.0
  episodes_this_iter: 78
  episodes_total: 7836
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.21546031534671783
          entropy_coeff: 0.0
          kl: 0.006571006961166859
          model: {}
          policy_loss: -0.009616047143936157
          total_loss: 80.19050598144531
          vf_explained_var: 0.9691066741943359
          vf_loss: 80.19348907470703
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3351488709449768
          entrop

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,98,1481.02,783804,390.59,438,355,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1583604
  custom_metrics: {}
  date: 2021-07-26_16-43-42
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 437.0
  episode_reward_mean: 389.52
  episode_reward_min: 355.0
  episodes_this_iter: 81
  episodes_total: 7917
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.208974689245224
          entropy_coeff: 0.0
          kl: 0.005429182201623917
          model: {}
          policy_loss: -0.008406792767345905
          total_loss: 61.02883529663086
          vf_explained_var: 0.9736587405204773
          vf_loss: 61.03174591064453
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.3224791884422302
          entropy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,99,1494.79,791802,389.52,437,355,100


Result for PPO_TwoAgent_PD_8803d_00000:
  agent_timesteps_total: 1599600
  custom_metrics: {}
  date: 2021-07-26_16-43-57
  done: true
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 438.0
  episode_reward_mean: 392.04
  episode_reward_min: 363.0
  episodes_this_iter: 81
  episodes_total: 7998
  experiment_id: b1bbca03a67a4d8b933fd86322895b9b
  hostname: coolo-computer
  info:
    learner:
      agent-0:
        learner_stats:
          cur_kl_coeff: 1.0125000476837158
          cur_lr: 0.004999999888241291
          entropy: 0.2060941606760025
          entropy_coeff: 0.0
          kl: 0.006433700677007437
          model: {}
          policy_loss: -0.008029637858271599
          total_loss: 61.23645782470703
          vf_explained_var: 0.9744699001312256
          vf_loss: 61.23796463012695
      agent-1:
        learner_stats:
          cur_kl_coeff: 0.5062500238418579
          cur_lr: 0.004999999888241291
          entropy: 0.33216193318367004
          entropy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,RUNNING,192.168.1.21:42447,100,1509.03,799800,392.04,438,363,100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TwoAgent_PD_8803d_00000,TERMINATED,,100,1509.03,799800,392.04,438,363,100


2021-07-26 16:43:57,583	INFO tune.py:549 -- Total run time: 1528.08 seconds (1527.69 seconds for the tuning loop).


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x7f3530481040>

Traceback (most recent call last):
  File "/home/peter/.local/lib/python3.8/site-packages/ray/autoscaler/_private/monitor.py", line 317, in run
    self._run()
  File "/home/peter/.local/lib/python3.8/site-packages/ray/autoscaler/_private/monitor.py", line 207, in _run
    self.update_load_metrics()
  File "/home/peter/.local/lib/python3.8/site-packages/ray/autoscaler/_private/monitor.py", line 169, in update_load_metrics
    response = self.gcs_node_resources_stub.GetAllResourceUsage(
  File "/home/peter/.local/lib/python3.8/site-packages/grpc/_channel.py", line 826, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "/home/peter/.local/lib/python3.8/site-packages/grpc/_channel.py", line 729, in _end_unary_response_blocking
    raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
	status = StatusCode.DEADLINE_EXCEEDED
	details = "Deadline Exceeded"
	debug_error_string = "{"created":"@162727730