In [44]:
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_tf_policy import PPOTFPolicy

In [45]:
from ray.rllib.agents.ppo.ppo_torch_policy import PPOTorchPolicy
from ray.rllib.agents.a3c.a3c_torch_policy import A3CTorchPolicy
from ray.rllib.agents.a3c.a2c import A2CTrainer

In [46]:
import gym
from typing import Optional, Dict

import ray
from ray.rllib.agents.ppo.ppo_torch_policy import ValueNetworkMixin
from ray.rllib.evaluation.episode import MultiAgentEpisode
from ray.rllib.evaluation.postprocessing import compute_gae_for_sample_batch, \
    Postprocessing
from ray.rllib.models.action_dist import ActionDistribution
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.policy.policy import Policy
from ray.rllib.policy.policy_template import build_policy_class
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.annotations import Deprecated
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.torch_ops import apply_grad_clipping, sequence_mask
from ray.rllib.utils.typing import TrainerConfigDict, TensorType, \
    PolicyID, LocalOptimizer

torch, nn = try_import_torch()

In [47]:
def after_init(policy: Policy, obs_space: gym.spaces.Space, 
              action_space: gym.spaces.Space, config: TrainerConfigDict)->None:
        policy.past_len = 5        
        policy.past_models = deque(maxlen =policy.past_len)
        policy.timestep = 0
    

In [48]:
def compute_div_loss(policy: Policy, model: ModelV2,
                      dist_class: ActionDistribution,
                      train_batch: SampleBatch):
    logits, _ = model.from_batch(train_batch)
    values = model.value_function()
    valid_mask = torch.ones_like(values, dtype=torch.bool)
    dist = dist_class(logits, model)
    log_probs = dist.logp(train_batch[SampleBatch.ACTIONS]).reshape(-1)
    
    divs = []
    for idx, past_model in enumerate(policy.past_models):
        logits, _ = past_model.from_batch(train_batch)
        values = past_model.value_function()
        valid_mask = torch.ones_like(values, dtype=torch.bool)
        dist = dist_class(logits, past_model)
        past_log_probs = dist.logp(train_batch[SampleBatch.ACTIONS]).reshape(-1) 
        div = div_metric(log_probs, past_log_probs).sum(1)
        div = div.mean(0)
        divs.append(div)
    
    divs_sort_idx = np.argsort([d.data[0] for d in divs])
    div_loss_orig = 0
    for idx in divs_sort_idx:
        div_loss += divs[idx]
        div_loss_orig += divs[idx]
    
    div_loss = div_loss / self.past_len
    
    return div_loss

In [49]:
def actor_critic_loss(policy: Policy, model: ModelV2,
                      dist_class: ActionDistribution,
                      train_batch: SampleBatch) -> TensorType:
    logits, _ = model.from_batch(train_batch)
    values = model.value_function()
    policy.timestep += 1
    
    if policy.timestep % 100 == 0:
        policy.past_models.append(copy.deepcopy(model))
    
    if policy.is_recurrent():
        B = len(train_batch[SampleBatch.SEQ_LENS])
        max_seq_len = logits.shape[0] // B
        mask_orig = sequence_mask(train_batch[SampleBatch.SEQ_LENS],
                                  max_seq_len)
        valid_mask = torch.reshape(mask_orig, [-1])
    else:
        valid_mask = torch.ones_like(values, dtype=torch.bool)

    dist = dist_class(logits, model)
    log_probs = dist.logp(train_batch[SampleBatch.ACTIONS]).reshape(-1)
    pi_err = -torch.sum(
        torch.masked_select(log_probs * train_batch[Postprocessing.ADVANTAGES],
                            valid_mask))

    # Compute a value function loss.
    if policy.config["use_critic"]:
        value_err = 0.5 * torch.sum(
            torch.pow(
                torch.masked_select(
                    values.reshape(-1) -
                    train_batch[Postprocessing.VALUE_TARGETS], valid_mask),
                2.0))
    # Ignore the value function.
    else:
        value_err = 0.0

    entropy = torch.sum(torch.masked_select(dist.entropy(), valid_mask))


    total_loss = (pi_err + value_err * policy.config["vf_loss_coeff"] -
                  entropy * policy.config["entropy_coeff"] - compute_div_loss(policy, model, dist_class, train_batch))

    policy.entropy = entropy
    policy.pi_err = pi_err
    policy.value_err = value_err

    return total_loss

In [50]:
CustomPolicy = A3CTorchPolicy.with_updates(
    name="MyCustomA3CTorchPolicy",
    loss_fn=actor_critic_loss,
    after_init=after_init)
CustomTrainer = A2CTrainer.with_updates(
    default_policy=CustomPolicy)

In [51]:
tune.run(CustomTrainer, config={"env": 'Frostbite-v0', "num_gpus":1})

Trial name,status,loc
A2C_Frostbite-v0_74b39_00000,PENDING,


[2m[36m(pid=2409)[0m 2021-12-19 15:15:51,512	INFO trainer.py:741 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2409)[0m 2021-12-19 15:15:51,512	INFO trainer.py:758 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc
A2C_Frostbite-v0_74b39_00000,RUNNING,


Trial name,status,loc
A2C_Frostbite-v0_74b39_00000,RUNNING,




Trial name,status,loc
A2C_Frostbite-v0_74b39_00000,RUNNING,


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 200
  custom_metrics: {}
  date: 2021-12-19_15-16-05
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 106.24060821533203
          policy_loss: 2.371837854385376
          var_gnorm: 10.8916597366333
          vf_explained_var: -0.013216972351074219
          vf_loss: 0.17337457835674286
        train: null
    num_agent_steps_sampled: 200
    num_agent_steps_trained: 200
    num_steps_sampled: 200
    num_steps_trained: 200
  iterations_since_restore: 1
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estimat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,1,6.3408,200,,,,


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,1,6.3408,200,,,,


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 7600
  custom_metrics: {}
  date: 2021-12-19_15-16-15
  done: false
  episode_len_mean: 418.55555555555554
  episode_media: {}
  episode_reward_max: 80.0
  episode_reward_mean: 77.77777777777777
  episode_reward_min: 60.0
  episodes_this_iter: 18
  episodes_total: 18
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 5.307250943232094e-26
          policy_loss: 0.0
          var_gnorm: 10.904439926147461
          vf_explained_var: 0.8947932720184326
          vf_loss: 9.513306617736816
        train: null
    num_agent_steps_sampled: 7600
    num_agent_steps_trained: 7600
    num_steps_sampled: 7600
    num_steps_trained: 7600
  iterations_since_restore: 2
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,2,16.3474,7600,77.7778,80,60,418.556


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,2,16.3474,7600,77.7778,80,60,418.556


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-12-19_15-16-26
  done: false
  episode_len_mean: 413.9189189189189
  episode_media: {}
  episode_reward_max: 80.0
  episode_reward_mean: 78.37837837837837
  episode_reward_min: 60.0
  episodes_this_iter: 19
  episodes_total: 37
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 1.3600801257798723e-13
          policy_loss: 0.0
          var_gnorm: 10.909357070922852
          vf_explained_var: 0.950473964214325
          vf_loss: 2.428934097290039
        train: null
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 3
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_poli

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,3,26.5513,15000,78.3784,80,60,413.919


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,3,26.5513,15000,78.3784,80,60,413.919


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 22400
  custom_metrics: {}
  date: 2021-12-19_15-16-36
  done: false
  episode_len_mean: 411.9107142857143
  episode_media: {}
  episode_reward_max: 80.0
  episode_reward_mean: 78.21428571428571
  episode_reward_min: 60.0
  episodes_this_iter: 19
  episodes_total: 56
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 4.1365723291164613e-07
          policy_loss: 0.0
          var_gnorm: 10.916982650756836
          vf_explained_var: 0.9393056631088257
          vf_loss: 3.932894229888916
        train: null
    num_agent_steps_sampled: 22400
    num_agent_steps_trained: 22400
    num_steps_sampled: 22400
    num_steps_trained: 22400
  iterations_since_restore: 4
  node_ip: 132.236.59.202
  num_healthy_worker

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,4,36.5849,22400,78.2143,80,60,411.911


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,4,36.5849,22400,78.2143,80,60,411.911


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 29600
  custom_metrics: {}
  date: 2021-12-19_15-16-46
  done: false
  episode_len_mean: 411.7105263157895
  episode_media: {}
  episode_reward_max: 80.0
  episode_reward_mean: 78.15789473684211
  episode_reward_min: 60.0
  episodes_this_iter: 20
  episodes_total: 76
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 3.039107809854613e-07
          policy_loss: 0.0
          var_gnorm: 10.928171157836914
          vf_explained_var: 0.9414781332015991
          vf_loss: 3.740051507949829
        train: null
    num_agent_steps_sampled: 29600
    num_agent_steps_trained: 29600
    num_steps_sampled: 29600
    num_steps_trained: 29600
  iterations_since_restore: 5
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_poli

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,5,46.767,29600,78.1579,80,60,411.711


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,5,46.767,29600,78.1579,80,60,411.711


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-12-19_15-16-56
  done: false
  episode_len_mean: 411.5851063829787
  episode_media: {}
  episode_reward_max: 80.0
  episode_reward_mean: 78.29787234042553
  episode_reward_min: 60.0
  episodes_this_iter: 18
  episodes_total: 94
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 0.0002073871874017641
          policy_loss: -3.2757014878370683e-07
          var_gnorm: 10.94045639038086
          vf_explained_var: 0.9621171951293945
          vf_loss: 2.0227980613708496
        train: null
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 6
  node_ip: 132.236.59.202
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,6,56.8614,37000,78.2979,80,60,411.585


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,6,56.8614,37000,78.2979,80,60,411.585


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 44400
  custom_metrics: {}
  date: 2021-12-19_15-17-06
  done: false
  episode_len_mean: 410.92
  episode_media: {}
  episode_reward_max: 80.0
  episode_reward_mean: 78.4
  episode_reward_min: 70.0
  episodes_this_iter: 19
  episodes_total: 113
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 43.50033950805664
          policy_loss: -2.8569767475128174
          var_gnorm: 10.95610523223877
          vf_explained_var: 0.7752901315689087
          vf_loss: 3.6883392333984375
        train: null
    num_agent_steps_sampled: 44400
    num_agent_steps_trained: 44400
    num_steps_sampled: 44400
    num_steps_trained: 44400
  iterations_since_restore: 7
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estimato

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,7,67.0873,44400,78.4,80,70,410.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,7,67.0873,44400,78.4,80,70,410.92


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 52200
  custom_metrics: {}
  date: 2021-12-19_15-17-16
  done: false
  episode_len_mean: 437.27
  episode_media: {}
  episode_reward_max: 80.0
  episode_reward_mean: 77.3
  episode_reward_min: 20.0
  episodes_this_iter: 2
  episodes_total: 115
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 282.56927490234375
          policy_loss: 18.794845581054688
          var_gnorm: 10.996954917907715
          vf_explained_var: -0.007247328758239746
          vf_loss: 6.31093168258667
        train: null
    num_agent_steps_sampled: 52200
    num_agent_steps_trained: 52200
    num_steps_sampled: 52200
    num_steps_trained: 52200
  iterations_since_restore: 8
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,8,77.2934,52200,77.3,80,20,437.27


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,8,77.2934,52200,77.3,80,20,437.27


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 59800
  custom_metrics: {}
  date: 2021-12-19_15-17-27
  done: false
  episode_len_mean: 450.48
  episode_media: {}
  episode_reward_max: 170.0
  episode_reward_mean: 75.3
  episode_reward_min: 0.0
  episodes_this_iter: 7
  episodes_total: 122
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.000003814697266
          model: {}
          policy_entropy: 438.7979736328125
          policy_loss: -8.923070907592773
          var_gnorm: 11.020637512207031
          vf_explained_var: 0.0009496808052062988
          vf_loss: 0.43639248609542847
        train: null
    num_agent_steps_sampled: 59800
    num_agent_steps_trained: 59800
    num_steps_sampled: 59800
    num_steps_trained: 59800
  iterations_since_restore: 9
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  of

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,9,87.329,59800,75.3,170,0,450.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,9,87.329,59800,75.3,170,0,450.48


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 67400
  custom_metrics: {}
  date: 2021-12-19_15-17-37
  done: false
  episode_len_mean: 557.85
  episode_media: {}
  episode_reward_max: 170.0
  episode_reward_mean: 74.2
  episode_reward_min: 0.0
  episodes_this_iter: 12
  episodes_total: 134
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 450.1761779785156
          policy_loss: 36.37498092651367
          var_gnorm: 11.03261661529541
          vf_explained_var: 0.23192012310028076
          vf_loss: 14.260164260864258
        train: null
    num_agent_steps_sampled: 67400
    num_agent_steps_trained: 67400
    num_steps_sampled: 67400
    num_steps_trained: 67400
  iterations_since_restore: 10
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estimato

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,10,97.46,67400,74.2,170,0,557.85


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,10,97.46,67400,74.2,170,0,557.85


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-12-19_15-17-47
  done: false
  episode_len_mean: 584.35
  episode_media: {}
  episode_reward_max: 170.0
  episode_reward_mean: 73.6
  episode_reward_min: 0.0
  episodes_this_iter: 13
  episodes_total: 147
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 507.8266906738281
          policy_loss: 30.685760498046875
          var_gnorm: 11.037708282470703
          vf_explained_var: -0.09832370281219482
          vf_loss: 14.20486068725586
        train: null
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 11
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estima

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,11,107.563,75000,73.6,170,0,584.35


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,11,107.563,75000,73.6,170,0,584.35


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 82600
  custom_metrics: {}
  date: 2021-12-19_15-17-57
  done: false
  episode_len_mean: 613.37
  episode_media: {}
  episode_reward_max: 170.0
  episode_reward_mean: 72.9
  episode_reward_min: 0.0
  episodes_this_iter: 12
  episodes_total: 159
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 509.9335021972656
          policy_loss: 28.67831802368164
          var_gnorm: 11.042494773864746
          vf_explained_var: 0.1536293625831604
          vf_loss: 9.398697853088379
        train: null
    num_agent_steps_sampled: 82600
    num_agent_steps_trained: 82600
    num_steps_sampled: 82600
    num_steps_trained: 82600
  iterations_since_restore: 12
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_po

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,12,117.708,82600,72.9,170,0,613.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,12,117.708,82600,72.9,170,0,613.37


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 90200
  custom_metrics: {}
  date: 2021-12-19_15-18-07
  done: false
  episode_len_mean: 639.88
  episode_media: {}
  episode_reward_max: 170.0
  episode_reward_mean: 76.3
  episode_reward_min: 0.0
  episodes_this_iter: 14
  episodes_total: 173
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 431.7368469238281
          policy_loss: 17.481897354125977
          var_gnorm: 11.046037673950195
          vf_explained_var: 0.37686777114868164
          vf_loss: 27.72637176513672
        train: null
    num_agent_steps_sampled: 90200
    num_agent_steps_trained: 90200
    num_steps_sampled: 90200
    num_steps_trained: 90200
  iterations_since_restore: 13
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estimat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,13,127.935,90200,76.3,170,0,639.88


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,13,127.935,90200,76.3,170,0,639.88


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 97800
  custom_metrics: {}
  date: 2021-12-19_15-18-17
  done: false
  episode_len_mean: 672.21
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 86.3
  episode_reward_min: 0.0
  episodes_this_iter: 10
  episodes_total: 183
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 145.26670837402344
          policy_loss: 5.198121547698975
          var_gnorm: 11.050848960876465
          vf_explained_var: 0.2155742645263672
          vf_loss: 44.34013748168945
        train: null
    num_agent_steps_sampled: 97800
    num_agent_steps_trained: 97800
    num_steps_sampled: 97800
    num_steps_trained: 97800
  iterations_since_restore: 14
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estimato

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,14,138.121,97800,86.3,240,0,672.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,14,138.121,97800,86.3,240,0,672.21


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 105400
  custom_metrics: {}
  date: 2021-12-19_15-18-27
  done: false
  episode_len_mean: 704.01
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 93.2
  episode_reward_min: 0.0
  episodes_this_iter: 12
  episodes_total: 195
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 202.98056030273438
          policy_loss: -16.993711471557617
          var_gnorm: 11.056493759155273
          vf_explained_var: 0.3892133831977844
          vf_loss: 21.497333526611328
        train: null
    num_agent_steps_sampled: 105400
    num_agent_steps_trained: 105400
    num_steps_sampled: 105400
    num_steps_trained: 105400
  iterations_since_restore: 15
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,15,148.208,105400,93.2,240,0,704.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,15,148.208,105400,93.2,240,0,704.01


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-12-19_15-18-38
  done: false
  episode_len_mean: 732.59
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 100.3
  episode_reward_min: 0.0
  episodes_this_iter: 13
  episodes_total: 208
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 394.5021667480469
          policy_loss: -103.4769058227539
          var_gnorm: 11.062204360961914
          vf_explained_var: 0.5443865060806274
          vf_loss: 23.719879150390625
        train: null
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 16
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,16,158.374,113000,100.3,240,0,732.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,16,158.374,113000,100.3,240,0,732.59


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 120600
  custom_metrics: {}
  date: 2021-12-19_15-18-48
  done: false
  episode_len_mean: 715.75
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 106.9
  episode_reward_min: 30.0
  episodes_this_iter: 13
  episodes_total: 221
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 549.5715942382812
          policy_loss: -15.48093318939209
          var_gnorm: 11.065177917480469
          vf_explained_var: -0.1354433298110962
          vf_loss: 2.573031425476074
        train: null
    num_agent_steps_sampled: 120600
    num_agent_steps_trained: 120600
    num_steps_sampled: 120600
    num_steps_trained: 120600
  iterations_since_restore: 17
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,17,168.479,120600,106.9,240,30,715.75


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,17,168.479,120600,106.9,240,30,715.75


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 128200
  custom_metrics: {}
  date: 2021-12-19_15-18-58
  done: false
  episode_len_mean: 631.51
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 112.3
  episode_reward_min: 40.0
  episodes_this_iter: 14
  episodes_total: 235
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 519.0140991210938
          policy_loss: 22.917573928833008
          var_gnorm: 11.068448066711426
          vf_explained_var: 0.7847782373428345
          vf_loss: 7.860245704650879
        train: null
    num_agent_steps_sampled: 128200
    num_agent_steps_trained: 128200
    num_steps_sampled: 128200
    num_steps_trained: 128200
  iterations_since_restore: 18
  node_ip: 132.236.59.202
  num_healthy_workers: 2


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,18,178.698,128200,112.3,240,40,631.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,18,178.698,128200,112.3,240,40,631.51


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 135600
  custom_metrics: {}
  date: 2021-12-19_15-19-08
  done: false
  episode_len_mean: 631.11
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 119.5
  episode_reward_min: 40.0
  episodes_this_iter: 12
  episodes_total: 247
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 492.48040771484375
          policy_loss: -59.234649658203125
          var_gnorm: 11.071076393127441
          vf_explained_var: 0.7009562253952026
          vf_loss: 12.972334861755371
        train: null
    num_agent_steps_sampled: 135600
    num_agent_steps_trained: 135600
    num_steps_sampled: 135600
    num_steps_trained: 135600
  iterations_since_restore: 19
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_polic

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,19,188.786,135600,119.5,240,40,631.11


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,19,188.786,135600,119.5,240,40,631.11


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 143200
  custom_metrics: {}
  date: 2021-12-19_15-19-18
  done: false
  episode_len_mean: 632.57
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 119.7
  episode_reward_min: 30.0
  episodes_this_iter: 13
  episodes_total: 260
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 516.337890625
          policy_loss: 2.011120080947876
          var_gnorm: 11.074106216430664
          vf_explained_var: 0.7570834755897522
          vf_loss: 3.0446605682373047
        train: null
    num_agent_steps_sampled: 143200
    num_agent_steps_trained: 143200
    num_steps_sampled: 143200
    num_steps_trained: 143200
  iterations_since_restore: 20
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estim

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,20,198.913,143200,119.7,240,30,632.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,20,198.913,143200,119.7,240,30,632.57


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 150800
  custom_metrics: {}
  date: 2021-12-19_15-19-28
  done: false
  episode_len_mean: 628.64
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 117.2
  episode_reward_min: 30.0
  episodes_this_iter: 14
  episodes_total: 274
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.000003814697266
          model: {}
          policy_entropy: 404.826416015625
          policy_loss: -65.74339294433594
          var_gnorm: 11.076275825500488
          vf_explained_var: 0.5050117373466492
          vf_loss: 17.42196273803711
        train: null
    num_agent_steps_sampled: 150800
    num_agent_steps_trained: 150800
    num_steps_sampled: 150800
    num_steps_trained: 150800
  iterations_since_restore: 21
  node_ip: 132.236.59.202
  num_healthy_workers: 2
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,21,209.094,150800,117.2,240,30,628.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,21,209.094,150800,117.2,240,30,628.64


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 158400
  custom_metrics: {}
  date: 2021-12-19_15-19-39
  done: false
  episode_len_mean: 606.87
  episode_media: {}
  episode_reward_max: 230.0
  episode_reward_mean: 107.9
  episode_reward_min: 30.0
  episodes_this_iter: 13
  episodes_total: 287
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 514.0472412109375
          policy_loss: -34.10818862915039
          var_gnorm: 11.079584121704102
          vf_explained_var: 0.7645888328552246
          vf_loss: 9.23136043548584
        train: null
    num_agent_steps_sampled: 158400
    num_agent_steps_trained: 158400
    num_steps_sampled: 158400
    num_steps_trained: 158400
  iterations_since_restore: 22
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_es

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,22,219.32,158400,107.9,230,30,606.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,22,219.32,158400,107.9,230,30,606.87


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-12-19_15-19-49
  done: false
  episode_len_mean: 608.42
  episode_media: {}
  episode_reward_max: 230.0
  episode_reward_mean: 106.4
  episode_reward_min: 30.0
  episodes_this_iter: 13
  episodes_total: 300
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 537.7885131835938
          policy_loss: -22.66832160949707
          var_gnorm: 11.08056640625
          vf_explained_var: 0.8862341642379761
          vf_loss: 2.5594327449798584
        train: null
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 23
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  o

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,23,229.441,166000,106.4,230,30,608.42


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,23,229.441,166000,106.4,230,30,608.42


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 173600
  custom_metrics: {}
  date: 2021-12-19_15-19-59
  done: false
  episode_len_mean: 600.17
  episode_media: {}
  episode_reward_max: 210.0
  episode_reward_mean: 102.1
  episode_reward_min: 30.0
  episodes_this_iter: 13
  episodes_total: 313
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.000003814697266
          model: {}
          policy_entropy: 551.7520751953125
          policy_loss: -36.61104965209961
          var_gnorm: 11.081327438354492
          vf_explained_var: 0.9494858980178833
          vf_loss: 1.2197775840759277
        train: null
    num_agent_steps_sampled: 173600
    num_agent_steps_trained: 173600
    num_steps_sampled: 173600
    num_steps_trained: 173600
  iterations_since_restore: 24
  node_ip: 132.236.59.202
  num_healthy_workers: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,24,239.56,173600,102.1,210,30,600.17


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,24,239.56,173600,102.1,210,30,600.17


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-12-19_15-20-09
  done: false
  episode_len_mean: 593.01
  episode_media: {}
  episode_reward_max: 210.0
  episode_reward_mean: 103.1
  episode_reward_min: 30.0
  episodes_this_iter: 16
  episodes_total: 329
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 542.6973266601562
          policy_loss: -56.92386245727539
          var_gnorm: 11.082530975341797
          vf_explained_var: 0.7603296041488647
          vf_loss: 5.06432580947876
        train: null
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 25
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_es

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,25,249.578,181000,103.1,210,30,593.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,25,249.578,181000,103.1,210,30,593.01


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 188600
  custom_metrics: {}
  date: 2021-12-19_15-20-19
  done: false
  episode_len_mean: 588.41
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 104.0
  episode_reward_min: 30.0
  episodes_this_iter: 13
  episodes_total: 342
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 434.38092041015625
          policy_loss: 144.51710510253906
          var_gnorm: 11.085206985473633
          vf_explained_var: -0.4223034381866455
          vf_loss: 44.05194091796875
        train: null
    num_agent_steps_sampled: 188600
    num_agent_steps_trained: 188600
    num_steps_sampled: 188600
    num_steps_trained: 188600
  iterations_since_restore: 26
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,26,259.726,188600,104,240,30,588.41


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,26,259.726,188600,104,240,30,588.41


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-12-19_15-20-29
  done: false
  episode_len_mean: 572.03
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 108.1
  episode_reward_min: 30.0
  episodes_this_iter: 15
  episodes_total: 357
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.000003814697266
          model: {}
          policy_entropy: 447.25860595703125
          policy_loss: -89.64437866210938
          var_gnorm: 11.08967113494873
          vf_explained_var: 0.49882423877716064
          vf_loss: 18.910785675048828
        train: null
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 27
  node_ip: 132.236.59.202
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,27,269.802,196000,108.1,240,30,572.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,27,269.802,196000,108.1,240,30,572.03


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 203400
  custom_metrics: {}
  date: 2021-12-19_15-20-39
  done: false
  episode_len_mean: 563.04
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 109.9
  episode_reward_min: 40.0
  episodes_this_iter: 14
  episodes_total: 371
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 499.52520751953125
          policy_loss: 69.91419982910156
          var_gnorm: 11.094910621643066
          vf_explained_var: 0.4872499704360962
          vf_loss: 19.248014450073242
        train: null
    num_agent_steps_sampled: 203400
    num_agent_steps_trained: 203400
    num_steps_sampled: 203400
    num_steps_trained: 203400
  iterations_since_restore: 28
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,28,279.858,203400,109.9,240,40,563.04


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,28,279.858,203400,109.9,240,40,563.04


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 210800
  custom_metrics: {}
  date: 2021-12-19_15-20-49
  done: false
  episode_len_mean: 560.34
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 112.4
  episode_reward_min: 40.0
  episodes_this_iter: 15
  episodes_total: 386
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 540.05712890625
          policy_loss: -11.148786544799805
          var_gnorm: 11.097941398620605
          vf_explained_var: 0.6604673862457275
          vf_loss: 6.429433822631836
        train: null
    num_agent_steps_sampled: 210800
    num_agent_steps_trained: 210800
    num_steps_sampled: 210800
    num_steps_trained: 210800
  iterations_since_restore: 29
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_es

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,29,289.887,210800,112.4,240,40,560.34


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,29,289.887,210800,112.4,240,40,560.34


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 218400
  custom_metrics: {}
  date: 2021-12-19_15-21-00
  done: false
  episode_len_mean: 547.31
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 113.7
  episode_reward_min: 40.0
  episodes_this_iter: 14
  episodes_total: 400
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 513.818603515625
          policy_loss: 39.20566177368164
          var_gnorm: 11.100629806518555
          vf_explained_var: 0.5460447669029236
          vf_loss: 27.370296478271484
        train: null
    num_agent_steps_sampled: 218400
    num_agent_steps_trained: 218400
    num_steps_sampled: 218400
    num_steps_trained: 218400
  iterations_since_restore: 30
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_es

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,30,300.008,218400,113.7,240,40,547.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,30,300.008,218400,113.7,240,40,547.31


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 225600
  custom_metrics: {}
  date: 2021-12-19_15-21-10
  done: false
  episode_len_mean: 544.55
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 117.7
  episode_reward_min: 30.0
  episodes_this_iter: 13
  episodes_total: 413
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 537.6365966796875
          policy_loss: 61.858333587646484
          var_gnorm: 11.102445602416992
          vf_explained_var: 0.6710696220397949
          vf_loss: 15.87720012664795
        train: null
    num_agent_steps_sampled: 225600
    num_agent_steps_trained: 225600
    num_steps_sampled: 225600
    num_steps_trained: 225600
  iterations_since_restore: 31
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,31,310.082,225600,117.7,240,30,544.55


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,31,310.082,225600,117.7,240,30,544.55


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 233200
  custom_metrics: {}
  date: 2021-12-19_15-21-20
  done: false
  episode_len_mean: 557.9
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 123.1
  episode_reward_min: 30.0
  episodes_this_iter: 14
  episodes_total: 427
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 505.712890625
          policy_loss: -108.04108428955078
          var_gnorm: 11.104292869567871
          vf_explained_var: 0.675460934638977
          vf_loss: 20.880462646484375
        train: null
    num_agent_steps_sampled: 233200
    num_agent_steps_trained: 233200
    num_steps_sampled: 233200
    num_steps_trained: 233200
  iterations_since_restore: 32
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estim

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,32,320.256,233200,123.1,240,30,557.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,32,320.256,233200,123.1,240,30,557.9


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 240800
  custom_metrics: {}
  date: 2021-12-19_15-21-30
  done: false
  episode_len_mean: 561.14
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 127.0
  episode_reward_min: 30.0
  episodes_this_iter: 12
  episodes_total: 439
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 309.5829772949219
          policy_loss: -105.70075988769531
          var_gnorm: 11.109374046325684
          vf_explained_var: -0.39829015731811523
          vf_loss: 114.09542846679688
        train: null
    num_agent_steps_sampled: 240800
    num_agent_steps_trained: 240800
    num_steps_sampled: 240800
    num_steps_trained: 240800
  iterations_since_restore: 33
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_poli

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,33,330.439,240800,127,240,30,561.14


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,33,330.439,240800,127,240,30,561.14


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 248400
  custom_metrics: {}
  date: 2021-12-19_15-21-40
  done: false
  episode_len_mean: 576.39
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 123.9
  episode_reward_min: 10.0
  episodes_this_iter: 12
  episodes_total: 451
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 287.04730224609375
          policy_loss: 21.154056549072266
          var_gnorm: 11.126173973083496
          vf_explained_var: 0.06736582517623901
          vf_loss: 22.249004364013672
        train: null
    num_agent_steps_sampled: 248400
    num_agent_steps_trained: 248400
    num_steps_sampled: 248400
    num_steps_trained: 248400
  iterations_since_restore: 34
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_polic

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,34,340.592,248400,123.9,240,10,576.39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,34,340.592,248400,123.9,240,10,576.39


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 255800
  custom_metrics: {}
  date: 2021-12-19_15-21-50
  done: false
  episode_len_mean: 571.2
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 121.7
  episode_reward_min: 0.0
  episodes_this_iter: 17
  episodes_total: 468
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 292.44952392578125
          policy_loss: 9.99434757232666
          var_gnorm: 11.1323823928833
          vf_explained_var: 0.35780125856399536
          vf_loss: 9.002650260925293
        train: null
    num_agent_steps_sampled: 255800
    num_agent_steps_trained: 255800
    num_steps_sampled: 255800
    num_steps_trained: 255800
  iterations_since_restore: 35
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_estim

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,35,350.736,255800,121.7,240,0,571.2


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,35,350.736,255800,121.7,240,0,571.2


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 263200
  custom_metrics: {}
  date: 2021-12-19_15-22-01
  done: false
  episode_len_mean: 557.83
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 118.6
  episode_reward_min: 0.0
  episodes_this_iter: 16
  episodes_total: 484
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 173.16921997070312
          policy_loss: 14.2823486328125
          var_gnorm: 11.135560989379883
          vf_explained_var: 0.4234966039657593
          vf_loss: 19.30255126953125
        train: null
    num_agent_steps_sampled: 263200
    num_agent_steps_trained: 263200
    num_steps_sampled: 263200
    num_steps_trained: 263200
  iterations_since_restore: 36
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_est

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,36,360.909,263200,118.6,240,0,557.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,36,360.909,263200,118.6,240,0,557.83


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 270800
  custom_metrics: {}
  date: 2021-12-19_15-22-11
  done: false
  episode_len_mean: 540.46
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 105.2
  episode_reward_min: 0.0
  episodes_this_iter: 18
  episodes_total: 502
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 374.7737731933594
          policy_loss: 27.108243942260742
          var_gnorm: 11.139181137084961
          vf_explained_var: 0.06033599376678467
          vf_loss: 12.407506942749023
        train: null
    num_agent_steps_sampled: 270800
    num_agent_steps_trained: 270800
    num_steps_sampled: 270800
    num_steps_trained: 270800
  iterations_since_restore: 37
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,37,371.128,270800,105.2,240,0,540.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,37,371.128,270800,105.2,240,0,540.46


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 278400
  custom_metrics: {}
  date: 2021-12-19_15-22-21
  done: false
  episode_len_mean: 524.14
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 102.2
  episode_reward_min: 0.0
  episodes_this_iter: 17
  episodes_total: 519
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 226.59466552734375
          policy_loss: -29.63072967529297
          var_gnorm: 11.145679473876953
          vf_explained_var: 0.6840994954109192
          vf_loss: 19.497278213500977
        train: null
    num_agent_steps_sampled: 278400
    num_agent_steps_trained: 278400
    num_steps_sampled: 278400
    num_steps_trained: 278400
  iterations_since_restore: 38
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,38,381.288,278400,102.2,240,0,524.14


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,38,381.288,278400,102.2,240,0,524.14


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-12-19_15-22-31
  done: false
  episode_len_mean: 504.4
  episode_media: {}
  episode_reward_max: 230.0
  episode_reward_mean: 98.4
  episode_reward_min: 0.0
  episodes_this_iter: 17
  episodes_total: 536
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 164.25161743164062
          policy_loss: -14.645939826965332
          var_gnorm: 11.149625778198242
          vf_explained_var: 0.7942544221878052
          vf_loss: 9.977569580078125
        train: null
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  iterations_since_restore: 39
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_es

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,39,391.501,286000,98.4,230,0,504.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,39,391.501,286000,98.4,230,0,504.4


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 293600
  custom_metrics: {}
  date: 2021-12-19_15-22-41
  done: false
  episode_len_mean: 476.61
  episode_media: {}
  episode_reward_max: 200.0
  episode_reward_mean: 97.8
  episode_reward_min: 0.0
  episodes_this_iter: 16
  episodes_total: 552
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 327.2786865234375
          policy_loss: 21.951210021972656
          var_gnorm: 11.151516914367676
          vf_explained_var: 0.4034011960029602
          vf_loss: 34.75956344604492
        train: null
    num_agent_steps_sampled: 293600
    num_agent_steps_trained: 293600
    num_steps_sampled: 293600
    num_steps_trained: 293600
  iterations_since_restore: 40
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_est

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,40,401.742,293600,97.8,200,0,476.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,40,401.742,293600,97.8,200,0,476.61


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 301200
  custom_metrics: {}
  date: 2021-12-19_15-22-52
  done: false
  episode_len_mean: 484.37
  episode_media: {}
  episode_reward_max: 200.0
  episode_reward_mean: 99.0
  episode_reward_min: 0.0
  episodes_this_iter: 15
  episodes_total: 567
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 506.02105712890625
          policy_loss: 49.04024124145508
          var_gnorm: 11.154868125915527
          vf_explained_var: 0.675247073173523
          vf_loss: 10.657033920288086
        train: null
    num_agent_steps_sampled: 301200
    num_agent_steps_trained: 301200
    num_steps_sampled: 301200
    num_steps_trained: 301200
  iterations_since_restore: 41
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_est

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,41,411.89,301200,99,200,0,484.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,41,411.89,301200,99,200,0,484.37


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 308600
  custom_metrics: {}
  date: 2021-12-19_15-23-02
  done: false
  episode_len_mean: 491.07
  episode_media: {}
  episode_reward_max: 200.0
  episode_reward_mean: 93.5
  episode_reward_min: 0.0
  episodes_this_iter: 14
  episodes_total: 581
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.000003814697266
          model: {}
          policy_entropy: 552.8355712890625
          policy_loss: 13.21408462524414
          var_gnorm: 11.157027244567871
          vf_explained_var: 0.5711762309074402
          vf_loss: 5.713490009307861
        train: null
    num_agent_steps_sampled: 308600
    num_agent_steps_trained: 308600
    num_steps_sampled: 308600
    num_steps_trained: 308600
  iterations_since_restore: 42
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  o

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,42,421.96,308600,93.5,200,0,491.07


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,42,421.96,308600,93.5,200,0,491.07


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-12-19_15-23-12
  done: false
  episode_len_mean: 508.54
  episode_media: {}
  episode_reward_max: 200.0
  episode_reward_mean: 94.9
  episode_reward_min: 0.0
  episodes_this_iter: 14
  episodes_total: 595
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 534.496337890625
          policy_loss: 26.46966552734375
          var_gnorm: 11.158880233764648
          vf_explained_var: 0.7369799613952637
          vf_loss: 11.352850914001465
        train: null
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iterations_since_restore: 43
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_esti

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,43,432.155,316000,94.9,200,0,508.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,43,432.155,316000,94.9,200,0,508.54


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 323400
  custom_metrics: {}
  date: 2021-12-19_15-23-22
  done: false
  episode_len_mean: 526.41
  episode_media: {}
  episode_reward_max: 220.0
  episode_reward_mean: 107.0
  episode_reward_min: 10.0
  episodes_this_iter: 12
  episodes_total: 607
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.000003814697266
          model: {}
          policy_entropy: 501.57867431640625
          policy_loss: -29.51979637145996
          var_gnorm: 11.161757469177246
          vf_explained_var: 0.8843411207199097
          vf_loss: 11.710097312927246
        train: null
    num_agent_steps_sampled: 323400
    num_agent_steps_trained: 323400
    num_steps_sampled: 323400
    num_steps_trained: 323400
  iterations_since_restore: 44
  node_ip: 132.236.59.202
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,44,442.323,323400,107,220,10,526.41


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,44,442.323,323400,107,220,10,526.41


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 330800
  custom_metrics: {}
  date: 2021-12-19_15-23-32
  done: false
  episode_len_mean: 536.51
  episode_media: {}
  episode_reward_max: 220.0
  episode_reward_mean: 101.7
  episode_reward_min: 10.0
  episodes_this_iter: 15
  episodes_total: 622
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 412.57574462890625
          policy_loss: 30.389415740966797
          var_gnorm: 11.16733455657959
          vf_explained_var: 0.05295085906982422
          vf_loss: 10.34518814086914
        train: null
    num_agent_steps_sampled: 330800
    num_agent_steps_trained: 330800
    num_steps_sampled: 330800
    num_steps_trained: 330800
  iterations_since_restore: 45
  node_ip: 132.236.59.202
  num_healthy_workers: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,45,452.343,330800,101.7,220,10,536.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,45,452.343,330800,101.7,220,10,536.51


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 338200
  custom_metrics: {}
  date: 2021-12-19_15-23-42
  done: false
  episode_len_mean: 547.11
  episode_media: {}
  episode_reward_max: 220.0
  episode_reward_mean: 99.1
  episode_reward_min: 10.0
  episodes_this_iter: 13
  episodes_total: 635
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 523.5242919921875
          policy_loss: -4.71112585067749
          var_gnorm: 11.171544075012207
          vf_explained_var: 0.8098094463348389
          vf_loss: 13.747886657714844
        train: null
    num_agent_steps_sampled: 338200
    num_agent_steps_trained: 338200
    num_steps_sampled: 338200
    num_steps_trained: 338200
  iterations_since_restore: 46
  node_ip: 132.236.59.202
  num_healthy_workers: 2
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,46,462.541,338200,99.1,220,10,547.11


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,46,462.541,338200,99.1,220,10,547.11


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 345600
  custom_metrics: {}
  date: 2021-12-19_15-23-52
  done: false
  episode_len_mean: 558.89
  episode_media: {}
  episode_reward_max: 220.0
  episode_reward_mean: 99.2
  episode_reward_min: 10.0
  episodes_this_iter: 14
  episodes_total: 649
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 470.9671936035156
          policy_loss: 19.539247512817383
          var_gnorm: 11.173660278320312
          vf_explained_var: 0.33565711975097656
          vf_loss: 21.07461929321289
        train: null
    num_agent_steps_sampled: 345600
    num_agent_steps_trained: 345600
    num_steps_sampled: 345600
    num_steps_trained: 345600
  iterations_since_restore: 47
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,47,472.651,345600,99.2,220,10,558.89


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,47,472.651,345600,99.2,220,10,558.89


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 353200
  custom_metrics: {}
  date: 2021-12-19_15-24-03
  done: false
  episode_len_mean: 567.06
  episode_media: {}
  episode_reward_max: 220.0
  episode_reward_mean: 105.2
  episode_reward_min: 10.0
  episodes_this_iter: 13
  episodes_total: 662
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 485.4456787109375
          policy_loss: -42.206886291503906
          var_gnorm: 11.17627239227295
          vf_explained_var: 0.25229978561401367
          vf_loss: 44.40681076049805
        train: null
    num_agent_steps_sampled: 353200
    num_agent_steps_trained: 353200
    num_steps_sampled: 353200
    num_steps_trained: 353200
  iterations_since_restore: 48
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,48,482.845,353200,105.2,220,10,567.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,48,482.845,353200,105.2,220,10,567.06


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 360800
  custom_metrics: {}
  date: 2021-12-19_15-24-13
  done: false
  episode_len_mean: 578.46
  episode_media: {}
  episode_reward_max: 240.0
  episode_reward_mean: 121.0
  episode_reward_min: 10.0
  episodes_this_iter: 14
  episodes_total: 676
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 40.0
          model: {}
          policy_entropy: 333.4662780761719
          policy_loss: -48.16633224487305
          var_gnorm: 11.181046485900879
          vf_explained_var: 0.8563069701194763
          vf_loss: 15.25061321258545
        train: null
    num_agent_steps_sampled: 360800
    num_agent_steps_trained: 360800
    num_steps_sampled: 360800
    num_steps_trained: 360800
  iterations_since_restore: 49
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  off_policy_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,49,492.909,360800,121,240,10,578.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,49,492.909,360800,121,240,10,578.46


Result for A2C_Frostbite-v0_74b39_00000:
  agent_timesteps_total: 368400
  custom_metrics: {}
  date: 2021-12-19_15-24-23
  done: false
  episode_len_mean: 580.52
  episode_media: {}
  episode_reward_max: 260.0
  episode_reward_mean: 134.6
  episode_reward_min: 10.0
  episodes_this_iter: 13
  episodes_total: 689
  experiment_id: 2db2de95f09d4a8582be21836ec1643a
  hostname: ml2558-G11CD
  info:
    learner:
      default_policy:
        learner_stats:
          cur_lr: 9.999999747378752e-05
          grad_gnorm: 39.999996185302734
          model: {}
          policy_entropy: 474.266357421875
          policy_loss: 148.001708984375
          var_gnorm: 11.187053680419922
          vf_explained_var: 0.5603840351104736
          vf_loss: 39.65641403198242
        train: null
    num_agent_steps_sampled: 368400
    num_agent_steps_trained: 368400
    num_steps_sampled: 368400
    num_steps_trained: 368400
  iterations_since_restore: 50
  node_ip: 132.236.59.202
  num_healthy_workers: 2
  o

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,50,503.154,368400,134.6,260,10,580.52




Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_Frostbite-v0_74b39_00000,RUNNING,132.236.59.202:2409,50,503.154,368400,134.6,260,10,580.52


[2m[36m(pid=2444)[0m 2021-12-19 15:24:31,519	ERROR worker.py:428 -- SystemExit was raised from the worker
[2m[36m(pid=2444)[0m Traceback (most recent call last):
[2m[36m(pid=2444)[0m   File "python/ray/_raylet.pyx", line 684, in ray._raylet.task_execution_handler
[2m[36m(pid=2444)[0m   File "python/ray/_raylet.pyx", line 524, in ray._raylet.execute_task
[2m[36m(pid=2444)[0m   File "python/ray/_raylet.pyx", line 561, in ray._raylet.execute_task
[2m[36m(pid=2444)[0m   File "python/ray/_raylet.pyx", line 568, in ray._raylet.execute_task
[2m[36m(pid=2444)[0m   File "python/ray/_raylet.pyx", line 572, in ray._raylet.execute_task
[2m[36m(pid=2444)[0m   File "python/ray/_raylet.pyx", line 522, in ray._raylet.execute_task.function_executor
[2m[36m(pid=2444)[0m   File "/home/ml2558/miniconda3/envs/tf-gpu/lib/python3.9/site-packages/ray/_private/function_manager.py", line 579, in actor_method_executor
[2m[36m(pid=2444)[0m     return method(__ray_actor, *args, **kwar

2021-12-19 15:24:31,720	ERROR tune.py:613 -- Trials did not complete: [A2C_Frostbite-v0_74b39_00000]
2021-12-19 15:24:31,721	INFO tune.py:617 -- Total run time: 523.03 seconds (522.78 seconds for the tuning loop).


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x7f53fda2d8b0>