In [1]:
import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
import os

In [2]:
environment_name = 'Breakout-v0'
log_path = os.path.join('/home/kchn/rlp/atari/multi/Training', 'Logs')
save_path = os.path.join('/home/kchn/rlp/atari/multi/Training', 'Saved Models')

In [3]:
stop_callback = StopTrainingOnRewardThreshold(reward_threshold=200, verbose=1)

In [None]:
for i in range(1, 11):
    print("Environment number", i)
    env = make_atari_env(environment_name, n_envs=i, seed=0)
    env = VecFrameStack(env, n_stack=i)
    eval_callback = EvalCallback(
        env,
        callback_on_new_best=stop_callback,
        eval_freq=10000,
        best_model_save_path=save_path,
        verbose=1
    )
    mdl = A2C('CnnPolicy', env, verbose=1, tensorboard_log=log_path)
    mdl.learn(total_timesteps=100000)
    print(evaluate_policy(mdl, env, n_eval_episodes=10, render=True))
    env.close()

Environment number 1


A.L.E: Arcade Learning Environment (version 0.7.4+069f8bd)
[Powered by Stella]


Using cuda device
Wrapping the env in a VecTransposeImage.


2022-10-25 18:11:24.257007: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Logging to /home/kchn/rlp/atari/multi/Training/Logs/A2C_46
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 289      |
|    ep_rew_mean        | 1.88     |
| time/                 |          |
|    fps                | 132      |
|    iterations         | 100      |
|    time_elapsed       | 3        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | -0.0194  |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.00232 |
|    value_loss         | 7.84e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 301      |
|    ep_rew_mean        | 2        |
| time/                 |          |
|    fps                | 174      |
|    iterations         | 200      |
|    time_elapsed       | 5        |
|    total_times

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 275      |
|    ep_rew_mean        | 1.45     |
| time/                 |          |
|    fps                | 232      |
|    iterations         | 1400     |
|    time_elapsed       | 30       |
|    total_timesteps    | 7000     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | -125     |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | 0.0448   |
|    value_loss         | 0.00169  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 281      |
|    ep_rew_mean        | 1.54     |
| time/                 |          |
|    fps                | 234      |
|    iterations         | 1500     |
|    time_elapsed       | 32       |
|    total_timesteps    | 7500     |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 303      |
|    ep_rew_mean        | 2.06     |
| time/                 |          |
|    fps                | 245      |
|    iterations         | 2700     |
|    time_elapsed       | 54       |
|    total_timesteps    | 13500    |
| train/                |          |
|    entropy_loss       | -1.21    |
|    explained_variance | 0.0847   |
|    learning_rate      | 0.0007   |
|    n_updates          | 2699     |
|    policy_loss        | 0.00326  |
|    value_loss         | 0.000319 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 299       |
|    ep_rew_mean        | 1.99      |
| time/                 |           |
|    fps                | 246       |
|    iterations         | 2800      |
|    time_elapsed       | 56        |
|    total_timesteps    | 14000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 287      |
|    ep_rew_mean        | 1.76     |
| time/                 |          |
|    fps                | 250      |
|    iterations         | 4000     |
|    time_elapsed       | 80       |
|    total_timesteps    | 20000    |
| train/                |          |
|    entropy_loss       | -0.632   |
|    explained_variance | -18.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3999     |
|    policy_loss        | -0.077   |
|    value_loss         | 0.013    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 294      |
|    ep_rew_mean        | 1.9      |
| time/                 |          |
|    fps                | 250      |
|    iterations         | 4100     |
|    time_elapsed       | 81       |
|    total_timesteps    | 20500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 315      |
|    ep_rew_mean        | 2.29     |
| time/                 |          |
|    fps                | 252      |
|    iterations         | 5300     |
|    time_elapsed       | 104      |
|    total_timesteps    | 26500    |
| train/                |          |
|    entropy_loss       | -0.197   |
|    explained_variance | -1.55    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5299     |
|    policy_loss        | 0.0167   |
|    value_loss         | 0.000513 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 315      |
|    ep_rew_mean        | 2.27     |
| time/                 |          |
|    fps                | 252      |
|    iterations         | 5400     |
|    time_elapsed       | 106      |
|    total_timesteps    | 27000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 315      |
|    ep_rew_mean        | 2.15     |
| time/                 |          |
|    fps                | 254      |
|    iterations         | 6600     |
|    time_elapsed       | 129      |
|    total_timesteps    | 33000    |
| train/                |          |
|    entropy_loss       | -0.0953  |
|    explained_variance | -0.276   |
|    learning_rate      | 0.0007   |
|    n_updates          | 6599     |
|    policy_loss        | 0.000103 |
|    value_loss         | 0.000172 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 317      |
|    ep_rew_mean        | 2.18     |
| time/                 |          |
|    fps                | 254      |
|    iterations         | 6700     |
|    time_elapsed       | 131      |
|    total_timesteps    | 33500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 307      |
|    ep_rew_mean        | 2.01     |
| time/                 |          |
|    fps                | 256      |
|    iterations         | 7900     |
|    time_elapsed       | 154      |
|    total_timesteps    | 39500    |
| train/                |          |
|    entropy_loss       | -0.246   |
|    explained_variance | -275     |
|    learning_rate      | 0.0007   |
|    n_updates          | 7899     |
|    policy_loss        | -0.0175  |
|    value_loss         | 0.00153  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 315      |
|    ep_rew_mean        | 2.13     |
| time/                 |          |
|    fps                | 256      |
|    iterations         | 8000     |
|    time_elapsed       | 156      |
|    total_timesteps    | 40000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 305       |
|    ep_rew_mean        | 1.92      |
| time/                 |           |
|    fps                | 255       |
|    iterations         | 9200      |
|    time_elapsed       | 180       |
|    total_timesteps    | 46000     |
| train/                |           |
|    entropy_loss       | -0.138    |
|    explained_variance | 0.531     |
|    learning_rate      | 0.0007    |
|    n_updates          | 9199      |
|    policy_loss        | -0.000643 |
|    value_loss         | 0.00063   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 305      |
|    ep_rew_mean        | 1.9      |
| time/                 |          |
|    fps                | 255      |
|    iterations         | 9300     |
|    time_elapsed       | 181      |
|    total_timesteps    | 46500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 303      |
|    ep_rew_mean        | 1.89     |
| time/                 |          |
|    fps                | 256      |
|    iterations         | 10500    |
|    time_elapsed       | 204      |
|    total_timesteps    | 52500    |
| train/                |          |
|    entropy_loss       | -0.168   |
|    explained_variance | -1.1e+04 |
|    learning_rate      | 0.0007   |
|    n_updates          | 10499    |
|    policy_loss        | 0.000147 |
|    value_loss         | 0.000106 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 303      |
|    ep_rew_mean        | 1.89     |
| time/                 |          |
|    fps                | 257      |
|    iterations         | 10600    |
|    time_elapsed       | 206      |
|    total_timesteps    | 53000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 298      |
|    ep_rew_mean        | 1.81     |
| time/                 |          |
|    fps                | 258      |
|    iterations         | 11800    |
|    time_elapsed       | 228      |
|    total_timesteps    | 59000    |
| train/                |          |
|    entropy_loss       | -0.0287  |
|    explained_variance | 0.994    |
|    learning_rate      | 0.0007   |
|    n_updates          | 11799    |
|    policy_loss        | 5.07e-05 |
|    value_loss         | 0.00257  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 299      |
|    ep_rew_mean        | 1.84     |
| time/                 |          |
|    fps                | 258      |
|    iterations         | 11900    |
|    time_elapsed       | 230      |
|    total_timesteps    | 59500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 311      |
|    ep_rew_mean        | 2.08     |
| time/                 |          |
|    fps                | 259      |
|    iterations         | 13100    |
|    time_elapsed       | 252      |
|    total_timesteps    | 65500    |
| train/                |          |
|    entropy_loss       | -1.05    |
|    explained_variance | 0.824    |
|    learning_rate      | 0.0007   |
|    n_updates          | 13099    |
|    policy_loss        | -0.0427  |
|    value_loss         | 0.0329   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 318      |
|    ep_rew_mean        | 2.21     |
| time/                 |          |
|    fps                | 259      |
|    iterations         | 13200    |
|    time_elapsed       | 254      |
|    total_timesteps    | 66000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 306       |
|    ep_rew_mean        | 1.96      |
| time/                 |           |
|    fps                | 260       |
|    iterations         | 14400     |
|    time_elapsed       | 276       |
|    total_timesteps    | 72000     |
| train/                |           |
|    entropy_loss       | -0.166    |
|    explained_variance | -1.78e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 14399     |
|    policy_loss        | -0.00253  |
|    value_loss         | 0.00823   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 310      |
|    ep_rew_mean        | 2.02     |
| time/                 |          |
|    fps                | 260      |
|    iterations         | 14500    |
|    time_elapsed       | 278      |
|    total_timesteps    | 72500    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 309      |
|    ep_rew_mean        | 1.98     |
| time/                 |          |
|    fps                | 260      |
|    iterations         | 15700    |
|    time_elapsed       | 301      |
|    total_timesteps    | 78500    |
| train/                |          |
|    entropy_loss       | -0.104   |
|    explained_variance | -20.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 15699    |
|    policy_loss        | -0.00024 |
|    value_loss         | 0.000279 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 305      |
|    ep_rew_mean        | 1.89     |
| time/                 |          |
|    fps                | 260      |
|    iterations         | 15800    |
|    time_elapsed       | 302      |
|    total_timesteps    | 79000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 314      |
|    ep_rew_mean        | 2.11     |
| time/                 |          |
|    fps                | 261      |
|    iterations         | 17000    |
|    time_elapsed       | 325      |
|    total_timesteps    | 85000    |
| train/                |          |
|    entropy_loss       | -0.0162  |
|    explained_variance | 0.997    |
|    learning_rate      | 0.0007   |
|    n_updates          | 16999    |
|    policy_loss        | 4.43e-05 |
|    value_loss         | 0.000402 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 311      |
|    ep_rew_mean        | 2.05     |
| time/                 |          |
|    fps                | 261      |
|    iterations         | 17100    |
|    time_elapsed       | 326      |
|    total_timesteps    | 85500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 311      |
|    ep_rew_mean        | 2.04     |
| time/                 |          |
|    fps                | 262      |
|    iterations         | 18300    |
|    time_elapsed       | 349      |
|    total_timesteps    | 91500    |
| train/                |          |
|    entropy_loss       | -0.171   |
|    explained_variance | 0.787    |
|    learning_rate      | 0.0007   |
|    n_updates          | 18299    |
|    policy_loss        | -0.00445 |
|    value_loss         | 0.0586   |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 316       |
|    ep_rew_mean        | 2.13      |
| time/                 |           |
|    fps                | 262       |
|    iterations         | 18400     |
|    time_elapsed       | 350       |
|    total_timesteps    | 92000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 305      |
|    ep_rew_mean        | 1.91     |
| time/                 |          |
|    fps                | 262      |
|    iterations         | 19600    |
|    time_elapsed       | 373      |
|    total_timesteps    | 98000    |
| train/                |          |
|    entropy_loss       | -0.00743 |
|    explained_variance | -0.204   |
|    learning_rate      | 0.0007   |
|    n_updates          | 19599    |
|    policy_loss        | 1.74e-05 |
|    value_loss         | 0.000844 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 308      |
|    ep_rew_mean        | 1.96     |
| time/                 |          |
|    fps                | 262      |
|    iterations         | 19700    |
|    time_elapsed       | 375      |
|    total_timesteps    | 98500    |
| train/                |          |
|

  logger.warn(


(1.7, 1.1874342087037917)
Environment number 2
Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to /home/kchn/rlp/atari/multi/Training/Logs/A2C_47
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 276      |
|    ep_rew_mean        | 1.32     |
| time/                 |          |
|    fps                | 323      |
|    iterations         | 100      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | 0.0453   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.0414  |
|    value_loss         | 0.00365  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 282      |
|    ep_rew_mean        | 1.49     |
| time/                 |          |
|    fps             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 299      |
|    ep_rew_mean        | 2.04     |
| time/                 |          |
|    fps                | 334      |
|    iterations         | 1400     |
|    time_elapsed       | 41       |
|    total_timesteps    | 14000    |
| train/                |          |
|    entropy_loss       | -1.03    |
|    explained_variance | 0.946    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | -0.041   |
|    value_loss         | 0.0305   |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 303       |
|    ep_rew_mean        | 2.08      |
| time/                 |           |
|    fps                | 334       |
|    iterations         | 1500      |
|    time_elapsed       | 44        |
|    total_timesteps    | 15000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 299      |
|    ep_rew_mean        | 1.98     |
| time/                 |          |
|    fps                | 335      |
|    iterations         | 2700     |
|    time_elapsed       | 80       |
|    total_timesteps    | 27000    |
| train/                |          |
|    entropy_loss       | -1.18    |
|    explained_variance | 0.964    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2699     |
|    policy_loss        | 0.0319   |
|    value_loss         | 0.00917  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 307      |
|    ep_rew_mean        | 2.1      |
| time/                 |          |
|    fps                | 335      |
|    iterations         | 2800     |
|    time_elapsed       | 83       |
|    total_timesteps    | 28000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 309      |
|    ep_rew_mean        | 2.17     |
| time/                 |          |
|    fps                | 336      |
|    iterations         | 4000     |
|    time_elapsed       | 118      |
|    total_timesteps    | 40000    |
| train/                |          |
|    entropy_loss       | -0.896   |
|    explained_variance | 0.965    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3999     |
|    policy_loss        | 0.0078   |
|    value_loss         | 0.0052   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 313      |
|    ep_rew_mean        | 2.24     |
| time/                 |          |
|    fps                | 336      |
|    iterations         | 4100     |
|    time_elapsed       | 121      |
|    total_timesteps    | 41000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 307      |
|    ep_rew_mean        | 2.19     |
| time/                 |          |
|    fps                | 336      |
|    iterations         | 5400     |
|    time_elapsed       | 160      |
|    total_timesteps    | 54000    |
| train/                |          |
|    entropy_loss       | -1.21    |
|    explained_variance | 0.378    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5399     |
|    policy_loss        | 0.11     |
|    value_loss         | 0.0226   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 308      |
|    ep_rew_mean        | 2.22     |
| time/                 |          |
|    fps                | 336      |
|    iterations         | 5500     |
|    time_elapsed       | 163      |
|    total_timesteps    | 55000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 338      |
|    ep_rew_mean        | 3.04     |
| time/                 |          |
|    fps                | 338      |
|    iterations         | 6800     |
|    time_elapsed       | 201      |
|    total_timesteps    | 68000    |
| train/                |          |
|    entropy_loss       | -0.316   |
|    explained_variance | 0.892    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6799     |
|    policy_loss        | 0.00856  |
|    value_loss         | 0.0478   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 342      |
|    ep_rew_mean        | 3.16     |
| time/                 |          |
|    fps                | 338      |
|    iterations         | 6900     |
|    time_elapsed       | 203      |
|    total_timesteps    | 69000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 466      |
|    ep_rew_mean        | 5.65     |
| time/                 |          |
|    fps                | 341      |
|    iterations         | 8100     |
|    time_elapsed       | 237      |
|    total_timesteps    | 81000    |
| train/                |          |
|    entropy_loss       | -0.349   |
|    explained_variance | 0.883    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8099     |
|    policy_loss        | -0.384   |
|    value_loss         | 0.217    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 470      |
|    ep_rew_mean        | 5.69     |
| time/                 |          |
|    fps                | 341      |
|    iterations         | 8200     |
|    time_elapsed       | 240      |
|    total_timesteps    | 82000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 497      |
|    ep_rew_mean        | 6.15     |
| time/                 |          |
|    fps                | 343      |
|    iterations         | 9500     |
|    time_elapsed       | 276      |
|    total_timesteps    | 95000    |
| train/                |          |
|    entropy_loss       | -0.303   |
|    explained_variance | 0.302    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9499     |
|    policy_loss        | -0.277   |
|    value_loss         | 0.454    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 492      |
|    ep_rew_mean        | 6.04     |
| time/                 |          |
|    fps                | 344      |
|    iterations         | 9600     |
|    time_elapsed       | 278      |
|    total_timesteps    | 96000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 334      |
|    ep_rew_mean        | 2.48     |
| time/                 |          |
|    fps                | 381      |
|    iterations         | 800      |
|    time_elapsed       | 31       |
|    total_timesteps    | 12000    |
| train/                |          |
|    entropy_loss       | -0.583   |
|    explained_variance | 0.388    |
|    learning_rate      | 0.0007   |
|    n_updates          | 799      |
|    policy_loss        | 0.15     |
|    value_loss         | 0.144    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 329      |
|    ep_rew_mean        | 2.45     |
| time/                 |          |
|    fps                | 381      |
|    iterations         | 900      |
|    time_elapsed       | 35       |
|    total_timesteps    | 13500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 413      |
|    ep_rew_mean        | 4.47     |
| time/                 |          |
|    fps                | 386      |
|    iterations         | 2200     |
|    time_elapsed       | 85       |
|    total_timesteps    | 33000    |
| train/                |          |
|    entropy_loss       | -0.362   |
|    explained_variance | 0.96     |
|    learning_rate      | 0.0007   |
|    n_updates          | 2199     |
|    policy_loss        | -0.0854  |
|    value_loss         | 0.0653   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 428      |
|    ep_rew_mean        | 4.77     |
| time/                 |          |
|    fps                | 387      |
|    iterations         | 2300     |
|    time_elapsed       | 88       |
|    total_timesteps    | 34500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 446      |
|    ep_rew_mean        | 5.04     |
| time/                 |          |
|    fps                | 392      |
|    iterations         | 3500     |
|    time_elapsed       | 133      |
|    total_timesteps    | 52500    |
| train/                |          |
|    entropy_loss       | -0.209   |
|    explained_variance | 0.964    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3499     |
|    policy_loss        | -0.0062  |
|    value_loss         | 0.0226   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 453      |
|    ep_rew_mean        | 5.18     |
| time/                 |          |
|    fps                | 393      |
|    iterations         | 3600     |
|    time_elapsed       | 137      |
|    total_timesteps    | 54000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 451      |
|    ep_rew_mean        | 5.29     |
| time/                 |          |
|    fps                | 395      |
|    iterations         | 4800     |
|    time_elapsed       | 182      |
|    total_timesteps    | 72000    |
| train/                |          |
|    entropy_loss       | -0.516   |
|    explained_variance | 0.975    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4799     |
|    policy_loss        | 0.0438   |
|    value_loss         | 0.0478   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 457      |
|    ep_rew_mean        | 5.38     |
| time/                 |          |
|    fps                | 395      |
|    iterations         | 4900     |
|    time_elapsed       | 185      |
|    total_timesteps    | 73500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 482      |
|    ep_rew_mean        | 5.9      |
| time/                 |          |
|    fps                | 397      |
|    iterations         | 6200     |
|    time_elapsed       | 234      |
|    total_timesteps    | 93000    |
| train/                |          |
|    entropy_loss       | -0.675   |
|    explained_variance | 0.948    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6199     |
|    policy_loss        | -0.0637  |
|    value_loss         | 0.0182   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 485      |
|    ep_rew_mean        | 5.92     |
| time/                 |          |
|    fps                | 397      |
|    iterations         | 6300     |
|    time_elapsed       | 237      |
|    total_timesteps    | 94500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 310      |
|    ep_rew_mean        | 2.16     |
| time/                 |          |
|    fps                | 185      |
|    iterations         | 900      |
|    time_elapsed       | 97       |
|    total_timesteps    | 18000    |
| train/                |          |
|    entropy_loss       | -1.17    |
|    explained_variance | 0.974    |
|    learning_rate      | 0.0007   |
|    n_updates          | 899      |
|    policy_loss        | -0.0284  |
|    value_loss         | 0.00388  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 311      |
|    ep_rew_mean        | 2.17     |
| time/                 |          |
|    fps                | 185      |
|    iterations         | 1000     |
|    time_elapsed       | 107      |
|    total_timesteps    | 20000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 418      |
|    ep_rew_mean        | 4.47     |
| time/                 |          |
|    fps                | 187      |
|    iterations         | 2300     |
|    time_elapsed       | 245      |
|    total_timesteps    | 46000    |
| train/                |          |
|    entropy_loss       | -0.624   |
|    explained_variance | 0.982    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2299     |
|    policy_loss        | -0.018   |
|    value_loss         | 0.012    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 410      |
|    ep_rew_mean        | 4.32     |
| time/                 |          |
|    fps                | 187      |
|    iterations         | 2400     |
|    time_elapsed       | 256      |
|    total_timesteps    | 48000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 485      |
|    ep_rew_mean        | 6.01     |
| time/                 |          |
|    fps                | 188      |
|    iterations         | 3700     |
|    time_elapsed       | 392      |
|    total_timesteps    | 74000    |
| train/                |          |
|    entropy_loss       | -0.855   |
|    explained_variance | 0.835    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3699     |
|    policy_loss        | -0.11    |
|    value_loss         | 0.113    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 478      |
|    ep_rew_mean        | 5.88     |
| time/                 |          |
|    fps                | 188      |
|    iterations         | 3800     |
|    time_elapsed       | 403      |
|    total_timesteps    | 76000    |
| train/                |          |
|

(7.7, 1.004987562112089)
Environment number 5
Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to /home/kchn/rlp/atari/multi/Training/Logs/A2C_50
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 287      |
|    ep_rew_mean        | 1.6      |
| time/                 |          |
|    fps                | 192      |
|    iterations         | 100      |
|    time_elapsed       | 12       |
|    total_timesteps    | 2500     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | 0.266    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.00802  |
|    value_loss         | 0.0143   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 284      |
|    ep_rew_mean        | 1.6      |
| time/                 |          |
|    fps              

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 402      |
|    ep_rew_mean        | 4.2      |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 1400     |
|    time_elapsed       | 179      |
|    total_timesteps    | 35000    |
| train/                |          |
|    entropy_loss       | -0.858   |
|    explained_variance | 0.921    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | 0.16     |
|    value_loss         | 0.105    |
------------------------------------
