In [1]:
# Filter tensorflow version warnings
import os
# https://stackoverflow.com/questions/40426502/is-there-a-way-to-suppress-the-messages-tensorflow-prints/40426709
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
import warnings
# https://stackoverflow.com/questions/15777951/how-to-suppress-pandas-future-warning
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)
import tensorflow as tf
tf.get_logger().setLevel('INFO')
tf.autograph.set_verbosity(0)
import logging
tf.get_logger().setLevel(logging.ERROR)

In [2]:
import gym
from stable_baselines.common.policies import CnnPolicy #, MlpPolicy, CnnLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv 
from stable_baselines import PPO2

from stable_baselines.common.evaluation import evaluate_policy as test
from stable_baselines.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold


In [3]:
## Choose one agent, see Docu for description
agent='CarRacing-v0'
agent='CarRacing-v1'
agent='CarRacing-v2'

# Stop training when the model reaches the reward threshold
callback_on_best = StopTrainingOnRewardThreshold(reward_threshold = 150, verbose=1)

In [4]:
## This environment param  
## Changing these makes models incompatible!!
game_color = 1
indicators = True
fpst = 4
skip = 3
actions = [[0, 0, 0], [-0.4, 0, 0], [0.4, 0, 0], [0, 0.6, 0], [0, 0, 0.8]]

In [5]:
## This model param
use = 100       # number of times to use same track [1,100]
ept = 15       # different starting points on same track [1,20]
track_complexity = 12
patience = 2.0
seed = 1000

#using follow_centerline for this first leg of training
REWARD = [-0.0, 0.1, 0.0, 0.0, 1.0, 100, -20, -100, -50]

if agent=='CarRacing-v2': 
    env1 = gym.make(agent, #seed=seed, 
        use_track = use,       
        episodes_per_track = ept,  
        tr_complexity = track_complexity, 
        patience = patience,
        game_color=game_color,
        indicators = indicators,
        discre = actions,     #passing custom actions
        frames_per_state = fpst,
        skip_frames = skip,
        f_reward = REWARD )   #passing a custom reward function
else: 
    env1 = gym.make(agent)

env1 = DummyVecEnv([lambda: env1])
env1.metadata

{'render.modes': ['human', 'rgb_array', 'state_pixels'],
 'FPS, 1/timebase': 33.333333333333336,
 'Zoom_level': 1.7,
 'Flight start': False,
 'show_track_1st': False,
 'state_pixels frame size': [96, 96]}

In [6]:
print(env1.action_space)
print(env1.observation_space)

Discrete(5)
Box(0, 255, (96, 96, 4), uint8)


In [7]:
## This training param
batch_size = 256
updates = 1000
epochs = 4

model = PPO2(CnnPolicy, env1, verbose=1, n_steps=batch_size, # seed=314, n_cpu_tf_sess=1,
             gamma=0.99, learning_rate=0.00025, nminibatches=epochs, ent_coef=0.01, vf_coef=0.5) 


In [8]:
## Separate evaluation env
test_freq = 50      #policy updates until evaluation
test_episodes_per_track = 10   #number of starting points on test_track
eval_log = './evals/'

env_test = gym.make(agent, seed=int(3.14*seed), 
        use_track = 1,        #change test track after 1 ept round
        episodes_per_track = test_episodes_per_track,  
        tr_complexity = 12,   # test on a medium complexity track
        patience = patience,  #1.0,
        game_color=game_color,
        indicators = indicators,
        discre = actions,
        frames_per_state = fpst,
        skip_frames = skip   )

env_test = DummyVecEnv([lambda: env_test])

eval_callback = EvalCallback(env_test, callback_on_new_best=callback_on_best,  #None,
                             n_eval_episodes = test_episodes_per_track*2, eval_freq = test_freq*batch_size,
                             best_model_save_path=eval_log, log_path=eval_log, 
                             deterministic=True, render=False)


In [9]:
##Independent test routine
#reward_test, epis = test(model, env_test, n_eval_episodes=test_episodes_per_track, 
#                         deterministic=True, render=False, callback=None, reward_threshold=100, 
#                         return_episode_rewards=True)
#reward_test

In [10]:
## Training #1

model.learn(total_timesteps = updates*batch_size, log_interval=1, callback=eval_callback)


Track generation: 1252..1568 -> 316-tiles track, complex 12
1  cut by time without progress. Steps 157  %advance 1.5  played reward 13.71  last penalty -20
-------------------------------------
| approxkl           | 0.002542828  |
| clipfrac           | 0.0          |
| explained_variance | -0.00626     |
| fps                | 48           |
| n_updates          | 1            |
| policy_entropy     | 1.6068904    |
| policy_loss        | -0.010900296 |
| serial_timesteps   | 256          |
| time_elapsed       | 0            |
| total_timesteps    | 256          |
| value_loss         | 7.487932     |
-------------------------------------
--------------------------------------
| approxkl           | 0.0031470456  |
| clipfrac           | 0.0126953125  |
| explained_variance | -0.149        |
| fps                | 55            |
| n_updates          | 2             |
| policy_entropy     | 1.596546      |
| policy_loss        | -0.0038903551 |
| serial_timesteps   | 512           |

12  cut by time without progress. Steps 229  %advance 1.9  played reward 10.74  last penalty -20
-------------------------------------
| approxkl           | 0.0050288932 |
| clipfrac           | 0.07421875   |
| explained_variance | 0.501        |
| fps                | 54           |
| n_updates          | 16           |
| policy_entropy     | 1.3482565    |
| policy_loss        | 0.0003911123 |
| serial_timesteps   | 4096         |
| time_elapsed       | 72.4         |
| total_timesteps    | 4096         |
| value_loss         | 7.477728     |
-------------------------------------
13  cut by time without progress. Steps 212  %advance 2.5  played reward 10.77  last penalty -20
14  cut by time without progress. Steps 187  %advance 1.5  played reward 10.74  last penalty -20
-------------------------------------
| approxkl           | 0.010636956  |
| clipfrac           | 0.16699219   |
| explained_variance | 0.591        |
| fps                | 54           |
| n_updates          | 17

30  cut by time without progress. Steps 98  %advance 0.9  played reward 9.44  last penalty -20
--------------------------------------
| approxkl           | 0.0018375893  |
| clipfrac           | 0.0087890625  |
| explained_variance | 0.0677        |
| fps                | 53            |
| n_updates          | 29            |
| policy_entropy     | 1.3937393     |
| policy_loss        | -0.0037585413 |
| serial_timesteps   | 7424          |
| time_elapsed       | 133           |
| total_timesteps    | 7424          |
| value_loss         | 20.465916     |
--------------------------------------
31  cut by time without progress. Steps 139  %advance 1.2  played reward 12.22  last penalty -20
--------------------------------------
| approxkl           | 0.0031281535  |
| clipfrac           | 0.0           |
| explained_variance | -0.0061       |
| fps                | 54            |
| n_updates          | 30            |
| policy_entropy     | 1.3275113     |
| policy_loss        | 0.000

42  cut by time without progress. Steps 94  %advance 0.9  played reward 8.17  last penalty -20
--------------------------------------
| approxkl           | 0.0044078277  |
| clipfrac           | 0.07714844    |
| explained_variance | -0.046        |
| fps                | 51            |
| n_updates          | 44            |
| policy_entropy     | 1.4091861     |
| policy_loss        | -0.0036994016 |
| serial_timesteps   | 11264         |
| time_elapsed       | 205           |
| total_timesteps    | 11264         |
| value_loss         | 12.50644      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0067174113  |
| clipfrac           | 0.09375       |
| explained_variance | 0.666         |
| fps                | 53            |
| n_updates          | 45            |
| policy_entropy     | 1.508414      |
| policy_loss        | -0.0072638313 |
| serial_timesteps   | 11520         |
| time_elapsed       | 210           |
| total_

-------------------------------------
| approxkl           | 0.005221431  |
| clipfrac           | 0.02734375   |
| explained_variance | 0.951        |
| fps                | 54           |
| n_updates          | 55           |
| policy_entropy     | 1.5835286    |
| policy_loss        | -0.006784471 |
| serial_timesteps   | 14080        |
| time_elapsed       | 297          |
| total_timesteps    | 14080        |
| value_loss         | 0.1018676    |
-------------------------------------
-------------------------------------
| approxkl           | 0.0038716006 |
| clipfrac           | 0.026367188  |
| explained_variance | 0.536        |
| fps                | 54           |
| n_updates          | 56           |
| policy_entropy     | 1.582422     |
| policy_loss        | -0.003605084 |
| serial_timesteps   | 14336        |
| time_elapsed       | 301          |
| total_timesteps    | 14336        |
| value_loss         | 0.33087024   |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.001330222   |
| clipfrac           | 0.0           |
| explained_variance | 0.048         |
| fps                | 53            |
| n_updates          | 69            |
| policy_entropy     | 1.2425915     |
| policy_loss        | -0.0015526469 |
| serial_timesteps   | 17664         |
| time_elapsed       | 363           |
| total_timesteps    | 17664         |
| value_loss         | 11.047726     |
--------------------------------------
62  cut by time without progress. Steps 109  %advance 0.9  played reward 9.67  last penalty -20
63  cut by time without progress. Steps 149  %advance 1.2  played reward 13.51  last penalty -20
-------------------------------------
| approxkl           | 0.0069573005 |
| clipfrac           | 0.07324219   |
| explained_variance | 0.073        |
| fps                | 52           |
| n_updates          | 70           |
| policy_entropy     | 1.3259063    |
| policy_loss        | -0.00794055

-------------------------------------
| approxkl           | 0.011308324  |
| clipfrac           | 0.2265625    |
| explained_variance | 0.931        |
| fps                | 53           |
| n_updates          | 84           |
| policy_entropy     | 1.4869037    |
| policy_loss        | -0.018043654 |
| serial_timesteps   | 21504        |
| time_elapsed       | 435          |
| total_timesteps    | 21504        |
| value_loss         | 0.4935114    |
-------------------------------------
74  env max steps reached 1000  %advance 8.8  played reward 66.87  last penalty -50
--------------------------------------
| approxkl           | 0.0076136873  |
| clipfrac           | 0.119140625   |
| explained_variance | 0.0335        |
| fps                | 53            |
| n_updates          | 85            |
| policy_entropy     | 1.4218123     |
| policy_loss        | -0.0076624574 |
| serial_timesteps   | 21760         |
| time_elapsed       | 440           |
| total_timesteps    | 21760    

84  cut by time without progress. Steps 251  %advance 2.2  played reward 8.62  last penalty -20
---------------------------------------
| approxkl           | 0.0012129851   |
| clipfrac           | 0.0            |
| explained_variance | 0.803          |
| fps                | 49             |
| n_updates          | 99             |
| policy_entropy     | 1.4852223      |
| policy_loss        | -0.00041010312 |
| serial_timesteps   | 25344          |
| time_elapsed       | 507            |
| total_timesteps    | 25344          |
| value_loss         | 3.7662172      |
---------------------------------------
1  cut by time without progress. Steps 133  %advance 7.5  played reward -6.27  last penalty -20
2  cut by time without progress. Steps 133  %advance 7.5  played reward -6.27  last penalty -20
3  out of limits. Steps 87  %advance 8.4  played reward -0.76  last penalty -100
4  cut by time without progress. Steps 106  %advance 3.0  played reward -8.09  last penalty -20
5  cut by time 

-------------------------------------
| approxkl           | 0.01079815   |
| clipfrac           | 0.1484375    |
| explained_variance | 0.739        |
| fps                | 53           |
| n_updates          | 110          |
| policy_entropy     | 1.3998626    |
| policy_loss        | -0.010549473 |
| serial_timesteps   | 28160        |
| time_elapsed       | 595          |
| total_timesteps    | 28160        |
| value_loss         | 4.49521      |
-------------------------------------
91  cut by time without progress. Steps 288  %advance 6.0  played reward 19.28  last penalty -20
-------------------------------------
| approxkl           | 0.0045642024 |
| clipfrac           | 0.06933594   |
| explained_variance | 0.864        |
| fps                | 54           |
| n_updates          | 111          |
| policy_entropy     | 1.4434036    |
| policy_loss        | -0.01072894  |
| serial_timesteps   | 28416        |
| time_elapsed       | 599          |
| total_timesteps    | 28416 

---------------------------------------
| approxkl           | 0.0035324339   |
| clipfrac           | 0.046875       |
| explained_variance | 0.0555         |
| fps                | 53             |
| n_updates          | 124            |
| policy_entropy     | 1.4716073      |
| policy_loss        | -0.00097190414 |
| serial_timesteps   | 31744          |
| time_elapsed       | 662            |
| total_timesteps    | 31744          |
| value_loss         | 9.886234       |
---------------------------------------
104  cut by time without progress. Steps 394  %advance 3.1  played reward 32.43  last penalty -20
--------------------------------------
| approxkl           | 0.008505343   |
| clipfrac           | 0.12695312    |
| explained_variance | 0.385         |
| fps                | 53            |
| n_updates          | 125           |
| policy_entropy     | 1.4601595     |
| policy_loss        | -0.0048121936 |
| serial_timesteps   | 32000         |
| time_elapsed       | 667     

117  cut by time without progress. Steps 200  %advance 3.4  played reward 17.33  last penalty -20
-------------------------------------
| approxkl           | 0.0017749446 |
| clipfrac           | 0.0107421875 |
| explained_variance | 0.479        |
| fps                | 53           |
| n_updates          | 138          |
| policy_entropy     | 1.3746207    |
| policy_loss        | -0.007693654 |
| serial_timesteps   | 35328        |
| time_elapsed       | 729          |
| total_timesteps    | 35328        |
| value_loss         | 7.624518     |
-------------------------------------
-------------------------------------
| approxkl           | 0.009399882  |
| clipfrac           | 0.17773438   |
| explained_variance | 0.898        |
| fps                | 54           |
| n_updates          | 139          |
| policy_entropy     | 1.5804586    |
| policy_loss        | -0.009920686 |
| serial_timesteps   | 35584        |
| time_elapsed       | 734          |
| total_timesteps    | 35584

2  cut by time without progress. Steps 67  %advance 0.7  played reward -6.59  last penalty -20
3  cut by time without progress. Steps 131  %advance 6.2  played reward -7.5  last penalty -20
4  cut by time without progress. Steps 307  %advance 9.8  played reward -21.44  last penalty -20
5  cut by time without progress. Steps 143  %advance 2.9  played reward -12.0  last penalty -20
6  cut by time without progress. Steps 129  %advance 6.2  played reward -7.3  last penalty -20
7  cut by time without progress. Steps 67  %advance 0.7  played reward -6.59  last penalty -20
8  cut by time without progress. Steps 157  %advance 10.2  played reward -6.07  last penalty -20
9  cut by time without progress. Steps 67  %advance 0.7  played reward -6.59  last penalty -20
10  cut by time without progress. Steps 151  %advance 5.8  played reward -9.87  last penalty -20
Track generation: 1151..1442 -> 291-tiles track, complex 12
Eval num_timesteps=38400, episode_reward=-28.81 +/- 3.38
Episode length: 128.0

140  cut by time without progress. Steps 168  %advance 1.2  played reward 15.11  last penalty -20
--------------------------------------
| approxkl           | 0.0040588444  |
| clipfrac           | 0.026367188   |
| explained_variance | 0.853         |
| fps                | 52            |
| n_updates          | 163           |
| policy_entropy     | 1.3481376     |
| policy_loss        | -0.0036858104 |
| serial_timesteps   | 41728         |
| time_elapsed       | 883           |
| total_timesteps    | 41728         |
| value_loss         | 3.2543607     |
--------------------------------------
141  cut by time without progress. Steps 260  %advance 3.1  played reward 23.71  last penalty -20
--------------------------------------
| approxkl           | 0.0011232065  |
| clipfrac           | 0.0048828125  |
| explained_variance | 0.434         |
| fps                | 52            |
| n_updates          | 164           |
| policy_entropy     | 1.3726946     |
| policy_loss        | -

--------------------------------------
| approxkl           | 0.0076570977  |
| clipfrac           | 0.11230469    |
| explained_variance | 0.846         |
| fps                | 53            |
| n_updates          | 177           |
| policy_entropy     | 1.1455387     |
| policy_loss        | -0.0036094899 |
| serial_timesteps   | 45312         |
| time_elapsed       | 950           |
| total_timesteps    | 45312         |
| value_loss         | 0.10089086    |
--------------------------------------
-------------------------------------
| approxkl           | 0.0031066136 |
| clipfrac           | 0.048828125  |
| explained_variance | 0.947        |
| fps                | 53           |
| n_updates          | 178          |
| policy_entropy     | 1.3096317    |
| policy_loss        | -0.005734225 |
| serial_timesteps   | 45568        |
| time_elapsed       | 955          |
| total_timesteps    | 45568        |
| value_loss         | 0.095073186  |
-------------------------------------

166  cut by time without progress. Steps 340  %advance 4.4  played reward 24.05  last penalty -20
--------------------------------------
| approxkl           | 0.004454202   |
| clipfrac           | 0.051757812   |
| explained_variance | 0.86          |
| fps                | 53            |
| n_updates          | 192           |
| policy_entropy     | 1.1042206     |
| policy_loss        | -0.0016861664 |
| serial_timesteps   | 49152         |
| time_elapsed       | 1.03e+03      |
| total_timesteps    | 49152         |
| value_loss         | 3.1574526     |
--------------------------------------
167  cut by time without progress. Steps 317  %advance 3.8  played reward 28.21  last penalty -20
-------------------------------------
| approxkl           | 0.012088489  |
| clipfrac           | 0.20117188   |
| explained_variance | 0.331        |
| fps                | 50           |
| n_updates          | 193          |
| policy_entropy     | 1.2659038    |
| policy_loss        | -0.02097

--------------------------------------
| approxkl           | 0.006041598   |
| clipfrac           | 0.09472656    |
| explained_variance | 0.797         |
| fps                | 52            |
| n_updates          | 202           |
| policy_entropy     | 1.3794599     |
| policy_loss        | -0.0038485674 |
| serial_timesteps   | 51712         |
| time_elapsed       | 1.11e+03      |
| total_timesteps    | 51712         |
| value_loss         | 6.7055693     |
--------------------------------------
-------------------------------------
| approxkl           | 0.008793136  |
| clipfrac           | 0.15625      |
| explained_variance | 0.95         |
| fps                | 54           |
| n_updates          | 203          |
| policy_entropy     | 1.3407615    |
| policy_loss        | -0.012228003 |
| serial_timesteps   | 51968        |
| time_elapsed       | 1.11e+03     |
| total_timesteps    | 51968        |
| value_loss         | 0.13026822   |
-------------------------------------

186  cut by time without progress. Steps 440  %advance 3.8  played reward 36.66  last penalty -20
187  cut by time without progress. Steps 67  %advance 0.6  played reward 6.17  last penalty -20
-------------------------------------
| approxkl           | 0.0062299855 |
| clipfrac           | 0.10058594   |
| explained_variance | 0.547        |
| fps                | 51           |
| n_updates          | 217          |
| policy_entropy     | 1.430974     |
| policy_loss        | -0.008288614 |
| serial_timesteps   | 55552        |
| time_elapsed       | 1.19e+03     |
| total_timesteps    | 55552        |
| value_loss         | 13.338136    |
-------------------------------------
188  cut by time without progress. Steps 236  %advance 2.5  played reward 18.58  last penalty -20
189  cut by time without progress. Steps 100  %advance 0.9  played reward 8.69  last penalty -20
--------------------------------------
| approxkl           | 0.00475921    |
| clipfrac           | 0.036132812   |


-------------------------------------
| approxkl           | 0.0037043227 |
| clipfrac           | 0.030273438  |
| explained_variance | 0.35         |
| fps                | 52           |
| n_updates          | 231          |
| policy_entropy     | 1.5066457    |
| policy_loss        | -0.007982356 |
| serial_timesteps   | 59136        |
| time_elapsed       | 1.26e+03     |
| total_timesteps    | 59136        |
| value_loss         | 8.773069     |
-------------------------------------
-------------------------------------
| approxkl           | 0.012815523  |
| clipfrac           | 0.21289062   |
| explained_variance | 0.685        |
| fps                | 53           |
| n_updates          | 232          |
| policy_entropy     | 1.5435357    |
| policy_loss        | -0.014663629 |
| serial_timesteps   | 59392        |
| time_elapsed       | 1.26e+03     |
| total_timesteps    | 59392        |
| value_loss         | 0.14302823   |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.010583729  |
| clipfrac           | 0.12597656   |
| explained_variance | 0.366        |
| fps                | 50           |
| n_updates          | 246          |
| policy_entropy     | 1.5076163    |
| policy_loss        | -0.015533355 |
| serial_timesteps   | 62976        |
| time_elapsed       | 1.33e+03     |
| total_timesteps    | 62976        |
| value_loss         | 39.685715    |
-------------------------------------
-------------------------------------
| approxkl           | 0.009776195  |
| clipfrac           | 0.12792969   |
| explained_variance | 0.733        |
| fps                | 52           |
| n_updates          | 247          |
| policy_entropy     | 1.43184      |
| policy_loss        | -0.017807003 |
| serial_timesteps   | 63232        |
| time_elapsed       | 1.34e+03     |
| total_timesteps    | 63232        |
| value_loss         | 4.776329     |
-------------------------------------
207  cut by 

211  cut by time without progress. Steps 682  %advance 6.9  played reward 58.31  last penalty -20
-------------------------------------
| approxkl           | 0.0014076998 |
| clipfrac           | 0.0009765625 |
| explained_variance | 0.351        |
| fps                | 43           |
| n_updates          | 258          |
| policy_entropy     | 1.4315758    |
| policy_loss        | -0.002251999 |
| serial_timesteps   | 66048        |
| time_elapsed       | 1.44e+03     |
| total_timesteps    | 66048        |
| value_loss         | 6.9567146    |
-------------------------------------
-------------------------------------
| approxkl           | 0.012361105  |
| clipfrac           | 0.24707031   |
| explained_variance | 0.92         |
| fps                | 52           |
| n_updates          | 259          |
| policy_entropy     | 1.4948499    |
| policy_loss        | -0.020036688 |
| serial_timesteps   | 66304        |
| time_elapsed       | 1.44e+03     |
| total_timesteps    | 66304

220  cut by time without progress. Steps 331  %advance 3.4  played reward 29.14  last penalty -20
--------------------------------------
| approxkl           | 0.0038056052  |
| clipfrac           | 0.026367188   |
| explained_variance | 0.3           |
| fps                | 48            |
| n_updates          | 273           |
| policy_entropy     | 1.2259518     |
| policy_loss        | -0.0060997456 |
| serial_timesteps   | 69888         |
| time_elapsed       | 1.51e+03      |
| total_timesteps    | 69888         |
| value_loss         | 10.388426     |
--------------------------------------
-------------------------------------
| approxkl           | 0.011302089  |
| clipfrac           | 0.20507812   |
| explained_variance | 0.89         |
| fps                | 52           |
| n_updates          | 274          |
| policy_entropy     | 1.2223506    |
| policy_loss        | -0.013188006 |
| serial_timesteps   | 70144        |
| time_elapsed       | 1.52e+03     |
| total_timeste

226  env max steps reached 1000  %advance 27.9  played reward 98.11  last penalty -50
-------------------------------------
| approxkl           | 0.011711382  |
| clipfrac           | 0.16601562   |
| explained_variance | 0.18         |
| fps                | 47           |
| n_updates          | 289          |
| policy_entropy     | 0.96884054   |
| policy_loss        | 0.0023305877 |
| serial_timesteps   | 73984        |
| time_elapsed       | 1.6e+03      |
| total_timesteps    | 73984        |
| value_loss         | 45.00503     |
-------------------------------------
227  cut by time without progress. Steps 472  %advance 21.5  played reward 45.47  last penalty -20
-------------------------------------
| approxkl           | 0.0029713481 |
| clipfrac           | 0.052734375  |
| explained_variance | 0.912        |
| fps                | 50           |
| n_updates          | 290          |
| policy_entropy     | 0.8710124    |
| policy_loss        | -0.006667502 |
| serial_timestep

--------------------------------------
| approxkl           | 0.0036935809  |
| clipfrac           | 0.05078125    |
| explained_variance | 0.754         |
| fps                | 5             |
| n_updates          | 300           |
| policy_entropy     | 1.021799      |
| policy_loss        | -0.0065188683 |
| serial_timesteps   | 76800         |
| time_elapsed       | 1.65e+03      |
| total_timesteps    | 76800         |
| value_loss         | 11.993579     |
--------------------------------------
233  cut by time without progress. Steps 269  %advance 5.3  played reward 26.06  last penalty -20
-------------------------------------
| approxkl           | 0.010035895  |
| clipfrac           | 0.19042969   |
| explained_variance | 0.377        |
| fps                | 48           |
| n_updates          | 301          |
| policy_entropy     | 1.3210703    |
| policy_loss        | -0.011619374 |
| serial_timesteps   | 77056        |
| time_elapsed       | 1.7e+03      |
| total_timeste

-------------------------------------
| approxkl           | 0.013545162  |
| clipfrac           | 0.20507812   |
| explained_variance | 0.789        |
| fps                | 50           |
| n_updates          | 315          |
| policy_entropy     | 1.0405874    |
| policy_loss        | -0.017487608 |
| serial_timesteps   | 80640        |
| time_elapsed       | 1.77e+03     |
| total_timesteps    | 80640        |
| value_loss         | 0.059001774  |
-------------------------------------
242  cut by time without progress. Steps 443  %advance 8.2  played reward 42.06  last penalty -20
-------------------------------------
| approxkl           | 0.0018006745 |
| clipfrac           | 0.0107421875 |
| explained_variance | 0.51         |
| fps                | 47           |
| n_updates          | 316          |
| policy_entropy     | 1.3314795    |
| policy_loss        | -0.007220121 |
| serial_timesteps   | 80896        |
| time_elapsed       | 1.77e+03     |
| total_timesteps    | 80896

-------------------------------------
| approxkl           | 0.0067719514 |
| clipfrac           | 0.10253906   |
| explained_variance | 0.99         |
| fps                | 50           |
| n_updates          | 329          |
| policy_entropy     | 1.2490377    |
| policy_loss        | -0.019721681 |
| serial_timesteps   | 84224        |
| time_elapsed       | 1.84e+03     |
| total_timesteps    | 84224        |
| value_loss         | 0.19813555   |
-------------------------------------
256  cut by time without progress. Steps 574  %advance 16.1  played reward 49.47  last penalty -20
-------------------------------------
| approxkl           | 0.0066896803 |
| clipfrac           | 0.0625       |
| explained_variance | 0.932        |
| fps                | 52           |
| n_updates          | 330          |
| policy_entropy     | 1.0646241    |
| policy_loss        | 0.0019060539 |
| serial_timesteps   | 84480        |
| time_elapsed       | 1.84e+03     |
| total_timesteps    | 8448

268  cut by time without progress. Steps 405  %advance 6.0  played reward 26.47  last penalty -20
--------------------------------------
| approxkl           | 0.0030309465  |
| clipfrac           | 0.026367188   |
| explained_variance | 0.975         |
| fps                | 51            |
| n_updates          | 344           |
| policy_entropy     | 1.1026349     |
| policy_loss        | 0.00069151993 |
| serial_timesteps   | 88064         |
| time_elapsed       | 1.91e+03      |
| total_timesteps    | 88064         |
| value_loss         | 0.8640855     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0026865776  |
| clipfrac           | 0.0146484375  |
| explained_variance | 0.944         |
| fps                | 48            |
| n_updates          | 345           |
| policy_entropy     | 1.2046105     |
| policy_loss        | -0.0071899723 |
| serial_timesteps   | 88320         |
| time_elapsed       | 1.92e+03      |
| tot

275  cut by time without progress. Steps 128  %advance 0.9  played reward 11.44  last penalty -20
-------------------------------------
| approxkl           | 0.012092985  |
| clipfrac           | 0.15820312   |
| explained_variance | 0.752        |
| fps                | 49           |
| n_updates          | 355          |
| policy_entropy     | 1.2919872    |
| policy_loss        | -0.009119101 |
| serial_timesteps   | 90880        |
| time_elapsed       | 2e+03        |
| total_timesteps    | 90880        |
| value_loss         | 5.1978292    |
-------------------------------------
276  cut by time without progress. Steps 296  %advance 6.3  played reward 29.47  last penalty -20
------------------------------------
| approxkl           | 0.014096532 |
| clipfrac           | 0.19238281  |
| explained_variance | 0.861       |
| fps                | 49          |
| n_updates          | 356         |
| policy_entropy     | 1.2403581   |
| policy_loss        | -0.01081579 |
| serial_times

-------------------------------------
| approxkl           | 0.014498677  |
| clipfrac           | 0.2265625    |
| explained_variance | 0.956        |
| fps                | 53           |
| n_updates          | 369          |
| policy_entropy     | 1.2910109    |
| policy_loss        | -0.021586787 |
| serial_timesteps   | 94464        |
| time_elapsed       | 2.07e+03     |
| total_timesteps    | 94464        |
| value_loss         | 1.4047493    |
-------------------------------------
288  cut by time without progress. Steps 201  %advance 2.8  played reward 16.96  last penalty -20
-------------------------------------
| approxkl           | 0.0063895276 |
| clipfrac           | 0.083984375  |
| explained_variance | 0.909        |
| fps                | 47           |
| n_updates          | 370          |
| policy_entropy     | 1.1358392    |
| policy_loss        | -0.009427207 |
| serial_timesteps   | 94720        |
| time_elapsed       | 2.08e+03     |
| total_timesteps    | 94720

303  cut by time without progress. Steps 239  %advance 2.8  played reward 20.86  last penalty -20
--------------------------------------
| approxkl           | 0.0067577143  |
| clipfrac           | 0.07714844    |
| explained_variance | 0.822         |
| fps                | 52            |
| n_updates          | 383           |
| policy_entropy     | 1.1650531     |
| policy_loss        | -0.0063454164 |
| serial_timesteps   | 98048         |
| time_elapsed       | 2.15e+03      |
| total_timesteps    | 98048         |
| value_loss         | 3.408774      |
--------------------------------------
304  cut by time without progress. Steps 286  %advance 7.9  played reward 29.81  last penalty -20
-------------------------------------
| approxkl           | 0.0058627846 |
| clipfrac           | 0.056640625  |
| explained_variance | 0.782        |
| fps                | 49           |
| n_updates          | 384          |
| policy_entropy     | 1.2895993    |
| policy_loss        | -0.01189

-------------------------------------
| approxkl           | 0.013097984  |
| clipfrac           | 0.203125     |
| explained_variance | 0.908        |
| fps                | 49           |
| n_updates          | 398          |
| policy_entropy     | 1.2559246    |
| policy_loss        | -0.012937386 |
| serial_timesteps   | 101888       |
| time_elapsed       | 2.22e+03     |
| total_timesteps    | 101888       |
| value_loss         | 0.19362597   |
-------------------------------------
314  cut by time without progress. Steps 971  %advance 28.5  played reward 101.05  last penalty -20
-------------------------------------
| approxkl           | 0.01324041   |
| clipfrac           | 0.23730469   |
| explained_variance | 0.551        |
| fps                | 53           |
| n_updates          | 399          |
| policy_entropy     | 1.3324134    |
| policy_loss        | -0.011250921 |
| serial_timesteps   | 102144       |
| time_elapsed       | 2.23e+03     |
| total_timesteps    | 102

322  cut by time without progress. Steps 395  %advance 13.0  played reward 44.55  last penalty -20
--------------------------------------
| approxkl           | 0.0021286157  |
| clipfrac           | 0.030273438   |
| explained_variance | 0.754         |
| fps                | 50            |
| n_updates          | 409           |
| policy_entropy     | 1.1738718     |
| policy_loss        | -0.0024863605 |
| serial_timesteps   | 104704        |
| time_elapsed       | 2.32e+03      |
| total_timesteps    | 104704        |
| value_loss         | 5.244598      |
--------------------------------------
323  cut by time without progress. Steps 156  %advance 5.3  played reward 17.39  last penalty -20
324  cut by time without progress. Steps 67  %advance 0.6  played reward 6.1  last penalty -20
-------------------------------------
| approxkl           | 0.00856833   |
| clipfrac           | 0.15820312   |
| explained_variance | 0.518        |
| fps                | 50           |
| n_updates

331  cut by time without progress. Steps 467  %advance 21.2  played reward 42.66  last penalty -20
-------------------------------------
| approxkl           | 0.0062704924 |
| clipfrac           | 0.10253906   |
| explained_variance | 0.942        |
| fps                | 52           |
| n_updates          | 424          |
| policy_entropy     | 1.0781783    |
| policy_loss        | -0.011400837 |
| serial_timesteps   | 108544       |
| time_elapsed       | 2.4e+03      |
| total_timesteps    | 108544       |
| value_loss         | 2.1575384    |
-------------------------------------
332  cut by time without progress. Steps 373  %advance 20.9  played reward 34.22  last penalty -20
--------------------------------------
| approxkl           | 0.019614022   |
| clipfrac           | 0.23046875    |
| explained_variance | 0.913         |
| fps                | 48            |
| n_updates          | 425           |
| policy_entropy     | 1.1149607     |
| policy_loss        | -0.012926822

-------------------------------------
| approxkl           | 0.010206502  |
| clipfrac           | 0.12597656   |
| explained_variance | 0.909        |
| fps                | 47           |
| n_updates          | 437          |
| policy_entropy     | 0.98703057   |
| policy_loss        | -0.011489687 |
| serial_timesteps   | 111872       |
| time_elapsed       | 2.46e+03     |
| total_timesteps    | 111872       |
| value_loss         | 2.5240645    |
-------------------------------------
349  cut by time without progress. Steps 202  %advance 8.2  played reward 17.57  last penalty -20
350  cut by time without progress. Steps 67  %advance 0.6  played reward 5.79  last penalty -20
--------------------------------------
| approxkl           | 0.0019562668  |
| clipfrac           | 0.0087890625  |
| explained_variance | 0.721         |
| fps                | 52            |
| n_updates          | 438           |
| policy_entropy     | 1.1343389     |
| policy_loss        | -0.0011284745 |


1  cut by time without progress. Steps 154  %advance 10.7  played reward -5.26  last penalty -20
2  cut by time without progress. Steps 147  %advance 10.0  played reward -5.3  last penalty -20
3  cut by time without progress. Steps 601  %advance 43.8  played reward -16.87  last penalty -20
4  cut by time without progress. Steps 442  %advance 41.2  played reward -3.57  last penalty -20
5  cut by time without progress. Steps 67  %advance 0.7  played reward -6.59  last penalty -20
6  cut by time without progress. Steps 172  %advance 11.8  played reward -5.94  last penalty -20
7  cut by time without progress. Steps 188  %advance 14.8  played reward -4.57  last penalty -20
8  cut by time without progress. Steps 67  %advance 0.7  played reward -6.59  last penalty -20
9  cut by time without progress. Steps 67  %advance 0.7  played reward -6.59  last penalty -20
10  cut by time without progress. Steps 228  %advance 14.1  played reward -9.31  last penalty -20
Track generation: 1180..1478 -> 298

--------------------------------------
| approxkl           | 0.011318341   |
| clipfrac           | 0.18945312    |
| explained_variance | 0.951         |
| fps                | 53            |
| n_updates          | 463           |
| policy_entropy     | 1.1590587     |
| policy_loss        | -0.0072269347 |
| serial_timesteps   | 118528        |
| time_elapsed       | 2.65e+03      |
| total_timesteps    | 118528        |
| value_loss         | 3.1677155     |
--------------------------------------
364  cut by time without progress. Steps 212  %advance 7.6  played reward 24.78  last penalty -20
365  cut by time without progress. Steps 67  %advance 0.6  played reward 6.24  last penalty -20
-------------------------------------
| approxkl           | 0.00923891   |
| clipfrac           | 0.16601562   |
| explained_variance | 0.695        |
| fps                | 53           |
| n_updates          | 464          |
| policy_entropy     | 1.2612408    |
| policy_loss        | -0.0062003

375  cut by time without progress. Steps 592  %advance 33.6  played reward 74.93  last penalty -20
-------------------------------------
| approxkl           | 0.011290603  |
| clipfrac           | 0.16113281   |
| explained_variance | 0.619        |
| fps                | 53           |
| n_updates          | 478          |
| policy_entropy     | 1.0534403    |
| policy_loss        | -0.004622259 |
| serial_timesteps   | 122368       |
| time_elapsed       | 2.72e+03     |
| total_timesteps    | 122368       |
| value_loss         | 8.082633     |
-------------------------------------
-------------------------------------
| approxkl           | 0.009316924  |
| clipfrac           | 0.10253906   |
| explained_variance | 0.609        |
| fps                | 53           |
| n_updates          | 479          |
| policy_entropy     | 0.9041533    |
| policy_loss        | -0.013329331 |
| serial_timesteps   | 122624       |
| time_elapsed       | 2.72e+03     |
| total_timesteps    | 1226

387  cut by time without progress. Steps 377  %advance 11.4  played reward 40.52  last penalty -20
-------------------------------------
| approxkl           | 0.011490059  |
| clipfrac           | 0.16699219   |
| explained_variance | 0.761        |
| fps                | 52           |
| n_updates          | 493          |
| policy_entropy     | 1.125525     |
| policy_loss        | -0.008340215 |
| serial_timesteps   | 126208       |
| time_elapsed       | 2.79e+03     |
| total_timesteps    | 126208       |
| value_loss         | 5.696044     |
-------------------------------------
388  cut by time without progress. Steps 272  %advance 5.7  played reward 17.82  last penalty -20
--------------------------------------
| approxkl           | 0.006816701   |
| clipfrac           | 0.10839844    |
| explained_variance | 0.954         |
| fps                | 53            |
| n_updates          | 494           |
| policy_entropy     | 1.0969925     |
| policy_loss        | -0.0063620075

392  cut by time without progress. Steps 487  %advance 20.3  played reward 57.95  last penalty -20
-------------------------------------
| approxkl           | 0.0066338517 |
| clipfrac           | 0.09863281   |
| explained_variance | 0.644        |
| fps                | 53           |
| n_updates          | 504          |
| policy_entropy     | 1.2310323    |
| policy_loss        | -0.008655148 |
| serial_timesteps   | 129024       |
| time_elapsed       | 2.89e+03     |
| total_timesteps    | 129024       |
| value_loss         | 8.061407     |
-------------------------------------
-------------------------------------
| approxkl           | 0.007344725  |
| clipfrac           | 0.11816406   |
| explained_variance | 0.818        |
| fps                | 52           |
| n_updates          | 505          |
| policy_entropy     | 1.2426391    |
| policy_loss        | -0.008149503 |
| serial_timesteps   | 129280       |
| time_elapsed       | 2.9e+03      |
| total_timesteps    | 1292

-------------------------------------
| approxkl           | 0.00902885   |
| clipfrac           | 0.12402344   |
| explained_variance | 0.771        |
| fps                | 53           |
| n_updates          | 519          |
| policy_entropy     | 1.0006403    |
| policy_loss        | -0.006483633 |
| serial_timesteps   | 132864       |
| time_elapsed       | 2.97e+03     |
| total_timesteps    | 132864       |
| value_loss         | 8.647773     |
-------------------------------------
-------------------------------------
| approxkl           | 0.0075687403 |
| clipfrac           | 0.08105469   |
| explained_variance | 0.298        |
| fps                | 53           |
| n_updates          | 520          |
| policy_entropy     | 1.0464554    |
| policy_loss        | -0.008126455 |
| serial_timesteps   | 133120       |
| time_elapsed       | 2.97e+03     |
| total_timesteps    | 133120       |
| value_loss         | 1.130174     |
-------------------------------------
------------

406  cut by time without progress. Steps 628  %advance 28.5  played reward 72.41  last penalty -20
-------------------------------------
| approxkl           | 0.010845559  |
| clipfrac           | 0.14355469   |
| explained_variance | 0.832        |
| fps                | 53           |
| n_updates          | 535          |
| policy_entropy     | 1.3793867    |
| policy_loss        | -0.004267562 |
| serial_timesteps   | 136960       |
| time_elapsed       | 3.04e+03     |
| total_timesteps    | 136960       |
| value_loss         | 4.0184674    |
-------------------------------------
-------------------------------------
| approxkl           | 0.010632445  |
| clipfrac           | 0.14257812   |
| explained_variance | -0.688       |
| fps                | 52           |
| n_updates          | 536          |
| policy_entropy     | 0.98460793   |
| policy_loss        | -0.015885215 |
| serial_timesteps   | 137216       |
| time_elapsed       | 3.05e+03     |
| total_timesteps    | 1372

1  cut by time without progress. Steps 334  %advance 35.0  played reward 0.94  last penalty -20
2  cut by time without progress. Steps 334  %advance 35.0  played reward 0.94  last penalty -20
3  cut by time without progress. Steps 249  %advance 22.9  played reward -2.62  last penalty -20
4  cut by time without progress. Steps 198  %advance 15.1  played reward -5.3  last penalty -20
5  cut by time without progress. Steps 118  %advance 5.0  played reward -7.41  last penalty -20
6  cut by time without progress. Steps 396  %advance 36.1  played reward -4.09  last penalty -20
7  cut by time without progress. Steps 658  %advance 68.0  played reward 1.61  last penalty -20
8  cut by time without progress. Steps 197  %advance 16.3  played reward -4.03  last penalty -20
9  cut by time without progress. Steps 129  %advance 6.6  played reward -6.96  last penalty -20
10  cut by time without progress. Steps 503  %advance 55.6  played reward 4.66  last penalty -20
Track generation: 1104..1383 -> 279-

--------------------------------------
| approxkl           | 0.0017134775  |
| clipfrac           | 0.0146484375  |
| explained_variance | 0.702         |
| fps                | 48            |
| n_updates          | 562           |
| policy_entropy     | 0.6448984     |
| policy_loss        | -0.0016176113 |
| serial_timesteps   | 143872        |
| time_elapsed       | 3.29e+03      |
| total_timesteps    | 143872        |
| value_loss         | 3.144029      |
--------------------------------------
420  cut by time without progress. Steps 587  %advance 61.2  played reward 86.54  last penalty -20
-------------------------------------
| approxkl           | 0.012436929  |
| clipfrac           | 0.125        |
| explained_variance | 0.893        |
| fps                | 52           |
| n_updates          | 563          |
| policy_entropy     | 0.7189695    |
| policy_loss        | -0.011228714 |
| serial_timesteps   | 144128       |
| time_elapsed       | 3.29e+03     |
| total_timest

425  env max steps reached 1000  %advance 66.0  played reward 136.81  last penalty -50
-------------------------------------
| approxkl           | 0.004266047  |
| clipfrac           | 0.0546875    |
| explained_variance | 0.0283       |
| fps                | 52           |
| n_updates          | 578          |
| policy_entropy     | 0.8975144    |
| policy_loss        | 0.0012536261 |
| serial_timesteps   | 147968       |
| time_elapsed       | 3.37e+03     |
| total_timesteps    | 147968       |
| value_loss         | 67.7875      |
-------------------------------------
-------------------------------------
| approxkl           | 0.009520607  |
| clipfrac           | 0.1484375    |
| explained_variance | 0.607        |
| fps                | 53           |
| n_updates          | 579          |
| policy_entropy     | 1.0011979    |
| policy_loss        | -0.009371382 |
| serial_timesteps   | 148224       |
| time_elapsed       | 3.37e+03     |
| total_timesteps    | 148224       |
|

430  cut by time without progress. Steps 355  %advance 36.5  played reward 49.17  last penalty -20
-------------------------------------
| approxkl           | 0.009376788  |
| clipfrac           | 0.11425781   |
| explained_variance | 0.482        |
| fps                | 52           |
| n_updates          | 594          |
| policy_entropy     | 0.8987176    |
| policy_loss        | -0.008433821 |
| serial_timesteps   | 152064       |
| time_elapsed       | 3.44e+03     |
| total_timesteps    | 152064       |
| value_loss         | 21.626913    |
-------------------------------------
-------------------------------------
| approxkl           | 0.008352262  |
| clipfrac           | 0.10058594   |
| explained_variance | -0.425       |
| fps                | 53           |
| n_updates          | 595          |
| policy_entropy     | 0.81705254   |
| policy_loss        | -0.008270509 |
| serial_timesteps   | 152320       |
| time_elapsed       | 3.45e+03     |
| total_timesteps    | 1523

-------------------------------------
| approxkl           | 0.01022822   |
| clipfrac           | 0.09472656   |
| explained_variance | 0.0351       |
| fps                | 50           |
| n_updates          | 605          |
| policy_entropy     | 0.71651673   |
| policy_loss        | 0.0028113176 |
| serial_timesteps   | 154880       |
| time_elapsed       | 3.59e+03     |
| total_timesteps    | 154880       |
| value_loss         | 116.921265   |
-------------------------------------
-------------------------------------
| approxkl           | 0.0075232685 |
| clipfrac           | 0.111328125  |
| explained_variance | -0.0729      |
| fps                | 47           |
| n_updates          | 606          |
| policy_entropy     | 0.7460627    |
| policy_loss        | -0.008943842 |
| serial_timesteps   | 155136       |
| time_elapsed       | 3.59e+03     |
| total_timesteps    | 155136       |
| value_loss         | 0.09517047   |
-------------------------------------
435  cut by 

440  cut by time without progress. Steps 368  %advance 30.1  played reward 46.05  last penalty -20
---------------------------------------
| approxkl           | 0.010566678    |
| clipfrac           | 0.1484375      |
| explained_variance | 0.333          |
| fps                | 53             |
| n_updates          | 621            |
| policy_entropy     | 0.7912356      |
| policy_loss        | -0.00089271646 |
| serial_timesteps   | 158976         |
| time_elapsed       | 3.67e+03       |
| total_timesteps    | 158976         |
| value_loss         | 25.25806       |
---------------------------------------
---------------------------------------
| approxkl           | 0.013616406    |
| clipfrac           | 0.11035156     |
| explained_variance | -0.302         |
| fps                | 53             |
| n_updates          | 622            |
| policy_entropy     | 0.68692744     |
| policy_loss        | -0.00039683736 |
| serial_timesteps   | 159232         |
| time_elapsed       

-------------------------------------
| approxkl           | 0.004985456  |
| clipfrac           | 0.080078125  |
| explained_variance | -0.137       |
| fps                | 53           |
| n_updates          | 637          |
| policy_entropy     | 0.70490074   |
| policy_loss        | -0.003678563 |
| serial_timesteps   | 163072       |
| time_elapsed       | 3.75e+03     |
| total_timesteps    | 163072       |
| value_loss         | 0.055560358  |
-------------------------------------
446  env max steps reached 1000  %advance 97.7  played reward 169.57  last penalty -50
-------------------------------------
| approxkl           | 0.0059579997 |
| clipfrac           | 0.07421875   |
| explained_variance | -0.0415      |
| fps                | 53           |
| n_updates          | 638          |
| policy_entropy     | 0.7129601    |
| policy_loss        | 0.0007760074 |
| serial_timesteps   | 163328       |
| time_elapsed       | 3.75e+03     |
| total_timesteps    | 163328       |
|

6  cut by time without progress. Steps 67  %advance 0.6  played reward -6.59  last penalty -20
7  cut by time without progress. Steps 574  %advance 79.5  played reward 21.51  last penalty -20
8  cut by time without progress. Steps 246  %advance 28.8  played reward 3.62  last penalty -20
9  cut by time without progress. Steps 485  %advance 60.0  played reward 10.97  last penalty -20
10  cut by time without progress. Steps 677  %advance 85.4  played reward 17.12  last penalty -20
Track generation: 1063..1332 -> 269-tiles track, complex 12
Eval num_timesteps=166400, episode_reward=3.54 +/- 60.77
Episode length: 422.90 +/- 216.48
New best mean reward!
-------------------------------------
| approxkl           | 0.0069481395 |
| clipfrac           | 0.08105469   |
| explained_variance | 0.0164       |
| fps                | 2            |
| n_updates          | 650          |
| policy_entropy     | 0.7361129    |
| policy_loss        | 0.0013924919 |
| serial_timesteps   | 166400       |
| 

---------------------------------------
| approxkl           | 0.0023747366   |
| clipfrac           | 0.0146484375   |
| explained_variance | 0.326          |
| fps                | 53             |
| n_updates          | 664            |
| policy_entropy     | 0.61250144     |
| policy_loss        | -0.00063228834 |
| serial_timesteps   | 169984         |
| time_elapsed       | 3.99e+03       |
| total_timesteps    | 169984         |
| value_loss         | 0.11504813     |
---------------------------------------
--------------------------------------
| approxkl           | 0.014300339   |
| clipfrac           | 0.18261719    |
| explained_variance | -0.364        |
| fps                | 53            |
| n_updates          | 665           |
| policy_entropy     | 0.64167106    |
| policy_loss        | -0.0079156915 |
| serial_timesteps   | 170240        |
| time_elapsed       | 4e+03         |
| total_timesteps    | 170240        |
| value_loss         | 0.20302607    |
------------

--------------------------------------
| approxkl           | 0.0076839672  |
| clipfrac           | 0.10644531    |
| explained_variance | 0.883         |
| fps                | 52            |
| n_updates          | 679           |
| policy_entropy     | 0.7804849     |
| policy_loss        | -0.0005533409 |
| serial_timesteps   | 173824        |
| time_elapsed       | 4.07e+03      |
| total_timesteps    | 173824        |
| value_loss         | 2.965838      |
--------------------------------------
468  out of limits. Steps 428  %advance 41.5  played reward 49.69  last penalty -100
469  cut by time without progress. Steps 176  %advance 9.2  played reward 4.96  last penalty -20
-------------------------------------
| approxkl           | 0.07611242   |
| clipfrac           | 0.15820312   |
| explained_variance | 0.45         |
| fps                | 52           |
| n_updates          | 680          |
| policy_entropy     | 1.0187551    |
| policy_loss        | -0.019657709 |
| seria

482  cut by time without progress. Steps 206  %advance 13.3  played reward 9.26  last penalty -20
--------------------------------------
| approxkl           | 0.0054660663  |
| clipfrac           | 0.091796875   |
| explained_variance | 0.802         |
| fps                | 52            |
| n_updates          | 693           |
| policy_entropy     | 0.56540567    |
| policy_loss        | -0.0087780785 |
| serial_timesteps   | 177408        |
| time_elapsed       | 4.13e+03      |
| total_timesteps    | 177408        |
| value_loss         | 11.970443     |
--------------------------------------
483  cut by time without progress. Steps 219  %advance 18.4  played reward 18.21  last penalty -20
484  cut by time without progress. Steps 156  %advance 11.4  played reward 10.6  last penalty -20
--------------------------------------
| approxkl           | 0.006941197   |
| clipfrac           | 0.08105469    |
| explained_variance | 0.611         |
| fps                | 52            |
| n

-------------------------------------
| approxkl           | 0.008823119  |
| clipfrac           | 0.08886719   |
| explained_variance | 0.895        |
| fps                | 52           |
| n_updates          | 703          |
| policy_entropy     | 0.5523369    |
| policy_loss        | -0.006437885 |
| serial_timesteps   | 179968       |
| time_elapsed       | 4.25e+03     |
| total_timesteps    | 179968       |
| value_loss         | 6.8003397    |
-------------------------------------
-------------------------------------
| approxkl           | 0.008628594  |
| clipfrac           | 0.1171875    |
| explained_variance | 0.704        |
| fps                | 53           |
| n_updates          | 704          |
| policy_entropy     | 0.55807906   |
| policy_loss        | -0.006221455 |
| serial_timesteps   | 180224       |
| time_elapsed       | 4.26e+03     |
| total_timesteps    | 180224       |
| value_loss         | 11.17669     |
-------------------------------------
492  cut by 

503  cut by time without progress. Steps 687  %advance 91.7  played reward 122.57  last penalty -20
504  cut by time without progress. Steps 67  %advance 0.6  played reward 6.57  last penalty -20
-------------------------------------
| approxkl           | 0.0050170077 |
| clipfrac           | 0.055664062  |
| explained_variance | 0.0998       |
| fps                | 53           |
| n_updates          | 718          |
| policy_entropy     | 0.44698068   |
| policy_loss        | -0.00576603  |
| serial_timesteps   | 183808       |
| time_elapsed       | 4.32e+03     |
| total_timesteps    | 183808       |
| value_loss         | 19.802893    |
-------------------------------------
505  out of limits. Steps 315  %advance 37.7  played reward 45.97  last penalty -100
------------------------------------
| approxkl           | 0.015748303 |
| clipfrac           | 0.0859375   |
| explained_variance | 0.627       |
| fps                | 52          |
| n_updates          | 719         |
| p

-------------------------------------
| approxkl           | 0.0040850975 |
| clipfrac           | 0.041015625  |
| explained_variance | 0.279        |
| fps                | 51           |
| n_updates          | 733          |
| policy_entropy     | 0.7233166    |
| policy_loss        | -0.005608655 |
| serial_timesteps   | 187648       |
| time_elapsed       | 4.4e+03      |
| total_timesteps    | 187648       |
| value_loss         | 0.5030032    |
-------------------------------------
-------------------------------------
| approxkl           | 0.0225356    |
| clipfrac           | 0.20117188   |
| explained_variance | -3.49        |
| fps                | 52           |
| n_updates          | 734          |
| policy_entropy     | 0.7526537    |
| policy_loss        | -0.010993049 |
| serial_timesteps   | 187904       |
| time_elapsed       | 4.4e+03      |
| total_timesteps    | 187904       |
| value_loss         | 0.61501795   |
-------------------------------------
513  Finaliz

--------------------------------------
| approxkl           | 0.008917279   |
| clipfrac           | 0.10644531    |
| explained_variance | 0.24          |
| fps                | 53            |
| n_updates          | 749           |
| policy_entropy     | 0.72113866    |
| policy_loss        | -0.0063070133 |
| serial_timesteps   | 191744        |
| time_elapsed       | 4.47e+03      |
| total_timesteps    | 191744        |
| value_loss         | 0.16202809    |
--------------------------------------
1  cut by time without progress. Steps 269  %advance 25.9  played reward -1.53  last penalty -20
2  cut by time without progress. Steps 269  %advance 25.9  played reward -1.53  last penalty -20
3  cut by time without progress. Steps 67  %advance 0.7  played reward -6.59  last penalty -20
4  cut by time without progress. Steps 743  %advance 96.7  played reward 21.52  last penalty -20
5  cut by time without progress. Steps 67  %advance 0.7  played reward -6.59  last penalty -20
6  cut by ti

-------------------------------------
| approxkl           | 0.016873825  |
| clipfrac           | 0.13476562   |
| explained_variance | -0.711       |
| fps                | 50           |
| n_updates          | 761          |
| policy_entropy     | 0.8027228    |
| policy_loss        | -0.001302219 |
| serial_timesteps   | 194816       |
| time_elapsed       | 4.62e+03     |
| total_timesteps    | 194816       |
| value_loss         | 0.06277375   |
-------------------------------------
525  cut by time without progress. Steps 599  %advance 61.2  played reward 92.34  last penalty -20
--------------------------------------
| approxkl           | 0.008001955   |
| clipfrac           | 0.080078125   |
| explained_variance | 0.894         |
| fps                | 49            |
| n_updates          | 762           |
| policy_entropy     | 0.87850416    |
| policy_loss        | 0.00022569927 |
| serial_timesteps   | 195072        |
| time_elapsed       | 4.63e+03      |
| total_timesteps

530  Finalized in Steps 870  with return=total reward 262.09484337591107
--------------------------------------
| approxkl           | 0.010486639   |
| clipfrac           | 0.12988281    |
| explained_variance | 0.00543       |
| fps                | 49            |
| n_updates          | 777           |
| policy_entropy     | 0.5445732     |
| policy_loss        | -0.0027540526 |
| serial_timesteps   | 198912        |
| time_elapsed       | 4.7e+03       |
| total_timesteps    | 198912        |
| value_loss         | 106.79756     |
--------------------------------------
--------------------------------------
| approxkl           | 0.015164393   |
| clipfrac           | 0.22363281    |
| explained_variance | -0.38         |
| fps                | 49            |
| n_updates          | 778           |
| policy_entropy     | 0.6869105     |
| policy_loss        | -0.0126042655 |
| serial_timesteps   | 199168        |
| time_elapsed       | 4.71e+03      |
| total_timesteps    | 199168 

--------------------------------------
| approxkl           | 0.006184284   |
| clipfrac           | 0.08886719    |
| explained_variance | -0.0168       |
| fps                | 52            |
| n_updates          | 793           |
| policy_entropy     | 0.7418726     |
| policy_loss        | -0.0030130567 |
| serial_timesteps   | 203008        |
| time_elapsed       | 4.79e+03      |
| total_timesteps    | 203008        |
| value_loss         | 0.05086984    |
--------------------------------------
537  Finalized in Steps 840  with return=total reward 253.24944876741728
-------------------------------------
| approxkl           | 0.0038082716 |
| clipfrac           | 0.02734375   |
| explained_variance | 0.121        |
| fps                | 51           |
| n_updates          | 794          |
| policy_entropy     | 0.72639436   |
| policy_loss        | 0.0011749249 |
| serial_timesteps   | 203264       |
| time_elapsed       | 4.79e+03     |
| total_timesteps    | 203264       |
| 

-------------------------------------
| approxkl           | 0.010321348  |
| clipfrac           | 0.09667969   |
| explained_variance | 0.555        |
| fps                | 50           |
| n_updates          | 805          |
| policy_entropy     | 0.7000912    |
| policy_loss        | -0.002170578 |
| serial_timesteps   | 206080       |
| time_elapsed       | 5e+03        |
| total_timesteps    | 206080       |
| value_loss         | 0.068929926  |
-------------------------------------
541  cut by time without progress. Steps 798  %advance 97.4  played reward 139.91  last penalty -20
-------------------------------------
| approxkl           | 0.020665674  |
| clipfrac           | 0.15820312   |
| explained_variance | 0.626        |
| fps                | 52           |
| n_updates          | 806          |
| policy_entropy     | 0.69810545   |
| policy_loss        | 0.0017548292 |
| serial_timesteps   | 206336       |
| time_elapsed       | 5e+03        |
| total_timesteps    | 206

548  cut by time without progress. Steps 540  %advance 58.4  played reward 80.22  last penalty -20
-------------------------------------
| approxkl           | 0.010142884  |
| clipfrac           | 0.140625     |
| explained_variance | 0.893        |
| fps                | 48           |
| n_updates          | 821          |
| policy_entropy     | 0.6386661    |
| policy_loss        | -0.008937102 |
| serial_timesteps   | 210176       |
| time_elapsed       | 5.08e+03     |
| total_timesteps    | 210176       |
| value_loss         | 13.158752    |
-------------------------------------
--------------------------------------
| approxkl           | 0.00862862    |
| clipfrac           | 0.14160156    |
| explained_variance | 0.954         |
| fps                | 53            |
| n_updates          | 822           |
| policy_entropy     | 0.73952055    |
| policy_loss        | -0.0075341812 |
| serial_timesteps   | 210432        |
| time_elapsed       | 5.09e+03      |
| total_timesteps

554  Finalized in Steps 691  with return=total reward 244.51121805259288
--------------------------------------
| approxkl           | 0.004659264   |
| clipfrac           | 0.0703125     |
| explained_variance | 0.02          |
| fps                | 51            |
| n_updates          | 837           |
| policy_entropy     | 0.5856091     |
| policy_loss        | -0.0046967715 |
| serial_timesteps   | 214272        |
| time_elapsed       | 5.16e+03      |
| total_timesteps    | 214272        |
| value_loss         | 108.84709     |
--------------------------------------
-------------------------------------
| approxkl           | 0.011908113  |
| clipfrac           | 0.11621094   |
| explained_variance | 0.972        |
| fps                | 54           |
| n_updates          | 838          |
| policy_entropy     | 0.6900008    |
| policy_loss        | -0.005459809 |
| serial_timesteps   | 214528       |
| time_elapsed       | 5.17e+03     |
| total_timesteps    | 214528       |
| 

6  Finalized in Steps 692  with return=total reward 130.0957746478872
7  Finalized in Steps 663  with return=total reward 132.9957746478872
8  Finalized in Steps 822  with return=total reward 117.09577464788751
9  Finalized in Steps 669  with return=total reward 132.3957746478872
10  Finalized in Steps 771  with return=total reward 122.19577464788742
Track generation: 1144..1433 -> 289-tiles track, complex 12
Eval num_timesteps=217600, episode_reward=132.36 +/- 8.17
Episode length: 669.05 +/- 81.80
New best mean reward!
--------------------------------------
| approxkl           | 0.009524728   |
| clipfrac           | 0.16015625    |
| explained_variance | 0.0565        |
| fps                | 1             |
| n_updates          | 850           |
| policy_entropy     | 0.66501874    |
| policy_loss        | -0.0007899505 |
| serial_timesteps   | 217600        |
| time_elapsed       | 5.23e+03      |
| total_timesteps    | 217600        |
| value_loss         | 104.08256     |
------

567  cut by time without progress. Steps 577  %advance 57.1  played reward 79.32  last penalty -20
------------------------------------
| approxkl           | 0.015703471 |
| clipfrac           | 0.1875      |
| explained_variance | 0.74        |
| fps                | 51          |
| n_updates          | 864         |
| policy_entropy     | 0.92692167  |
| policy_loss        | 0.005133464 |
| serial_timesteps   | 221184      |
| time_elapsed       | 5.48e+03    |
| total_timesteps    | 221184      |
| value_loss         | 18.023384   |
------------------------------------
--------------------------------------
| approxkl           | 0.003308027   |
| clipfrac           | 0.036132812   |
| explained_variance | 0.784         |
| fps                | 51            |
| n_updates          | 865           |
| policy_entropy     | 0.51611066    |
| policy_loss        | -0.0005534503 |
| serial_timesteps   | 221440        |
| time_elapsed       | 5.48e+03      |
| total_timesteps    | 221440 

--------------------------------------
| approxkl           | 0.004519726   |
| clipfrac           | 0.0546875     |
| explained_variance | 0.386         |
| fps                | 48            |
| n_updates          | 879           |
| policy_entropy     | 0.4087857     |
| policy_loss        | -0.0062727844 |
| serial_timesteps   | 225024        |
| time_elapsed       | 5.56e+03      |
| total_timesteps    | 225024        |
| value_loss         | 0.42877188    |
--------------------------------------
-------------------------------------
| approxkl           | 0.003184677  |
| clipfrac           | 0.033203125  |
| explained_variance | 0.602        |
| fps                | 52           |
| n_updates          | 880          |
| policy_entropy     | 0.5229085    |
| policy_loss        | -0.012295682 |
| serial_timesteps   | 225280       |
| time_elapsed       | 5.56e+03     |
| total_timesteps    | 225280       |
| value_loss         | 4.104197     |
-------------------------------------

-------------------------------------
| approxkl           | 0.011656824  |
| clipfrac           | 0.10449219   |
| explained_variance | 0.88         |
| fps                | 49           |
| n_updates          | 894          |
| policy_entropy     | 0.49759752   |
| policy_loss        | -0.008249989 |
| serial_timesteps   | 228864       |
| time_elapsed       | 5.63e+03     |
| total_timesteps    | 228864       |
| value_loss         | 7.4135675    |
-------------------------------------
587  cut by time without progress. Steps 312  %advance 37.1  played reward 44.64  last penalty -20
--------------------------------------
| approxkl           | 0.0026122564  |
| clipfrac           | 0.029296875   |
| explained_variance | 0.904         |
| fps                | 52            |
| n_updates          | 895           |
| policy_entropy     | 0.55958045    |
| policy_loss        | -0.0048060734 |
| serial_timesteps   | 229120        |
| time_elapsed       | 5.64e+03      |
| total_timesteps

-------------------------------------
| approxkl           | 0.0027350825 |
| clipfrac           | 0.03515625   |
| explained_variance | 0.562        |
| fps                | 50           |
| n_updates          | 905          |
| policy_entropy     | 0.66054803   |
| policy_loss        | 0.0028682225 |
| serial_timesteps   | 231680       |
| time_elapsed       | 5.8e+03      |
| total_timesteps    | 231680       |
| value_loss         | 9.860327     |
-------------------------------------
595  cut by time without progress. Steps 675  %advance 68.5  played reward 89.47  last penalty -20
-------------------------------------
| approxkl           | 0.006877721  |
| clipfrac           | 0.061523438  |
| explained_variance | 0.594        |
| fps                | 51           |
| n_updates          | 906          |
| policy_entropy     | 0.2989977    |
| policy_loss        | -0.009006309 |
| serial_timesteps   | 231936       |
| time_elapsed       | 5.8e+03      |
| total_timesteps    | 2319

604  cut by time without progress. Steps 556  %advance 73.9  played reward 93.35  last penalty -20
-------------------------------------
| approxkl           | 0.006794286  |
| clipfrac           | 0.103515625  |
| explained_variance | 0.918        |
| fps                | 49           |
| n_updates          | 920          |
| policy_entropy     | 0.72090244   |
| policy_loss        | -0.002829393 |
| serial_timesteps   | 235520       |
| time_elapsed       | 5.87e+03     |
| total_timesteps    | 235520       |
| value_loss         | 9.940479     |
-------------------------------------
605  cut by time without progress. Steps 215  %advance 14.2  played reward 17.16  last penalty -20
--------------------------------------
| approxkl           | 0.0071840403  |
| clipfrac           | 0.09863281    |
| explained_variance | 0.869         |
| fps                | 52            |
| n_updates          | 921           |
| policy_entropy     | 0.577779      |
| policy_loss        | -0.002099110

-------------------------------------
| approxkl           | 0.004328994  |
| clipfrac           | 0.048828125  |
| explained_variance | 0.787        |
| fps                | 45           |
| n_updates          | 935          |
| policy_entropy     | 0.4785681    |
| policy_loss        | -0.010149099 |
| serial_timesteps   | 239360       |
| time_elapsed       | 5.95e+03     |
| total_timesteps    | 239360       |
| value_loss         | 3.7007349    |
-------------------------------------
614  cut by time without progress. Steps 438  %advance 56.8  played reward 70.23  last penalty -20
-------------------------------------
| approxkl           | 0.003957649  |
| clipfrac           | 0.05078125   |
| explained_variance | 0.817        |
| fps                | 49           |
| n_updates          | 936          |
| policy_entropy     | 0.37844515   |
| policy_loss        | -0.009304414 |
| serial_timesteps   | 239616       |
| time_elapsed       | 5.95e+03     |
| total_timesteps    | 2396

624  Finalized in Steps 632  with return=total reward 237.62378608529912
1  Finalized in Steps 484  with return=total reward 150.82779922779915
2  Finalized in Steps 484  with return=total reward 150.82779922779915
3  Finalized in Steps 530  with return=total reward 145.8416988416987
4  Finalized in Steps 499  with return=total reward 149.32779922779918
5  Finalized in Steps 489  with return=total reward 150.32779922779918
6  Finalized in Steps 494  with return=total reward 149.82779922779918
7  Finalized in Steps 482  with return=total reward 151.02779922779916
8  Finalized in Steps 484  with return=total reward 150.82779922779918
9  Finalized in Steps 484  with return=total reward 150.82779922779918
10  Finalized in Steps 505  with return=total reward 148.72779922779915
Track generation: 864..1083 -> 219-tiles track, complex 12
1  cut by time without progress. Steps 326  %advance 61.4  played reward 28.05  last penalty -20
2  cut by time without progress. Steps 295  %advance 53.2  pl

631  cut by time without progress. Steps 320  %advance 40.6  played reward 48.75  last penalty -20
--------------------------------------
| approxkl           | 0.0019795455  |
| clipfrac           | 0.028320312   |
| explained_variance | 0.898         |
| fps                | 53            |
| n_updates          | 962           |
| policy_entropy     | 0.3368936     |
| policy_loss        | -0.0042062607 |
| serial_timesteps   | 246272        |
| time_elapsed       | 6.2e+03       |
| total_timesteps    | 246272        |
| value_loss         | 9.565037      |
--------------------------------------
632  cut by time without progress. Steps 285  %advance 34.6  played reward 40.99  last penalty -20
--------------------------------------
| approxkl           | 0.0007643148  |
| clipfrac           | 0.0078125     |
| explained_variance | 0.977         |
| fps                | 50            |
| n_updates          | 963           |
| policy_entropy     | 0.31488168    |
| policy_loss        |

641  cut by time without progress. Steps 667  %advance 96.8  played reward 134.54  last penalty -20
-------------------------------------
| approxkl           | 0.0023252321 |
| clipfrac           | 0.034179688  |
| explained_variance | -0.0116      |
| fps                | 50           |
| n_updates          | 977          |
| policy_entropy     | 0.16928202   |
| policy_loss        | 0.0020071422 |
| serial_timesteps   | 250112       |
| time_elapsed       | 6.28e+03     |
| total_timesteps    | 250112       |
| value_loss         | 25.849062    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0060650627  |
| clipfrac           | 0.053710938   |
| explained_variance | -0.64         |
| fps                | 50            |
| n_updates          | 978           |
| policy_entropy     | 0.20768847    |
| policy_loss        | -0.0017528993 |
| serial_timesteps   | 250368        |
| time_elapsed       | 6.28e+03      |
| total_timestep

-------------------------------------
| approxkl           | 0.004957715  |
| clipfrac           | 0.0546875    |
| explained_variance | 0.676        |
| fps                | 46           |
| n_updates          | 992          |
| policy_entropy     | 0.5258568    |
| policy_loss        | 4.085258e-05 |
| serial_timesteps   | 253952       |
| time_elapsed       | 6.35e+03     |
| total_timesteps    | 253952       |
| value_loss         | 7.6524053    |
-------------------------------------
-------------------------------------
| approxkl           | 0.010534166  |
| clipfrac           | 0.061523438  |
| explained_variance | -2.32        |
| fps                | 49           |
| n_updates          | 993          |
| policy_entropy     | 0.20946847   |
| policy_loss        | -0.004278073 |
| serial_timesteps   | 254208       |
| time_elapsed       | 6.36e+03     |
| total_timesteps    | 254208       |
| value_loss         | 1.1979743    |
-------------------------------------
650  cut by 

<stable_baselines.ppo2.ppo2.PPO2 at 0x28913a4acf8>

In [11]:
import pickle
root = 'ppo_cnn_gym-mod_'

file = root+'c{:d}_f{:d}_s{:d}_{}_u{:d}_e{:d}_p{}_bs{:d}'.format(game_color,fpst,skip,indicators,use,ept,patience,batch_size)

model.save(file, cloudpickle=True)
param_list=model.get_parameter_list()


In [12]:
#env1.reset_track()
env1.close()

In [13]:
## This model param  #2
use = 5       # number of times to use same track [1,100]
ept = 10       # different starting points on same track [1,20]
#patience = 2.0
seed = 20000
track_complexity = 12
updates = 500

if agent=='CarRacing-v2': 
    env2 = gym.make(agent, seed=seed, 
        use_track = use,       
        episodes_per_track = ept,  
        tr_complexity = track_complexity, 
        patience = patience,
        game_color=game_color,
        indicators = indicators,
        discre = actions,
        frames_per_state = fpst,
        skip_frames = skip   )  #here I use STD_REWARD, so no param needed
else: 
    env2 = gym.make(agent)

env2 = DummyVecEnv([lambda: env2])

In [14]:
## Training  #2
new_mod = False  #to change batch_size you need a new model !!
updates = 500

if new_mod:
    batch_size2 = 512
    model2 = PPO2(CnnPolicy, env2, verbose=1, n_steps=batch_size2, # seed=314, n_cpu_tf_sess=1,
             gamma=0.99, learning_rate=0.00025, nminibatches=epochs, ent_coef=0.01, vf_coef=0.5) 
    model2.load_parameters(param_list, exact_match=True)
    model2.learn(total_timesteps = updates*batch_size2, log_interval=1, callback=eval_callback)
else:
    model.set_env(env2)
    model.learn(total_timesteps = updates*batch_size, log_interval=1, callback=eval_callback)


Track generation: 1276..1598 -> 322-tiles track, complex 12
--------------------------------------
| approxkl           | 0.0033028494  |
| clipfrac           | 0.032226562   |
| explained_variance | -0.265        |
| fps                | 49            |
| n_updates          | 1             |
| policy_entropy     | 0.19470961    |
| policy_loss        | -0.0033594884 |
| serial_timesteps   | 256           |
| time_elapsed       | 0.004         |
| total_timesteps    | 256           |
| value_loss         | 1.9427452     |
--------------------------------------
1  cut by time without progress. Steps 434  %advance 56.0  played reward 12.15  last penalty -20
--------------------------------------
| approxkl           | 0.005588497   |
| clipfrac           | 0.080078125   |
| explained_variance | 0.692         |
| fps                | 48            |
| n_updates          | 2             |
| policy_entropy     | 0.42564395    |
| policy_loss        | -0.0078270985 |
| serial_timesteps   | 5

-------------------------------------
| approxkl           | 0.0028798399 |
| clipfrac           | 0.041992188  |
| explained_variance | 0.479        |
| fps                | 47           |
| n_updates          | 15           |
| policy_entropy     | 0.32011896   |
| policy_loss        | -0.01108386  |
| serial_timesteps   | 3840         |
| time_elapsed       | 71.5         |
| total_timesteps    | 3840         |
| value_loss         | 0.33724985   |
-------------------------------------
-------------------------------------
| approxkl           | 0.0032357692 |
| clipfrac           | 0.037109375  |
| explained_variance | 0.635        |
| fps                | 50           |
| n_updates          | 16           |
| policy_entropy     | 0.52155906   |
| policy_loss        | -0.009424256 |
| serial_timesteps   | 4096         |
| time_elapsed       | 76.9         |
| total_timesteps    | 4096         |
| value_loss         | 3.49379      |
-------------------------------------
12  cut by t

21  cut by time without progress. Steps 270  %advance 26.4  played reward -1.04  last penalty -20
-------------------------------------
| approxkl           | 0.008311932  |
| clipfrac           | 0.08984375   |
| explained_variance | 0.927        |
| fps                | 51           |
| n_updates          | 30           |
| policy_entropy     | 0.7663045    |
| policy_loss        | -0.012777058 |
| serial_timesteps   | 7680         |
| time_elapsed       | 147          |
| total_timesteps    | 7680         |
| value_loss         | 2.6094995    |
-------------------------------------
22  cut by time without progress. Steps 395  %advance 50.4  played reward 10.44  last penalty -20
-------------------------------------
| approxkl           | 0.011686539  |
| clipfrac           | 0.10449219   |
| explained_variance | 0.947        |
| fps                | 52           |
| n_updates          | 31           |
| policy_entropy     | 0.77260906   |
| policy_loss        | -0.023067785 |
| seri

33  cut by time without progress. Steps 282  %advance 28.9  played reward 0.24  last penalty -20
-------------------------------------
| approxkl           | 0.0062122466 |
| clipfrac           | 0.09667969   |
| explained_variance | 0.848        |
| fps                | 52           |
| n_updates          | 45           |
| policy_entropy     | 0.86296695   |
| policy_loss        | -0.011479665 |
| serial_timesteps   | 11520        |
| time_elapsed       | 222          |
| total_timesteps    | 11520        |
| value_loss         | 5.547432     |
-------------------------------------
34  cut by time without progress. Steps 355  %advance 33.0  played reward -3.0  last penalty -20
-------------------------------------
| approxkl           | 0.006688629  |
| clipfrac           | 0.083984375  |
| explained_variance | 0.708        |
| fps                | 51           |
| n_updates          | 46           |
| policy_entropy     | 0.82894206   |
| policy_loss        | -0.011867457 |
| serial

-------------------------------------
| approxkl           | 0.013080011  |
| clipfrac           | 0.10839844   |
| explained_variance | 0.927        |
| fps                | 51           |
| n_updates          | 56           |
| policy_entropy     | 0.44385624   |
| policy_loss        | -0.011398336 |
| serial_timesteps   | 14336        |
| time_elapsed       | 370          |
| total_timesteps    | 14336        |
| value_loss         | 0.3031834    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0015534937  |
| clipfrac           | 0.009765625   |
| explained_variance | 0.88          |
| fps                | 52            |
| n_updates          | 57            |
| policy_entropy     | 0.74712306    |
| policy_loss        | -0.0010020328 |
| serial_timesteps   | 14592         |
| time_elapsed       | 375           |
| total_timesteps    | 14592         |
| value_loss         | 1.7354673     |
--------------------------------------

50  cut by time without progress. Steps 552  %advance 76.6  played reward 20.91  last penalty -20
Track generation: 1090..1366 -> 276-tiles track, complex 12
--------------------------------------
| approxkl           | 0.018647112   |
| clipfrac           | 0.13183594    |
| explained_variance | 0.853         |
| fps                | 53            |
| n_updates          | 71            |
| policy_entropy     | 0.5633085     |
| policy_loss        | -0.0067430576 |
| serial_timesteps   | 18176         |
| time_elapsed       | 445           |
| total_timesteps    | 18176         |
| value_loss         | 1.6497136     |
--------------------------------------
--------------------------------------
| approxkl           | 0.004476542   |
| clipfrac           | 0.047851562   |
| explained_variance | -1.07         |
| fps                | 53            |
| n_updates          | 72            |
| policy_entropy     | 0.49474066    |
| policy_loss        | -0.0072553917 |
| serial_timesteps   | 

--------------------------------------
| approxkl           | 0.0035308115  |
| clipfrac           | 0.05078125    |
| explained_variance | -0.0834       |
| fps                | 53            |
| n_updates          | 86            |
| policy_entropy     | 0.4149689     |
| policy_loss        | -0.0036490064 |
| serial_timesteps   | 22016         |
| time_elapsed       | 520           |
| total_timesteps    | 22016         |
| value_loss         | 0.13109164    |
--------------------------------------
9  Finalized in Steps 513  with return=total reward 147.60909090909104
--------------------------------------
| approxkl           | 0.0037433181  |
| clipfrac           | 0.040039062   |
| explained_variance | 0.0648        |
| fps                | 53            |
| n_updates          | 87            |
| policy_entropy     | 0.4716073     |
| policy_loss        | -0.0009547622 |
| serial_timesteps   | 22272         |
| time_elapsed       | 524           |
| total_timesteps    | 22272    

9  cut by time without progress. Steps 524  %advance 68.9  played reward 15.27  last penalty -20
10  cut by time without progress. Steps 430  %advance 56.9  played reward 13.07  last penalty -20
Track generation: 916..1148 -> 232-tiles track, complex 12
1  cut by time without progress. Steps 494  %advance 86.5  played reward 36.41  last penalty -20
2  cut by time without progress. Steps 386  %advance 73.5  played reward 34.22  last penalty -20
3  cut by time without progress. Steps 357  %advance 63.6  played reward 27.17  last penalty -20
4  Finalized in Steps 497  with return=total reward 149.43419913419893
5  cut by time without progress. Steps 294  %advance 46.7  played reward 16.58  last penalty -20
6  Finalized in Steps 533  with return=total reward 145.834199134199
7  cut by time without progress. Steps 515  %advance 98.2  played reward 46.0  last penalty -20
8  Finalized in Steps 433  with return=total reward 155.83419913419883
9  Finalized in Steps 450  with return=total reward

-------------------------------------
| approxkl           | 0.0066311844 |
| clipfrac           | 0.06542969   |
| explained_variance | -0.293       |
| fps                | 53           |
| n_updates          | 113          |
| policy_entropy     | 0.25605857   |
| policy_loss        | -0.00700792  |
| serial_timesteps   | 28928        |
| time_elapsed       | 778          |
| total_timesteps    | 28928        |
| value_loss         | 0.041969508  |
-------------------------------------
22  Finalized in Steps 510  with return=total reward 148.27272727272742
-------------------------------------
| approxkl           | 0.015381213  |
| clipfrac           | 0.10449219   |
| explained_variance | 0.127        |
| fps                | 50           |
| n_updates          | 114          |
| policy_entropy     | 0.40269542   |
| policy_loss        | 0.0049927393 |
| serial_timesteps   | 29184        |
| time_elapsed       | 783          |
| total_timesteps    | 29184        |
| value_loss    

-------------------------------------
| approxkl           | 0.0029446697 |
| clipfrac           | 0.037109375  |
| explained_variance | 0.65         |
| fps                | 53           |
| n_updates          | 129          |
| policy_entropy     | 0.22722116   |
| policy_loss        | -0.005365601 |
| serial_timesteps   | 33024        |
| time_elapsed       | 855          |
| total_timesteps    | 33024        |
| value_loss         | 0.027214281  |
-------------------------------------
30  Finalized in Steps 569  with return=total reward 142.37272727272742
--------------------------------------
| approxkl           | 0.0026112236  |
| clipfrac           | 0.034179688   |
| explained_variance | 0.0397        |
| fps                | 54            |
| n_updates          | 130           |
| policy_entropy     | 0.18862152    |
| policy_loss        | -0.0011241428 |
| serial_timesteps   | 33280         |
| time_elapsed       | 859           |
| total_timesteps    | 33280         |
| val

38  Finalized in Steps 563  with return=total reward 142.9727272727274
--------------------------------------
| approxkl           | 0.0021050165  |
| clipfrac           | 0.028320312   |
| explained_variance | 0.0492        |
| fps                | 53            |
| n_updates          | 145           |
| policy_entropy     | 0.26969057    |
| policy_loss        | 0.00019606052 |
| serial_timesteps   | 37120         |
| time_elapsed       | 931           |
| total_timesteps    | 37120         |
| value_loss         | 131.46396     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015668175  |
| clipfrac           | 0.015625      |
| explained_variance | 0.4           |
| fps                | 53            |
| n_updates          | 146           |
| policy_entropy     | 0.23655471    |
| policy_loss        | -0.0027977908 |
| serial_timesteps   | 37376         |
| time_elapsed       | 936           |
| total_timesteps    | 37376    

-------------------------------------
| approxkl           | 0.0046513835 |
| clipfrac           | 0.05859375   |
| explained_variance | 0.43         |
| fps                | 43           |
| n_updates          | 157          |
| policy_entropy     | 0.28645745   |
| policy_loss        | -0.004744238 |
| serial_timesteps   | 40192        |
| time_elapsed       | 1.11e+03     |
| total_timesteps    | 40192        |
| value_loss         | 0.022609567  |
-------------------------------------
44  Finalized in Steps 503  with return=total reward 148.9727272727274
--------------------------------------
| approxkl           | 0.0010187558  |
| clipfrac           | 0.0126953125  |
| explained_variance | 0.0622        |
| fps                | 44            |
| n_updates          | 158           |
| policy_entropy     | 0.20823233    |
| policy_loss        | -0.0022506001 |
| serial_timesteps   | 40448         |
| time_elapsed       | 1.11e+03      |
| total_timesteps    | 40448         |
| valu

2  cut by time without progress. Steps 298  %advance 37.2  played reward 6.37  last penalty -20
-------------------------------------
| approxkl           | 0.025271688  |
| clipfrac           | 0.24902344   |
| explained_variance | 0.591        |
| fps                | 43           |
| n_updates          | 173          |
| policy_entropy     | 0.5668056    |
| policy_loss        | -0.014540001 |
| serial_timesteps   | 44288        |
| time_elapsed       | 1.2e+03      |
| total_timesteps    | 44288        |
| value_loss         | 12.184819    |
-------------------------------------
3  cut by time without progress. Steps 373  %advance 48.6  played reward 10.64  last penalty -20
---------------------------------------
| approxkl           | 0.0116870785   |
| clipfrac           | 0.14160156     |
| explained_variance | 0.722          |
| fps                | 47             |
| n_updates          | 174            |
| policy_entropy     | 0.44071501     |
| policy_loss        | -0.0009736

11  cut by time without progress. Steps 480  %advance 72.5  played reward 23.86  last penalty -20
-------------------------------------
| approxkl           | 0.0053620012 |
| clipfrac           | 0.064453125  |
| explained_variance | 0.0202       |
| fps                | 47           |
| n_updates          | 188          |
| policy_entropy     | 0.34317786   |
| policy_loss        | 0.0009789    |
| serial_timesteps   | 48128        |
| time_elapsed       | 1.28e+03     |
| total_timesteps    | 48128        |
| value_loss         | 18.059761    |
-------------------------------------
-------------------------------------
| approxkl           | 0.010927272  |
| clipfrac           | 0.1328125    |
| explained_variance | 0.583        |
| fps                | 43           |
| n_updates          | 189          |
| policy_entropy     | 0.6157481    |
| policy_loss        | -0.004281786 |
| serial_timesteps   | 48384        |
| time_elapsed       | 1.29e+03     |
| total_timesteps    | 48384

5  Finalized in Steps 560  with return=total reward 143.25373134328362
6  cut by time without progress. Steps 338  %advance 47.3  played reward 12.94  last penalty -20
7  cut by time without progress. Steps 276  %advance 37.3  played reward 9.06  last penalty -20
8  cut by time without progress. Steps 264  %advance 27.2  played reward 0.19  last penalty -20
9  cut by time without progress. Steps 529  %advance 78.7  played reward 25.18  last penalty -20
10  cut by time without progress. Steps 308  %advance 41.0  played reward 9.59  last penalty -20
Track generation: 1409..1764 -> 355-tiles track, complex 12
Eval num_timesteps=51200, episode_reward=6.01 +/- 34.27
Episode length: 384.80 +/- 121.62
-------------------------------------
| approxkl           | 0.034915898  |
| clipfrac           | 0.26367188   |
| explained_variance | 0.492        |
| fps                | 2            |
| n_updates          | 200          |
| policy_entropy     | 0.64885926   |
| policy_loss        | -0.0234

26  cut by time without progress. Steps 263  %advance 24.7  played reward -2.67  last penalty -20
-------------------------------------
| approxkl           | 0.010301935  |
| clipfrac           | 0.091796875  |
| explained_variance | 0.624        |
| fps                | 42           |
| n_updates          | 214          |
| policy_entropy     | 0.5078477    |
| policy_loss        | -0.012127375 |
| serial_timesteps   | 54784        |
| time_elapsed       | 1.55e+03     |
| total_timesteps    | 54784        |
| value_loss         | 9.690486     |
-------------------------------------
-------------------------------------
| approxkl           | 0.0034267143 |
| clipfrac           | 0.0390625    |
| explained_variance | 0.182        |
| fps                | 43           |
| n_updates          | 215          |
| policy_entropy     | 0.36964005   |
| policy_loss        | -0.003659529 |
| serial_timesteps   | 55040        |
| time_elapsed       | 1.56e+03     |
| total_timesteps    | 55040

35  cut by time without progress. Steps 312  %advance 34.5  played reward 2.23  last penalty -20
-------------------------------------
| approxkl           | 0.0054646684 |
| clipfrac           | 0.07519531   |
| explained_variance | -0.141       |
| fps                | 42           |
| n_updates          | 229          |
| policy_entropy     | 0.41141438   |
| policy_loss        | 0.004809195  |
| serial_timesteps   | 58624        |
| time_elapsed       | 1.64e+03     |
| total_timesteps    | 58624        |
| value_loss         | 18.612562    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0113171805  |
| clipfrac           | 0.08300781    |
| explained_variance | 0.787         |
| fps                | 44            |
| n_updates          | 230           |
| policy_entropy     | 0.4546892     |
| policy_loss        | -0.0123691205 |
| serial_timesteps   | 58880         |
| time_elapsed       | 1.65e+03      |
| total_timesteps  

---------------------------------------
| approxkl           | 0.008472454    |
| clipfrac           | 0.10546875     |
| explained_variance | -0.0267        |
| fps                | 46             |
| n_updates          | 244            |
| policy_entropy     | 0.6284585      |
| policy_loss        | -0.00024314586 |
| serial_timesteps   | 62464          |
| time_elapsed       | 1.73e+03       |
| total_timesteps    | 62464          |
| value_loss         | 15.56499       |
---------------------------------------
43  cut by time without progress. Steps 301  %advance 26.2  played reward -4.9  last penalty -20
-------------------------------------
| approxkl           | 0.009372567  |
| clipfrac           | 0.15234375   |
| explained_variance | 0.14         |
| fps                | 40           |
| n_updates          | 245          |
| policy_entropy     | 0.6661886    |
| policy_loss        | -0.014241853 |
| serial_timesteps   | 62720        |
| time_elapsed       | 1.74e+03     |
| t

-------------------------------------
| approxkl           | 0.0046347594 |
| clipfrac           | 0.0546875    |
| explained_variance | 0.0857       |
| fps                | 43           |
| n_updates          | 255          |
| policy_entropy     | 0.5292293    |
| policy_loss        | -0.004155268 |
| serial_timesteps   | 65280        |
| time_elapsed       | 1.91e+03     |
| total_timesteps    | 65280        |
| value_loss         | 13.690218    |
-------------------------------------
1  cut by time without progress. Steps 272  %advance 35.7  played reward 7.9  last penalty -20
--------------------------------------
| approxkl           | 0.007980484   |
| clipfrac           | 0.109375      |
| explained_variance | 0.442         |
| fps                | 41            |
| n_updates          | 256           |
| policy_entropy     | 0.70053065    |
| policy_loss        | -0.0040153665 |
| serial_timesteps   | 65536         |
| time_elapsed       | 1.92e+03      |
| total_timesteps    

10  cut by time without progress. Steps 325  %advance 45.7  played reward 12.59  last penalty -20
--------------------------------------
| approxkl           | 0.008328812   |
| clipfrac           | 0.13574219    |
| explained_variance | 0.446         |
| fps                | 42            |
| n_updates          | 270           |
| policy_entropy     | 0.78337044    |
| policy_loss        | -0.0070400266 |
| serial_timesteps   | 69120         |
| time_elapsed       | 2e+03         |
| total_timesteps    | 69120         |
| value_loss         | 11.033107     |
--------------------------------------
------------------------------------
| approxkl           | 0.01650249  |
| clipfrac           | 0.19140625  |
| explained_variance | 0.91        |
| fps                | 43          |
| n_updates          | 271         |
| policy_entropy     | 0.72447234  |
| policy_loss        | -0.01675372 |
| serial_timesteps   | 69376       |
| time_elapsed       | 2e+03       |
| total_timesteps    | 69

19  cut by time without progress. Steps 368  %advance 56.4  played reward 19.01  last penalty -20
------------------------------------
| approxkl           | 0.011921406 |
| clipfrac           | 0.14160156  |
| explained_variance | 0.917       |
| fps                | 45          |
| n_updates          | 285         |
| policy_entropy     | 0.56094354  |
| policy_loss        | -0.01873335 |
| serial_timesteps   | 72960       |
| time_elapsed       | 2.09e+03    |
| total_timesteps    | 72960       |
| value_loss         | 4.0231867   |
------------------------------------
20  cut by time without progress. Steps 326  %advance 46.0  played reward 12.85  last penalty -20
-------------------------------------
| approxkl           | 0.008805447  |
| clipfrac           | 0.11621094   |
| explained_variance | 0.919        |
| fps                | 43           |
| n_updates          | 286          |
| policy_entropy     | 0.5809026    |
| policy_loss        | -0.009907765 |
| serial_timesteps 

1  Finalized in Steps 480  with return=total reward 151.23664122137387
2  Finalized in Steps 480  with return=total reward 151.23664122137387
3  cut by time without progress. Steps 407  %advance 57.2  played reward 15.88  last penalty -20
4  cut by time without progress. Steps 324  %advance 47.3  played reward 14.26  last penalty -20
5  cut by time without progress. Steps 272  %advance 37.0  played reward 9.15  last penalty -20
6  cut by time without progress. Steps 346  %advance 27.0  played reward -8.54  last penalty -20
7  cut by time without progress. Steps 224  %advance 17.1  played reward -6.65  last penalty -20
8  Finalized in Steps 496  with return=total reward 149.63664122137388
9  cut by time without progress. Steps 83  %advance 1.5  played reward -7.43  last penalty -20
10  cut by time without progress. Steps 568  %advance 89.6  played reward 31.84  last penalty -20
Track generation: 1070..1341 -> 271-tiles track, complex 12
1  cut by time without progress. Steps 286  %advan

33  Finalized in Steps 518  with return=total reward 147.48571428571427
--------------------------------------
| approxkl           | 0.00412207    |
| clipfrac           | 0.055664062   |
| explained_variance | -0.0157       |
| fps                | 44            |
| n_updates          | 312           |
| policy_entropy     | 0.2614414     |
| policy_loss        | -0.0028213963 |
| serial_timesteps   | 79872         |
| time_elapsed       | 2.35e+03      |
| total_timesteps    | 79872         |
| value_loss         | 145.92178     |
--------------------------------------
--------------------------------------
| approxkl           | 0.002636538   |
| clipfrac           | 0.022460938   |
| explained_variance | 0.314         |
| fps                | 43            |
| n_updates          | 313           |
| policy_entropy     | 0.2279571     |
| policy_loss        | -0.0015456749 |
| serial_timesteps   | 80128         |
| time_elapsed       | 2.35e+03      |
| total_timesteps    | 80128   

-------------------------------------
| approxkl           | 0.00615084   |
| clipfrac           | 0.06542969   |
| explained_variance | 0.427        |
| fps                | 42           |
| n_updates          | 327          |
| policy_entropy     | 0.5104954    |
| policy_loss        | -0.009629231 |
| serial_timesteps   | 83712        |
| time_elapsed       | 2.43e+03     |
| total_timesteps    | 83712        |
| value_loss         | 12.835293    |
-------------------------------------
41  cut by time without progress. Steps 269  %advance 36.0  played reward 8.55  last penalty -20
-------------------------------------
| approxkl           | 0.016326863  |
| clipfrac           | 0.17285156   |
| explained_variance | 0.43         |
| fps                | 42           |
| n_updates          | 328          |
| policy_entropy     | 0.61954784   |
| policy_loss        | 0.0033656952 |
| serial_timesteps   | 83968        |
| time_elapsed       | 2.44e+03     |
| total_timesteps    | 83968 

-------------------------------------
| approxkl           | 0.005719458  |
| clipfrac           | 0.08496094   |
| explained_variance | 0.727        |
| fps                | 40           |
| n_updates          | 342          |
| policy_entropy     | 0.5642829    |
| policy_loss        | -0.010741292 |
| serial_timesteps   | 87552        |
| time_elapsed       | 2.52e+03     |
| total_timesteps    | 87552        |
| value_loss         | 0.3968261    |
-------------------------------------
49  cut by time without progress. Steps 411  %advance 55.7  played reward 13.28  last penalty -20
-------------------------------------
| approxkl           | 0.0060403524 |
| clipfrac           | 0.07519531   |
| explained_variance | 0.589        |
| fps                | 40           |
| n_updates          | 343          |
| policy_entropy     | 0.625307     |
| policy_loss        | -0.007814836 |
| serial_timesteps   | 87808        |
| time_elapsed       | 2.53e+03     |
| total_timesteps    | 87808

-------------------------------------
| approxkl           | 0.005353147  |
| clipfrac           | 0.0625       |
| explained_variance | 0.517        |
| fps                | 49           |
| n_updates          | 353          |
| policy_entropy     | 0.3675142    |
| policy_loss        | -0.010556392 |
| serial_timesteps   | 90368        |
| time_elapsed       | 2.69e+03     |
| total_timesteps    | 90368        |
| value_loss         | 0.62940323   |
-------------------------------------
6  cut by time without progress. Steps 571  %advance 95.6  played reward 37.48  last penalty -20
---------------------------------------
| approxkl           | 0.001500061    |
| clipfrac           | 0.025390625    |
| explained_variance | 0.0737         |
| fps                | 45             |
| n_updates          | 354            |
| policy_entropy     | 0.31506267     |
| policy_loss        | -0.00022526854 |
| serial_timesteps   | 90624          |
| time_elapsed       | 2.7e+03        |
| total_t

-------------------------------------
| approxkl           | 0.0095772445 |
| clipfrac           | 0.13867188   |
| explained_variance | 0.534        |
| fps                | 43           |
| n_updates          | 367          |
| policy_entropy     | 0.69057626   |
| policy_loss        | -0.01073383  |
| serial_timesteps   | 93952        |
| time_elapsed       | 2.77e+03     |
| total_timesteps    | 93952        |
| value_loss         | 18.365444    |
-------------------------------------
------------------------------------
| approxkl           | 0.009392904 |
| clipfrac           | 0.09863281  |
| explained_variance | -0.328      |
| fps                | 43          |
| n_updates          | 368         |
| policy_entropy     | 0.34029278  |
| policy_loss        | -0.00584403 |
| serial_timesteps   | 94208       |
| time_elapsed       | 2.78e+03    |
| total_timesteps    | 94208       |
| value_loss         | 0.23831354  |
------------------------------------
-------------------------

-------------------------------------
| approxkl           | 0.013033283  |
| clipfrac           | 0.14746094   |
| explained_variance | 0.525        |
| fps                | 48           |
| n_updates          | 383          |
| policy_entropy     | 0.45547435   |
| policy_loss        | -0.010021953 |
| serial_timesteps   | 98048        |
| time_elapsed       | 2.86e+03     |
| total_timesteps    | 98048        |
| value_loss         | 0.08464727   |
-------------------------------------
27  Finalized in Steps 477  with return=total reward 151.11889763779533
--------------------------------------
| approxkl           | 0.004648842   |
| clipfrac           | 0.061523438   |
| explained_variance | -0.0137       |
| fps                | 46            |
| n_updates          | 384           |
| policy_entropy     | 0.46478075    |
| policy_loss        | -0.0033348128 |
| serial_timesteps   | 98304         |
| time_elapsed       | 2.87e+03      |
| total_timesteps    | 98304         |
| val

-------------------------------------
| approxkl           | 0.010956741  |
| clipfrac           | 0.10546875   |
| explained_variance | 0.396        |
| fps                | 36           |
| n_updates          | 399          |
| policy_entropy     | 0.45508665   |
| policy_loss        | -0.014984272 |
| serial_timesteps   | 102144       |
| time_elapsed       | 2.95e+03     |
| total_timesteps    | 102144       |
| value_loss         | 0.15866885   |
-------------------------------------
35  cut by time without progress. Steps 657  %advance 99.2  played reward 32.82  last penalty -20
1  cut by time without progress. Steps 564  %advance 99.2  played reward 41.84  last penalty -20
2  cut by time without progress. Steps 564  %advance 99.2  played reward 41.84  last penalty -20
3  cut by time without progress. Steps 602  %advance 99.6  played reward 38.04  last penalty -20
4  cut by time without progress. Steps 594  %advance 99.2  played reward 38.84  last penalty -20
5  Finalized in Step

--------------------------------------
| approxkl           | 0.004790335   |
| clipfrac           | 0.061523438   |
| explained_variance | 0.918         |
| fps                | 49            |
| n_updates          | 411           |
| policy_entropy     | 0.69258875    |
| policy_loss        | -0.0064969314 |
| serial_timesteps   | 105216        |
| time_elapsed       | 3.21e+03      |
| total_timesteps    | 105216        |
| value_loss         | 0.060775165   |
--------------------------------------
41  cut by time without progress. Steps 483  %advance 85.0  played reward 36.05  last penalty -20
-------------------------------------
| approxkl           | 0.016429648  |
| clipfrac           | 0.15332031   |
| explained_variance | 0.449        |
| fps                | 48           |
| n_updates          | 412          |
| policy_entropy     | 0.57695913   |
| policy_loss        | -0.009618249 |
| serial_timesteps   | 105472       |
| time_elapsed       | 3.21e+03     |
| total_timeste

-------------------------------------
| approxkl           | 0.0058805556 |
| clipfrac           | 0.06933594   |
| explained_variance | 0.113        |
| fps                | 42           |
| n_updates          | 426          |
| policy_entropy     | 0.6941992    |
| policy_loss        | -0.012953825 |
| serial_timesteps   | 109056       |
| time_elapsed       | 3.29e+03     |
| total_timesteps    | 109056       |
| value_loss         | 15.401965    |
-------------------------------------
49  cut by time without progress. Steps 255  %advance 32.6  played reward 6.48  last penalty -20
--------------------------------------
| approxkl           | 0.009302993   |
| clipfrac           | 0.10644531    |
| explained_variance | 0.536         |
| fps                | 41            |
| n_updates          | 427           |
| policy_entropy     | 0.6692312     |
| policy_loss        | -0.0034321328 |
| serial_timesteps   | 109312        |
| time_elapsed       | 3.3e+03       |
| total_timesteps  

8  cut by time without progress. Steps 218  %advance 25.0  played reward 2.54  last penalty -20
-------------------------------------
| approxkl           | 0.00403187   |
| clipfrac           | 0.048828125  |
| explained_variance | 0.827        |
| fps                | 49           |
| n_updates          | 441          |
| policy_entropy     | 0.6507344    |
| policy_loss        | -0.010536233 |
| serial_timesteps   | 112896       |
| time_elapsed       | 3.38e+03     |
| total_timesteps    | 112896       |
| value_loss         | 9.039753     |
-------------------------------------
-------------------------------------
| approxkl           | 0.006516384  |
| clipfrac           | 0.07910156   |
| explained_variance | 0.85         |
| fps                | 47           |
| n_updates          | 442          |
| policy_entropy     | 0.70769954   |
| policy_loss        | -0.011193118 |
| serial_timesteps   | 113152       |
| time_elapsed       | 3.38e+03     |
| total_timesteps    | 113152 

15  Finalized in Steps 526  with return=total reward 146.64242424242414
-------------------------------------
| approxkl           | 0.009318103  |
| clipfrac           | 0.15234375   |
| explained_variance | -0.0211      |
| fps                | 54           |
| n_updates          | 453          |
| policy_entropy     | 1.0344791    |
| policy_loss        | 0.0047072684 |
| serial_timesteps   | 115968       |
| time_elapsed       | 3.57e+03     |
| total_timesteps    | 115968       |
| value_loss         | 149.25044    |
-------------------------------------
-------------------------------------
| approxkl           | 0.017238196  |
| clipfrac           | 0.22070312   |
| explained_variance | 0.539        |
| fps                | 44           |
| n_updates          | 454          |
| policy_entropy     | 1.0219117    |
| policy_loss        | -0.016089197 |
| serial_timesteps   | 116224       |
| time_elapsed       | 3.57e+03     |
| total_timesteps    | 116224       |
| value_loss    

--------------------------------------
| approxkl           | 0.0029810343  |
| clipfrac           | 0.038085938   |
| explained_variance | 0.725         |
| fps                | 50            |
| n_updates          | 469           |
| policy_entropy     | 0.6612879     |
| policy_loss        | -0.0072628623 |
| serial_timesteps   | 120064        |
| time_elapsed       | 3.65e+03      |
| total_timesteps    | 120064        |
| value_loss         | 0.09410011    |
--------------------------------------
22  Finalized in Steps 609  with return=total reward 138.34242424242422
-------------------------------------
| approxkl           | 0.009230081  |
| clipfrac           | 0.12109375   |
| explained_variance | 0.0974       |
| fps                | 54           |
| n_updates          | 470          |
| policy_entropy     | 0.6785421    |
| policy_loss        | -0.008341743 |
| serial_timesteps   | 120320       |
| time_elapsed       | 3.65e+03     |
| total_timesteps    | 120320       |
| v

32  Finalized in Steps 498  with return=total reward 149.44242424242418
--------------------------------------
| approxkl           | 0.0032124014  |
| clipfrac           | 0.036132812   |
| explained_variance | 0.00559       |
| fps                | 52            |
| n_updates          | 484           |
| policy_entropy     | 0.5737371     |
| policy_loss        | -0.0054955482 |
| serial_timesteps   | 123904        |
| time_elapsed       | 3.72e+03      |
| total_timesteps    | 123904        |
| value_loss         | 141.28648     |
--------------------------------------
33  cut by time without progress. Steps 248  %advance 21.9  played reward -3.86  last penalty -20
-------------------------------------
| approxkl           | 0.013255206  |
| clipfrac           | 0.140625     |
| explained_variance | 0.483        |
| fps                | 49           |
| n_updates          | 485          |
| policy_entropy     | 0.81978047   |
| policy_loss        | 0.0006466417 |
| serial_timesteps 

-------------------------------------
| approxkl           | 0.009337889  |
| clipfrac           | 0.1484375    |
| explained_variance | 0.0372       |
| fps                | 50           |
| n_updates          | 499          |
| policy_entropy     | 0.91562873   |
| policy_loss        | -0.010252877 |
| serial_timesteps   | 127744       |
| time_elapsed       | 3.8e+03      |
| total_timesteps    | 127744       |
| value_loss         | 141.6658     |
-------------------------------------
41  cut by time without progress. Steps 310  %advance 39.3  played reward 7.73  last penalty -20
1  cut by time without progress. Steps 323  %advance 51.3  played reward 17.99  last penalty -20
2  cut by time without progress. Steps 323  %advance 51.3  played reward 17.99  last penalty -20
3  cut by time without progress. Steps 299  %advance 38.0  played reward 7.45  last penalty -20
4  cut by time without progress. Steps 556  %advance 98.8  played reward 42.14  last penalty -20
5  cut by time without

In [15]:
#import pickle
if new_mod:
    file = root+'c{:d}_f{:d}_s{:d}_{}_u{:d}_e{:d}_p{}_bs{:d}'.format(game_color,fpst,skip,indicators,use,ept,patience,batch_size)
    model2.save(file, cloudpickle=True)
    param_list=model2.get_parameter_list()
else:
    model.save(file+'II', cloudpickle=True)
    param_list=model.get_parameter_list()


In [16]:
env2.close()
env_test.close()

In [25]:
## Enjoy last trained policy

if agent=='CarRacing-v2':  #create an independent test environment
    env3 = gym.make(agent, seed=None, 
        game_color=game_color,
        use_track = 2,       
        episodes_per_track = 1,  
        tr_complexity = 12, 
        patience = 5.0,
        discre = actions,
        indicators = True,
        frames_per_state = fpst,
        skip_frames = skip   )
else:
    env3 = gym.make(agent)

env3 = DummyVecEnv([lambda: env3])
obs = env3.reset()
print(obs.shape)        

done = False
pasos = 0
_states=None

while not done and pasos<1002:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env3.step(action)
    env3.render()
    pasos+=1
    
env3.close()
print()
print(reward, done, info, pasos)

Track generation: 984..1233 -> 249-tiles track, complex 12
(1, 96, 96, 4)
1  cut by time without progress. Steps 501  %advance 70.9  played reward 20.16  last penalty -20

[-20.] [ True] [{'terminal_observation': array([[[161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874],
        ...,
        [  0.   ,   0.   , 176.549, 161.874],
        [  0.   ,   0.   ,   0.   , 161.874],
        [  0.   ,   0.   ,   0.   , 161.874]],

       [[161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874],
        ...,
        [  0.   ,   0.   , 176.549, 161.874],
        [  0.   ,   0.   ,   0.   , 161.874],
        [  0.   ,   0.   ,   0.   , 161.874]],

       [[161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874],
        ...,
        [  0.   ,   0.   , 176.549, 161.874],
        

In [26]:
## Enjoy best eval_policy

obs = env3.reset()
print(obs.shape)        

## Load bestmodel from eval
#if not isinstance(model_test, PPO2):
model_test = PPO2.load(eval_log+'best_model', env3)

done = False
pasos = 0
_states=None

while not done and pasos<1002:
    action, _states = model_test.predict(obs, deterministic=True)
    obs, reward, done, info = env3.step(action)
    env3.render()
    pasos+=1
    
env3.close()
print()
print(reward, done, pasos)
print(action, _states)

Track generation: 1088..1363 -> 275-tiles track, complex 12
(1, 96, 96, 4)
0  Finalized in Steps 831  with return=total reward 116.1700729927009

[100.26496] [ True] 831
[3] None


In [19]:
model_test.save(file+'_evalbest', cloudpickle=True)

In [20]:
env2.close()

In [21]:
env3.close()

In [22]:
env_test.close()

In [23]:
print(action, _states)

[0] None


In [24]:
obs.shape

(1, 96, 96, 4)