In [1]:
# Filter tensorflow version warnings.
# Silences TensorFlow's native logging, Python warnings and TF's Python logger
# so that the training output below stays readable.
import logging
import os
import warnings

# Kept ahead of the tensorflow import so the variable is already set when the
# library loads.
# https://stackoverflow.com/questions/40426502/is-there-a-way-to-suppress-the-messages-tensorflow-prints/40426709
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}

# https://stackoverflow.com/questions/15777951/how-to-suppress-pandas-future-warning
# Ignoring the base `Warning` category also covers FutureWarning (a subclass),
# so a separate FutureWarning filter is not needed.
warnings.simplefilter(action='ignore', category=Warning)

import tensorflow as tf

tf.autograph.set_verbosity(0)
# Single, final log level for TF's Python logger (an earlier 'INFO' setting
# would be overridden by this call anyway).
tf.get_logger().setLevel(logging.ERROR)

In [2]:
import gym
from stable_baselines.common.policies import CnnPolicy #, MlpPolicy, CnnLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv 
from stable_baselines import PPO2

from stable_baselines.common.evaluation import evaluate_policy as test
from stable_baselines.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold


In [3]:
## Choose one agent, see Docu for description
# Available ids: 'CarRacing-v0', 'CarRacing-v1', 'CarRacing-v2'
# (edit the string below to switch; the rest of the notebook reads `agent`)
agent = 'CarRacing-v2'


In [4]:
## Environment parameters
## BEWARE: changing any of these makes saved models incompatible!!
game_color, indicators = 1, True

# Passed to gym.make as frames_per_state / skip_frames when the envs are built.
fpst, skip = 4, 3

# Custom discrete action table (this is ACT), one row per action.
# NOTE(review): rows look like [steer, gas, brake] -- confirm against the env.
actions = [
    [0, 0, 0],
    [-0.4, 0, 0],
    [0.4, 0, 0],
    [0, 1.0, 0],
    [0, 0, 0.8],
]

In [5]:
## Model / training-track parameters
use = 10       # number of times to use same track [1,100]
ept = 10       # different starting points on same track [1,20]
track_complexity = 12
patience = 2.0   # also passed as off_track below
seed = 1000

#using follow_centerline for this first leg of training
REWARD = [-0.0, 0.1, 0.0, 0.0,   1.0, 0.0,   100, -20, -100, -50]

# Only 'CarRacing-v2' is created with the custom keyword arguments; any other
# agent id is created with its registered defaults.
if agent == 'CarRacing-v2':
    env1_raw = gym.make(
        agent,
        seed=seed,
        game_color=game_color,
        indicators=indicators,
        frames_per_state=fpst,
        skip_frames=skip,
        # discre=actions,       # passing custom actions (currently disabled)
        use_track=use,
        episodes_per_track=ept,
        tr_complexity=track_complexity,
        patience=patience,
        off_track=patience,
        f_reward=REWARD,        # passing a custom reward function
    )
else:
    env1_raw = gym.make(agent)

# The raw env keeps its own name: the lambda looks the name up when called, so
# closing over a name that is then rebound to the wrapper would make the
# factory return the wrapper itself on any later call.
env1 = DummyVecEnv([lambda: env1_raw])
env1.metadata

{'render.modes': ['human', 'rgb_array', 'state_pixels'],
 'FPS, 1/timebase': 33.333333333333336,
 'Zoom_level': 1.7,
 'Flight start': False,
 'show_track_1st': False,
 'state_pixels frame size': [96, 96]}

In [6]:
# Show the action space, then the observation space, of the wrapped training env
# (one per line).
print(env1.action_space, env1.observation_space, sep='\n')

Discrete(5)
Box(0, 255, (96, 96, 4), uint8)


In [7]:
## Training parameters
batch_size = 256   # steps collected per policy update (PPO2 n_steps)
updates = 500      # policy updates; training runs updates*batch_size timesteps
epochs = 4         # optimisation epochs per update (PPO2 noptepochs)
minibatches = 4    # minibatches per update (PPO2 nminibatches)

# Stop training when the model reaches the reward threshold
callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=180, verbose=1)

# `epochs` now feeds noptepochs (what its name says) and the minibatch count has
# its own variable; with both set to 4 the resulting hyperparameters are the
# same as when `epochs` was passed as nminibatches and noptepochs was left at
# its default.
model = PPO2(CnnPolicy, env1, verbose=1, n_steps=batch_size,  # seed=314, n_cpu_tf_sess=1,
             gamma=0.99, learning_rate=0.00025, ent_coef=0.01, vf_coef=0.5,
             nminibatches=minibatches, noptepochs=epochs)


In [8]:
## Separate evaluation env
test_freq = 50      #policy updates until evaluation
test_episodes_per_track = 5   #number of starting points on test_track
eval_log = './evals/'   # best model and evaluation logs are written here

# Same guard as for the training env: only 'CarRacing-v2' is created with the
# custom keyword arguments, any other agent id uses its registered defaults.
if agent == 'CarRacing-v2':
    env_test_raw = gym.make(
        agent,
        seed=int(3.14*seed),    # different seed from the training env
        game_color=game_color,
        indicators=indicators,
        frames_per_state=fpst,
        skip_frames=skip,
        use_track=1,            # change test track after 1 ept round
        episodes_per_track=test_episodes_per_track,
        tr_complexity=12,       # test on a medium complexity track
        patience=2.0,
        off_track=2.0,
        # NOTE(review): the training env in this notebook has `discre=actions`
        # commented out -- confirm the env's default action set matches
        # `actions`, otherwise train and test action indices differ.
        discre=actions,         # passing the custom actions
    )
else:
    env_test_raw = gym.make(agent)

# The raw env keeps its own name so the factory lambda never ends up closing
# over a name that has been rebound to the wrapper.
env_test = DummyVecEnv([lambda: env_test_raw])

# Evaluates every test_freq updates (eval_freq is expressed in env steps) and
# hands new best results to callback_on_best.
eval_callback = EvalCallback(env_test, callback_on_new_best=callback_on_best,  #None,
                             n_eval_episodes=test_episodes_per_track*3, eval_freq=test_freq*batch_size,
                             best_model_save_path=eval_log, log_path=eval_log, deterministic=True,
                             render=True)  # Turn False not to watch the agent on test, faster


In [9]:
##Independent test routine
#reward_test, epis = test(model, env_test, n_eval_episodes=test_episodes_per_track, 
#                         deterministic=True, render=False, callback=None, reward_threshold=100, 
#                         return_episode_rewards=True)
#reward_test

In [10]:
## Training #1
# Runs `updates` policy updates of `batch_size` steps each, logging after every update.
# Variant with periodic evaluation through eval_callback:
#model.learn(total_timesteps = updates*batch_size, log_interval=1, callback=eval_callback)
model.learn(log_interval=1, total_timesteps=batch_size * updates)


Track generation: 1226..1540 -> 314-tiles track, complex 12
1  cut by time without progress. Steps 107  %advance 0.9  played reward 9.96  last penalty -20
2  cut by time without progress. Steps 123  %advance 1.5  played reward 8.96  last penalty -20
-------------------------------------
| approxkl           | 0.006985828  |
| clipfrac           | 0.02734375   |
| explained_variance | 0.00678      |
| fps                | 45           |
| n_updates          | 1            |
| policy_entropy     | 1.6032779    |
| policy_loss        | -0.016703583 |
| serial_timesteps   | 256          |
| time_elapsed       | 0            |
| total_timesteps    | 256          |
| value_loss         | 14.011864    |
-------------------------------------
3  cut by time without progress. Steps 153  %advance 1.2  played reward 11.07  last penalty -20
-------------------------------------
| approxkl           | 0.0006264642 |
| clipfrac           | 0.0          |
| explained_variance | 0.015        |
| fps   

11  cut by time without progress. Steps 616  %advance 9.2  played reward 38.92  last penalty -20
-------------------------------------
| approxkl           | 0.009815921  |
| clipfrac           | 0.17773438   |
| explained_variance | 0.256        |
| fps                | 41           |
| n_updates          | 16           |
| policy_entropy     | 1.5599538    |
| policy_loss        | -0.016405117 |
| serial_timesteps   | 4096         |
| time_elapsed       | 94.6         |
| total_timesteps    | 4096         |
| value_loss         | 7.921378     |
-------------------------------------
12  cut by time without progress. Steps 101  %advance 1.5  played reward -0.18  last penalty -20
13  cut by time without progress. Steps 111  %advance 1.2  played reward 5.87  last penalty -20
-------------------------------------
| approxkl           | 0.0034711582 |
| clipfrac           | 0.04296875   |
| explained_variance | 0.214        |
| fps                | 40           |
| n_updates          | 17 

-------------------------------------
| approxkl           | 0.0057453504 |
| clipfrac           | 0.048828125  |
| explained_variance | 0.63         |
| fps                | 40           |
| n_updates          | 30           |
| policy_entropy     | 1.4058906    |
| policy_loss        | -0.003874946 |
| serial_timesteps   | 7680         |
| time_elapsed       | 182          |
| total_timesteps    | 7680         |
| value_loss         | 0.2646995    |
-------------------------------------
27  cut by time without progress. Steps 634  %advance 9.9  played reward 33.92  last penalty -20
--------------------------------------
| approxkl           | 0.0021017906  |
| clipfrac           | 0.0009765625  |
| explained_variance | 0.738         |
| fps                | 41            |
| n_updates          | 31            |
| policy_entropy     | 1.3484144     |
| policy_loss        | 0.00011992478 |
| serial_timesteps   | 7936          |
| time_elapsed       | 188           |
| total_timesteps  

44  cut by time without progress. Steps 192  %advance 1.9  played reward 7.45  last penalty -20
--------------------------------------
| approxkl           | 0.0051989676  |
| clipfrac           | 0.06542969    |
| explained_variance | 0.721         |
| fps                | 40            |
| n_updates          | 44            |
| policy_entropy     | 1.3513836     |
| policy_loss        | -0.0051216753 |
| serial_timesteps   | 11264         |
| time_elapsed       | 271           |
| total_timesteps    | 11264         |
| value_loss         | 3.300218      |
--------------------------------------
45  cut by time without progress. Steps 165  %advance 1.9  played reward 3.33  last penalty -20
46  cut by time without progress. Steps 210  %advance 9.9  played reward 18.85  last penalty -20
-------------------------------------
| approxkl           | 0.011034555  |
| clipfrac           | 0.15039062   |
| explained_variance | 0.897        |
| fps                | 41           |
| n_updates   

--------------------------------------
| approxkl           | 0.009559345   |
| clipfrac           | 0.12109375    |
| explained_variance | 0.588         |
| fps                | 41            |
| n_updates          | 58            |
| policy_entropy     | 1.1340431     |
| policy_loss        | 0.00092778064 |
| serial_timesteps   | 14848         |
| time_elapsed       | 357           |
| total_timesteps    | 14848         |
| value_loss         | 0.7133557     |
--------------------------------------
58  cut by time without progress. Steps 343  %advance 3.1  played reward 29.04  last penalty -20
59  cut by time without progress. Steps 109  %advance 0.9  played reward 8.97  last penalty -20
------------------------------------
| approxkl           | 0.019645613 |
| clipfrac           | 0.4111328   |
| explained_variance | 0.488       |
| fps                | 41          |
| n_updates          | 59          |
| policy_entropy     | 1.094338    |
| policy_loss        | 0.009991458 |
| se

74  cut by time without progress. Steps 157  %advance 1.5  played reward 9.53  last penalty -20
-------------------------------------
| approxkl           | 0.008467657  |
| clipfrac           | 0.1484375    |
| explained_variance | 0.912        |
| fps                | 41           |
| n_updates          | 72           |
| policy_entropy     | 1.3040068    |
| policy_loss        | -0.013511924 |
| serial_timesteps   | 18432        |
| time_elapsed       | 444          |
| total_timesteps    | 18432        |
| value_loss         | 1.0134308    |
-------------------------------------
75  cut by time without progress. Steps 118  %advance 1.2  played reward 9.07  last penalty -20
--------------------------------------
| approxkl           | 0.016367171   |
| clipfrac           | 0.3857422     |
| explained_variance | 0.694         |
| fps                | 40            |
| n_updates          | 73            |
| policy_entropy     | 1.1983895     |
| policy_loss        | -0.0067213136 |
| 

-------------------------------------
| approxkl           | 0.0032596649 |
| clipfrac           | 0.03515625   |
| explained_variance | 0.87         |
| fps                | 40           |
| n_updates          | 86           |
| policy_entropy     | 1.1025467    |
| policy_loss        | -0.005046545 |
| serial_timesteps   | 22016        |
| time_elapsed       | 532          |
| total_timesteps    | 22016        |
| value_loss         | 0.3353795    |
-------------------------------------
87  cut by time without progress. Steps 787  %advance 9.2  played reward 62.32  last penalty -20
-------------------------------------
| approxkl           | 0.009740187  |
| clipfrac           | 0.107421875  |
| explained_variance | 0.897        |
| fps                | 41           |
| n_updates          | 87           |
| policy_entropy     | 1.3240436    |
| policy_loss        | -0.013269507 |
| serial_timesteps   | 22272        |
| time_elapsed       | 538          |
| total_timesteps    | 22272 

97  cut by time without progress. Steps 483  %advance 4.1  played reward 32.15  last penalty -20
98  cut by time without progress. Steps 77  %advance 0.9  played reward 6.7  last penalty -20
--------------------------------------
| approxkl           | 0.0056067407  |
| clipfrac           | 0.018554688   |
| explained_variance | 0.397         |
| fps                | 41            |
| n_updates          | 101           |
| policy_entropy     | 1.2492874     |
| policy_loss        | 0.00048298854 |
| serial_timesteps   | 25856         |
| time_elapsed       | 625           |
| total_timesteps    | 25856         |
| value_loss         | 14.036723     |
--------------------------------------
99  cut by time without progress. Steps 158  %advance 1.2  played reward 13.19  last penalty -20
-------------------------------------
| approxkl           | 0.016854193  |
| clipfrac           | 0.28515625   |
| explained_variance | 0.581        |
| fps                | 41           |
| n_updates    

--------------------------------------
| approxkl           | 0.006525011   |
| clipfrac           | 0.08300781    |
| explained_variance | 0.841         |
| fps                | 45            |
| n_updates          | 115           |
| policy_entropy     | 1.3558605     |
| policy_loss        | -0.0055164853 |
| serial_timesteps   | 29440         |
| time_elapsed       | 708           |
| total_timesteps    | 29440         |
| value_loss         | 0.11001753    |
--------------------------------------
-------------------------------------
| approxkl           | 0.008279138  |
| clipfrac           | 0.12207031   |
| explained_variance | 0.877        |
| fps                | 44           |
| n_updates          | 116          |
| policy_entropy     | 1.5110862    |
| policy_loss        | -0.010692141 |
| serial_timesteps   | 29696        |
| time_elapsed       | 713          |
| total_timesteps    | 29696        |
| value_loss         | 0.24895896   |
-------------------------------------

-------------------------------------
| approxkl           | 0.005572919  |
| clipfrac           | 0.05859375   |
| explained_variance | 0.837        |
| fps                | 43           |
| n_updates          | 130          |
| policy_entropy     | 1.4506147    |
| policy_loss        | 0.0051493365 |
| serial_timesteps   | 33280        |
| time_elapsed       | 794          |
| total_timesteps    | 33280        |
| value_loss         | 2.850231     |
-------------------------------------
19  cut by time without progress. Steps 192  %advance 3.2  played reward 7.3  last penalty -20
--------------------------------------
| approxkl           | 0.0107708955  |
| clipfrac           | 0.16992188    |
| explained_variance | 0.933         |
| fps                | 44            |
| n_updates          | 131           |
| policy_entropy     | 1.3547602     |
| policy_loss        | 0.00081134285 |
| serial_timesteps   | 33536         |
| time_elapsed       | 800           |
| total_timesteps    

-------------------------------------
| approxkl           | 0.010318109  |
| clipfrac           | 0.17773438   |
| explained_variance | 0.918        |
| fps                | 44           |
| n_updates          | 145          |
| policy_entropy     | 1.4107702    |
| policy_loss        | -0.009985015 |
| serial_timesteps   | 37120        |
| time_elapsed       | 882          |
| total_timesteps    | 37120        |
| value_loss         | 0.28906745   |
-------------------------------------
28  cut by time without progress. Steps 442  %advance 9.3  played reward 36.73  last penalty -20
-------------------------------------
| approxkl           | 0.014886803  |
| clipfrac           | 0.26367188   |
| explained_variance | 0.604        |
| fps                | 43           |
| n_updates          | 146          |
| policy_entropy     | 1.436656     |
| policy_loss        | -0.007867815 |
| serial_timesteps   | 37376        |
| time_elapsed       | 888          |
| total_timesteps    | 37376 

35  cut by time without progress. Steps 675  %advance 17.0  played reward 54.09  last penalty -20
-------------------------------------
| approxkl           | 0.0097894315 |
| clipfrac           | 0.15625      |
| explained_variance | 0.85         |
| fps                | 40           |
| n_updates          | 161          |
| policy_entropy     | 1.4317546    |
| policy_loss        | -0.016711038 |
| serial_timesteps   | 41216        |
| time_elapsed       | 976          |
| total_timesteps    | 41216        |
| value_loss         | 4.958049     |
-------------------------------------
36  cut by time without progress. Steps 177  %advance 7.2  played reward 9.03  last penalty -20
-------------------------------------
| approxkl           | 0.009276264  |
| clipfrac           | 0.16113281   |
| explained_variance | 0.865        |
| fps                | 43           |
| n_updates          | 162          |
| policy_entropy     | 1.3042828    |
| policy_loss        | -0.002935673 |
| serial

46  cut by time without progress. Steps 277  %advance 7.6  played reward 21.45  last penalty -20
--------------------------------------
| approxkl           | 0.013414735   |
| clipfrac           | 0.13574219    |
| explained_variance | 0.816         |
| fps                | 41            |
| n_updates          | 176           |
| policy_entropy     | 1.1874355     |
| policy_loss        | -0.0020087531 |
| serial_timesteps   | 45056         |
| time_elapsed       | 1.07e+03      |
| total_timesteps    | 45056         |
| value_loss         | 2.2496839     |
--------------------------------------
47  cut by time without progress. Steps 361  %advance 8.0  played reward 32.81  last penalty -20
-------------------------------------
| approxkl           | 0.00327648   |
| clipfrac           | 0.03125      |
| explained_variance | 0.943        |
| fps                | 42           |
| n_updates          | 177          |
| policy_entropy     | 1.4170481    |
| policy_loss        | -0.0071662

-------------------------------------
| approxkl           | 0.015205629  |
| clipfrac           | 0.21386719   |
| explained_variance | 0.927        |
| fps                | 40           |
| n_updates          | 191          |
| policy_entropy     | 1.1086396    |
| policy_loss        | -0.016533988 |
| serial_timesteps   | 48896        |
| time_elapsed       | 1.16e+03     |
| total_timesteps    | 48896        |
| value_loss         | 0.07778679   |
-------------------------------------
55  cut by time without progress. Steps 879  %advance 10.5  played reward 66.97  last penalty -20
-------------------------------------
| approxkl           | 0.019734709  |
| clipfrac           | 0.2919922    |
| explained_variance | 0.442        |
| fps                | 42           |
| n_updates          | 192          |
| policy_entropy     | 1.4184172    |
| policy_loss        | -0.008533096 |
| serial_timesteps   | 49152        |
| time_elapsed       | 1.17e+03     |
| total_timesteps    | 49152

63  cut by time without progress. Steps 529  %advance 32.7  played reward 63.23  last penalty -20
-------------------------------------
| approxkl           | 0.0070450306 |
| clipfrac           | 0.12695312   |
| explained_variance | 0.717        |
| fps                | 43           |
| n_updates          | 207          |
| policy_entropy     | 1.2658135    |
| policy_loss        | -0.007868524 |
| serial_timesteps   | 52992        |
| time_elapsed       | 1.26e+03     |
| total_timesteps    | 52992        |
| value_loss         | 6.716769     |
-------------------------------------
-------------------------------------
| approxkl           | 0.012071265  |
| clipfrac           | 0.20898438   |
| explained_variance | 0.835        |
| fps                | 41           |
| n_updates          | 208          |
| policy_entropy     | 1.2337532    |
| policy_loss        | -0.007106763 |
| serial_timesteps   | 53248        |
| time_elapsed       | 1.26e+03     |
| total_timesteps    | 53248

75  cut by time without progress. Steps 310  %advance 15.7  played reward 37.65  last penalty -20
-------------------------------------
| approxkl           | 0.0112102805 |
| clipfrac           | 0.2109375    |
| explained_variance | 0.521        |
| fps                | 41           |
| n_updates          | 222          |
| policy_entropy     | 1.356218     |
| policy_loss        | -0.014431631 |
| serial_timesteps   | 56832        |
| time_elapsed       | 1.35e+03     |
| total_timesteps    | 56832        |
| value_loss         | 8.563057     |
-------------------------------------
76  cut by time without progress. Steps 198  %advance 6.0  played reward 19.18  last penalty -20
77  cut by time without progress. Steps 193  %advance 8.9  played reward 22.75  last penalty -20
-------------------------------------
| approxkl           | 0.018396134  |
| clipfrac           | 0.2763672    |
| explained_variance | 0.542        |
| fps                | 43           |
| n_updates          | 2

---------------------------------------
| approxkl           | 0.010088823    |
| clipfrac           | 0.16015625     |
| explained_variance | -0.0367        |
| fps                | 40             |
| n_updates          | 237            |
| policy_entropy     | 0.9099222      |
| policy_loss        | -0.00035507593 |
| serial_timesteps   | 60672          |
| time_elapsed       | 1.44e+03       |
| total_timesteps    | 60672          |
| value_loss         | 60.967335      |
---------------------------------------
--------------------------------------
| approxkl           | 0.005003122   |
| clipfrac           | 0.051757812   |
| explained_variance | 0.153         |
| fps                | 42            |
| n_updates          | 238           |
| policy_entropy     | 0.9594946     |
| policy_loss        | -0.0010179912 |
| serial_timesteps   | 60928         |
| time_elapsed       | 1.44e+03      |
| total_timesteps    | 60928         |
| value_loss         | 0.22189902    |
------------

90  cut by time without progress. Steps 286  %advance 28.7  played reward 36.64  last penalty -20
-------------------------------------
| approxkl           | 0.0074269096 |
| clipfrac           | 0.083984375  |
| explained_variance | 0.831        |
| fps                | 43           |
| n_updates          | 253          |
| policy_entropy     | 0.7936118    |
| policy_loss        | 0.0033630827 |
| serial_timesteps   | 64768        |
| time_elapsed       | 1.54e+03     |
| total_timesteps    | 64768        |
| value_loss         | 8.38761      |
-------------------------------------
91  cut by time without progress. Steps 266  %advance 31.9  played reward 38.25  last penalty -20
--------------------------------------
| approxkl           | 0.002537027   |
| clipfrac           | 0.02734375    |
| explained_variance | 0.848         |
| fps                | 42            |
| n_updates          | 254           |
| policy_entropy     | 0.8733075     |
| policy_loss        | -0.0039199814 

-------------------------------------
| approxkl           | 0.011734233  |
| clipfrac           | 0.16503906   |
| explained_variance | 0.898        |
| fps                | 42           |
| n_updates          | 267          |
| policy_entropy     | 0.8638936    |
| policy_loss        | 0.0021279233 |
| serial_timesteps   | 68352        |
| time_elapsed       | 1.62e+03     |
| total_timesteps    | 68352        |
| value_loss         | 4.1963887    |
-------------------------------------
5  cut by time without progress. Steps 385  %advance 31.5  played reward 39.94  last penalty -20
-------------------------------------
| approxkl           | 0.0069569065 |
| clipfrac           | 0.083984375  |
| explained_variance | 0.948        |
| fps                | 43           |
| n_updates          | 268          |
| policy_entropy     | 0.4119776    |
| policy_loss        | 0.002957286  |
| serial_timesteps   | 68608        |
| time_elapsed       | 1.63e+03     |
| total_timesteps    | 68608 

18  cut by time without progress. Steps 186  %advance 17.5  played reward 18.77  last penalty -20
19  cut by time without progress. Steps 129  %advance 7.5  played reward 4.79  last penalty -20
-------------------------------------
| approxkl           | 0.009906837  |
| clipfrac           | 0.14550781   |
| explained_variance | 0.958        |
| fps                | 41           |
| n_updates          | 281          |
| policy_entropy     | 0.89030963   |
| policy_loss        | -0.010953849 |
| serial_timesteps   | 71936        |
| time_elapsed       | 1.71e+03     |
| total_timesteps    | 71936        |
| value_loss         | 5.506703     |
-------------------------------------
20  cut by time without progress. Steps 158  %advance 12.9  played reward 11.92  last penalty -20
21  cut by time without progress. Steps 157  %advance 11.4  played reward 10.8  last penalty -20
-------------------------------------
| approxkl           | 0.007937933  |
| clipfrac           | 0.14550781   |
| e

------------------------------------
| approxkl           | 0.001964844 |
| clipfrac           | 0.03125     |
| explained_variance | 0.435       |
| fps                | 41          |
| n_updates          | 295         |
| policy_entropy     | 0.82650316  |
| policy_loss        | -0.00262726 |
| serial_timesteps   | 75520       |
| time_elapsed       | 1.79e+03    |
| total_timesteps    | 75520       |
| value_loss         | 7.8903084   |
------------------------------------
35  cut by time without progress. Steps 558  %advance 45.5  played reward 72.59  last penalty -20
36  cut by time without progress. Steps 67  %advance 0.7  played reward 6.59  last penalty -20
--------------------------------------
| approxkl           | 0.00459722    |
| clipfrac           | 0.08496094    |
| explained_variance | 0.759         |
| fps                | 42            |
| n_updates          | 296           |
| policy_entropy     | 0.8190055     |
| policy_loss        | -0.0032354747 |
| serial_times

--------------------------------------
| approxkl           | 0.005707365   |
| clipfrac           | 0.06347656    |
| explained_variance | 0.955         |
| fps                | 43            |
| n_updates          | 308           |
| policy_entropy     | 1.1396686     |
| policy_loss        | -0.0017246401 |
| serial_timesteps   | 78848         |
| time_elapsed       | 1.88e+03      |
| total_timesteps    | 78848         |
| value_loss         | 2.59145       |
--------------------------------------
53  cut by time without progress. Steps 173  %advance 13.2  played reward 12.98  last penalty -20
54  cut by time without progress. Steps 107  %advance 3.9  played reward 0.05  last penalty -20
-------------------------------------
| approxkl           | 0.0029668289 |
| clipfrac           | 0.0390625    |
| explained_variance | 0.938        |
| fps                | 43           |
| n_updates          | 309          |
| policy_entropy     | 0.80173826   |
| policy_loss        | -0.0067059

-------------------------------------
| approxkl           | 0.011135408  |
| clipfrac           | 0.10839844   |
| explained_variance | -0.092       |
| fps                | 40           |
| n_updates          | 323          |
| policy_entropy     | 0.53008604   |
| policy_loss        | -0.007119182 |
| serial_timesteps   | 82688        |
| time_elapsed       | 1.97e+03     |
| total_timesteps    | 82688        |
| value_loss         | 4.941166     |
-------------------------------------
63  cut by time without progress. Steps 727  %advance 97.1  played reward 135.36  last penalty -20
--------------------------------------
| approxkl           | 0.019230252   |
| clipfrac           | 0.21191406    |
| explained_variance | 0.128         |
| fps                | 40            |
| n_updates          | 324           |
| policy_entropy     | 0.6685625     |
| policy_loss        | -0.0023716686 |
| serial_timesteps   | 82944         |
| time_elapsed       | 1.97e+03      |
| total_timesteps

74  cut by time without progress. Steps 630  %advance 59.8  played reward 77.03  last penalty -20
------------------------------------
| approxkl           | 0.011548541 |
| clipfrac           | 0.13476562  |
| explained_variance | 0.57        |
| fps                | 41          |
| n_updates          | 338         |
| policy_entropy     | 0.8327156   |
| policy_loss        | 0.00663493  |
| serial_timesteps   | 86528       |
| time_elapsed       | 2.06e+03    |
| total_timesteps    | 86528       |
| value_loss         | 17.741896   |
------------------------------------
-------------------------------------
| approxkl           | 0.0049408    |
| clipfrac           | 0.0546875    |
| explained_variance | 0.0912       |
| fps                | 41           |
| n_updates          | 339          |
| policy_entropy     | 0.7256733    |
| policy_loss        | -0.010476963 |
| serial_timesteps   | 86784        |
| time_elapsed       | 2.07e+03     |
| total_timesteps    | 86784        |
| v

-------------------------------------
| approxkl           | 0.009827774  |
| clipfrac           | 0.15820312   |
| explained_variance | 0.921        |
| fps                | 41           |
| n_updates          | 353          |
| policy_entropy     | 0.8864454    |
| policy_loss        | -0.016697852 |
| serial_timesteps   | 90368        |
| time_elapsed       | 2.15e+03     |
| total_timesteps    | 90368        |
| value_loss         | 4.4754806    |
-------------------------------------
--------------------------------------
| approxkl           | 0.005626051   |
| clipfrac           | 0.080078125   |
| explained_variance | 0.492         |
| fps                | 41            |
| n_updates          | 354           |
| policy_entropy     | 0.418732      |
| policy_loss        | -0.0055546653 |
| serial_timesteps   | 90624         |
| time_elapsed       | 2.16e+03      |
| total_timesteps    | 90624         |
| value_loss         | 0.13783634    |
--------------------------------------

89  cut by time without progress. Steps 264  %advance 6.4  played reward 25.33  last penalty -20
--------------------------------------
| approxkl           | 0.005966217   |
| clipfrac           | 0.10449219    |
| explained_variance | 0.667         |
| fps                | 42            |
| n_updates          | 368           |
| policy_entropy     | 1.0344249     |
| policy_loss        | -0.0017151278 |
| serial_timesteps   | 94208         |
| time_elapsed       | 2.25e+03      |
| total_timesteps    | 94208         |
| value_loss         | 6.843538      |
--------------------------------------
-------------------------------------
| approxkl           | 0.004789793  |
| clipfrac           | 0.052734375  |
| explained_variance | 0.241        |
| fps                | 36           |
| n_updates          | 369          |
| policy_entropy     | 0.6397669    |
| policy_loss        | -0.007751168 |
| serial_timesteps   | 94464        |
| time_elapsed       | 2.25e+03     |
| total_timestep

100  cut by time without progress. Steps 482  %advance 45.8  played reward 65.63  last penalty -20
Track generation: 1070..1341 -> 271-tiles track, complex 12
1  retry to generate new track (normal below 20, limit 50)
Track generation: 1144..1439 -> 295-tiles track, complex 12
2  retry to generate new track (normal below 20, limit 50)
Track generation: 1102..1381 -> 279-tiles track, complex 12
-------------------------------------
| approxkl           | 0.0065056654 |
| clipfrac           | 0.08691406   |
| explained_variance | 0.952        |
| fps                | 40           |
| n_updates          | 383          |
| policy_entropy     | 1.0253875    |
| policy_loss        | -0.011577023 |
| serial_timesteps   | 98048        |
| time_elapsed       | 2.34e+03     |
| total_timesteps    | 98048        |
| value_loss         | 3.093831     |
-------------------------------------
------------------------------------
| approxkl           | 0.008450058 |
| clipfrac           | 0.115234375 

7  cut by time without progress. Steps 290  %advance 24.4  played reward 30.52  last penalty -20
--------------------------------------
| approxkl           | 0.0067522787  |
| clipfrac           | 0.10546875    |
| explained_variance | 0.958         |
| fps                | 40            |
| n_updates          | 398           |
| policy_entropy     | 0.871718      |
| policy_loss        | -0.0055534635 |
| serial_timesteps   | 101888        |
| time_elapsed       | 2.44e+03      |
| total_timesteps    | 101888        |
| value_loss         | 4.1561513     |
--------------------------------------
8  cut by time without progress. Steps 243  %advance 14.3  played reward 19.02  last penalty -20
-------------------------------------
| approxkl           | 0.0063506234 |
| clipfrac           | 0.09863281   |
| explained_variance | 0.948        |
| fps                | 41           |
| n_updates          | 399          |
| policy_entropy     | 1.0555556    |
| policy_loss        | -0.0122673

13  cut by time without progress. Steps 861  %advance 63.3  played reward 113.53  last penalty -20
--------------------------------------
| approxkl           | 0.0050850944  |
| clipfrac           | 0.07421875    |
| explained_variance | 0.894         |
| fps                | 42            |
| n_updates          | 414           |
| policy_entropy     | 1.1047596     |
| policy_loss        | -0.0018603893 |
| serial_timesteps   | 105984        |
| time_elapsed       | 2.53e+03      |
| total_timesteps    | 105984        |
| value_loss         | 3.377174      |
--------------------------------------
14  cut by time without progress. Steps 363  %advance 4.3  played reward 28.73  last penalty -20
--------------------------------------
| approxkl           | 0.017814998   |
| clipfrac           | 0.18164062    |
| explained_variance | -0.0602       |
| fps                | 41            |
| n_updates          | 415           |
| policy_entropy     | 0.9374646     |
| policy_loss        | -

20  env max steps reached 1000  %advance 39.9  played reward 108.99  last penalty -50
-------------------------------------
| approxkl           | 0.0026633006 |
| clipfrac           | 0.03515625   |
| explained_variance | -0.00901     |
| fps                | 43           |
| n_updates          | 430          |
| policy_entropy     | 0.74772835   |
| policy_loss        | 0.0016980374 |
| serial_timesteps   | 110080       |
| time_elapsed       | 2.63e+03     |
| total_timesteps    | 110080       |
| value_loss         | 59.066765    |
-------------------------------------
-------------------------------------
| approxkl           | 0.012103912  |
| clipfrac           | 0.12402344   |
| explained_variance | 0.691        |
| fps                | 43           |
| n_updates          | 431          |
| policy_entropy     | 0.79432917   |
| policy_loss        | -0.009198322 |
| serial_timesteps   | 110336       |
| time_elapsed       | 2.64e+03     |
| total_timesteps    | 110336       |
| 

-------------------------------------
| approxkl           | 0.02637011   |
| clipfrac           | 0.2607422    |
| explained_variance | 0.965        |
| fps                | 43           |
| n_updates          | 446          |
| policy_entropy     | 1.030009     |
| policy_loss        | -0.009842873 |
| serial_timesteps   | 114176       |
| time_elapsed       | 2.73e+03     |
| total_timesteps    | 114176       |
| value_loss         | 1.1385821    |
-------------------------------------
26  cut by time without progress. Steps 759  %advance 34.5  played reward 85.54  last penalty -20
--------------------------------------
| approxkl           | 0.006131646   |
| clipfrac           | 0.107421875   |
| explained_variance | 0.935         |
| fps                | 41            |
| n_updates          | 447           |
| policy_entropy     | 0.97669464    |
| policy_loss        | -0.0015611564 |
| serial_timesteps   | 114432        |
| time_elapsed       | 2.74e+03      |
| total_timesteps 

--------------------------------------
| approxkl           | 0.0070094448  |
| clipfrac           | 0.10058594    |
| explained_variance | -0.258        |
| fps                | 43            |
| n_updates          | 462           |
| policy_entropy     | 1.0811222     |
| policy_loss        | -0.0091093015 |
| serial_timesteps   | 118272        |
| time_elapsed       | 2.83e+03      |
| total_timesteps    | 118272        |
| value_loss         | 0.10399829    |
--------------------------------------
-------------------------------------
| approxkl           | 0.010171093  |
| clipfrac           | 0.15722656   |
| explained_variance | -1.17        |
| fps                | 41           |
| n_updates          | 463          |
| policy_entropy     | 1.0537666    |
| policy_loss        | -0.004680859 |
| serial_timesteps   | 118528       |
| time_elapsed       | 2.83e+03     |
| total_timesteps    | 118528       |
| value_loss         | 0.33320934   |
-------------------------------------

--------------------------------------
| approxkl           | 0.008273223   |
| clipfrac           | 0.1328125     |
| explained_variance | -0.833        |
| fps                | 41            |
| n_updates          | 478           |
| policy_entropy     | 1.1194108     |
| policy_loss        | -0.0031179064 |
| serial_timesteps   | 122368        |
| time_elapsed       | 2.92e+03      |
| total_timesteps    | 122368        |
| value_loss         | 0.052983463   |
--------------------------------------
35  cut by time without progress. Steps 804  %advance 44.6  played reward 93.18  last penalty -20
---------------------------------------
| approxkl           | 0.005750738    |
| clipfrac           | 0.076171875    |
| explained_variance | 0.58           |
| fps                | 40             |
| n_updates          | 479            |
| policy_entropy     | 1.0934463      |
| policy_loss        | -0.00092797726 |
| serial_timesteps   | 122624         |
| time_elapsed       | 2.93e+03    

40  cut by time without progress. Steps 221  %advance 8.6  played reward 14.22  last penalty -20
-------------------------------------
| approxkl           | 0.015840836  |
| clipfrac           | 0.21875      |
| explained_variance | 0.391        |
| fps                | 34           |
| n_updates          | 494          |
| policy_entropy     | 1.1841246    |
| policy_loss        | 0.0018688585 |
| serial_timesteps   | 126464       |
| time_elapsed       | 3.02e+03     |
| total_timesteps    | 126464       |
| value_loss         | 18.266056    |
-------------------------------------
41  cut by time without progress. Steps 163  %advance 5.3  played reward 7.71  last penalty -20
-------------------------------------
| approxkl           | 0.0035277782 |
| clipfrac           | 0.0234375    |
| explained_variance | 0.333        |
| fps                | 41           |
| n_updates          | 495          |
| policy_entropy     | 1.1297086    |
| policy_loss        | -0.01522967  |
| serial_

<stable_baselines.ppo2.ppo2.PPO2 at 0x18b1770f240>

In [11]:
import pickle
# Checkpoint file name: a fixed prefix followed by the environment settings
# (colour mode, frames per state, frame skip, indicators, number of actions),
# the track-reuse settings (use, ept, patience) and the PPO batch size.
root = 'ppo_cnn_gym-mod_'

file = root+'c{:d}_f{:d}_s{:d}_{}_a{:d}__u{:d}_e{:d}_p{}_bs{:d}'.format(
    game_color,fpst,skip,indicators,len(actions),use,ept,patience,batch_size)

model.save(file, cloudpickle=True)

# Snapshot the trained weights as a {variable name: ndarray} mapping.
# get_parameters() returns the parameter *values*; the previously used
# get_parameter_list() returns the TensorFlow variable objects themselves,
# which load_parameters() cannot feed into another model.
# The name `param_list` is kept because the second training cell passes it
# to model2.load_parameters().
param_list = model.get_parameters()


In [12]:
# The first training leg is over and its model has been saved above, so the
# first-leg environment is closed here; the next cell builds a separate
# environment (env2) for the second leg.
# (Disabled alternative: reset the track instead of closing the environment.)
#env1.reset_track()
env1.close()

In [13]:
## Model / environment parameters for the second training leg
# Reward vector for this leg: identical to the first-leg REWARD except for the
# first entry (-0.11 instead of -0.0), added as an incentive to move forward fast.
REWARD2 = [-0.11, 0.1, 0.0, 0.0,   1.0, 0.0,   100, -20, -100, -50]
track_complexity = 16   # now training on difficult tracks (first leg used 12)
seed = 20000
patience = 1.0
use = 5                 # number of times to use same track [1,100]
ept = 8                 # different starting points on same track [1,20]

if agent != 'CarRacing-v2':
    # The other agent ids are created with their default settings.
    env2 = gym.make(agent)
else:
    # Only 'CarRacing-v2' is given the custom keyword arguments.
    env2 = gym.make(
        agent,
        seed=seed,
        game_color=game_color,
        indicators=indicators,
        frames_per_state=fpst,
        skip_frames=skip,
        # discre=actions,   # passing custom actions (disabled)
        use_track=use,
        episodes_per_track=ept,
        tr_complexity=track_complexity,
        patience=patience,
        f_reward=REWARD2,   # passing a custom reward function
    )

# Wrap the single environment in a vectorised env for stable-baselines.
# NOTE(review): the lambda closes over the *name* env2, which is rebound by this
# very statement; this presumably relies on DummyVecEnv calling the factory
# while it is being constructed -- confirm before swapping in another VecEnv.
env2 = DummyVecEnv([lambda: env2])

In [14]:
## Training, second leg
updates = 1000
new_mod = False   # to change batch_size you need a new model !!

if not new_mod:
    # Default path: keep training the existing model, now on the second-leg
    # environment, for `updates` rollouts of `batch_size` steps each.
    model.set_env(env2)
    model.learn(
        total_timesteps=updates*batch_size,
        log_interval=1,
        callback=eval_callback)
else:
    # Alternative path: build a fresh PPO2 model with a different rollout
    # length and initialise it from `param_list`, which is set in the cell
    # that saved the first-leg model.
    batch_size2 = 512
    # NOTE(review): `epochs` is defined outside this cell and is passed as
    # `nminibatches` -- confirm that this is the intended argument.
    model2 = PPO2(
        CnnPolicy, env2,
        verbose=1,
        n_steps=batch_size2,   # seed=314, n_cpu_tf_sess=1,
        gamma=0.99,
        learning_rate=0.00025,
        nminibatches=epochs,
        ent_coef=0.01,
        vf_coef=0.5)
    model2.load_parameters(param_list, exact_match=True)
    model2.learn(
        total_timesteps=updates*batch_size2,
        log_interval=1,
        callback=eval_callback)


Track generation: 1391..1741 -> 350-tiles track, complex 16
--------------------------------------
| approxkl           | 0.0079038795  |
| clipfrac           | 0.099609375   |
| explained_variance | -0.211        |
| fps                | 37            |
| n_updates          | 1             |
| policy_entropy     | 1.0963852     |
| policy_loss        | -0.0042322334 |
| serial_timesteps   | 256           |
| time_elapsed       | 0             |
| total_timesteps    | 256           |
| value_loss         | 1.405933      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0043837656  |
| clipfrac           | 0.06640625    |
| explained_variance | 0.644         |
| fps                | 39            |
| n_updates          | 2             |
| policy_entropy     | 1.074409      |
| policy_loss        | -0.0024100137 |
| serial_timesteps   | 512           |
| time_elapsed       | 6.78          |
| total_timesteps    | 512           |
| va

-------------------------------------
| approxkl           | 0.003721349  |
| clipfrac           | 0.048828125  |
| explained_variance | 0.627        |
| fps                | 40           |
| n_updates          | 15           |
| policy_entropy     | 1.1927099    |
| policy_loss        | -0.014012737 |
| serial_timesteps   | 3840         |
| time_elapsed       | 93.4         |
| total_timesteps    | 3840         |
| value_loss         | 6.8599887    |
-------------------------------------
-------------------------------------
| approxkl           | 0.0038716295 |
| clipfrac           | 0.0390625    |
| explained_variance | -0.17        |
| fps                | 38           |
| n_updates          | 16           |
| policy_entropy     | 0.90592736   |
| policy_loss        | -0.005598315 |
| serial_timesteps   | 4096         |
| time_elapsed       | 99.8         |
| total_timesteps    | 4096         |
| value_loss         | 0.23859058   |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.005165023  |
| clipfrac           | 0.049804688  |
| explained_variance | 0.257        |
| fps                | 38           |
| n_updates          | 31           |
| policy_entropy     | 0.83216363   |
| policy_loss        | -0.009213417 |
| serial_timesteps   | 7936         |
| time_elapsed       | 198          |
| total_timesteps    | 7936         |
| value_loss         | 0.20211063   |
-------------------------------------
18  cut by time without progress. Steps 407  %advance 38.3  played reward 13.83  last penalty -20
-------------------------------------
| approxkl           | 0.01037382   |
| clipfrac           | 0.18066406   |
| explained_variance | 0.615        |
| fps                | 39           |
| n_updates          | 32           |
| policy_entropy     | 1.1135404    |
| policy_loss        | -0.016618513 |
| serial_timesteps   | 8192         |
| time_elapsed       | 205          |
| total_timesteps    | 8192 

27  cut by time without progress. Steps 423  %advance 30.9  played reward 9.19  last penalty -20
--------------------------------------
| approxkl           | 0.0030196984  |
| clipfrac           | 0.040039062   |
| explained_variance | 0.858         |
| fps                | 38            |
| n_updates          | 46            |
| policy_entropy     | 1.0182817     |
| policy_loss        | -0.0025784464 |
| serial_timesteps   | 11776         |
| time_elapsed       | 298           |
| total_timesteps    | 11776         |
| value_loss         | 3.6221972     |
--------------------------------------
28  cut by time without progress. Steps 269  %advance 18.6  played reward 5.29  last penalty -20
29  cut by time without progress. Steps 113  %advance 6.3  played reward 0.05  last penalty -20
-------------------------------------
| approxkl           | 0.0077904295 |
| clipfrac           | 0.09375      |
| explained_variance | 0.871        |
| fps                | 39           |
| n_updates  

-------------------------------------
| approxkl           | 0.003034641  |
| clipfrac           | 0.056640625  |
| explained_variance | -0.12        |
| fps                | 39           |
| n_updates          | 57           |
| policy_entropy     | 0.83142424   |
| policy_loss        | -0.002622691 |
| serial_timesteps   | 14592        |
| time_elapsed       | 433          |
| total_timesteps    | 14592        |
| value_loss         | 0.7086871    |
-------------------------------------
-------------------------------------
| approxkl           | 0.004857888  |
| clipfrac           | 0.055664062  |
| explained_variance | -0.182       |
| fps                | 37           |
| n_updates          | 58           |
| policy_entropy     | 0.9035179    |
| policy_loss        | -0.003646952 |
| serial_timesteps   | 14848        |
| time_elapsed       | 440          |
| total_timesteps    | 14848        |
| value_loss         | 0.59049296   |
-------------------------------------
------------

------------------------------------
| approxkl           | 0.010082742 |
| clipfrac           | 0.115234375 |
| explained_variance | -0.142      |
| fps                | 42          |
| n_updates          | 73          |
| policy_entropy     | 0.8196882   |
| policy_loss        | -0.01052554 |
| serial_timesteps   | 18688       |
| time_elapsed       | 537         |
| total_timesteps    | 18688       |
| value_loss         | 0.1873892   |
------------------------------------
4  Finalized in Steps 690  with return=total_reward 170.9260666677901
-------------------------------------
| approxkl           | 0.0037401726 |
| clipfrac           | 0.041015625  |
| explained_variance | 0.0418       |
| fps                | 40           |
| n_updates          | 74           |
| policy_entropy     | 0.7917072    |
| policy_loss        | 0.0022630747 |
| serial_timesteps   | 18944        |
| time_elapsed       | 543          |
| total_timesteps    | 18944        |
| value_loss         | 141.5918

11  cut by time without progress. Steps 352  %advance 41.3  played reward 16.19  last penalty -20
-------------------------------------
| approxkl           | 0.004297355  |
| clipfrac           | 0.041992188  |
| explained_variance | 0.799        |
| fps                | 40           |
| n_updates          | 89           |
| policy_entropy     | 0.58080935   |
| policy_loss        | 0.0014254688 |
| serial_timesteps   | 22784        |
| time_elapsed       | 636          |
| total_timesteps    | 22784        |
| value_loss         | 10.169738    |
-------------------------------------
--------------------------------------
| approxkl           | 0.007109003   |
| clipfrac           | 0.08300781    |
| explained_variance | 0.126         |
| fps                | 40            |
| n_updates          | 90            |
| policy_entropy     | 0.5850273     |
| policy_loss        | -0.0055784527 |
| serial_timesteps   | 23040         |
| time_elapsed       | 643           |
| total_timesteps 

19  cut by time without progress. Steps 278  %advance 38.5  played reward 21.49  last penalty -20
-------------------------------------
| approxkl           | 0.0059273504 |
| clipfrac           | 0.080078125  |
| explained_variance | 0.663        |
| fps                | 40           |
| n_updates          | 101          |
| policy_entropy     | 0.54036486   |
| policy_loss        | 0.0048809657 |
| serial_timesteps   | 25856        |
| time_elapsed       | 764          |
| total_timesteps    | 25856        |
| value_loss         | 13.306712    |
-------------------------------------
20  cut by time without progress. Steps 246  %advance 34.6  played reward 19.3  last penalty -20
21  cut by time without progress. Steps 158  %advance 18.1  played reward 4.32  last penalty -20
22  cut by time without progress. Steps 34  %advance 0.7  played reward -0.33  last penalty -20
-------------------------------------
| approxkl           | 0.007893943  |
| clipfrac           | 0.08496094   |
| ex

31  cut by time without progress. Steps 210  %advance 30.3  played reward 14.05  last penalty -20
--------------------------------------
| approxkl           | 0.0062515577  |
| clipfrac           | 0.08496094    |
| explained_variance | 0.869         |
| fps                | 41            |
| n_updates          | 116           |
| policy_entropy     | 0.73106027    |
| policy_loss        | -0.0053738505 |
| serial_timesteps   | 29696         |
| time_elapsed       | 857           |
| total_timesteps    | 29696         |
| value_loss         | 8.030526      |
--------------------------------------
32  cut by time without progress. Steps 248  %advance 36.2  played reward 18.19  last penalty -20
--------------------------------------
| approxkl           | 0.0041497084  |
| clipfrac           | 0.043945312   |
| explained_variance | 0.863         |
| fps                | 41            |
| n_updates          | 117           |
| policy_entropy     | 0.4857432     |
| policy_loss        | -

4  cut by time without progress. Steps 127  %advance 11.6  played reward 0.02  last penalty -20
5  cut by time without progress. Steps 194  %advance 24.5  played reward 11.33  last penalty -20
--------------------------------------
| approxkl           | 0.0037996268  |
| clipfrac           | 0.04296875    |
| explained_variance | 0.823         |
| fps                | 39            |
| n_updates          | 130           |
| policy_entropy     | 0.44218475    |
| policy_loss        | -0.0022609588 |
| serial_timesteps   | 33280         |
| time_elapsed       | 945           |
| total_timesteps    | 33280         |
| value_loss         | 16.40532      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0035455823  |
| clipfrac           | 0.049804688   |
| explained_variance | 0.747         |
| fps                | 39            |
| n_updates          | 131           |
| policy_entropy     | 0.47483492    |
| policy_loss        | 0.00

22  cut by time without progress. Steps 114  %advance 10.3  played reward -0.19  last penalty -20
23  cut by time without progress. Steps 213  %advance 23.8  played reward 8.63  last penalty -20
--------------------------------------
| approxkl           | 0.0032072123  |
| clipfrac           | 0.047851562   |
| explained_variance | 0.917         |
| fps                | 39            |
| n_updates          | 143           |
| policy_entropy     | 0.7559787     |
| policy_loss        | -0.0057043447 |
| serial_timesteps   | 36608         |
| time_elapsed       | 1.03e+03      |
| total_timesteps    | 36608         |
| value_loss         | 4.918081      |
--------------------------------------
24  cut by time without progress. Steps 178  %advance 12.5  played reward 0.18  last penalty -20
--------------------------------------
| approxkl           | 0.0018610641  |
| clipfrac           | 0.028320312   |
| explained_variance | 0.89          |
| fps                | 39            |
| n_up

33  cut by time without progress. Steps 382  %advance 47.4  played reward 25.0  last penalty -20
34  cut by time without progress. Steps 147  %advance 14.1  played reward 2.51  last penalty -20
-------------------------------------
| approxkl           | 0.006655956  |
| clipfrac           | 0.10253906   |
| explained_variance | 0.849        |
| fps                | 41           |
| n_updates          | 154          |
| policy_entropy     | 0.74229944   |
| policy_loss        | -0.009914904 |
| serial_timesteps   | 39424        |
| time_elapsed       | 1.18e+03     |
| total_timesteps    | 39424        |
| value_loss         | 11.656485    |
-------------------------------------
35  cut by time without progress. Steps 34  %advance 0.6  played reward -0.33  last penalty -20
36  cut by time without progress. Steps 142  %advance 10.6  played reward -0.14  last penalty -20
-------------------------------------
| approxkl           | 0.0056190602 |
| clipfrac           | 0.07714844   |
| ex

-------------------------------------
| approxkl           | 0.0076563917 |
| clipfrac           | 0.061523438  |
| explained_variance | 0.887        |
| fps                | 39           |
| n_updates          | 167          |
| policy_entropy     | 0.8510859    |
| policy_loss        | -0.007344676 |
| serial_timesteps   | 42752        |
| time_elapsed       | 1.26e+03     |
| total_timesteps    | 42752        |
| value_loss         | 3.7742229    |
-------------------------------------
11  cut by time without progress. Steps 255  %advance 24.7  played reward 13.26  last penalty -20
-------------------------------------
| approxkl           | 0.015245344  |
| clipfrac           | 0.26953125   |
| explained_variance | 0.912        |
| fps                | 39           |
| n_updates          | 168          |
| policy_entropy     | 0.9329945    |
| policy_loss        | -0.011873841 |
| serial_timesteps   | 43008        |
| time_elapsed       | 1.27e+03     |
| total_timesteps    | 43008

24  cut by time without progress. Steps 202  %advance 11.7  played reward 2.08  last penalty -20
-------------------------------------
| approxkl           | 0.013005125  |
| clipfrac           | 0.1484375    |
| explained_variance | 0.911        |
| fps                | 40           |
| n_updates          | 181          |
| policy_entropy     | 0.9807414    |
| policy_loss        | -0.009788189 |
| serial_timesteps   | 46336        |
| time_elapsed       | 1.35e+03     |
| total_timesteps    | 46336        |
| value_loss         | 1.7173983    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0068229903  |
| clipfrac           | 0.09765625    |
| explained_variance | 0.717         |
| fps                | 44            |
| n_updates          | 182           |
| policy_entropy     | 0.8132249     |
| policy_loss        | -0.0020664728 |
| serial_timesteps   | 46592         |
| time_elapsed       | 1.35e+03      |
| total_timesteps  

-------------------------------------
| approxkl           | 0.010093811  |
| clipfrac           | 0.13183594   |
| explained_variance | -0.166       |
| fps                | 41           |
| n_updates          | 196          |
| policy_entropy     | 0.8844928    |
| policy_loss        | -0.005919462 |
| serial_timesteps   | 50176        |
| time_elapsed       | 1.44e+03     |
| total_timesteps    | 50176        |
| value_loss         | 1.5499191    |
-------------------------------------
-------------------------------------
| approxkl           | 0.0069593135 |
| clipfrac           | 0.087890625  |
| explained_variance | -0.975       |
| fps                | 40           |
| n_updates          | 197          |
| policy_entropy     | 0.86503553   |
| policy_loss        | -0.011593449 |
| serial_timesteps   | 50432        |
| time_elapsed       | 1.44e+03     |
| total_timesteps    | 50432        |
| value_loss         | 0.66773075   |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.009875334   |
| clipfrac           | 0.14257812    |
| explained_variance | 0.23          |
| fps                | 43            |
| n_updates          | 209           |
| policy_entropy     | 0.7779387     |
| policy_loss        | -0.0077824374 |
| serial_timesteps   | 53504         |
| time_elapsed       | 1.58e+03      |
| total_timesteps    | 53504         |
| value_loss         | 0.20257327    |
--------------------------------------
-------------------------------------
| approxkl           | 0.013539934  |
| clipfrac           | 0.17480469   |
| explained_variance | 0.0392       |
| fps                | 40           |
| n_updates          | 210          |
| policy_entropy     | 0.6654232    |
| policy_loss        | -0.006416614 |
| serial_timesteps   | 53760        |
| time_elapsed       | 1.58e+03     |
| total_timesteps    | 53760        |
| value_loss         | 0.3294361    |
-------------------------------------

--------------------------------------
| approxkl           | 0.0106515875  |
| clipfrac           | 0.12988281    |
| explained_variance | 0.407         |
| fps                | 42            |
| n_updates          | 225           |
| policy_entropy     | 0.7347108     |
| policy_loss        | -0.0018035232 |
| serial_timesteps   | 57600         |
| time_elapsed       | 1.68e+03      |
| total_timesteps    | 57600         |
| value_loss         | 0.053422686   |
--------------------------------------
------------------------------------
| approxkl           | 0.017160568 |
| clipfrac           | 0.18652344  |
| explained_variance | 0.0403      |
| fps                | 42          |
| n_updates          | 226         |
| policy_entropy     | 0.67972755  |
| policy_loss        | 0.005082057 |
| serial_timesteps   | 57856       |
| time_elapsed       | 1.68e+03    |
| total_timesteps    | 57856       |
| value_loss         | 0.05561823  |
------------------------------------
6  Finalized

-------------------------------------
| approxkl           | 0.015050465  |
| clipfrac           | 0.18066406   |
| explained_variance | 0.893        |
| fps                | 42           |
| n_updates          | 240          |
| policy_entropy     | 0.8358735    |
| policy_loss        | -0.016588349 |
| serial_timesteps   | 61440        |
| time_elapsed       | 1.77e+03     |
| total_timesteps    | 61440        |
| value_loss         | 0.068560734  |
-------------------------------------
-------------------------------------
| approxkl           | 0.01882639   |
| clipfrac           | 0.20800781   |
| explained_variance | -0.197       |
| fps                | 41           |
| n_updates          | 241          |
| policy_entropy     | 0.62705576   |
| policy_loss        | -0.019584393 |
| serial_timesteps   | 61696        |
| time_elapsed       | 1.78e+03     |
| total_timesteps    | 61696        |
| value_loss         | 0.080220595  |
-------------------------------------
15  Finalize

19  cut by time without progress. Steps 718  %advance 99.2  played reward 69.46  last penalty -20
-------------------------------------
| approxkl           | 0.0057914364 |
| clipfrac           | 0.06738281   |
| explained_variance | -0.125       |
| fps                | 42           |
| n_updates          | 253          |
| policy_entropy     | 0.7690183    |
| policy_loss        | -0.00859547  |
| serial_timesteps   | 64768        |
| time_elapsed       | 2e+03        |
| total_timesteps    | 64768        |
| value_loss         | 18.71248     |
-------------------------------------
--------------------------------------
| approxkl           | 0.0045812153  |
| clipfrac           | 0.046875      |
| explained_variance | 0.778         |
| fps                | 41            |
| n_updates          | 254           |
| policy_entropy     | 0.6904349     |
| policy_loss        | -0.0051364074 |
| serial_timesteps   | 65024         |
| time_elapsed       | 2.01e+03      |
| total_timesteps 

25  Finalized in Steps 645  with return=total_reward 175.42486424984224
-------------------------------------
| approxkl           | 0.006469666  |
| clipfrac           | 0.0859375    |
| explained_variance | 0.0774       |
| fps                | 41           |
| n_updates          | 269          |
| policy_entropy     | 0.6668412    |
| policy_loss        | -0.002796271 |
| serial_timesteps   | 68864        |
| time_elapsed       | 2.1e+03      |
| total_timesteps    | 68864        |
| value_loss         | 15.122279    |
-------------------------------------
-------------------------------------
| approxkl           | 0.0083828345 |
| clipfrac           | 0.11328125   |
| explained_variance | 0.0138       |
| fps                | 39           |
| n_updates          | 270          |
| policy_entropy     | 0.57260716   |
| policy_loss        | -0.006789098 |
| serial_timesteps   | 69120        |
| time_elapsed       | 2.1e+03      |
| total_timesteps    | 69120        |
| value_loss    

-------------------------------------
| approxkl           | 0.0058910577 |
| clipfrac           | 0.055664062  |
| explained_variance | 0.153        |
| fps                | 41           |
| n_updates          | 285          |
| policy_entropy     | 0.6405477    |
| policy_loss        | -0.006638794 |
| serial_timesteps   | 72960        |
| time_elapsed       | 2.2e+03      |
| total_timesteps    | 72960        |
| value_loss         | 0.0371025    |
-------------------------------------
32  Finalized in Steps 687  with return=total_reward 173.46897052683607
--------------------------------------
| approxkl           | 0.009762174   |
| clipfrac           | 0.05078125    |
| explained_variance | 0.00467       |
| fps                | 43            |
| n_updates          | 286           |
| policy_entropy     | 0.7138157     |
| policy_loss        | -0.0027796603 |
| serial_timesteps   | 73216         |
| time_elapsed       | 2.2e+03       |
| total_timesteps    | 73216         |
| val

2  cut by time without progress. Steps 353  %advance 39.7  played reward 3.9  last penalty -20
3  cut by time without progress. Steps 753  %advance 99.6  played reward 23.76  last penalty -20
4  cut by time without progress. Steps 220  %advance 23.5  played reward 0.93  last penalty -20
5  cut by time without progress. Steps 526  %advance 58.4  played reward 4.93  last penalty -20
Track generation: 1104..1383 -> 279-tiles track, complex 12
1  Finalized in Steps 562  with return=total_reward 142.82086330935238
2  Finalized in Steps 573  with return=total_reward 141.72086330935235
3  cut by time without progress. Steps 469  %advance 69.0  played reward 21.54  last penalty -20
4  cut by time without progress. Steps 162  %advance 11.5  played reward -5.3  last penalty -20
5  cut by time without progress. Steps 595  %advance 92.8  played reward 32.68  last penalty -20
Track generation: 1068..1338 -> 270-tiles track, complex 12
1  Finalized in Steps 530  with return=total_reward 145.98475836

9  cut by time without progress. Steps 221  %advance 23.2  played reward 7.04  last penalty -20
---------------------------------------
| approxkl           | 0.008874474    |
| clipfrac           | 0.091796875    |
| explained_variance | 0.807          |
| fps                | 39             |
| n_updates          | 312            |
| policy_entropy     | 0.83160764     |
| policy_loss        | -0.00047721947 |
| serial_timesteps   | 79872          |
| time_elapsed       | 2.48e+03       |
| total_timesteps    | 79872          |
| value_loss         | 9.687312       |
---------------------------------------
-------------------------------------
| approxkl           | 0.0056965263 |
| clipfrac           | 0.08300781   |
| explained_variance | 0.604        |
| fps                | 36           |
| n_updates          | 313          |
| policy_entropy     | 0.63305074   |
| policy_loss        | -0.002266843 |
| serial_timesteps   | 80128        |
| time_elapsed       | 2.49e+03     |
| to

24  cut by time without progress. Steps 419  %advance 47.7  played reward 20.54  last penalty -20
--------------------------------------
| approxkl           | 0.0042330436  |
| clipfrac           | 0.030273438   |
| explained_variance | 0.8           |
| fps                | 39            |
| n_updates          | 326           |
| policy_entropy     | 0.53019375    |
| policy_loss        | -0.0091494685 |
| serial_timesteps   | 83456         |
| time_elapsed       | 2.57e+03      |
| total_timesteps    | 83456         |
| value_loss         | 4.5933256     |
--------------------------------------
25  cut by time without progress. Steps 340  %advance 35.3  played reward 5.99  last penalty -20
--------------------------------------
| approxkl           | 0.004309893   |
| clipfrac           | 0.049804688   |
| explained_variance | 0.583         |
| fps                | 40            |
| n_updates          | 327           |
| policy_entropy     | 0.60467446    |
| policy_loss        | -0

36  cut by time without progress. Steps 721  %advance 94.9  played reward 57.51  last penalty -20
-------------------------------------
| approxkl           | 0.0081009455 |
| clipfrac           | 0.08886719   |
| explained_variance | 0.665        |
| fps                | 39           |
| n_updates          | 341          |
| policy_entropy     | 0.9044722    |
| policy_loss        | -0.02227994  |
| serial_timesteps   | 87296        |
| time_elapsed       | 2.66e+03     |
| total_timesteps    | 87296        |
| value_loss         | 8.286447     |
-------------------------------------
-------------------------------------
| approxkl           | 0.0052353214 |
| clipfrac           | 0.08105469   |
| explained_variance | -0.279       |
| fps                | 40           |
| n_updates          | 342          |
| policy_entropy     | 0.8792601    |
| policy_loss        | -0.010700126 |
| serial_timesteps   | 87552        |
| time_elapsed       | 2.67e+03     |
| total_timesteps    | 87552

40  cut by time without progress. Steps 937  %advance 98.4  played reward 58.9  last penalty -20
Track generation: 1174..1474 -> 300-tiles track, complex 16
--------------------------------------
| approxkl           | 0.011591472   |
| clipfrac           | 0.13085938    |
| explained_variance | -0.0927       |
| fps                | 36            |
| n_updates          | 354           |
| policy_entropy     | 0.74047995    |
| policy_loss        | -0.0058996263 |
| serial_timesteps   | 90624         |
| time_elapsed       | 2.82e+03      |
| total_timesteps    | 90624         |
| value_loss         | 15.318954     |
--------------------------------------
-------------------------------------
| approxkl           | 0.012471893  |
| clipfrac           | 0.095703125  |
| explained_variance | 0.368        |
| fps                | 42           |
| n_updates          | 355          |
| policy_entropy     | 0.6554837    |
| policy_loss        | -0.014045782 |
| serial_timesteps   | 90880    

--------------------------------------
| approxkl           | 0.011347955   |
| clipfrac           | 0.12011719    |
| explained_variance | -1.28         |
| fps                | 41            |
| n_updates          | 370           |
| policy_entropy     | 0.8685022     |
| policy_loss        | -0.0072588124 |
| serial_timesteps   | 94720         |
| time_elapsed       | 2.92e+03      |
| total_timesteps    | 94720         |
| value_loss         | 0.018723026   |
--------------------------------------
7  Finalized in Steps 746  with return=total_reward 170.6863154173839
-------------------------------------
| approxkl           | 0.016354887  |
| clipfrac           | 0.17089844   |
| explained_variance | 0.0532       |
| fps                | 41           |
| n_updates          | 371          |
| policy_entropy     | 0.7292705    |
| policy_loss        | -0.011544559 |
| serial_timesteps   | 94976        |
| time_elapsed       | 2.93e+03     |
| total_timesteps    | 94976        |
| val

14  cut by time without progress. Steps 304  %advance 36.1  played reward 19.12  last penalty -20
-------------------------------------
| approxkl           | 0.0073909066 |
| clipfrac           | 0.09765625   |
| explained_variance | 0.707        |
| fps                | 39           |
| n_updates          | 386          |
| policy_entropy     | 0.84437454   |
| policy_loss        | -0.012068473 |
| serial_timesteps   | 98816        |
| time_elapsed       | 3.02e+03     |
| total_timesteps    | 98816        |
| value_loss         | 10.403872    |
-------------------------------------
--------------------------------------
| approxkl           | 0.009550682   |
| clipfrac           | 0.15039062    |
| explained_variance | -0.111        |
| fps                | 40            |
| n_updates          | 387           |
| policy_entropy     | 0.8471006     |
| policy_loss        | -0.0093631055 |
| serial_timesteps   | 99072         |
| time_elapsed       | 3.03e+03      |
| total_timesteps 

4  Finalized in Steps 573  with return=total_reward 141.7361702127659
5  Finalized in Steps 572  with return=total_reward 141.8361702127659
Track generation: 988..1238 -> 250-tiles track, complex 12
1  Finalized in Steps 505  with return=total_reward 148.39518072289118
2  cut by time without progress. Steps 564  %advance 98.3  played reward 41.29  last penalty -20
3  cut by time without progress. Steps 637  %advance 95.9  played reward 31.58  last penalty -20
4  cut by time without progress. Steps 243  %advance 32.1  played reward 7.12  last penalty -20
5  cut by time without progress. Steps 341  %advance 54.2  played reward 19.41  last penalty -20
Track generation: 1188..1491 -> 303-tiles track, complex 12
1  retry to generate new track (normal below 20, limit 50)
Track generation: 1008..1265 -> 257-tiles track, complex 12
2  retry to generate new track (normal below 20, limit 50)
Track generation: 1035..1299 -> 264-tiles track, complex 12
Eval num_timesteps=102400, episode_reward=57.

-------------------------------------
| approxkl           | 0.008151196  |
| clipfrac           | 0.10058594   |
| explained_variance | -2.62        |
| fps                | 39           |
| n_updates          | 414          |
| policy_entropy     | 0.86716896   |
| policy_loss        | -0.012705487 |
| serial_timesteps   | 105984       |
| time_elapsed       | 3.32e+03     |
| total_timesteps    | 105984       |
| value_loss         | 0.08153182   |
-------------------------------------
27  Finalized in Steps 671  with return=total_reward 175.3527910433069
-------------------------------------
| approxkl           | 0.0061172876 |
| clipfrac           | 0.09277344   |
| explained_variance | 0.0225       |
| fps                | 43           |
| n_updates          | 415          |
| policy_entropy     | 0.7998964    |
| policy_loss        | -0.005796819 |
| serial_timesteps   | 106240       |
| time_elapsed       | 3.32e+03     |
| total_timesteps    | 106240       |
| value_loss     

-------------------------------------
| approxkl           | 0.011207295  |
| clipfrac           | 0.13574219   |
| explained_variance | -1.58        |
| fps                | 39           |
| n_updates          | 430          |
| policy_entropy     | 0.85527295   |
| policy_loss        | -0.016667249 |
| serial_timesteps   | 110080       |
| time_elapsed       | 3.42e+03     |
| total_timesteps    | 110080       |
| value_loss         | 0.031561892  |
-------------------------------------
35  Finalized in Steps 694  with return=total_reward 169.64964790613135
--------------------------------------
| approxkl           | 0.018134005   |
| clipfrac           | 0.2421875     |
| explained_variance | 0.0383        |
| fps                | 39            |
| n_updates          | 431           |
| policy_entropy     | 0.74320394    |
| policy_loss        | -0.0060036513 |
| serial_timesteps   | 110336        |
| time_elapsed       | 3.43e+03      |
| total_timesteps    | 110336        |
| val

-------------------------------------
| approxkl           | 0.005084831  |
| clipfrac           | 0.06640625   |
| explained_variance | 0.0337       |
| fps                | 41           |
| n_updates          | 445          |
| policy_entropy     | 0.8196004    |
| policy_loss        | 0.0038381706 |
| serial_timesteps   | 113920       |
| time_elapsed       | 3.51e+03     |
| total_timesteps    | 113920       |
| value_loss         | 132.02113    |
-------------------------------------
-------------------------------------
| approxkl           | 0.006867375  |
| clipfrac           | 0.072265625  |
| explained_variance | 0.3          |
| fps                | 41           |
| n_updates          | 446          |
| policy_entropy     | 0.75114566   |
| policy_loss        | -0.015007097 |
| serial_timesteps   | 114176       |
| time_elapsed       | 3.52e+03     |
| total_timesteps    | 114176       |
| value_loss         | 0.10569626   |
-------------------------------------
3  Finalized

8  cut by time without progress. Steps 387  %advance 60.6  played reward 39.1  last penalty -20
--------------------------------------
| approxkl           | 0.0069791516  |
| clipfrac           | 0.10644531    |
| explained_variance | 0.724         |
| fps                | 42            |
| n_updates          | 458           |
| policy_entropy     | 0.951753      |
| policy_loss        | -0.0069070035 |
| serial_timesteps   | 117248        |
| time_elapsed       | 3.68e+03      |
| total_timesteps    | 117248        |
| value_loss         | 12.53726      |
--------------------------------------
9  cut by time without progress. Steps 360  %advance 50.3  played reward 27.92  last penalty -20
-------------------------------------
| approxkl           | 0.010532373  |
| clipfrac           | 0.15722656   |
| explained_variance | 0.727        |
| fps                | 42           |
| n_updates          | 459          |
| policy_entropy     | 0.92046714   |
| policy_loss        | -0.02623395

17  cut by time without progress. Steps 401  %advance 57.9  played reward 35.96  last penalty -20
-------------------------------------
| approxkl           | 0.0061870245 |
| clipfrac           | 0.061523438  |
| explained_variance | 0.846        |
| fps                | 44           |
| n_updates          | 473          |
| policy_entropy     | 0.9156989    |
| policy_loss        | -0.010948476 |
| serial_timesteps   | 121088       |
| time_elapsed       | 3.77e+03     |
| total_timesteps    | 121088       |
| value_loss         | 8.937453     |
-------------------------------------
-------------------------------------
| approxkl           | 0.0074671796 |
| clipfrac           | 0.08984375   |
| explained_variance | -1.47        |
| fps                | 43           |
| n_updates          | 474          |
| policy_entropy     | 0.7132975    |
| policy_loss        | -0.00739183  |
| serial_timesteps   | 121344       |
| time_elapsed       | 3.78e+03     |
| total_timesteps    | 12134

-------------------------------------
| approxkl           | 0.0117074605 |
| clipfrac           | 0.18164062   |
| explained_variance | 0.437        |
| fps                | 41           |
| n_updates          | 489          |
| policy_entropy     | 0.9914566    |
| policy_loss        | -0.011254267 |
| serial_timesteps   | 125184       |
| time_elapsed       | 3.87e+03     |
| total_timesteps    | 125184       |
| value_loss         | 0.03977044   |
-------------------------------------
-------------------------------------
| approxkl           | 0.0074279783 |
| clipfrac           | 0.09375      |
| explained_variance | 0.135        |
| fps                | 40           |
| n_updates          | 490          |
| policy_entropy     | 0.93983686   |
| policy_loss        | -0.009580761 |
| serial_timesteps   | 125440       |
| time_elapsed       | 3.87e+03     |
| total_timesteps    | 125440       |
| value_loss         | 0.025144987  |
-------------------------------------
24  cut by t

------------------------------------
| approxkl           | 0.020490127 |
| clipfrac           | 0.29589844  |
| explained_variance | -0.156      |
| fps                | 42          |
| n_updates          | 502         |
| policy_entropy     | 0.8009903   |
| policy_loss        | -0.01915141 |
| serial_timesteps   | 128512      |
| time_elapsed       | 4.12e+03    |
| total_timesteps    | 128512      |
| value_loss         | 0.039289527 |
------------------------------------
29  Finalized in Steps 634  with return=total_reward 173.55950579714187
--------------------------------------
| approxkl           | 0.008711881   |
| clipfrac           | 0.13574219    |
| explained_variance | 0.0124        |
| fps                | 41            |
| n_updates          | 503           |
| policy_entropy     | 0.7830813     |
| policy_loss        | -0.0004427681 |
| serial_timesteps   | 128768        |
| time_elapsed       | 4.12e+03      |
| total_timesteps    | 128768        |
| value_loss      

35  Finalized in Steps 615  with return=total_reward 177.23221156684164
--------------------------------------
| approxkl           | 0.008831074   |
| clipfrac           | 0.11425781    |
| explained_variance | 0.0425        |
| fps                | 41            |
| n_updates          | 518           |
| policy_entropy     | 0.7128024     |
| policy_loss        | -0.0041638482 |
| serial_timesteps   | 132608        |
| time_elapsed       | 4.21e+03      |
| total_timesteps    | 132608        |
| value_loss         | 124.288826    |
--------------------------------------
--------------------------------------
| approxkl           | 0.013757678   |
| clipfrac           | 0.16992188    |
| explained_variance | 0.0848        |
| fps                | 40            |
| n_updates          | 519           |
| policy_entropy     | 0.6798607     |
| policy_loss        | -0.0052799536 |
| serial_timesteps   | 132864        |
| time_elapsed       | 4.22e+03      |
| total_timesteps    | 132864  

--------------------------------------
| approxkl           | 0.004622276   |
| clipfrac           | 0.05859375    |
| explained_variance | 0.763         |
| fps                | 39            |
| n_updates          | 534           |
| policy_entropy     | 0.72752976    |
| policy_loss        | -0.0062321583 |
| serial_timesteps   | 136704        |
| time_elapsed       | 4.31e+03      |
| total_timesteps    | 136704        |
| value_loss         | 3.1182015     |
--------------------------------------
3  cut by time without progress. Steps 296  %advance 39.9  played reward 20.55  last penalty -20
-------------------------------------
| approxkl           | 0.008174635  |
| clipfrac           | 0.09277344   |
| explained_variance | 0.719        |
| fps                | 33           |
| n_updates          | 535          |
| policy_entropy     | 0.6713495    |
| policy_loss        | -0.013224029 |
| serial_timesteps   | 136960       |
| time_elapsed       | 4.32e+03     |
| total_timestep

10  Finalized in Steps 617  with return=total_reward 175.98044777762297
1  out of limits. Steps 202  %advance 27.7  played reward 6.94  last penalty -100
2  out of limits. Steps 202  %advance 27.7  played reward 6.94  last penalty -100
3  cut by time without progress. Steps 280  %advance 35.2  played reward 6.61  last penalty -20
4  out of limits. Steps 497  %advance 88.2  played reward 37.94  last penalty -100
5  cut by time without progress. Steps 545  %advance 70.8  played reward 15.7  last penalty -20
Track generation: 1039..1302 -> 263-tiles track, complex 12
1  cut by time without progress. Steps 324  %advance 50.0  played reward 16.93  last penalty -20
2  out of limits. Steps 218  %advance 29.3  played reward 6.92  last penalty -100
3  cut by time without progress. Steps 184  %advance 12.9  played reward -6.46  last penalty -20
4  cut by time without progress. Steps 546  %advance 92.3  played reward 36.72  last penalty -20
5  cut by time without progress. Steps 452  %advance 72.

------------------------------------
| approxkl           | 0.009067257 |
| clipfrac           | 0.10449219  |
| explained_variance | 0.6         |
| fps                | 38          |
| n_updates          | 562         |
| policy_entropy     | 0.47168976  |
| policy_loss        | -0.0093789  |
| serial_timesteps   | 143872      |
| time_elapsed       | 4.57e+03    |
| total_timesteps    | 143872      |
| value_loss         | 0.11655991  |
------------------------------------
16  Finalized in Steps 551  with return=total_reward 176.99714312208755
--------------------------------------
| approxkl           | 0.007058132   |
| clipfrac           | 0.09863281    |
| explained_variance | 0.0551        |
| fps                | 42            |
| n_updates          | 563           |
| policy_entropy     | 0.52644825    |
| policy_loss        | -0.0071621714 |
| serial_timesteps   | 144128        |
| time_elapsed       | 4.57e+03      |
| total_timesteps    | 144128        |
| value_loss      

-------------------------------------
| approxkl           | 0.012735873  |
| clipfrac           | 0.12792969   |
| explained_variance | 0.517        |
| fps                | 39           |
| n_updates          | 577          |
| policy_entropy     | 0.62577873   |
| policy_loss        | -0.010844549 |
| serial_timesteps   | 147712       |
| time_elapsed       | 4.66e+03     |
| total_timesteps    | 147712       |
| value_loss         | 0.16082935   |
-------------------------------------
25  cut by time without progress. Steps 414  %advance 63.8  played reward 39.29  last penalty -20
--------------------------------------
| approxkl           | 0.0047707865  |
| clipfrac           | 0.048828125   |
| explained_variance | 0.831         |
| fps                | 41            |
| n_updates          | 578           |
| policy_entropy     | 0.56427854    |
| policy_loss        | -0.0021329927 |
| serial_timesteps   | 147968        |
| time_elapsed       | 4.67e+03      |
| total_timesteps 

-------------------------------------
| approxkl           | 0.020644857  |
| clipfrac           | 0.21972656   |
| explained_variance | 0.488        |
| fps                | 41           |
| n_updates          | 592          |
| policy_entropy     | 0.626087     |
| policy_loss        | -0.003219091 |
| serial_timesteps   | 151552       |
| time_elapsed       | 4.76e+03     |
| total_timesteps    | 151552       |
| value_loss         | 0.11921152   |
-------------------------------------
-------------------------------------
| approxkl           | 0.013688641  |
| clipfrac           | 0.18457031   |
| explained_variance | -1.88        |
| fps                | 41           |
| n_updates          | 593          |
| policy_entropy     | 0.86057687   |
| policy_loss        | -0.005495554 |
| serial_timesteps   | 151808       |
| time_elapsed       | 4.76e+03     |
| total_timesteps    | 151808       |
| value_loss         | 0.0389522    |
-------------------------------------
34  Finalize

40  cut by time without progress. Steps 356  %advance 55.9  played reward 33.33  last penalty -20
Track generation: 1263..1581 -> 318-tiles track, complex 16
--------------------------------------
| approxkl           | 0.005274032   |
| clipfrac           | 0.078125      |
| explained_variance | 0.901         |
| fps                | 41            |
| n_updates          | 604           |
| policy_entropy     | 0.62375325    |
| policy_loss        | -0.0038543758 |
| serial_timesteps   | 154624        |
| time_elapsed       | 4.92e+03      |
| total_timesteps    | 154624        |
| value_loss         | 3.5960898     |
--------------------------------------
1  cut by time without progress. Steps 371  %advance 46.3  played reward 22.77  last penalty -20
-------------------------------------
| approxkl           | 0.013553007  |
| clipfrac           | 0.12597656   |
| explained_variance | 0.859        |
| fps                | 38           |
| n_updates          | 605          |
| policy_e

14  cut by time without progress. Steps 258  %advance 26.4  played reward 10.64  last penalty -20
--------------------------------------
| approxkl           | 0.0057208287  |
| clipfrac           | 0.07324219    |
| explained_variance | 0.859         |
| fps                | 36            |
| n_updates          | 618           |
| policy_entropy     | 0.47271535    |
| policy_loss        | -0.0031067382 |
| serial_timesteps   | 158208        |
| time_elapsed       | 5.01e+03      |
| total_timesteps    | 158208        |
| value_loss         | 7.579896      |
--------------------------------------
-------------------------------------
| approxkl           | 0.008102442  |
| clipfrac           | 0.123046875  |
| explained_variance | -2.38        |
| fps                | 39           |
| n_updates          | 619          |
| policy_entropy     | 0.44422537   |
| policy_loss        | -0.010314986 |
| serial_timesteps   | 158464       |
| time_elapsed       | 5.02e+03     |
| total_timeste

24  cut by time without progress. Steps 737  %advance 98.4  played reward 62.02  last penalty -20
-------------------------------------
| approxkl           | 0.0052406057 |
| clipfrac           | 0.07421875   |
| explained_variance | -0.0202      |
| fps                | 39           |
| n_updates          | 633          |
| policy_entropy     | 0.7727216    |
| policy_loss        | -0.005738085 |
| serial_timesteps   | 162048       |
| time_elapsed       | 5.11e+03     |
| total_timesteps    | 162048       |
| value_loss         | 14.338197    |
-------------------------------------
-------------------------------------
| approxkl           | 0.009086076  |
| clipfrac           | 0.12207031   |
| explained_variance | 0.434        |
| fps                | 39           |
| n_updates          | 634          |
| policy_entropy     | 0.7993995    |
| policy_loss        | -0.012393181 |
| serial_timesteps   | 162304       |
| time_elapsed       | 5.12e+03     |
| total_timesteps    | 16230

31  Finalized in Steps 750  with return=total_reward 168.03765455778745
--------------------------------------
| approxkl           | 0.001854124   |
| clipfrac           | 0.016601562   |
| explained_variance | 0.0355        |
| fps                | 40            |
| n_updates          | 649           |
| policy_entropy     | 0.8377444     |
| policy_loss        | -0.0033683055 |
| serial_timesteps   | 166144        |
| time_elapsed       | 5.22e+03      |
| total_timesteps    | 166144        |
| value_loss         | 81.89981      |
--------------------------------------
1  Finalized in Steps 502  with return=total_reward 148.74169884169876
2  Finalized in Steps 502  with return=total_reward 148.74169884169876
3  Finalized in Steps 513  with return=total_reward 147.64169884169877
4  Finalized in Steps 501  with return=total_reward 148.84169884169876
5  Finalized in Steps 540  with return=total_reward 144.94169884169878
Track generation: 864..1083 -> 219-tiles track, complex 12
1  Fina

--------------------------------------
| approxkl           | 0.02509031    |
| clipfrac           | 0.2578125     |
| explained_variance | -0.171        |
| fps                | 41            |
| n_updates          | 662           |
| policy_entropy     | 0.7501198     |
| policy_loss        | -0.0029263399 |
| serial_timesteps   | 169472        |
| time_elapsed       | 5.4e+03       |
| total_timesteps    | 169472        |
| value_loss         | 0.03893141    |
--------------------------------------
-------------------------------------
| approxkl           | 0.014480807  |
| clipfrac           | 0.16601562   |
| explained_variance | -0.638       |
| fps                | 40           |
| n_updates          | 663          |
| policy_entropy     | 0.83114994   |
| policy_loss        | -0.019863594 |
| serial_timesteps   | 169728       |
| time_elapsed       | 5.41e+03     |
| total_timesteps    | 169728       |
| value_loss         | 0.023041276  |
-------------------------------------

6  Finalized in Steps 747  with return=total_reward 166.22587094770705
------------------------------------
| approxkl           | 0.013611682 |
| clipfrac           | 0.17675781  |
| explained_variance | 0.00405     |
| fps                | 40          |
| n_updates          | 677         |
| policy_entropy     | 0.62327725  |
| policy_loss        | -0.01011909 |
| serial_timesteps   | 173312      |
| time_elapsed       | 5.5e+03     |
| total_timesteps    | 173312      |
| value_loss         | 138.60782   |
------------------------------------
-------------------------------------
| approxkl           | 0.007448026  |
| clipfrac           | 0.08300781   |
| explained_variance | 0.653        |
| fps                | 40           |
| n_updates          | 678          |
| policy_entropy     | 0.74643445   |
| policy_loss        | -0.010761791 |
| serial_timesteps   | 173568       |
| time_elapsed       | 5.5e+03      |
| total_timesteps    | 173568       |
| value_loss         | 0.07075

12  cut by time without progress. Steps 587  %advance 73.5  played reward 47.89  last penalty -20
-------------------------------------
| approxkl           | 0.0076955343 |
| clipfrac           | 0.08691406   |
| explained_variance | 0.304        |
| fps                | 40           |
| n_updates          | 693          |
| policy_entropy     | 0.6809596    |
| policy_loss        | -0.00777385  |
| serial_timesteps   | 177408       |
| time_elapsed       | 5.6e+03      |
| total_timesteps    | 177408       |
| value_loss         | 14.618947    |
-------------------------------------
-------------------------------------
| approxkl           | 0.008191788  |
| clipfrac           | 0.11035156   |
| explained_variance | 0.821        |
| fps                | 40           |
| n_updates          | 694          |
| policy_entropy     | 0.63836586   |
| policy_loss        | -0.013929114 |
| serial_timesteps   | 177664       |
| time_elapsed       | 5.61e+03     |
| total_timesteps    | 17766

-------------------------------------
| approxkl           | 0.008164858  |
| clipfrac           | 0.107421875  |
| explained_variance | 0.45         |
| fps                | 41           |
| n_updates          | 706          |
| policy_entropy     | 0.64564663   |
| policy_loss        | -0.012791833 |
| serial_timesteps   | 180736       |
| time_elapsed       | 5.81e+03     |
| total_timesteps    | 180736       |
| value_loss         | 0.082771845  |
-------------------------------------
-------------------------------------
| approxkl           | 0.009836855  |
| clipfrac           | 0.13671875   |
| explained_variance | -2.36        |
| fps                | 40           |
| n_updates          | 707          |
| policy_entropy     | 0.72703415   |
| policy_loss        | -0.008531902 |
| serial_timesteps   | 180992       |
| time_elapsed       | 5.82e+03     |
| total_timesteps    | 180992       |
| value_loss         | 0.06647286   |
-------------------------------------
18  cut by t

24  cut by time without progress. Steps 206  %advance 21.6  played reward 7.94  last penalty -20
25  cut by time without progress. Steps 100  %advance 6.9  played reward -2.63  last penalty -20
-------------------------------------
| approxkl           | 0.011900184  |
| clipfrac           | 0.13378906   |
| explained_variance | 0.669        |
| fps                | 38           |
| n_updates          | 722          |
| policy_entropy     | 0.8372662    |
| policy_loss        | -0.013583425 |
| serial_timesteps   | 184832       |
| time_elapsed       | 5.91e+03     |
| total_timesteps    | 184832       |
| value_loss         | 22.263096    |
-------------------------------------
-------------------------------------
| approxkl           | 0.010100218  |
| clipfrac           | 0.09863281   |
| explained_variance | -1.49        |
| fps                | 41           |
| n_updates          | 723          |
| policy_entropy     | 0.71180916   |
| policy_loss        | -0.008304235 |
| serial

31  Finalized in Steps 747  with return=total_reward 171.14098253060166
------------------------------------
| approxkl           | 0.010508136 |
| clipfrac           | 0.13085938  |
| explained_variance | 0.0125      |
| fps                | 40          |
| n_updates          | 738         |
| policy_entropy     | 0.8852314   |
| policy_loss        | -0.0093148  |
| serial_timesteps   | 188928      |
| time_elapsed       | 6.01e+03    |
| total_timesteps    | 188928      |
| value_loss         | 134.78214   |
------------------------------------
--------------------------------------
| approxkl           | 0.00492163    |
| clipfrac           | 0.057617188   |
| explained_variance | 0.757         |
| fps                | 40            |
| n_updates          | 739           |
| policy_entropy     | 0.8686454     |
| policy_loss        | -0.0112071205 |
| serial_timesteps   | 189184        |
| time_elapsed       | 6.02e+03      |
| total_timesteps    | 189184        |
| value_loss      

-------------------------------------
| approxkl           | 0.025493933  |
| clipfrac           | 0.18847656   |
| explained_variance | 0.553        |
| fps                | 41           |
| n_updates          | 752          |
| policy_entropy     | 0.6341352    |
| policy_loss        | -0.012282325 |
| serial_timesteps   | 192512       |
| time_elapsed       | 6.25e+03     |
| total_timesteps    | 192512       |
| value_loss         | 0.0407965    |
-------------------------------------
36  Finalized in Steps 718  with return=total_reward 171.30200800554658
--------------------------------------
| approxkl           | 0.004608479   |
| clipfrac           | 0.056640625   |
| explained_variance | 0.0434        |
| fps                | 38            |
| n_updates          | 753           |
| policy_entropy     | 0.72785074    |
| policy_loss        | -0.0025048577 |
| serial_timesteps   | 192768        |
| time_elapsed       | 6.25e+03      |
| total_timesteps    | 192768        |
| val

2  Finalized in Steps 622  with return=total_reward 176.5470767744651
--------------------------------------
| approxkl           | 0.014884351   |
| clipfrac           | 0.18164062    |
| explained_variance | 0.0315        |
| fps                | 40            |
| n_updates          | 768           |
| policy_entropy     | 0.6770319     |
| policy_loss        | -0.0010400893 |
| serial_timesteps   | 196608        |
| time_elapsed       | 6.35e+03      |
| total_timesteps    | 196608        |
| value_loss         | 129.66066     |
--------------------------------------
-------------------------------------
| approxkl           | 0.005928684  |
| clipfrac           | 0.064453125  |
| explained_variance | 0.08         |
| fps                | 41           |
| n_updates          | 769          |
| policy_entropy     | 0.7023469    |
| policy_loss        | -0.008170229 |
| serial_timesteps   | 196864       |
| time_elapsed       | 6.35e+03     |
| total_timesteps    | 196864       |
| val

10  Finalized in Steps 621  with return=total_reward 176.4495962282245
-------------------------------------
| approxkl           | 0.009759563  |
| clipfrac           | 0.1171875    |
| explained_variance | 0.0508       |
| fps                | 40           |
| n_updates          | 784          |
| policy_entropy     | 0.5647086    |
| policy_loss        | 0.0011531585 |
| serial_timesteps   | 200704       |
| time_elapsed       | 6.45e+03     |
| total_timesteps    | 200704       |
| value_loss         | 123.09936    |
-------------------------------------
-------------------------------------
| approxkl           | 0.01065471   |
| clipfrac           | 0.119140625  |
| explained_variance | -0.346       |
| fps                | 40           |
| n_updates          | 785          |
| policy_entropy     | 0.5434929    |
| policy_loss        | -0.013674167 |
| serial_timesteps   | 200960       |
| time_elapsed       | 6.45e+03     |
| total_timesteps    | 200960       |
| value_loss     

17  Finalized in Steps 560  with return=total_reward 178.2309134519063
1  Finalized in Steps 582  with return=total_reward 140.865517241379
2  Finalized in Steps 582  with return=total_reward 140.865517241379
3  cut by time without progress. Steps 712  %advance 98.9  played reward 27.17  last penalty -20
4  cut by time without progress. Steps 657  %advance 98.9  played reward 32.67  last penalty -20
5  cut by time without progress. Steps 412  %advance 56.8  played reward 15.1  last penalty -20
Track generation: 993..1246 -> 253-tiles track, complex 12
1  retry to generate new track (normal below 20, limit 50)
Track generation: 942..1181 -> 239-tiles track, complex 12
1  Finalized in Steps 471  with return=total_reward 151.73949579831893
2  Finalized in Steps 466  with return=total_reward 152.23949579831896
3  cut by time without progress. Steps 414  %advance 71.4  played reward 29.28  last penalty -20
4  Finalized in Steps 458  with return=total_reward 153.03949579831897
5  Finalized i

--------------------------------------
| approxkl           | 0.007171079   |
| clipfrac           | 0.07128906    |
| explained_variance | 0.796         |
| fps                | 43            |
| n_updates          | 813           |
| policy_entropy     | 0.70964485    |
| policy_loss        | -0.0101648765 |
| serial_timesteps   | 208128        |
| time_elapsed       | 6.76e+03      |
| total_timesteps    | 208128        |
| value_loss         | 3.4398987     |
--------------------------------------
23  cut by time without progress. Steps 487  %advance 77.8  played reward 52.91  last penalty -20
-------------------------------------
| approxkl           | 0.016205054  |
| clipfrac           | 0.1640625    |
| explained_variance | 0.724        |
| fps                | 44           |
| n_updates          | 814          |
| policy_entropy     | 0.58960795   |
| policy_loss        | -0.020118399 |
| serial_timesteps   | 208384       |
| time_elapsed       | 6.77e+03     |
| total_timeste

-------------------------------------
| approxkl           | 0.019345056  |
| clipfrac           | 0.19824219   |
| explained_variance | 0.704        |
| fps                | 42           |
| n_updates          | 828          |
| policy_entropy     | 0.52602303   |
| policy_loss        | -0.009514229 |
| serial_timesteps   | 211968       |
| time_elapsed       | 6.86e+03     |
| total_timesteps    | 211968       |
| value_loss         | 0.5403865    |
-------------------------------------
33  cut by time without progress. Steps 580  %advance 98.2  played reward 70.98  last penalty -20
-------------------------------------
| approxkl           | 0.0063687386 |
| clipfrac           | 0.08886719   |
| explained_variance | -0.0852      |
| fps                | 43           |
| n_updates          | 829          |
| policy_entropy     | 0.5435819    |
| policy_loss        | -0.008193301 |
| serial_timesteps   | 212224       |
| time_elapsed       | 6.86e+03     |
| total_timesteps    | 21222

5  cut by time without progress. Steps 206  %advance 18.2  played reward -0.45  last penalty -20
-------------------------------------
| approxkl           | 0.008438535  |
| clipfrac           | 0.12402344   |
| explained_variance | 0.869        |
| fps                | 40           |
| n_updates          | 843          |
| policy_entropy     | 0.5845675    |
| policy_loss        | -0.009459348 |
| serial_timesteps   | 215808       |
| time_elapsed       | 6.95e+03     |
| total_timesteps    | 215808       |
| value_loss         | 10.883862    |
-------------------------------------
------------------------------------
| approxkl           | 0.024894316 |
| clipfrac           | 0.17285156  |
| explained_variance | -0.305      |
| fps                | 38          |
| n_updates          | 844         |
| policy_entropy     | 0.6931777   |
| policy_loss        | -0.02061168 |
| serial_timesteps   | 216064      |
| time_elapsed       | 6.95e+03    |
| total_timesteps    | 216064      |
| 

-------------------------------------
| approxkl           | 0.012205603  |
| clipfrac           | 0.17285156   |
| explained_variance | 0.0894       |
| fps                | 40           |
| n_updates          | 856          |
| policy_entropy     | 0.66208553   |
| policy_loss        | -0.013746248 |
| serial_timesteps   | 219136       |
| time_elapsed       | 7.18e+03     |
| total_timesteps    | 219136       |
| value_loss         | 0.03093282   |
-------------------------------------
11  Finalized in Steps 603  with return=total_reward 174.50879418768153
-------------------------------------
| approxkl           | 0.05688161   |
| clipfrac           | 0.31445312   |
| explained_variance | 0.026        |
| fps                | 41           |
| n_updates          | 857          |
| policy_entropy     | 0.515086     |
| policy_loss        | -0.012191024 |
| serial_timesteps   | 219392       |
| time_elapsed       | 7.18e+03     |
| total_timesteps    | 219392       |
| value_loss    

17  Finalized in Steps 627  with return=total_reward 175.07582805758832
-------------------------------------
| approxkl           | 0.008348719  |
| clipfrac           | 0.103515625  |
| explained_variance | 0.0517       |
| fps                | 40           |
| n_updates          | 872          |
| policy_entropy     | 0.48067194   |
| policy_loss        | 0.0012410551 |
| serial_timesteps   | 223232       |
| time_elapsed       | 7.28e+03     |
| total_timesteps    | 223232       |
| value_loss         | 122.216324   |
-------------------------------------
------------------------------------
| approxkl           | 0.009995224 |
| clipfrac           | 0.09863281  |
| explained_variance | 0.66        |
| fps                | 41          |
| n_updates          | 873         |
| policy_entropy     | 0.5909001   |
| policy_loss        | -0.00781358 |
| serial_timesteps   | 223488      |
| time_elapsed       | 7.28e+03    |
| total_timesteps    | 223488      |
| value_loss         | 0.08

24  cut by time without progress. Steps 377  %advance 44.2  played reward 20.38  last penalty -20
-------------------------------------
| approxkl           | 0.012662539  |
| clipfrac           | 0.13769531   |
| explained_variance | 0.702        |
| fps                | 42           |
| n_updates          | 888          |
| policy_entropy     | 0.5240374    |
| policy_loss        | -0.011684133 |
| serial_timesteps   | 227328       |
| time_elapsed       | 7.38e+03     |
| total_timesteps    | 227328       |
| value_loss         | 9.747815     |
-------------------------------------
25  cut by time without progress. Steps 283  %advance 30.9  played reward 11.03  last penalty -20
-------------------------------------
| approxkl           | 0.025039677  |
| clipfrac           | 0.203125     |
| explained_variance | 0.857        |
| fps                | 41           |
| n_updates          | 889          |
| policy_entropy     | 0.49229553   |
| policy_loss        | -0.015391761 |
| seri

30  Finalized in Steps 670  with return=total_reward 174.2092389964152
--------------------------------------
| approxkl           | 0.014347287   |
| clipfrac           | 0.14355469    |
| explained_variance | 0.0327        |
| fps                | 42            |
| n_updates          | 901           |
| policy_entropy     | 0.5371594     |
| policy_loss        | -0.0020383182 |
| serial_timesteps   | 230656        |
| time_elapsed       | 7.61e+03      |
| total_timesteps    | 230656        |
| value_loss         | 27.583483     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0064884485  |
| clipfrac           | 0.048828125   |
| explained_variance | 0.646         |
| fps                | 41            |
| n_updates          | 902           |
| policy_entropy     | 0.56743085    |
| policy_loss        | -0.0046935263 |
| serial_timesteps   | 230912        |
| time_elapsed       | 7.62e+03      |
| total_timesteps    | 230912   

36  Finalized in Steps 711  with return=total_reward 171.90327544058607
------------------------------------
| approxkl           | 0.010225345 |
| clipfrac           | 0.09667969  |
| explained_variance | 0.0507      |
| fps                | 40          |
| n_updates          | 917         |
| policy_entropy     | 0.5463708   |
| policy_loss        | 0.002186982 |
| serial_timesteps   | 234752      |
| time_elapsed       | 7.71e+03    |
| total_timesteps    | 234752      |
| value_loss         | 128.63866   |
------------------------------------
-------------------------------------
| approxkl           | 0.008149607  |
| clipfrac           | 0.123046875  |
| explained_variance | 0.261        |
| fps                | 36           |
| n_updates          | 918          |
| policy_entropy     | 0.6371521    |
| policy_loss        | -0.009099663 |
| serial_timesteps   | 235008       |
| time_elapsed       | 7.72e+03     |
| total_timesteps    | 235008       |
| value_loss         | 0.0496

------------------------------------
| approxkl           | 0.007641207 |
| clipfrac           | 0.083984375 |
| explained_variance | 0.701       |
| fps                | 42          |
| n_updates          | 933         |
| policy_entropy     | 0.61096823  |
| policy_loss        | 0.003343213 |
| serial_timesteps   | 238848      |
| time_elapsed       | 7.81e+03    |
| total_timesteps    | 238848      |
| value_loss         | 1.8604497   |
------------------------------------
3  cut by time without progress. Steps 364  %advance 48.1  played reward 27.25  last penalty -20
-------------------------------------
| approxkl           | 0.0077003227 |
| clipfrac           | 0.072265625  |
| explained_variance | 0.795        |
| fps                | 42           |
| n_updates          | 934          |
| policy_entropy     | 0.5572649    |
| policy_loss        | -0.008751893 |
| serial_timesteps   | 239104       |
| time_elapsed       | 7.82e+03     |
| total_timesteps    | 239104       |
| va

-------------------------------------
| approxkl           | 0.006581408  |
| clipfrac           | 0.095703125  |
| explained_variance | -0.0493      |
| fps                | 38           |
| n_updates          | 949          |
| policy_entropy     | 0.65621495   |
| policy_loss        | -0.010630509 |
| serial_timesteps   | 242944       |
| time_elapsed       | 7.91e+03     |
| total_timesteps    | 242944       |
| value_loss         | 0.018223226  |
-------------------------------------
1  Finalized in Steps 606  with return=total_reward 138.3888888888886
2  Finalized in Steps 606  with return=total_reward 138.3888888888886
3  Finalized in Steps 630  with return=total_reward 135.9888888888886
4  Finalized in Steps 596  with return=total_reward 139.38888888888863
5  Finalized in Steps 604  with return=total_reward 138.58888888888862
Track generation: 1187..1487 -> 300-tiles track, complex 12
1  Finalized in Steps 660  with return=total_reward 133.09665551839427
2  Finalized in Steps 6

-------------------------------------
| approxkl           | 0.0060126167 |
| clipfrac           | 0.095703125  |
| explained_variance | -0.585       |
| fps                | 38           |
| n_updates          | 963          |
| policy_entropy     | 0.59434736   |
| policy_loss        | -0.006649752 |
| serial_timesteps   | 246528       |
| time_elapsed       | 8.15e+03     |
| total_timesteps    | 246528       |
| value_loss         | 0.040594712  |
-------------------------------------
-------------------------------------
| approxkl           | 0.008267656  |
| clipfrac           | 0.1015625    |
| explained_variance | 0.607        |
| fps                | 41           |
| n_updates          | 964          |
| policy_entropy     | 0.65774244   |
| policy_loss        | -0.009428497 |
| serial_timesteps   | 246784       |
| time_elapsed       | 8.16e+03     |
| total_timesteps    | 246784       |
| value_loss         | 0.021962423  |
-------------------------------------
14  Finalize

19  Finalized in Steps 718  with return=total_reward 173.37029046587995
--------------------------------------
| approxkl           | 0.0061029242  |
| clipfrac           | 0.0859375     |
| explained_variance | 0.0748        |
| fps                | 41            |
| n_updates          | 979           |
| policy_entropy     | 0.3959881     |
| policy_loss        | -0.0010101837 |
| serial_timesteps   | 250624        |
| time_elapsed       | 8.25e+03      |
| total_timesteps    | 250624        |
| value_loss         | 125.00241     |
--------------------------------------
--------------------------------------
| approxkl           | 0.012515646   |
| clipfrac           | 0.078125      |
| explained_variance | 0.429         |
| fps                | 41            |
| n_updates          | 980           |
| policy_entropy     | 0.42554224    |
| policy_loss        | -0.0075946236 |
| serial_timesteps   | 250880        |
| time_elapsed       | 8.26e+03      |
| total_timesteps    | 250880  

--------------------------------------
| approxkl           | 0.009647422   |
| clipfrac           | 0.09472656    |
| explained_variance | 0.371         |
| fps                | 42            |
| n_updates          | 995           |
| policy_entropy     | 0.5530812     |
| policy_loss        | -0.0011547611 |
| serial_timesteps   | 254720        |
| time_elapsed       | 8.35e+03      |
| total_timesteps    | 254720        |
| value_loss         | 0.0709304     |
--------------------------------------
25  Finalized in Steps 701  with return=total_reward 174.52650571920242
------------------------------------
| approxkl           | 0.01209136  |
| clipfrac           | 0.17089844  |
| explained_variance | 0.122       |
| fps                | 42          |
| n_updates          | 996         |
| policy_entropy     | 0.6407254   |
| policy_loss        | 0.002174265 |
| serial_timesteps   | 254976      |
| time_elapsed       | 8.35e+03    |
| total_timesteps    | 254976      |
| value_loss  

In [15]:
# Persist whichever model was trained in this session and keep a handle
# on its parameter list for later inspection.
if not new_mod:
    # Continued training of a previously loaded model: reuse the existing
    # base path in `file` and tag the checkpoint with the 'II' suffix.
    model.save(file+'II', cloudpickle=True)
    param_list = model.get_parameter_list()
else:
    # New model: build a file name that encodes the environment and
    # training settings so that incompatible checkpoints are easy to spot.
    file = root+'c{:d}_f{:d}_s{:d}_{}_a{:d}__u{:d}_e{:d}_p{}_bs{:d}'.format(
        game_color, fpst, skip, indicators, len(actions),
        use, ept, patience, batch_size2)
    model2.save(file, cloudpickle=True)
    param_list = model2.get_parameter_list()


In [16]:
# Close env2 and env_test once the model has been saved.
# NOTE(review): presumably these are the training and evaluation
# environments created earlier in the notebook -- confirm.
env2.close()
env_test.close()

In [27]:
## Enjoy last trained policy
# Roll out `model` deterministically for a single episode (capped at 1002
# steps) in a separate environment, rendering every step, then print the
# final reward / done flag / step count / info.

if agent=='CarRacing-v2':  #create an independent test environment
    env3 = gym.make(agent, seed=None,
        game_color=game_color,
        indicators = True,
        frames_per_state = fpst,
        skip_frames = skip,
#        discre = actions,
        use_track = 2,
        episodes_per_track = 1,
        tr_complexity = 12,
        patience = 5.0,
        # Keyword names must be valid identifiers: the previous spelling
        # `off-track = 5.0` is a SyntaxError and the cell could not run.
        # NOTE(review): `off_track` is assumed from the snake_case naming of
        # the other kwargs -- confirm against the environment's signature.
        off_track = 5.0 )
else:
    env3 = gym.make(agent)

# Wrap the raw env in a vectorised env, which is what model.predict expects.
# NOTE(review): the lambda refers to `env3` by name while `env3` is being
# rebound; this relies on DummyVecEnv calling the factory inside its
# constructor (before the assignment completes) -- confirm.
env3 = DummyVecEnv([lambda: env3])
obs = env3.reset()
print(obs.shape)

done = False
pasos = 0          # number of steps taken so far
_states=None

# `done` comes back from the vectorised env as a length-1 array, so
# `not done` is well-defined here (single environment only).
while not done and pasos<1002:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env3.step(action)
    env3.render()
    pasos+=1

env3.close()
print()
print(reward, done, pasos, info)

Track generation: 1164..1458 -> 294-tiles track, complex 12
(1, 96, 96, 4)
1  Finalized in Steps 644  with return=total_reward 134.67610921501688

[100.] [ True] 644 [{'terminal_observation': array([[[176.549, 176.549, 161.874, 161.874],
        [176.549, 176.549, 161.874, 161.874],
        [176.549, 176.549, 161.874, 161.874],
        ...,
        [161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874]],

       [[176.549, 176.549, 161.874, 161.874],
        [176.549, 176.549, 161.874, 161.874],
        [176.549, 176.549, 161.874, 161.874],
        ...,
        [161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 161.874, 161.874]],

       [[176.549, 176.549, 161.874, 161.874],
        [176.549, 176.549, 161.874, 161.874],
        [176.549, 176.549, 161.874, 161.874],
        ...,
        [161.874, 161.874, 161.874, 161.874],
        [161.874, 161.874, 16

In [28]:
## Enjoy best eval_policy
# Replay a single episode (at most 1002 steps) using the checkpoint stored
# as 'best_model' under eval_log, rendering each step.

obs = env3.reset()
print(obs.shape)

## Load bestmodel from eval
#if not isinstance(model_test, PPO2):
model_test = PPO2.load(eval_log+'best_model', env3)

# Episode bookkeeping: step counter, termination flag, predictor state.
pasos, done, _states = 0, False, None

# Stop as soon as the episode reports done or the step budget is used up.
while not (done or pasos >= 1002):
    action, _states = model_test.predict(obs, deterministic=True)
    obs, reward, done, info = env3.step(action)
    env3.render()
    pasos = pasos + 1

env3.close()
print()
print(reward, done, pasos)
print(action, _states)

Track generation: 1084..1358 -> 274-tiles track, complex 12
(1, 96, 96, 4)
0  Finalized in Steps 544  with return=total_reward 144.60109890109854

[100.] [ True] 544
[3] None


In [19]:
# Save model_test (the model loaded from eval_log+'best_model') next to the
# trained model, using the same base path `file` plus an '_evalbest' suffix.
model_test.save(file+'_evalbest', cloudpickle=True)

In [20]:
# Close env2.
# NOTE(review): env2 is also closed (together with env_test) in the cell
# right after the model is saved; this cell looks redundant -- confirm.
env2.close()

In [21]:
# Close env3, the environment used by the "Enjoy ..." playback cells.
# NOTE(review): both playback cells already call env3.close() after their
# loop; this cell looks redundant -- confirm.
env3.close()

In [22]:
# Close env_test.
# NOTE(review): env_test is also closed (together with env2) in the cell
# right after the model is saved; this cell looks redundant -- confirm.
env_test.close()

In [23]:
# Debug output: show the last action and the second value returned by the
# most recent predict() call in a playback cell.
print(action, _states)

[0] None


In [24]:
# Display the shape of the most recent observation returned by env3.
obs.shape

(1, 96, 96, 4)