# Halite 

## 

In [354]:
import gym
import time
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.insert(0, "../Environment/")
import halite_env as Env

In [365]:
from importlib import reload
reload(Env)

<module 'halite_env' from '../Environment/halite_env.py'>

In [366]:
num_players = 1
map_size = 7 # 7 x 7 map

In [367]:
HEnv = Env.HaliteEnv(num_players, map_size)

In [368]:
def Single_ship_env_initialization(map_size, num_players = 1):
    HEnv = Env.HaliteEnv(num_players, map_size)
    return Env.SingleShipEnv(HEnv, 1, map_size)

def check_position(env):
    print(env.state[:,:,:])


In [369]:
env = Single_ship_env_initialization(map_size)
env.reset()
check_position(env)

[[[227   0   0   0   0]
  [126   0   0   0   0]
  [889   0   0   0   0]
  [585   0   0   0   0]
  [ 34   0   0   0   0]
  [422   0   0   0   0]
  [782   0   0   0   0]]

 [[596   0   0   0   0]
  [709   0   0   0   0]
  [949   0   0   0   0]
  [822   0   0   0   0]
  [557   0   0   0   0]
  [ 57   0   0   0   0]
  [338   0   0   0   0]]

 [[ 91   0   0   0   0]
  [257   0   0   0   0]
  [311   0   0   0   0]
  [331   0   0   0   0]
  [174   0   0   0   0]
  [430   0   0   0   0]
  [767   0   0   0   0]]

 [[353   0   0   0   0]
  [988   0   0   0   0]
  [249   0   0   0   0]
  [  0   1   0   1   1]
  [102   0   0   0   0]
  [638   0   0   0   0]
  [200   0   0   0   0]]

 [[453   0   0   0   0]
  [ 50   0   0   0   0]
  [366   0   0   0   0]
  [ 62   0   0   0   0]
  [771   0   0   0   0]
  [614   0   0   0   0]
  [158   0   0   0   0]]

 [[271   0   0   0   0]
  [580   0   0   0   0]
  [599   0   0   0   0]
  [473   0   0   0   0]
  [763   0   0   0   0]
  [581   0   0   0   0]
  [211

In [370]:
env.step(1)
check_position(env)


[0]
[[[227   0   0   0   0]
  [126   0   0   0   0]
  [889   0   0   0   0]
  [585   0   0   0   0]
  [ 34   0   0   0   0]
  [422   0   0   0   0]
  [782   0   0   0   0]]

 [[596   0   0   0   0]
  [709   0   0   0   0]
  [949   0   0   0   0]
  [822   0   0   0   0]
  [557   0   0   0   0]
  [ 57   0   0   0   0]
  [338   0   0   0   0]]

 [[ 91   0   0   0   0]
  [257   0   0   0   0]
  [311   0   0   0   0]
  [331   0   0   0   0]
  [174   0   0   0   0]
  [430   0   0   0   0]
  [767   0   0   0   0]]

 [[353   0   0   0   0]
  [988   0   0   0   0]
  [249   0   0   0   0]
  [  0   0   0   1   0]
  [102   0   0   0   0]
  [638   0   0   0   0]
  [200   0   0   0   0]]

 [[453   0   0   0   0]
  [ 50   0   0   0   0]
  [366   0   0   0   0]
  [ 62   1   0   0   1]
  [771   0   0   0   0]
  [614   0   0   0   0]
  [158   0   0   0   0]]

 [[271   0   0   0   0]
  [580   0   0   0   0]
  [599   0   0   0   0]
  [473   0   0   0   0]
  [763   0   0   0   0]
  [581   0   0   0   0]
  

## Baselines

In [371]:
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines.common import set_global_seeds
from stable_baselines import PPO2, ACKTR


MULTI = False
obs_type = "raw"
n_timesteps = 250
map_size = 7

In [372]:
def make_env(map_size, rank, seed=1234):
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param rank: (int) index of the subprocess
    :param seed: (int) the inital seed for RNG
    """

    def _init():
        env = Single_ship_env_initialization(map_size)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init

def evaluate(model, num_steps=1000):
    """
    Evaluate a RL agent
    :param model: (BaseRLModel object) the RL Agent
    :param num_steps: (int) number of timesteps to evaluate it
    :return: (float) Mean reward
    """
    episode_rewards = [[0.0] for _ in range(env.num_envs)]
    obs = env.reset()
    for i in range(num_steps):
        # _states are only useful when using LSTM policies
        actions, _states = model.predict(obs)
        # here, action, rewards and dones are arrays
        # because we are using vectorized env
        obs, rewards, dones, info = env.step(actions)

        # Stats
        for i in range(env.num_envs):
            episode_rewards[i][-1] += rewards[i]
            if dones[i]:
                episode_rewards[i].append(0.0)

    mean_rewards = [0.0 for _ in range(env.num_envs)]
    n_episodes = 0
    for i in range(env.num_envs):
        mean_rewards[i] = np.mean(episode_rewards[i])
        n_episodes += len(episode_rewards[i])

    # Compute mean reward
    mean_reward = round(np.mean(mean_rewards), 1)
    print("Mean reward:", mean_reward, "Num episodes:", n_episodes)

    return mean_reward





if MULTI:
    num_cpu = 32  # Number of processes to use
    # Create the vectorized environment
    env = SubprocVecEnv([make_env(map_size) for i in range(num_cpu)])
else:
    # env = SingleSnek(obs_type="rgb", n_food=3)
    env = Single_ship_env_initialization(map_size)
    # The algorithms require a vectorized environment to run
    env = DummyVecEnv([lambda: env])


ob = env.reset()



In [373]:
model = PPO2(MlpPolicy, env, verbose=1, tensorboard_log="/tmp/halite/")

In [374]:
mean_reward_before_train = evaluate(model, num_steps=1000)

[0]
[0]
[0]
[0]
[0]
[55]
[55]
[55]
[96]
[127]
[150]
[167]
[180]
[190]
[197]
[203]
[203]
[278]
[334]
[376]
[408]
[432]
[450]
[463]
[463]
[557]
[628]
[681]
[721]
[751]
[773]
[790]
[802]
[802]
[802]
[802]
[846]
[846]
[846]
[879]
[904]
[922]
[936]
[946]
[954]
[960]
[964]
[964]
[1000]
[1000]
[1000]
[1000]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[4]
[7]
[9]
[11]
[12]
[13]
[14]
[14]
[14]
[14]
[24]
[32]
[38]
[38]
[48]
[55]
[60]
[64]
[67]
[69]
[69]
[73]
[73]
[169]
[241]
[295]
[335]
[365]
[388]
[405]
[418]
[428]
[435]
[435]
[671]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[4]
[7]
[9]
[11]
[12]
[12]
[12]
[14]
[14]
[20]
[24]
[27]
[29]
[29]
[207]
[340]
[440]
[515]
[571]
[613]
[645]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[1]
[1]
[3]
[4]
[5]
[6]
[6]
[6]
[6]
[6]
[6]
[6]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[7]
[9]
[11]
[11]
[13]
[14]
[15]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]
[16]


In [352]:
if MULTI:
    # Multiprocessed RL Training
    start_time = time.time()
    model.learn(n_timesteps)
    total_time_multi = time.time() - start_time

    print(
        "Took {:.2f}s for multiprocessed version - {:.2f} FPS".format(
            total_time_multi, n_timesteps / total_time_multi
        )
    )

    # Single Process RL Training
    SnakeEnv = SnakeWrapper("raw", n_food=3)
    single_process_model = PPO2(MlpPolicy, DummyVecEnv([lambda: SnakeEnv]), verbose=0)

    start_time = time.time()
    single_process_model.learn(n_timesteps)
    total_time_single = time.time() - start_time

    print(
        "Took {:.2f}s for single process version - {:.2f} FPS".format(
            total_time_single, n_timesteps / total_time_single
        )
    )

    print(
        "Multiprocessed training is {:.2f}x faster!".format(
            total_time_single / total_time_multi
        )
    )

    mean_reward = evaluate(model, num_steps=10000)
else:
    start_time = time.time()
    model.learn(n_timesteps)
    total_time_single = time.time() - start_time

    print(
        "Took {:.2f}s for training - {:.2f} FPS".format(
            total_time_single, n_timesteps / total_time_single
        )
    )

    mean_reward = evaluate(model, num_steps=10000)


--------------------------------------
| approxkl           | 3.5642104e-06 |
| clipfrac           | 0.0           |
| explained_variance | nan           |
| fps                | 155           |
| nupdates           | 1             |
| policy_entropy     | 1.4204379     |
| policy_loss        | 0.0           |
| serial_timesteps   | 128           |
| time_elapsed       | 3.34e-06      |
| total_timesteps    | 144           |
| value_loss         | 1.744313      |
--------------------------------------
--------------------------------------
| approxkl           | 4.4531016e-06 |
| clipfrac           | 0.0           |
| explained_variance | nan           |
| fps                | 504           |
| nupdates           | 2             |
| policy_entropy     | 1.4239691     |
| policy_loss        | 0.0           |
| serial_timesteps   | 256           |
| time_elapsed       | 0.83          |
| total_timesteps    | 288           |
| value_loss         | 0.79552406    |
-------------------------

--------------------------------------
| approxkl           | 8.469478e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 479           |
| nupdates           | 18            |
| policy_entropy     | 1.4901993     |
| policy_loss        | 0.0           |
| serial_timesteps   | 2304          |
| time_elapsed       | 5.06          |
| total_timesteps    | 2592          |
| value_loss         | 1.9722071e-13 |
--------------------------------------
--------------------------------------
| approxkl           | 5.0225863e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 489           |
| nupdates           | 19            |
| policy_entropy     | 1.4942918     |
| policy_loss        | 0.0           |
| serial_timesteps   | 2432          |
| time_elapsed       | 5.33          |
| total_timesteps    | 2736          |
| value_loss         | 4.089437e-14  |
-------------------------

--------------------------------------
| approxkl           | 4.739013e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 515           |
| nupdates           | 35            |
| policy_entropy     | 1.5592299     |
| policy_loss        | 0.0           |
| serial_timesteps   | 4480          |
| time_elapsed       | 9.54          |
| total_timesteps    | 5040          |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 4.3578652e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 476           |
| nupdates           | 36            |
| policy_entropy     | 1.5632675     |
| policy_loss        | 0.0           |
| serial_timesteps   | 4608          |
| time_elapsed       | 9.79          |
| total_timesteps    | 5184          |
| value_loss         | 4.1633363e-16 |
-------------------------

--------------------------------------
| approxkl           | 2.9064215e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 493           |
| nupdates           | 52            |
| policy_entropy     | 1.627708      |
| policy_loss        | 0.0           |
| serial_timesteps   | 6656          |
| time_elapsed       | 14            |
| total_timesteps    | 7488          |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 3.5553123e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 535           |
| nupdates           | 53            |
| policy_entropy     | 1.6317278     |
| policy_loss        | 0.0           |
| serial_timesteps   | 6784          |
| time_elapsed       | 14.2          |
| total_timesteps    | 7632          |
| value_loss         | 4.1633363e-16 |
-------------------------

--------------------------------------
| approxkl           | 5.9805516e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 482           |
| nupdates           | 69            |
| policy_entropy     | 1.6959698     |
| policy_loss        | 0.0           |
| serial_timesteps   | 8832          |
| time_elapsed       | 18.8          |
| total_timesteps    | 9936          |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 6.484434e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 445           |
| nupdates           | 70            |
| policy_entropy     | 1.6999812     |
| policy_loss        | 0.0           |
| serial_timesteps   | 8960          |
| time_elapsed       | 19            |
| total_timesteps    | 10080         |
| value_loss         | 4.1633363e-16 |
-------------------------

--------------------------------------
| approxkl           | 3.3730776e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 498           |
| nupdates           | 86            |
| policy_entropy     | 1.764119      |
| policy_loss        | 0.0           |
| serial_timesteps   | 11008         |
| time_elapsed       | 23.6          |
| total_timesteps    | 12384         |
| value_loss         | 3.8857806e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 4.5499314e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 505           |
| nupdates           | 87            |
| policy_entropy     | 1.7681254     |
| policy_loss        | 0.0           |
| serial_timesteps   | 11136         |
| time_elapsed       | 23.9          |
| total_timesteps    | 12528         |
| value_loss         | 4.1633363e-16 |
-------------------------

--------------------------------------
| approxkl           | 3.5240719e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 521           |
| nupdates           | 103           |
| policy_entropy     | 1.8322011     |
| policy_loss        | 0.0           |
| serial_timesteps   | 13184         |
| time_elapsed       | 28.2          |
| total_timesteps    | 14832         |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 4.2644215e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 501           |
| nupdates           | 104           |
| policy_entropy     | 1.8362045     |
| policy_loss        | 0.0           |
| serial_timesteps   | 13312         |
| time_elapsed       | 28.5          |
| total_timesteps    | 14976         |
| value_loss         | 4.1633363e-16 |
-------------------------

--------------------------------------
| approxkl           | 6.273811e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 490           |
| nupdates           | 120           |
| policy_entropy     | 1.9002398     |
| policy_loss        | 0.0           |
| serial_timesteps   | 15360         |
| time_elapsed       | 32.8          |
| total_timesteps    | 17280         |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 5.1501515e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 521           |
| nupdates           | 121           |
| policy_entropy     | 1.904241      |
| policy_loss        | 0.0           |
| serial_timesteps   | 15488         |
| time_elapsed       | 33            |
| total_timesteps    | 17424         |
| value_loss         | 4.1633363e-16 |
-------------------------

--------------------------------------
| approxkl           | 3.3763988e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 514           |
| nupdates           | 137           |
| policy_entropy     | 1.968249      |
| policy_loss        | 0.0           |
| serial_timesteps   | 17536         |
| time_elapsed       | 37.3          |
| total_timesteps    | 19728         |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 4.370772e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 503           |
| nupdates           | 138           |
| policy_entropy     | 1.9722487     |
| policy_loss        | 0.0           |
| serial_timesteps   | 17664         |
| time_elapsed       | 37.5          |
| total_timesteps    | 19872         |
| value_loss         | 4.1633363e-16 |
-------------------------

-------------------------------------
| approxkl           | 4.119275e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 457          |
| nupdates           | 154          |
| policy_entropy     | 2.0362377    |
| policy_loss        | 0.0          |
| serial_timesteps   | 19712        |
| time_elapsed       | 41.8         |
| total_timesteps    | 22176        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 4.6132996e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 530           |
| nupdates           | 155           |
| policy_entropy     | 2.0402365     |
| policy_loss        | 0.0           |
| serial_timesteps   | 19840         |
| time_elapsed       | 42.1          |
| total_timesteps    | 22320         |
| value_loss         | 4.1633363e-16 |
--------------------------------------

-------------------------------------
| approxkl           | 3.22212e-06  |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 537          |
| nupdates           | 171          |
| policy_entropy     | 2.1042128    |
| policy_loss        | 0.0          |
| serial_timesteps   | 21888        |
| time_elapsed       | 46.2         |
| total_timesteps    | 24624        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 4.2585116e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 527           |
| nupdates           | 172           |
| policy_entropy     | 2.1082106     |
| policy_loss        | 0.0           |
| serial_timesteps   | 22016         |
| time_elapsed       | 46.5          |
| total_timesteps    | 24768         |
| value_loss         | 4.1633363e-16 |
--------------------------------------

--------------------------------------
| approxkl           | 2.8971374e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 457           |
| nupdates           | 188           |
| policy_entropy     | 2.1721754     |
| policy_loss        | 0.0           |
| serial_timesteps   | 24064         |
| time_elapsed       | 50.9          |
| total_timesteps    | 27072         |
| value_loss         | 4.440892e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 4.1533135e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 487           |
| nupdates           | 189           |
| policy_entropy     | 2.1761732     |
| policy_loss        | 0.0           |
| serial_timesteps   | 24192         |
| time_elapsed       | 51.2          |
| total_timesteps    | 27216         |
| value_loss         | 4.440892e-16  |
-------------------------

--------------------------------------
| approxkl           | 3.2742369e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 411           |
| nupdates           | 205           |
| policy_entropy     | 2.2401323     |
| policy_loss        | 0.0           |
| serial_timesteps   | 26240         |
| time_elapsed       | 55.9          |
| total_timesteps    | 29520         |
| value_loss         | 4.440892e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 3.2832065e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 374           |
| nupdates           | 206           |
| policy_entropy     | 2.2441297     |
| policy_loss        | 0.0           |
| serial_timesteps   | 26368         |
| time_elapsed       | 56.2          |
| total_timesteps    | 29664         |
| value_loss         | 4.440892e-16  |
-------------------------

-------------------------------------
| approxkl           | 5.279625e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 482          |
| nupdates           | 222          |
| policy_entropy     | 2.3080792    |
| policy_loss        | 0.0          |
| serial_timesteps   | 28416        |
| time_elapsed       | 61.2         |
| total_timesteps    | 31968        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 4.6328287e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 473           |
| nupdates           | 223           |
| policy_entropy     | 2.3120756     |
| policy_loss        | 0.0           |
| serial_timesteps   | 28544         |
| time_elapsed       | 61.4          |
| total_timesteps    | 32112         |
| value_loss         | 4.440892e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 5.611772e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 484          |
| nupdates           | 239          |
| policy_entropy     | 2.3760252    |
| policy_loss        | 0.0          |
| serial_timesteps   | 30592        |
| time_elapsed       | 66.3         |
| total_timesteps    | 34416        |
| value_loss         | 4.440892e-16 |
-------------------------------------
-------------------------------------
| approxkl           | 5.103979e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 485          |
| nupdates           | 240          |
| policy_entropy     | 2.3800225    |
| policy_loss        | 0.0          |
| serial_timesteps   | 30720        |
| time_elapsed       | 66.5         |
| total_timesteps    | 34560        |
| value_loss         | 4.440892e-16 |
-------------------------------------
------------

--------------------------------------
| approxkl           | 3.5473722e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 492           |
| nupdates           | 256           |
| policy_entropy     | 2.443966      |
| policy_loss        | 0.0           |
| serial_timesteps   | 32768         |
| time_elapsed       | 70.9          |
| total_timesteps    | 36864         |
| value_loss         | 4.440892e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 4.5802003e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 473           |
| nupdates           | 257           |
| policy_entropy     | 2.4479618     |
| policy_loss        | 0.0           |
| serial_timesteps   | 32896         |
| time_elapsed       | 71.1          |
| total_timesteps    | 37008         |
| value_loss         | 4.440892e-16  |
-------------------------

--------------------------------------
| approxkl           | 2.6457253e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 254           |
| nupdates           | 273           |
| policy_entropy     | 2.5118961     |
| policy_loss        | 0.0           |
| serial_timesteps   | 34944         |
| time_elapsed       | 75.5          |
| total_timesteps    | 39312         |
| value_loss         | 4.440892e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 5.711524e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 366          |
| nupdates           | 274          |
| policy_entropy     | 2.5158918    |
| policy_loss        | 0.0          |
| serial_timesteps   | 35072        |
| time_elapsed       | 76           |
| total_timesteps    | 39456        |
| value_loss         | 4.440892e-16 |
-------------------------------------

-------------------------------------
| approxkl           | 5.575405e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 433          |
| nupdates           | 290          |
| policy_entropy     | 2.579826     |
| policy_loss        | 0.0          |
| serial_timesteps   | 37120        |
| time_elapsed       | 81           |
| total_timesteps    | 41760        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.4970615e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 429           |
| nupdates           | 291           |
| policy_entropy     | 2.5838223     |
| policy_loss        | 0.0           |
| serial_timesteps   | 37248         |
| time_elapsed       | 81.3          |
| total_timesteps    | 41904         |
| value_loss         | 4.440892e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 6.312159e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 505          |
| nupdates           | 307          |
| policy_entropy     | 2.6477566    |
| policy_loss        | 0.0          |
| serial_timesteps   | 39296        |
| time_elapsed       | 85.7         |
| total_timesteps    | 44208        |
| value_loss         | 4.440892e-16 |
-------------------------------------
-------------------------------------
| approxkl           | 5.65818e-06  |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 536          |
| nupdates           | 308          |
| policy_entropy     | 2.6517525    |
| policy_loss        | 0.0          |
| serial_timesteps   | 39424        |
| time_elapsed       | 85.9         |
| total_timesteps    | 44352        |
| value_loss         | 4.440892e-16 |
-------------------------------------
------------

--------------------------------------
| approxkl           | 6.0040315e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 524           |
| nupdates           | 324           |
| policy_entropy     | 2.7156868     |
| policy_loss        | 0.0           |
| serial_timesteps   | 41472         |
| time_elapsed       | 90            |
| total_timesteps    | 46656         |
| value_loss         | 4.440892e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 5.939808e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 483          |
| nupdates           | 325          |
| policy_entropy     | 2.7196827    |
| policy_loss        | 0.0          |
| serial_timesteps   | 41600        |
| time_elapsed       | 90.2         |
| total_timesteps    | 46800        |
| value_loss         | 4.440892e-16 |
-------------------------------------

--------------------------------------
| approxkl           | 4.9098107e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 424           |
| nupdates           | 341           |
| policy_entropy     | 2.783617      |
| policy_loss        | 0.0           |
| serial_timesteps   | 43648         |
| time_elapsed       | 94.5          |
| total_timesteps    | 49104         |
| value_loss         | 4.440892e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 4.264191e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 412          |
| nupdates           | 342          |
| policy_entropy     | 2.7876132    |
| policy_loss        | 0.0          |
| serial_timesteps   | 43776        |
| time_elapsed       | 94.8         |
| total_timesteps    | 49248        |
| value_loss         | 4.440892e-16 |
-------------------------------------

-------------------------------------
| approxkl           | 4.938716e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 449          |
| nupdates           | 358          |
| policy_entropy     | 2.8515475    |
| policy_loss        | 0.0          |
| serial_timesteps   | 45824        |
| time_elapsed       | 99           |
| total_timesteps    | 51552        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.2568141e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 485           |
| nupdates           | 359           |
| policy_entropy     | 2.8555431     |
| policy_loss        | 0.0           |
| serial_timesteps   | 45952         |
| time_elapsed       | 99.3          |
| total_timesteps    | 51696         |
| value_loss         | 4.440892e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 4.626375e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 478          |
| nupdates           | 375          |
| policy_entropy     | 2.919478     |
| policy_loss        | 0.0          |
| serial_timesteps   | 48000        |
| time_elapsed       | 104          |
| total_timesteps    | 54000        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 2.7072156e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 496           |
| nupdates           | 376           |
| policy_entropy     | 2.9234734     |
| policy_loss        | 0.0           |
| serial_timesteps   | 48128         |
| time_elapsed       | 104           |
| total_timesteps    | 54144         |
| value_loss         | 4.440892e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 2.8447732e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 512           |
| nupdates           | 392           |
| policy_entropy     | 2.9874077     |
| policy_loss        | 0.0           |
| serial_timesteps   | 50176         |
| time_elapsed       | 108           |
| total_timesteps    | 56448         |
| value_loss         | 9.620123e-09  |
--------------------------------------
--------------------------------------
| approxkl           | 5.349006e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 517           |
| nupdates           | 393           |
| policy_entropy     | 2.991404      |
| policy_loss        | 0.0           |
| serial_timesteps   | 50304         |
| time_elapsed       | 109           |
| total_timesteps    | 56592         |
| value_loss         | 1.7643514e-09 |
-------------------------

-------------------------------------
| approxkl           | 4.892432e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 544          |
| nupdates           | 409          |
| policy_entropy     | 3.0553384    |
| policy_loss        | 0.0          |
| serial_timesteps   | 52352        |
| time_elapsed       | 113          |
| total_timesteps    | 58896        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.9734796e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 499           |
| nupdates           | 410           |
| policy_entropy     | 3.059334      |
| policy_loss        | 0.0           |
| serial_timesteps   | 52480         |
| time_elapsed       | 113           |
| total_timesteps    | 59040         |
| value_loss         | 4.440892e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 4.468221e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 509          |
| nupdates           | 426          |
| policy_entropy     | 3.1232684    |
| policy_loss        | 0.0          |
| serial_timesteps   | 54528        |
| time_elapsed       | 117          |
| total_timesteps    | 61344        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.7967852e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 501           |
| nupdates           | 427           |
| policy_entropy     | 3.1272645     |
| policy_loss        | 0.0           |
| serial_timesteps   | 54656         |
| time_elapsed       | 117           |
| total_timesteps    | 61488         |
| value_loss         | 4.440892e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 3.9531124e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 525           |
| nupdates           | 443           |
| policy_entropy     | 3.1911988     |
| policy_loss        | 0.0           |
| serial_timesteps   | 56704         |
| time_elapsed       | 122           |
| total_timesteps    | 63792         |
| value_loss         | 4.440892e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 5.1280867e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 526           |
| nupdates           | 444           |
| policy_entropy     | 3.1951942     |
| policy_loss        | 0.0           |
| serial_timesteps   | 56832         |
| time_elapsed       | 122           |
| total_timesteps    | 63936         |
| value_loss         | 4.440892e-16  |
-------------------------

--------------------------------------
| approxkl           | 7.5593907e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 483           |
| nupdates           | 460           |
| policy_entropy     | 3.259129      |
| policy_loss        | 0.0           |
| serial_timesteps   | 58880         |
| time_elapsed       | 126           |
| total_timesteps    | 66240         |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 6.134455e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 472           |
| nupdates           | 461           |
| policy_entropy     | 3.2631245     |
| policy_loss        | 0.0           |
| serial_timesteps   | 59008         |
| time_elapsed       | 127           |
| total_timesteps    | 66384         |
| value_loss         | 4.4061976e-16 |
-------------------------

--------------------------------------
| approxkl           | 7.406553e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 513           |
| nupdates           | 477           |
| policy_entropy     | 3.3270588     |
| policy_loss        | 0.0           |
| serial_timesteps   | 61056         |
| time_elapsed       | 131           |
| total_timesteps    | 68688         |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 6.799702e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 462           |
| nupdates           | 478           |
| policy_entropy     | 3.331055      |
| policy_loss        | 0.0           |
| serial_timesteps   | 61184         |
| time_elapsed       | 131           |
| total_timesteps    | 68832         |
| value_loss         | 4.1633363e-16 |
-------------------------

-------------------------------------
| approxkl           | 4.236618e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 515          |
| nupdates           | 494          |
| policy_entropy     | 3.3949893    |
| policy_loss        | 0.0          |
| serial_timesteps   | 63232        |
| time_elapsed       | 135          |
| total_timesteps    | 71136        |
| value_loss         | 3.330669e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 2.6408027e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 485           |
| nupdates           | 495           |
| policy_entropy     | 3.398985      |
| policy_loss        | 0.0           |
| serial_timesteps   | 63360         |
| time_elapsed       | 136           |
| total_timesteps    | 71280         |
| value_loss         | 3.330669e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 2.133274e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 487           |
| nupdates           | 511           |
| policy_entropy     | 3.4629402     |
| policy_loss        | 0.0           |
| serial_timesteps   | 65408         |
| time_elapsed       | 140           |
| total_timesteps    | 73584         |
| value_loss         | 7.4419637e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 4.362714e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 530           |
| nupdates           | 512           |
| policy_entropy     | 3.466938      |
| policy_loss        | 0.0           |
| serial_timesteps   | 65536         |
| time_elapsed       | 140           |
| total_timesteps    | 73728         |
| value_loss         | 3.7816972e-16 |
-------------------------

-------------------------------------
| approxkl           | 8.560801e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 434          |
| nupdates           | 528          |
| policy_entropy     | 3.5309029    |
| policy_loss        | 0.0          |
| serial_timesteps   | 67584        |
| time_elapsed       | 145          |
| total_timesteps    | 76032        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.8132189e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 499           |
| nupdates           | 529           |
| policy_entropy     | 3.5349007     |
| policy_loss        | 0.0           |
| serial_timesteps   | 67712         |
| time_elapsed       | 145           |
| total_timesteps    | 76176         |
| value_loss         | 9.072604e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 4.199248e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 512           |
| nupdates           | 545           |
| policy_entropy     | 3.5988655     |
| policy_loss        | 0.0           |
| serial_timesteps   | 69760         |
| time_elapsed       | 149           |
| total_timesteps    | 78480         |
| value_loss         | 6.1183697e-15 |
--------------------------------------
--------------------------------------
| approxkl           | 5.2025803e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 478           |
| nupdates           | 546           |
| policy_entropy     | 3.6028633     |
| policy_loss        | 0.0           |
| serial_timesteps   | 69888         |
| time_elapsed       | 150           |
| total_timesteps    | 78624         |
| value_loss         | 3.9863945e-15 |
-------------------------

--------------------------------------
| approxkl           | 3.6217843e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 507           |
| nupdates           | 562           |
| policy_entropy     | 3.6668282     |
| policy_loss        | 0.0           |
| serial_timesteps   | 71936         |
| time_elapsed       | 154           |
| total_timesteps    | 80928         |
| value_loss         | 6.1062266e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 5.5045825e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 525           |
| nupdates           | 563           |
| policy_entropy     | 3.670826      |
| policy_loss        | 0.0           |
| serial_timesteps   | 72064         |
| time_elapsed       | 154           |
| total_timesteps    | 81072         |
| value_loss         | 4.440892e-16  |
-------------------------

-------------------------------------
| approxkl           | 5.118021e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 549          |
| nupdates           | 579          |
| policy_entropy     | 3.7347908    |
| policy_loss        | 0.0          |
| serial_timesteps   | 74112        |
| time_elapsed       | 158          |
| total_timesteps    | 83376        |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 5.8017267e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 527           |
| nupdates           | 580           |
| policy_entropy     | 3.7387886     |
| policy_loss        | 0.0           |
| serial_timesteps   | 74240         |
| time_elapsed       | 158           |
| total_timesteps    | 83520         |
| value_loss         | 1.110223e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 4.4534418e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 487           |
| nupdates           | 596           |
| policy_entropy     | 3.8027534     |
| policy_loss        | 0.0           |
| serial_timesteps   | 76288         |
| time_elapsed       | 163           |
| total_timesteps    | 85824         |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 3.2741614e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 503           |
| nupdates           | 597           |
| policy_entropy     | 3.8067513     |
| policy_loss        | 0.0           |
| serial_timesteps   | 76416         |
| time_elapsed       | 163           |
| total_timesteps    | 85968         |
| value_loss         | 1.110223e-16  |
-------------------------

-------------------------------------
| approxkl           | 3.262963e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 425          |
| nupdates           | 613          |
| policy_entropy     | 3.870716     |
| policy_loss        | 0.0          |
| serial_timesteps   | 78464        |
| time_elapsed       | 167          |
| total_timesteps    | 88272        |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 5.1030447e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 315           |
| nupdates           | 614           |
| policy_entropy     | 3.874714      |
| policy_loss        | 0.0           |
| serial_timesteps   | 78592         |
| time_elapsed       | 168           |
| total_timesteps    | 88416         |
| value_loss         | 1.110223e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 7.4454424e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 525           |
| nupdates           | 630           |
| policy_entropy     | 3.9386787     |
| policy_loss        | 0.0           |
| serial_timesteps   | 80640         |
| time_elapsed       | 174           |
| total_timesteps    | 90720         |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 2.2865474e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 499           |
| nupdates           | 631           |
| policy_entropy     | 3.9426765     |
| policy_loss        | 0.0           |
| serial_timesteps   | 80768         |
| time_elapsed       | 174           |
| total_timesteps    | 90864         |
| value_loss         | 1.110223e-16  |
-------------------------

-------------------------------------
| approxkl           | 2.933404e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 486          |
| nupdates           | 647          |
| policy_entropy     | 4.0066414    |
| policy_loss        | 0.0          |
| serial_timesteps   | 82816        |
| time_elapsed       | 178          |
| total_timesteps    | 93168        |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 2.4292563e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 520           |
| nupdates           | 648           |
| policy_entropy     | 4.010639      |
| policy_loss        | 0.0           |
| serial_timesteps   | 82944         |
| time_elapsed       | 178           |
| total_timesteps    | 93312         |
| value_loss         | 1.110223e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 5.7022567e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 503           |
| nupdates           | 664           |
| policy_entropy     | 4.074604      |
| policy_loss        | 0.0           |
| serial_timesteps   | 84992         |
| time_elapsed       | 183           |
| total_timesteps    | 95616         |
| value_loss         | 1.110223e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 4.581772e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 518          |
| nupdates           | 665          |
| policy_entropy     | 4.078602     |
| policy_loss        | 0.0          |
| serial_timesteps   | 85120        |
| time_elapsed       | 183          |
| total_timesteps    | 95760        |
| value_loss         | 1.110223e-16 |
-------------------------------------

-------------------------------------
| approxkl           | 5.034115e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 510          |
| nupdates           | 681          |
| policy_entropy     | 4.1425667    |
| policy_loss        | 0.0          |
| serial_timesteps   | 87168        |
| time_elapsed       | 187          |
| total_timesteps    | 98064        |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 4.6286564e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 505           |
| nupdates           | 682           |
| policy_entropy     | 4.1465645     |
| policy_loss        | 0.0           |
| serial_timesteps   | 87296         |
| time_elapsed       | 187           |
| total_timesteps    | 98208         |
| value_loss         | 1.110223e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 8.213035e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 538          |
| nupdates           | 698          |
| policy_entropy     | 4.2105293    |
| policy_loss        | 0.0          |
| serial_timesteps   | 89344        |
| time_elapsed       | 192          |
| total_timesteps    | 100512       |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 2.7869485e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 499           |
| nupdates           | 699           |
| policy_entropy     | 4.214527      |
| policy_loss        | 0.0           |
| serial_timesteps   | 89472         |
| time_elapsed       | 192           |
| total_timesteps    | 100656        |
| value_loss         | 1.110223e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 5.2735127e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 472           |
| nupdates           | 715           |
| policy_entropy     | 4.278492      |
| policy_loss        | 0.0           |
| serial_timesteps   | 91520         |
| time_elapsed       | 196           |
| total_timesteps    | 102960        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 9.0386275e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 495           |
| nupdates           | 716           |
| policy_entropy     | 4.28249       |
| policy_loss        | 0.0           |
| serial_timesteps   | 91648         |
| time_elapsed       | 197           |
| total_timesteps    | 103104        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 5.3473705e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 512           |
| nupdates           | 732           |
| policy_entropy     | 4.3464546     |
| policy_loss        | 0.0           |
| serial_timesteps   | 93696         |
| time_elapsed       | 201           |
| total_timesteps    | 105408        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 6.3168245e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 498           |
| nupdates           | 733           |
| policy_entropy     | 4.3504524     |
| policy_loss        | 0.0           |
| serial_timesteps   | 93824         |
| time_elapsed       | 201           |
| total_timesteps    | 105552        |
| value_loss         | 1.110223e-16  |
-------------------------

-------------------------------------
| approxkl           | 3.695831e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 436          |
| nupdates           | 749          |
| policy_entropy     | 4.4144173    |
| policy_loss        | 0.0          |
| serial_timesteps   | 95872        |
| time_elapsed       | 205          |
| total_timesteps    | 107856       |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 5.2761297e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 510           |
| nupdates           | 750           |
| policy_entropy     | 4.418415      |
| policy_loss        | 0.0           |
| serial_timesteps   | 96000         |
| time_elapsed       | 206           |
| total_timesteps    | 108000        |
| value_loss         | 1.110223e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 4.025751e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 493          |
| nupdates           | 766          |
| policy_entropy     | 4.48238      |
| policy_loss        | 0.0          |
| serial_timesteps   | 98048        |
| time_elapsed       | 210          |
| total_timesteps    | 110304       |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 2.9976964e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 527           |
| nupdates           | 767           |
| policy_entropy     | 4.4863777     |
| policy_loss        | 0.0           |
| serial_timesteps   | 98176         |
| time_elapsed       | 210           |
| total_timesteps    | 110448        |
| value_loss         | 1.110223e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 4.7291815e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 479           |
| nupdates           | 783           |
| policy_entropy     | 4.5503426     |
| policy_loss        | 0.0           |
| serial_timesteps   | 100224        |
| time_elapsed       | 214           |
| total_timesteps    | 112752        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 5.5211344e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 536           |
| nupdates           | 784           |
| policy_entropy     | 4.5543404     |
| policy_loss        | 0.0           |
| serial_timesteps   | 100352        |
| time_elapsed       | 215           |
| total_timesteps    | 112896        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 3.9246925e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 480           |
| nupdates           | 800           |
| policy_entropy     | 4.618305      |
| policy_loss        | 0.0           |
| serial_timesteps   | 102400        |
| time_elapsed       | 219           |
| total_timesteps    | 115200        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 3.3126787e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 520           |
| nupdates           | 801           |
| policy_entropy     | 4.622303      |
| policy_loss        | 0.0           |
| serial_timesteps   | 102528        |
| time_elapsed       | 219           |
| total_timesteps    | 115344        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 2.3765967e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 501           |
| nupdates           | 817           |
| policy_entropy     | 4.686268      |
| policy_loss        | 0.0           |
| serial_timesteps   | 104576        |
| time_elapsed       | 224           |
| total_timesteps    | 117648        |
| value_loss         | 1.110223e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 2.990213e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 468          |
| nupdates           | 818          |
| policy_entropy     | 4.6902657    |
| policy_loss        | 0.0          |
| serial_timesteps   | 104704       |
| time_elapsed       | 224          |
| total_timesteps    | 117792       |
| value_loss         | 1.110223e-16 |
-------------------------------------

-------------------------------------
| approxkl           | 5.220222e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 515          |
| nupdates           | 834          |
| policy_entropy     | 4.7542305    |
| policy_loss        | 0.0          |
| serial_timesteps   | 106752       |
| time_elapsed       | 228          |
| total_timesteps    | 120096       |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.5324847e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 458           |
| nupdates           | 835           |
| policy_entropy     | 4.7582283     |
| policy_loss        | 0.0           |
| serial_timesteps   | 106880        |
| time_elapsed       | 228           |
| total_timesteps    | 120240        |
| value_loss         | 1.110223e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 2.925573e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 444          |
| nupdates           | 851          |
| policy_entropy     | 4.822193     |
| policy_loss        | 0.0          |
| serial_timesteps   | 108928       |
| time_elapsed       | 233          |
| total_timesteps    | 122544       |
| value_loss         | 1.110223e-16 |
-------------------------------------
-------------------------------------
| approxkl           | 3.240743e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 487          |
| nupdates           | 852          |
| policy_entropy     | 4.826191     |
| policy_loss        | 0.0          |
| serial_timesteps   | 109056       |
| time_elapsed       | 233          |
| total_timesteps    | 122688       |
| value_loss         | 1.110223e-16 |
-------------------------------------
------------

-------------------------------------
| approxkl           | 3.723208e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 496          |
| nupdates           | 868          |
| policy_entropy     | 4.890156     |
| policy_loss        | 0.0          |
| serial_timesteps   | 111104       |
| time_elapsed       | 237          |
| total_timesteps    | 124992       |
| value_loss         | 1.110223e-16 |
-------------------------------------
-------------------------------------
| approxkl           | 6.907065e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 489          |
| nupdates           | 869          |
| policy_entropy     | 4.8941536    |
| policy_loss        | 0.0          |
| serial_timesteps   | 111232       |
| time_elapsed       | 237          |
| total_timesteps    | 125136       |
| value_loss         | 1.110223e-16 |
-------------------------------------
------------

-------------------------------------
| approxkl           | 4.718054e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 451          |
| nupdates           | 885          |
| policy_entropy     | 4.9581184    |
| policy_loss        | 0.0          |
| serial_timesteps   | 113280       |
| time_elapsed       | 242          |
| total_timesteps    | 127440       |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.6907613e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 493           |
| nupdates           | 886           |
| policy_entropy     | 4.9621162     |
| policy_loss        | 0.0           |
| serial_timesteps   | 113408        |
| time_elapsed       | 242           |
| total_timesteps    | 127584        |
| value_loss         | 1.110223e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 9.429728e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 548          |
| nupdates           | 902          |
| policy_entropy     | 5.026081     |
| policy_loss        | 0.0          |
| serial_timesteps   | 115456       |
| time_elapsed       | 246          |
| total_timesteps    | 129888       |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 4.2285537e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 539           |
| nupdates           | 903           |
| policy_entropy     | 5.030079      |
| policy_loss        | 0.0           |
| serial_timesteps   | 115584        |
| time_elapsed       | 247           |
| total_timesteps    | 130032        |
| value_loss         | 1.110223e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 4.3317045e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 494           |
| nupdates           | 919           |
| policy_entropy     | 5.0940437     |
| policy_loss        | 0.0           |
| serial_timesteps   | 117632        |
| time_elapsed       | 251           |
| total_timesteps    | 132336        |
| value_loss         | 8.273056e-12  |
--------------------------------------
--------------------------------------
| approxkl           | 2.8374384e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 445           |
| nupdates           | 920           |
| policy_entropy     | 5.0980415     |
| policy_loss        | 0.0           |
| serial_timesteps   | 117760        |
| time_elapsed       | 251           |
| total_timesteps    | 132480        |
| value_loss         | 8.0972555e-13 |
-------------------------

--------------------------------------
| approxkl           | 5.9804734e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 489           |
| nupdates           | 936           |
| policy_entropy     | 5.1620064     |
| policy_loss        | 0.0           |
| serial_timesteps   | 119808        |
| time_elapsed       | 255           |
| total_timesteps    | 134784        |
| value_loss         | 1.9428903e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 4.8329234e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 527           |
| nupdates           | 937           |
| policy_entropy     | 5.166004      |
| policy_loss        | 0.0           |
| serial_timesteps   | 119936        |
| time_elapsed       | 256           |
| total_timesteps    | 134928        |
| value_loss         | 1.9428903e-16 |
-------------------------

-------------------------------------
| approxkl           | 6.28644e-06  |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 502          |
| nupdates           | 953          |
| policy_entropy     | 5.229969     |
| policy_loss        | 0.0          |
| serial_timesteps   | 121984       |
| time_elapsed       | 260          |
| total_timesteps    | 137232       |
| value_loss         | 3.330669e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 6.7688093e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 481           |
| nupdates           | 954           |
| policy_entropy     | 5.233967      |
| policy_loss        | 0.0           |
| serial_timesteps   | 122112        |
| time_elapsed       | 260           |
| total_timesteps    | 137376        |
| value_loss         | 3.8857806e-16 |
--------------------------------------

--------------------------------------
| approxkl           | 3.4815682e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 520           |
| nupdates           | 970           |
| policy_entropy     | 5.2979317     |
| policy_loss        | 0.0           |
| serial_timesteps   | 124160        |
| time_elapsed       | 265           |
| total_timesteps    | 139680        |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 4.679116e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 499           |
| nupdates           | 971           |
| policy_entropy     | 5.3019295     |
| policy_loss        | 0.0           |
| serial_timesteps   | 124288        |
| time_elapsed       | 265           |
| total_timesteps    | 139824        |
| value_loss         | 4.1633363e-16 |
-------------------------

-------------------------------------
| approxkl           | 5.039431e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 511          |
| nupdates           | 987          |
| policy_entropy     | 5.3658943    |
| policy_loss        | 0.0          |
| serial_timesteps   | 126336       |
| time_elapsed       | 269          |
| total_timesteps    | 142128       |
| value_loss         | 4.440892e-16 |
-------------------------------------
-------------------------------------
| approxkl           | 4.384451e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 481          |
| nupdates           | 988          |
| policy_entropy     | 5.369892     |
| policy_loss        | 0.0          |
| serial_timesteps   | 126464       |
| time_elapsed       | 269          |
| total_timesteps    | 142272       |
| value_loss         | 4.440892e-16 |
-------------------------------------
------------

--------------------------------------
| approxkl           | 4.007954e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 514           |
| nupdates           | 1004          |
| policy_entropy     | 5.433857      |
| policy_loss        | 0.0           |
| serial_timesteps   | 128512        |
| time_elapsed       | 274           |
| total_timesteps    | 144576        |
| value_loss         | 4.7531423e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 3.1700195e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 529           |
| nupdates           | 1005          |
| policy_entropy     | 5.437855      |
| policy_loss        | 0.0           |
| serial_timesteps   | 128640        |
| time_elapsed       | 274           |
| total_timesteps    | 144720        |
| value_loss         | 4.440892e-16  |
-------------------------

-------------------------------------
| approxkl           | 3.966048e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 494          |
| nupdates           | 1021         |
| policy_entropy     | 5.5018196    |
| policy_loss        | 0.0          |
| serial_timesteps   | 130688       |
| time_elapsed       | 278          |
| total_timesteps    | 147024       |
| value_loss         | 4.440892e-16 |
-------------------------------------
-------------------------------------
| approxkl           | 9.383981e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 456          |
| nupdates           | 1022         |
| policy_entropy     | 5.5058174    |
| policy_loss        | 0.0          |
| serial_timesteps   | 130816       |
| time_elapsed       | 279          |
| total_timesteps    | 147168       |
| value_loss         | 4.440892e-16 |
-------------------------------------
------------

--------------------------------------
| approxkl           | 5.1610255e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 459           |
| nupdates           | 1038          |
| policy_entropy     | 5.5697823     |
| policy_loss        | 0.0           |
| serial_timesteps   | 132864        |
| time_elapsed       | 283           |
| total_timesteps    | 149472        |
| value_loss         | 4.440892e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 4.82468e-06  |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 500          |
| nupdates           | 1039         |
| policy_entropy     | 5.57378      |
| policy_loss        | 0.0          |
| serial_timesteps   | 132992       |
| time_elapsed       | 283          |
| total_timesteps    | 149616       |
| value_loss         | 4.440892e-16 |
-------------------------------------

-------------------------------------
| approxkl           | 5.466306e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 331          |
| nupdates           | 1055         |
| policy_entropy     | 5.637745     |
| policy_loss        | 0.0          |
| serial_timesteps   | 135040       |
| time_elapsed       | 288          |
| total_timesteps    | 151920       |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 6.6218017e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 498           |
| nupdates           | 1056          |
| policy_entropy     | 5.6417427     |
| policy_loss        | 0.0           |
| serial_timesteps   | 135168        |
| time_elapsed       | 288           |
| total_timesteps    | 152064        |
| value_loss         | 4.440892e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 6.993696e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 541          |
| nupdates           | 1072         |
| policy_entropy     | 5.7057076    |
| policy_loss        | 0.0          |
| serial_timesteps   | 137216       |
| time_elapsed       | 293          |
| total_timesteps    | 154368       |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 2.7467013e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 497           |
| nupdates           | 1073          |
| policy_entropy     | 5.7097054     |
| policy_loss        | 0.0           |
| serial_timesteps   | 137344        |
| time_elapsed       | 293           |
| total_timesteps    | 154512        |
| value_loss         | 4.440892e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 4.586653e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 521          |
| nupdates           | 1089         |
| policy_entropy     | 5.77367      |
| policy_loss        | 0.0          |
| serial_timesteps   | 139392       |
| time_elapsed       | 299          |
| total_timesteps    | 156816       |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 4.6466203e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 486           |
| nupdates           | 1090          |
| policy_entropy     | 5.777668      |
| policy_loss        | 0.0           |
| serial_timesteps   | 139520        |
| time_elapsed       | 299           |
| total_timesteps    | 156960        |
| value_loss         | 4.440892e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 4.5684724e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 469           |
| nupdates           | 1106          |
| policy_entropy     | 5.841633      |
| policy_loss        | 0.0           |
| serial_timesteps   | 141568        |
| time_elapsed       | 304           |
| total_timesteps    | 159264        |
| value_loss         | 4.440892e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 2.460807e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 520          |
| nupdates           | 1107         |
| policy_entropy     | 5.8456306    |
| policy_loss        | 0.0          |
| serial_timesteps   | 141696       |
| time_elapsed       | 304          |
| total_timesteps    | 159408       |
| value_loss         | 4.440892e-16 |
-------------------------------------

-------------------------------------
| approxkl           | 3.342582e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 509          |
| nupdates           | 1123         |
| policy_entropy     | 5.9095955    |
| policy_loss        | 0.0          |
| serial_timesteps   | 143744       |
| time_elapsed       | 308          |
| total_timesteps    | 161712       |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.1518791e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 513           |
| nupdates           | 1124          |
| policy_entropy     | 5.9135933     |
| policy_loss        | 0.0           |
| serial_timesteps   | 143872        |
| time_elapsed       | 308           |
| total_timesteps    | 161856        |
| value_loss         | 4.440892e-16  |
--------------------------------------

-------------------------------------
| approxkl           | 3.428074e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 500          |
| nupdates           | 1140         |
| policy_entropy     | 5.977558     |
| policy_loss        | 0.0          |
| serial_timesteps   | 145920       |
| time_elapsed       | 313          |
| total_timesteps    | 164160       |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.8378844e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 490           |
| nupdates           | 1141          |
| policy_entropy     | 5.981556      |
| policy_loss        | 0.0           |
| serial_timesteps   | 146048        |
| time_elapsed       | 313           |
| total_timesteps    | 164304        |
| value_loss         | 4.440892e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 5.7366415e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 489           |
| nupdates           | 1157          |
| policy_entropy     | 6.045521      |
| policy_loss        | 0.0           |
| serial_timesteps   | 148096        |
| time_elapsed       | 317           |
| total_timesteps    | 166608        |
| value_loss         | 4.440892e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 5.5931832e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 523           |
| nupdates           | 1158          |
| policy_entropy     | 6.0495186     |
| policy_loss        | 0.0           |
| serial_timesteps   | 148224        |
| time_elapsed       | 318           |
| total_timesteps    | 166752        |
| value_loss         | 4.440892e-16  |
-------------------------

-------------------------------------
| approxkl           | 5.673617e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 415          |
| nupdates           | 1174         |
| policy_entropy     | 6.1134834    |
| policy_loss        | 0.0          |
| serial_timesteps   | 150272       |
| time_elapsed       | 322          |
| total_timesteps    | 169056       |
| value_loss         | 4.440892e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 1.9799584e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 491           |
| nupdates           | 1175          |
| policy_entropy     | 6.117481      |
| policy_loss        | 0.0           |
| serial_timesteps   | 150400        |
| time_elapsed       | 322           |
| total_timesteps    | 169200        |
| value_loss         | 4.440892e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 4.8228617e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 460           |
| nupdates           | 1191          |
| policy_entropy     | 6.181446      |
| policy_loss        | 0.0           |
| serial_timesteps   | 152448        |
| time_elapsed       | 327           |
| total_timesteps    | 171504        |
| value_loss         | 4.440892e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 9.0728245e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 448           |
| nupdates           | 1192          |
| policy_entropy     | 6.185444      |
| policy_loss        | 0.0           |
| serial_timesteps   | 152576        |
| time_elapsed       | 327           |
| total_timesteps    | 171648        |
| value_loss         | 4.440892e-16  |
-------------------------

--------------------------------------
| approxkl           | 2.1362946e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 505           |
| nupdates           | 1208          |
| policy_entropy     | 6.2494087     |
| policy_loss        | 0.0           |
| serial_timesteps   | 154624        |
| time_elapsed       | 331           |
| total_timesteps    | 173952        |
| value_loss         | 4.440892e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 4.467733e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 527          |
| nupdates           | 1209         |
| policy_entropy     | 6.2534065    |
| policy_loss        | 0.0          |
| serial_timesteps   | 154752       |
| time_elapsed       | 332          |
| total_timesteps    | 174096       |
| value_loss         | 4.440892e-16 |
-------------------------------------

--------------------------------------
| approxkl           | 5.0506715e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 513           |
| nupdates           | 1225          |
| policy_entropy     | 6.3173714     |
| policy_loss        | 0.0           |
| serial_timesteps   | 156800        |
| time_elapsed       | 336           |
| total_timesteps    | 176400        |
| value_loss         | 4.440892e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 5.8559413e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 533           |
| nupdates           | 1226          |
| policy_entropy     | 6.321369      |
| policy_loss        | 0.0           |
| serial_timesteps   | 156928        |
| time_elapsed       | 336           |
| total_timesteps    | 176544        |
| value_loss         | 2.0572086e-14 |
-------------------------

--------------------------------------
| approxkl           | 5.811658e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 526           |
| nupdates           | 1242          |
| policy_entropy     | 6.385334      |
| policy_loss        | 0.0           |
| serial_timesteps   | 158976        |
| time_elapsed       | 340           |
| total_timesteps    | 178848        |
| value_loss         | 1.1466522e-15 |
--------------------------------------
--------------------------------------
| approxkl           | 3.8724006e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 528           |
| nupdates           | 1243          |
| policy_entropy     | 6.389332      |
| policy_loss        | 0.0           |
| serial_timesteps   | 159104        |
| time_elapsed       | 340           |
| total_timesteps    | 178992        |
| value_loss         | 3.8857806e-16 |
-------------------------

--------------------------------------
| approxkl           | 5.665799e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 537           |
| nupdates           | 1259          |
| policy_entropy     | 6.4532967     |
| policy_loss        | 0.0           |
| serial_timesteps   | 161152        |
| time_elapsed       | 344           |
| total_timesteps    | 181296        |
| value_loss         | 4.1633363e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 3.5230753e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 530           |
| nupdates           | 1260          |
| policy_entropy     | 6.4572945     |
| policy_loss        | 0.0           |
| serial_timesteps   | 161280        |
| time_elapsed       | 345           |
| total_timesteps    | 181440        |
| value_loss         | 3.8857806e-16 |
-------------------------

--------------------------------------
| approxkl           | 5.543807e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 516           |
| nupdates           | 1276          |
| policy_entropy     | 6.5212593     |
| policy_loss        | 0.0           |
| serial_timesteps   | 163328        |
| time_elapsed       | 349           |
| total_timesteps    | 183744        |
| value_loss         | 4.3715032e-16 |
--------------------------------------
-------------------------------------
| approxkl           | 6.85254e-06  |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 512          |
| nupdates           | 1277         |
| policy_entropy     | 6.525257     |
| policy_loss        | 0.0          |
| serial_timesteps   | 163456       |
| time_elapsed       | 349          |
| total_timesteps    | 183888       |
| value_loss         | 5.308254e-16 |
-------------------------------------

--------------------------------------
| approxkl           | 3.4761317e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 474           |
| nupdates           | 1293          |
| policy_entropy     | 6.589222      |
| policy_loss        | 0.0           |
| serial_timesteps   | 165504        |
| time_elapsed       | 353           |
| total_timesteps    | 186192        |
| value_loss         | 4.8398785e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 4.7663843e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 518           |
| nupdates           | 1294          |
| policy_entropy     | 6.5932198     |
| policy_loss        | 0.0           |
| serial_timesteps   | 165632        |
| time_elapsed       | 353           |
| total_timesteps    | 186336        |
| value_loss         | 4.3715032e-16 |
-------------------------

--------------------------------------
| approxkl           | 5.469837e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 515           |
| nupdates           | 1310          |
| policy_entropy     | 6.6571846     |
| policy_loss        | 0.0           |
| serial_timesteps   | 167680        |
| time_elapsed       | 357           |
| total_timesteps    | 188640        |
| value_loss         | 1.4155344e-15 |
--------------------------------------
--------------------------------------
| approxkl           | 3.9077026e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 513           |
| nupdates           | 1311          |
| policy_entropy     | 6.6611824     |
| policy_loss        | 0.0           |
| serial_timesteps   | 167808        |
| time_elapsed       | 358           |
| total_timesteps    | 188784        |
| value_loss         | 1.4155344e-15 |
-------------------------

--------------------------------------
| approxkl           | 5.5261835e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 545           |
| nupdates           | 1327          |
| policy_entropy     | 6.7251472     |
| policy_loss        | 0.0           |
| serial_timesteps   | 169856        |
| time_elapsed       | 362           |
| total_timesteps    | 191088        |
| value_loss         | 1.4155344e-15 |
--------------------------------------
--------------------------------------
| approxkl           | 5.637358e-06  |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 524           |
| nupdates           | 1328          |
| policy_entropy     | 6.729145      |
| policy_loss        | 0.0           |
| serial_timesteps   | 169984        |
| time_elapsed       | 362           |
| total_timesteps    | 191232        |
| value_loss         | 1.4155344e-15 |
-------------------------

-------------------------------------
| approxkl           | 5.518515e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 521          |
| nupdates           | 1344         |
| policy_entropy     | 6.79311      |
| policy_loss        | 0.0          |
| serial_timesteps   | 172032       |
| time_elapsed       | 366          |
| total_timesteps    | 193536       |
| value_loss         | 5.308254e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 3.9102088e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 521           |
| nupdates           | 1345          |
| policy_entropy     | 6.7971077     |
| policy_loss        | 0.0           |
| serial_timesteps   | 172160        |
| time_elapsed       | 366           |
| total_timesteps    | 193680        |
| value_loss         | 5.689893e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 3.8359894e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 547           |
| nupdates           | 1361          |
| policy_entropy     | 6.8610725     |
| policy_loss        | 0.0           |
| serial_timesteps   | 174208        |
| time_elapsed       | 370           |
| total_timesteps    | 195984        |
| value_loss         | 5.551115e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 4.4383964e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 533           |
| nupdates           | 1362          |
| policy_entropy     | 6.8650703     |
| policy_loss        | 0.0           |
| serial_timesteps   | 174336        |
| time_elapsed       | 371           |
| total_timesteps    | 196128        |
| value_loss         | 5.551115e-16  |
-------------------------

--------------------------------------
| approxkl           | 5.0986127e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 497           |
| nupdates           | 1378          |
| policy_entropy     | 6.929035      |
| policy_loss        | 0.0           |
| serial_timesteps   | 176384        |
| time_elapsed       | 375           |
| total_timesteps    | 198432        |
| value_loss         | 2.9316827e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 6.1102037e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 524           |
| nupdates           | 1379          |
| policy_entropy     | 6.933033      |
| policy_loss        | 0.0           |
| serial_timesteps   | 176512        |
| time_elapsed       | 375           |
| total_timesteps    | 198576        |
| value_loss         | 2.0990154e-16 |
-------------------------

--------------------------------------
| approxkl           | 2.2933427e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 530           |
| nupdates           | 1395          |
| policy_entropy     | 6.996998      |
| policy_loss        | 0.0           |
| serial_timesteps   | 178560        |
| time_elapsed       | 379           |
| total_timesteps    | 200880        |
| value_loss         | 1.9428903e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 2.9234334e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 518           |
| nupdates           | 1396          |
| policy_entropy     | 7.0009956     |
| policy_loss        | 0.0           |
| serial_timesteps   | 178688        |
| time_elapsed       | 379           |
| total_timesteps    | 201024        |
| value_loss         | 1.9428903e-16 |
-------------------------

--------------------------------------
| approxkl           | 3.4591017e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 550           |
| nupdates           | 1412          |
| policy_entropy     | 7.0649605     |
| policy_loss        | 0.0           |
| serial_timesteps   | 180736        |
| time_elapsed       | 383           |
| total_timesteps    | 203328        |
| value_loss         | 1.9428903e-16 |
--------------------------------------
--------------------------------------
| approxkl           | 6.9928265e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 509           |
| nupdates           | 1413          |
| policy_entropy     | 7.0689583     |
| policy_loss        | 0.0           |
| serial_timesteps   | 180864        |
| time_elapsed       | 383           |
| total_timesteps    | 203472        |
| value_loss         | 5.2926413e-15 |
-------------------------

--------------------------------------
| approxkl           | 4.3572295e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 522           |
| nupdates           | 1429          |
| policy_entropy     | 7.132923      |
| policy_loss        | 0.0           |
| serial_timesteps   | 182912        |
| time_elapsed       | 387           |
| total_timesteps    | 205776        |
| value_loss         | 1.110223e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 5.611622e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 541          |
| nupdates           | 1430         |
| policy_entropy     | 7.136921     |
| policy_loss        | 0.0          |
| serial_timesteps   | 183040       |
| time_elapsed       | 388          |
| total_timesteps    | 205920       |
| value_loss         | 1.110223e-16 |
-------------------------------------

--------------------------------------
| approxkl           | 3.8198123e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 515           |
| nupdates           | 1446          |
| policy_entropy     | 7.200886      |
| policy_loss        | 0.0           |
| serial_timesteps   | 185088        |
| time_elapsed       | 392           |
| total_timesteps    | 208224        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 3.2235055e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 507           |
| nupdates           | 1447          |
| policy_entropy     | 7.2048836     |
| policy_loss        | 0.0           |
| serial_timesteps   | 185216        |
| time_elapsed       | 392           |
| total_timesteps    | 208368        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 6.3806333e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 521           |
| nupdates           | 1463          |
| policy_entropy     | 7.2688484     |
| policy_loss        | 0.0           |
| serial_timesteps   | 187264        |
| time_elapsed       | 396           |
| total_timesteps    | 210672        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 2.7080678e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 488           |
| nupdates           | 1464          |
| policy_entropy     | 7.272846      |
| policy_loss        | 0.0           |
| serial_timesteps   | 187392        |
| time_elapsed       | 396           |
| total_timesteps    | 210816        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 2.6155756e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 515           |
| nupdates           | 1480          |
| policy_entropy     | 7.336811      |
| policy_loss        | 0.0           |
| serial_timesteps   | 189440        |
| time_elapsed       | 400           |
| total_timesteps    | 213120        |
| value_loss         | 1.110223e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 5.161294e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 541          |
| nupdates           | 1481         |
| policy_entropy     | 7.340809     |
| policy_loss        | 0.0          |
| serial_timesteps   | 189568       |
| time_elapsed       | 401          |
| total_timesteps    | 213264       |
| value_loss         | 1.110223e-16 |
-------------------------------------

--------------------------------------
| approxkl           | 2.7556666e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 525           |
| nupdates           | 1497          |
| policy_entropy     | 7.4047737     |
| policy_loss        | 0.0           |
| serial_timesteps   | 191616        |
| time_elapsed       | 405           |
| total_timesteps    | 215568        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 4.9480163e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 522           |
| nupdates           | 1498          |
| policy_entropy     | 7.4087715     |
| policy_loss        | 0.0           |
| serial_timesteps   | 191744        |
| time_elapsed       | 405           |
| total_timesteps    | 215712        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 2.6253438e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 479           |
| nupdates           | 1514          |
| policy_entropy     | 7.4727364     |
| policy_loss        | 0.0           |
| serial_timesteps   | 193792        |
| time_elapsed       | 409           |
| total_timesteps    | 218016        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 3.7733819e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 471           |
| nupdates           | 1515          |
| policy_entropy     | 7.476734      |
| policy_loss        | 0.0           |
| serial_timesteps   | 193920        |
| time_elapsed       | 410           |
| total_timesteps    | 218160        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 2.5953755e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 480           |
| nupdates           | 1531          |
| policy_entropy     | 7.540699      |
| policy_loss        | 0.0           |
| serial_timesteps   | 195968        |
| time_elapsed       | 414           |
| total_timesteps    | 220464        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 3.1667737e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 534           |
| nupdates           | 1532          |
| policy_entropy     | 7.544697      |
| policy_loss        | 0.0           |
| serial_timesteps   | 196096        |
| time_elapsed       | 414           |
| total_timesteps    | 220608        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 3.4473064e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 536           |
| nupdates           | 1548          |
| policy_entropy     | 7.6086617     |
| policy_loss        | 0.0           |
| serial_timesteps   | 198144        |
| time_elapsed       | 418           |
| total_timesteps    | 222912        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 5.7251955e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 531           |
| nupdates           | 1549          |
| policy_entropy     | 7.6126595     |
| policy_loss        | 0.0           |
| serial_timesteps   | 198272        |
| time_elapsed       | 419           |
| total_timesteps    | 223056        |
| value_loss         | 1.110223e-16  |
-------------------------

-------------------------------------
| approxkl           | 4.558731e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 528          |
| nupdates           | 1565         |
| policy_entropy     | 7.6766243    |
| policy_loss        | 0.0          |
| serial_timesteps   | 200320       |
| time_elapsed       | 423          |
| total_timesteps    | 225360       |
| value_loss         | 1.110223e-16 |
-------------------------------------
--------------------------------------
| approxkl           | 4.7777057e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 538           |
| nupdates           | 1566          |
| policy_entropy     | 7.680622      |
| policy_loss        | 0.0           |
| serial_timesteps   | 200448        |
| time_elapsed       | 423           |
| total_timesteps    | 225504        |
| value_loss         | 1.110223e-16  |
--------------------------------------

--------------------------------------
| approxkl           | 1.2272363e-05 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 494           |
| nupdates           | 1582          |
| policy_entropy     | 7.744587      |
| policy_loss        | 0.0           |
| serial_timesteps   | 202496        |
| time_elapsed       | 427           |
| total_timesteps    | 227808        |
| value_loss         | 1.110223e-16  |
--------------------------------------
-------------------------------------
| approxkl           | 5.20605e-06  |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 547          |
| nupdates           | 1583         |
| policy_entropy     | 7.7485847    |
| policy_loss        | 0.0          |
| serial_timesteps   | 202624       |
| time_elapsed       | 427          |
| total_timesteps    | 227952       |
| value_loss         | 1.110223e-16 |
-------------------------------------

-------------------------------------
| approxkl           | 3.634649e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 545          |
| nupdates           | 1599         |
| policy_entropy     | 7.8125496    |
| policy_loss        | 0.0          |
| serial_timesteps   | 204672       |
| time_elapsed       | 431          |
| total_timesteps    | 230256       |
| value_loss         | 1.110223e-16 |
-------------------------------------
-------------------------------------
| approxkl           | 6.097649e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 532          |
| nupdates           | 1600         |
| policy_entropy     | 7.8165474    |
| policy_loss        | 0.0          |
| serial_timesteps   | 204800       |
| time_elapsed       | 431          |
| total_timesteps    | 230400       |
| value_loss         | 1.110223e-16 |
-------------------------------------
------------

-------------------------------------
| approxkl           | 4.597513e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 535          |
| nupdates           | 1616         |
| policy_entropy     | 7.880512     |
| policy_loss        | 0.0          |
| serial_timesteps   | 206848       |
| time_elapsed       | 436          |
| total_timesteps    | 232704       |
| value_loss         | 1.110223e-16 |
-------------------------------------
-------------------------------------
| approxkl           | 4.51539e-06  |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 512          |
| nupdates           | 1617         |
| policy_entropy     | 7.88451      |
| policy_loss        | 0.0          |
| serial_timesteps   | 206976       |
| time_elapsed       | 436          |
| total_timesteps    | 232848       |
| value_loss         | 1.110223e-16 |
-------------------------------------
------------

--------------------------------------
| approxkl           | 4.5021666e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 534           |
| nupdates           | 1633          |
| policy_entropy     | 7.948475      |
| policy_loss        | 0.0           |
| serial_timesteps   | 209024        |
| time_elapsed       | 440           |
| total_timesteps    | 235152        |
| value_loss         | 1.110223e-16  |
--------------------------------------
--------------------------------------
| approxkl           | 1.0236201e-05 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 516           |
| nupdates           | 1634          |
| policy_entropy     | 7.9524727     |
| policy_loss        | 0.0           |
| serial_timesteps   | 209152        |
| time_elapsed       | 440           |
| total_timesteps    | 235296        |
| value_loss         | 1.110223e-16  |
-------------------------

--------------------------------------
| approxkl           | 4.3451028e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 530           |
| nupdates           | 1650          |
| policy_entropy     | 8.016438      |
| policy_loss        | 0.0           |
| serial_timesteps   | 211200        |
| time_elapsed       | 444           |
| total_timesteps    | 237600        |
| value_loss         | 0.0006878023  |
--------------------------------------
---------------------------------------
| approxkl           | 7.700526e-06   |
| clipfrac           | 0.0            |
| explained_variance | 1              |
| fps                | 532            |
| nupdates           | 1651           |
| policy_entropy     | 8.020435       |
| policy_loss        | 0.0            |
| serial_timesteps   | 211328         |
| time_elapsed       | 444            |
| total_timesteps    | 237744         |
| value_loss         | 0.000118879536 |
-------------

--------------------------------------
| approxkl           | 2.8462161e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 492           |
| nupdates           | 1667          |
| policy_entropy     | 8.0844        |
| policy_loss        | 0.0           |
| serial_timesteps   | 213376        |
| time_elapsed       | 448           |
| total_timesteps    | 240048        |
| value_loss         | 3.7816972e-16 |
--------------------------------------
-------------------------------------
| approxkl           | 2.564076e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 505          |
| nupdates           | 1668         |
| policy_entropy     | 8.088398     |
| policy_loss        | 0.0          |
| serial_timesteps   | 213504       |
| time_elapsed       | 449          |
| total_timesteps    | 240192       |
| value_loss         | 3.035766e-16 |
-------------------------------------

-------------------------------------
| approxkl           | 2.986606e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 557          |
| nupdates           | 1684         |
| policy_entropy     | 8.152363     |
| policy_loss        | 0.0          |
| serial_timesteps   | 215552       |
| time_elapsed       | 453          |
| total_timesteps    | 242496       |
| value_loss         | 0.0          |
-------------------------------------
-------------------------------------
| approxkl           | 4.061523e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 540          |
| nupdates           | 1685         |
| policy_entropy     | 8.156361     |
| policy_loss        | 0.0          |
| serial_timesteps   | 215680       |
| time_elapsed       | 453          |
| total_timesteps    | 242640       |
| value_loss         | 0.0          |
-------------------------------------
------------

-------------------------------------
| approxkl           | 7.964141e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 536          |
| nupdates           | 1701         |
| policy_entropy     | 8.220325     |
| policy_loss        | 0.0          |
| serial_timesteps   | 217728       |
| time_elapsed       | 457          |
| total_timesteps    | 244944       |
| value_loss         | 0.0          |
-------------------------------------
--------------------------------------
| approxkl           | 4.7672033e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 548           |
| nupdates           | 1702          |
| policy_entropy     | 8.224323      |
| policy_loss        | 0.0           |
| serial_timesteps   | 217856        |
| time_elapsed       | 457           |
| total_timesteps    | 245088        |
| value_loss         | 0.0           |
--------------------------------------

--------------------------------------
| approxkl           | 3.4728605e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 509           |
| nupdates           | 1718          |
| policy_entropy     | 8.288288      |
| policy_loss        | 0.0           |
| serial_timesteps   | 219904        |
| time_elapsed       | 461           |
| total_timesteps    | 247392        |
| value_loss         | 0.0           |
--------------------------------------
--------------------------------------
| approxkl           | 4.4326857e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 511           |
| nupdates           | 1719          |
| policy_entropy     | 8.292286      |
| policy_loss        | 0.0           |
| serial_timesteps   | 220032        |
| time_elapsed       | 462           |
| total_timesteps    | 247536        |
| value_loss         | 0.0           |
-------------------------

--------------------------------------
| approxkl           | 3.6536776e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 533           |
| nupdates           | 1735          |
| policy_entropy     | 8.356251      |
| policy_loss        | 0.0           |
| serial_timesteps   | 222080        |
| time_elapsed       | 466           |
| total_timesteps    | 249840        |
| value_loss         | 0.0           |
--------------------------------------
--------------------------------------
| approxkl           | 4.6713612e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 531           |
| nupdates           | 1736          |
| policy_entropy     | 8.360249      |
| policy_loss        | 0.0           |
| serial_timesteps   | 222208        |
| time_elapsed       | 466           |
| total_timesteps    | 249984        |
| value_loss         | 0.0           |
-------------------------

--------------------------------------
| approxkl           | 5.7243797e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 515           |
| nupdates           | 1752          |
| policy_entropy     | 8.424213      |
| policy_loss        | 0.0           |
| serial_timesteps   | 224256        |
| time_elapsed       | 470           |
| total_timesteps    | 252288        |
| value_loss         | 0.0           |
--------------------------------------
--------------------------------------
| approxkl           | 3.2491432e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 528           |
| nupdates           | 1753          |
| policy_entropy     | 8.428211      |
| policy_loss        | 0.0           |
| serial_timesteps   | 224384        |
| time_elapsed       | 470           |
| total_timesteps    | 252432        |
| value_loss         | 0.0           |
-------------------------

--------------------------------------
| approxkl           | 4.2638994e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 492           |
| nupdates           | 1769          |
| policy_entropy     | 8.492176      |
| policy_loss        | 0.0           |
| serial_timesteps   | 226432        |
| time_elapsed       | 475           |
| total_timesteps    | 254736        |
| value_loss         | 0.0           |
--------------------------------------
--------------------------------------
| approxkl           | 3.4448747e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 454           |
| nupdates           | 1770          |
| policy_entropy     | 8.496174      |
| policy_loss        | 0.0           |
| serial_timesteps   | 226560        |
| time_elapsed       | 475           |
| total_timesteps    | 254880        |
| value_loss         | 0.0           |
-------------------------

--------------------------------------
| approxkl           | 6.8917298e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 456           |
| nupdates           | 1786          |
| policy_entropy     | 8.560139      |
| policy_loss        | 0.0           |
| serial_timesteps   | 228608        |
| time_elapsed       | 479           |
| total_timesteps    | 257184        |
| value_loss         | 0.0           |
--------------------------------------
--------------------------------------
| approxkl           | 6.3795005e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 509           |
| nupdates           | 1787          |
| policy_entropy     | 8.5641365     |
| policy_loss        | 0.0           |
| serial_timesteps   | 228736        |
| time_elapsed       | 480           |
| total_timesteps    | 257328        |
| value_loss         | 0.0           |
-------------------------

-------------------------------------
| approxkl           | 6.900549e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 519          |
| nupdates           | 1803         |
| policy_entropy     | 8.628101     |
| policy_loss        | 0.0          |
| serial_timesteps   | 230784       |
| time_elapsed       | 484          |
| total_timesteps    | 259632       |
| value_loss         | 0.0          |
-------------------------------------
-------------------------------------
| approxkl           | 5.234184e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 497          |
| nupdates           | 1804         |
| policy_entropy     | 8.632099     |
| policy_loss        | 0.0          |
| serial_timesteps   | 230912       |
| time_elapsed       | 484          |
| total_timesteps    | 259776       |
| value_loss         | 0.0          |
-------------------------------------
------------

-------------------------------------
| approxkl           | 7.265217e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 416          |
| nupdates           | 1820         |
| policy_entropy     | 8.696064     |
| policy_loss        | 0.0          |
| serial_timesteps   | 232960       |
| time_elapsed       | 489          |
| total_timesteps    | 262080       |
| value_loss         | 0.0          |
-------------------------------------
-------------------------------------
| approxkl           | 4.330778e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 455          |
| nupdates           | 1821         |
| policy_entropy     | 8.700062     |
| policy_loss        | 0.0          |
| serial_timesteps   | 233088       |
| time_elapsed       | 489          |
| total_timesteps    | 262224       |
| value_loss         | 0.0          |
-------------------------------------
------------

--------------------------------------
| approxkl           | 5.8091823e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 534           |
| nupdates           | 1837          |
| policy_entropy     | 8.764027      |
| policy_loss        | 0.0           |
| serial_timesteps   | 235136        |
| time_elapsed       | 494           |
| total_timesteps    | 264528        |
| value_loss         | 0.0           |
--------------------------------------
--------------------------------------
| approxkl           | 7.8018165e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 544           |
| nupdates           | 1838          |
| policy_entropy     | 8.768024      |
| policy_loss        | 0.0           |
| serial_timesteps   | 235264        |
| time_elapsed       | 494           |
| total_timesteps    | 264672        |
| value_loss         | 0.0           |
-------------------------

--------------------------------------
| approxkl           | 4.8257803e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 514           |
| nupdates           | 1854          |
| policy_entropy     | 8.831989      |
| policy_loss        | 0.0           |
| serial_timesteps   | 237312        |
| time_elapsed       | 499           |
| total_timesteps    | 266976        |
| value_loss         | 0.0           |
--------------------------------------
-------------------------------------
| approxkl           | 4.427491e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 450          |
| nupdates           | 1855         |
| policy_entropy     | 8.835987     |
| policy_loss        | 0.0          |
| serial_timesteps   | 237440       |
| time_elapsed       | 499          |
| total_timesteps    | 267120       |
| value_loss         | 0.0          |
-------------------------------------

--------------------------------------
| approxkl           | 5.1805323e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 526           |
| nupdates           | 1871          |
| policy_entropy     | 8.899952      |
| policy_loss        | 0.0           |
| serial_timesteps   | 239488        |
| time_elapsed       | 503           |
| total_timesteps    | 269424        |
| value_loss         | 0.0           |
--------------------------------------
-------------------------------------
| approxkl           | 5.291903e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 509          |
| nupdates           | 1872         |
| policy_entropy     | 8.90395      |
| policy_loss        | 0.0          |
| serial_timesteps   | 239616       |
| time_elapsed       | 503          |
| total_timesteps    | 269568       |
| value_loss         | 0.0          |
-------------------------------------

-----------------------------------
| approxkl           | 5.1487e-06 |
| clipfrac           | 0.0        |
| explained_variance | 1          |
| fps                | 531        |
| nupdates           | 1888       |
| policy_entropy     | 8.967915   |
| policy_loss        | 0.0        |
| serial_timesteps   | 241664     |
| time_elapsed       | 508        |
| total_timesteps    | 271872     |
| value_loss         | 0.0        |
-----------------------------------
--------------------------------------
| approxkl           | 3.9370093e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 412           |
| nupdates           | 1889          |
| policy_entropy     | 8.971912      |
| policy_loss        | 0.0           |
| serial_timesteps   | 241792        |
| time_elapsed       | 508           |
| total_timesteps    | 272016        |
| value_loss         | 0.0           |
--------------------------------------
-------------------------

--------------------------------------
| approxkl           | 6.9993202e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 421           |
| nupdates           | 1905          |
| policy_entropy     | 9.035877      |
| policy_loss        | 0.0           |
| serial_timesteps   | 243840        |
| time_elapsed       | 512           |
| total_timesteps    | 274320        |
| value_loss         | 0.0           |
--------------------------------------
--------------------------------------
| approxkl           | 5.3274034e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 532           |
| nupdates           | 1906          |
| policy_entropy     | 9.039875      |
| policy_loss        | 0.0           |
| serial_timesteps   | 243968        |
| time_elapsed       | 513           |
| total_timesteps    | 274464        |
| value_loss         | 0.0           |
-------------------------

-------------------------------------
| approxkl           | 5.707495e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 533          |
| nupdates           | 1922         |
| policy_entropy     | 9.10384      |
| policy_loss        | 0.0          |
| serial_timesteps   | 246016       |
| time_elapsed       | 517          |
| total_timesteps    | 276768       |
| value_loss         | 0.0          |
-------------------------------------
-------------------------------------
| approxkl           | 3.379014e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 503          |
| nupdates           | 1923         |
| policy_entropy     | 9.107838     |
| policy_loss        | 0.0          |
| serial_timesteps   | 246144       |
| time_elapsed       | 517          |
| total_timesteps    | 276912       |
| value_loss         | 0.0          |
-------------------------------------
------------

--------------------------------------
| approxkl           | 2.9453693e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1             |
| fps                | 520           |
| nupdates           | 1939          |
| policy_entropy     | 9.1718025     |
| policy_loss        | 0.0           |
| serial_timesteps   | 248192        |
| time_elapsed       | 521           |
| total_timesteps    | 279216        |
| value_loss         | 0.0           |
--------------------------------------
-------------------------------------
| approxkl           | 4.047494e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1            |
| fps                | 534          |
| nupdates           | 1940         |
| policy_entropy     | 9.1758       |
| policy_loss        | 0.0          |
| serial_timesteps   | 248320       |
| time_elapsed       | 522          |
| total_timesteps    | 279360       |
| value_loss         | 0.0          |
-------------------------------------

In [353]:
obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()


NotImplementedError: 

In [None]:
model.save("halite")
del model
model = PPO2.load("halite", env=env)


In [323]:
env = gym.make("PongNoFrameskip-v4")
env.reset()
env.step(env.action_space.sample())

(array([[[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [109, 118,  43],
         [109, 118,  43],
         [109, 118,  43]],
 
        [[109, 118,  43],
         [109, 118,  43],
         [109, 118,  43],
         ...,
         [109, 118,  43],
         [109, 118,  43],
         [109, 118,  43]],
 
        [[109, 118,  43],
         [109, 118,  43],
         [109, 118,  43],
         ...,
         [109, 118,  43],
         [109, 118,  43],
         [109, 118,  43]],
 
        ...,
 
        [[ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24],
         ...,
         [ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24]],
 
        [[ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24],
         ...,
         [ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24]],
 
        [[ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24],
         ...,
         [ 53,  95,  24],
  

In [None]:
import time
from random import uniform
from IPython.display import display, clear_output

i = 1
for _ in range(200):
    clear_output(wait=True)
    display('Iteration '+str(i)+' Score: '+str(uniform(0, 1)))
    time.sleep(0.1)
    i += 1