A template for RL training

In [1]:
import numpy as np
import pandas as pd

from pathlib import Path
from datetime import datetime

from utils import print_log

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Location of the previously created dataset split (train / val / test files live here).
dataset_folder_path = Path("dataset") / "20250707" / "split"

In [4]:
# copied from 03_data_split.ipynb

# Helper functions for the new split folder structure
def load_split_data_from_folder(split_folder, split_type='train'):
    """Load the time segments and the aggregate DataFrame of one dataset split.

    Args:
        split_folder: Folder holding the split files (``pathlib.Path`` or ``str``).
        split_type: File-name prefix of the split to read (default ``'train'``).
            The function reads ``{split_type}_segments.txt`` and
            ``{split_type}_aggregate_df.pkl`` from ``split_folder``.

    Returns:
        A ``(segments, df)`` tuple. ``segments`` is a list of ``(start, end)``
        ``datetime`` pairs, one per non-blank line of the segments file, and
        ``df`` is the object unpickled from the ``.pkl`` file.

    Raises:
        FileNotFoundError: If one of the two files does not exist.
        ValueError: If a non-blank line is not of the form ``<start> - <end>``
            with both parts in ISO 8601 format.
    """
    split_folder = Path(split_folder)

    segments = []
    with open(split_folder / f'{split_type}_segments.txt', 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at the end of the file)
                # instead of failing on the tuple unpacking below.
                continue
            start_str, end_str = line.split(' - ')
            start = datetime.fromisoformat(start_str)
            end = datetime.fromisoformat(end_str)
            segments.append((start, end))

    # NOTE: unpickling can execute arbitrary code; only load files you created yourself.
    df = pd.read_pickle(split_folder / f'{split_type}_aggregate_df.pkl')
    return segments, df

def load_signatures_from_split_folder(split_folder, split_type, appliance):
    """Load the load signatures of one appliance for one dataset split.

    Files are read from
    ``split_folder/load_signature_library/<split_type>/<appliance>/``.

    Args:
        split_folder: Root folder of the split (``pathlib.Path`` or ``str``).
        split_type: Name of the split sub-folder to read from.
        appliance: Name of the appliance sub-folder to read from.

    Returns:
        A ``(signatures_df, ranges)`` tuple. ``signatures_df`` is the object
        unpickled from ``load_signatures.pkl``; ``ranges`` is a list of
        ``(start, end)`` integer pairs parsed from ``selected_ranges.txt``.
        When ``load_signatures.pkl`` does not exist, an empty DataFrame and an
        empty list are returned. When only ``selected_ranges.txt`` is missing,
        ``ranges`` is empty.

    Raises:
        ValueError: If a non-blank line of ``selected_ranges.txt`` is not two
            comma-separated integers.
    """
    # Both files live in the same directory; build that prefix once.
    library_dir = Path(split_folder) / 'load_signature_library' / split_type / appliance
    sig_path = library_dir / 'load_signatures.pkl'
    ranges_path = library_dir / 'selected_ranges.txt'

    if not sig_path.exists():
        return pd.DataFrame(), []

    # NOTE: unpickling can execute arbitrary code; only load files you created yourself.
    signatures_df = pd.read_pickle(sig_path)

    ranges = []
    if ranges_path.exists():
        with open(ranges_path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Skip blank lines (e.g. a trailing newline) rather than crash on int('').
                    continue
                start, end = map(int, line.split(','))
                ranges.append((start, end))

    return signatures_df, ranges

In [5]:
# Load the (segments, aggregate DataFrame) pair of each split.
# Note that the validation files use the short prefix 'val'.
aggregate_load_segments_train, aggregate_load_df_train = load_split_data_from_folder(
    dataset_folder_path, split_type='train'
)
aggregate_load_segments_test, aggregate_load_df_test = load_split_data_from_folder(
    dataset_folder_path, split_type='test'
)
aggregate_load_segments_validation, aggregate_load_df_validation = load_split_data_from_folder(
    dataset_folder_path, split_type='val'
)

In [6]:
# Preview the training-split aggregate DataFrame (the rendered table is truncated to its first and last rows).
aggregate_load_df_train

Unnamed: 0,timestamp,aggregate,datetime,washing_machine,dishwasher,fridge,kettle,microwave,toaster,tv,htpc,gas_oven,kitchen_lights
0,1.357603e+09,234.0,2013-01-08 00:00:05+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,69.0,,0.0
1,1.357603e+09,231.0,2013-01-08 00:00:11+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,70.0,,0.0
2,1.357603e+09,234.0,2013-01-08 00:00:17+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,70.0,,0.0
3,1.357603e+09,232.0,2013-01-08 00:00:23+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,68.0,,0.0
4,1.357603e+09,232.0,2013-01-08 00:00:30+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,70.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2231636,1.388448e+09,178.0,2013-12-30 23:59:35+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,3.0,0.0
2231637,1.388448e+09,177.0,2013-12-30 23:59:41+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,3.0,0.0
2231638,1.388448e+09,178.0,2013-12-30 23:59:47+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,3.0,0.0
2231639,1.388448e+09,178.0,2013-12-30 23:59:53+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,3.0,0.0


In [7]:
# Inspect the training segments: a list of (start, end) datetime pairs.
aggregate_load_segments_train

[(datetime.datetime(2013, 1, 8, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 1, 10, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 2, 27, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 2, 28, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 3, 8, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 3, 10, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 3, 28, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 3, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 3, 22, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 3, 26, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 4, 8, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 4, 10, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 4, 26, 0, 0, tzinfo=datetime.timezone.utc),
  d

In [8]:
from rl_env.env_data_loader import SmartMeterDataLoader

# Hand the training split (segments + aggregate readings) to the project's data loader.
sm_dl = SmartMeterDataLoader(
    aggregate_load_df=aggregate_load_df_train,
    aggregate_load_segments=aggregate_load_segments_train,
)

# Number of segments the loader exposes after its own division step
# (presumably one per day — confirm in SmartMeterDataLoader).
sm_dl.get_divided_segments_length()

162

In [9]:
# Inspect one divided segment: an array holding its start and end datetimes.
sm_dl.divided_segments[7]

array([datetime.datetime(2013, 3, 10, 0, 0, tzinfo=datetime.timezone.utc),
       datetime.datetime(2013, 3, 10, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)],
      dtype=object)

In [10]:
# Fetch the aggregate-load rows of divided segment 13; this frame is later passed to the environment.
# (Index 13 looks like an arbitrary sample choice — adjust as needed.)
load_segment = sm_dl.get_aggregate_load_segment(13)

load_segment

Unnamed: 0,timestamp,aggregate,datetime
104747,1.363997e+09,335.0,2013-03-23 00:00:05+00:00
104748,1.363997e+09,336.0,2013-03-23 00:00:11+00:00
104749,1.363997e+09,333.0,2013-03-23 00:00:17+00:00
104750,1.363997e+09,334.0,2013-03-23 00:00:24+00:00
104751,1.363997e+09,331.0,2013-03-23 00:00:30+00:00
...,...,...,...
118501,1.364083e+09,179.0,2013-03-23 23:59:30+00:00
118502,1.364083e+09,171.0,2013-03-23 23:59:37+00:00
118503,1.364083e+09,171.0,2013-03-23 23:59:43+00:00
118504,1.364083e+09,171.0,2013-03-23 23:59:49+00:00


In [11]:
# Rebuild the `datetime` column from the epoch-seconds `timestamp` column.
# `.assign` returns a new DataFrame, so the frame handed out by the data loader is not
# modified in place (no SettingWithCopyWarning, no hidden state when the cell is re-run).
# NOTE(review): the rebuilt column is timezone-naive, whereas the column shown before this
# cell was tz-aware (UTC) — confirm that the environment expects naive timestamps.
load_segment = load_segment.assign(
    datetime=pd.to_datetime(load_segment['timestamp'], unit='s')
)

load_segment

Unnamed: 0,timestamp,aggregate,datetime
104747,1.363997e+09,335.0,2013-03-23 00:00:05
104748,1.363997e+09,336.0,2013-03-23 00:00:11
104749,1.363997e+09,333.0,2013-03-23 00:00:17
104750,1.363997e+09,334.0,2013-03-23 00:00:24
104751,1.363997e+09,331.0,2013-03-23 00:00:30
...,...,...,...
118501,1.364083e+09,179.0,2013-03-23 23:59:30
118502,1.364083e+09,171.0,2013-03-23 23:59:37
118503,1.364083e+09,171.0,2013-03-23 23:59:43
118504,1.364083e+09,171.0,2013-03-23 23:59:49


(Optional) Load the pre-trained H-network and related components

In the final product, the H-network should be trained along with the DDQL/PPO agent.

In [12]:
import torch
from model.H_network.h_network import HNetwork

# Date tag embedded in the file names of the pre-trained H-network artifacts.
h_network_datetime = datetime(2025, 7, 12)

h_network_path = Path("model_trained", f"h_network_{h_network_datetime.strftime('%Y%m%d')}.pth")

# The constructor arguments must match the architecture stored in the checkpoint
# (presumably input size, hidden size, output size — confirm against HNetwork's signature).
h_network = HNetwork(2, 44, 1)
# map_location="cpu" lets a checkpoint saved from a GPU run be restored on a CPU-only machine.
# load_state_dict copies the values into the module's existing parameters, so the module's
# own device is unaffected.
# NOTE(review): torch.load unpickles the file; if the installed torch supports it, consider
# passing weights_only=True when the checkpoint does not come from a trusted source.
h_network.load_state_dict(torch.load(h_network_path, map_location="cpu"))
# Switch to inference mode; as the last expression this also displays the module summary.
h_network.eval()

HNetwork(
  (LSTM_1): LSTM(2, 44, batch_first=True, bidirectional=True)
  (ac1): Tanh()
  (LSTM_2): LSTM(88, 1, batch_first=True, bidirectional=True)
  (ac2): Tanh()
  (fc): Linear(in_features=2, out_features=1, bias=True)
)

In [13]:
import joblib

# The scaler file carries the same date tag as the H-network checkpoint.
h_network_stdscaler_path = Path(
    "model_trained",
    f"h_network_standardscaler_{h_network_datetime.strftime('%Y%m%d')}.pkl",
)
h_network_stdscaler = joblib.load(h_network_stdscaler_path)

Create the environment

In [14]:
import sys
# Put the rl_env/ folder itself on the import path before importing from it
# (presumably its modules import each other by bare name — confirm).
sys.path.append(str(Path('rl_env')))

from rl_env.hrl_env import SmartMeterWorld

# Build the environment on the single-segment frame prepared above.
# With render_mode="human" the constructor logs a connection to a render server
# (127.0.0.1:50007 in the recorded output), so that server has to be reachable.
env = SmartMeterWorld(
    aggregate_load_df=load_segment,
    render_mode="human",
)

# Attach the pre-trained H-network and its scaler to the environment.
env.set_h_network(h_network)
env.set_h_network_stdscaler(h_network_stdscaler)

[2025-07-12 02:01:26:508] [SmartMeterWorld] Render mode set to 'human'. Render server at 127.0.0.1:50007. render_connected: True. render_client_socket: <socket.socket fd=69, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0, laddr=('127.0.0.1', 35610), raddr=('127.0.0.1', 50007)>


In [15]:
from gymnasium.utils.env_checker import check_env

# Run Gymnasium's built-in API conformance checks on the environment.
# A failure is reported as a message rather than raised, so the notebook keeps running.
try:
    check_env(env)
except Exception as err:
    print(f"Environment has issues: {err}")
else:
    print("Environment passes all checks!")

Environment passes all checks!


  logger.warn(
  logger.warn(


In [16]:
# reset() returns an (observation, info) tuple, so `obs` holds both elements here:
# the observation dict (aggregate_load, battery_soc, timestamp_features) and an empty info dict.
obs = env.reset()
obs

({'aggregate_load': array([335.], dtype=float32),
  'battery_soc': array([0.05382102], dtype=float32),
  'timestamp_features': array([-0.5       ,  0.33333334, -0.25      ], dtype=float32)},
 {})

In [17]:
# Reset the render window before training
# (presumably clears what the external "human" renderer has drawn so far — confirm in SmartMeterWorld).
env.reset_render_window()

In [18]:
# initialize a PPO agent
from stable_baselines3 import PPO

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same training run.
RL_SEED = 42
# Training budget, in environment steps.
TOTAL_TIMESTEPS = 300000

# Each run logs into its own time-stamped TensorBoard folder.
rl_datetime = datetime.now()
tensorboard_log_path = Path("rl_model", "PPO", f"{rl_datetime.strftime('%Y%m%d_%H%M%S')}")

# "MultiInputPolicy" is required because the observation is a dict
# (aggregate_load, battery_soc, timestamp_features).
rl_model = PPO(
    "MultiInputPolicy",
    env,
    verbose=2,
    seed=RL_SEED,
    # SB3 documents this argument as a string path.
    tensorboard_log=str(tensorboard_log_path),
)

# NOTE(review): the recorded logs show value_loss around 1e6-1e8, explained_variance ~ 0 and
# ep_rew_mean ~ 7.5e6, which suggests the reward scale is too large for the value function —
# consider scaling the reward in the environment or wrapping it with VecNormalize.
rl_model.learn(
    total_timesteps=TOTAL_TIMESTEPS,
    progress_bar=True,
    tb_log_name="PPO_SmartMeterWorld"
)

2025-07-12 02:01:26.833878: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-12 02:01:26.841661: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752282086.850399  309627 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752282086.852950  309627 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752282086.859580  309627 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to rl_model/PPO/20250712_020127/PPO_SmartMeterWorld_1


Output()

-----------------------------
| time/              |      |
|    fps             | 101  |
|    iterations      | 1    |
|    time_elapsed    | 20   |
|    total_timesteps | 2048 |
-----------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 85          |
|    iterations           | 2           |
|    time_elapsed         | 47          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.002185715 |
|    clip_fraction        | 0.000586    |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.42       |
|    explained_variance   | 2.98e-06    |
|    learning_rate        | 0.0003      |
|    loss                 | 9.66e+05    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.000123   |
|    std                  | 0.992       |
|    value_loss           | 1.99e+06    |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 77           |
|    iterations           | 3            |
|    time_elapsed         | 78           |
|    total_timesteps      | 6144         |
| train/                  |              |
|    approx_kl            | 0.0044515515 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 3.58e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 6.7e+06      |
|    n_updates            | 20           |
|    policy_gradient_loss | -0.00293     |
|    std                  | 0.991        |
|    value_loss           | 1.38e+07     |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 70            |
|    iterations           | 4             |
|    time_elapsed         | 115           |
|    total_timesteps      | 8192          |
| train/                  |               |
|    approx_kl            | 0.00014618429 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.08e+07      |
|    n_updates            | 30            |
|    policy_gradient_loss | -5.23e-05     |
|    std                  | 0.993         |
|    value_loss           | 4.21e+07      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 65           |
|    iterations           | 5            |
|    time_elapsed         | 156          |
|    total_timesteps      | 10240        |
| train/                  |              |
|    approx_kl            | 0.0012805604 |
|    clip_fraction        | 9.77e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 4.08e+07     |
|    n_updates            | 40           |
|    policy_gradient_loss | -0.000799    |
|    std                  | 0.995        |
|    value_loss           | 8.2e+07      |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 60            |
|    iterations           | 6             |
|    time_elapsed         | 202           |
|    total_timesteps      | 12288         |
| train/                  |               |
|    approx_kl            | 0.00013962394 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 6.82e+07      |
|    n_updates            | 50            |
|    policy_gradient_loss | -0.000103     |
|    std                  | 0.996         |
|    value_loss           | 1.34e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.53e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 7           |
|    time_elapsed         | 243         |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.000817201 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | -4.77e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.28e+08    |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.000796   |
|    std                  | 0.996       |
|    value_loss           | 2.49e+08    |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.53e+06      |
| time/                   |               |
|    fps                  | 61            |
|    iterations           | 8             |
|    time_elapsed         | 265           |
|    total_timesteps      | 16384         |
| train/                  |               |
|    approx_kl            | 1.1795288e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.43e+08      |
|    n_updates            | 70            |
|    policy_gradient_loss | 4.26e-06      |
|    std                  | 0.995         |
|    value_loss           | 2.63e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.53e+06     |
| time/                   |              |
|    fps                  | 62           |
|    iterations           | 9            |
|    time_elapsed         | 293          |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 0.0022236183 |
|    clip_fraction        | 0.000635     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.92e+06     |
|    n_updates            | 80           |
|    policy_gradient_loss | -0.000182    |
|    std                  | 0.989        |
|    value_loss           | 3.62e+06     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.53e+06     |
| time/                   |              |
|    fps                  | 62           |
|    iterations           | 10           |
|    time_elapsed         | 326          |
|    total_timesteps      | 20480        |
| train/                  |              |
|    approx_kl            | 0.0033178078 |
|    clip_fraction        | 0.00176      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 2.38e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 8.45e+06     |
|    n_updates            | 90           |
|    policy_gradient_loss | -0.00101     |
|    std                  | 0.985        |
|    value_loss           | 1.75e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.53e+06      |
| time/                   |               |
|    fps                  | 61            |
|    iterations           | 11            |
|    time_elapsed         | 364           |
|    total_timesteps      | 22528         |
| train/                  |               |
|    approx_kl            | 0.00032366396 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.55e+07      |
|    n_updates            | 100           |
|    policy_gradient_loss | -0.000127     |
|    std                  | 0.986         |
|    value_loss           | 5.04e+07      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.53e+06    |
| time/                   |             |
|    fps                  | 60          |
|    iterations           | 12          |
|    time_elapsed         | 407         |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.000580244 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 4.71e+07    |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.000325   |
|    std                  | 0.987       |
|    value_loss           | 9.45e+07    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.53e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 13          |
|    time_elapsed         | 454         |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.003162625 |
|    clip_fraction        | 0.00161     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0003      |
|    loss                 | 7.97e+07    |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00261    |
|    std                  | 0.986       |
|    value_loss           | 1.58e+08    |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 14            |
|    time_elapsed         | 487           |
|    total_timesteps      | 28672         |
| train/                  |               |
|    approx_kl            | 0.00042108534 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | -3.58e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.47e+08      |
|    n_updates            | 130           |
|    policy_gradient_loss | -0.000476     |
|    std                  | 0.986         |
|    value_loss           | 2.92e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.5e+06      |
| time/                   |              |
|    fps                  | 59           |
|    iterations           | 15           |
|    time_elapsed         | 512          |
|    total_timesteps      | 30720        |
| train/                  |              |
|    approx_kl            | 0.0005397114 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 9.23e+07     |
|    n_updates            | 140          |
|    policy_gradient_loss | -0.000236    |
|    std                  | 0.987        |
|    value_loss           | 1.66e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.5e+06      |
| time/                   |              |
|    fps                  | 60           |
|    iterations           | 16           |
|    time_elapsed         | 541          |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0011100131 |
|    clip_fraction        | 4.88e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 3.23e+06     |
|    n_updates            | 150          |
|    policy_gradient_loss | -6.03e-05    |
|    std                  | 0.99         |
|    value_loss           | 6.53e+06     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 60            |
|    iterations           | 17            |
|    time_elapsed         | 576           |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 0.00091891247 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | 1.79e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.2e+07       |
|    n_updates            | 160           |
|    policy_gradient_loss | -0.000242     |
|    std                  | 0.991         |
|    value_loss           | 2.48e+07      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.5e+06      |
| time/                   |              |
|    fps                  | 59           |
|    iterations           | 18           |
|    time_elapsed         | 616          |
|    total_timesteps      | 36864        |
| train/                  |              |
|    approx_kl            | 6.291916e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 3.23e+07     |
|    n_updates            | 170          |
|    policy_gradient_loss | -3.52e-05    |
|    std                  | 0.988        |
|    value_loss           | 6.12e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 19            |
|    time_elapsed         | 660           |
|    total_timesteps      | 38912         |
| train/                  |               |
|    approx_kl            | 0.00015520694 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 5.62e+07      |
|    n_updates            | 180           |
|    policy_gradient_loss | -3.08e-05     |
|    std                  | 0.988         |
|    value_loss           | 1.07e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 20            |
|    time_elapsed         | 709           |
|    total_timesteps      | 40960         |
| train/                  |               |
|    approx_kl            | 5.8998237e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 9.46e+07      |
|    n_updates            | 190           |
|    policy_gradient_loss | -2.95e-05     |
|    std                  | 0.987         |
|    value_loss           | 1.86e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.47e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 21            |
|    time_elapsed         | 734           |
|    total_timesteps      | 43008         |
| train/                  |               |
|    approx_kl            | 0.00014870014 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.65e+08      |
|    n_updates            | 200           |
|    policy_gradient_loss | -0.000151     |
|    std                  | 0.988         |
|    value_loss           | 3.24e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.47e+06    |
| time/                   |             |
|    fps                  | 59          |
|    iterations           | 22          |
|    time_elapsed         | 759         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.003677243 |
|    clip_fraction        | 0.00366     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 3.74e+07    |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.000925   |
|    std                  | 0.987       |
|    value_loss           | 5.73e+07    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 59           |
|    iterations           | 23           |
|    time_elapsed         | 791          |
|    total_timesteps      | 47104        |
| train/                  |              |
|    approx_kl            | 0.0041590547 |
|    clip_fraction        | 0.0122       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 5.3e+06      |
|    n_updates            | 220          |
|    policy_gradient_loss | -0.00152     |
|    std                  | 0.983        |
|    value_loss           | 1.03e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 59           |
|    iterations           | 24           |
|    time_elapsed         | 826          |
|    total_timesteps      | 49152        |
| train/                  |              |
|    approx_kl            | 8.176401e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.78e+07     |
|    n_updates            | 230          |
|    policy_gradient_loss | 2.87e-05     |
|    std                  | 0.984        |
|    value_loss           | 3.52e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 25           |
|    time_elapsed         | 868          |
|    total_timesteps      | 51200        |
| train/                  |              |
|    approx_kl            | 0.0015993996 |
|    clip_fraction        | 0.000146     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 3.78e+07     |
|    n_updates            | 240          |
|    policy_gradient_loss | -0.000794    |
|    std                  | 0.986        |
|    value_loss           | 7.48e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 26           |
|    time_elapsed         | 914          |
|    total_timesteps      | 53248        |
| train/                  |              |
|    approx_kl            | 0.0017706419 |
|    clip_fraction        | 4.88e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 5.9e+07      |
|    n_updates            | 250          |
|    policy_gradient_loss | -0.0011      |
|    std                  | 0.985        |
|    value_loss           | 1.2e+08      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.47e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 27            |
|    time_elapsed         | 959           |
|    total_timesteps      | 55296         |
| train/                  |               |
|    approx_kl            | 7.1439717e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.12e+08      |
|    n_updates            | 260           |
|    policy_gradient_loss | 5.93e-06      |
|    std                  | 0.986         |
|    value_loss           | 2.23e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.47e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 28          |
|    time_elapsed         | 981         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.001700358 |
|    clip_fraction        | 4.88e-05    |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 1.49e+08    |
|    n_updates            | 270         |
|    policy_gradient_loss | -0.00165    |
|    std                  | 0.984       |
|    value_loss           | 3.07e+08    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 29           |
|    time_elapsed         | 1008         |
|    total_timesteps      | 59392        |
| train/                  |              |
|    approx_kl            | 0.0007111801 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.18e+06     |
|    n_updates            | 280          |
|    policy_gradient_loss | 8.34e-05     |
|    std                  | 0.981        |
|    value_loss           | 2.43e+06     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 59           |
|    iterations           | 30           |
|    time_elapsed         | 1040         |
|    total_timesteps      | 61440        |
| train/                  |              |
|    approx_kl            | 6.659326e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 7.08e+06     |
|    n_updates            | 290          |
|    policy_gradient_loss | 5.81e-05     |
|    std                  | 0.98         |
|    value_loss           | 1.38e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 31           |
|    time_elapsed         | 1077         |
|    total_timesteps      | 63488        |
| train/                  |              |
|    approx_kl            | 0.0006252627 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.02e+07     |
|    n_updates            | 300          |
|    policy_gradient_loss | -0.00026     |
|    std                  | 0.979        |
|    value_loss           | 4.27e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.47e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 32            |
|    time_elapsed         | 1119          |
|    total_timesteps      | 65536         |
| train/                  |               |
|    approx_kl            | 0.00019092354 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 4.34e+07      |
|    n_updates            | 310           |
|    policy_gradient_loss | -7.6e-05      |
|    std                  | 0.978         |
|    value_loss           | 8.33e+07      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.47e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 33            |
|    time_elapsed         | 1166          |
|    total_timesteps      | 67584         |
| train/                  |               |
|    approx_kl            | 0.00013386927 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 7.36e+07      |
|    n_updates            | 320           |
|    policy_gradient_loss | -5.77e-05     |
|    std                  | 0.978         |
|    value_loss           | 1.4e+08       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 34           |
|    time_elapsed         | 1203         |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 1.604014e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.33e+08     |
|    n_updates            | 330          |
|    policy_gradient_loss | 2.41e-06     |
|    std                  | 0.978        |
|    value_loss           | 2.67e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.46e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 35            |
|    time_elapsed         | 1226          |
|    total_timesteps      | 71680         |
| train/                  |               |
|    approx_kl            | 0.00010511567 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.11e+08      |
|    n_updates            | 340           |
|    policy_gradient_loss | -4.56e-05     |
|    std                  | 0.979         |
|    value_loss           | 2.2e+08       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 36           |
|    time_elapsed         | 1255         |
|    total_timesteps      | 73728        |
| train/                  |              |
|    approx_kl            | 0.0033812611 |
|    clip_fraction        | 0.00439      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.48e+06     |
|    n_updates            | 350          |
|    policy_gradient_loss | -0.000998    |
|    std                  | 0.966        |
|    value_loss           | 4.97e+06     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 37           |
|    time_elapsed         | 1289         |
|    total_timesteps      | 75776        |
| train/                  |              |
|    approx_kl            | 0.0033204397 |
|    clip_fraction        | 0.00381      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 9.94e+06     |
|    n_updates            | 360          |
|    policy_gradient_loss | -0.00134     |
|    std                  | 0.965        |
|    value_loss           | 2.13e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 38           |
|    time_elapsed         | 1327         |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 0.0027977545 |
|    clip_fraction        | 0.00151      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.8e+07      |
|    n_updates            | 370          |
|    policy_gradient_loss | -0.00142     |
|    std                  | 0.965        |
|    value_loss           | 5.75e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.46e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 39            |
|    time_elapsed         | 1370          |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 0.00021130446 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 4.96e+07      |
|    n_updates            | 380           |
|    policy_gradient_loss | -0.00018      |
|    std                  | 0.968         |
|    value_loss           | 1.01e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 40           |
|    time_elapsed         | 1418         |
|    total_timesteps      | 81920        |
| train/                  |              |
|    approx_kl            | 0.0019729175 |
|    clip_fraction        | 0.000439     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 8.61e+07     |
|    n_updates            | 390          |
|    policy_gradient_loss | -0.00143     |
|    std                  | 0.968        |
|    value_loss           | 1.71e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 41           |
|    time_elapsed         | 1447         |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 4.803005e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.58e+08     |
|    n_updates            | 400          |
|    policy_gradient_loss | 7.24e-06     |
|    std                  | 0.968        |
|    value_loss           | 3.09e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 42           |
|    time_elapsed         | 1472         |
|    total_timesteps      | 86016        |
| train/                  |              |
|    approx_kl            | 0.0021688761 |
|    clip_fraction        | 0.000635     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 5.03e+07     |
|    n_updates            | 410          |
|    policy_gradient_loss | -0.000898    |
|    std                  | 0.968        |
|    value_loss           | 1.16e+08     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.46e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 43          |
|    time_elapsed         | 1502        |
|    total_timesteps      | 88064       |
| train/                  |             |
|    approx_kl            | 0.001755296 |
|    clip_fraction        | 0.000342    |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 3.58e+06    |
|    n_updates            | 420         |
|    policy_gradient_loss | -0.000227   |
|    std                  | 0.965       |
|    value_loss           | 6.92e+06    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.46e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 44          |
|    time_elapsed         | 1537        |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.004612257 |
|    clip_fraction        | 0.0185      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 1.33e+07    |
|    n_updates            | 430         |
|    policy_gradient_loss | -0.00294    |
|    std                  | 0.963       |
|    value_loss           | 2.57e+07    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 45           |
|    time_elapsed         | 1577         |
|    total_timesteps      | 92160        |
| train/                  |              |
|    approx_kl            | 0.0010743912 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.97e+07     |
|    n_updates            | 440          |
|    policy_gradient_loss | -0.000456    |
|    std                  | 0.965        |
|    value_loss           | 5.89e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 46           |
|    time_elapsed         | 1622         |
|    total_timesteps      | 94208        |
| train/                  |              |
|    approx_kl            | 3.749726e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 4.84e+07     |
|    n_updates            | 450          |
|    policy_gradient_loss | -8.08e-06    |
|    std                  | 0.965        |
|    value_loss           | 1e+08        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.46e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 47            |
|    time_elapsed         | 1672          |
|    total_timesteps      | 96256         |
| train/                  |               |
|    approx_kl            | 7.9318415e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 9.63e+07      |
|    n_updates            | 460           |
|    policy_gradient_loss | 1.01e-05      |
|    std                  | 0.966         |
|    value_loss           | 1.84e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.41e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 48           |
|    time_elapsed         | 1694         |
|    total_timesteps      | 98304        |
| train/                  |              |
|    approx_kl            | 6.651302e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.58e+08     |
|    n_updates            | 470          |
|    policy_gradient_loss | 1.22e-05     |
|    std                  | 0.966        |
|    value_loss           | 3.15e+08     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.41e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 49          |
|    time_elapsed         | 1721        |
|    total_timesteps      | 100352      |
| train/                  |             |
|    approx_kl            | 0.002668358 |
|    clip_fraction        | 0.00566     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 1.19e-07    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.38e+06    |
|    n_updates            | 480         |
|    policy_gradient_loss | -0.000417   |
|    std                  | 0.969       |
|    value_loss           | 6.2e+06     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.41e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 50           |
|    time_elapsed         | 1752         |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 5.808333e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 4.88e+06     |
|    n_updates            | 490          |
|    policy_gradient_loss | 6.69e-05     |
|    std                  | 0.969        |
|    value_loss           | 9.76e+06     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.41e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 51           |
|    time_elapsed         | 1789         |
|    total_timesteps      | 104448       |
| train/                  |              |
|    approx_kl            | 0.0018734607 |
|    clip_fraction        | 9.77e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.68e+07     |
|    n_updates            | 500          |
|    policy_gradient_loss | -0.000638    |
|    std                  | 0.969        |
|    value_loss           | 3.41e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.41e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 52            |
|    time_elapsed         | 1830          |
|    total_timesteps      | 106496        |
| train/                  |               |
|    approx_kl            | 0.00012732262 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 3.66e+07      |
|    n_updates            | 510           |
|    policy_gradient_loss | -9.26e-05     |
|    std                  | 0.971         |
|    value_loss           | 7.26e+07      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.41e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 53           |
|    time_elapsed         | 1876         |
|    total_timesteps      | 108544       |
| train/                  |              |
|    approx_kl            | 8.114439e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 5.99e+07     |
|    n_updates            | 520          |
|    policy_gradient_loss | 1.85e-05     |
|    std                  | 0.971        |
|    value_loss           | 1.23e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.38e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 54           |
|    time_elapsed         | 1918         |
|    total_timesteps      | 110592       |
| train/                  |              |
|    approx_kl            | 0.0014139013 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 4.17e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.23e+08     |
|    n_updates            | 530          |
|    policy_gradient_loss | -0.00123     |
|    std                  | 0.971        |
|    value_loss           | 2.33e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.38e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 55           |
|    time_elapsed         | 1940         |
|    total_timesteps      | 112640       |
| train/                  |              |
|    approx_kl            | 9.449702e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.36e+08     |
|    n_updates            | 540          |
|    policy_gradient_loss | -5.12e-05    |
|    std                  | 0.971        |
|    value_loss           | 2.6e+08      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.38e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 56           |
|    time_elapsed         | 1968         |
|    total_timesteps      | 114688       |
| train/                  |              |
|    approx_kl            | 0.0053245462 |
|    clip_fraction        | 0.0376       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.59e+06     |
|    n_updates            | 550          |
|    policy_gradient_loss | -0.00341     |
|    std                  | 0.965        |
|    value_loss           | 3e+06        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.38e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 57           |
|    time_elapsed         | 2000         |
|    total_timesteps      | 116736       |
| train/                  |              |
|    approx_kl            | 0.0006322566 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 7.9e+06      |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.000131    |
|    std                  | 0.967        |
|    value_loss           | 1.48e+07     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.38e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 58          |
|    time_elapsed         | 2039        |
|    total_timesteps      | 118784      |
| train/                  |             |
|    approx_kl            | 2.03097e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.24e+07    |
|    n_updates            | 570         |
|    policy_gradient_loss | 2.41e-05    |
|    std                  | 0.968       |
|    value_loss           | 4.5e+07     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.38e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 59           |
|    time_elapsed         | 2082         |
|    total_timesteps      | 120832       |
| train/                  |              |
|    approx_kl            | 0.0011647637 |
|    clip_fraction        | 4.88e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 4.2e+07      |
|    n_updates            | 580          |
|    policy_gradient_loss | -0.000655    |
|    std                  | 0.968        |
|    value_loss           | 8.46e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.38e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 60            |
|    time_elapsed         | 2129          |
|    total_timesteps      | 122880        |
| train/                  |               |
|    approx_kl            | 0.00043454222 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 6.87e+07      |
|    n_updates            | 590           |
|    policy_gradient_loss | -0.000325     |
|    std                  | 0.967         |
|    value_loss           | 1.42e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.36e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 61            |
|    time_elapsed         | 2163          |
|    total_timesteps      | 124928        |
| train/                  |               |
|    approx_kl            | 0.00024088557 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 5.36e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.34e+08      |
|    n_updates            | 600           |
|    policy_gradient_loss | -0.000247     |
|    std                  | 0.968         |
|    value_loss           | 2.65e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.36e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 62           |
|    time_elapsed         | 2187         |
|    total_timesteps      | 126976       |
| train/                  |              |
|    approx_kl            | 0.0014311534 |
|    clip_fraction        | 0.000146     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 7.99e+07     |
|    n_updates            | 610          |
|    policy_gradient_loss | -0.000868    |
|    std                  | 0.972        |
|    value_loss           | 1.6e+08      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.36e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 63           |
|    time_elapsed         | 2216         |
|    total_timesteps      | 129024       |
| train/                  |              |
|    approx_kl            | 0.0035263717 |
|    clip_fraction        | 0.00796      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.59e+06     |
|    n_updates            | 620          |
|    policy_gradient_loss | -0.000996    |
|    std                  | 0.968        |
|    value_loss           | 5.02e+06     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.36e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 64            |
|    time_elapsed         | 2250          |
|    total_timesteps      | 131072        |
| train/                  |               |
|    approx_kl            | 0.00031990928 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | -2.38e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 9.87e+06      |
|    n_updates            | 630           |
|    policy_gradient_loss | -2.53e-05     |
|    std                  | 0.966         |
|    value_loss           | 2.09e+07      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.36e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 65           |
|    time_elapsed         | 2289         |
|    total_timesteps      | 133120       |
| train/                  |              |
|    approx_kl            | 0.0006906834 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -3.58e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.86e+07     |
|    n_updates            | 640          |
|    policy_gradient_loss | -0.000307    |
|    std                  | 0.967        |
|    value_loss           | 5.73e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.36e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 66            |
|    time_elapsed         | 2334          |
|    total_timesteps      | 135168        |
| train/                  |               |
|    approx_kl            | 0.00015869664 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 5.33e+07      |
|    n_updates            | 650           |
|    policy_gradient_loss | -0.000102     |
|    std                  | 0.969         |
|    value_loss           | 1.04e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.36e+06    |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 67          |
|    time_elapsed         | 2382        |
|    total_timesteps      | 137216      |
| train/                  |             |
|    approx_kl            | 0.001029569 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 8.73e+07    |
|    n_updates            | 660         |
|    policy_gradient_loss | -0.000795   |
|    std                  | 0.969       |
|    value_loss           | 1.78e+08    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.35e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 68           |
|    time_elapsed         | 2408         |
|    total_timesteps      | 139264       |
| train/                  |              |
|    approx_kl            | 0.0002893129 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.58e+08     |
|    n_updates            | 670          |
|    policy_gradient_loss | -0.000274    |
|    std                  | 0.969        |
|    value_loss           | 3.13e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.35e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 69            |
|    time_elapsed         | 2433          |
|    total_timesteps      | 141312        |
| train/                  |               |
|    approx_kl            | 0.00039297843 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 3.96e+07      |
|    n_updates            | 680           |
|    policy_gradient_loss | 4.98e-05      |
|    std                  | 0.97          |
|    value_loss           | 6.47e+07      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.35e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 70           |
|    time_elapsed         | 2463         |
|    total_timesteps      | 143360       |
| train/                  |              |
|    approx_kl            | 0.0042695613 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 4.79e+06     |
|    n_updates            | 690          |
|    policy_gradient_loss | -0.00167     |
|    std                  | 0.968        |
|    value_loss           | 9.56e+06     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.35e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 71           |
|    time_elapsed         | 2499         |
|    total_timesteps      | 145408       |
| train/                  |              |
|    approx_kl            | 0.0047539817 |
|    clip_fraction        | 0.0157       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.57e+07     |
|    n_updates            | 700          |
|    policy_gradient_loss | -0.00274     |
|    std                  | 0.967        |
|    value_loss           | 3.38e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.35e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 72           |
|    time_elapsed         | 2539         |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 0.0012600776 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -4.77e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 3.53e+07     |
|    n_updates            | 710          |
|    policy_gradient_loss | -0.000617    |
|    std                  | 0.967        |
|    value_loss           | 7.41e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.35e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 73           |
|    time_elapsed         | 2584         |
|    total_timesteps      | 149504       |
| train/                  |              |
|    approx_kl            | 0.0012791287 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 6.23e+07     |
|    n_updates            | 720          |
|    policy_gradient_loss | -0.000859    |
|    std                  | 0.965        |
|    value_loss           | 1.24e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.37e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 74            |
|    time_elapsed         | 2629          |
|    total_timesteps      | 151552        |
| train/                  |               |
|    approx_kl            | 0.00013752881 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 2.98e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.15e+08      |
|    n_updates            | 730           |
|    policy_gradient_loss | -8.68e-05     |
|    std                  | 0.965         |
|    value_loss           | 2.28e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.37e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 75            |
|    time_elapsed         | 2650          |
|    total_timesteps      | 153600        |
| train/                  |               |
|    approx_kl            | 8.6087675e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.71e+08      |
|    n_updates            | 740           |
|    policy_gradient_loss | 1.33e-05      |
|    std                  | 0.966         |
|    value_loss           | 3.2e+08       |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.37e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 76          |
|    time_elapsed         | 2677        |
|    total_timesteps      | 155648      |
| train/                  |             |
|    approx_kl            | 0.002834016 |
|    clip_fraction        | 0.00908     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.16e+06    |
|    n_updates            | 750         |
|    policy_gradient_loss | -0.00131    |
|    std                  | 0.965       |
|    value_loss           | 2.5e+06     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.37e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 77          |
|    time_elapsed         | 2708        |
|    total_timesteps      | 157696      |
| train/                  |             |
|    approx_kl            | 0.002561483 |
|    clip_fraction        | 0.00122     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 7.28e+06    |
|    n_updates            | 760         |
|    policy_gradient_loss | -0.000687   |
|    std                  | 0.966       |
|    value_loss           | 1.42e+07    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.37e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 78          |
|    time_elapsed         | 2745        |
|    total_timesteps      | 159744      |
| train/                  |             |
|    approx_kl            | 0.002448044 |
|    clip_fraction        | 0.000586    |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.21e+07    |
|    n_updates            | 770         |
|    policy_gradient_loss | -0.00101    |
|    std                  | 0.967       |
|    value_loss           | 4.41e+07    |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.37e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 79            |
|    time_elapsed         | 2786          |
|    total_timesteps      | 161792        |
| train/                  |               |
|    approx_kl            | 1.4274556e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 4.28e+07      |
|    n_updates            | 780           |
|    policy_gradient_loss | 1.48e-05      |
|    std                  | 0.967         |
|    value_loss           | 8.75e+07      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.37e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 80            |
|    time_elapsed         | 2831          |
|    total_timesteps      | 163840        |
| train/                  |               |
|    approx_kl            | 4.4073444e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 7.36e+07      |
|    n_updates            | 790           |
|    policy_gradient_loss | -1.54e-05     |
|    std                  | 0.966         |
|    value_loss           | 1.46e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.39e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 81            |
|    time_elapsed         | 2868          |
|    total_timesteps      | 165888        |
| train/                  |               |
|    approx_kl            | 0.00020792623 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | -4.77e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.32e+08      |
|    n_updates            | 800           |
|    policy_gradient_loss | -0.000208     |
|    std                  | 0.967         |
|    value_loss           | 2.71e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.39e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 82           |
|    time_elapsed         | 2891         |
|    total_timesteps      | 167936       |
| train/                  |              |
|    approx_kl            | 0.0010909187 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.18e+08     |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.000829    |
|    std                  | 0.967        |
|    value_loss           | 2.32e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.39e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 83           |
|    time_elapsed         | 2919         |
|    total_timesteps      | 169984       |
| train/                  |              |
|    approx_kl            | 0.0028404242 |
|    clip_fraction        | 0.004        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.27e+06     |
|    n_updates            | 820          |
|    policy_gradient_loss | -0.000439    |
|    std                  | 0.966        |
|    value_loss           | 4.37e+06     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.39e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 84          |
|    time_elapsed         | 2952        |
|    total_timesteps      | 172032      |
| train/                  |             |
|    approx_kl            | 0.002036476 |
|    clip_fraction        | 0.000488    |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 9.45e+06    |
|    n_updates            | 830         |
|    policy_gradient_loss | -0.000579   |
|    std                  | 0.968       |
|    value_loss           | 1.96e+07    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.39e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 85           |
|    time_elapsed         | 2990         |
|    total_timesteps      | 174080       |
| train/                  |              |
|    approx_kl            | 0.0007883139 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.62e+07     |
|    n_updates            | 840          |
|    policy_gradient_loss | -0.000342    |
|    std                  | 0.969        |
|    value_loss           | 5.38e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.39e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 86           |
|    time_elapsed         | 3033         |
|    total_timesteps      | 176128       |
| train/                  |              |
|    approx_kl            | 0.0007482446 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 4.74e+07     |
|    n_updates            | 850          |
|    policy_gradient_loss | -0.00039     |
|    std                  | 0.971        |
|    value_loss           | 9.58e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.39e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 87           |
|    time_elapsed         | 3080         |
|    total_timesteps      | 178176       |
| train/                  |              |
|    approx_kl            | 0.0014809775 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 8.11e+07     |
|    n_updates            | 860          |
|    policy_gradient_loss | -0.00117     |
|    std                  | 0.97         |
|    value_loss           | 1.64e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.39e+06     |
| time/                   |              |
|    fps                  | 57           |
|    iterations           | 88           |
|    time_elapsed         | 3109         |
|    total_timesteps      | 180224       |
| train/                  |              |
|    approx_kl            | 0.0019012729 |
|    clip_fraction        | 0.000146     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.5e+08      |
|    n_updates            | 870          |
|    policy_gradient_loss | -0.00186     |
|    std                  | 0.969        |
|    value_loss           | 2.98e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.39e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 89            |
|    time_elapsed         | 3133          |
|    total_timesteps      | 182272        |
| train/                  |               |
|    approx_kl            | 0.00053686637 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 6.87e+07      |
|    n_updates            | 880           |
|    policy_gradient_loss | -0.000212     |
|    std                  | 0.967         |
|    value_loss           | 1.23e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.39e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 90          |
|    time_elapsed         | 3162        |
|    total_timesteps      | 184320      |
| train/                  |             |
|    approx_kl            | 0.003701454 |
|    clip_fraction        | 0.0144      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 1.79e-07    |
|    learning_rate        | 0.0003      |
|    loss                 | 3.89e+06    |
|    n_updates            | 890         |
|    policy_gradient_loss | -0.00179    |
|    std                  | 0.964       |
|    value_loss           | 7.28e+06    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.39e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 91           |
|    time_elapsed         | 3197         |
|    total_timesteps      | 186368       |
| train/                  |              |
|    approx_kl            | 0.0025874018 |
|    clip_fraction        | 0.00234      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.33e+07     |
|    n_updates            | 900          |
|    policy_gradient_loss | -0.001       |
|    std                  | 0.966        |
|    value_loss           | 2.81e+07     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.39e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 92          |
|    time_elapsed         | 3236        |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 8.79608e-06 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 0.0003      |
|    loss                 | 3.5e+07     |
|    n_updates            | 910         |
|    policy_gradient_loss | 3e-05       |
|    std                  | 0.965       |
|    value_loss           | 6.8e+07     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.39e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 93           |
|    time_elapsed         | 3279         |
|    total_timesteps      | 190464       |
| train/                  |              |
|    approx_kl            | 0.0007064758 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 5.85e+07     |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.000411    |
|    std                  | 0.964        |
|    value_loss           | 1.19e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.39e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 94            |
|    time_elapsed         | 3327          |
|    total_timesteps      | 192512        |
| train/                  |               |
|    approx_kl            | 0.00011524698 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.07e+08      |
|    n_updates            | 930           |
|    policy_gradient_loss | -0.000153     |
|    std                  | 0.961         |
|    value_loss           | 2.12e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.41e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 95            |
|    time_elapsed         | 3349          |
|    total_timesteps      | 194560        |
| train/                  |               |
|    approx_kl            | 2.2674503e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.79e+08      |
|    n_updates            | 940           |
|    policy_gradient_loss | 3.79e-06      |
|    std                  | 0.962         |
|    value_loss           | 3.54e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.41e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 96          |
|    time_elapsed         | 3375        |
|    total_timesteps      | 196608      |
| train/                  |             |
|    approx_kl            | 0.004772432 |
|    clip_fraction        | 0.011       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -2.38e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 4e+06       |
|    n_updates            | 950         |
|    policy_gradient_loss | -0.000715   |
|    std                  | 0.967       |
|    value_loss           | 1.69e+07    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.41e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 97          |
|    time_elapsed         | 3406        |
|    total_timesteps      | 198656      |
| train/                  |             |
|    approx_kl            | 0.003798197 |
|    clip_fraction        | 0.00972     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 6.43e+06    |
|    n_updates            | 960         |
|    policy_gradient_loss | -0.00144    |
|    std                  | 0.967       |
|    value_loss           | 1.23e+07    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.41e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 98           |
|    time_elapsed         | 3442         |
|    total_timesteps      | 200704       |
| train/                  |              |
|    approx_kl            | 0.0009513822 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.07e+07     |
|    n_updates            | 970          |
|    policy_gradient_loss | -0.000435    |
|    std                  | 0.971        |
|    value_loss           | 3.98e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.41e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 99            |
|    time_elapsed         | 3482          |
|    total_timesteps      | 202752        |
| train/                  |               |
|    approx_kl            | 0.00013346248 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | -2.38e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 4.03e+07      |
|    n_updates            | 980           |
|    policy_gradient_loss | -0.000101     |
|    std                  | 0.969         |
|    value_loss           | 8.25e+07      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.41e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 100          |
|    time_elapsed         | 3527         |
|    total_timesteps      | 204800       |
| train/                  |              |
|    approx_kl            | 0.0019987375 |
|    clip_fraction        | 0.000195     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 7.02e+07     |
|    n_updates            | 990          |
|    policy_gradient_loss | -0.00133     |
|    std                  | 0.97         |
|    value_loss           | 1.39e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.43e+06      |
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 101           |
|    time_elapsed         | 3568          |
|    total_timesteps      | 206848        |
| train/                  |               |
|    approx_kl            | 0.00012555276 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 3.58e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.21e+08      |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.000106     |
|    std                  | 0.97          |
|    value_loss           | 2.43e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.43e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 102           |
|    time_elapsed         | 3591          |
|    total_timesteps      | 208896        |
| train/                  |               |
|    approx_kl            | 0.00034562443 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.5e+08       |
|    n_updates            | 1010          |
|    policy_gradient_loss | -0.000263     |
|    std                  | 0.971         |
|    value_loss           | 2.75e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.43e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 103          |
|    time_elapsed         | 3618         |
|    total_timesteps      | 210944       |
| train/                  |              |
|    approx_kl            | 0.0046343063 |
|    clip_fraction        | 0.0223       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.62e+06     |
|    n_updates            | 1020         |
|    policy_gradient_loss | -0.0024      |
|    std                  | 0.968        |
|    value_loss           | 3.61e+06     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.43e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 104         |
|    time_elapsed         | 3650        |
|    total_timesteps      | 212992      |
| train/                  |             |
|    approx_kl            | 0.000533351 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 9.21e+06    |
|    n_updates            | 1030        |
|    policy_gradient_loss | -0.000103   |
|    std                  | 0.966       |
|    value_loss           | 1.84e+07    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.43e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 105          |
|    time_elapsed         | 3687         |
|    total_timesteps      | 215040       |
| train/                  |              |
|    approx_kl            | 0.0013999842 |
|    clip_fraction        | 4.88e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.87e+07     |
|    n_updates            | 1040         |
|    policy_gradient_loss | -0.00055     |
|    std                  | 0.967        |
|    value_loss           | 5.47e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.43e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 106          |
|    time_elapsed         | 3729         |
|    total_timesteps      | 217088       |
| train/                  |              |
|    approx_kl            | 4.477374e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 5.1e+07      |
|    n_updates            | 1050         |
|    policy_gradient_loss | -9.86e-06    |
|    std                  | 0.968        |
|    value_loss           | 1.04e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.43e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 107          |
|    time_elapsed         | 3775         |
|    total_timesteps      | 219136       |
| train/                  |              |
|    approx_kl            | 7.019084e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 8.84e+07     |
|    n_updates            | 1060         |
|    policy_gradient_loss | -3.87e-05    |
|    std                  | 0.969        |
|    value_loss           | 1.73e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.46e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 108           |
|    time_elapsed         | 3809          |
|    total_timesteps      | 221184        |
| train/                  |               |
|    approx_kl            | 0.00023290521 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.55e+08      |
|    n_updates            | 1070          |
|    policy_gradient_loss | -0.000208     |
|    std                  | 0.969         |
|    value_loss           | 3.04e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.46e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 109           |
|    time_elapsed         | 3832          |
|    total_timesteps      | 223232        |
| train/                  |               |
|    approx_kl            | 0.00032423198 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 8.63e+07      |
|    n_updates            | 1080          |
|    policy_gradient_loss | -0.000252     |
|    std                  | 0.972         |
|    value_loss           | 1.94e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.46e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 110         |
|    time_elapsed         | 3861        |
|    total_timesteps      | 225280      |
| train/                  |             |
|    approx_kl            | 0.000270025 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 3.49e+06    |
|    n_updates            | 1090        |
|    policy_gradient_loss | 6.34e-05    |
|    std                  | 0.967       |
|    value_loss           | 6.27e+06    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 111          |
|    time_elapsed         | 3895         |
|    total_timesteps      | 227328       |
| train/                  |              |
|    approx_kl            | 0.0023512687 |
|    clip_fraction        | 0.000391     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.2e+07      |
|    n_updates            | 1100         |
|    policy_gradient_loss | -0.00064     |
|    std                  | 0.966        |
|    value_loss           | 2.55e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 112          |
|    time_elapsed         | 3933         |
|    total_timesteps      | 229376       |
| train/                  |              |
|    approx_kl            | 0.0010558413 |
|    clip_fraction        | 9.77e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 3.1e+07      |
|    n_updates            | 1110         |
|    policy_gradient_loss | -0.000538    |
|    std                  | 0.968        |
|    value_loss           | 6.38e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.46e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 113          |
|    time_elapsed         | 3976         |
|    total_timesteps      | 231424       |
| train/                  |              |
|    approx_kl            | 0.0020859488 |
|    clip_fraction        | 0.000195     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 5.54e+07     |
|    n_updates            | 1120         |
|    policy_gradient_loss | -0.00122     |
|    std                  | 0.967        |
|    value_loss           | 1.13e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.46e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 114           |
|    time_elapsed         | 4023          |
|    total_timesteps      | 233472        |
| train/                  |               |
|    approx_kl            | 1.6388425e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 9.64e+07      |
|    n_updates            | 1130          |
|    policy_gradient_loss | 1.29e-06      |
|    std                  | 0.966         |
|    value_loss           | 1.91e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.47e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 115           |
|    time_elapsed         | 4049          |
|    total_timesteps      | 235520        |
| train/                  |               |
|    approx_kl            | 0.00016881223 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.68e+08      |
|    n_updates            | 1140          |
|    policy_gradient_loss | -0.000169     |
|    std                  | 0.966         |
|    value_loss           | 3.28e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 116          |
|    time_elapsed         | 4074         |
|    total_timesteps      | 237568       |
| train/                  |              |
|    approx_kl            | 9.819708e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 3.62e+07     |
|    n_updates            | 1150         |
|    policy_gradient_loss | 9.55e-06     |
|    std                  | 0.968        |
|    value_loss           | 7.81e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 117          |
|    time_elapsed         | 4104         |
|    total_timesteps      | 239616       |
| train/                  |              |
|    approx_kl            | 0.0026892475 |
|    clip_fraction        | 0.00176      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 5.8e+06      |
|    n_updates            | 1160         |
|    policy_gradient_loss | -0.000557    |
|    std                  | 0.966        |
|    value_loss           | 1.03e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.47e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 118          |
|    time_elapsed         | 4139         |
|    total_timesteps      | 241664       |
| train/                  |              |
|    approx_kl            | 0.0018320165 |
|    clip_fraction        | 0.000195     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.73e+07     |
|    n_updates            | 1170         |
|    policy_gradient_loss | -0.000731    |
|    std                  | 0.969        |
|    value_loss           | 3.56e+07     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.47e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 119           |
|    time_elapsed         | 4178          |
|    total_timesteps      | 243712        |
| train/                  |               |
|    approx_kl            | 0.00011023987 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 3.98e+07      |
|    n_updates            | 1180          |
|    policy_gradient_loss | 1.33e-05      |
|    std                  | 0.97          |
|    value_loss           | 7.81e+07      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.47e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 120           |
|    time_elapsed         | 4223          |
|    total_timesteps      | 245760        |
| train/                  |               |
|    approx_kl            | 0.00012297317 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 6.74e+07      |
|    n_updates            | 1190          |
|    policy_gradient_loss | -6.11e-05     |
|    std                  | 0.971         |
|    value_loss           | 1.31e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.49e+06      |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 121           |
|    time_elapsed         | 4269          |
|    total_timesteps      | 247808        |
| train/                  |               |
|    approx_kl            | 0.00022624078 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.21e+08      |
|    n_updates            | 1200          |
|    policy_gradient_loss | -0.000189     |
|    std                  | 0.971         |
|    value_loss           | 2.3e+08       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.49e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 122          |
|    time_elapsed         | 4290         |
|    total_timesteps      | 249856       |
| train/                  |              |
|    approx_kl            | 9.255431e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.68e+08     |
|    n_updates            | 1210         |
|    policy_gradient_loss | -9.3e-05     |
|    std                  | 0.971        |
|    value_loss           | 3.39e+08     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.49e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 123         |
|    time_elapsed         | 4316        |
|    total_timesteps      | 251904      |
| train/                  |             |
|    approx_kl            | 0.001203906 |
|    clip_fraction        | 0.00156     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 1.17e+06    |
|    n_updates            | 1220        |
|    policy_gradient_loss | -0.000257   |
|    std                  | 0.987       |
|    value_loss           | 2.25e+06    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.49e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 124          |
|    time_elapsed         | 4348         |
|    total_timesteps      | 253952       |
| train/                  |              |
|    approx_kl            | 0.0010896694 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 7.46e+06     |
|    n_updates            | 1230         |
|    policy_gradient_loss | -0.000181    |
|    std                  | 0.986        |
|    value_loss           | 1.45e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.49e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 125          |
|    time_elapsed         | 4384         |
|    total_timesteps      | 256000       |
| train/                  |              |
|    approx_kl            | 0.0005982603 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.29e+07     |
|    n_updates            | 1240         |
|    policy_gradient_loss | -0.000238    |
|    std                  | 0.986        |
|    value_loss           | 4.48e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.49e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 126          |
|    time_elapsed         | 4425         |
|    total_timesteps      | 258048       |
| train/                  |              |
|    approx_kl            | 0.0018446865 |
|    clip_fraction        | 0.000293     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 4.34e+07     |
|    n_updates            | 1250         |
|    policy_gradient_loss | -0.00108     |
|    std                  | 0.986        |
|    value_loss           | 8.95e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.49e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 127          |
|    time_elapsed         | 4471         |
|    total_timesteps      | 260096       |
| train/                  |              |
|    approx_kl            | 9.924336e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 7.33e+07     |
|    n_updates            | 1260         |
|    policy_gradient_loss | -9.49e-05    |
|    std                  | 0.988        |
|    value_loss           | 1.49e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 128           |
|    time_elapsed         | 4509          |
|    total_timesteps      | 262144        |
| train/                  |               |
|    approx_kl            | 1.7614482e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | 5.36e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.35e+08      |
|    n_updates            | 1270          |
|    policy_gradient_loss | -2.17e-05     |
|    std                  | 0.987         |
|    value_loss           | 2.63e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 129           |
|    time_elapsed         | 4532          |
|    total_timesteps      | 264192        |
| train/                  |               |
|    approx_kl            | 0.00087491365 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.22e+08      |
|    n_updates            | 1280          |
|    policy_gradient_loss | -0.00063      |
|    std                  | 0.986         |
|    value_loss           | 2.35e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 130           |
|    time_elapsed         | 4560          |
|    total_timesteps      | 266240        |
| train/                  |               |
|    approx_kl            | 0.00020394573 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 1.79e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.3e+06       |
|    n_updates            | 1290          |
|    policy_gradient_loss | 4.49e-05      |
|    std                  | 0.984         |
|    value_loss           | 4.37e+06      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 131           |
|    time_elapsed         | 4592          |
|    total_timesteps      | 268288        |
| train/                  |               |
|    approx_kl            | 1.8925057e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 2.98e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.04e+07      |
|    n_updates            | 1300          |
|    policy_gradient_loss | 4.56e-05      |
|    std                  | 0.983         |
|    value_loss           | 2.06e+07      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.5e+06      |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 132          |
|    time_elapsed         | 4630         |
|    total_timesteps      | 270336       |
| train/                  |              |
|    approx_kl            | 0.0020503872 |
|    clip_fraction        | 0.000195     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.93e+07     |
|    n_updates            | 1310         |
|    policy_gradient_loss | -0.000806    |
|    std                  | 0.983        |
|    value_loss           | 5.63e+07     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.5e+06      |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 133          |
|    time_elapsed         | 4672         |
|    total_timesteps      | 272384       |
| train/                  |              |
|    approx_kl            | 0.0020477984 |
|    clip_fraction        | 0.000439     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 4.88e+07     |
|    n_updates            | 1320         |
|    policy_gradient_loss | -0.00131     |
|    std                  | 0.982        |
|    value_loss           | 1.02e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 134           |
|    time_elapsed         | 4719          |
|    total_timesteps      | 274432        |
| train/                  |               |
|    approx_kl            | 0.00030383252 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 8.63e+07      |
|    n_updates            | 1330          |
|    policy_gradient_loss | -0.000178     |
|    std                  | 0.982         |
|    value_loss           | 1.68e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.5e+06      |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 135          |
|    time_elapsed         | 4749         |
|    total_timesteps      | 276480       |
| train/                  |              |
|    approx_kl            | 0.0005124898 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.45e+08     |
|    n_updates            | 1340         |
|    policy_gradient_loss | -0.00045     |
|    std                  | 0.981        |
|    value_loss           | 2.87e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.5e+06      |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 136          |
|    time_elapsed         | 4773         |
|    total_timesteps      | 278528       |
| train/                  |              |
|    approx_kl            | 5.946294e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.4         |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 9.04e+07     |
|    n_updates            | 1350         |
|    policy_gradient_loss | 5.46e-06     |
|    std                  | 0.98         |
|    value_loss           | 1.26e+08     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.5e+06     |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 137         |
|    time_elapsed         | 4802        |
|    total_timesteps      | 280576      |
| train/                  |             |
|    approx_kl            | 0.004362055 |
|    clip_fraction        | 0.0205      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 3.47e+06    |
|    n_updates            | 1360        |
|    policy_gradient_loss | -0.00232    |
|    std                  | 0.978       |
|    value_loss           | 6.64e+06    |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 138           |
|    time_elapsed         | 4837          |
|    total_timesteps      | 282624        |
| train/                  |               |
|    approx_kl            | 4.9713737e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.4          |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.29e+07      |
|    n_updates            | 1370          |
|    policy_gradient_loss | -5.78e-05     |
|    std                  | 0.975         |
|    value_loss           | 2.61e+07      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 139           |
|    time_elapsed         | 4876          |
|    total_timesteps      | 284672        |
| train/                  |               |
|    approx_kl            | 3.9367704e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 3.03e+07      |
|    n_updates            | 1380          |
|    policy_gradient_loss | -3.21e-05     |
|    std                  | 0.973         |
|    value_loss           | 5.99e+07      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.5e+06      |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 140          |
|    time_elapsed         | 4920         |
|    total_timesteps      | 286720       |
| train/                  |              |
|    approx_kl            | 0.0023682825 |
|    clip_fraction        | 0.000342     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 5.14e+07     |
|    n_updates            | 1390         |
|    policy_gradient_loss | -0.00147     |
|    std                  | 0.97         |
|    value_loss           | 1.04e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 7.5e+06       |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 141           |
|    time_elapsed         | 4968          |
|    total_timesteps      | 288768        |
| train/                  |               |
|    approx_kl            | 9.4734336e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 8.98e+07      |
|    n_updates            | 1400          |
|    policy_gradient_loss | -6.22e-05     |
|    std                  | 0.969         |
|    value_loss           | 1.72e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.48e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 142          |
|    time_elapsed         | 4990         |
|    total_timesteps      | 290816       |
| train/                  |              |
|    approx_kl            | 9.613432e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 4.77e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.42e+08     |
|    n_updates            | 1410         |
|    policy_gradient_loss | 6.12e-06     |
|    std                  | 0.97         |
|    value_loss           | 2.78e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.48e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 143          |
|    time_elapsed         | 5016         |
|    total_timesteps      | 292864       |
| train/                  |              |
|    approx_kl            | 0.0040676566 |
|    clip_fraction        | 0.0145       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 3.14e+06     |
|    n_updates            | 1420         |
|    policy_gradient_loss | -0.000784    |
|    std                  | 0.965        |
|    value_loss           | 2.14e+07     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.48e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 144         |
|    time_elapsed         | 5046        |
|    total_timesteps      | 294912      |
| train/                  |             |
|    approx_kl            | 0.002895398 |
|    clip_fraction        | 0.00317     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 4.32e+06    |
|    n_updates            | 1430        |
|    policy_gradient_loss | -0.000555   |
|    std                  | 0.967       |
|    value_loss           | 8.8e+06     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.48e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 145          |
|    time_elapsed         | 5082         |
|    total_timesteps      | 296960       |
| train/                  |              |
|    approx_kl            | 0.0007584458 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.37e+07     |
|    n_updates            | 1440         |
|    policy_gradient_loss | -0.000238    |
|    std                  | 0.964        |
|    value_loss           | 2.69e+07     |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 7.48e+06    |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 146         |
|    time_elapsed         | 5122        |
|    total_timesteps      | 299008      |
| train/                  |             |
|    approx_kl            | 0.003507853 |
|    clip_fraction        | 0.00503     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.78e+07    |
|    n_updates            | 1450        |
|    policy_gradient_loss | -0.00179    |
|    std                  | 0.964       |
|    value_loss           | 5.51e+07    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 7.48e+06     |
| time/                   |              |
|    fps                  | 58           |
|    iterations           | 147          |
|    time_elapsed         | 5167         |
|    total_timesteps      | 301056       |
| train/                  |              |
|    approx_kl            | 9.829071e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 4.61e+07     |
|    n_updates            | 1460         |
|    policy_gradient_loss | -4.25e-05    |
|    std                  | 0.964        |
|    value_loss           | 9.19e+07     |
------------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7fafffb509d0>

In [19]:
# Roll out the freshly trained policy with deterministic actions for one
# episode, logging and rendering every step.
for i in range(1):
    # Reset at the start of each episode so that raising the episode count
    # never steps an environment that has already finished.
    obs, info = env.reset()
    done = False
    while not done:
        action, _states = rl_model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        # An episode is over on termination OR truncation; looking at only the
        # first flag would keep stepping an episode that was truncated.
        done = terminated or truncated
        print_log(f"Step: {env.current_step}, Action: {action}, Reward: {reward}")
        env.render()

[2025-07-12 04:40:12:693] Step: 1, Action: [0.6829988], Reward: 0.28550860819745066
[2025-07-12 04:40:12:706] Step: 2, Action: [0.6830041], Reward: 0.029417896038015683
[2025-07-12 04:40:12:715] Step: 3, Action: [0.6829878], Reward: 0.0020240538809208407
[2025-07-12 04:40:12:721] Step: 4, Action: [0.6829932], Reward: 0.00039408203559865556
[2025-07-12 04:40:12:726] Step: 5, Action: [0.6829765], Reward: 0.00039342130331570906
[2025-07-12 04:40:12:733] Step: 6, Action: [0.6829765], Reward: 0.0029685337984214225
[2025-07-12 04:40:12:738] Step: 7, Action: [0.68294775], Reward: 0.016577786414345107
[2025-07-12 04:40:12:743] Step: 8, Action: [0.68293583], Reward: 0.04486150693547726
[2025-07-12 04:40:12:748] Step: 9, Action: [0.68292356], Reward: 0.07381703701988857
[2025-07-12 04:40:12:753] Step: 10, Action: [0.6828985], Reward: 0.0961358482132223
[2025-07-12 04:40:12:761] Step: 11, Action: [0.6829048], Reward: 0.11127642069892088
[2025-07-12 04:40:12:767] Step: 12, Action: [0.68287903], Re

In [20]:
# Inspect the environment's load frame after the rollout. The rendered output
# shows grid_load and battery_soc columns next to the aggregate readings.
# NOTE(review): presumably those two columns are filled in by the environment
# while stepping — confirm in SmartMeterWorld.
env.aggregate_load_df

Unnamed: 0,timestamp,aggregate,datetime,grid_load,battery_soc
104747,1.363997e+09,335.0,2013-03-23 00:00:05,3066.995106,0.184338
104748,1.363997e+09,336.0,2013-03-23 00:00:11,3068.016325,0.184907
104749,1.363997e+09,333.0,2013-03-23 00:00:17,3064.951237,0.185476
104750,1.363997e+09,334.0,2013-03-23 00:00:24,3065.972694,0.18614
104751,1.363997e+09,331.0,2013-03-23 00:00:30,3062.905937,0.18671
...,...,...,...,...,...
118501,1.364083e+09,179.0,2013-03-23 23:59:30,2906.402687,1.0
118502,1.364083e+09,171.0,2013-03-23 23:59:37,2897.657391,1.0
118503,1.364083e+09,171.0,2013-03-23 23:59:43,2897.657391,1.0
118504,1.364083e+09,171.0,2013-03-23 23:59:49,2897.657391,1.0


In [21]:
# Store the environment's graph with this run's artifacts:
# rl_model/PPO/<run timestamp>/graph.png
env.save_graph(
    str(Path("rl_model") / "PPO" / rl_datetime.strftime('%Y%m%d_%H%M%S') / "graph.png")
)

In [22]:
# Shut the environment down once the rollout and graph export are finished;
# SmartMeterWorld logs "Environment closed." when this returns.
# NOTE(review): presumably this also releases the render connection — confirm.
env.close()

[2025-07-12 04:44:05:356] [SmartMeterWorld] Environment closed.


In [23]:
# Write the PPO model to rl_model/PPO/<run timestamp>/rl_model.zip
rl_model_path = Path("rl_model") / "PPO" / rl_datetime.strftime('%Y%m%d_%H%M%S') / "rl_model.zip"
rl_model.save(rl_model_path)

---

In [None]:
# Rebuild the environment and restore a previously saved PPO model.
import sys
# Put the rl_env folder on the import path before importing from it.
sys.path += [str(Path('rl_env'))]

from rl_env.hrl_env import SmartMeterWorld
from stable_baselines3 import PPO

# Fresh environment over the selected load segment, rendered in "human" mode.
env = SmartMeterWorld(aggregate_load_df=load_segment, render_mode="human")

# Attach the H-network and its standard scaler to the environment.
env.set_h_network(h_network)
env.set_h_network_stdscaler(h_network_stdscaler)

# The checkpoint comes from the run stamped 2025-07-10 19:18:57 (hard-coded),
# not from rl_datetime.
rl_model_path = (
    Path("rl_model")
    / "PPO"
    / datetime(2025, 7, 10, 19, 18, 57).strftime('%Y%m%d_%H%M%S')
    / "rl_model.zip"
)
rl_model_loaded = PPO.load(rl_model_path, env=env)

[2025-07-11 17:41:15:349] [SmartMeterWorld] Render mode set to 'human'. Render server at 127.0.0.1:50007. render_connected: True. render_client_socket: <socket.socket fd=85, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0, laddr=('127.0.0.1', 57334), raddr=('127.0.0.1', 50007)>
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [None]:
# Roll out the restored policy with deterministic actions for one episode,
# logging and rendering every step.
for i in range(1):
    # Reset at the start of each episode so that raising the episode count
    # never steps an environment that has already finished.
    obs, info = env.reset()
    done = False
    while not done:
        action, _states = rl_model_loaded.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        # An episode is over on termination OR truncation; looking at only the
        # first flag would keep stepping an episode that was truncated.
        done = terminated or truncated
        print_log(f"Step: {env.current_step}, Action: {action}, Reward: {reward}")
        env.render()

[2025-07-11 17:43:49:737] Step: 1, Action: [0.8852709], Reward: 0.11131098730440935
[2025-07-11 17:45:38:966] Step: 2, Action: [0.8852702], Reward: 0.5481597745709021
[2025-07-11 17:45:47:193] Step: 3, Action: [0.88527256], Reward: 0.7274080372309486
[2025-07-11 17:45:47:202] Step: 4, Action: [0.88527185], Reward: 0.9513516866327525
[2025-07-11 17:45:47:211] Step: 5, Action: [0.885274], Reward: 1.2329133591758012
[2025-07-11 17:45:47:217] Step: 6, Action: [0.88527405], Reward: 1.4986055812901657
[2025-07-11 17:45:47:222] Step: 7, Action: [0.8852769], Reward: 1.7794962723778884
[2025-07-11 17:45:47:234] Step: 8, Action: [0.8852778], Reward: 2.0722826678674617
[2025-07-11 17:45:47:243] Step: 9, Action: [0.88527864], Reward: 2.3641625840023757
[2025-07-11 17:45:47:253] Step: 10, Action: [0.8852796], Reward: 2.6564700872594185
[2025-07-11 17:45:47:268] Step: 11, Action: [0.8852795], Reward: 2.941596050857504
[2025-07-11 17:45:47:277] Step: 12, Action: [0.88527995], Reward: 3.22153648909596

In [None]:
# Store the graph from the restored model's rollout as graph2.png.
# NOTE(review): the folder is derived from rl_datetime, while the restored
# checkpoint comes from a hard-coded 2025-07-10 19:18:57 run — confirm that
# graph2.png is meant to land in the rl_datetime folder.
env.save_graph(
    str(Path("rl_model") / "PPO" / rl_datetime.strftime('%Y%m%d_%H%M%S') / "graph2.png")
)