A template for RL training

In [33]:
import numpy as np
import pandas as pd

from pathlib import Path
from datetime import datetime

from utils import print_log

In [34]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# Location of the pre-split dataset (relative to the notebook's working directory)
dataset_folder_path = Path("dataset") / "20250707" / "split"

In [36]:
# copied from 03_data_split.ipynb

# Helper functions for the new split folder structure
def load_split_data_from_folder(split_folder, split_type='train'):
    """Load the aggregate data of one split from the split folder.

    Reads ``<split_type>_segments.txt``, where every non-blank line holds a
    ``<start> - <end>`` pair of ISO-8601 datetimes, and unpickles
    ``<split_type>_aggregate_df.pkl``.

    Args:
        split_folder: Folder containing the split files (``str`` or ``Path``).
        split_type: File-name prefix of the split to load (default ``'train'``).

    Returns:
        A ``(segments, df)`` tuple: ``segments`` is a list of ``(start, end)``
        ``datetime`` tuples in file order, ``df`` is the unpickled object.

    Raises:
        FileNotFoundError: If either file is missing.
        ValueError: If a non-blank line is not a valid ``<start> - <end>`` pair.
    """
    split_folder = Path(split_folder)

    segments = []
    with open(split_folder / f'{split_type}_segments.txt', 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            start_str, end_str = line.split(' - ')
            segments.append((
                datetime.fromisoformat(start_str),
                datetime.fromisoformat(end_str),
            ))

    # NOTE: unpickling executes arbitrary code; only load files you trust.
    df = pd.read_pickle(split_folder / f'{split_type}_aggregate_df.pkl')
    return segments, df

def load_signatures_from_split_folder(split_folder, split_type, appliance):
    """Load one appliance's load signatures from the split folder.

    Looks in ``<split_folder>/load_signature_library/<split_type>/<appliance>/``
    for ``load_signatures.pkl`` and the optional ``selected_ranges.txt``, whose
    non-blank lines each hold a ``start,end`` pair of integers.

    Args:
        split_folder: Root folder of the split (``str`` or ``Path``).
        split_type: Sub-folder name of the split.
        appliance: Sub-folder name of the appliance.

    Returns:
        A ``(signatures_df, ranges)`` tuple. When the signatures pickle does not
        exist, an empty ``DataFrame`` and an empty list are returned. ``ranges``
        is also empty when the ranges file does not exist.

    Raises:
        ValueError: If a non-blank ranges line is not a ``start,end`` integer pair.
    """
    appliance_dir = Path(split_folder) / 'load_signature_library' / split_type / appliance
    sig_path = appliance_dir / 'load_signatures.pkl'
    ranges_path = appliance_dir / 'selected_ranges.txt'

    if not sig_path.exists():
        return pd.DataFrame(), []

    # NOTE: unpickling executes arbitrary code; only load files you trust.
    signatures_df = pd.read_pickle(sig_path)

    ranges = []
    if ranges_path.exists():
        with open(ranges_path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                start, end = map(int, line.split(','))
                ranges.append((start, end))

    return signatures_df, ranges

In [37]:
# Load the (segments, aggregate frame) pair for each of the three splits.
(aggregate_load_segments_train,
 aggregate_load_df_train) = load_split_data_from_folder(dataset_folder_path, split_type='train')
(aggregate_load_segments_test,
 aggregate_load_df_test) = load_split_data_from_folder(dataset_folder_path, split_type='test')
(aggregate_load_segments_validation,
 aggregate_load_df_validation) = load_split_data_from_folder(dataset_folder_path, split_type='val')

In [38]:
# Preview the training aggregate frame (the rendered output is truncated by pandas).
aggregate_load_df_train

Unnamed: 0,timestamp,aggregate,datetime,washing_machine,dishwasher,fridge,kettle,microwave,toaster,tv,htpc,gas_oven,kitchen_lights
0,1.357603e+09,234.0,2013-01-08 00:00:05+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,69.0,,0.0
1,1.357603e+09,231.0,2013-01-08 00:00:11+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,70.0,,0.0
2,1.357603e+09,234.0,2013-01-08 00:00:17+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,70.0,,0.0
3,1.357603e+09,232.0,2013-01-08 00:00:23+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,68.0,,0.0
4,1.357603e+09,232.0,2013-01-08 00:00:30+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,70.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2231636,1.388448e+09,178.0,2013-12-30 23:59:35+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,3.0,0.0
2231637,1.388448e+09,177.0,2013-12-30 23:59:41+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,3.0,0.0
2231638,1.388448e+09,178.0,2013-12-30 23:59:47+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,3.0,0.0
2231639,1.388448e+09,178.0,2013-12-30 23:59:53+00:00,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,3.0,0.0


In [39]:
# Inspect the (start, end) datetime pairs loaded for the training split.
aggregate_load_segments_train

[(datetime.datetime(2013, 1, 8, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 1, 10, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 2, 27, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 2, 28, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 3, 8, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 3, 10, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 3, 28, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 3, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 3, 22, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 3, 26, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 4, 8, 0, 0, tzinfo=datetime.timezone.utc),
  datetime.datetime(2013, 4, 10, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)),
 (datetime.datetime(2013, 4, 26, 0, 0, tzinfo=datetime.timezone.utc),
  d

In [40]:
from rl_env.env_data_loader import SmartMeterDataLoader

# Wrap the training split in the project's smart-meter data loader.
sm_dl = SmartMeterDataLoader(
    aggregate_load_df=aggregate_load_df_train,
    aggregate_load_segments=aggregate_load_segments_train,
)

# Report how many divided segments the loader exposes (162 in the recorded run).
sm_dl.get_divided_segments_length()

162

In [41]:
# Show the entry at index 7 of the loader's divided segments (a [start, end] pair).
sm_dl.divided_segments[7]

array([datetime.datetime(2013, 3, 10, 0, 0, tzinfo=datetime.timezone.utc),
       datetime.datetime(2013, 3, 10, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)],
      dtype=object)

In [42]:
# Index of the divided segment used as the training episode in this template.
# Any index below sm_dl.get_divided_segments_length() can be substituted.
SEGMENT_INDEX = 13

load_segment = sm_dl.get_aggregate_load_segment(SEGMENT_INDEX)

load_segment

Unnamed: 0,timestamp,aggregate,datetime
104747,1.363997e+09,335.0,2013-03-23 00:00:05+00:00
104748,1.363997e+09,336.0,2013-03-23 00:00:11+00:00
104749,1.363997e+09,333.0,2013-03-23 00:00:17+00:00
104750,1.363997e+09,334.0,2013-03-23 00:00:24+00:00
104751,1.363997e+09,331.0,2013-03-23 00:00:30+00:00
...,...,...,...
118501,1.364083e+09,179.0,2013-03-23 23:59:30+00:00
118502,1.364083e+09,171.0,2013-03-23 23:59:37+00:00
118503,1.364083e+09,171.0,2013-03-23 23:59:43+00:00
118504,1.364083e+09,171.0,2013-03-23 23:59:49+00:00


In [43]:
# Rebuild the 'datetime' column from the epoch-second 'timestamp' column.
# `assign` returns a new frame, so the object handed out by the data loader is
# not modified in place (and no chained-assignment warning can arise if that
# object happens to be a slice of the full aggregate frame).
# NOTE(review): the rebuilt column is timezone-naive, whereas the column shown
# before this cell was UTC-aware — confirm the environment expects naive values.
load_segment = load_segment.assign(
    datetime=pd.to_datetime(load_segment['timestamp'], unit='s')
)

load_segment

Unnamed: 0,timestamp,aggregate,datetime
104747,1.363997e+09,335.0,2013-03-23 00:00:05
104748,1.363997e+09,336.0,2013-03-23 00:00:11
104749,1.363997e+09,333.0,2013-03-23 00:00:17
104750,1.363997e+09,334.0,2013-03-23 00:00:24
104751,1.363997e+09,331.0,2013-03-23 00:00:30
...,...,...,...
118501,1.364083e+09,179.0,2013-03-23 23:59:30
118502,1.364083e+09,171.0,2013-03-23 23:59:37
118503,1.364083e+09,171.0,2013-03-23 23:59:43
118504,1.364083e+09,171.0,2013-03-23 23:59:49


(Optional) Load the pre-trained H-network and related components

In the final product, the H-network should be trained along with the DDQL/PPO agent.

In [44]:
import torch
from model.H_network.h_network import HNetwork

h_network_path = Path("model_trained", "h_network_20250708.pth")

# HNetwork(2, 44, 1): per the printed module summary this builds LSTM(2, 44)
# followed by LSTM(88, 1); the arguments must match the saved checkpoint.
h_network = HNetwork(2, 44, 1)

# map_location="cpu" lets the checkpoint load on hosts without the device it was
# saved from; weights_only=True restricts unpickling to tensors and plain
# containers, which is all a state_dict needs.
h_network.load_state_dict(
    torch.load(h_network_path, map_location="cpu", weights_only=True)
)

# Switch to inference mode; as the last expression this also displays the module.
h_network.eval()

HNetwork(
  (LSTM_1): LSTM(2, 44, batch_first=True, bidirectional=True)
  (ac1): Tanh()
  (LSTM_2): LSTM(88, 1, batch_first=True, bidirectional=True)
  (ac2): Tanh()
  (fc): Linear(in_features=2, out_features=1, bias=True)
)

In [45]:
import joblib

# Scaler object stored alongside the pre-trained H-network checkpoint.
# NOTE: joblib.load unpickles the file; only load files you trust.
h_network_stdscaler_path = Path("model_trained") / "h_network_standardscaler_20250708.pkl"
h_network_stdscaler = joblib.load(h_network_stdscaler_path)

Create the environment

In [46]:
import sys

# Put rl_env/ on the import path. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry each time.
rl_env_dir = str(Path('rl_env'))
if rl_env_dir not in sys.path:
    sys.path.append(rl_env_dir)

from rl_env.hrl_env import SmartMeterWorld

# One environment instance over the selected load segment.
# render_mode="human" makes the environment connect to a render server
# (127.0.0.1:50007 in the recorded run).
env = SmartMeterWorld(
    aggregate_load_df=load_segment,
    render_mode="human",
)

# Attach the pre-trained H-network and its scaler to the environment.
env.set_h_network(h_network)
env.set_h_network_stdscaler(h_network_stdscaler)

[2025-07-10 19:18:57:258] [SmartMeterWorld] Render mode set to 'human'. Render server at 127.0.0.1:50007. render_connected: True. render_client_socket: <socket.socket fd=93, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0, laddr=('127.0.0.1', 38414), raddr=('127.0.0.1', 50007)>


In [47]:
from gymnasium.utils.env_checker import check_env

# Run Gymnasium's API conformance checker against the environment.
# Failures are reported as a message rather than stopping the notebook.
try:
    check_env(env)
except Exception as check_error:
    print(f"Environment has issues: {check_error}")
else:
    print("Environment passes all checks!")

Environment passes all checks!


  logger.warn(
  logger.warn(


In [48]:
# Reset the environment to start a fresh episode.
# Note: despite its name, `obs` holds the full `(observation, info)` tuple
# returned by reset(), as the displayed output shows — not just the observation.
obs = env.reset()
obs

({'aggregate_load': array([335.], dtype=float32),
  'battery_soc': array([0.05382102], dtype=float32),
  'timestamp_features': array([-0.5       ,  0.33333334, -0.25      ], dtype=float32)},
 {})

In [49]:
# initialize a PPO agent
from stable_baselines3 import PPO

# Tunable settings for this training run.
RL_SEED = 42                # fixes the agent's (and env's) RNG so runs are repeatable
TOTAL_TIMESTEPS = 300_000   # total environment steps to train for

# Each run logs to its own timestamped TensorBoard directory.
rl_datetime = datetime.now()
tensorboard_log_path = Path("rl_model", "PPO", f"{rl_datetime.strftime('%Y%m%d_%H%M%S')}")

# "MultiInputPolicy" is required because the observation is a dict
# (aggregate_load, battery_soc, timestamp_features).
# NOTE(review): the recorded log shows value_loss around 1e8 and
# explained_variance close to 0 with ep_rew_mean around 1.5e7 — consider
# scaling/normalizing rewards (e.g. VecNormalize); confirm against the
# environment's reward definition.
rl_model = PPO(
    "MultiInputPolicy",
    env,
    verbose=2,
    seed=RL_SEED,
    tensorboard_log=str(tensorboard_log_path),  # documented as a string path
)

rl_model.learn(
    total_timesteps=TOTAL_TIMESTEPS,
    progress_bar=True,
    tb_log_name="PPO_SmartMeterWorld"
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to rl_model/PPO/20250710_191857/PPO_SmartMeterWorld_1


Output()

-----------------------------
| time/              |      |
|    fps             | 117  |
|    iterations      | 1    |
|    time_elapsed    | 17   |
|    total_timesteps | 2048 |
-----------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 115           |
|    iterations           | 2             |
|    time_elapsed         | 35            |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 0.00043522645 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.42         |
|    explained_variance   | -1.79e-06     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.08e+08      |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000339     |
|    std                  | 1             |
|    value_loss           | 2.3e+08       |
-------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 116         |
|    iterations           | 3           |
|    time_elapsed         | 52          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.002173535 |
|    clip_fraction        | 0.000684    |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.42       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 1.34e+08    |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.00198    |
|    std                  | 1           |
|    value_loss           | 2.63e+08    |
-----------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 117           |
|    iterations           | 4             |
|    time_elapsed         | 69            |
|    total_timesteps      | 8192          |
| train/                  |               |
|    approx_kl            | 0.00014453632 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.42         |
|    explained_variance   | 3.58e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.48e+08      |
|    n_updates            | 30            |
|    policy_gradient_loss | -0.000193     |
|    std                  | 1             |
|    value_loss           | 2.91e+08      |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 117          |
|    iterations           | 5            |
|    time_elapsed         | 87           |
|    total_timesteps      | 10240        |
| train/                  |              |
|    approx_kl            | 0.0013526289 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 2.98e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.48e+08     |
|    n_updates            | 40           |
|    policy_gradient_loss | -0.00123     |
|    std                  | 1            |
|    value_loss           | 2.97e+08     |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 117          |
|    iterations           | 6            |
|    time_elapsed         | 104          |
|    total_timesteps      | 12288        |
| train/                  |              |
|    approx_kl            | 0.0002500948 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.63e+08     |
|    n_updates            | 50           |
|    policy_gradient_loss | -0.00022     |
|    std                  | 1            |
|    value_loss           | 3.34e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.43e+07      |
| time/                   |               |
|    fps                  | 117           |
|    iterations           | 7             |
|    time_elapsed         | 122           |
|    total_timesteps      | 14336         |
| train/                  |               |
|    approx_kl            | 1.8966268e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.42         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.1e+08       |
|    n_updates            | 60            |
|    policy_gradient_loss | -6.96e-06     |
|    std                  | 1             |
|    value_loss           | 4.14e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.43e+07     |
| time/                   |              |
|    fps                  | 117          |
|    iterations           | 8            |
|    time_elapsed         | 139          |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 3.618162e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.31e+08     |
|    n_updates            | 70           |
|    policy_gradient_loss | -1.92e-05    |
|    std                  | 1            |
|    value_loss           | 2.77e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.43e+07     |
| time/                   |              |
|    fps                  | 118          |
|    iterations           | 9            |
|    time_elapsed         | 155          |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 3.292429e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.85e+08     |
|    n_updates            | 80           |
|    policy_gradient_loss | 9.43e-06     |
|    std                  | 1            |
|    value_loss           | 3.79e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.43e+07      |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 10            |
|    time_elapsed         | 173           |
|    total_timesteps      | 20480         |
| train/                  |               |
|    approx_kl            | 2.3626111e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.42         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.86e+08      |
|    n_updates            | 90            |
|    policy_gradient_loss | -2.92e-05     |
|    std                  | 1             |
|    value_loss           | 3.67e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.43e+07     |
| time/                   |              |
|    fps                  | 118          |
|    iterations           | 11           |
|    time_elapsed         | 190          |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 5.864422e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 1.89e+08     |
|    n_updates            | 100          |
|    policy_gradient_loss | 1.72e-05     |
|    std                  | 1            |
|    value_loss           | 3.77e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.43e+07     |
| time/                   |              |
|    fps                  | 118          |
|    iterations           | 12           |
|    time_elapsed         | 207          |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 0.0008922054 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.98e+08     |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.000964    |
|    std                  | 1            |
|    value_loss           | 3.82e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.43e+07      |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 13            |
|    time_elapsed         | 224           |
|    total_timesteps      | 26624         |
| train/                  |               |
|    approx_kl            | 0.00012208181 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.42         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.88e+08      |
|    n_updates            | 120           |
|    policy_gradient_loss | -0.000171     |
|    std                  | 1.01          |
|    value_loss           | 3.84e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.5e+07       |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 14            |
|    time_elapsed         | 242           |
|    total_timesteps      | 28672         |
| train/                  |               |
|    approx_kl            | 1.2749573e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.09e+08      |
|    n_updates            | 130           |
|    policy_gradient_loss | -2.57e-05     |
|    std                  | 1.01          |
|    value_loss           | 4.21e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.5e+07       |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 15            |
|    time_elapsed         | 259           |
|    total_timesteps      | 30720         |
| train/                  |               |
|    approx_kl            | 0.00028084428 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.46e+08      |
|    n_updates            | 140           |
|    policy_gradient_loss | -0.000265     |
|    std                  | 1.01          |
|    value_loss           | 3.02e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.5e+07       |
| time/                   |               |
|    fps                  | 117           |
|    iterations           | 16            |
|    time_elapsed         | 278           |
|    total_timesteps      | 32768         |
| train/                  |               |
|    approx_kl            | 0.00021683596 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.83e+08      |
|    n_updates            | 150           |
|    policy_gradient_loss | -0.000227     |
|    std                  | 1.01          |
|    value_loss           | 3.68e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.5e+07      |
| time/                   |              |
|    fps                  | 117          |
|    iterations           | 17           |
|    time_elapsed         | 296          |
|    total_timesteps      | 34816        |
| train/                  |              |
|    approx_kl            | 0.0010490548 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.94e+08     |
|    n_updates            | 160          |
|    policy_gradient_loss | -0.00105     |
|    std                  | 1.01         |
|    value_loss           | 3.81e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.5e+07       |
| time/                   |               |
|    fps                  | 117           |
|    iterations           | 18            |
|    time_elapsed         | 312           |
|    total_timesteps      | 36864         |
| train/                  |               |
|    approx_kl            | 2.5124464e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.01e+08      |
|    n_updates            | 170           |
|    policy_gradient_loss | -1.57e-05     |
|    std                  | 1.01          |
|    value_loss           | 3.99e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.5e+07      |
| time/                   |              |
|    fps                  | 118          |
|    iterations           | 19           |
|    time_elapsed         | 329          |
|    total_timesteps      | 38912        |
| train/                  |              |
|    approx_kl            | 8.276166e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.02e+08     |
|    n_updates            | 180          |
|    policy_gradient_loss | -0.000114    |
|    std                  | 1.01         |
|    value_loss           | 4.08e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.5e+07       |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 20            |
|    time_elapsed         | 346           |
|    total_timesteps      | 40960         |
| train/                  |               |
|    approx_kl            | 0.00016313777 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 2.38e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.08e+08      |
|    n_updates            | 190           |
|    policy_gradient_loss | -0.000192     |
|    std                  | 1.01          |
|    value_loss           | 4.17e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.54e+07      |
| time/                   |               |
|    fps                  | 117           |
|    iterations           | 21            |
|    time_elapsed         | 364           |
|    total_timesteps      | 43008         |
| train/                  |               |
|    approx_kl            | 4.5631023e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.26e+08      |
|    n_updates            | 200           |
|    policy_gradient_loss | -1.41e-05     |
|    std                  | 1.01          |
|    value_loss           | 4.43e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.54e+07      |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 22            |
|    time_elapsed         | 381           |
|    total_timesteps      | 45056         |
| train/                  |               |
|    approx_kl            | 0.00011490213 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 1.79e+08      |
|    n_updates            | 210           |
|    policy_gradient_loss | -8.53e-05     |
|    std                  | 1.01          |
|    value_loss           | 3.42e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 1.54e+07    |
| time/                   |             |
|    fps                  | 118         |
|    iterations           | 23          |
|    time_elapsed         | 397         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.001352215 |
|    clip_fraction        | 4.88e-05    |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.43       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.06e+08    |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.00148    |
|    std                  | 1.01        |
|    value_loss           | 4.13e+08    |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.54e+07      |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 24            |
|    time_elapsed         | 414           |
|    total_timesteps      | 49152         |
| train/                  |               |
|    approx_kl            | 0.00016446237 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.19e+08      |
|    n_updates            | 230           |
|    policy_gradient_loss | -0.000144     |
|    std                  | 1.01          |
|    value_loss           | 4.34e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.54e+07      |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 25            |
|    time_elapsed         | 430           |
|    total_timesteps      | 51200         |
| train/                  |               |
|    approx_kl            | 0.00016659641 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.04e+08      |
|    n_updates            | 240           |
|    policy_gradient_loss | -0.000207     |
|    std                  | 1.01          |
|    value_loss           | 4.18e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.54e+07      |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 26            |
|    time_elapsed         | 448           |
|    total_timesteps      | 53248         |
| train/                  |               |
|    approx_kl            | 0.00018742852 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.08e+08      |
|    n_updates            | 250           |
|    policy_gradient_loss | -0.000168     |
|    std                  | 1.01          |
|    value_loss           | 4.15e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.57e+07     |
| time/                   |              |
|    fps                  | 118          |
|    iterations           | 27           |
|    time_elapsed         | 464          |
|    total_timesteps      | 55296        |
| train/                  |              |
|    approx_kl            | 5.362861e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.29e+08     |
|    n_updates            | 260          |
|    policy_gradient_loss | -3.79e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.7e+08      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.57e+07      |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 28            |
|    time_elapsed         | 482           |
|    total_timesteps      | 57344         |
| train/                  |               |
|    approx_kl            | 0.00029758862 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.8e+08       |
|    n_updates            | 270           |
|    policy_gradient_loss | -0.000286     |
|    std                  | 1.01          |
|    value_loss           | 3.46e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.57e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 29            |
|    time_elapsed         | 498           |
|    total_timesteps      | 59392         |
| train/                  |               |
|    approx_kl            | 3.9531238e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.92e+08      |
|    n_updates            | 280           |
|    policy_gradient_loss | -4.42e-05     |
|    std                  | 1.01          |
|    value_loss           | 3.77e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.57e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 30            |
|    time_elapsed         | 515           |
|    total_timesteps      | 61440         |
| train/                  |               |
|    approx_kl            | 0.00039296327 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.25e+08      |
|    n_updates            | 290           |
|    policy_gradient_loss | -0.000425     |
|    std                  | 1.01          |
|    value_loss           | 4.47e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.57e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 31            |
|    time_elapsed         | 532           |
|    total_timesteps      | 63488         |
| train/                  |               |
|    approx_kl            | 0.00012437988 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.37e+08      |
|    n_updates            | 300           |
|    policy_gradient_loss | -0.000124     |
|    std                  | 1.01          |
|    value_loss           | 4.61e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.57e+07     |
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 32           |
|    time_elapsed         | 549          |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0001338071 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 2.38e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.14e+08     |
|    n_updates            | 310          |
|    policy_gradient_loss | -0.000175    |
|    std                  | 1.01         |
|    value_loss           | 4.34e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.57e+07     |
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 33           |
|    time_elapsed         | 567          |
|    total_timesteps      | 67584        |
| train/                  |              |
|    approx_kl            | 0.0005553446 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 1.97e+08     |
|    n_updates            | 320          |
|    policy_gradient_loss | -0.000613    |
|    std                  | 1.01         |
|    value_loss           | 4.02e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.59e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 34            |
|    time_elapsed         | 584           |
|    total_timesteps      | 69632         |
| train/                  |               |
|    approx_kl            | 1.5770202e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.37e+08      |
|    n_updates            | 330           |
|    policy_gradient_loss | -1.1e-05      |
|    std                  | 1.01          |
|    value_loss           | 4.7e+08       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.59e+07     |
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 35           |
|    time_elapsed         | 602          |
|    total_timesteps      | 71680        |
| train/                  |              |
|    approx_kl            | 4.495564e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.9e+08      |
|    n_updates            | 340          |
|    policy_gradient_loss | -4.94e-05    |
|    std                  | 1.01         |
|    value_loss           | 3.9e+08      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.59e+07      |
| time/                   |               |
|    fps                  | 118           |
|    iterations           | 36            |
|    time_elapsed         | 619           |
|    total_timesteps      | 73728         |
| train/                  |               |
|    approx_kl            | 0.00030663935 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.37e+08      |
|    n_updates            | 350           |
|    policy_gradient_loss | -0.000352     |
|    std                  | 1.01          |
|    value_loss           | 4.75e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.59e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 37            |
|    time_elapsed         | 636           |
|    total_timesteps      | 75776         |
| train/                  |               |
|    approx_kl            | 0.00013794654 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.33e+08      |
|    n_updates            | 360           |
|    policy_gradient_loss | -0.000151     |
|    std                  | 1.01          |
|    value_loss           | 4.65e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.59e+07     |
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 38           |
|    time_elapsed         | 653          |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 2.378007e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.38e+08     |
|    n_updates            | 370          |
|    policy_gradient_loss | -3.33e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.84e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.59e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 39            |
|    time_elapsed         | 669           |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 4.0569226e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.47e+08      |
|    n_updates            | 380           |
|    policy_gradient_loss | -3.58e-05     |
|    std                  | 1.01          |
|    value_loss           | 4.9e+08       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.59e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 40            |
|    time_elapsed         | 686           |
|    total_timesteps      | 81920         |
| train/                  |               |
|    approx_kl            | 2.0354317e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 2.38e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.49e+08      |
|    n_updates            | 390           |
|    policy_gradient_loss | -1.7e-05      |
|    std                  | 1.01          |
|    value_loss           | 4.92e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.63e+07     |
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 41           |
|    time_elapsed         | 703          |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 6.127506e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.4e+08      |
|    n_updates            | 400          |
|    policy_gradient_loss | -7.07e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.86e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.63e+07     |
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 42           |
|    time_elapsed         | 720          |
|    total_timesteps      | 86016        |
| train/                  |              |
|    approx_kl            | 0.0004698435 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2e+08        |
|    n_updates            | 410          |
|    policy_gradient_loss | -0.000435    |
|    std                  | 1.01         |
|    value_loss           | 3.96e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.63e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 43            |
|    time_elapsed         | 736           |
|    total_timesteps      | 88064         |
| train/                  |               |
|    approx_kl            | 1.2508244e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.46e+08      |
|    n_updates            | 420           |
|    policy_gradient_loss | 6.58e-07      |
|    std                  | 1.01          |
|    value_loss           | 4.84e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.63e+07     |
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 44           |
|    time_elapsed         | 753          |
|    total_timesteps      | 90112        |
| train/                  |              |
|    approx_kl            | 0.0015644103 |
|    clip_fraction        | 0.000146     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.53e+08     |
|    n_updates            | 430          |
|    policy_gradient_loss | -0.00188     |
|    std                  | 1.01         |
|    value_loss           | 4.98e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.63e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 45            |
|    time_elapsed         | 769           |
|    total_timesteps      | 92160         |
| train/                  |               |
|    approx_kl            | 0.00013119463 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.58e+08      |
|    n_updates            | 440           |
|    policy_gradient_loss | -0.000151     |
|    std                  | 1.01          |
|    value_loss           | 5.17e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.63e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 46            |
|    time_elapsed         | 785           |
|    total_timesteps      | 94208         |
| train/                  |               |
|    approx_kl            | 2.5664776e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.76e+08      |
|    n_updates            | 450           |
|    policy_gradient_loss | -2.46e-05     |
|    std                  | 1.01          |
|    value_loss           | 5.43e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.63e+07     |
| time/                   |              |
|    fps                  | 119          |
|    iterations           | 47           |
|    time_elapsed         | 802          |
|    total_timesteps      | 96256        |
| train/                  |              |
|    approx_kl            | 5.384648e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.48e+08     |
|    n_updates            | 460          |
|    policy_gradient_loss | -6.14e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.94e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.65e+07      |
| time/                   |               |
|    fps                  | 119           |
|    iterations           | 48            |
|    time_elapsed         | 819           |
|    total_timesteps      | 98304         |
| train/                  |               |
|    approx_kl            | 0.00010165677 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.54e+08      |
|    n_updates            | 470           |
|    policy_gradient_loss | -0.000105     |
|    std                  | 1.01          |
|    value_loss           | 5.03e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.65e+07      |
| time/                   |               |
|    fps                  | 120           |
|    iterations           | 49            |
|    time_elapsed         | 835           |
|    total_timesteps      | 100352        |
| train/                  |               |
|    approx_kl            | 1.6649719e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 1.92e+08      |
|    n_updates            | 480           |
|    policy_gradient_loss | -4.85e-06     |
|    std                  | 1.02          |
|    value_loss           | 4.13e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.65e+07      |
| time/                   |               |
|    fps                  | 120           |
|    iterations           | 50            |
|    time_elapsed         | 852           |
|    total_timesteps      | 102400        |
| train/                  |               |
|    approx_kl            | 1.8419814e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.63e+08      |
|    n_updates            | 490           |
|    policy_gradient_loss | 1.35e-05      |
|    std                  | 1.02          |
|    value_loss           | 5.27e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.65e+07      |
| time/                   |               |
|    fps                  | 120           |
|    iterations           | 51            |
|    time_elapsed         | 868           |
|    total_timesteps      | 104448        |
| train/                  |               |
|    approx_kl            | 2.0859792e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.65e+08      |
|    n_updates            | 500           |
|    policy_gradient_loss | 8.83e-08      |
|    std                  | 1.02          |
|    value_loss           | 5.24e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 1.65e+07    |
| time/                   |             |
|    fps                  | 120         |
|    iterations           | 52          |
|    time_elapsed         | 884         |
|    total_timesteps      | 106496      |
| train/                  |             |
|    approx_kl            | 0.000680445 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.43       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.58e+08    |
|    n_updates            | 510         |
|    policy_gradient_loss | -0.000874   |
|    std                  | 1.02        |
|    value_loss           | 5.12e+08    |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.65e+07      |
| time/                   |               |
|    fps                  | 120           |
|    iterations           | 53            |
|    time_elapsed         | 901           |
|    total_timesteps      | 108544        |
| train/                  |               |
|    approx_kl            | 0.00012570314 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.44         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.58e+08      |
|    n_updates            | 520           |
|    policy_gradient_loss | -0.000128     |
|    std                  | 1.02          |
|    value_loss           | 5.17e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.68e+07     |
| time/                   |              |
|    fps                  | 120          |
|    iterations           | 54           |
|    time_elapsed         | 918          |
|    total_timesteps      | 110592       |
| train/                  |              |
|    approx_kl            | 0.0002697141 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.71e+08     |
|    n_updates            | 530          |
|    policy_gradient_loss | -0.000316    |
|    std                  | 1.02         |
|    value_loss           | 5.4e+08      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.68e+07      |
| time/                   |               |
|    fps                  | 120           |
|    iterations           | 55            |
|    time_elapsed         | 935           |
|    total_timesteps      | 112640        |
| train/                  |               |
|    approx_kl            | 2.8175069e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.06e+08      |
|    n_updates            | 540           |
|    policy_gradient_loss | -1.81e-05     |
|    std                  | 1.02          |
|    value_loss           | 4.38e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.68e+07     |
| time/                   |              |
|    fps                  | 120          |
|    iterations           | 56           |
|    time_elapsed         | 950          |
|    total_timesteps      | 114688       |
| train/                  |              |
|    approx_kl            | 0.0009164037 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.66e+08     |
|    n_updates            | 550          |
|    policy_gradient_loss | -0.00121     |
|    std                  | 1.02         |
|    value_loss           | 5.33e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.68e+07      |
| time/                   |               |
|    fps                  | 120           |
|    iterations           | 57            |
|    time_elapsed         | 965           |
|    total_timesteps      | 116736        |
| train/                  |               |
|    approx_kl            | 0.00042214312 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.94e+08      |
|    n_updates            | 560           |
|    policy_gradient_loss | -0.000555     |
|    std                  | 1.02          |
|    value_loss           | 5.89e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.68e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 58           |
|    time_elapsed         | 980          |
|    total_timesteps      | 118784       |
| train/                  |              |
|    approx_kl            | 8.630275e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.44        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.67e+08     |
|    n_updates            | 570          |
|    policy_gradient_loss | 4.33e-06     |
|    std                  | 1.02         |
|    value_loss           | 5.32e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.68e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 59            |
|    time_elapsed         | 998           |
|    total_timesteps      | 120832        |
| train/                  |               |
|    approx_kl            | 1.9835134e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.65e+08      |
|    n_updates            | 580           |
|    policy_gradient_loss | -1.16e-05     |
|    std                  | 1.02          |
|    value_loss           | 5.24e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.68e+07     |
| time/                   |              |
|    fps                  | 120          |
|    iterations           | 60           |
|    time_elapsed         | 1016         |
|    total_timesteps      | 122880       |
| train/                  |              |
|    approx_kl            | 0.0005026156 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.59e+08     |
|    n_updates            | 590          |
|    policy_gradient_loss | -0.000643    |
|    std                  | 1.02         |
|    value_loss           | 5.28e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.7e+07       |
| time/                   |               |
|    fps                  | 120           |
|    iterations           | 61            |
|    time_elapsed         | 1033          |
|    total_timesteps      | 124928        |
| train/                  |               |
|    approx_kl            | 0.00040390322 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.44         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.41e+08      |
|    n_updates            | 600           |
|    policy_gradient_loss | -0.000474     |
|    std                  | 1.02          |
|    value_loss           | 4.96e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.7e+07       |
| time/                   |               |
|    fps                  | 120           |
|    iterations           | 62            |
|    time_elapsed         | 1049          |
|    total_timesteps      | 126976        |
| train/                  |               |
|    approx_kl            | 0.00010084719 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.44         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.25e+08      |
|    n_updates            | 610           |
|    policy_gradient_loss | -9.71e-05     |
|    std                  | 1.02          |
|    value_loss           | 4.43e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 1.7e+07     |
| time/                   |             |
|    fps                  | 121         |
|    iterations           | 63          |
|    time_elapsed         | 1065        |
|    total_timesteps      | 129024      |
| train/                  |             |
|    approx_kl            | 0.000491702 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.44       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.94e+08    |
|    n_updates            | 620         |
|    policy_gradient_loss | -0.00062    |
|    std                  | 1.02        |
|    value_loss           | 5.87e+08    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.7e+07      |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 64           |
|    time_elapsed         | 1082         |
|    total_timesteps      | 131072       |
| train/                  |              |
|    approx_kl            | 5.771569e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.44        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 3.13e+08     |
|    n_updates            | 630          |
|    policy_gradient_loss | -7.12e-05    |
|    std                  | 1.02         |
|    value_loss           | 6.17e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.7e+07      |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 65           |
|    time_elapsed         | 1098         |
|    total_timesteps      | 133120       |
| train/                  |              |
|    approx_kl            | 7.585477e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.84e+08     |
|    n_updates            | 640          |
|    policy_gradient_loss | 4.09e-06     |
|    std                  | 1.02         |
|    value_loss           | 5.61e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.7e+07       |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 66            |
|    time_elapsed         | 1115          |
|    total_timesteps      | 135168        |
| train/                  |               |
|    approx_kl            | 0.00010676522 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.75e+08      |
|    n_updates            | 650           |
|    policy_gradient_loss | -0.000123     |
|    std                  | 1.02          |
|    value_loss           | 5.49e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.7e+07       |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 67            |
|    time_elapsed         | 1132          |
|    total_timesteps      | 137216        |
| train/                  |               |
|    approx_kl            | 0.00020960238 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.56e+08      |
|    n_updates            | 660           |
|    policy_gradient_loss | -0.000239     |
|    std                  | 1.02          |
|    value_loss           | 5.09e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 1.72e+07    |
| time/                   |             |
|    fps                  | 121         |
|    iterations           | 68          |
|    time_elapsed         | 1148        |
|    total_timesteps      | 139264      |
| train/                  |             |
|    approx_kl            | 7.84884e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.43       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.54e+08    |
|    n_updates            | 670         |
|    policy_gradient_loss | -7.6e-05    |
|    std                  | 1.02        |
|    value_loss           | 5.11e+08    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.72e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 69           |
|    time_elapsed         | 1165         |
|    total_timesteps      | 141312       |
| train/                  |              |
|    approx_kl            | 8.196919e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.27e+08     |
|    n_updates            | 680          |
|    policy_gradient_loss | -2.97e-06    |
|    std                  | 1.02         |
|    value_loss           | 4.41e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.72e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 70            |
|    time_elapsed         | 1181          |
|    total_timesteps      | 143360        |
| train/                  |               |
|    approx_kl            | 0.00054623384 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.65e+08      |
|    n_updates            | 690           |
|    policy_gradient_loss | -0.000699     |
|    std                  | 1.02          |
|    value_loss           | 5.41e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.72e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 71           |
|    time_elapsed         | 1198         |
|    total_timesteps      | 145408       |
| train/                  |              |
|    approx_kl            | 0.0004304503 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 3.05e+08     |
|    n_updates            | 700          |
|    policy_gradient_loss | -0.000585    |
|    std                  | 1.01         |
|    value_loss           | 5.98e+08     |
------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1.38e+04       |
|    ep_rew_mean          | 1.72e+07       |
| time/                   |                |
|    fps                  | 121            |
|    iterations           | 72             |
|    time_elapsed         | 1214           |
|    total_timesteps      | 147456         |
| train/                  |                |
|    approx_kl            | 0.000105785235 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.43          |
|    explained_variance   | 0              |
|    learning_rate        | 0.0003         |
|    loss                 | 2.71e+08       |
|    n_updates            | 710            |
|    policy_gradient_loss | -9.22e-05      |
|    std                  | 1.01           |
|    value_loss           | 5.4e+08        |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.72e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 73            |
|    time_elapsed         | 1231          |
|    total_timesteps      | 149504        |
| train/                  |               |
|    approx_kl            | 4.5876746e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.81e+08      |
|    n_updates            | 720           |
|    policy_gradient_loss | -4.13e-05     |
|    std                  | 1.01          |
|    value_loss           | 5.63e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.73e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 74            |
|    time_elapsed         | 1247          |
|    total_timesteps      | 151552        |
| train/                  |               |
|    approx_kl            | 3.8335304e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.79e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.49e+08      |
|    n_updates            | 730           |
|    policy_gradient_loss | -4.57e-05     |
|    std                  | 1.01          |
|    value_loss           | 4.97e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.73e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 75           |
|    time_elapsed         | 1265         |
|    total_timesteps      | 153600       |
| train/                  |              |
|    approx_kl            | 5.196032e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.57e+08     |
|    n_updates            | 740          |
|    policy_gradient_loss | -6.11e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.73e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.73e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 76            |
|    time_elapsed         | 1281          |
|    total_timesteps      | 155648        |
| train/                  |               |
|    approx_kl            | 0.00031497556 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.65e+08      |
|    n_updates            | 750           |
|    policy_gradient_loss | -0.000331     |
|    std                  | 1.01          |
|    value_loss           | 5.42e+08      |
-------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1.38e+04       |
|    ep_rew_mean          | 1.73e+07       |
| time/                   |                |
|    fps                  | 121            |
|    iterations           | 77             |
|    time_elapsed         | 1298           |
|    total_timesteps      | 157696         |
| train/                  |                |
|    approx_kl            | 0.000116451876 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.43          |
|    explained_variance   | 5.96e-08       |
|    learning_rate        | 0.0003         |
|    loss                 | 2.81e+08       |
|    n_updates            | 760            |
|    policy_gradient_loss | -0.000157      |
|    std                  | 1.01           |
|    value_loss           | 5.67e+08       |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.73e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 78            |
|    time_elapsed         | 1315          |
|    total_timesteps      | 159744        |
| train/                  |               |
|    approx_kl            | 0.00015447263 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.71e+08      |
|    n_updates            | 770           |
|    policy_gradient_loss | -0.000192     |
|    std                  | 1.01          |
|    value_loss           | 5.44e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.73e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 79            |
|    time_elapsed         | 1333          |
|    total_timesteps      | 161792        |
| train/                  |               |
|    approx_kl            | 0.00010514769 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.75e+08      |
|    n_updates            | 780           |
|    policy_gradient_loss | -0.000107     |
|    std                  | 1.01          |
|    value_loss           | 5.52e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.73e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 80           |
|    time_elapsed         | 1351         |
|    total_timesteps      | 163840       |
| train/                  |              |
|    approx_kl            | 9.683718e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.43e+08     |
|    n_updates            | 790          |
|    policy_gradient_loss | 6.37e-06     |
|    std                  | 1.01         |
|    value_loss           | 4.93e+08     |
------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1.38e+04       |
|    ep_rew_mean          | 1.75e+07       |
| time/                   |                |
|    fps                  | 121            |
|    iterations           | 81             |
|    time_elapsed         | 1368           |
|    total_timesteps      | 165888         |
| train/                  |                |
|    approx_kl            | 0.000102813414 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.43          |
|    explained_variance   | 0              |
|    learning_rate        | 0.0003         |
|    loss                 | 2.53e+08       |
|    n_updates            | 800            |
|    policy_gradient_loss | -0.000116      |
|    std                  | 1.01           |
|    value_loss           | 5.09e+08       |
--------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.75e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 82           |
|    time_elapsed         | 1385         |
|    total_timesteps      | 167936       |
| train/                  |              |
|    approx_kl            | 0.0004159849 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.44e+08     |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.000456    |
|    std                  | 1.01         |
|    value_loss           | 4.45e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.75e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 83            |
|    time_elapsed         | 1402          |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 3.5119738e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.87e+08      |
|    n_updates            | 820           |
|    policy_gradient_loss | -2.84e-05     |
|    std                  | 1.01          |
|    value_loss           | 5.73e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.75e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 84           |
|    time_elapsed         | 1419         |
|    total_timesteps      | 172032       |
| train/                  |              |
|    approx_kl            | 7.918582e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 3e+08        |
|    n_updates            | 830          |
|    policy_gradient_loss | -0.000101    |
|    std                  | 1.01         |
|    value_loss           | 6.04e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.75e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 85           |
|    time_elapsed         | 1435         |
|    total_timesteps      | 174080       |
| train/                  |              |
|    approx_kl            | 0.0001709286 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.77e+08     |
|    n_updates            | 840          |
|    policy_gradient_loss | -0.000218    |
|    std                  | 1.01         |
|    value_loss           | 5.51e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.75e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 86           |
|    time_elapsed         | 1452         |
|    total_timesteps      | 176128       |
| train/                  |              |
|    approx_kl            | 0.0004053014 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.97e+08     |
|    n_updates            | 850          |
|    policy_gradient_loss | -0.000496    |
|    std                  | 1.01         |
|    value_loss           | 5.91e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.75e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 87            |
|    time_elapsed         | 1469          |
|    total_timesteps      | 178176        |
| train/                  |               |
|    approx_kl            | 2.4991721e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.74e+08      |
|    n_updates            | 860           |
|    policy_gradient_loss | -4.61e-05     |
|    std                  | 1.02          |
|    value_loss           | 5.42e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.76e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 88            |
|    time_elapsed         | 1486          |
|    total_timesteps      | 180224        |
| train/                  |               |
|    approx_kl            | 6.8994326e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.68e+08      |
|    n_updates            | 870           |
|    policy_gradient_loss | -7.82e-05     |
|    std                  | 1.01          |
|    value_loss           | 5.45e+08      |
-------------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.38e+04  |
|    ep_rew_mean          | 1.76e+07  |
| time/                   |           |
|    fps                  | 121       |
|    iterations           | 89        |
|    time_elapsed         | 1504      |
|    total_timesteps      | 182272    |
| train/                  |           |
|    approx_kl            | 0.0014511 |
|    clip_fraction        | 4.88e-05  |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.43     |
|    explained_variance   | -2.38e-07 |
|    learning_rate        | 0.0003    |
|    loss                 | 2.59e+08  |
|    n_updates            | 880       |
|    policy_gradient_loss | -0.00174  |
|    std                  | 1.01      |
|    value_loss           | 4.87e+08  |
---------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.76e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 90            |
|    time_elapsed         | 1521          |
|    total_timesteps      | 184320        |
| train/                  |               |
|    approx_kl            | 2.4133798e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 3.13e+08      |
|    n_updates            | 890           |
|    policy_gradient_loss | -2.41e-05     |
|    std                  | 1.01          |
|    value_loss           | 6.19e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.76e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 91           |
|    time_elapsed         | 1538         |
|    total_timesteps      | 186368       |
| train/                  |              |
|    approx_kl            | 3.036391e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.97e+08     |
|    n_updates            | 900          |
|    policy_gradient_loss | -3.76e-05    |
|    std                  | 1.01         |
|    value_loss           | 5.91e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.76e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 92           |
|    time_elapsed         | 1555         |
|    total_timesteps      | 188416       |
| train/                  |              |
|    approx_kl            | 1.669483e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 3.07e+08     |
|    n_updates            | 910          |
|    policy_gradient_loss | 8.01e-06     |
|    std                  | 1.01         |
|    value_loss           | 6.12e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.76e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 93            |
|    time_elapsed         | 1572          |
|    total_timesteps      | 190464        |
| train/                  |               |
|    approx_kl            | 7.9299556e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.82e+08      |
|    n_updates            | 920           |
|    policy_gradient_loss | -8.3e-05      |
|    std                  | 1.01          |
|    value_loss           | 5.64e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.76e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 94           |
|    time_elapsed         | 1589         |
|    total_timesteps      | 192512       |
| train/                  |              |
|    approx_kl            | 0.0002007031 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.84e+08     |
|    n_updates            | 930          |
|    policy_gradient_loss | -0.000263    |
|    std                  | 1.01         |
|    value_loss           | 5.7e+08      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.78e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 95            |
|    time_elapsed         | 1606          |
|    total_timesteps      | 194560        |
| train/                  |               |
|    approx_kl            | 0.00033902703 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.85e+08      |
|    n_updates            | 940           |
|    policy_gradient_loss | -0.000445     |
|    std                  | 1.01          |
|    value_loss           | 5.63e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.78e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 96           |
|    time_elapsed         | 1623         |
|    total_timesteps      | 196608       |
| train/                  |              |
|    approx_kl            | 7.186769e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.73e+08     |
|    n_updates            | 950          |
|    policy_gradient_loss | -9.51e-05    |
|    std                  | 1.01         |
|    value_loss           | 5.27e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.78e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 97            |
|    time_elapsed         | 1640          |
|    total_timesteps      | 198656        |
| train/                  |               |
|    approx_kl            | 0.00015031683 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.77e+08      |
|    n_updates            | 960           |
|    policy_gradient_loss | -0.000201     |
|    std                  | 1.01          |
|    value_loss           | 5.59e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.78e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 98            |
|    time_elapsed         | 1657          |
|    total_timesteps      | 200704        |
| train/                  |               |
|    approx_kl            | 0.00024496482 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.68e+08      |
|    n_updates            | 970           |
|    policy_gradient_loss | -0.000272     |
|    std                  | 1.01          |
|    value_loss           | 5.37e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.78e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 99           |
|    time_elapsed         | 1674         |
|    total_timesteps      | 202752       |
| train/                  |              |
|    approx_kl            | 4.481466e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 3.02e+08     |
|    n_updates            | 980          |
|    policy_gradient_loss | 1.08e-05     |
|    std                  | 1.01         |
|    value_loss           | 5.95e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.78e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 100          |
|    time_elapsed         | 1692         |
|    total_timesteps      | 204800       |
| train/                  |              |
|    approx_kl            | 2.548605e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.82e+08     |
|    n_updates            | 990          |
|    policy_gradient_loss | -3.32e-05    |
|    std                  | 1.01         |
|    value_loss           | 5.72e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.79e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 101           |
|    time_elapsed         | 1709          |
|    total_timesteps      | 206848        |
| train/                  |               |
|    approx_kl            | 1.9728963e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.73e+08      |
|    n_updates            | 1000          |
|    policy_gradient_loss | 6.88e-06      |
|    std                  | 1.01          |
|    value_loss           | 5.47e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.79e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 102          |
|    time_elapsed         | 1725         |
|    total_timesteps      | 208896       |
| train/                  |              |
|    approx_kl            | 0.0007797675 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.26e+08     |
|    n_updates            | 1010         |
|    policy_gradient_loss | -0.00092     |
|    std                  | 1.01         |
|    value_loss           | 4.63e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.79e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 103           |
|    time_elapsed         | 1742          |
|    total_timesteps      | 210944        |
| train/                  |               |
|    approx_kl            | 6.6031906e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 3e+08         |
|    n_updates            | 1020          |
|    policy_gradient_loss | -5.08e-05     |
|    std                  | 1.01          |
|    value_loss           | 6e+08         |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.79e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 104          |
|    time_elapsed         | 1759         |
|    total_timesteps      | 212992       |
| train/                  |              |
|    approx_kl            | 6.361178e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 3e+08        |
|    n_updates            | 1030         |
|    policy_gradient_loss | -9.63e-05    |
|    std                  | 1.01         |
|    value_loss           | 5.96e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.79e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 105           |
|    time_elapsed         | 1775          |
|    total_timesteps      | 215040        |
| train/                  |               |
|    approx_kl            | 0.00047800402 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.86e+08      |
|    n_updates            | 1040          |
|    policy_gradient_loss | -0.000608     |
|    std                  | 1.01          |
|    value_loss           | 5.98e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.79e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 106          |
|    time_elapsed         | 1792         |
|    total_timesteps      | 217088       |
| train/                  |              |
|    approx_kl            | 0.0007030545 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.96e+08     |
|    n_updates            | 1050         |
|    policy_gradient_loss | -0.00086     |
|    std                  | 1.01         |
|    value_loss           | 5.89e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.79e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 107           |
|    time_elapsed         | 1808          |
|    total_timesteps      | 219136        |
| train/                  |               |
|    approx_kl            | 4.6502304e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.75e+08      |
|    n_updates            | 1060          |
|    policy_gradient_loss | -6.14e-05     |
|    std                  | 1.01          |
|    value_loss           | 5.54e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.8e+07       |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 108           |
|    time_elapsed         | 1825          |
|    total_timesteps      | 221184        |
| train/                  |               |
|    approx_kl            | 1.3018725e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.72e+08      |
|    n_updates            | 1070          |
|    policy_gradient_loss | 8.09e-06      |
|    std                  | 1.01          |
|    value_loss           | 5.3e+08       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.8e+07      |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 109          |
|    time_elapsed         | 1841         |
|    total_timesteps      | 223232       |
| train/                  |              |
|    approx_kl            | 6.474287e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.53e+08     |
|    n_updates            | 1080         |
|    policy_gradient_loss | -5.21e-05    |
|    std                  | 1.01         |
|    value_loss           | 5.05e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.8e+07      |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 110          |
|    time_elapsed         | 1858         |
|    total_timesteps      | 225280       |
| train/                  |              |
|    approx_kl            | 7.820403e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.96e+08     |
|    n_updates            | 1090         |
|    policy_gradient_loss | -1.7e-07     |
|    std                  | 1.01         |
|    value_loss           | 5.9e+08      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.8e+07      |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 111          |
|    time_elapsed         | 1874         |
|    total_timesteps      | 227328       |
| train/                  |              |
|    approx_kl            | 6.771588e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.93e+08     |
|    n_updates            | 1100         |
|    policy_gradient_loss | 7.53e-06     |
|    std                  | 1.01         |
|    value_loss           | 5.99e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.8e+07      |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 112          |
|    time_elapsed         | 1891         |
|    total_timesteps      | 229376       |
| train/                  |              |
|    approx_kl            | 0.0002662118 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.76e+08     |
|    n_updates            | 1110         |
|    policy_gradient_loss | -0.000347    |
|    std                  | 1.01         |
|    value_loss           | 5.47e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.8e+07      |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 113          |
|    time_elapsed         | 1907         |
|    total_timesteps      | 231424       |
| train/                  |              |
|    approx_kl            | 1.855343e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.97e+08     |
|    n_updates            | 1120         |
|    policy_gradient_loss | -2.16e-05    |
|    std                  | 1.01         |
|    value_loss           | 6.15e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.8e+07       |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 114           |
|    time_elapsed         | 1923          |
|    total_timesteps      | 233472        |
| train/                  |               |
|    approx_kl            | 2.5959278e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.59e+08      |
|    n_updates            | 1130          |
|    policy_gradient_loss | -1.23e-05     |
|    std                  | 1.01          |
|    value_loss           | 5.33e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.81e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 115          |
|    time_elapsed         | 1940         |
|    total_timesteps      | 235520       |
| train/                  |              |
|    approx_kl            | 0.0006506179 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.79e+08     |
|    n_updates            | 1140         |
|    policy_gradient_loss | -0.000735    |
|    std                  | 1.01         |
|    value_loss           | 5.56e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.81e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 116           |
|    time_elapsed         | 1957          |
|    total_timesteps      | 237568        |
| train/                  |               |
|    approx_kl            | 0.00067324366 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.5e+08       |
|    n_updates            | 1150          |
|    policy_gradient_loss | -0.000771     |
|    std                  | 1.01          |
|    value_loss           | 5.09e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.81e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 117           |
|    time_elapsed         | 1973          |
|    total_timesteps      | 239616        |
| train/                  |               |
|    approx_kl            | 2.4721085e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 1.79e-07      |
|    learning_rate        | 0.0003        |
|    loss                 | 3.13e+08      |
|    n_updates            | 1160          |
|    policy_gradient_loss | -4.16e-05     |
|    std                  | 1.01          |
|    value_loss           | 6.19e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.81e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 118           |
|    time_elapsed         | 1989          |
|    total_timesteps      | 241664        |
| train/                  |               |
|    approx_kl            | 5.4243603e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.76e+08      |
|    n_updates            | 1170          |
|    policy_gradient_loss | -4.98e-05     |
|    std                  | 1.01          |
|    value_loss           | 5.49e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.81e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 119          |
|    time_elapsed         | 2006         |
|    total_timesteps      | 243712       |
| train/                  |              |
|    approx_kl            | 9.014312e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.81e+08     |
|    n_updates            | 1180         |
|    policy_gradient_loss | -0.000143    |
|    std                  | 1.01         |
|    value_loss           | 5.55e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.81e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 120           |
|    time_elapsed         | 2022          |
|    total_timesteps      | 245760        |
| train/                  |               |
|    approx_kl            | 0.00012629156 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.89e+08      |
|    n_updates            | 1190          |
|    policy_gradient_loss | -0.000188     |
|    std                  | 1.01          |
|    value_loss           | 5.76e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.81e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 121          |
|    time_elapsed         | 2039         |
|    total_timesteps      | 247808       |
| train/                  |              |
|    approx_kl            | 4.579575e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.44e+08     |
|    n_updates            | 1200         |
|    policy_gradient_loss | -3.64e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.88e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.81e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 122           |
|    time_elapsed         | 2055          |
|    total_timesteps      | 249856        |
| train/                  |               |
|    approx_kl            | 0.00019612073 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.61e+08      |
|    n_updates            | 1210          |
|    policy_gradient_loss | -0.000196     |
|    std                  | 1.01          |
|    value_loss           | 4.99e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.81e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 123           |
|    time_elapsed         | 2072          |
|    total_timesteps      | 251904        |
| train/                  |               |
|    approx_kl            | 0.00033311112 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.73e+08      |
|    n_updates            | 1220          |
|    policy_gradient_loss | -0.000376     |
|    std                  | 1.01          |
|    value_loss           | 5.27e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.81e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 124          |
|    time_elapsed         | 2089         |
|    total_timesteps      | 253952       |
| train/                  |              |
|    approx_kl            | 8.344359e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 3.05e+08     |
|    n_updates            | 1230         |
|    policy_gradient_loss | 4.42e-06     |
|    std                  | 1.01         |
|    value_loss           | 5.97e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.81e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 125          |
|    time_elapsed         | 2105         |
|    total_timesteps      | 256000       |
| train/                  |              |
|    approx_kl            | 0.0005716712 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.75e+08     |
|    n_updates            | 1240         |
|    policy_gradient_loss | -0.000667    |
|    std                  | 1.01         |
|    value_loss           | 5.5e+08      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.81e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 126           |
|    time_elapsed         | 2121          |
|    total_timesteps      | 258048        |
| train/                  |               |
|    approx_kl            | 0.00017496455 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.88e+08      |
|    n_updates            | 1250          |
|    policy_gradient_loss | -0.000178     |
|    std                  | 1.01          |
|    value_loss           | 5.74e+08      |
-------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.38e+04   |
|    ep_rew_mean          | 1.81e+07   |
| time/                   |            |
|    fps                  | 121        |
|    iterations           | 127        |
|    time_elapsed         | 2138       |
|    total_timesteps      | 260096     |
| train/                  |            |
|    approx_kl            | 9.5219e-07 |
|    clip_fraction        | 0          |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.43      |
|    explained_variance   | 5.96e-08   |
|    learning_rate        | 0.0003     |
|    loss                 | 2.79e+08   |
|    n_updates            | 1260       |
|    policy_gradient_loss | 1.79e-05   |
|    std                  | 1.01       |
|    value_loss           | 5.52e+08   |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.82e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 128          |
|    time_elapsed         | 2155         |
|    total_timesteps      | 262144       |
| train/                  |              |
|    approx_kl            | 0.0002567674 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.43e+08     |
|    n_updates            | 1270         |
|    policy_gradient_loss | -0.000271    |
|    std                  | 1.01         |
|    value_loss           | 5.02e+08     |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.82e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 129          |
|    time_elapsed         | 2171         |
|    total_timesteps      | 264192       |
| train/                  |              |
|    approx_kl            | 2.141032e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -2.38e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.15e+08     |
|    n_updates            | 1280         |
|    policy_gradient_loss | -1.59e-05    |
|    std                  | 1.01         |
|    value_loss           | 4.65e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.82e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 130           |
|    time_elapsed         | 2188          |
|    total_timesteps      | 266240        |
| train/                  |               |
|    approx_kl            | 2.3973495e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.81e+08      |
|    n_updates            | 1290          |
|    policy_gradient_loss | -6.88e-06     |
|    std                  | 1.01          |
|    value_loss           | 5.66e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.82e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 131          |
|    time_elapsed         | 2204         |
|    total_timesteps      | 268288       |
| train/                  |              |
|    approx_kl            | 4.620198e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.85e+08     |
|    n_updates            | 1300         |
|    policy_gradient_loss | -4.88e-05    |
|    std                  | 1.01         |
|    value_loss           | 5.74e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.82e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 132           |
|    time_elapsed         | 2221          |
|    total_timesteps      | 270336        |
| train/                  |               |
|    approx_kl            | 0.00015994813 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.83e+08      |
|    n_updates            | 1310          |
|    policy_gradient_loss | -0.000197     |
|    std                  | 1.01          |
|    value_loss           | 5.64e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.82e+07     |
| time/                   |              |
|    fps                  | 121          |
|    iterations           | 133          |
|    time_elapsed         | 2237         |
|    total_timesteps      | 272384       |
| train/                  |              |
|    approx_kl            | 6.692426e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.93e+08     |
|    n_updates            | 1320         |
|    policy_gradient_loss | 8.72e-06     |
|    std                  | 1.01         |
|    value_loss           | 5.86e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.82e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 134           |
|    time_elapsed         | 2254          |
|    total_timesteps      | 274432        |
| train/                  |               |
|    approx_kl            | 1.2224773e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.58e+08      |
|    n_updates            | 1330          |
|    policy_gradient_loss | -1.25e-05     |
|    std                  | 1.01          |
|    value_loss           | 5.22e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.82e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 135           |
|    time_elapsed         | 2271          |
|    total_timesteps      | 276480        |
| train/                  |               |
|    approx_kl            | 3.5463017e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.44e+08      |
|    n_updates            | 1340          |
|    policy_gradient_loss | 7.94e-06      |
|    std                  | 1.01          |
|    value_loss           | 5.04e+08      |
-------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1.38e+04       |
|    ep_rew_mean          | 1.82e+07       |
| time/                   |                |
|    fps                  | 121            |
|    iterations           | 136            |
|    time_elapsed         | 2287           |
|    total_timesteps      | 278528         |
| train/                  |                |
|    approx_kl            | 0.000106076855 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.43          |
|    explained_variance   | 0              |
|    learning_rate        | 0.0003         |
|    loss                 | 2.41e+08       |
|    n_updates            | 1350           |
|    policy_gradient_loss | -0.000153      |
|    std                  | 1.01           |
|    value_loss           | 4.8e+08        |
--------------------------------------------


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1.38e+04       |
|    ep_rew_mean          | 1.82e+07       |
| time/                   |                |
|    fps                  | 121            |
|    iterations           | 137            |
|    time_elapsed         | 2302           |
|    total_timesteps      | 280576         |
| train/                  |                |
|    approx_kl            | 0.000120239536 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.43          |
|    explained_variance   | 0              |
|    learning_rate        | 0.0003         |
|    loss                 | 2.96e+08       |
|    n_updates            | 1360           |
|    policy_gradient_loss | -0.000151      |
|    std                  | 1.01           |
|    value_loss           | 5.88e+08       |
--------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.82e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 138           |
|    time_elapsed         | 2318          |
|    total_timesteps      | 282624        |
| train/                  |               |
|    approx_kl            | 0.00012801509 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.81e+08      |
|    n_updates            | 1370          |
|    policy_gradient_loss | -0.000111     |
|    std                  | 1.01          |
|    value_loss           | 5.59e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.82e+07      |
| time/                   |               |
|    fps                  | 121           |
|    iterations           | 139           |
|    time_elapsed         | 2333          |
|    total_timesteps      | 284672        |
| train/                  |               |
|    approx_kl            | 0.00023020859 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 0.0003        |
|    loss                 | 2.72e+08      |
|    n_updates            | 1380          |
|    policy_gradient_loss | -0.000327     |
|    std                  | 1.01          |
|    value_loss           | 5.51e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.82e+07     |
| time/                   |              |
|    fps                  | 122          |
|    iterations           | 140          |
|    time_elapsed         | 2348         |
|    total_timesteps      | 286720       |
| train/                  |              |
|    approx_kl            | 4.074286e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.98e+08     |
|    n_updates            | 1390         |
|    policy_gradient_loss | -4.83e-05    |
|    std                  | 1.01         |
|    value_loss           | 6.03e+08     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.82e+07      |
| time/                   |               |
|    fps                  | 122           |
|    iterations           | 141           |
|    time_elapsed         | 2364          |
|    total_timesteps      | 288768        |
| train/                  |               |
|    approx_kl            | 0.00035508408 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 2.9e+08       |
|    n_updates            | 1400          |
|    policy_gradient_loss | -0.000426     |
|    std                  | 1.01          |
|    value_loss           | 5.76e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.83e+07      |
| time/                   |               |
|    fps                  | 122           |
|    iterations           | 142           |
|    time_elapsed         | 2379          |
|    total_timesteps      | 290816        |
| train/                  |               |
|    approx_kl            | 0.00031148724 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.77e+08      |
|    n_updates            | 1410          |
|    policy_gradient_loss | -0.000278     |
|    std                  | 1.01          |
|    value_loss           | 5.47e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.83e+07      |
| time/                   |               |
|    fps                  | 122           |
|    iterations           | 143           |
|    time_elapsed         | 2395          |
|    total_timesteps      | 292864        |
| train/                  |               |
|    approx_kl            | 0.00024632586 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.51e+08      |
|    n_updates            | 1420          |
|    policy_gradient_loss | -0.000252     |
|    std                  | 1.01          |
|    value_loss           | 4.95e+08      |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.38e+04    |
|    ep_rew_mean          | 1.83e+07    |
| time/                   |             |
|    fps                  | 122         |
|    iterations           | 144         |
|    time_elapsed         | 2410        |
|    total_timesteps      | 294912      |
| train/                  |             |
|    approx_kl            | 0.000298306 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.43       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.88e+08    |
|    n_updates            | 1430        |
|    policy_gradient_loss | -0.000362   |
|    std                  | 1.01        |
|    value_loss           | 5.71e+08    |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.83e+07      |
| time/                   |               |
|    fps                  | 122           |
|    iterations           | 145           |
|    time_elapsed         | 2425          |
|    total_timesteps      | 296960        |
| train/                  |               |
|    approx_kl            | 0.00051718083 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 3.03e+08      |
|    n_updates            | 1440          |
|    policy_gradient_loss | -0.000607     |
|    std                  | 1.01          |
|    value_loss           | 5.94e+08      |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.38e+04      |
|    ep_rew_mean          | 1.83e+07      |
| time/                   |               |
|    fps                  | 122           |
|    iterations           | 146           |
|    time_elapsed         | 2440          |
|    total_timesteps      | 299008        |
| train/                  |               |
|    approx_kl            | 1.9569707e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.43         |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 2.86e+08      |
|    n_updates            | 1450          |
|    policy_gradient_loss | 1.52e-05      |
|    std                  | 1.01          |
|    value_loss           | 5.75e+08      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.38e+04     |
|    ep_rew_mean          | 1.83e+07     |
| time/                   |              |
|    fps                  | 122          |
|    iterations           | 147          |
|    time_elapsed         | 2456         |
|    total_timesteps      | 301056       |
| train/                  |              |
|    approx_kl            | 4.769268e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.43        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.94e+08     |
|    n_updates            | 1460         |
|    policy_gradient_loss | -5.47e-05    |
|    std                  | 1.01         |
|    value_loss           | 5.79e+08     |
------------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7f662410aa60>

In [50]:
# Reset the environment's render window ahead of the evaluation rollout in the
# next cell. NOTE(review): the exact effect is defined by the env's
# reset_render_window(), which is not shown in this notebook — confirm there.
env.reset_render_window()

In [51]:
# Roll out the trained policy deterministically, logging and rendering every
# step of the episode.
for i in range(1):
    # Reset at the start of every episode so that raising the episode count
    # never keeps stepping an environment that has already finished.
    obs, info = env.reset()
    done = False
    while not done:
        action, _states = rl_model.predict(obs, deterministic=True)
        # env.step returns separate `terminated` and `truncated` flags; the
        # episode is over when either is set. Watching only the first flag
        # would loop forever on an episode that ends by truncation.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        print_log(f"Step: {env.current_step}, Action: {action}, Reward: {reward}")
        env.render()

[2025-07-10 20:35:06:370] Step: 1, Action: [0.88528097], Reward: 2.9484610037982466
[2025-07-10 20:35:06:379] Step: 2, Action: [0.8852801], Reward: 1.158057089406848
[2025-07-10 20:35:06:387] Step: 3, Action: [0.88528275], Reward: 1.782771721322934
[2025-07-10 20:35:06:394] Step: 4, Action: [0.885282], Reward: 3.132552595565041
[2025-07-10 20:35:06:401] Step: 5, Action: [0.8852845], Reward: 4.3714891865448555
[2025-07-10 20:35:06:410] Step: 6, Action: [0.88528454], Reward: 5.460555739656687
[2025-07-10 20:35:06:415] Step: 7, Action: [0.88528806], Reward: 6.31189403058811
[2025-07-10 20:35:06:421] Step: 8, Action: [0.8852892], Reward: 7.000847525537491
[2025-07-10 20:35:06:427] Step: 9, Action: [0.88529027], Reward: 7.595435328352213
[2025-07-10 20:35:06:433] Step: 10, Action: [0.8852918], Reward: 8.111209466182364
[2025-07-10 20:35:06:439] Step: 11, Action: [0.8852915], Reward: 8.613227648519398
[2025-07-10 20:35:06:445] Step: 12, Action: [0.8852926], Reward: 9.097737593362929
[2025-07

In [52]:
# Display the environment's aggregate-load DataFrame as it stands after the
# rollout (bare last expression -> notebook rich display).
env.aggregate_load_df

Unnamed: 0,timestamp,aggregate,datetime,grid_load,battery_soc
104747,1.363997e+09,335.0,2013-03-23 00:00:05,3876.123867,0.184338
104748,1.363997e+09,336.0,2013-03-23 00:00:11,3877.120291,0.185076
104749,1.363997e+09,333.0,2013-03-23 00:00:17,3874.13102,0.185813
104750,1.363997e+09,334.0,2013-03-23 00:00:24,3875.12792,0.186674
104751,1.363997e+09,331.0,2013-03-23 00:00:30,3872.137934,0.187412
...,...,...,...,...,...
118501,1.364083e+09,179.0,2013-03-23 23:59:30,3724.610189,1.0
118502,1.364083e+09,171.0,2013-03-23 23:59:37,3715.701815,1.0
118503,1.364083e+09,171.0,2013-03-23 23:59:43,3715.701815,1.0
118504,1.364083e+09,171.0,2013-03-23 23:59:49,3715.701815,1.0


In [53]:
# Export the environment's graph as graph.png inside this training run's
# timestamped folder: rl_model/PPO/<YYYYmmdd_HHMMSS>/.
env.save_graph(str(Path("rl_model") / "PPO" / rl_datetime.strftime("%Y%m%d_%H%M%S") / "graph.png"))

In [67]:
# Close the environment.
# NOTE(review): this cell carries the highest execution count in this part of
# the notebook, i.e. it was run after the save/load cells that follow it.
# On a top-to-bottom run the env is closed here and a new one is constructed
# in the "load the model & environment" cell further down.
env.close()

[2025-07-11 01:37:30:557] [SmartMeterWorld] Environment closed.


In [54]:
# Persist the trained PPO model as rl_model.zip inside this training run's
# timestamped folder: rl_model/PPO/<YYYYmmdd_HHMMSS>/.
rl_model_path = Path("rl_model") / "PPO" / rl_datetime.strftime("%Y%m%d_%H%M%S") / "rl_model.zip"
rl_model.save(rl_model_path)

---

In [64]:
# Rebuild the environment and restore the saved PPO model against it.

# New SmartMeterWorld over `load_segment`, rendering in "human" mode.
env = SmartMeterWorld(aggregate_load_df=load_segment, render_mode="human")

# Attach the H-network and its standard scaler to the new environment
# instance before it is used.
env.set_h_network(h_network)
env.set_h_network_stdscaler(h_network_stdscaler)

# Load the model from `rl_model_path` and bind it to the new environment.
rl_model_loaded = PPO.load(rl_model_path, env=env)

[2025-07-10 21:43:13:528] [SmartMeterWorld] Render mode set to 'human'. Render server at 127.0.0.1:50007. render_connected: True. render_client_socket: <socket.socket fd=97, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0, laddr=('127.0.0.1', 43478), raddr=('127.0.0.1', 50007)>
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [65]:
# Roll out the model restored from disk deterministically, logging and
# rendering every step of the episode.
for i in range(1):
    # Reset at the start of every episode so that raising the episode count
    # never keeps stepping an environment that has already finished.
    obs, info = env.reset()
    done = False
    while not done:
        # Predict with `rl_model_loaded` (the object returned by PPO.load),
        # not the in-memory `rl_model`; otherwise this cell would not
        # exercise the saved checkpoint at all.
        action, _states = rl_model_loaded.predict(obs, deterministic=True)
        # env.step returns separate `terminated` and `truncated` flags; the
        # episode is over when either is set. Watching only the first flag
        # would loop forever on an episode that ends by truncation.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        print_log(f"Step: {env.current_step}, Action: {action}, Reward: {reward}")
        env.render()

[2025-07-10 21:43:16:202] Step: 1, Action: [0.8852958], Reward: 2.9483444876393077
[2025-07-10 21:43:16:209] Step: 2, Action: [0.88529474], Reward: 1.157978982493043
[2025-07-10 21:43:16:220] Step: 3, Action: [0.88529783], Reward: 1.782662393298871
[2025-07-10 21:43:16:227] Step: 4, Action: [0.88529694], Reward: 3.1324137634190716
[2025-07-10 21:43:16:232] Step: 5, Action: [0.88529986], Reward: 4.37132996958228
[2025-07-10 21:43:16:244] Step: 6, Action: [0.8853], Reward: 5.460382360626499
[2025-07-10 21:43:16:252] Step: 7, Action: [0.8853042], Reward: 6.311706489446164
[2025-07-10 21:43:16:265] Step: 8, Action: [0.8853057], Reward: 7.000642389080326
[2025-07-10 21:43:16:275] Step: 9, Action: [0.885307], Reward: 7.595228475265224
[2025-07-10 21:43:16:281] Step: 10, Action: [0.8853092], Reward: 8.11098973825264
[2025-07-10 21:43:16:287] Step: 11, Action: [0.8853088], Reward: 8.612986463120897
[2025-07-10 21:43:16:293] Step: 12, Action: [0.88531053], Reward: 9.097480958396357
[2025-07-10 

In [66]:
# Export the graph of the post-load rollout as graph2.png inside this training
# run's timestamped folder: rl_model/PPO/<YYYYmmdd_HHMMSS>/.
env.save_graph(str(Path("rl_model") / "PPO" / rl_datetime.strftime("%Y%m%d_%H%M%S") / "graph2.png"))