A template for RL training, with the H-network trained alongside the PPO agent

In [1]:
import numpy as np
import pandas as pd

from pathlib import Path
from datetime import datetime

from utils import print_log

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Folder of the pre-split dataset, relative to the notebook's working directory;
# the train/test/val files are read from it in the cells below
dataset_folder_path = Path("dataset", "20250707_downsampled_1min", "split")

In [4]:
# copied from 03_data_split.ipynb

# Helper functions for the new split folder structure
def load_split_data_from_folder(split_folder, split_type='train'):
    """Load the time segments and the aggregate-load frame of one data split.

    Args:
        split_folder: ``pathlib.Path`` of the split directory. It must contain
            ``{split_type}_segments.txt`` and ``{split_type}_aggregate_df.pkl``.
        split_type: Prefix of the split files to read (this notebook uses
            ``'train'``, ``'test'`` and ``'val'``).

    Returns:
        A ``(segments, df)`` tuple. ``segments`` is a list of ``(start, end)``
        ``datetime`` pairs, one per non-blank line of the segments file;
        ``df`` is the object unpickled from the aggregate file.

    Raises:
        FileNotFoundError: If either file is missing.
        ValueError: If a non-blank line is not ``"<start> - <end>"`` with both
            parts in ISO format.
    """
    segments = []
    with open(split_folder / f'{split_type}_segments.txt', 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. an empty line at the end of the file)
                continue
            start_str, end_str = line.split(' - ')
            segments.append(
                (datetime.fromisoformat(start_str), datetime.fromisoformat(end_str))
            )

    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    df = pd.read_pickle(split_folder / f'{split_type}_aggregate_df.pkl')
    return segments, df

def load_signatures_from_split_folder(split_folder, split_type, appliance):
    """Load one appliance's load signatures and selected ranges for a split.

    Files are looked up under
    ``split_folder / 'load_signature_library' / split_type / appliance``.

    Args:
        split_folder: ``pathlib.Path`` of the split directory.
        split_type: Name of the split sub-folder.
        appliance: Name of the appliance sub-folder.

    Returns:
        A ``(signatures_df, ranges)`` tuple. When ``load_signatures.pkl`` does
        not exist, an empty ``DataFrame`` and an empty list are returned.
        Otherwise ``signatures_df`` is the unpickled object and ``ranges`` is a
        list of ``(start, end)`` integer pairs read from ``selected_ranges.txt``
        (empty when that file is absent).

    Raises:
        ValueError: If a non-blank range line is not ``"<int>,<int>"``.
    """
    appliance_dir = split_folder / 'load_signature_library' / split_type / appliance
    sig_path = appliance_dir / 'load_signatures.pkl'
    ranges_path = appliance_dir / 'selected_ranges.txt'

    if not sig_path.exists():
        return pd.DataFrame(), []

    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    signatures_df = pd.read_pickle(sig_path)

    ranges = []
    if ranges_path.exists():
        with open(ranges_path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. an empty line at the end of the file)
                    continue
                start, end = map(int, line.split(','))
                ranges.append((start, end))

    return signatures_df, ranges

In [5]:
# convert datetime objects to timezone-naive datetime objects
def convert_to_naive_datetimes_df(df):
    """Convert datetime objects in DataFrame to timezone-naive datetime objects"""
    df['datetime'] = df['datetime'].apply(lambda x: x.replace(tzinfo=None) if isinstance(x, datetime) else x)

    return df

def convert_to_naive_datetimes(segments):
    """Return a new list of ``(start, end)`` pairs with timezone info removed from both ends."""
    naive_segments = []
    for segment_start, segment_end in segments:
        naive_pair = (
            segment_start.replace(tzinfo=None),
            segment_end.replace(tzinfo=None),
        )
        naive_segments.append(naive_pair)
    return naive_segments

In [6]:
# Read the segment list and aggregate-load frame of each split.
# Note that the validation split is stored on disk under the 'val' prefix.
(
    aggregate_load_segments_train,
    aggregate_load_df_train,
) = load_split_data_from_folder(dataset_folder_path, split_type='train')
(
    aggregate_load_segments_test,
    aggregate_load_df_test,
) = load_split_data_from_folder(dataset_folder_path, split_type='test')
(
    aggregate_load_segments_validation,
    aggregate_load_df_validation,
) = load_split_data_from_folder(dataset_folder_path, split_type='val')

In [7]:
# Make every split timezone-naive: first the segment boundaries, then the 'datetime' column
aggregate_load_segments_train = convert_to_naive_datetimes(aggregate_load_segments_train)
aggregate_load_df_train = convert_to_naive_datetimes_df(aggregate_load_df_train)

aggregate_load_segments_test = convert_to_naive_datetimes(aggregate_load_segments_test)
aggregate_load_df_test = convert_to_naive_datetimes_df(aggregate_load_df_test)

aggregate_load_segments_validation = convert_to_naive_datetimes(aggregate_load_segments_validation)
aggregate_load_df_validation = convert_to_naive_datetimes_df(aggregate_load_df_validation)

In [8]:
# Preview the training aggregate-load frame (columns: datetime, aggregate, timestamp)
aggregate_load_df_train

Unnamed: 0,datetime,aggregate,timestamp
0,2013-01-08 00:00:05,234.000000,1357603205
1,2013-01-08 00:01:05,230.407069,1357603265
2,2013-01-08 00:02:05,230.680121,1357603325
3,2013-01-08 00:03:05,231.607379,1357603385
4,2013-01-08 00:04:05,231.280688,1357603445
...,...,...,...
231546,2013-12-30 23:55:01,176.973052,1388447701
231547,2013-12-30 23:56:01,177.850890,1388447761
231548,2013-12-30 23:57:01,177.333811,1388447821
231549,2013-12-30 23:58:01,178.462801,1388447881


In [9]:
# Show the (start, end) datetime pairs that delimit the training segments
aggregate_load_segments_train

[(datetime.datetime(2013, 1, 8, 0, 0),
  datetime.datetime(2013, 1, 8, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 1, 9, 0, 0),
  datetime.datetime(2013, 1, 9, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 1, 10, 0, 0),
  datetime.datetime(2013, 1, 10, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 2, 27, 0, 0),
  datetime.datetime(2013, 2, 27, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 2, 28, 0, 0),
  datetime.datetime(2013, 2, 28, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 3, 8, 0, 0),
  datetime.datetime(2013, 3, 8, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 3, 9, 0, 0),
  datetime.datetime(2013, 3, 9, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 3, 10, 0, 0),
  datetime.datetime(2013, 3, 10, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 3, 28, 0, 0),
  datetime.datetime(2013, 3, 28, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 3, 29, 0, 0),
  datetime.datetime(2013, 3, 29, 23, 59, 59, 999000)),
 (datetime.datetime(2013, 3, 30, 0, 0),
  datetime.datetim

In [10]:
from rl_env.env_data_loader import SmartMeterDataLoader

# Wrap the training segments and aggregate-load frame in the project's data loader
sm_dl_train = SmartMeterDataLoader(
    aggregate_load_segments=aggregate_load_segments_train,
    aggregate_load_df=aggregate_load_df_train
)

# Number of divided segments held by the loader
# NOTE(review): how segments are divided is defined in rl_env/env_data_loader.py (not shown here)
sm_dl_train.get_divided_segments_length()

162

In [11]:
# Inspect one divided segment (index 7): an array holding its start and end datetimes
sm_dl_train.divided_segments[7]

array([datetime.datetime(2013, 3, 10, 0, 0),
       datetime.datetime(2013, 3, 10, 23, 59, 59, 999000)], dtype=object)

In [12]:
# Sample segment: fetch the aggregate-load rows returned for segment index 13

sm_dl_train.get_aggregate_load_segment(13)

Unnamed: 0,timestamp,aggregate,datetime
11231,1363996805,335.000000,2013-03-23 00:00:05
11232,1363996865,317.206591,2013-03-23 00:01:05
11233,1363996925,293.301546,2013-03-23 00:02:05
11234,1363996985,276.229767,2013-03-23 00:03:05
11235,1363997045,276.501419,2013-03-23 00:04:05
...,...,...,...
12666,1364082905,173.758178,2013-03-23 23:55:05
12667,1364082965,172.392595,2013-03-23 23:56:05
12668,1364083025,174.270419,2013-03-23 23:57:05
12669,1364083085,171.201633,2013-03-23 23:58:05


In [13]:
# Create the data loaders for the validation and test sets,
# constructed the same way as the training loader
sm_dl_validation = SmartMeterDataLoader(
    aggregate_load_segments=aggregate_load_segments_validation,
    aggregate_load_df=aggregate_load_df_validation
)

sm_dl_test = SmartMeterDataLoader(
    aggregate_load_segments=aggregate_load_segments_test,
    aggregate_load_df=aggregate_load_df_test
)

(Optional) Load the pre-trained H-network and related components

In [14]:

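# Optional: uncomment this cell to load a pre-trained H-network.
# NOTE: `torch` is only imported in a later cell; import it first if this cell is enabled.
# from model.H_network.h_network import HNetwork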

# h_network_datetime = datetime(2025, 7, 13)

# h_network_path = Path("model_trained", f"h_network_{h_network_datetime.strftime('%Y%m%d')}.pth")

# h_network = HNetwork(2, 44, 1)
# h_network.load_state_dict(torch.load(h_network_path))
# h_network.eval()

Create the environment

In [15]:
import torch
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [16]:
import sys
sys.path.append(str(Path('model', 'H_network')))
from model.H_network.h_network_rl_module import HNetworkRLModule
from model.H_network.h_network_arch import HNetworkType

# Build the H-network RL module for the H_NETWORK2 variant on the selected device.
# The same object is later handed to the environment and to the training callback.
h_network_rl_module = HNetworkRLModule(
    h_network_type=HNetworkType.H_NETWORK2,
    device=DEVICE
)

In [17]:
sys.path.append(str(Path('rl_env')))

from rl_env.hrl_env_hnetwork_loop import SmartMeterWorld

# Training environment built from the training data loader and the shared H-network module
env_train = SmartMeterWorld(
    smart_meter_data_loader=sm_dl_train,
    h_network_rl_module=h_network_rl_module,
    # render_mode="human",  # alternative setting, left disabled for training
    render_mode=None,
)

In [18]:
# Sanity check: the module exposes the same enum member that was passed to its constructor
HNetworkType.H_NETWORK2 is h_network_rl_module.h_network_type

True

In [19]:
# Create an H-network via the module's own initializer and register it on the module,
# then initialize the module's training setup.
# NOTE(review): what initialize_h_network_training() prepares is defined in project code — confirm there
h_network_rl_module.set_h_network(
    h_network_rl_module.initialize_h_network()
)
h_network_rl_module.initialize_h_network_training()

In [20]:
from gymnasium.utils.env_checker import check_env

# This will catch many common issues with the environment's Gymnasium API
try:
    check_env(env_train)
    print("Environment passes all checks!")
except Exception as e:
    # Report the failure as a message (no traceback) so the rest of the notebook can still run
    print(f"Environment has issues: {e}")

[2025-07-16 16:18:02:849] [SmartMeterWorld] Resetting environment with a new episode. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-07-29 00:00:04'), Timestamp('2013-07-29 23:59:04'))}
[2025-07-16 16:18:02:853] [SmartMeterWorld] Resetting environment with a new episode. Episode info: {'length': 693, 'datetime_range': (Timestamp('2013-01-10 00:00:02'), Timestamp('2013-01-10 11:32:02'))}
[2025-07-16 16:18:02:855] [SmartMeterWorld] Resetting environment with a new episode. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-09-26 00:00:00'), Timestamp('2013-09-26 23:59:00'))}
[2025-07-16 16:18:02:857] [SmartMeterWorld] Resetting environment with a new episode. Episode info: {'length': 693, 'datetime_range': (Timestamp('2013-01-10 00:00:02'), Timestamp('2013-01-10 11:32:02'))}
[2025-07-16 16:18:02:860] [SmartMeterWorld] Resetting environment with a new episode. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-09-26 00:00:00'), Timestamp('2

  logger.warn(


In [21]:
# Start a new episode and display the initial observation
# (a dict with 'aggregate_load', 'battery_soc' and 'timestamp_features' entries)
obs, info = env_train.reset()
obs

[2025-07-16 16:18:03:044] [SmartMeterWorld] Resetting environment with a new episode. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-09-26 00:00:00'), Timestamp('2013-09-26 23:59:00'))}


{'aggregate_load': array([-0.4250123], dtype=float32),
 'battery_soc': array([0.], dtype=float32),
 'timestamp_features': array([-0.5 ,  0.  ,  0.25], dtype=float32)}

In [22]:
# NOTE(review): presumably resets the environment's render window; behaviour is defined in SmartMeterWorld — confirm
env_train.reset_render_window()

In [23]:
from stable_baselines3.common.callbacks import EveryNTimesteps, ConvertCallback
from typing import Any

class TrainHNetworkEveryNTimesteps(EveryNTimesteps):
    """Callback that trains the H-network every ``n_steps`` environment timesteps.

    The trigger logic comes from ``EveryNTimesteps``; the work done on each
    trigger is ``_train``, wrapped in a ``ConvertCallback`` so it can be used
    as the child callback.

    Args:
        n_steps: Number of timesteps between two H-network training runs.
        h_network_rl_module: Module whose ``train()`` method is called and whose
            ``train_loss_list`` is read for logging.
    """

    def __init__(self, n_steps: int, h_network_rl_module: HNetworkRLModule):
        super().__init__(n_steps=n_steps, callback=ConvertCallback(self._train))
        self.h_network_rl_module = h_network_rl_module

    def _train(self, _locals: dict[str, Any], _globals: dict[str, Any]) -> bool:
        """Train the H-network once and log the most recent loss entry.

        Args:
            _locals: Local variables supplied by the callback machinery (unused).
            _globals: Global variables supplied by the callback machinery (unused).

        Returns:
            Always ``True``, so the surrounding RL training continues.
        """
        print_log("Training H-network...")

        # Train the H-network
        self.h_network_rl_module.train()

        train_loss_list = self.h_network_rl_module.train_loss_list
        # Guard both "never populated" (None) and "populated but empty";
        # indexing [-1] on an empty sequence would raise IndexError.
        if train_loss_list is None or len(train_loss_list) == 0:
            print_log("No training loss data available. Skipping logging.")
            return True

        # The last entry is indexed as (mean, std)
        latest_entry = train_loss_list[-1]
        mean_loss = latest_entry[0]
        std_loss = latest_entry[1]
        print_log(f"(mean, std): ({mean_loss:.8f}, {std_loss:.8f})")

        return True

2025-07-16 16:18:03.297514: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-16 16:18:03.307043: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752679083.317412 1339424 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752679083.320518 1339424 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752679083.329673 1339424 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [24]:
# initialize experiment folder to store related information/data for future analysis

# Timestamp that identifies this experiment run; it names the experiment folder
rl_datetime = datetime.now()

experiment_folder = Path(
    "experiments", rl_datetime.strftime('%Y%m%d_%H%M%S')
)

# exist_ok=True replaces the separate existence check and keeps the cell safe to re-run
experiment_folder.mkdir(parents=True, exist_ok=True)

In [25]:
# initialize a PPO agent
from stable_baselines3 import PPO

# `rl_datetime` is deliberately not reassigned here: it was set when `experiment_folder`
# was created and should keep matching that folder's name.
tensorboard_log_path = experiment_folder / "PPO"

# "MultiInputPolicy" is used because the environment returns dict observations
rl_model = PPO(
    "MultiInputPolicy",
    env_train,
    verbose=2,
    # PPO documents `tensorboard_log` as a string path
    tensorboard_log=str(tensorboard_log_path),
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [26]:
# Train the PPO agent; the callback trains the H-network alongside it.
# 24 * 60 = 1440 steps, the length of a full-day episode in the reset logs ('length': 1440).
rl_model.learn(
    total_timesteps=24 * 60 * 5 * 50,  # 50 blocks of 5 full-day episodes = 360,000 steps
    progress_bar=True,
    tb_log_name="PPO_SmartMeterWorld",
    # H-network training is triggered every 5 full-day episodes' worth of steps (7,200)
    callback=[TrainHNetworkEveryNTimesteps(n_steps=24 * 60 * 5, h_network_rl_module=h_network_rl_module)]
)

[2025-07-16 16:18:08:097] [SmartMeterWorld] Resetting environment with a new episode. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-08-28 00:00:02'), Timestamp('2013-08-28 23:59:02'))}
Logging to experiments/20250716_161804/PPO/PPO_SmartMeterWorld_1


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.44e+03 |
|    ep_rew_mean     | 53.1     |
| time/              |          |
|    fps             | 190      |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 2048     |
---------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.44e+03   |
|    ep_rew_mean          | 58.6       |
| time/                   |            |
|    fps                  | 179        |
|    iterations           | 2          |
|    time_elapsed         | 22         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.00368561 |
|    clip_fraction        | 0.0162     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.42      |
|    explained_variance   | 0.113      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0516     |
|    n_updates            | 10         |
|    policy_gradient_loss | -0.0012    |
|    std                  | 1          |
|    value_loss           | 0.141      |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 59.5         |
| time/                   |              |
|    fps                  | 175          |
|    iterations           | 3            |
|    time_elapsed         | 35           |
|    total_timesteps      | 6144         |
| train/                  |              |
|    approx_kl            | 0.0005067418 |
|    clip_fraction        | 0.000879     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | -0.117       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0346       |
|    n_updates            | 20           |
|    policy_gradient_loss | -0.000262    |
|    std                  | 0.996        |
|    value_loss           | 0.119        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 74.1         |
| time/                   |              |
|    fps                  | 172          |
|    iterations           | 4            |
|    time_elapsed         | 47           |
|    total_timesteps      | 8192         |
| train/                  |              |
|    approx_kl            | 0.0010288844 |
|    clip_fraction        | 0.00273      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0.344        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0566       |
|    n_updates            | 30           |
|    policy_gradient_loss | -0.000407    |
|    std                  | 0.996        |
|    value_loss           | 0.222        |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | 70.6        |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 5           |
|    time_elapsed         | 59          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.002345676 |
|    clip_fraction        | 0.0155      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.4        |
|    explained_variance   | 0.512       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0775      |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.00312    |
|    std                  | 0.965       |
|    value_loss           | 0.295       |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 71.9         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 6            |
|    time_elapsed         | 72           |
|    total_timesteps      | 12288        |
| train/                  |              |
|    approx_kl            | 0.0037454362 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0.767        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0495       |
|    n_updates            | 50           |
|    policy_gradient_loss | -0.00283     |
|    std                  | 0.959        |
|    value_loss           | 0.159        |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.44e+03      |
|    ep_rew_mean          | 70            |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 7             |
|    time_elapsed         | 84            |
|    total_timesteps      | 14336         |
| train/                  |               |
|    approx_kl            | 0.00061661226 |
|    clip_fraction        | 0.000635      |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 0.744         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.0933        |
|    n_updates            | 60            |
|    policy_gradient_loss | -0.000218     |
|    std                  | 0.963         |
|    value_loss           | 0.202         |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | 73.8        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 8           |
|    time_elapsed         | 96          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.006695579 |
|    clip_fraction        | 0.05        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0.953       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0269      |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.00587    |
|    std                  | 0.948       |
|    value_loss           | 0.0783      |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | 72.9        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 9           |
|    time_elapsed         | 109         |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.002334885 |
|    clip_fraction        | 0.00723     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | 0.388       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.155       |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.000664   |
|    std                  | 0.959       |
|    value_loss           | 0.492       |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 77.1         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 10           |
|    time_elapsed         | 120          |
|    total_timesteps      | 20480        |
| train/                  |              |
|    approx_kl            | 0.0030111237 |
|    clip_fraction        | 0.0284       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0.916        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0872       |
|    n_updates            | 90           |
|    policy_gradient_loss | -0.00232     |
|    std                  | 0.959        |
|    value_loss           | 0.138        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 75.6         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 11           |
|    time_elapsed         | 132          |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 0.0022006927 |
|    clip_fraction        | 0.00972      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0.683        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.127        |
|    n_updates            | 100          |
|    policy_gradient_loss | -0.00149     |
|    std                  | 0.962        |
|    value_loss           | 0.285        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 74           |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 12           |
|    time_elapsed         | 145          |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 0.0017342751 |
|    clip_fraction        | 0.0124       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0.835        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0699       |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.000764    |
|    std                  | 0.962        |
|    value_loss           | 0.144        |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.44e+03   |
|    ep_rew_mean          | 72.7       |
| time/                   |            |
|    fps                  | 167        |
|    iterations           | 13         |
|    time_elapsed         | 158        |
|    total_timesteps      | 26624      |
| train/                  |            |
|    approx_kl            | 0.00377939 |
|    clip_fraction        | 0.0372     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.38      |
|    explained_variance   | 0.824      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.167      |
|    n_updates            | 120        |
|    policy_gradient_loss | -0.00276   |
|    std                  | 0.962      |
|    value_loss           | 0.309      |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 71.1         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 14           |
|    time_elapsed         | 171          |
|    total_timesteps      | 28672        |
| train/                  |              |
|    approx_kl            | 0.0041202363 |
|    clip_fraction        | 0.0422       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.37        |
|    explained_variance   | 0.885        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0781       |
|    n_updates            | 130          |
|    policy_gradient_loss | -0.00362     |
|    std                  | 0.946        |
|    value_loss           | 0.174        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 70.9         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 15           |
|    time_elapsed         | 183          |
|    total_timesteps      | 30720        |
| train/                  |              |
|    approx_kl            | 0.0020170552 |
|    clip_fraction        | 0.0141       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.36        |
|    explained_variance   | 0.897        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.115        |
|    n_updates            | 140          |
|    policy_gradient_loss | -0.000898    |
|    std                  | 0.945        |
|    value_loss           | 0.199        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 69.5         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 16           |
|    time_elapsed         | 195          |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0045168903 |
|    clip_fraction        | 0.0422       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.37        |
|    explained_variance   | 0.807        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.107        |
|    n_updates            | 150          |
|    policy_gradient_loss | -0.00371     |
|    std                  | 0.953        |
|    value_loss           | 0.369        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 66.6         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 17           |
|    time_elapsed         | 207          |
|    total_timesteps      | 34816        |
| train/                  |              |
|    approx_kl            | 0.0034463648 |
|    clip_fraction        | 0.0217       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.36        |
|    explained_variance   | 0.828        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0957       |
|    n_updates            | 160          |
|    policy_gradient_loss | -0.00238     |
|    std                  | 0.939        |
|    value_loss           | 0.166        |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | 65.2        |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 18          |
|    time_elapsed         | 219         |
|    total_timesteps      | 36864       |
| train/                  |             |
|    approx_kl            | 0.002766487 |
|    clip_fraction        | 0.0182      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.35       |
|    explained_variance   | 0.87        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.177       |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.00136    |
|    std                  | 0.927       |
|    value_loss           | 0.32        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 61.7         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 19           |
|    time_elapsed         | 231          |
|    total_timesteps      | 38912        |
| train/                  |              |
|    approx_kl            | 0.0056620846 |
|    clip_fraction        | 0.055        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.34        |
|    explained_variance   | 0.927        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0429       |
|    n_updates            | 180          |
|    policy_gradient_loss | -0.00495     |
|    std                  | 0.918        |
|    value_loss           | 0.115        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 60.5         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 20           |
|    time_elapsed         | 242          |
|    total_timesteps      | 40960        |
| train/                  |              |
|    approx_kl            | 0.0034018098 |
|    clip_fraction        | 0.0256       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.33        |
|    explained_variance   | 0.918        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0944       |
|    n_updates            | 190          |
|    policy_gradient_loss | -0.00251     |
|    std                  | 0.908        |
|    value_loss           | 0.22         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 59.6         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 21           |
|    time_elapsed         | 255          |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 0.0038031118 |
|    clip_fraction        | 0.0237       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.949        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0801       |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00193     |
|    std                  | 0.891        |
|    value_loss           | 0.183        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 56.8         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 22           |
|    time_elapsed         | 267          |
|    total_timesteps      | 45056        |
| train/                  |              |
|    approx_kl            | 0.0057173157 |
|    clip_fraction        | 0.03         |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.3         |
|    explained_variance   | 0.935        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.082        |
|    n_updates            | 210          |
|    policy_gradient_loss | -0.00272     |
|    std                  | 0.888        |
|    value_loss           | 0.233        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 55.7         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 23           |
|    time_elapsed         | 281          |
|    total_timesteps      | 47104        |
| train/                  |              |
|    approx_kl            | 0.0072450917 |
|    clip_fraction        | 0.0825       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.932        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0998       |
|    n_updates            | 220          |
|    policy_gradient_loss | -0.00564     |
|    std                  | 0.9          |
|    value_loss           | 0.289        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 51.6         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 24           |
|    time_elapsed         | 294          |
|    total_timesteps      | 49152        |
| train/                  |              |
|    approx_kl            | 0.0047341874 |
|    clip_fraction        | 0.0275       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.912        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.125        |
|    n_updates            | 230          |
|    policy_gradient_loss | -0.00142     |
|    std                  | 0.896        |
|    value_loss           | 0.326        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 50.3         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 25           |
|    time_elapsed         | 306          |
|    total_timesteps      | 51200        |
| train/                  |              |
|    approx_kl            | 0.0014981522 |
|    clip_fraction        | 0.0113       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.969        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.107        |
|    n_updates            | 240          |
|    policy_gradient_loss | -0.0012      |
|    std                  | 0.891        |
|    value_loss           | 0.202        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 45.9         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 26           |
|    time_elapsed         | 318          |
|    total_timesteps      | 53248        |
| train/                  |              |
|    approx_kl            | 0.0024140384 |
|    clip_fraction        | 0.0141       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.3         |
|    explained_variance   | 0.939        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.129        |
|    n_updates            | 250          |
|    policy_gradient_loss | -0.000694    |
|    std                  | 0.893        |
|    value_loss           | 0.294        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 44.1         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 27           |
|    time_elapsed         | 331          |
|    total_timesteps      | 55296        |
| train/                  |              |
|    approx_kl            | 0.0020528743 |
|    clip_fraction        | 0.00845      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.921        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.131        |
|    n_updates            | 260          |
|    policy_gradient_loss | -0.00105     |
|    std                  | 0.898        |
|    value_loss           | 0.444        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 42.1         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 28           |
|    time_elapsed         | 344          |
|    total_timesteps      | 57344        |
| train/                  |              |
|    approx_kl            | 0.0039321017 |
|    clip_fraction        | 0.0338       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.954        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.19         |
|    n_updates            | 270          |
|    policy_gradient_loss | -0.00284     |
|    std                  | 0.895        |
|    value_loss           | 0.432        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 37.7         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 29           |
|    time_elapsed         | 356          |
|    total_timesteps      | 59392        |
| train/                  |              |
|    approx_kl            | 0.0027640117 |
|    clip_fraction        | 0.027        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.961        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.202        |
|    n_updates            | 280          |
|    policy_gradient_loss | -0.00127     |
|    std                  | 0.903        |
|    value_loss           | 0.378        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 35.6         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 30           |
|    time_elapsed         | 368          |
|    total_timesteps      | 61440        |
| train/                  |              |
|    approx_kl            | 0.0032430983 |
|    clip_fraction        | 0.0296       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.32        |
|    explained_variance   | 0.938        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.358        |
|    n_updates            | 290          |
|    policy_gradient_loss | -0.00319     |
|    std                  | 0.906        |
|    value_loss           | 0.725        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 31.6         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 31           |
|    time_elapsed         | 380          |
|    total_timesteps      | 63488        |
| train/                  |              |
|    approx_kl            | 0.0033631653 |
|    clip_fraction        | 0.0235       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.32        |
|    explained_variance   | 0.946        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.475        |
|    n_updates            | 300          |
|    policy_gradient_loss | -0.00126     |
|    std                  | 0.908        |
|    value_loss           | 0.911        |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | 28.1         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 32           |
|    time_elapsed         | 392          |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0044691684 |
|    clip_fraction        | 0.0413       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.949        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.32         |
|    n_updates            | 310          |
|    policy_gradient_loss | -0.00439     |
|    std                  | 0.892        |
|    value_loss           | 0.95         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | 26.4         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 33           |
|    time_elapsed         | 404          |
|    total_timesteps      | 67584        |
| train/                  |              |
|    approx_kl            | 0.0026388231 |
|    clip_fraction        | 0.0122       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.3         |
|    explained_variance   | 0.925        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.49         |
|    n_updates            | 320          |
|    policy_gradient_loss | -0.00101     |
|    std                  | 0.885        |
|    value_loss           | 1.09         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | 23.7         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 34           |
|    time_elapsed         | 417          |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 0.0034738479 |
|    clip_fraction        | 0.0206       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.3         |
|    explained_variance   | 0.89         |
|    learning_rate        | 0.0003       |
|    loss                 | 0.625        |
|    n_updates            | 330          |
|    policy_gradient_loss | -0.00215     |
|    std                  | 0.888        |
|    value_loss           | 1.97         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.41e+03     |
|    ep_rew_mean          | 21.4         |
| time/                   |              |
|    fps                  | 166          |
|    iterations           | 35           |
|    time_elapsed         | 429          |
|    total_timesteps      | 71680        |
| train/                  |              |
|    approx_kl            | 0.0019907905 |
|    clip_fraction        | 0.0238       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.3         |
|    explained_variance   | 0.909        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.21         |
|    n_updates            | 340          |
|    policy_gradient_loss | -0.00197     |
|    std                  | 0.884        |
|    value_loss           | 2.06         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.41e+03    |
|    ep_rew_mean          | 18.4        |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 36          |
|    time_elapsed         | 441         |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.005332603 |
|    clip_fraction        | 0.0496      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.3        |
|    explained_variance   | 0.935       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.647       |
|    n_updates            | 350         |
|    policy_gradient_loss | -0.00256    |
|    std                  | 0.883       |
|    value_loss           | 2.06        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.41e+03    |
|    ep_rew_mean          | 15.2        |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 37          |
|    time_elapsed         | 453         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.002052477 |
|    clip_fraction        | 0.0214      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.29       |
|    explained_variance   | 0.881       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.37        |
|    n_updates            | 360         |
|    policy_gradient_loss | -0.00185    |
|    std                  | 0.879       |
|    value_loss           | 2.77        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.41e+03    |
|    ep_rew_mean          | 10.8        |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 38          |
|    time_elapsed         | 465         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.004444588 |
|    clip_fraction        | 0.0271      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.28       |
|    explained_variance   | 0.913       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.826       |
|    n_updates            | 370         |
|    policy_gradient_loss | -0.0026     |
|    std                  | 0.865       |
|    value_loss           | 2.22        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.41e+03     |
|    ep_rew_mean          | 7.93         |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 39           |
|    time_elapsed         | 477          |
|    total_timesteps      | 79872        |
| train/                  |              |
|    approx_kl            | 0.0020542392 |
|    clip_fraction        | 0.0135       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.28        |
|    explained_variance   | 0.911        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.1          |
|    n_updates            | 380          |
|    policy_gradient_loss | -0.000656    |
|    std                  | 0.868        |
|    value_loss           | 3.68         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.41e+03    |
|    ep_rew_mean          | 4.7         |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 40          |
|    time_elapsed         | 489         |
|    total_timesteps      | 81920       |
| train/                  |             |
|    approx_kl            | 0.004977703 |
|    clip_fraction        | 0.031       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.28       |
|    explained_variance   | 0.865       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.958       |
|    n_updates            | 390         |
|    policy_gradient_loss | -0.00204    |
|    std                  | 0.879       |
|    value_loss           | 2.89        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.41e+03     |
|    ep_rew_mean          | -1.59        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 41           |
|    time_elapsed         | 500          |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 0.0046542203 |
|    clip_fraction        | 0.0308       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.29        |
|    explained_variance   | 0.869        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.38         |
|    n_updates            | 400          |
|    policy_gradient_loss | -0.00297     |
|    std                  | 0.887        |
|    value_loss           | 6.29         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.41e+03     |
|    ep_rew_mean          | -5.06        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 42           |
|    time_elapsed         | 513          |
|    total_timesteps      | 86016        |
| train/                  |              |
|    approx_kl            | 0.0031576287 |
|    clip_fraction        | 0.0298       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.29        |
|    explained_variance   | 0.939        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.7          |
|    n_updates            | 410          |
|    policy_gradient_loss | -0.00189     |
|    std                  | 0.872        |
|    value_loss           | 3.58         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.41e+03    |
|    ep_rew_mean          | -11         |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 43          |
|    time_elapsed         | 526         |
|    total_timesteps      | 88064       |
| train/                  |             |
|    approx_kl            | 0.005558604 |
|    clip_fraction        | 0.0268      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.28       |
|    explained_variance   | 0.876       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.23        |
|    n_updates            | 420         |
|    policy_gradient_loss | -0.00191    |
|    std                  | 0.873       |
|    value_loss           | 5.04        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -11.2       |
| time/                   |             |
|    fps                  | 167         |
|    iterations           | 44          |
|    time_elapsed         | 538         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.003428006 |
|    clip_fraction        | 0.0211      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.28       |
|    explained_variance   | 0.919       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.02        |
|    n_updates            | 430         |
|    policy_gradient_loss | -0.00276    |
|    std                  | 0.868       |
|    value_loss           | 5.57        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -18.4        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 45           |
|    time_elapsed         | 550          |
|    total_timesteps      | 92160        |
| train/                  |              |
|    approx_kl            | 0.0017795658 |
|    clip_fraction        | 0.00659      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.28        |
|    explained_variance   | 0.85         |
|    learning_rate        | 0.0003       |
|    loss                 | 3.45         |
|    n_updates            | 440          |
|    policy_gradient_loss | -0.0014      |
|    std                  | 0.863        |
|    value_loss           | 6.69         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -21.7        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 46           |
|    time_elapsed         | 561          |
|    total_timesteps      | 94208        |
| train/                  |              |
|    approx_kl            | 0.0020574266 |
|    clip_fraction        | 0.00723      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.28        |
|    explained_variance   | 0.906        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.02         |
|    n_updates            | 450          |
|    policy_gradient_loss | -0.000737    |
|    std                  | 0.868        |
|    value_loss           | 8.52         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -25.4        |
| time/                   |              |
|    fps                  | 167          |
|    iterations           | 47           |
|    time_elapsed         | 573          |
|    total_timesteps      | 96256        |
| train/                  |              |
|    approx_kl            | 0.0025451668 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.27        |
|    explained_variance   | 0.933        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.18         |
|    n_updates            | 460          |
|    policy_gradient_loss | -0.00163     |
|    std                  | 0.862        |
|    value_loss           | 5.95         |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.42e+03   |
|    ep_rew_mean          | -35.7      |
| time/                   |            |
|    fps                  | 168        |
|    iterations           | 48         |
|    time_elapsed         | 585        |
|    total_timesteps      | 98304      |
| train/                  |            |
|    approx_kl            | 0.00261127 |
|    clip_fraction        | 0.0114     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.27      |
|    explained_variance   | 0.898      |
|    learning_rate        | 0.0003     |
|    loss                 | 5.95       |
|    n_updates            | 470        |
|    policy_gradient_loss | -0.00124   |
|    std                  | 0.855      |
|    value_loss           | 11.6       |
----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.42e+03      |
|    ep_rew_mean          | -36.3         |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 49            |
|    time_elapsed         | 596           |
|    total_timesteps      | 100352        |
| train/                  |               |
|    approx_kl            | 0.00070637057 |
|    clip_fraction        | 0.00132       |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.26         |
|    explained_variance   | 0.941         |
|    learning_rate        | 0.0003        |
|    loss                 | 4.03          |
|    n_updates            | 480           |
|    policy_gradient_loss | -0.000144     |
|    std                  | 0.852         |
|    value_loss           | 9.52          |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -46.6        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 50           |
|    time_elapsed         | 608          |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 0.0037729004 |
|    clip_fraction        | 0.0137       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.26        |
|    explained_variance   | 0.867        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.24         |
|    n_updates            | 490          |
|    policy_gradient_loss | -0.00103     |
|    std                  | 0.847        |
|    value_loss           | 15.4         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -47          |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 51           |
|    time_elapsed         | 619          |
|    total_timesteps      | 104448       |
| train/                  |              |
|    approx_kl            | 0.0020902567 |
|    clip_fraction        | 0.00864      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.25        |
|    explained_variance   | 0.933        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.45         |
|    n_updates            | 500          |
|    policy_gradient_loss | -0.00115     |
|    std                  | 0.835        |
|    value_loss           | 13.4         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -57.4        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 52           |
|    time_elapsed         | 633          |
|    total_timesteps      | 106496       |
| train/                  |              |
|    approx_kl            | 0.0056428267 |
|    clip_fraction        | 0.0274       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.24        |
|    explained_variance   | 0.897        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.85         |
|    n_updates            | 510          |
|    policy_gradient_loss | -0.00185     |
|    std                  | 0.835        |
|    value_loss           | 12           |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -63         |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 53          |
|    time_elapsed         | 645         |
|    total_timesteps      | 108544      |
| train/                  |             |
|    approx_kl            | 0.002307754 |
|    clip_fraction        | 0.015       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.24       |
|    explained_variance   | 0.915       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.64        |
|    n_updates            | 520         |
|    policy_gradient_loss | -0.00125    |
|    std                  | 0.829       |
|    value_loss           | 13.6        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.42e+03   |
|    ep_rew_mean          | -69.8      |
| time/                   |            |
|    fps                  | 168        |
|    iterations           | 54         |
|    time_elapsed         | 657        |
|    total_timesteps      | 110592     |
| train/                  |            |
|    approx_kl            | 0.00505065 |
|    clip_fraction        | 0.0434     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.23      |
|    explained_variance   | 0.943      |
|    learning_rate        | 0.0003     |
|    loss                 | 5.5        |
|    n_updates            | 530        |
|    policy_gradient_loss | -0.00345   |
|    std                  | 0.826      |
|    value_loss           | 12.6       |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -79.6        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 55           |
|    time_elapsed         | 668          |
|    total_timesteps      | 112640       |
| train/                  |              |
|    approx_kl            | 0.0037789557 |
|    clip_fraction        | 0.015        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | 0.932        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.45         |
|    n_updates            | 540          |
|    policy_gradient_loss | -0.00201     |
|    std                  | 0.82         |
|    value_loss           | 16.9         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -82.8        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 56           |
|    time_elapsed         | 681          |
|    total_timesteps      | 114688       |
| train/                  |              |
|    approx_kl            | 0.0030445708 |
|    clip_fraction        | 0.019        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | 0.937        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.64         |
|    n_updates            | 550          |
|    policy_gradient_loss | -0.00212     |
|    std                  | 0.818        |
|    value_loss           | 13.9         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -98.3        |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 57           |
|    time_elapsed         | 693          |
|    total_timesteps      | 116736       |
| train/                  |              |
|    approx_kl            | 0.0024344344 |
|    clip_fraction        | 0.0106       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.925        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.99         |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.00147     |
|    std                  | 0.806        |
|    value_loss           | 19.4         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -106        |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 58          |
|    time_elapsed         | 705         |
|    total_timesteps      | 118784      |
| train/                  |             |
|    approx_kl            | 0.003976189 |
|    clip_fraction        | 0.0279      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.924       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.4         |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.00372    |
|    std                  | 0.803       |
|    value_loss           | 34.3        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -123         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 59           |
|    time_elapsed         | 717          |
|    total_timesteps      | 120832       |
| train/                  |              |
|    approx_kl            | 0.0023803958 |
|    clip_fraction        | 0.0041       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.928        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.1         |
|    n_updates            | 580          |
|    policy_gradient_loss | -0.000179    |
|    std                  | 0.798        |
|    value_loss           | 34.9         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -130        |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 60          |
|    time_elapsed         | 729         |
|    total_timesteps      | 122880      |
| train/                  |             |
|    approx_kl            | 0.002132177 |
|    clip_fraction        | 0.00962     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.914       |
|    learning_rate        | 0.0003      |
|    loss                 | 25.5        |
|    n_updates            | 590         |
|    policy_gradient_loss | -0.00148    |
|    std                  | 0.804       |
|    value_loss           | 43.7        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -140         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 61           |
|    time_elapsed         | 741          |
|    total_timesteps      | 124928       |
| train/                  |              |
|    approx_kl            | 0.0026400504 |
|    clip_fraction        | 0.0179       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.952        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.7          |
|    n_updates            | 600          |
|    policy_gradient_loss | -0.00235     |
|    std                  | 0.806        |
|    value_loss           | 28.6         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -158        |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 62          |
|    time_elapsed         | 753         |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.000980614 |
|    clip_fraction        | 0.000488    |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.901       |
|    learning_rate        | 0.0003      |
|    loss                 | 36          |
|    n_updates            | 610         |
|    policy_gradient_loss | -0.000414   |
|    std                  | 0.799       |
|    value_loss           | 74          |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -166         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 63           |
|    time_elapsed         | 766          |
|    total_timesteps      | 129024       |
| train/                  |              |
|    approx_kl            | 0.0012942271 |
|    clip_fraction        | 0.00127      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.19        |
|    explained_variance   | 0.928        |
|    learning_rate        | 0.0003       |
|    loss                 | 27.1         |
|    n_updates            | 620          |
|    policy_gradient_loss | -0.000593    |
|    std                  | 0.799        |
|    value_loss           | 81.7         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.41e+03    |
|    ep_rew_mean          | -175        |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 64          |
|    time_elapsed         | 778         |
|    total_timesteps      | 131072      |
| train/                  |             |
|    approx_kl            | 0.002666209 |
|    clip_fraction        | 0.0132      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.891       |
|    learning_rate        | 0.0003      |
|    loss                 | 50.4        |
|    n_updates            | 630         |
|    policy_gradient_loss | -0.00212    |
|    std                  | 0.801       |
|    value_loss           | 91.1        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -199         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 65           |
|    time_elapsed         | 790          |
|    total_timesteps      | 133120       |
| train/                  |              |
|    approx_kl            | 0.0022558428 |
|    clip_fraction        | 0.00937      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.19        |
|    explained_variance   | 0.927        |
|    learning_rate        | 0.0003       |
|    loss                 | 30.6         |
|    n_updates            | 640          |
|    policy_gradient_loss | -0.00163     |
|    std                  | 0.793        |
|    value_loss           | 103          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -208         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 66           |
|    time_elapsed         | 802          |
|    total_timesteps      | 135168       |
| train/                  |              |
|    approx_kl            | 0.0012117932 |
|    clip_fraction        | 0.000537     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.19        |
|    explained_variance   | 0.91         |
|    learning_rate        | 0.0003       |
|    loss                 | 57.6         |
|    n_updates            | 650          |
|    policy_gradient_loss | -0.00053     |
|    std                  | 0.8          |
|    value_loss           | 141          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -218         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 67           |
|    time_elapsed         | 814          |
|    total_timesteps      | 137216       |
| train/                  |              |
|    approx_kl            | 0.0019327314 |
|    clip_fraction        | 0.00161      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.931        |
|    learning_rate        | 0.0003       |
|    loss                 | 54.9         |
|    n_updates            | 660          |
|    policy_gradient_loss | -0.0002      |
|    std                  | 0.803        |
|    value_loss           | 96           |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -230         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 68           |
|    time_elapsed         | 825          |
|    total_timesteps      | 139264       |
| train/                  |              |
|    approx_kl            | 0.0018595401 |
|    clip_fraction        | 0.013        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.9          |
|    learning_rate        | 0.0003       |
|    loss                 | 29.7         |
|    n_updates            | 670          |
|    policy_gradient_loss | -0.00234     |
|    std                  | 0.808        |
|    value_loss           | 124          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -240         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 69           |
|    time_elapsed         | 837          |
|    total_timesteps      | 141312       |
| train/                  |              |
|    approx_kl            | 0.0028281652 |
|    clip_fraction        | 0.0126       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.91         |
|    learning_rate        | 0.0003       |
|    loss                 | 19.6         |
|    n_updates            | 680          |
|    policy_gradient_loss | -0.0014      |
|    std                  | 0.803        |
|    value_loss           | 86.4         |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.42e+03      |
|    ep_rew_mean          | -260          |
| time/                   |               |
|    fps                  | 168           |
|    iterations           | 70            |
|    time_elapsed         | 849           |
|    total_timesteps      | 143360        |
| train/                  |               |
|    approx_kl            | 0.00074903783 |
|    clip_fraction        | 0.0019        |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.2          |
|    explained_variance   | 0.914         |
|    learning_rate        | 0.0003        |
|    loss                 | 38.2          |
|    n_updates            | 690           |
|    policy_gradient_loss | -0.000207     |
|    std                  | 0.808         |
|    value_loss           | 144           |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -266         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 71           |
|    time_elapsed         | 861          |
|    total_timesteps      | 145408       |
| train/                  |              |
|    approx_kl            | 0.0042869532 |
|    clip_fraction        | 0.0224       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.933        |
|    learning_rate        | 0.0003       |
|    loss                 | 55.1         |
|    n_updates            | 700          |
|    policy_gradient_loss | -0.00361     |
|    std                  | 0.796        |
|    value_loss           | 101          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -291         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 72           |
|    time_elapsed         | 874          |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 0.0007266149 |
|    clip_fraction        | 0.00283      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.94         |
|    learning_rate        | 0.0003       |
|    loss                 | 34.4         |
|    n_updates            | 710          |
|    policy_gradient_loss | -0.000209    |
|    std                  | 0.803        |
|    value_loss           | 72.3         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -310        |
| time/                   |             |
|    fps                  | 168         |
|    iterations           | 73          |
|    time_elapsed         | 886         |
|    total_timesteps      | 149504      |
| train/                  |             |
|    approx_kl            | 0.005994668 |
|    clip_fraction        | 0.0473      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.895       |
|    learning_rate        | 0.0003      |
|    loss                 | 51.1        |
|    n_updates            | 720         |
|    policy_gradient_loss | -0.00558    |
|    std                  | 0.802       |
|    value_loss           | 114         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -324         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 74           |
|    time_elapsed         | 898          |
|    total_timesteps      | 151552       |
| train/                  |              |
|    approx_kl            | 0.0028841458 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.935        |
|    learning_rate        | 0.0003       |
|    loss                 | 105          |
|    n_updates            | 730          |
|    policy_gradient_loss | -0.00239     |
|    std                  | 0.813        |
|    value_loss           | 158          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -355         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 75           |
|    time_elapsed         | 909          |
|    total_timesteps      | 153600       |
| train/                  |              |
|    approx_kl            | 0.0022119181 |
|    clip_fraction        | 0.00708      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | 0.931        |
|    learning_rate        | 0.0003       |
|    loss                 | 51.5         |
|    n_updates            | 740          |
|    policy_gradient_loss | -0.00111     |
|    std                  | 0.817        |
|    value_loss           | 135          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -359         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 76           |
|    time_elapsed         | 920          |
|    total_timesteps      | 155648       |
| train/                  |              |
|    approx_kl            | 0.0038975615 |
|    clip_fraction        | 0.0349       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | 0.949        |
|    learning_rate        | 0.0003       |
|    loss                 | 35.9         |
|    n_updates            | 750          |
|    policy_gradient_loss | -0.00369     |
|    std                  | 0.823        |
|    value_loss           | 131          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -385         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 77           |
|    time_elapsed         | 932          |
|    total_timesteps      | 157696       |
| train/                  |              |
|    approx_kl            | 0.0036043657 |
|    clip_fraction        | 0.0173       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | 0.914        |
|    learning_rate        | 0.0003       |
|    loss                 | 46           |
|    n_updates            | 760          |
|    policy_gradient_loss | -0.00156     |
|    std                  | 0.812        |
|    value_loss           | 111          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -399         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 78           |
|    time_elapsed         | 944          |
|    total_timesteps      | 159744       |
| train/                  |              |
|    approx_kl            | 0.0018079903 |
|    clip_fraction        | 0.00083      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.942        |
|    learning_rate        | 0.0003       |
|    loss                 | 102          |
|    n_updates            | 770          |
|    policy_gradient_loss | -0.000157    |
|    std                  | 0.807        |
|    value_loss           | 112          |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.42e+03   |
|    ep_rew_mean          | -415       |
| time/                   |            |
|    fps                  | 169        |
|    iterations           | 79         |
|    time_elapsed         | 956        |
|    total_timesteps      | 161792     |
| train/                  |            |
|    approx_kl            | 0.00628003 |
|    clip_fraction        | 0.043      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.2       |
|    explained_variance   | 0.947      |
|    learning_rate        | 0.0003     |
|    loss                 | 108        |
|    n_updates            | 780        |
|    policy_gradient_loss | -0.00364   |
|    std                  | 0.807      |
|    value_loss           | 120        |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -449         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 80           |
|    time_elapsed         | 967          |
|    total_timesteps      | 163840       |
| train/                  |              |
|    approx_kl            | 0.0029342729 |
|    clip_fraction        | 0.011        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.923        |
|    learning_rate        | 0.0003       |
|    loss                 | 62.9         |
|    n_updates            | 790          |
|    policy_gradient_loss | -0.000733    |
|    std                  | 0.807        |
|    value_loss           | 144          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -456         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 81           |
|    time_elapsed         | 979          |
|    total_timesteps      | 165888       |
| train/                  |              |
|    approx_kl            | 0.0034052688 |
|    clip_fraction        | 0.0196       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.966        |
|    learning_rate        | 0.0003       |
|    loss                 | 58.2         |
|    n_updates            | 800          |
|    policy_gradient_loss | -0.00172     |
|    std                  | 0.808        |
|    value_loss           | 86.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -483         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 82           |
|    time_elapsed         | 992          |
|    total_timesteps      | 167936       |
| train/                  |              |
|    approx_kl            | 0.0033484981 |
|    clip_fraction        | 0.0227       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.901        |
|    learning_rate        | 0.0003       |
|    loss                 | 63.2         |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.00243     |
|    std                  | 0.82         |
|    value_loss           | 137          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -496         |
| time/                   |              |
|    fps                  | 168          |
|    iterations           | 83           |
|    time_elapsed         | 1005         |
|    total_timesteps      | 169984       |
| train/                  |              |
|    approx_kl            | 0.0019433711 |
|    clip_fraction        | 0.0116       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.946        |
|    learning_rate        | 0.0003       |
|    loss                 | 44.2         |
|    n_updates            | 820          |
|    policy_gradient_loss | -0.00217     |
|    std                  | 0.805        |
|    value_loss           | 110          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -524         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 84           |
|    time_elapsed         | 1017         |
|    total_timesteps      | 172032       |
| train/                  |              |
|    approx_kl            | 0.0032611163 |
|    clip_fraction        | 0.0192       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.932        |
|    learning_rate        | 0.0003       |
|    loss                 | 48.9         |
|    n_updates            | 830          |
|    policy_gradient_loss | -0.00212     |
|    std                  | 0.812        |
|    value_loss           | 128          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -535        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 85          |
|    time_elapsed         | 1029        |
|    total_timesteps      | 174080      |
| train/                  |             |
|    approx_kl            | 0.005915422 |
|    clip_fraction        | 0.0349      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.943       |
|    learning_rate        | 0.0003      |
|    loss                 | 70.8        |
|    n_updates            | 840         |
|    policy_gradient_loss | -0.00363    |
|    std                  | 0.801       |
|    value_loss           | 128         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -550         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 86           |
|    time_elapsed         | 1041         |
|    total_timesteps      | 176128       |
| train/                  |              |
|    approx_kl            | 0.0005803793 |
|    clip_fraction        | 0.00298      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.964        |
|    learning_rate        | 0.0003       |
|    loss                 | 33.5         |
|    n_updates            | 850          |
|    policy_gradient_loss | -0.000401    |
|    std                  | 0.815        |
|    value_loss           | 73.7         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -574         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 87           |
|    time_elapsed         | 1053         |
|    total_timesteps      | 178176       |
| train/                  |              |
|    approx_kl            | 0.0043918192 |
|    clip_fraction        | 0.0173       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.953        |
|    learning_rate        | 0.0003       |
|    loss                 | 39           |
|    n_updates            | 860          |
|    policy_gradient_loss | -0.00181     |
|    std                  | 0.805        |
|    value_loss           | 90.4         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -583         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 88           |
|    time_elapsed         | 1066         |
|    total_timesteps      | 180224       |
| train/                  |              |
|    approx_kl            | 0.0020978763 |
|    clip_fraction        | 0.0282       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.955        |
|    learning_rate        | 0.0003       |
|    loss                 | 44.5         |
|    n_updates            | 870          |
|    policy_gradient_loss | -0.000851    |
|    std                  | 0.807        |
|    value_loss           | 76.2         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -615         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 89           |
|    time_elapsed         | 1077         |
|    total_timesteps      | 182272       |
| train/                  |              |
|    approx_kl            | 0.0030780113 |
|    clip_fraction        | 0.0206       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.952        |
|    learning_rate        | 0.0003       |
|    loss                 | 32.5         |
|    n_updates            | 880          |
|    policy_gradient_loss | -0.00304     |
|    std                  | 0.795        |
|    value_loss           | 66.6         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -630         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 90           |
|    time_elapsed         | 1089         |
|    total_timesteps      | 184320       |
| train/                  |              |
|    approx_kl            | 0.0055257855 |
|    clip_fraction        | 0.0322       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.19        |
|    explained_variance   | 0.938        |
|    learning_rate        | 0.0003       |
|    loss                 | 30.1         |
|    n_updates            | 890          |
|    policy_gradient_loss | -0.00336     |
|    std                  | 0.8          |
|    value_loss           | 128          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -649        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 91          |
|    time_elapsed         | 1100        |
|    total_timesteps      | 186368      |
| train/                  |             |
|    approx_kl            | 0.002740839 |
|    clip_fraction        | 0.0134      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.934       |
|    learning_rate        | 0.0003      |
|    loss                 | 93.8        |
|    n_updates            | 900         |
|    policy_gradient_loss | -0.00168    |
|    std                  | 0.812       |
|    value_loss           | 164         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -652         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 92           |
|    time_elapsed         | 1112         |
|    total_timesteps      | 188416       |
| train/                  |              |
|    approx_kl            | 0.0043783677 |
|    clip_fraction        | 0.0303       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.929        |
|    learning_rate        | 0.0003       |
|    loss                 | 47.9         |
|    n_updates            | 910          |
|    policy_gradient_loss | -0.00224     |
|    std                  | 0.806        |
|    value_loss           | 109          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -661         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 93           |
|    time_elapsed         | 1125         |
|    total_timesteps      | 190464       |
| train/                  |              |
|    approx_kl            | 0.0042046844 |
|    clip_fraction        | 0.0386       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.929        |
|    learning_rate        | 0.0003       |
|    loss                 | 29.9         |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.00387     |
|    std                  | 0.797        |
|    value_loss           | 92.9         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -687        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 94          |
|    time_elapsed         | 1136        |
|    total_timesteps      | 192512      |
| train/                  |             |
|    approx_kl            | 0.004386826 |
|    clip_fraction        | 0.0417      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.18       |
|    explained_variance   | 0.941       |
|    learning_rate        | 0.0003      |
|    loss                 | 24.3        |
|    n_updates            | 930         |
|    policy_gradient_loss | -0.00352    |
|    std                  | 0.781       |
|    value_loss           | 83.2        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -698         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 95           |
|    time_elapsed         | 1149         |
|    total_timesteps      | 194560       |
| train/                  |              |
|    approx_kl            | 0.0016314605 |
|    clip_fraction        | 0.00532      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | 0.947        |
|    learning_rate        | 0.0003       |
|    loss                 | 81.1         |
|    n_updates            | 940          |
|    policy_gradient_loss | -0.000515    |
|    std                  | 0.771        |
|    value_loss           | 99.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -727         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 96           |
|    time_elapsed         | 1161         |
|    total_timesteps      | 196608       |
| train/                  |              |
|    approx_kl            | 0.0025138555 |
|    clip_fraction        | 0.0118       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | 0.915        |
|    learning_rate        | 0.0003       |
|    loss                 | 43.8         |
|    n_updates            | 950          |
|    policy_gradient_loss | -0.00177     |
|    std                  | 0.784        |
|    value_loss           | 159          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -740         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 97           |
|    time_elapsed         | 1172         |
|    total_timesteps      | 198656       |
| train/                  |              |
|    approx_kl            | 0.0042493492 |
|    clip_fraction        | 0.0259       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.18        |
|    explained_variance   | 0.929        |
|    learning_rate        | 0.0003       |
|    loss                 | 66.2         |
|    n_updates            | 960          |
|    policy_gradient_loss | -0.00254     |
|    std                  | 0.787        |
|    value_loss           | 121          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -772        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 98          |
|    time_elapsed         | 1184        |
|    total_timesteps      | 200704      |
| train/                  |             |
|    approx_kl            | 0.003279517 |
|    clip_fraction        | 0.0162      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.18       |
|    explained_variance   | 0.944       |
|    learning_rate        | 0.0003      |
|    loss                 | 66.5        |
|    n_updates            | 970         |
|    policy_gradient_loss | -0.00255    |
|    std                  | 0.784       |
|    value_loss           | 183         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -780         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 99           |
|    time_elapsed         | 1196         |
|    total_timesteps      | 202752       |
| train/                  |              |
|    approx_kl            | 0.0046865568 |
|    clip_fraction        | 0.0234       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.18        |
|    explained_variance   | 0.94         |
|    learning_rate        | 0.0003       |
|    loss                 | 106          |
|    n_updates            | 980          |
|    policy_gradient_loss | -0.00211     |
|    std                  | 0.784        |
|    value_loss           | 151          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -792         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 100          |
|    time_elapsed         | 1208         |
|    total_timesteps      | 204800       |
| train/                  |              |
|    approx_kl            | 0.0049724816 |
|    clip_fraction        | 0.0371       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | 0.962        |
|    learning_rate        | 0.0003       |
|    loss                 | 40.5         |
|    n_updates            | 990          |
|    policy_gradient_loss | -0.00324     |
|    std                  | 0.778        |
|    value_loss           | 102          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -810         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 101          |
|    time_elapsed         | 1219         |
|    total_timesteps      | 206848       |
| train/                  |              |
|    approx_kl            | 0.0050731758 |
|    clip_fraction        | 0.0566       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.16        |
|    explained_variance   | 0.908        |
|    learning_rate        | 0.0003       |
|    loss                 | 91.6         |
|    n_updates            | 1000         |
|    policy_gradient_loss | -0.00479     |
|    std                  | 0.766        |
|    value_loss           | 173          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.42e+03    |
|    ep_rew_mean          | -821        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 102         |
|    time_elapsed         | 1231        |
|    total_timesteps      | 208896      |
| train/                  |             |
|    approx_kl            | 0.006434438 |
|    clip_fraction        | 0.0397      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.15       |
|    explained_variance   | 0.961       |
|    learning_rate        | 0.0003      |
|    loss                 | 16.7        |
|    n_updates            | 1010        |
|    policy_gradient_loss | -0.00336    |
|    std                  | 0.769       |
|    value_loss           | 75.1        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -831         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 103          |
|    time_elapsed         | 1244         |
|    total_timesteps      | 210944       |
| train/                  |              |
|    approx_kl            | 0.0017650473 |
|    clip_fraction        | 0.00747      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.16        |
|    explained_variance   | 0.916        |
|    learning_rate        | 0.0003       |
|    loss                 | 35.3         |
|    n_updates            | 1020         |
|    policy_gradient_loss | -0.000426    |
|    std                  | 0.778        |
|    value_loss           | 112          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.42e+03     |
|    ep_rew_mean          | -840         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 104          |
|    time_elapsed         | 1256         |
|    total_timesteps      | 212992       |
| train/                  |              |
|    approx_kl            | 0.0067861653 |
|    clip_fraction        | 0.0458       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | 0.935        |
|    learning_rate        | 0.0003       |
|    loss                 | 29.8         |
|    n_updates            | 1030         |
|    policy_gradient_loss | -0.00369     |
|    std                  | 0.787        |
|    value_loss           | 83.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -852         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 105          |
|    time_elapsed         | 1268         |
|    total_timesteps      | 215040       |
| train/                  |              |
|    approx_kl            | 0.0030317307 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | 0.932        |
|    learning_rate        | 0.0003       |
|    loss                 | 18.1         |
|    n_updates            | 1040         |
|    policy_gradient_loss | -0.00238     |
|    std                  | 0.774        |
|    value_loss           | 73           |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -870        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 106         |
|    time_elapsed         | 1279        |
|    total_timesteps      | 217088      |
| train/                  |             |
|    approx_kl            | 0.004004136 |
|    clip_fraction        | 0.0404      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.15       |
|    explained_variance   | 0.883       |
|    learning_rate        | 0.0003      |
|    loss                 | 37.4        |
|    n_updates            | 1050        |
|    policy_gradient_loss | -0.00333    |
|    std                  | 0.759       |
|    value_loss           | 117         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -875         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 107          |
|    time_elapsed         | 1291         |
|    total_timesteps      | 219136       |
| train/                  |              |
|    approx_kl            | 0.0006586318 |
|    clip_fraction        | 0.0105       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.14        |
|    explained_variance   | 0.952        |
|    learning_rate        | 0.0003       |
|    loss                 | 52.7         |
|    n_updates            | 1060         |
|    policy_gradient_loss | -0.000876    |
|    std                  | 0.758        |
|    value_loss           | 81.1         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -895         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 108          |
|    time_elapsed         | 1302         |
|    total_timesteps      | 221184       |
| train/                  |              |
|    approx_kl            | 0.0067468416 |
|    clip_fraction        | 0.0507       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.13        |
|    explained_variance   | 0.884        |
|    learning_rate        | 0.0003       |
|    loss                 | 37.6         |
|    n_updates            | 1070         |
|    policy_gradient_loss | -0.00358     |
|    std                  | 0.747        |
|    value_loss           | 108          |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.43e+03      |
|    ep_rew_mean          | -909          |
| time/                   |               |
|    fps                  | 169           |
|    iterations           | 109           |
|    time_elapsed         | 1314          |
|    total_timesteps      | 223232        |
| train/                  |               |
|    approx_kl            | 0.00061582023 |
|    clip_fraction        | 0.00752       |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.12         |
|    explained_variance   | 0.928         |
|    learning_rate        | 0.0003        |
|    loss                 | 33.9          |
|    n_updates            | 1080          |
|    policy_gradient_loss | 0.000181      |
|    std                  | 0.737         |
|    value_loss           | 94.3          |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -918         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 110          |
|    time_elapsed         | 1325         |
|    total_timesteps      | 225280       |
| train/                  |              |
|    approx_kl            | 0.0016459263 |
|    clip_fraction        | 0.00669      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.11        |
|    explained_variance   | 0.873        |
|    learning_rate        | 0.0003       |
|    loss                 | 73.8         |
|    n_updates            | 1090         |
|    policy_gradient_loss | -0.00184     |
|    std                  | 0.727        |
|    value_loss           | 207          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -919         |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 111          |
|    time_elapsed         | 1336         |
|    total_timesteps      | 227328       |
| train/                  |              |
|    approx_kl            | 0.0050191865 |
|    clip_fraction        | 0.0222       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | 0.897        |
|    learning_rate        | 0.0003       |
|    loss                 | 33.1         |
|    n_updates            | 1100         |
|    policy_gradient_loss | -0.00147     |
|    std                  | 0.717        |
|    value_loss           | 111          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -921         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 112          |
|    time_elapsed         | 1349         |
|    total_timesteps      | 229376       |
| train/                  |              |
|    approx_kl            | 0.0042277696 |
|    clip_fraction        | 0.033        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | 0.936        |
|    learning_rate        | 0.0003       |
|    loss                 | 19.9         |
|    n_updates            | 1110         |
|    policy_gradient_loss | -0.00217     |
|    std                  | 0.719        |
|    value_loss           | 62.8         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -938        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 113         |
|    time_elapsed         | 1362        |
|    total_timesteps      | 231424      |
| train/                  |             |
|    approx_kl            | 0.002204191 |
|    clip_fraction        | 0.0239      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.08       |
|    explained_variance   | 0.927       |
|    learning_rate        | 0.0003      |
|    loss                 | 34.8        |
|    n_updates            | 1120        |
|    policy_gradient_loss | -0.0015     |
|    std                  | 0.711       |
|    value_loss           | 51.9        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -951         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 114          |
|    time_elapsed         | 1375         |
|    total_timesteps      | 233472       |
| train/                  |              |
|    approx_kl            | 0.0034697955 |
|    clip_fraction        | 0.0154       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.08        |
|    explained_variance   | 0.82         |
|    learning_rate        | 0.0003       |
|    loss                 | 78.3         |
|    n_updates            | 1130         |
|    policy_gradient_loss | -0.00138     |
|    std                  | 0.716        |
|    value_loss           | 156          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -965        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 115         |
|    time_elapsed         | 1386        |
|    total_timesteps      | 235520      |
| train/                  |             |
|    approx_kl            | 0.004593921 |
|    clip_fraction        | 0.0414      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 0.814       |
|    learning_rate        | 0.0003      |
|    loss                 | 64.1        |
|    n_updates            | 1140        |
|    policy_gradient_loss | -0.00314    |
|    std                  | 0.721       |
|    value_loss           | 173         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -977         |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 116          |
|    time_elapsed         | 1398         |
|    total_timesteps      | 237568       |
| train/                  |              |
|    approx_kl            | 0.0028206087 |
|    clip_fraction        | 0.0255       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | 0.892        |
|    learning_rate        | 0.0003       |
|    loss                 | 22           |
|    n_updates            | 1150         |
|    policy_gradient_loss | -0.00137     |
|    std                  | 0.717        |
|    value_loss           | 75.3         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -991        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 117         |
|    time_elapsed         | 1411        |
|    total_timesteps      | 239616      |
| train/                  |             |
|    approx_kl            | 0.001999617 |
|    clip_fraction        | 0.0107      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.08       |
|    explained_variance   | 0.755       |
|    learning_rate        | 0.0003      |
|    loss                 | 89.2        |
|    n_updates            | 1160        |
|    policy_gradient_loss | -0.000487   |
|    std                  | 0.714       |
|    value_loss           | 257         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1e+03       |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 118          |
|    time_elapsed         | 1423         |
|    total_timesteps      | 241664       |
| train/                  |              |
|    approx_kl            | 0.0029409067 |
|    clip_fraction        | 0.0108       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.08        |
|    explained_variance   | 0.829        |
|    learning_rate        | 0.0003       |
|    loss                 | 43.9         |
|    n_updates            | 1170         |
|    policy_gradient_loss | -0.000919    |
|    std                  | 0.717        |
|    value_loss           | 124          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.01e+03    |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 119          |
|    time_elapsed         | 1435         |
|    total_timesteps      | 243712       |
| train/                  |              |
|    approx_kl            | 0.0033354668 |
|    clip_fraction        | 0.0174       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.08        |
|    explained_variance   | 0.906        |
|    learning_rate        | 0.0003       |
|    loss                 | 62.9         |
|    n_updates            | 1180         |
|    policy_gradient_loss | -0.0013      |
|    std                  | 0.71         |
|    value_loss           | 109          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.02e+03    |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 120          |
|    time_elapsed         | 1446         |
|    total_timesteps      | 245760       |
| train/                  |              |
|    approx_kl            | 0.0027391724 |
|    clip_fraction        | 0.0153       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.07        |
|    explained_variance   | 0.897        |
|    learning_rate        | 0.0003       |
|    loss                 | 22.5         |
|    n_updates            | 1190         |
|    policy_gradient_loss | -0.00133     |
|    std                  | 0.699        |
|    value_loss           | 90.6         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.03e+03    |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 121          |
|    time_elapsed         | 1458         |
|    total_timesteps      | 247808       |
| train/                  |              |
|    approx_kl            | 0.0030828323 |
|    clip_fraction        | 0.0216       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.05        |
|    explained_variance   | 0.902        |
|    learning_rate        | 0.0003       |
|    loss                 | 16.7         |
|    n_updates            | 1200         |
|    policy_gradient_loss | -0.00157     |
|    std                  | 0.692        |
|    value_loss           | 72.8         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.04e+03    |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 122          |
|    time_elapsed         | 1470         |
|    total_timesteps      | 249856       |
| train/                  |              |
|    approx_kl            | 0.0050262855 |
|    clip_fraction        | 0.0101       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.04        |
|    explained_variance   | 0.894        |
|    learning_rate        | 0.0003       |
|    loss                 | 26.5         |
|    n_updates            | 1210         |
|    policy_gradient_loss | -0.000161    |
|    std                  | 0.682        |
|    value_loss           | 93.4         |
------------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.43e+03  |
|    ep_rew_mean          | -1.04e+03 |
| time/                   |           |
|    fps                  | 169       |
|    iterations           | 123       |
|    time_elapsed         | 1482      |
|    total_timesteps      | 251904    |
| train/                  |           |
|    approx_kl            | 0.0032933 |
|    clip_fraction        | 0.0332    |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.03     |
|    explained_variance   | 0.917     |
|    learning_rate        | 0.0003    |
|    loss                 | 36.5      |
|    n_updates            | 1220      |
|    policy_gradient_loss | -0.00393  |
|    std                  | 0.677     |
|    value_loss           | 105       |
---------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.07e+03    |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 124          |
|    time_elapsed         | 1494         |
|    total_timesteps      | 253952       |
| train/                  |              |
|    approx_kl            | 0.0027506747 |
|    clip_fraction        | 0.0201       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.02        |
|    explained_variance   | 0.899        |
|    learning_rate        | 0.0003       |
|    loss                 | 48.4         |
|    n_updates            | 1230         |
|    policy_gradient_loss | -0.00169     |
|    std                  | 0.667        |
|    value_loss           | 111          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.08e+03    |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 125          |
|    time_elapsed         | 1506         |
|    total_timesteps      | 256000       |
| train/                  |              |
|    approx_kl            | 0.0025235163 |
|    clip_fraction        | 0.00776      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.01        |
|    explained_variance   | 0.83         |
|    learning_rate        | 0.0003       |
|    loss                 | 124          |
|    n_updates            | 1240         |
|    policy_gradient_loss | -0.000988    |
|    std                  | 0.661        |
|    value_loss           | 264          |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.43e+03   |
|    ep_rew_mean          | -1.09e+03  |
| time/                   |            |
|    fps                  | 169        |
|    iterations           | 126        |
|    time_elapsed         | 1519       |
|    total_timesteps      | 258048     |
| train/                  |            |
|    approx_kl            | 0.00243528 |
|    clip_fraction        | 0.0115     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1         |
|    explained_variance   | 0.901      |
|    learning_rate        | 0.0003     |
|    loss                 | 119        |
|    n_updates            | 1250       |
|    policy_gradient_loss | -0.000483  |
|    std                  | 0.653      |
|    value_loss           | 271        |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.1e+03     |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 127          |
|    time_elapsed         | 1530         |
|    total_timesteps      | 260096       |
| train/                  |              |
|    approx_kl            | 0.0035218487 |
|    clip_fraction        | 0.0264       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.997       |
|    explained_variance   | 0.907        |
|    learning_rate        | 0.0003       |
|    loss                 | 46.1         |
|    n_updates            | 1260         |
|    policy_gradient_loss | -0.00282     |
|    std                  | 0.657        |
|    value_loss           | 113          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.1e+03     |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 128          |
|    time_elapsed         | 1542         |
|    total_timesteps      | 262144       |
| train/                  |              |
|    approx_kl            | 0.0019176272 |
|    clip_fraction        | 0.00386      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.996       |
|    explained_variance   | 0.904        |
|    learning_rate        | 0.0003       |
|    loss                 | 59.8         |
|    n_updates            | 1270         |
|    policy_gradient_loss | -0.000179    |
|    std                  | 0.653        |
|    value_loss           | 121          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -1.11e+03   |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 129         |
|    time_elapsed         | 1554        |
|    total_timesteps      | 264192      |
| train/                  |             |
|    approx_kl            | 0.005875936 |
|    clip_fraction        | 0.0496      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.99       |
|    explained_variance   | 0.917       |
|    learning_rate        | 0.0003      |
|    loss                 | 63.5        |
|    n_updates            | 1280        |
|    policy_gradient_loss | -0.00431    |
|    std                  | 0.649       |
|    value_loss           | 166         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.12e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 130          |
|    time_elapsed         | 1565         |
|    total_timesteps      | 266240       |
| train/                  |              |
|    approx_kl            | 0.0038489501 |
|    clip_fraction        | 0.0461       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.986       |
|    explained_variance   | 0.898        |
|    learning_rate        | 0.0003       |
|    loss                 | 37           |
|    n_updates            | 1290         |
|    policy_gradient_loss | -0.00282     |
|    std                  | 0.647        |
|    value_loss           | 127          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.43e+03     |
|    ep_rew_mean          | -1.12e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 131          |
|    time_elapsed         | 1577         |
|    total_timesteps      | 268288       |
| train/                  |              |
|    approx_kl            | 0.0044520725 |
|    clip_fraction        | 0.0413       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.989       |
|    explained_variance   | 0.934        |
|    learning_rate        | 0.0003       |
|    loss                 | 95.4         |
|    n_updates            | 1300         |
|    policy_gradient_loss | -0.00315     |
|    std                  | 0.654        |
|    value_loss           | 155          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -1.14e+03   |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 132         |
|    time_elapsed         | 1589        |
|    total_timesteps      | 270336      |
| train/                  |             |
|    approx_kl            | 0.003049071 |
|    clip_fraction        | 0.0111      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.992      |
|    explained_variance   | 0.923       |
|    learning_rate        | 0.0003      |
|    loss                 | 71.9        |
|    n_updates            | 1310        |
|    policy_gradient_loss | -0.00146    |
|    std                  | 0.654       |
|    value_loss           | 179         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.43e+03    |
|    ep_rew_mean          | -1.14e+03   |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 133         |
|    time_elapsed         | 1602        |
|    total_timesteps      | 272384      |
| train/                  |             |
|    approx_kl            | 0.005158835 |
|    clip_fraction        | 0.0329      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.997      |
|    explained_variance   | 0.945       |
|    learning_rate        | 0.0003      |
|    loss                 | 32          |
|    n_updates            | 1320        |
|    policy_gradient_loss | -0.00174    |
|    std                  | 0.659       |
|    value_loss           | 120         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.15e+03    |
| time/                   |              |
|    fps                  | 169          |
|    iterations           | 134          |
|    time_elapsed         | 1614         |
|    total_timesteps      | 274432       |
| train/                  |              |
|    approx_kl            | 0.0040050843 |
|    clip_fraction        | 0.0145       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.01        |
|    explained_variance   | 0.919        |
|    learning_rate        | 0.0003       |
|    loss                 | 70.6         |
|    n_updates            | 1330         |
|    policy_gradient_loss | -0.000467    |
|    std                  | 0.663        |
|    value_loss           | 147          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.15e+03   |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 135         |
|    time_elapsed         | 1626        |
|    total_timesteps      | 276480      |
| train/                  |             |
|    approx_kl            | 0.004632881 |
|    clip_fraction        | 0.0272      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1          |
|    explained_variance   | 0.949       |
|    learning_rate        | 0.0003      |
|    loss                 | 38.8        |
|    n_updates            | 1340        |
|    policy_gradient_loss | -0.00212    |
|    std                  | 0.655       |
|    value_loss           | 106         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.15e+03   |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 136         |
|    time_elapsed         | 1638        |
|    total_timesteps      | 278528      |
| train/                  |             |
|    approx_kl            | 0.004639617 |
|    clip_fraction        | 0.0217      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.982      |
|    explained_variance   | 0.918       |
|    learning_rate        | 0.0003      |
|    loss                 | 57          |
|    n_updates            | 1350        |
|    policy_gradient_loss | -0.00121    |
|    std                  | 0.639       |
|    value_loss           | 181         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.16e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 137          |
|    time_elapsed         | 1649         |
|    total_timesteps      | 280576       |
| train/                  |              |
|    approx_kl            | 0.0021477477 |
|    clip_fraction        | 0.0337       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.969       |
|    explained_variance   | 0.941        |
|    learning_rate        | 0.0003       |
|    loss                 | 54.9         |
|    n_updates            | 1360         |
|    policy_gradient_loss | -0.00208     |
|    std                  | 0.637        |
|    value_loss           | 106          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.16e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 138          |
|    time_elapsed         | 1661         |
|    total_timesteps      | 282624       |
| train/                  |              |
|    approx_kl            | 0.0034320327 |
|    clip_fraction        | 0.014        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.96        |
|    explained_variance   | 0.922        |
|    learning_rate        | 0.0003       |
|    loss                 | 59.6         |
|    n_updates            | 1370         |
|    policy_gradient_loss | -0.00128     |
|    std                  | 0.626        |
|    value_loss           | 180          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.17e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 139          |
|    time_elapsed         | 1672         |
|    total_timesteps      | 284672       |
| train/                  |              |
|    approx_kl            | 0.0029138667 |
|    clip_fraction        | 0.015        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.939       |
|    explained_variance   | 0.942        |
|    learning_rate        | 0.0003       |
|    loss                 | 69.8         |
|    n_updates            | 1380         |
|    policy_gradient_loss | -0.00238     |
|    std                  | 0.61         |
|    value_loss           | 156          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.16e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 140          |
|    time_elapsed         | 1684         |
|    total_timesteps      | 286720       |
| train/                  |              |
|    approx_kl            | 0.0024359373 |
|    clip_fraction        | 0.0158       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.926       |
|    explained_variance   | 0.915        |
|    learning_rate        | 0.0003       |
|    loss                 | 43.4         |
|    n_updates            | 1390         |
|    policy_gradient_loss | 2.51e-05     |
|    std                  | 0.611        |
|    value_loss           | 145          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.16e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 141          |
|    time_elapsed         | 1695         |
|    total_timesteps      | 288768       |
| train/                  |              |
|    approx_kl            | 0.0024347443 |
|    clip_fraction        | 0.0173       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.922       |
|    explained_variance   | 0.911        |
|    learning_rate        | 0.0003       |
|    loss                 | 34.5         |
|    n_updates            | 1400         |
|    policy_gradient_loss | -0.00102     |
|    std                  | 0.605        |
|    value_loss           | 92.8         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.17e+03   |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 142         |
|    time_elapsed         | 1708        |
|    total_timesteps      | 290816      |
| train/                  |             |
|    approx_kl            | 0.010215918 |
|    clip_fraction        | 0.103       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.909      |
|    explained_variance   | 0.915       |
|    learning_rate        | 0.0003      |
|    loss                 | 39.6        |
|    n_updates            | 1410        |
|    policy_gradient_loss | -0.00474    |
|    std                  | 0.597       |
|    value_loss           | 142         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.16e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 143          |
|    time_elapsed         | 1721         |
|    total_timesteps      | 292864       |
| train/                  |              |
|    approx_kl            | 0.0019613402 |
|    clip_fraction        | 0.00898      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.903       |
|    explained_variance   | 0.867        |
|    learning_rate        | 0.0003       |
|    loss                 | 104          |
|    n_updates            | 1420         |
|    policy_gradient_loss | -0.000884    |
|    std                  | 0.596        |
|    value_loss           | 268          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.16e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 144          |
|    time_elapsed         | 1733         |
|    total_timesteps      | 294912       |
| train/                  |              |
|    approx_kl            | 0.0032059615 |
|    clip_fraction        | 0.0206       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.892       |
|    explained_variance   | 0.816        |
|    learning_rate        | 0.0003       |
|    loss                 | 75.7         |
|    n_updates            | 1430         |
|    policy_gradient_loss | -0.000819    |
|    std                  | 0.584        |
|    value_loss           | 188          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.16e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 145          |
|    time_elapsed         | 1745         |
|    total_timesteps      | 296960       |
| train/                  |              |
|    approx_kl            | 0.0036114058 |
|    clip_fraction        | 0.0474       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.877       |
|    explained_variance   | 0.908        |
|    learning_rate        | 0.0003       |
|    loss                 | 31.7         |
|    n_updates            | 1440         |
|    policy_gradient_loss | -0.00226     |
|    std                  | 0.58         |
|    value_loss           | 118          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.15e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 146          |
|    time_elapsed         | 1757         |
|    total_timesteps      | 299008       |
| train/                  |              |
|    approx_kl            | 0.0030694532 |
|    clip_fraction        | 0.0279       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.856       |
|    explained_variance   | 0.89         |
|    learning_rate        | 0.0003       |
|    loss                 | 45.7         |
|    n_updates            | 1450         |
|    policy_gradient_loss | -0.00294     |
|    std                  | 0.559        |
|    value_loss           | 143          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.14e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 147          |
|    time_elapsed         | 1770         |
|    total_timesteps      | 301056       |
| train/                  |              |
|    approx_kl            | 0.0034744442 |
|    clip_fraction        | 0.0354       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.837       |
|    explained_variance   | 0.919        |
|    learning_rate        | 0.0003       |
|    loss                 | 13.2         |
|    n_updates            | 1460         |
|    policy_gradient_loss | -0.00183     |
|    std                  | 0.557        |
|    value_loss           | 44.2         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.14e+03   |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 148         |
|    time_elapsed         | 1782        |
|    total_timesteps      | 303104      |
| train/                  |             |
|    approx_kl            | 0.004477505 |
|    clip_fraction        | 0.0251      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.83       |
|    explained_variance   | 0.918       |
|    learning_rate        | 0.0003      |
|    loss                 | 46.8        |
|    n_updates            | 1470        |
|    policy_gradient_loss | -0.00146    |
|    std                  | 0.554       |
|    value_loss           | 111         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.13e+03   |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 149         |
|    time_elapsed         | 1794        |
|    total_timesteps      | 305152      |
| train/                  |             |
|    approx_kl            | 0.005626929 |
|    clip_fraction        | 0.05        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.832      |
|    explained_variance   | 0.901       |
|    learning_rate        | 0.0003      |
|    loss                 | 42.5        |
|    n_updates            | 1480        |
|    policy_gradient_loss | -0.0041     |
|    std                  | 0.559       |
|    value_loss           | 95.3        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.11e+03   |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 150         |
|    time_elapsed         | 1806        |
|    total_timesteps      | 307200      |
| train/                  |             |
|    approx_kl            | 0.004458686 |
|    clip_fraction        | 0.0539      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.838      |
|    explained_variance   | 0.879       |
|    learning_rate        | 0.0003      |
|    loss                 | 29.6        |
|    n_updates            | 1490        |
|    policy_gradient_loss | -0.00289    |
|    std                  | 0.561       |
|    value_loss           | 95.9        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.11e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 151          |
|    time_elapsed         | 1817         |
|    total_timesteps      | 309248       |
| train/                  |              |
|    approx_kl            | 0.0026205904 |
|    clip_fraction        | 0.0183       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.836       |
|    explained_variance   | 0.89         |
|    learning_rate        | 0.0003       |
|    loss                 | 34.6         |
|    n_updates            | 1500         |
|    policy_gradient_loss | -0.000509    |
|    std                  | 0.556        |
|    value_loss           | 75           |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.11e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 152          |
|    time_elapsed         | 1829         |
|    total_timesteps      | 311296       |
| train/                  |              |
|    approx_kl            | 0.0039227605 |
|    clip_fraction        | 0.0327       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.823       |
|    explained_variance   | 0.866        |
|    learning_rate        | 0.0003       |
|    loss                 | 22           |
|    n_updates            | 1510         |
|    policy_gradient_loss | -0.00154     |
|    std                  | 0.547        |
|    value_loss           | 90           |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.1e+03     |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 153          |
|    time_elapsed         | 1841         |
|    total_timesteps      | 313344       |
| train/                  |              |
|    approx_kl            | 0.0032632356 |
|    clip_fraction        | 0.0202       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.817       |
|    explained_variance   | 0.889        |
|    learning_rate        | 0.0003       |
|    loss                 | 35.9         |
|    n_updates            | 1520         |
|    policy_gradient_loss | -0.00114     |
|    std                  | 0.548        |
|    value_loss           | 102          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.1e+03     |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 154          |
|    time_elapsed         | 1854         |
|    total_timesteps      | 315392       |
| train/                  |              |
|    approx_kl            | 0.0055648074 |
|    clip_fraction        | 0.0353       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.804       |
|    explained_variance   | 0.93         |
|    learning_rate        | 0.0003       |
|    loss                 | 21           |
|    n_updates            | 1530         |
|    policy_gradient_loss | -0.000953    |
|    std                  | 0.534        |
|    value_loss           | 43.2         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 155          |
|    time_elapsed         | 1866         |
|    total_timesteps      | 317440       |
| train/                  |              |
|    approx_kl            | 0.0037261944 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.795       |
|    explained_variance   | 0.889        |
|    learning_rate        | 0.0003       |
|    loss                 | 28.7         |
|    n_updates            | 1540         |
|    policy_gradient_loss | -0.00324     |
|    std                  | 0.537        |
|    value_loss           | 91           |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.08e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 156          |
|    time_elapsed         | 1878         |
|    total_timesteps      | 319488       |
| train/                  |              |
|    approx_kl            | 0.0032542548 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.8         |
|    explained_variance   | 0.856        |
|    learning_rate        | 0.0003       |
|    loss                 | 21.8         |
|    n_updates            | 1550         |
|    policy_gradient_loss | 1.74e-05     |
|    std                  | 0.539        |
|    value_loss           | 75.1         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.07e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 157          |
|    time_elapsed         | 1890         |
|    total_timesteps      | 321536       |
| train/                  |              |
|    approx_kl            | 0.0024039252 |
|    clip_fraction        | 0.00923      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.803       |
|    explained_variance   | 0.853        |
|    learning_rate        | 0.0003       |
|    loss                 | 44.2         |
|    n_updates            | 1560         |
|    policy_gradient_loss | -0.000173    |
|    std                  | 0.54         |
|    value_loss           | 90.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 158          |
|    time_elapsed         | 1902         |
|    total_timesteps      | 323584       |
| train/                  |              |
|    approx_kl            | 0.0023875367 |
|    clip_fraction        | 0.016        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.81        |
|    explained_variance   | 0.838        |
|    learning_rate        | 0.0003       |
|    loss                 | 21.3         |
|    n_updates            | 1570         |
|    policy_gradient_loss | -0.00175     |
|    std                  | 0.546        |
|    value_loss           | 67.7         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.08e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 159          |
|    time_elapsed         | 1913         |
|    total_timesteps      | 325632       |
| train/                  |              |
|    approx_kl            | 0.0045974087 |
|    clip_fraction        | 0.0343       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.816       |
|    explained_variance   | 0.77         |
|    learning_rate        | 0.0003       |
|    loss                 | 54.8         |
|    n_updates            | 1580         |
|    policy_gradient_loss | -0.0026      |
|    std                  | 0.548        |
|    value_loss           | 162          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.08e+03   |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 160         |
|    time_elapsed         | 1925        |
|    total_timesteps      | 327680      |
| train/                  |             |
|    approx_kl            | 0.003956628 |
|    clip_fraction        | 0.0397      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.818      |
|    explained_variance   | 0.843       |
|    learning_rate        | 0.0003      |
|    loss                 | 48.5        |
|    n_updates            | 1590        |
|    policy_gradient_loss | -0.0027     |
|    std                  | 0.548       |
|    value_loss           | 128         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 161          |
|    time_elapsed         | 1936         |
|    total_timesteps      | 329728       |
| train/                  |              |
|    approx_kl            | 0.0055340016 |
|    clip_fraction        | 0.0261       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.817       |
|    explained_variance   | 0.818        |
|    learning_rate        | 0.0003       |
|    loss                 | 76.8         |
|    n_updates            | 1600         |
|    policy_gradient_loss | -0.00182     |
|    std                  | 0.548        |
|    value_loss           | 367          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.11e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 162          |
|    time_elapsed         | 1949         |
|    total_timesteps      | 331776       |
| train/                  |              |
|    approx_kl            | 0.0047842236 |
|    clip_fraction        | 0.0402       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.82        |
|    explained_variance   | 0.768        |
|    learning_rate        | 0.0003       |
|    loss                 | 128          |
|    n_updates            | 1610         |
|    policy_gradient_loss | -0.0035      |
|    std                  | 0.552        |
|    value_loss           | 331          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.1e+03     |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 163          |
|    time_elapsed         | 1962         |
|    total_timesteps      | 333824       |
| train/                  |              |
|    approx_kl            | 0.0025340708 |
|    clip_fraction        | 0.0164       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.823       |
|    explained_variance   | 0.81         |
|    learning_rate        | 0.0003       |
|    loss                 | 151          |
|    n_updates            | 1620         |
|    policy_gradient_loss | -0.00121     |
|    std                  | 0.551        |
|    value_loss           | 367          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.1e+03    |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 164         |
|    time_elapsed         | 1974        |
|    total_timesteps      | 335872      |
| train/                  |             |
|    approx_kl            | 0.003205207 |
|    clip_fraction        | 0.0245      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.819      |
|    explained_variance   | 0.881       |
|    learning_rate        | 0.0003      |
|    loss                 | 108         |
|    n_updates            | 1630        |
|    policy_gradient_loss | -0.000944   |
|    std                  | 0.547       |
|    value_loss           | 152         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 165          |
|    time_elapsed         | 1986         |
|    total_timesteps      | 337920       |
| train/                  |              |
|    approx_kl            | 0.0032456052 |
|    clip_fraction        | 0.026        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.816       |
|    explained_variance   | 0.91         |
|    learning_rate        | 0.0003       |
|    loss                 | 48.1         |
|    n_updates            | 1640         |
|    policy_gradient_loss | -0.00195     |
|    std                  | 0.548        |
|    value_loss           | 136          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 166          |
|    time_elapsed         | 1998         |
|    total_timesteps      | 339968       |
| train/                  |              |
|    approx_kl            | 0.0037831517 |
|    clip_fraction        | 0.0445       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.812       |
|    explained_variance   | 0.895        |
|    learning_rate        | 0.0003       |
|    loss                 | 33           |
|    n_updates            | 1650         |
|    policy_gradient_loss | -0.00185     |
|    std                  | 0.543        |
|    value_loss           | 72.8         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.44e+03    |
|    ep_rew_mean          | -1.08e+03   |
| time/                   |             |
|    fps                  | 170         |
|    iterations           | 167         |
|    time_elapsed         | 2009        |
|    total_timesteps      | 342016      |
| train/                  |             |
|    approx_kl            | 0.004355565 |
|    clip_fraction        | 0.0278      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.802      |
|    explained_variance   | 0.72        |
|    learning_rate        | 0.0003      |
|    loss                 | 91.1        |
|    n_updates            | 1660        |
|    policy_gradient_loss | -0.00188    |
|    std                  | 0.536       |
|    value_loss           | 210         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 168          |
|    time_elapsed         | 2021         |
|    total_timesteps      | 344064       |
| train/                  |              |
|    approx_kl            | 0.0041165645 |
|    clip_fraction        | 0.039        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.8         |
|    explained_variance   | 0.884        |
|    learning_rate        | 0.0003       |
|    loss                 | 48.6         |
|    n_updates            | 1670         |
|    policy_gradient_loss | -0.00353     |
|    std                  | 0.54         |
|    value_loss           | 160          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.1e+03     |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 169          |
|    time_elapsed         | 2033         |
|    total_timesteps      | 346112       |
| train/                  |              |
|    approx_kl            | 0.0027033007 |
|    clip_fraction        | 0.0356       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.806       |
|    explained_variance   | 0.766        |
|    learning_rate        | 0.0003       |
|    loss                 | 179          |
|    n_updates            | 1680         |
|    policy_gradient_loss | -0.00377     |
|    std                  | 0.542        |
|    value_loss           | 448          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.1e+03     |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 170          |
|    time_elapsed         | 2044         |
|    total_timesteps      | 348160       |
| train/                  |              |
|    approx_kl            | 0.0027256408 |
|    clip_fraction        | 0.0391       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.8         |
|    explained_variance   | 0.89         |
|    learning_rate        | 0.0003       |
|    loss                 | 170          |
|    n_updates            | 1690         |
|    policy_gradient_loss | -0.00467     |
|    std                  | 0.535        |
|    value_loss           | 285          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.1e+03     |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 171          |
|    time_elapsed         | 2056         |
|    total_timesteps      | 350208       |
| train/                  |              |
|    approx_kl            | 0.0038179918 |
|    clip_fraction        | 0.0279       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.787       |
|    explained_variance   | 0.858        |
|    learning_rate        | 0.0003       |
|    loss                 | 83           |
|    n_updates            | 1700         |
|    policy_gradient_loss | -0.000606    |
|    std                  | 0.529        |
|    value_loss           | 171          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 172          |
|    time_elapsed         | 2069         |
|    total_timesteps      | 352256       |
| train/                  |              |
|    approx_kl            | 0.0071905963 |
|    clip_fraction        | 0.0336       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.781       |
|    explained_variance   | 0.853        |
|    learning_rate        | 0.0003       |
|    loss                 | 41.6         |
|    n_updates            | 1710         |
|    policy_gradient_loss | -0.00114     |
|    std                  | 0.529        |
|    value_loss           | 121          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 173          |
|    time_elapsed         | 2083         |
|    total_timesteps      | 354304       |
| train/                  |              |
|    approx_kl            | 0.0041191177 |
|    clip_fraction        | 0.0281       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.784       |
|    explained_variance   | 0.874        |
|    learning_rate        | 0.0003       |
|    loss                 | 37.4         |
|    n_updates            | 1720         |
|    policy_gradient_loss | -0.003       |
|    std                  | 0.531        |
|    value_loss           | 126          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 174          |
|    time_elapsed         | 2095         |
|    total_timesteps      | 356352       |
| train/                  |              |
|    approx_kl            | 0.0045599565 |
|    clip_fraction        | 0.0406       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.779       |
|    explained_variance   | 0.882        |
|    learning_rate        | 0.0003       |
|    loss                 | 66.1         |
|    n_updates            | 1730         |
|    policy_gradient_loss | -0.0035      |
|    std                  | 0.523        |
|    value_loss           | 137          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.09e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 175          |
|    time_elapsed         | 2106         |
|    total_timesteps      | 358400       |
| train/                  |              |
|    approx_kl            | 0.0051008174 |
|    clip_fraction        | 0.0756       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.755       |
|    explained_variance   | 0.933        |
|    learning_rate        | 0.0003       |
|    loss                 | 11.8         |
|    n_updates            | 1740         |
|    policy_gradient_loss | -0.00398     |
|    std                  | 0.51         |
|    value_loss           | 50           |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | -1.08e+03    |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 176          |
|    time_elapsed         | 2119         |
|    total_timesteps      | 360448       |
| train/                  |              |
|    approx_kl            | 0.0019572652 |
|    clip_fraction        | 0.0145       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.735       |
|    explained_variance   | 0.872        |
|    learning_rate        | 0.0003       |
|    loss                 | 58.4         |
|    n_updates            | 1750         |
|    policy_gradient_loss | -0.000959    |
|    std                  | 0.5          |
|    value_loss           | 167          |
------------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7f6f42d41160>

Create a validation environment

Then attach the trained policy to the validation environment.

In [41]:
import sys

# Put the rl_env/ directory (relative to the current working directory) on the
# import path. Guarded so that re-running this cell does not keep appending
# duplicate entries to sys.path.
rl_env_dir = str(Path('rl_env'))
if rl_env_dir not in sys.path:
    sys.path.append(rl_env_dir)
from rl_env.hrl_env_hnetwork_loop import SmartMeterWorld

# Validation environment: built on the validation data loader and on the same
# H-network RL module object that is passed around elsewhere in this notebook.
# NOTE(review): with render_mode="human" the logged output shows the env
# connecting to a render server at 127.0.0.1:50007 -- presumably that server
# has to be running before this cell is executed; confirm.
env_valid = SmartMeterWorld(
    smart_meter_data_loader=sm_dl_validation,
    h_network_rl_module=h_network_rl_module,
    render_mode="human",
)

[2025-07-17 00:25:00:602] [SmartMeterWorld] Render mode set to 'human'. Render server at 127.0.0.1:50007. render_connected: True. render_client_socket: <socket.socket fd=89, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0, laddr=('127.0.0.1', 34266), raddr=('127.0.0.1', 50007)>


In [47]:
# Reset the render window of the validation env
# (presumably clears what the render server has drawn so far -- TODO confirm).
env_valid.reset_render_window()

In [43]:
# Attach the validation env to the trained model. Per this cell's output,
# the env gets wrapped in a Monitor and a DummyVecEnv when it is set.
# NOTE(review): the rollout cell below steps `env_valid` directly rather than
# through the model's wrapped env.
rl_model.set_env(env_valid)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [48]:
# Roll out one validation episode with the deterministic policy.
env_valid_seed = 5

# The value is passed positionally to reset(); presumably it selects / seeds
# the episode -- TODO confirm against SmartMeterWorld.reset.
obs, info = env_valid.reset(env_valid_seed)
for i in range(1):
    done = False
    while not done:
        action, _states = rl_model.predict(obs, deterministic=True)
        # step() returns five values; the episode is over when it is either
        # terminated or truncated. Checking only the third value would keep
        # stepping an episode that ended by truncation.
        obs, reward, terminated, truncated, info = env_valid.step(action)
        done = terminated or truncated
        print_log(f"Step: {env_valid.episode.get_current_step()}, Action: {action}, Reward: {reward}")
        env_valid.render()

[2025-07-17 00:27:10:990] [SmartMeterWorld] Resetting environment with a new episode. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-09-11 00:00:02'), Timestamp('2013-09-11 23:59:02'))}
[2025-07-17 00:27:11:000] Step: 1, Action: [0.8147611], Reward: 0.17708887810905774
[2025-07-17 00:27:11:005] Step: 2, Action: [0.81093997], Reward: 0.1811874174773693
[2025-07-17 00:27:11:009] Step: 3, Action: [0.80468327], Reward: 0.18105786076188088
[2025-07-17 00:27:11:014] Step: 4, Action: [0.80429995], Reward: 0.18087816176017124
[2025-07-17 00:27:11:017] Step: 5, Action: [0.8104201], Reward: 0.18079597077171009
[2025-07-17 00:27:11:021] Step: 6, Action: [0.8093508], Reward: 0.18078525075713794
[2025-07-17 00:27:11:026] Step: 7, Action: [0.8172339], Reward: 0.18077042319377262
[2025-07-17 00:27:11:031] Step: 8, Action: [0.8111543], Reward: 0.18081671494046847
[2025-07-17 00:27:11:038] Step: 9, Action: [0.8107143], Reward: 0.18083522830605506
[2025-07-17 00:27:11:043] Step: 10, A

In [45]:
# Display the DataFrame held by the current validation episode.
# NOTE(review): the stored output shows 2013-12-11 rows while the reset log of
# the rollout cell reports a 2013-09-11 episode, and this cell's execution count
# is lower than the rollout cell's -- the output looks stale; re-run in order.
env_valid.episode.df

Unnamed: 0,timestamp,aggregate,datetime,grid_load,battery_soc,aggregate_std
98417,1386720004,274.000000,2013-12-11 00:00:04,4127.707075,0.0,-0.425049
98418,1386720064,274.857664,2013-12-11 00:01:04,4112.500095,0.008029,-0.422770
98419,1386720124,275.368756,2013-12-11 00:02:04,4101.837463,0.016024,-0.421411
98420,1386720184,228.711933,2013-12-11 00:03:04,4228.711933,0.023995,-0.545419
98421,1386720244,211.047353,2013-12-11 00:04:04,4211.047353,0.032329,-0.592369
...,...,...,...,...,...,...
99852,1386806104,348.279965,2013-12-11 23:55:04,2756.595385,0.499239,-0.227622
99853,1386806164,348.376078,2013-12-11 23:56:04,2742.894215,0.504256,-0.227367
99854,1386806224,348.769009,2013-12-11 23:57:04,2717.696249,0.509245,-0.226323
99855,1386806284,348.182031,2013-12-11 23:58:04,2730.595179,0.51418,-0.227883


In [49]:
# Save the validation graph into the experiment folder.
# The file name embeds env_valid.selected_idx so that graphs from different
# selections do not overwrite each other.
graph_valid_file = experiment_folder / f"graph_valid_index_{env_valid.selected_idx}.png"

# save_graph receives a single dict here; the keys look like matplotlib
# savefig keyword arguments (presumably forwarded as such -- TODO confirm).
graph_valid_kwargs = {"fname": str(graph_valid_file), "dpi": 300}
env_valid.save_graph(graph_valid_kwargs)

In [73]:
# Close the validation env (its log line reports "Environment closed.").
# NOTE(review): the stored output is timestamped 2025-07-16, before the
# 2025-07-17 validation run logged above, so it appears to come from an
# earlier session; re-run the notebook top to bottom before sharing.
env_valid.close()

[2025-07-16 06:31:55:987] [SmartMeterWorld] Environment closed.


In [27]:
# Persist the trained RL model as rl_model.zip inside the experiment folder.
rl_model_path = experiment_folder.joinpath("rl_model.zip")
rl_model.save(rl_model_path)

In [28]:
# Persist the H-network next to the RL model, inside the experiment folder.
h_network_path = experiment_folder.joinpath("h_network2.pth")
h_network_rl_module.save_h_network(h_network_path)

In [None]:
# Save the H-network training-loss list to train_loss_list.npy in the experiment folder.
h_network_rl_module.save_train_loss_list(experiment_folder / "train_loss_list.npy")

---

In [None]:
# load the model & environment
import sys

# Put the rl_env/ directory (relative to the current working directory) on the
# import path. Guarded so that re-running this cell does not keep appending
# duplicate entries to sys.path.
rl_env_dir = str(Path('rl_env'))
if rl_env_dir not in sys.path:
    sys.path.append(rl_env_dir)

# NOTE(review): this SmartMeterWorld comes from rl_env.hrl_env and rebinds the
# name imported from rl_env.hrl_env_hnetwork_loop in the validation section;
# cells run afterwards see this class.
from rl_env.hrl_env import SmartMeterWorld
from stable_baselines3 import PPO
from model.H_network.h_network_arch import HNetworkType

# Test environment built on the test data loader.
env_test = SmartMeterWorld(
    sm_dl_test,
    h_model_type=HNetworkType.H_NETWORK,
    render_mode="human",
)

# NOTE(review): `h_network` and `h_network_stdscaler` are not created in this
# part of the notebook; they must already exist in the kernel -- confirm where
# they are loaded before relying on Restart & Run All.
env_test.set_h_network(h_network)
env_test.set_h_network_stdscaler(h_network_stdscaler)

# The run folder is named after the hard-coded timestamp of the training run to
# load (formatted as YYYYmmdd_HHMMSS). This rebinds `rl_model_path`, which the
# save cell above pointed at the current experiment folder.
rl_model_path = Path("rl_model", "PPO", f"{datetime(2025,7,12,18,25,2).strftime('%Y%m%d_%H%M%S')}", "rl_model.zip")
rl_model_loaded = PPO.load(rl_model_path, env=env_test)

In [None]:
# Start a test episode and keep the initial observation for the rollout cell below.
# 43 is passed positionally, like env_valid_seed in the validation section --
# presumably a seed / episode selector; TODO confirm and name it as a constant.
obs, info = env_test.reset(43)

In [None]:
# Display the info object returned by the reset above.
info

In [None]:
# Reset the render window of the test env
# (presumably clears what the render server has drawn so far -- TODO confirm).
env_test.reset_render_window()

In [None]:
# Display the DataFrame held by the current test episode.
env_test.episode.df

In [None]:
# Roll out one test episode with the loaded model's deterministic policy.
# `obs` must come from the env_test.reset(...) cell above; it is not reset here.
for i in range(1):
    done = False
    while not done:
        action, _states = rl_model_loaded.predict(obs, deterministic=True)
        # step() returns five values; the episode is over when it is either
        # terminated or truncated. Checking only the third value would keep
        # stepping an episode that ended by truncation.
        obs, reward, terminated, truncated, info = env_test.step(action)
        done = terminated or truncated
        print_log(f"Step: {env_test.episode.get_current_step()}, Action: {action}, Reward: {reward}, Info: {info}")
        env_test.render()

In [None]:
# Save the test graph under rl_model/PPO/<run timestamp>/graph_test.png.
# NOTE(review): `rl_datetime` is not defined in this part of the notebook, while
# the model above is loaded from a hard-coded 2025-07-12 18:25:02 run folder --
# confirm both refer to the same run.
# NOTE(review): env_valid.save_graph is given a dict ({"fname", "dpi"}) whereas
# a plain path string is passed here; the two env classes come from different
# modules, so confirm the expected argument.
graph_test_file = Path("rl_model", "PPO", f"{rl_datetime.strftime('%Y%m%d_%H%M%S')}", "graph_test.png")
env_test.save_graph(str(graph_test_file))

In [None]:
# Close the test env once the rollout and graph export are done.
env_test.close()