In [1]:
import gym
import pytz
from datetime import datetime, timedelta
import numpy as np
from gym_mtsim import MtSimulator, OrderType

In [2]:
from typing import List, Tuple, Dict, Any, Optional, Union, Callable

import copy
from datetime import datetime
from pathos.multiprocessing import ProcessingPool as Pool

import numpy as np
from scipy.special import expit

import matplotlib.pyplot as plt
import matplotlib.cm as plt_cm
import matplotlib.colors as plt_colors
import plotly.graph_objects as go

import gym
from gym import spaces
from gym.utils import seeding

class MtEnv(gym.Env):
    """Gym environment that trades several symbols on top of an ``MtSimulator``.

    Action (flat vector): for every trading symbol there are
    ``symbol_max_orders`` close logits, followed by one hold logit and one
    signed volume, i.e. ``symbol_max_orders + 2`` values per symbol.

    Observation (dict): ``balance``, ``equity``, ``margin``, a ``features``
    window made of the last ``window_size`` rows of ``signal_features`` and an
    ``orders`` array holding ``[entry_price, volume, profit]`` for each symbol
    and order slot.

    Reward: change of the simulator's equity since the previous step.
    """

    metadata = {'render.modes': ['human', 'simple_figure', 'advanced_figure']}

    def __init__(
            self, original_simulator: MtSimulator, trading_symbols: List[str],
            window_size: int, time_points: Optional[List[datetime]]=None,
            hold_threshold: float=0.5, close_threshold: float=0.5,
            fee: Union[float, Callable[[str], float]]=0.0005,
            symbol_max_orders: int=1, multiprocessing_processes: Optional[int]=None
        ) -> None:
        """Validate the configuration and precompute prices and features.

        Args:
            original_simulator: Simulator holding the symbols data; it is never
                stepped directly, ``reset`` works on a deep copy of it.
            trading_symbols: Symbols the agent may trade.
            window_size: Number of consecutive time points in one observation.
            time_points: Time points to iterate over. Defaults to the index of
                the first trading symbol's data.
            hold_threshold: A symbol is held (no new order) when the sigmoid of
                its hold logit is above this value.
            close_threshold: An open order is closed when the sigmoid of its
                close logit is above this value.
            fee: Constant fee, or a callable mapping a symbol to a fee.
            symbol_max_orders: Order slots per symbol; forced to 1 when the
                simulator is not in hedge mode.
            multiprocessing_processes: If truthy, size of the process pool used
                to read the per-time-point data.
        """
        # validations (each assert raises AssertionError with the given message)
        assert len(original_simulator.symbols_data) > 0, "no data available"
        assert len(original_simulator.symbols_info) > 0, "no data available"
        assert len(trading_symbols) > 0, "no trading symbols provided"
        assert 0. <= hold_threshold <= 1., "'hold_threshold' must be in range [0., 1.]"
        # Both thresholds are compared against sigmoid outputs, so the same
        # range applies to 'close_threshold'.
        assert 0. <= close_threshold <= 1., "'close_threshold' must be in range [0., 1.]"

        if not original_simulator.hedge:
            symbol_max_orders = 1

        for symbol in trading_symbols:
            assert symbol in original_simulator.symbols_info, f"symbol '{symbol}' not found"
            currency_profit = original_simulator.symbols_info[symbol].currency_profit
            assert original_simulator._get_unit_symbol_info(currency_profit) is not None, \
                   f"unit symbol for '{currency_profit}' not found"

        if time_points is None:
            time_points = original_simulator.symbols_data[trading_symbols[0]].index.to_pydatetime().tolist()
        assert len(time_points) > window_size, "not enough time points provided"

        # attributes
        self.seed()
        self.original_simulator = original_simulator
        self.trading_symbols = trading_symbols
        # Number of time points (current and previous) in each observation's features.
        self.window_size = window_size
        # Time axis the episode walks along; ticks are indices into this list.
        self.time_points = time_points
        self.hold_threshold = hold_threshold
        self.close_threshold = close_threshold
        self.fee = fee
        self.symbol_max_orders = symbol_max_orders
        # Optional process pool used while collecting prices/indicators.
        self.multiprocessing_pool = Pool(multiprocessing_processes) if multiprocessing_processes else None

        # Indicators feed the observation features; prices are used for rendering.
        self.indicator = self._get_indicator()
        self.prices = self._get_prices()
        self.signal_features = self._process_data()
        self.features_shape = (window_size, self.signal_features.shape[1])

        # spaces
        self.action_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(len(self.trading_symbols) * (self.symbol_max_orders + 2),)
        )  # symbol -> [close_order_i(logic), hold(logic), volume]

        # NOTE(review): 'balance', 'equity' and 'features' are declared with
        # low=0; this presumably relies on non-negative (e.g. normalised)
        # indicator columns -- confirm against the data set in use.
        self.observation_space = spaces.Dict({
            'balance': spaces.Box(low=0, high=np.inf, shape=(1,)),
            'equity': spaces.Box(low=0 , high=np.inf, shape=(1,)),
            'margin': spaces.Box(low=-np.inf, high=np.inf, shape=(1,)),
            'features': spaces.Box(low=0, high=np.inf, shape=self.features_shape),
            'orders': spaces.Box(
                low=-np.inf, high=np.inf,
                shape=(len(self.trading_symbols), self.symbol_max_orders, 3)
            )  # symbol, order_i -> [entry_price, volume, profit]
        })

        # episode state; the NotImplemented placeholders are filled in by reset()
        self._start_tick = self.window_size - 1
        self._end_tick = len(self.time_points) - 1
        self._done: bool = NotImplemented
        self._current_tick: int = NotImplemented
        self.simulator: MtSimulator = NotImplemented
        self.history: List[Dict[str, Any]] = NotImplemented


    def seed(self, seed: Optional[int]=None) -> List[int]:
        """Create the environment's random generator and return the seed used."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]


    def reset(self) -> Dict[str, np.ndarray]:
        """Start a new episode on a fresh copy of the original simulator.

        Returns:
            The first observation, taken at ``_start_tick``.
        """
        self._done = False
        self._current_tick = self._start_tick
        self.simulator = copy.deepcopy(self.original_simulator)
        self.simulator.current_time = self.time_points[self._current_tick]
        self.history = [self._create_info()]
        return self._get_observation()


    def step(self, action: np.ndarray) -> Tuple[Dict[str, np.ndarray], float, bool, Dict[str, Any]]:
        """Apply ``action``, advance the simulator one time point and score it.

        Returns:
            ``(observation, step_reward, done, info)``; ``info`` is also
            appended to ``self.history``.
        """
        orders_info, closed_orders_info = self._apply_action(action)

        self._current_tick += 1
        if self._current_tick == self._end_tick:
            self._done = True

        # Move the simulator forward by the gap between consecutive time points.
        dt = self.time_points[self._current_tick] - self.time_points[self._current_tick - 1]
        self.simulator.tick(dt)

        # Must run before the new info is appended: the reward compares against
        # the equity stored in the last history entry.
        step_reward = self._calculate_reward()

        info = self._create_info(
            orders=orders_info, closed_orders=closed_orders_info, step_reward=step_reward
        )
        observation = self._get_observation()
        self.history.append(info)

        return observation, step_reward, self._done, info


    def _apply_action(self, action: np.ndarray) -> Tuple[Dict, Dict]:
        """Close and open orders on the simulator as requested by ``action``.

        Returns:
            ``(orders_info, closed_orders_info)``: for every symbol, a dict
            describing the (attempted) new order and a list of dicts describing
            the orders closed in this step.
        """
        orders_info = {}
        closed_orders_info = {symbol: [] for symbol in self.trading_symbols}

        k = self.symbol_max_orders + 2  # action values per symbol

        for i, symbol in enumerate(self.trading_symbols):
            symbol_action = action[k*i:k*(i+1)]
            close_orders_logit = symbol_action[:-2]
            hold_logit = symbol_action[-2]
            volume = symbol_action[-1]

            # expit is the logistic sigmoid: it maps logits to (0, 1)
            close_orders_probability = expit(close_orders_logit)
            hold_probability = expit(hold_logit)
            hold = bool(hold_probability > self.hold_threshold)
            modified_volume = self._get_modified_volume(symbol, volume)

            # Only the slots that currently hold an open order can be closed.
            symbol_orders = self.simulator.symbol_orders(symbol)
            orders_to_close_index = np.where(
                close_orders_probability[:len(symbol_orders)] > self.close_threshold
            )[0]
            orders_to_close = np.array(symbol_orders)[orders_to_close_index]

            for j, order in enumerate(orders_to_close):
                self.simulator.close_order(order)
                closed_orders_info[symbol].append(dict(
                    order_id=order.id, symbol=order.symbol, order_type=order.type,
                    volume=order.volume, fee=order.fee,
                    margin=order.margin, profit=order.profit,
                    close_probability=close_orders_probability[orders_to_close_index][j],
                ))

            orders_capacity = self.symbol_max_orders - (len(symbol_orders) - len(orders_to_close))
            orders_info[symbol] = dict(
                order_id=None, symbol=symbol, hold_probability=hold_probability,
                hold=hold, volume=volume, capacity=orders_capacity, order_type=None,
                modified_volume=modified_volume, fee=float('nan'), margin=float('nan'),
                error='',
            )

            if self.simulator.hedge and orders_capacity == 0:
                orders_info[symbol].update(dict(
                    error="cannot add more orders"
                ))
            elif not hold:
                # The sign of the raw volume selects the direction.
                order_type = OrderType.Buy if volume > 0. else OrderType.Sell
                # Accept any numeric constant (int, float, numpy scalar) as
                # well as a per-symbol callable.
                fee = self.fee(symbol) if callable(self.fee) else self.fee

                try:
                    order = self.simulator.create_order(order_type, symbol, modified_volume, fee)
                    new_info = dict(
                        order_id=order.id, order_type=order_type,
                        fee=fee, margin=order.margin,
                    )
                except ValueError as e:
                    # The failure is recorded in the step info instead of
                    # aborting the episode.
                    new_info = dict(error=str(e))

                orders_info[symbol].update(new_info)

        return orders_info, closed_orders_info


    def _collect_columns(self, keys: List[str]) -> Dict[str, np.ndarray]:
        """Read ``keys`` from ``price_at`` for every symbol and time point.

        Returns:
            Mapping symbol -> array with one row per entry of
            ``self.time_points`` and one column per key.
        """
        simulator = self.original_simulator
        columns = list(keys)  # list-based selection of several fields at once
        collected = {}

        for symbol in self.trading_symbols:
            # The lambda is consumed inside this iteration, so closing over the
            # loop variable is safe.
            get_row_at = lambda time: \
                simulator.price_at(symbol, time)[columns]

            if self.multiprocessing_pool is None:
                rows = list(map(get_row_at, self.time_points))
            else:
                rows = self.multiprocessing_pool.map(get_row_at, self.time_points)

            collected[symbol] = np.array(rows)

        return collected


    def _get_prices(self, keys: Optional[List[str]]=None) -> Dict[str, np.ndarray]:
        """Return the price columns per symbol (default: ``['Close', 'Open']``).

        The rendering methods read column 0 as the close price.
        """
        if keys is None:
            keys = ['Close', 'Open']
        return self._collect_columns(keys)


    def _get_indicator(self, keys: Optional[List[str]]=None) -> Dict[str, np.ndarray]:
        """Return the feature columns per symbol.

        Defaults to ``['open', 'close', 'low', 'high', 'Volume', 'EFI', 'ADX',
        'ATR', 'STOCHRSI']``; these names must exist in the simulator's data.
        """
        if keys is None:
            keys = ['open','close','low','high','Volume','EFI','ADX','ATR','STOCHRSI']
        return self._collect_columns(keys)


    def _process_data(self) -> np.ndarray:
        """Stack the indicator arrays of all symbols side by side.

        Returns:
            Array with one row per time point and the indicator columns of
            every symbol concatenated along axis 1.
        """
        data = self.indicator
        signal_features = np.column_stack(list(data.values()))
        return signal_features


    def _get_observation(self) -> Dict[str, np.ndarray]:
        """Build the observation dict for the current tick."""
        # Window of the last `window_size` rows, ending at the current tick.
        features = self.signal_features[(self._current_tick-self.window_size+1):(self._current_tick+1)]

        # Slots without an open order stay zero.
        orders = np.zeros(self.observation_space['orders'].shape)
        for i, symbol in enumerate(self.trading_symbols):
            symbol_orders = self.simulator.symbol_orders(symbol)
            for j, order in enumerate(symbol_orders):
                orders[i, j] = [order.entry_price, order.volume, order.profit]

        observation = {
            'balance': np.array([self.simulator.balance]),
            'equity': np.array([self.simulator.equity]),
            'margin': np.array([self.simulator.margin]),
            'features': features,
            'orders': orders,
        }
        return observation


    def _calculate_reward(self) -> float:
        """Return the equity change relative to the last history entry."""
        prev_equity = self.history[-1]['equity']
        current_equity = self.simulator.equity
        step_reward = current_equity - prev_equity
        return step_reward


    def _create_info(self, **kwargs: Any) -> Dict[str, Any]:
        """Return ``kwargs`` extended with a snapshot of the account state."""
        info = {k: v for k, v in kwargs.items()}
        info['balance'] = self.simulator.balance
        info['equity'] = self.simulator.equity
        info['margin'] = self.simulator.margin
        info['free_margin'] = self.simulator.free_margin
        info['margin_level'] = self.simulator.margin_level
        return info


    def _get_modified_volume(self, symbol: str, volume: float) -> float:
        """Turn a raw action volume into a volume valid for ``symbol``.

        Takes the absolute value, clips it to the symbol's
        ``[volume_min, volume_max]`` and rounds it to a multiple of
        ``volume_step``.
        """
        si = self.simulator.symbols_info[symbol]
        v = abs(volume)
        v = np.clip(v, si.volume_min, si.volume_max)
        v = round(v / si.volume_step) * si.volume_step
        return v


    def render(self, mode: str='human', **kwargs: Any) -> Any:
        """Render the episode.

        ``'simple_figure'`` and ``'advanced_figure'`` draw a matplotlib or
        plotly chart; any other mode returns ``self.simulator.get_state``.
        """
        if mode == 'simple_figure':
            return self._render_simple_figure(**kwargs)
        if mode == 'advanced_figure':
            return self._render_advanced_figure(**kwargs)
        return self.simulator.get_state(**kwargs)


    def _render_simple_figure(
        self, figsize: Tuple[float, float]=(14, 6), return_figure: bool=False
    ) -> Any:
        """Plot close prices and trade markers with matplotlib.

        Args:
            figsize: Figure size passed to ``plt.subplots``.
            return_figure: Return the figure instead of showing it.
        """
        fig, ax = plt.subplots(figsize=figsize, facecolor='white')

        # One colour per symbol, interpolated over a subset of tab10.
        cmap_colors = np.array(plt_cm.tab10.colors)[[0, 1, 4, 5, 6, 8]]
        cmap = plt_colors.LinearSegmentedColormap.from_list('mtsim', cmap_colors)
        symbol_colors = cmap(np.linspace(0, 1, len(self.trading_symbols)))

        for j, symbol in enumerate(self.trading_symbols):
            close_price = self.prices[symbol][:, 0]
            symbol_color = symbol_colors[j]

            ax.plot(self.time_points, close_price, c=symbol_color, marker='.', label=symbol)

            buy_ticks = []
            buy_error_ticks = []
            sell_ticks = []
            sell_error_ticks = []
            close_ticks = []

            for i in range(1, len(self.history)):
                # history[i] records the action applied at this tick
                # (history[0] is the reset snapshot).
                tick = self._start_tick + i - 1

                order = self.history[i]['orders'].get(symbol, {})
                if order and not order['hold']:
                    if order['order_type'] == OrderType.Buy:
                        if order['error']:
                            buy_error_ticks.append(tick)
                        else:
                            buy_ticks.append(tick)
                    else:
                        if order['error']:
                            sell_error_ticks.append(tick)
                        else:
                            sell_ticks.append(tick)

                closed_orders = self.history[i]['closed_orders'].get(symbol, [])
                if len(closed_orders) > 0:
                    close_ticks.append(tick)

            tp = np.array(self.time_points)
            ax.plot(tp[buy_ticks], close_price[buy_ticks], '^', color='green')
            ax.plot(tp[buy_error_ticks], close_price[buy_error_ticks], '^', color='gray')
            ax.plot(tp[sell_ticks], close_price[sell_ticks], 'v', color='red')
            ax.plot(tp[sell_error_ticks], close_price[sell_error_ticks], 'v', color='gray')
            ax.plot(tp[close_ticks], close_price[close_ticks], '|', color='black')

            ax.tick_params(axis='y', labelcolor=symbol_color)
            ax.yaxis.tick_left()
            # Every further symbol gets its own y axis sharing the x axis.
            if j < len(self.trading_symbols) - 1:
                ax = ax.twinx()

        fig.suptitle(
            f"Balance: {self.simulator.balance:.6f} {self.simulator.unit} ~ "
            f"Equity: {self.simulator.equity:.6f} ~ "
            f"Margin: {self.simulator.margin:.6f} ~ "
            f"Free Margin: {self.simulator.free_margin:.6f} ~ "
            f"Margin Level: {self.simulator.margin_level:.6f}"
        )
        fig.legend(loc='right')

        if return_figure:
            return fig

        plt.show()


    def _render_advanced_figure(
            self, figsize: Tuple[float, float]=(1400, 600), time_format: str="%Y-%m-%d %H:%M",
            return_figure: bool=False
        ) -> Any:
        """Plot close prices and trade markers with plotly, with hover details.

        Args:
            figsize: ``(width, height)`` of the figure layout.
            time_format: Tick format of the x axis (``%M`` is minutes; ``%m``
                would print the month a second time).
            return_figure: Return the figure instead of showing it.
        """
        fig = go.Figure()

        cmap_colors = np.array(plt_cm.tab10.colors)[[0, 1, 4, 5, 6, 8]]
        cmap = plt_colors.LinearSegmentedColormap.from_list('mtsim', cmap_colors)
        symbol_colors = cmap(np.linspace(0, 1, len(self.trading_symbols)))
        get_color_string = lambda color: "rgba(%s, %s, %s, %s)" % tuple(color)

        extra_info = [
            f"balance: {h['balance']:.6f} {self.simulator.unit}<br>"
            f"equity: {h['equity']:.6f}<br>"
            f"margin: {h['margin']:.6f}<br>"
            f"free margin: {h['free_margin']:.6f}<br>"
            f"margin level: {h['margin_level']:.6f}"
            for h in self.history
        ]
        # The first history entry belongs to tick `window_size - 1`; repeat it
        # for the earlier time points so hover texts line up with the x axis.
        extra_info = [extra_info[0]] * (self.window_size - 1) + extra_info

        for j, symbol in enumerate(self.trading_symbols):
            close_price = self.prices[symbol][:, 0]
            symbol_color = symbol_colors[j]

            fig.add_trace(
                go.Scatter(
                    x=self.time_points,
                    y=close_price,
                    mode='lines+markers',
                    line_color=get_color_string(symbol_color),
                    opacity=1.0,
                    hovertext=extra_info,
                    name=symbol,
                    yaxis=f'y{j+1}',
                    legendgroup=f'g{j+1}',
                ),
            )

            fig.update_layout(**{
                f'yaxis{j+1}': dict(
                    tickfont=dict(color=get_color_string(symbol_color * [1, 1, 1, 0.8])),
                    overlaying='y' if j > 0 else None,
                    # position=0.035*j
                ),
            })

            trade_ticks = []
            trade_markers = []
            trade_colors = []
            trade_sizes = []
            trade_extra_info = []
            trade_max_volume = max([
                h.get('orders', {}).get(symbol, {}).get('modified_volume') or 0
                for h in self.history
            ])
            close_ticks = []
            close_extra_info = []

            for i in range(1, len(self.history)):
                tick = self._start_tick + i - 1

                order = self.history[i]['orders'].get(symbol)
                if order and not order['hold']:
                    marker = None
                    color = None
                    # Marker size scales with the relative volume; fall back to
                    # the base size when no positive volume was recorded.
                    if trade_max_volume > 0:
                        volume_ratio = order['modified_volume'] / trade_max_volume
                    else:
                        volume_ratio = 0.
                    size = 8 + 22 * volume_ratio
                    info = (
                        f"order id: {order['order_id'] or ''}<br>"
                        f"hold probability: {order['hold_probability']:.4f}<br>"
                        f"hold: {order['hold']}<br>"
                        f"volume: {order['volume']:.6f}<br>"
                        f"modified volume: {order['modified_volume']:.4f}<br>"
                        f"fee: {order['fee']:.6f}<br>"
                        f"margin: {order['margin']:.6f}<br>"
                        f"error: {order['error']}"
                    )

                    if order['order_type'] == OrderType.Buy:
                        marker = 'triangle-up'
                        color = 'gray' if order['error'] else 'green'
                    else:
                        marker = 'triangle-down'
                        color = 'gray' if order['error'] else 'red'

                    trade_ticks.append(tick)
                    trade_markers.append(marker)
                    trade_colors.append(color)
                    trade_sizes.append(size)
                    trade_extra_info.append(info)

                closed_orders = self.history[i]['closed_orders'].get(symbol, [])
                if len(closed_orders) > 0:
                    info = []
                    for order in closed_orders:
                        info_i = (
                            f"order id: {order['order_id']}<br>"
                            f"order type: {order['order_type'].name}<br>"
                            f"close probability: {order['close_probability']:.4f}<br>"
                            f"margin: {order['margin']:.6f}<br>"
                            f"profit: {order['profit']:.6f}"
                        )
                        info.append(info_i)
                    info = '<br>---------------------------------<br>'.join(info)

                    close_ticks.append(tick)
                    close_extra_info.append(info)

            fig.add_trace(
                go.Scatter(
                    x=np.array(self.time_points)[trade_ticks],
                    y=close_price[trade_ticks],
                    mode='markers',
                    hovertext=trade_extra_info,
                    marker_symbol=trade_markers,
                    marker_color=trade_colors,
                    marker_size=trade_sizes,
                    name=symbol,
                    yaxis=f'y{j+1}',
                    showlegend=False,
                    legendgroup=f'g{j+1}',
                ),
            )

            fig.add_trace(
                go.Scatter(
                    x=np.array(self.time_points)[close_ticks],
                    y=close_price[close_ticks],
                    mode='markers',
                    hovertext=close_extra_info,
                    marker_symbol='line-ns',
                    marker_color='black',
                    marker_size=7,
                    marker_line_width=1.5,
                    name=symbol,
                    yaxis=f'y{j+1}',
                    showlegend=False,
                    legendgroup=f'g{j+1}',
                ),
            )

        title = (
            f"Balance: {self.simulator.balance:.6f} {self.simulator.unit} ~ "
            f"Equity: {self.simulator.equity:.6f} ~ "
            f"Margin: {self.simulator.margin:.6f} ~ "
            f"Free Margin: {self.simulator.free_margin:.6f} ~ "
            f"Margin Level: {self.simulator.margin_level:.6f}"
        )
        fig.update_layout(
            title=title,
            xaxis_tickformat=time_format,
            width=figsize[0],
            height=figsize[1],
        )

        if return_figure:
            return fig

        fig.show()


    def close(self) -> None:
        """Close the current matplotlib figure."""
        # NOTE(review): the optional process pool created in __init__ is not
        # shut down here -- confirm whether that is intended.
        plt.close()

In [3]:
# Simulator configuration, collected in one place so it is easy to tweak.
simulator_settings = dict(
    unit='USD',
    balance=10000.,
    leverage=100.,
    stop_out_level=0.2,
    hedge=True,
    # NOTE(review): presumably a pickle of pre-built symbol data; only load
    # files from a trusted source.
    symbols_filename='1D_train_final.pkl',
)

sim = MtSimulator(**simulator_settings)

In [4]:
# Per-symbol (mean, standard deviation) of the normally distributed fee.
FEE_DISTRIBUTIONS = {
    'EURUSD': (0.0002, 0.00003),
    'USDJPY': (0.02, 0.003),
    'GBPUSD': (0.0002, 0.00003),
    'GBPCAD': (0.0002, 0.00003),
    'GBPJPY': (0.02, 0.003),
}


def sample_fee(symbol: str) -> float:
    """Draw a random, non-negative fee for ``symbol``.

    Only the requested symbol's distribution is sampled, so a call performs a
    single random draw instead of one per configured symbol.

    Raises:
        KeyError: if ``symbol`` has no entry in ``FEE_DISTRIBUTIONS``.
    """
    mean, std = FEE_DISTRIBUTIONS[symbol]
    return max(0., np.random.normal(mean, std))


env = MtEnv(
    original_simulator=sim,
    # Trade exactly the symbols that have a fee distribution (same order).
    trading_symbols=list(FEE_DISTRIBUTIONS),
    window_size=15,
    # time_points=[desired time points ...],
    hold_threshold=0.5,
    close_threshold=0.5,
    fee=sample_fee,
    symbol_max_orders=2,
    multiprocessing_processes=2
)

In [5]:
# Summarise the shapes of the arrays the environment precomputed.
print("env information:")

for symbol, symbol_prices in env.prices.items():
    print(f"> prices[{symbol}].shape:", symbol_prices.shape)

print("> signal_features.shape:", env.signal_features.shape)
print("> features_shape:", env.features_shape)

env information:
> prices[EURUSD].shape: (1037, 2)
> prices[USDJPY].shape: (1037, 2)
> prices[GBPUSD].shape: (1037, 2)
> prices[GBPCAD].shape: (1037, 2)
> prices[GBPJPY].shape: (1037, 2)
> signal_features.shape: (1037, 45)
> features_shape: (15, 45)


In [6]:
# Price array of one symbol; with the environment's default price keys the
# columns are ['Close', 'Open'].
eurusd_prices = env.prices['EURUSD']
print(eurusd_prices)

[[1.04519 1.051  ]
 [1.04044 1.04524]
 [1.04871 1.04044]
 ...
 [1.22482 1.2216 ]
 [1.22961 1.22471]
 [1.22155 1.22963]]


# A2C 1D 4in All-symbol

In [7]:
import os
from stable_baselines3 import A2C
from stable_baselines3 import PPO
# A2C, PPO

# Run name shared by the checkpoint directory and the TensorBoard log.
RUN_NAME = "A2C_1D_norm_all"
models_dir = f"models/{RUN_NAME}"
logdir = "logs"
# Inspect training with: tensorboard --logdir logs

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

env.reset()

model = A2C('MultiInputPolicy', env, verbose=1, tensorboard_log=logdir)

TIMESTEPS = 10000     # environment steps between two checkpoints
N_CHECKPOINTS = 100   # number of learn/save rounds
for i in range(1, N_CHECKPOINTS + 1):
    # reset_num_timesteps=False keeps the step counter (and the log) continuous
    # across the successive learn() calls.
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=RUN_NAME)
    model.save(f"{models_dir}/{TIMESTEPS*i}")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Logging to logs\A2C_1D_norm_all_0
------------------------------------
| time/                 |          |
|    fps                | 137      |
|    iterations         | 100      |
|    time_elapsed       | 3        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -28.4    |
|    explained_variance | -4.93    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.115   |
|    std                | 1        |
|    value_loss         | 1.75e-05 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 187      |
|    iterations         | 200      |
|    time_elapsed       | 5        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -28.4    |
|    explained_variance | 0.245    |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|   

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 244      |
|    iterations         | 1400     |
|    time_elapsed       | 28       |
|    total_timesteps    | 7000     |
| train/                |          |
|    entropy_loss       | -28.4    |
|    explained_variance | -12.2    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | 0.00429  |
|    std                | 1        |
|    value_loss         | 3.34e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 246      |
|    iterations         | 1500     |
|    time_elapsed       | 30       |
|    total_timesteps    | 7500     |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 282      |
|    iterations         | 700      |
|    time_elapsed       | 12       |
|    total_timesteps    | 13500    |
| train/                |          |
|    entropy_loss       | -28.6    |
|    explained_variance | 0.00071  |
|    learning_rate      | 0.0007   |
|    n_updates          | 2699     |
|    policy_loss        | 3.99e+04 |
|    std                | 1.01     |
|    value_loss         | 2.68e+06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 271       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 1400

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.67e+03 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 2000      |
|    time_elapsed       | 37        |
|    total_timesteps    | 20000     |
| train/                |           |
|    entropy_loss       | -28.6     |
|    explained_variance | -32.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 3999      |
|    policy_loss        | 0.00191   |
|    std                | 1.01      |
|    value_loss         | 1.76e-07  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.79e+03 |
| time/                 |           |
|    fps                | 298       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.57e+03 |
| time/                 |           |
|    fps                | 290       |
|    iterations         | 1200      |
|    time_elapsed       | 20        |
|    total_timesteps    | 26000     |
| train/                |           |
|    entropy_loss       | -28.6     |
|    explained_variance | -9.18e-06 |
|    learning_rate      | 0.0007    |
|    n_updates          | 5199      |
|    policy_loss        | 3.95e+04  |
|    std                | 1.01      |
|    value_loss         | 2.98e+06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.57e+03 |
| time/                 |           |
|    fps                | 289       |
|    iterations         | 1300      |
|    time_elapsed       | 22        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.04e+03 |
| time/                 |           |
|    fps                | 325       |
|    iterations         | 400       |
|    time_elapsed       | 6         |
|    total_timesteps    | 32000     |
| train/                |           |
|    entropy_loss       | -28.5     |
|    explained_variance | -407      |
|    learning_rate      | 0.0007    |
|    n_updates          | 6399      |
|    policy_loss        | -0.0179   |
|    std                | 1.01      |
|    value_loss         | 2.73e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.04e+03 |
| time/                 |           |
|    fps                | 321       |
|    iterations         | 500       |
|    time_elapsed       | 7         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.36e+03 |
| time/                 |           |
|    fps                | 276       |
|    iterations         | 1700      |
|    time_elapsed       | 30        |
|    total_timesteps    | 38500     |
| train/                |           |
|    entropy_loss       | -28.4     |
|    explained_variance | -43.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 7699      |
|    policy_loss        | 0.00393   |
|    std                | 1         |
|    value_loss         | 6.97e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -8.4e+03 |
| time/                 |          |
|    fps                | 276      |
|    iterations         | 1800     |
|    time_elapsed       | 32       |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.59e+03 |
| time/                 |           |
|    fps                | 283       |
|    iterations         | 900       |
|    time_elapsed       | 15        |
|    total_timesteps    | 44500     |
| train/                |           |
|    entropy_loss       | -28.5     |
|    explained_variance | -0.000124 |
|    learning_rate      | 0.0007    |
|    n_updates          | 8899      |
|    policy_loss        | -1.05e+05 |
|    std                | 1.01      |
|    value_loss         | 1.43e+07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.62e+03 |
| time/                 |           |
|    fps                | 279       |
|    iterations         | 1000      |
|    time_elapsed       | 17        |
|    total_t

Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.3e+03  |
| time/                 |           |
|    fps                | 370       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 50500     |
| train/                |           |
|    entropy_loss       | -28.5     |
|    explained_variance | -5.89e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 10099     |
|    policy_loss        | 9.22      |
|    std                | 1.01      |
|    value_loss         | 0.658     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.3e+03  |
| time/                 |           |
|    fps                | 375       |
|    iterations         | 200       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.69e+03 |
| time/                 |           |
|    fps                | 386       |
|    iterations         | 1300      |
|    time_elapsed       | 16        |
|    total_timesteps    | 56500     |
| train/                |           |
|    entropy_loss       | -28.5     |
|    explained_variance | -8.06     |
|    learning_rate      | 0.0007    |
|    n_updates          | 11299     |
|    policy_loss        | -0.00685  |
|    std                | 1.01      |
|    value_loss         | 9.38e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.69e+03 |
| time/                 |           |
|    fps                | 388       |
|    iterations         | 1400      |
|    time_elapsed       | 17        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.37e+03 |
| time/                 |           |
|    fps                | 350       |
|    iterations         | 500       |
|    time_elapsed       | 7         |
|    total_timesteps    | 62500     |
| train/                |           |
|    entropy_loss       | -28.7     |
|    explained_variance | -121      |
|    learning_rate      | 0.0007    |
|    n_updates          | 12499     |
|    policy_loss        | -0.00648  |
|    std                | 1.02      |
|    value_loss         | 6.13e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.37e+03 |
| time/                 |           |
|    fps                | 355       |
|    iterations         | 600       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.13e+03 |
| time/                 |           |
|    fps                | 332       |
|    iterations         | 1700      |
|    time_elapsed       | 25        |
|    total_timesteps    | 68500     |
| train/                |           |
|    entropy_loss       | -28.7     |
|    explained_variance | 0.000114  |
|    learning_rate      | 0.0007    |
|    n_updates          | 13699     |
|    policy_loss        | 1.03e+05  |
|    std                | 1.02      |
|    value_loss         | 4.13e+07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.13e+03 |
| time/                 |           |
|    fps                | 334       |
|    iterations         | 1800      |
|    time_elapsed       | 26        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -6.4e+03 |
| time/                 |          |
|    fps                | 361      |
|    iterations         | 900      |
|    time_elapsed       | 12       |
|    total_timesteps    | 74500    |
| train/                |          |
|    entropy_loss       | -28.7    |
|    explained_variance | -0.556   |
|    learning_rate      | 0.0007   |
|    n_updates          | 14899    |
|    policy_loss        | 0.0109   |
|    std                | 1.02     |
|    value_loss         | 1.72e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.44e+03 |
| time/                 |           |
|    fps                | 356       |
|    iterations         | 1000      |
|    time_elapsed       | 14        |
|    total_timesteps    | 7500

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.67e+03 |
| time/                 |           |
|    fps                | 310       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 80500     |
| train/                |           |
|    entropy_loss       | -28.8     |
|    explained_variance | 0.295     |
|    learning_rate      | 0.0007    |
|    n_updates          | 16099     |
|    policy_loss        | -0.00521  |
|    std                | 1.02      |
|    value_loss         | 4.7e-08   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.71e+03 |
| time/                 |           |
|    fps                | 332       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.95e+03 |
| time/                 |           |
|    fps                | 349       |
|    iterations         | 1400      |
|    time_elapsed       | 20        |
|    total_timesteps    | 87000     |
| train/                |           |
|    entropy_loss       | -28.8     |
|    explained_variance | 0.0492    |
|    learning_rate      | 0.0007    |
|    n_updates          | 17399     |
|    policy_loss        | 1.63e+03  |
|    std                | 1.02      |
|    value_loss         | 3.46e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.95e+03 |
| time/                 |           |
|    fps                | 348       |
|    iterations         | 1500      |
|    time_elapsed       | 21        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.12e+03 |
| time/                 |           |
|    fps                | 323       |
|    iterations         | 600       |
|    time_elapsed       | 9         |
|    total_timesteps    | 93000     |
| train/                |           |
|    entropy_loss       | -29       |
|    explained_variance | 0.283     |
|    learning_rate      | 0.0007    |
|    n_updates          | 18599     |
|    policy_loss        | 69.1      |
|    std                | 1.03      |
|    value_loss         | 12.9      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.15e+03 |
| time/                 |           |
|    fps                | 322       |
|    iterations         | 700       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.29e+03 |
| time/                 |           |
|    fps                | 332       |
|    iterations         | 1800      |
|    time_elapsed       | 27        |
|    total_timesteps    | 99000     |
| train/                |           |
|    entropy_loss       | -28.9     |
|    explained_variance | -60.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19799     |
|    policy_loss        | 0.105     |
|    std                | 1.03      |
|    value_loss         | 1.75e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.32e+03 |
| time/                 |           |
|    fps                | 332       |
|    iterations         | 1900      |
|    time_elapsed       | 28        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.39e+03 |
| time/                 |           |
|    fps                | 339       |
|    iterations         | 1000      |
|    time_elapsed       | 14        |
|    total_timesteps    | 105000    |
| train/                |           |
|    entropy_loss       | -29       |
|    explained_variance | -2.32     |
|    learning_rate      | 0.0007    |
|    n_updates          | 20999     |
|    policy_loss        | 0.00762   |
|    std                | 1.04      |
|    value_loss         | 7.58e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.39e+03 |
| time/                 |           |
|    fps                | 338       |
|    iterations         | 1100      |
|    time_elapsed       | 16        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.39e+03 |
| time/                 |           |
|    fps                | 344       |
|    iterations         | 200       |
|    time_elapsed       | 2         |
|    total_timesteps    | 111000    |
| train/                |           |
|    entropy_loss       | -29.2     |
|    explained_variance | -0.108    |
|    learning_rate      | 0.0007    |
|    n_updates          | 22199     |
|    policy_loss        | -0.00055  |
|    std                | 1.04      |
|    value_loss         | 1.22e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.39e+03 |
| time/                 |           |
|    fps                | 341       |
|    iterations         | 300       |
|    time_elapsed       | 4         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.83e+03 |
| time/                 |           |
|    fps                | 335       |
|    iterations         | 1400      |
|    time_elapsed       | 20        |
|    total_timesteps    | 117000    |
| train/                |           |
|    entropy_loss       | -29.2     |
|    explained_variance | -54.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 23399     |
|    policy_loss        | 0.00292   |
|    std                | 1.04      |
|    value_loss         | 1.62e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.83e+03 |
| time/                 |           |
|    fps                | 336       |
|    iterations         | 1500      |
|    time_elapsed       | 22        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.83e+03 |
| time/                 |           |
|    fps                | 346       |
|    iterations         | 600       |
|    time_elapsed       | 8         |
|    total_timesteps    | 123000    |
| train/                |           |
|    entropy_loss       | -29.3     |
|    explained_variance | -39.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 24599     |
|    policy_loss        | -0.00398  |
|    std                | 1.05      |
|    value_loss         | 2.74e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.83e+03 |
| time/                 |           |
|    fps                | 345       |
|    iterations         | 700       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.99e+03 |
| time/                 |           |
|    fps                | 335       |
|    iterations         | 1800      |
|    time_elapsed       | 26        |
|    total_timesteps    | 129000    |
| train/                |           |
|    entropy_loss       | -29.2     |
|    explained_variance | -7.2      |
|    learning_rate      | 0.0007    |
|    n_updates          | 25799     |
|    policy_loss        | -0.00349  |
|    std                | 1.05      |
|    value_loss         | 3.56e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.99e+03 |
| time/                 |           |
|    fps                | 334       |
|    iterations         | 1900      |
|    time_elapsed       | 28        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.99e+03 |
| time/                 |           |
|    fps                | 327       |
|    iterations         | 1000      |
|    time_elapsed       | 15        |
|    total_timesteps    | 135000    |
| train/                |           |
|    entropy_loss       | -29.2     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 26999     |
|    policy_loss        | 0.209     |
|    std                | 1.05      |
|    value_loss         | 6.63e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.99e+03 |
| time/                 |           |
|    fps                | 329       |
|    iterations         | 1100      |
|    time_elapsed       | 16        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.91e+03 |
| time/                 |           |
|    fps                | 323       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 141000    |
| train/                |           |
|    entropy_loss       | -29.3     |
|    explained_variance | 0.0102    |
|    learning_rate      | 0.0007    |
|    n_updates          | 28199     |
|    policy_loss        | -2.5e+03  |
|    std                | 1.06      |
|    value_loss         | 2.13e+04  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -7.9e+03 |
| time/                 |          |
|    fps                | 328      |
|    iterations         | 300      |
|    time_elapsed       | 4        |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.47e+03 |
| time/                 |           |
|    fps                | 318       |
|    iterations         | 1500      |
|    time_elapsed       | 23        |
|    total_timesteps    | 147500    |
| train/                |           |
|    entropy_loss       | -29.4     |
|    explained_variance | -14.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 29499     |
|    policy_loss        | -0.000392 |
|    std                | 1.06      |
|    value_loss         | 2.12e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.47e+03 |
| time/                 |           |
|    fps                | 317       |
|    iterations         | 1600      |
|    time_elapsed       | 25        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.68e+03 |
| time/                 |           |
|    fps                | 328       |
|    iterations         | 700       |
|    time_elapsed       | 10        |
|    total_timesteps    | 153500    |
| train/                |           |
|    entropy_loss       | -29.4     |
|    explained_variance | -1.98e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 30699     |
|    policy_loss        | 0.00108   |
|    std                | 1.06      |
|    value_loss         | 4.72e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.68e+03 |
| time/                 |           |
|    fps                | 328       |
|    iterations         | 800       |
|    time_elapsed       | 12        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.66e+03 |
| time/                 |           |
|    fps                | 333       |
|    iterations         | 1900      |
|    time_elapsed       | 28        |
|    total_timesteps    | 159500    |
| train/                |           |
|    entropy_loss       | -29.2     |
|    explained_variance | 0.00106   |
|    learning_rate      | 0.0007    |
|    n_updates          | 31899     |
|    policy_loss        | -5.24e+05 |
|    std                | 1.05      |
|    value_loss         | 4.52e+08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.66e+03 |
| time/                 |           |
|    fps                | 333       |
|    iterations         | 2000      |
|    time_elapsed       | 29        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.06e+03 |
| time/                 |           |
|    fps                | 295       |
|    iterations         | 1100      |
|    time_elapsed       | 18        |
|    total_timesteps    | 165500    |
| train/                |           |
|    entropy_loss       | -29.3     |
|    explained_variance | -0.473    |
|    learning_rate      | 0.0007    |
|    n_updates          | 33099     |
|    policy_loss        | -0.0338   |
|    std                | 1.05      |
|    value_loss         | 1.42e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.06e+03 |
| time/                 |           |
|    fps                | 295       |
|    iterations         | 1200      |
|    time_elapsed       | 20        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.44e+03 |
| time/                 |           |
|    fps                | 281       |
|    iterations         | 300       |
|    time_elapsed       | 5         |
|    total_timesteps    | 171500    |
| train/                |           |
|    entropy_loss       | -29.4     |
|    explained_variance | -0.253    |
|    learning_rate      | 0.0007    |
|    n_updates          | 34299     |
|    policy_loss        | 0.0538    |
|    std                | 1.06      |
|    value_loss         | 3.4e-06   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.44e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.42e+03 |
| time/                 |           |
|    fps                | 288       |
|    iterations         | 1500      |
|    time_elapsed       | 26        |
|    total_timesteps    | 177500    |
| train/                |           |
|    entropy_loss       | -29.5     |
|    explained_variance | -21.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 35499     |
|    policy_loss        | -0.0089   |
|    std                | 1.06      |
|    value_loss         | 1.01e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.42e+03 |
| time/                 |           |
|    fps                | 287       |
|    iterations         | 1600      |
|    time_elapsed       | 27        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.42e+03 |
| time/                 |           |
|    fps                | 297       |
|    iterations         | 700       |
|    time_elapsed       | 11        |
|    total_timesteps    | 183500    |
| train/                |           |
|    entropy_loss       | -29.4     |
|    explained_variance | -1.17e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 36699     |
|    policy_loss        | 0.605     |
|    std                | 1.06      |
|    value_loss         | 0.000392  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.42e+03 |
| time/                 |           |
|    fps                | 299       |
|    iterations         | 800       |
|    time_elapsed       | 13        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.54e+03 |
| time/                 |           |
|    fps                | 305       |
|    iterations         | 2000      |
|    time_elapsed       | 32        |
|    total_timesteps    | 190000    |
| train/                |           |
|    entropy_loss       | -29.3     |
|    explained_variance | 0.0276    |
|    learning_rate      | 0.0007    |
|    n_updates          | 37999     |
|    policy_loss        | -7.42e+03 |
|    std                | 1.05      |
|    value_loss         | 8.83e+04  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.46e+03 |
| time/                 |           |
|    fps                | 305       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.09e+03 |
| time/                 |           |
|    fps                | 326       |
|    iterations         | 1200      |
|    time_elapsed       | 18        |
|    total_timesteps    | 196000    |
| train/                |           |
|    entropy_loss       | -29.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 39199     |
|    policy_loss        | -0.41     |
|    std                | 1.04      |
|    value_loss         | 0.00024   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.99e+03 |
| time/                 |           |
|    fps                | 328       |
|    iterations         | 1300      |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.49e+03 |
| time/                 |           |
|    fps                | 349       |
|    iterations         | 400       |
|    time_elapsed       | 5         |
|    total_timesteps    | 202000    |
| train/                |           |
|    entropy_loss       | -29.3     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 40399     |
|    policy_loss        | -0.00836  |
|    std                | 1.05      |
|    value_loss         | 9.57e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.39e+03 |
| time/                 |           |
|    fps                | 349       |
|    iterations         | 500       |
|    time_elapsed       | 7         |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -6.1e+03 |
| time/                 |          |
|    fps                | 343      |
|    iterations         | 1700     |
|    time_elapsed       | 24       |
|    total_timesteps    | 208500   |
| train/                |          |
|    entropy_loss       | -29.3    |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 41699    |
|    policy_loss        | 2.02     |
|    std                | 1.06     |
|    value_loss         | 0.00582  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -6.1e+03 |
| time/                 |          |
|    fps                | 343      |
|    iterations         | 1800     |
|    time_elapsed       | 26       |
|    total_timesteps    | 209000   |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -6.1e+03 |
| time/                 |          |
|    fps                | 333      |
|    iterations         | 1000     |
|    time_elapsed       | 15       |
|    total_timesteps    | 215000   |
| train/                |          |
|    entropy_loss       | -29.1    |
|    explained_variance | 0.000599 |
|    learning_rate      | 0.0007   |
|    n_updates          | 42999    |
|    policy_loss        | 632      |
|    std                | 1.04     |
|    value_loss         | 9.82e+03 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -6.1e+03 |
| time/                 |          |
|    fps                | 330      |
|    iterations         | 1100     |
|    time_elapsed       | 16       |
|    total_timesteps    | 215500   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.09e+03 |
| time/                 |           |
|    fps                | 278       |
|    iterations         | 300       |
|    time_elapsed       | 5         |
|    total_timesteps    | 221500    |
| train/                |           |
|    entropy_loss       | -29       |
|    explained_variance | -3.19e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 44299     |
|    policy_loss        | -0.0181   |
|    std                | 1.04      |
|    value_loss         | 5.17e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.09e+03 |
| time/                 |           |
|    fps                | 281       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -5.98e+03 |
| time/                 |           |
|    fps                | 276       |
|    iterations         | 1600      |
|    time_elapsed       | 28        |
|    total_timesteps    | 228000    |
| train/                |           |
|    entropy_loss       | -29.1     |
|    explained_variance | 0.0143    |
|    learning_rate      | 0.0007    |
|    n_updates          | 45599     |
|    policy_loss        | -2.22e+04 |
|    std                | 1.04      |
|    value_loss         | 1.06e+06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -5.98e+03 |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 1700      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -5.9e+03  |
| time/                 |           |
|    fps                | 270       |
|    iterations         | 900       |
|    time_elapsed       | 16        |
|    total_timesteps    | 234500    |
| train/                |           |
|    entropy_loss       | -29       |
|    explained_variance | -3.87     |
|    learning_rate      | 0.0007    |
|    n_updates          | 46899     |
|    policy_loss        | -0.000577 |
|    std                | 1.04      |
|    value_loss         | 7.62e-10  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -5.9e+03 |
| time/                 |          |
|    fps                | 274      |
|    iterations         | 1000     |
|    time_elapsed       | 18       |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -5.98e+03 |
| time/                 |           |
|    fps                | 310       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 241000    |
| train/                |           |
|    entropy_loss       | -29       |
|    explained_variance | -0.0203   |
|    learning_rate      | 0.0007    |
|    n_updates          | 48199     |
|    policy_loss        | -0.138    |
|    std                | 1.04      |
|    value_loss         | 2.16e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -5.98e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 300       |
|    time_elapsed       | 4         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.42e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 1400      |
|    time_elapsed       | 22        |
|    total_timesteps    | 247000    |
| train/                |           |
|    entropy_loss       | -29       |
|    explained_variance | -1.4e+04  |
|    learning_rate      | 0.0007    |
|    n_updates          | 49399     |
|    policy_loss        | -56.6     |
|    std                | 1.04      |
|    value_loss         | 28.7      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.42e+03 |
| time/                 |           |
|    fps                | 310       |
|    iterations         | 1500      |
|    time_elapsed       | 24        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.41e+03 |
| time/                 |           |
|    fps                | 321       |
|    iterations         | 600       |
|    time_elapsed       | 9         |
|    total_timesteps    | 253000    |
| train/                |           |
|    entropy_loss       | -29       |
|    explained_variance | 0.657     |
|    learning_rate      | 0.0007    |
|    n_updates          | 50599     |
|    policy_loss        | -0.0391   |
|    std                | 1.04      |
|    value_loss         | 2.03e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.41e+03 |
| time/                 |           |
|    fps                | 320       |
|    iterations         | 700       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.39e+03 |
| time/                 |           |
|    fps                | 316       |
|    iterations         | 1800      |
|    time_elapsed       | 28        |
|    total_timesteps    | 259000    |
| train/                |           |
|    entropy_loss       | -29.1     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0007    |
|    n_updates          | 51799     |
|    policy_loss        | -1.67e-06 |
|    std                | 1.04      |
|    value_loss         | 3.12e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.39e+03 |
| time/                 |           |
|    fps                | 316       |
|    iterations         | 1900      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.26e+03 |
| time/                 |           |
|    fps                | 312       |
|    iterations         | 1000      |
|    time_elapsed       | 16        |
|    total_timesteps    | 265000    |
| train/                |           |
|    entropy_loss       | -29.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 52999     |
|    policy_loss        | -7.33e-06 |
|    std                | 1.04      |
|    value_loss         | 1.24e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.26e+03 |
| time/                 |           |
|    fps                | 313       |
|    iterations         | 1100      |
|    time_elapsed       | 17        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.08e+03 |
| time/                 |           |
|    fps                | 283       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 271000    |
| train/                |           |
|    entropy_loss       | -29.1     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0007    |
|    n_updates          | 54199     |
|    policy_loss        | 1.68e-06  |
|    std                | 1.05      |
|    value_loss         | 4.72e-15  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.08e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 300       |
|    time_elapsed       | 5         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.91e+03 |
| time/                 |           |
|    fps                | 285       |
|    iterations         | 1400      |
|    time_elapsed       | 24        |
|    total_timesteps    | 277000    |
| train/                |           |
|    entropy_loss       | -29.1     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 55399     |
|    policy_loss        | -0.00992  |
|    std                | 1.04      |
|    value_loss         | 1.45e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.91e+03 |
| time/                 |           |
|    fps                | 284       |
|    iterations         | 1500      |
|    time_elapsed       | 26        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -6.7e+03 |
| time/                 |          |
|    fps                | 266      |
|    iterations         | 600      |
|    time_elapsed       | 11       |
|    total_timesteps    | 283000   |
| train/                |          |
|    entropy_loss       | -29.2    |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 56599    |
|    policy_loss        | 0.0126   |
|    std                | 1.05     |
|    value_loss         | 2.18e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.64e+03 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 700       |
|    time_elapsed       | 13        |
|    total_timesteps    | 2835

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.5e+03  |
| time/                 |           |
|    fps                | 254       |
|    iterations         | 1900      |
|    time_elapsed       | 37        |
|    total_timesteps    | 289500    |
| train/                |           |
|    entropy_loss       | -29.3     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 57899     |
|    policy_loss        | -1.38e-05 |
|    std                | 1.06      |
|    value_loss         | 5.08e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.5e+03  |
| time/                 |           |
|    fps                | 253       |
|    iterations         | 2000      |
|    time_elapsed       | 39        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.42e+03 |
| time/                 |           |
|    fps                | 214       |
|    iterations         | 1100      |
|    time_elapsed       | 25        |
|    total_timesteps    | 295500    |
| train/                |           |
|    entropy_loss       | -29.6     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 59099     |
|    policy_loss        | -2.45e-05 |
|    std                | 1.07      |
|    value_loss         | 1.5e-13   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.42e+03 |
| time/                 |           |
|    fps                | 207       |
|    iterations         | 1200      |
|    time_elapsed       | 28        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.8e+03  |
| time/                 |           |
|    fps                | 179       |
|    iterations         | 300       |
|    time_elapsed       | 8         |
|    total_timesteps    | 301500    |
| train/                |           |
|    entropy_loss       | -29.6     |
|    explained_variance | 0.0067    |
|    learning_rate      | 0.0007    |
|    n_updates          | 60299     |
|    policy_loss        | -1.55e+05 |
|    std                | 1.07      |
|    value_loss         | 4.25e+07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -7.8e+03 |
| time/                 |          |
|    fps                | 184      |
|    iterations         | 400      |
|    time_elapsed       | 10       |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.35e+03 |
| time/                 |           |
|    fps                | 205       |
|    iterations         | 1600      |
|    time_elapsed       | 38        |
|    total_timesteps    | 308000    |
| train/                |           |
|    entropy_loss       | -29.5     |
|    explained_variance | 0.106     |
|    learning_rate      | 0.0007    |
|    n_updates          | 61599     |
|    policy_loss        | -0.0189   |
|    std                | 1.07      |
|    value_loss         | 5.6e-07   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.35e+03 |
| time/                 |           |
|    fps                | 207       |
|    iterations         | 1700      |
|    time_elapsed       | 40        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.34e+03 |
| time/                 |           |
|    fps                | 204       |
|    iterations         | 800       |
|    time_elapsed       | 19        |
|    total_timesteps    | 314000    |
| train/                |           |
|    entropy_loss       | -29.6     |
|    explained_variance | 0.816     |
|    learning_rate      | 0.0007    |
|    n_updates          | 62799     |
|    policy_loss        | 13.8      |
|    std                | 1.07      |
|    value_loss         | 1.9       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.34e+03 |
| time/                 |           |
|    fps                | 206       |
|    iterations         | 900       |
|    time_elapsed       | 21        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.33e+03 |
| time/                 |           |
|    fps                | 208       |
|    iterations         | 2000      |
|    time_elapsed       | 48        |
|    total_timesteps    | 320000    |
| train/                |           |
|    entropy_loss       | -29.7     |
|    explained_variance | -8.69     |
|    learning_rate      | 0.0007    |
|    n_updates          | 63999     |
|    policy_loss        | 0.0841    |
|    std                | 1.08      |
|    value_loss         | 1.08e-05  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.33e+03 |
| time/                 |           |
|    fps                | 184       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.33e+03 |
| time/                 |           |
|    fps                | 219       |
|    iterations         | 1200      |
|    time_elapsed       | 27        |
|    total_timesteps    | 326000    |
| train/                |           |
|    entropy_loss       | -29.7     |
|    explained_variance | 0.434     |
|    learning_rate      | 0.0007    |
|    n_updates          | 65199     |
|    policy_loss        | -0.0112   |
|    std                | 1.08      |
|    value_loss         | 1.72e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.33e+03 |
| time/                 |           |
|    fps                | 216       |
|    iterations         | 1300      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.36e+03 |
| time/                 |           |
|    fps                | 217       |
|    iterations         | 400       |
|    time_elapsed       | 9         |
|    total_timesteps    | 332000    |
| train/                |           |
|    entropy_loss       | -29.8     |
|    explained_variance | -4.89     |
|    learning_rate      | 0.0007    |
|    n_updates          | 66399     |
|    policy_loss        | 0.000986  |
|    std                | 1.08      |
|    value_loss         | 5.36e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.36e+03 |
| time/                 |           |
|    fps                | 218       |
|    iterations         | 500       |
|    time_elapsed       | 11        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.41e+03 |
| time/                 |           |
|    fps                | 229       |
|    iterations         | 1700      |
|    time_elapsed       | 37        |
|    total_timesteps    | 338500    |
| train/                |           |
|    entropy_loss       | -29.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 67699     |
|    policy_loss        | 1.56e-06  |
|    std                | 1.09      |
|    value_loss         | 3.5e-14   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.41e+03 |
| time/                 |           |
|    fps                | 229       |
|    iterations         | 1800      |
|    time_elapsed       | 39        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.34e+03 |
| time/                 |           |
|    fps                | 218       |
|    iterations         | 900       |
|    time_elapsed       | 20        |
|    total_timesteps    | 344500    |
| train/                |           |
|    entropy_loss       | -29.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 68899     |
|    policy_loss        | -0.271    |
|    std                | 1.09      |
|    value_loss         | 0.00012   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.34e+03 |
| time/                 |           |
|    fps                | 217       |
|    iterations         | 1000      |
|    time_elapsed       | 23        |
|    total_t

Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.34e+03 |
| time/                 |           |
|    fps                | 261       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 350500    |
| train/                |           |
|    entropy_loss       | -29.8     |
|    explained_variance | 0.247     |
|    learning_rate      | 0.0007    |
|    n_updates          | 70099     |
|    policy_loss        | 0.0215    |
|    std                | 1.09      |
|    value_loss         | 5.87e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.35e+03 |
| time/                 |           |
|    fps                | 249       |
|    iterations         | 200       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.35e+03 |
| time/                 |           |
|    fps                | 248       |
|    iterations         | 1300      |
|    time_elapsed       | 26        |
|    total_timesteps    | 356500    |
| train/                |           |
|    entropy_loss       | -29.8     |
|    explained_variance | 0.26      |
|    learning_rate      | 0.0007    |
|    n_updates          | 71299     |
|    policy_loss        | 0.0304    |
|    std                | 1.08      |
|    value_loss         | 1.2e-06   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.35e+03 |
| time/                 |           |
|    fps                | 249       |
|    iterations         | 1400      |
|    time_elapsed       | 28        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -8.4e+03 |
| time/                 |          |
|    fps                | 293      |
|    iterations         | 500      |
|    time_elapsed       | 8        |
|    total_timesteps    | 362500   |
| train/                |          |
|    entropy_loss       | -29.7    |
|    explained_variance | -20.2    |
|    learning_rate      | 0.0007   |
|    n_updates          | 72499    |
|    policy_loss        | -0.0487  |
|    std                | 1.08     |
|    value_loss         | 3.47e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.42e+03 |
| time/                 |           |
|    fps                | 297       |
|    iterations         | 600       |
|    time_elapsed       | 10        |
|    total_timesteps    | 3630

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.54e+03 |
| time/                 |           |
|    fps                | 297       |
|    iterations         | 1800      |
|    time_elapsed       | 30        |
|    total_timesteps    | 369000    |
| train/                |           |
|    entropy_loss       | -29.8     |
|    explained_variance | 0.404     |
|    learning_rate      | 0.0007    |
|    n_updates          | 73799     |
|    policy_loss        | 0.0135    |
|    std                | 1.09      |
|    value_loss         | 1.42e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.54e+03 |
| time/                 |           |
|    fps                | 297       |
|    iterations         | 1900      |
|    time_elapsed       | 31        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.73e+03 |
| time/                 |           |
|    fps                | 285       |
|    iterations         | 1000      |
|    time_elapsed       | 17        |
|    total_timesteps    | 375000    |
| train/                |           |
|    entropy_loss       | -29.9     |
|    explained_variance | -0.0219   |
|    learning_rate      | 0.0007    |
|    n_updates          | 74999     |
|    policy_loss        | -0.0219   |
|    std                | 1.09      |
|    value_loss         | 6.74e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.76e+03 |
| time/                 |           |
|    fps                | 284       |
|    iterations         | 1100      |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.92e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 381000    |
| train/                |           |
|    entropy_loss       | -30       |
|    explained_variance | -2.7      |
|    learning_rate      | 0.0007    |
|    n_updates          | 76199     |
|    policy_loss        | 0.000934  |
|    std                | 1.1       |
|    value_loss         | 1.81e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.99e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 300       |
|    time_elapsed       | 5         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.21e+03 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 1400      |
|    time_elapsed       | 26        |
|    total_timesteps    | 387000    |
| train/                |           |
|    entropy_loss       | -30.1     |
|    explained_variance | -0.746    |
|    learning_rate      | 0.0007    |
|    n_updates          | 77399     |
|    policy_loss        | -0.00177  |
|    std                | 1.1       |
|    value_loss         | 9.63e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.22e+03 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 1500      |
|    time_elapsed       | 28        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.36e+03 |
| time/                 |           |
|    fps                | 281       |
|    iterations         | 600       |
|    time_elapsed       | 10        |
|    total_timesteps    | 393000    |
| train/                |           |
|    entropy_loss       | -30.3     |
|    explained_variance | -570      |
|    learning_rate      | 0.0007    |
|    n_updates          | 78599     |
|    policy_loss        | 0.0952    |
|    std                | 1.11      |
|    value_loss         | 1.21e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.38e+03 |
| time/                 |           |
|    fps                | 281       |
|    iterations         | 700       |
|    time_elapsed       | 12        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.55e+03 |
| time/                 |           |
|    fps                | 283       |
|    iterations         | 1900      |
|    time_elapsed       | 33        |
|    total_timesteps    | 399500    |
| train/                |           |
|    entropy_loss       | -30.3     |
|    explained_variance | 0.464     |
|    learning_rate      | 0.0007    |
|    n_updates          | 79899     |
|    policy_loss        | -0.000605 |
|    std                | 1.11      |
|    value_loss         | 1.57e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.59e+03 |
| time/                 |           |
|    fps                | 283       |
|    iterations         | 2000      |
|    time_elapsed       | 35        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.74e+03 |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 1100      |
|    time_elapsed       | 19        |
|    total_timesteps    | 405500    |
| train/                |           |
|    entropy_loss       | -30.2     |
|    explained_variance | 0.433     |
|    learning_rate      | 0.0007    |
|    n_updates          | 81099     |
|    policy_loss        | -0.00288  |
|    std                | 1.11      |
|    value_loss         | 2.7e-08   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.75e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 1200      |
|    time_elapsed       | 21        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.77e+03 |
| time/                 |           |
|    fps                | 259       |
|    iterations         | 300       |
|    time_elapsed       | 5         |
|    total_timesteps    | 411500    |
| train/                |           |
|    entropy_loss       | -30.3     |
|    explained_variance | 0.913     |
|    learning_rate      | 0.0007    |
|    n_updates          | 82299     |
|    policy_loss        | -0.00996  |
|    std                | 1.12      |
|    value_loss         | 1.12e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.77e+03 |
| time/                 |           |
|    fps                | 257       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 258       |
|    iterations         | 1500      |
|    time_elapsed       | 29        |
|    total_timesteps    | 417500    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | -62.1     |
|    learning_rate      | 0.0007    |
|    n_updates          | 83499     |
|    policy_loss        | 0.0144    |
|    std                | 1.12      |
|    value_loss         | 9.22e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 258       |
|    iterations         | 1600      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 259       |
|    iterations         | 700       |
|    time_elapsed       | 13        |
|    total_timesteps    | 423500    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | -0.185    |
|    learning_rate      | 0.0007    |
|    n_updates          | 84699     |
|    policy_loss        | 0.0017    |
|    std                | 1.12      |
|    value_loss         | 4.25e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 260       |
|    iterations         | 800       |
|    time_elapsed       | 15        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 234       |
|    iterations         | 1900      |
|    time_elapsed       | 40        |
|    total_timesteps    | 429500    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | -65.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 85899     |
|    policy_loss        | 0.000155  |
|    std                | 1.12      |
|    value_loss         | 4.08e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 233       |
|    iterations         | 2000      |
|    time_elapsed       | 42        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.9e+03  |
| time/                 |           |
|    fps                | 233       |
|    iterations         | 1100      |
|    time_elapsed       | 23        |
|    total_timesteps    | 435500    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 87099     |
|    policy_loss        | -1.21e-06 |
|    std                | 1.12      |
|    value_loss         | 2.92e-12  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.9e+03 |
| time/                 |          |
|    fps                | 232      |
|    iterations         | 1200     |
|    time_elapsed       | 25       |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.94e+03 |
| time/                 |           |
|    fps                | 251       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_timesteps    | 442000    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | -1.08     |
|    learning_rate      | 0.0007    |
|    n_updates          | 88399     |
|    policy_loss        | -0.00259  |
|    std                | 1.12      |
|    value_loss         | 9.31e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.94e+03 |
| time/                 |           |
|    fps                | 250       |
|    iterations         | 500       |
|    time_elapsed       | 9         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 242       |
|    iterations         | 1600      |
|    time_elapsed       | 32        |
|    total_timesteps    | 448000    |
| train/                |           |
|    entropy_loss       | -30.3     |
|    explained_variance | -5.66     |
|    learning_rate      | 0.0007    |
|    n_updates          | 89599     |
|    policy_loss        | 0.000337  |
|    std                | 1.12      |
|    value_loss         | 1.51e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 244       |
|    iterations         | 1700      |
|    time_elapsed       | 34        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 280       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 454000    |
| train/                |           |
|    entropy_loss       | -30.3     |
|    explained_variance | -27.2     |
|    learning_rate      | 0.0007    |
|    n_updates          | 90799     |
|    policy_loss        | -0.00123  |
|    std                | 1.12      |
|    value_loss         | 2.09e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 900       |
|    time_elapsed       | 15        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 2000      |
|    time_elapsed       | 36        |
|    total_timesteps    | 460000    |
| train/                |           |
|    entropy_loss       | -30.3     |
|    explained_variance | 0.31      |
|    learning_rate      | 0.0007    |
|    n_updates          | 91999     |
|    policy_loss        | -0.0102   |
|    std                | 1.12      |
|    value_loss         | 1.28e-07  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 287       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 1200      |
|    time_elapsed       | 22        |
|    total_timesteps    | 466000    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | -2.34     |
|    learning_rate      | 0.0007    |
|    n_updates          | 93199     |
|    policy_loss        | 0.0192    |
|    std                | 1.12      |
|    value_loss         | 4.61e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 1300      |
|    time_elapsed       | 24        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.96e+03 |
| time/                 |           |
|    fps                | 312       |
|    iterations         | 400       |
|    time_elapsed       | 6         |
|    total_timesteps    | 472000    |
| train/                |           |
|    entropy_loss       | -30.6     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 94399     |
|    policy_loss        | -4.32e-07 |
|    std                | 1.14      |
|    value_loss         | 2.43e-16  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.94e+03 |
| time/                 |           |
|    fps                | 308       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.94e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 1600      |
|    time_elapsed       | 28        |
|    total_timesteps    | 478000    |
| train/                |           |
|    entropy_loss       | -30.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 95599     |
|    policy_loss        | 4.15e-06  |
|    std                | 1.15      |
|    value_loss         | 1.51e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.93e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 1700      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.83e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 484000    |
| train/                |           |
|    entropy_loss       | -30.7     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 96799     |
|    policy_loss        | 2.34e-05  |
|    std                | 1.14      |
|    value_loss         | 1.36e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.82e+03 |
| time/                 |           |
|    fps                | 276       |
|    iterations         | 900       |
|    time_elapsed       | 16        |
|    total_t

Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 290       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 490500    |
| train/                |           |
|    entropy_loss       | -30.9     |
|    explained_variance | -0.306    |
|    learning_rate      | 0.0007    |
|    n_updates          | 98099     |
|    policy_loss        | 0.0163    |
|    std                | 1.16      |
|    value_loss         | 3.61e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 270       |
|    iterations         | 200       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 288       |
|    iterations         | 1300      |
|    time_elapsed       | 22        |
|    total_timesteps    | 496500    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 99299     |
|    policy_loss        | -0.000157 |
|    std                | 1.17      |
|    value_loss         | 2.07e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.75e+03 |
| time/                 |           |
|    fps                | 287       |
|    iterations         | 1400      |
|    time_elapsed       | 24        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.76e+03 |
| time/                 |           |
|    fps                | 250       |
|    iterations         | 500       |
|    time_elapsed       | 9         |
|    total_timesteps    | 502500    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 100499    |
|    policy_loss        | -0.000917 |
|    std                | 1.17      |
|    value_loss         | 9.27e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.75e+03 |
| time/                 |           |
|    fps                | 259       |
|    iterations         | 600       |
|    time_elapsed       | 11        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 280       |
|    iterations         | 1700      |
|    time_elapsed       | 30        |
|    total_timesteps    | 508500    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 101699    |
|    policy_loss        | 0.0268    |
|    std                | 1.17      |
|    value_loss         | 9.01e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 278       |
|    iterations         | 1800      |
|    time_elapsed       | 32        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.59e+03 |
| time/                 |           |
|    fps                | 294       |
|    iterations         | 900       |
|    time_elapsed       | 15        |
|    total_timesteps    | 514500    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 102899    |
|    policy_loss        | 5.51e-06  |
|    std                | 1.17      |
|    value_loss         | 4.34e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.59e+03 |
| time/                 |           |
|    fps                | 298       |
|    iterations         | 1000      |
|    time_elapsed       | 16        |
|    total_t

Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.51e+03 |
| time/                 |           |
|    fps                | 281       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 520500    |
| train/                |           |
|    entropy_loss       | -31       |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 104099    |
|    policy_loss        | 1.44e-05  |
|    std                | 1.16      |
|    value_loss         | 4.79e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.51e+03 |
| time/                 |           |
|    fps                | 300       |
|    iterations         | 200       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.47e+03 |
| time/                 |           |
|    fps                | 260       |
|    iterations         | 1400      |
|    time_elapsed       | 26        |
|    total_timesteps    | 527000    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 105399    |
|    policy_loss        | 0.000178  |
|    std                | 1.17      |
|    value_loss         | 5.56e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.45e+03 |
| time/                 |           |
|    fps                | 262       |
|    iterations         | 1500      |
|    time_elapsed       | 28        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.42e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 600       |
|    time_elapsed       | 10        |
|    total_timesteps    | 533000    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 106599    |
|    policy_loss        | -0.0176   |
|    std                | 1.17      |
|    value_loss         | 4.41e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.41e+03 |
| time/                 |           |
|    fps                | 279       |
|    iterations         | 700       |
|    time_elapsed       | 12        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.36e+03 |
| time/                 |           |
|    fps                | 285       |
|    iterations         | 1800      |
|    time_elapsed       | 31        |
|    total_timesteps    | 539000    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 107799    |
|    policy_loss        | 2.62e-05  |
|    std                | 1.17      |
|    value_loss         | 1.28e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.36e+03 |
| time/                 |           |
|    fps                | 283       |
|    iterations         | 1900      |
|    time_elapsed       | 33        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.22e+03 |
| time/                 |           |
|    fps                | 300       |
|    iterations         | 1000      |
|    time_elapsed       | 16        |
|    total_timesteps    | 545000    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 108999    |
|    policy_loss        | 2.88e-06  |
|    std                | 1.18      |
|    value_loss         | 6.53e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.22e+03 |
| time/                 |           |
|    fps                | 302       |
|    iterations         | 1100      |
|    time_elapsed       | 18        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 324       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 551000    |
| train/                |           |
|    entropy_loss       | -31.3     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 110199    |
|    policy_loss        | -7.48e-07 |
|    std                | 1.19      |
|    value_loss         | 7.43e-16  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 325       |
|    iterations         | 300       |
|    time_elapsed       | 4         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.87e+03 |
| time/                 |           |
|    fps                | 305       |
|    iterations         | 1500      |
|    time_elapsed       | 24        |
|    total_timesteps    | 557500    |
| train/                |           |
|    entropy_loss       | -31.4     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 111499    |
|    policy_loss        | -3.31e-05 |
|    std                | 1.19      |
|    value_loss         | 1.53e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.87e+03 |
| time/                 |           |
|    fps                | 301       |
|    iterations         | 1600      |
|    time_elapsed       | 26        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.78e+03 |
| time/                 |           |
|    fps                | 317       |
|    iterations         | 700       |
|    time_elapsed       | 11        |
|    total_timesteps    | 563500    |
| train/                |           |
|    entropy_loss       | -31.4     |
|    explained_variance | -1.06     |
|    learning_rate      | 0.0007    |
|    n_updates          | 112699    |
|    policy_loss        | 0.000179  |
|    std                | 1.19      |
|    value_loss         | 1.2e-10   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.78e+03 |
| time/                 |           |
|    fps                | 318       |
|    iterations         | 800       |
|    time_elapsed       | 12        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.77e+03 |
| time/                 |           |
|    fps                | 304       |
|    iterations         | 1900      |
|    time_elapsed       | 31        |
|    total_timesteps    | 569500    |
| train/                |           |
|    entropy_loss       | -31.3     |
|    explained_variance | -0.461    |
|    learning_rate      | 0.0007    |
|    n_updates          | 113899    |
|    policy_loss        | 0.00031   |
|    std                | 1.19      |
|    value_loss         | 1.83e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.77e+03 |
| time/                 |           |
|    fps                | 305       |
|    iterations         | 2000      |
|    time_elapsed       | 32        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -7.8e+03 |
| time/                 |          |
|    fps                | 294      |
|    iterations         | 1100     |
|    time_elapsed       | 18       |
|    total_timesteps    | 575500   |
| train/                |          |
|    entropy_loss       | -31.2    |
|    explained_variance | -104     |
|    learning_rate      | 0.0007   |
|    n_updates          | 115099   |
|    policy_loss        | -0.00169 |
|    std                | 1.18     |
|    value_loss         | 2.68e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -7.8e+03 |
| time/                 |          |
|    fps                | 290      |
|    iterations         | 1200     |
|    time_elapsed       | 20       |
|    total_timesteps    | 576000   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.77e+03 |
| time/                 |           |
|    fps                | 304       |
|    iterations         | 400       |
|    time_elapsed       | 6         |
|    total_timesteps    | 582000    |
| train/                |           |
|    entropy_loss       | -31.4     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 116399    |
|    policy_loss        | 2.72e-05  |
|    std                | 1.19      |
|    value_loss         | 1.35e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.77e+03 |
| time/                 |           |
|    fps                | 303       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.86e+03 |
| time/                 |           |
|    fps                | 279       |
|    iterations         | 1600      |
|    time_elapsed       | 28        |
|    total_timesteps    | 588000    |
| train/                |           |
|    entropy_loss       | -31.7     |
|    explained_variance | nan       |
|    learning_rate      | 0.0007    |
|    n_updates          | 117599    |
|    policy_loss        | -0        |
|    std                | 1.21      |
|    value_loss         | 0         |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.86e+03 |
| time/                 |           |
|    fps                | 280       |
|    iterations         | 1700      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.83e+03 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 594000    |
| train/                |           |
|    entropy_loss       | -31.9     |
|    explained_variance | 0.449     |
|    learning_rate      | 0.0007    |
|    n_updates          | 118799    |
|    policy_loss        | 0.00227   |
|    std                | 1.22      |
|    value_loss         | 8.06e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.83e+03 |
| time/                 |           |
|    fps                | 272       |
|    iterations         | 900       |
|    time_elapsed       | 16        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.83e+03 |
| time/                 |           |
|    fps                | 285       |
|    iterations         | 2000      |
|    time_elapsed       | 35        |
|    total_timesteps    | 600000    |
| train/                |           |
|    entropy_loss       | -31.8     |
|    explained_variance | -0.349    |
|    learning_rate      | 0.0007    |
|    n_updates          | 119999    |
|    policy_loss        | -0.034    |
|    std                | 1.22      |
|    value_loss         | 1.36e-06  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.83e+03 |
| time/                 |           |
|    fps                | 291       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.82e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 1200      |
|    time_elapsed       | 21        |
|    total_timesteps    | 606000    |
| train/                |           |
|    entropy_loss       | -31.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 121199    |
|    policy_loss        | 2.48e-05  |
|    std                | 1.22      |
|    value_loss         | 1.14e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.82e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 1300      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.85e+03 |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_timesteps    | 612000    |
| train/                |           |
|    entropy_loss       | -31.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 122399    |
|    policy_loss        | -1.47e-05 |
|    std                | 1.22      |
|    value_loss         | 5e-14     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.84e+03 |
| time/                 |           |
|    fps                | 280       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.89e+03 |
| time/                 |           |
|    fps                | 257       |
|    iterations         | 1600      |
|    time_elapsed       | 31        |
|    total_timesteps    | 618000    |
| train/                |           |
|    entropy_loss       | -32.1     |
|    explained_variance | -6.99     |
|    learning_rate      | 0.0007    |
|    n_updates          | 123599    |
|    policy_loss        | -0.00168  |
|    std                | 1.23      |
|    value_loss         | 9.91e-08  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -7.9e+03 |
| time/                 |          |
|    fps                | 258      |
|    iterations         | 1700     |
|    time_elapsed       | 32       |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.93e+03 |
| time/                 |           |
|    fps                | 229       |
|    iterations         | 900       |
|    time_elapsed       | 19        |
|    total_timesteps    | 624500    |
| train/                |           |
|    entropy_loss       | -32.3     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 124899    |
|    policy_loss        | -0.00511  |
|    std                | 1.25      |
|    value_loss         | 3.54e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.93e+03 |
| time/                 |           |
|    fps                | 225       |
|    iterations         | 1000      |
|    time_elapsed       | 22        |
|    total_t

Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.94e+03 |
| time/                 |           |
|    fps                | 229       |
|    iterations         | 100       |
|    time_elapsed       | 2         |
|    total_timesteps    | 630500    |
| train/                |           |
|    entropy_loss       | -32.5     |
|    explained_variance | -0.0168   |
|    learning_rate      | 0.0007    |
|    n_updates          | 126099    |
|    policy_loss        | -0.0419   |
|    std                | 1.26      |
|    value_loss         | 1.94e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.96e+03 |
| time/                 |           |
|    fps                | 235       |
|    iterations         | 200       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.98e+03 |
| time/                 |           |
|    fps                | 256       |
|    iterations         | 1300      |
|    time_elapsed       | 25        |
|    total_timesteps    | 636500    |
| train/                |           |
|    entropy_loss       | -32.7     |
|    explained_variance | -28.5     |
|    learning_rate      | 0.0007    |
|    n_updates          | 127299    |
|    policy_loss        | 0.0109    |
|    std                | 1.27      |
|    value_loss         | 1.47e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.98e+03 |
| time/                 |           |
|    fps                | 259       |
|    iterations         | 1400      |
|    time_elapsed       | 26        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.04e+03 |
| time/                 |           |
|    fps                | 284       |
|    iterations         | 600       |
|    time_elapsed       | 10        |
|    total_timesteps    | 643000    |
| train/                |           |
|    entropy_loss       | -32.7     |
|    explained_variance | 0.616     |
|    learning_rate      | 0.0007    |
|    n_updates          | 128599    |
|    policy_loss        | 0.00581   |
|    std                | 1.27      |
|    value_loss         | 6.09e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.04e+03 |
| time/                 |           |
|    fps                | 288       |
|    iterations         | 700       |
|    time_elapsed       | 12        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.19e+03 |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 1900      |
|    time_elapsed       | 34        |
|    total_timesteps    | 649500    |
| train/                |           |
|    entropy_loss       | -32.8     |
|    explained_variance | 0.907     |
|    learning_rate      | 0.0007    |
|    n_updates          | 129899    |
|    policy_loss        | -0.0129   |
|    std                | 1.27      |
|    value_loss         | 1.6e-07   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.25e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 2000      |
|    time_elapsed       | 36        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.35e+03 |
| time/                 |           |
|    fps                | 300       |
|    iterations         | 1100      |
|    time_elapsed       | 18        |
|    total_timesteps    | 655500    |
| train/                |           |
|    entropy_loss       | -32.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 131099    |
|    policy_loss        | 0.000234  |
|    std                | 1.28      |
|    value_loss         | 6.26e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.35e+03 |
| time/                 |           |
|    fps                | 299       |
|    iterations         | 1200      |
|    time_elapsed       | 20        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.43e+03 |
| time/                 |           |
|    fps                | 293       |
|    iterations         | 300       |
|    time_elapsed       | 5         |
|    total_timesteps    | 661500    |
| train/                |           |
|    entropy_loss       | -32.9     |
|    explained_variance | -21.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 132299    |
|    policy_loss        | 0.0466    |
|    std                | 1.28      |
|    value_loss         | 2.4e-06   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.43e+03 |
| time/                 |           |
|    fps                | 294       |
|    iterations         | 400       |
|    time_elapsed       | 6         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.49e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 1500      |
|    time_elapsed       | 26        |
|    total_timesteps    | 667500    |
| train/                |           |
|    entropy_loss       | -33       |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 133499    |
|    policy_loss        | 0.00029   |
|    std                | 1.29      |
|    value_loss         | 1.01e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.49e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 1600      |
|    time_elapsed       | 27        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.46e+03 |
| time/                 |           |
|    fps                | 218       |
|    iterations         | 700       |
|    time_elapsed       | 16        |
|    total_timesteps    | 673500    |
| train/                |           |
|    entropy_loss       | -33       |
|    explained_variance | -1.87     |
|    learning_rate      | 0.0007    |
|    n_updates          | 134699    |
|    policy_loss        | 0.0062    |
|    std                | 1.29      |
|    value_loss         | 6.1e-08   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.46e+03 |
| time/                 |           |
|    fps                | 218       |
|    iterations         | 800       |
|    time_elapsed       | 18        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.44e+03 |
| time/                 |           |
|    fps                | 219       |
|    iterations         | 1900      |
|    time_elapsed       | 43        |
|    total_timesteps    | 679500    |
| train/                |           |
|    entropy_loss       | -33.1     |
|    explained_variance | nan       |
|    learning_rate      | 0.0007    |
|    n_updates          | 135899    |
|    policy_loss        | -0        |
|    std                | 1.3       |
|    value_loss         | 0         |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.44e+03 |
| time/                 |           |
|    fps                | 220       |
|    iterations         | 2000      |
|    time_elapsed       | 45        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.48e+03 |
| time/                 |           |
|    fps                | 206       |
|    iterations         | 1100      |
|    time_elapsed       | 26        |
|    total_timesteps    | 685500    |
| train/                |           |
|    entropy_loss       | -33.4     |
|    explained_variance | -0.179    |
|    learning_rate      | 0.0007    |
|    n_updates          | 137099    |
|    policy_loss        | -2.03e-05 |
|    std                | 1.31      |
|    value_loss         | 4.16e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.48e+03 |
| time/                 |           |
|    fps                | 206       |
|    iterations         | 1200      |
|    time_elapsed       | 28        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.5e+03 |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 300      |
|    time_elapsed       | 7        |
|    total_timesteps    | 691500   |
| train/                |          |
|    entropy_loss       | -33.4    |
|    explained_variance | -4.64    |
|    learning_rate      | 0.0007   |
|    n_updates          | 138299   |
|    policy_loss        | -0.00223 |
|    std                | 1.32     |
|    value_loss         | 2.2e-08  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.51e+03 |
| time/                 |           |
|    fps                | 199       |
|    iterations         | 400       |
|    time_elapsed       | 10        |
|    total_timesteps    | 6920

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.5e+03  |
| time/                 |           |
|    fps                | 202       |
|    iterations         | 1600      |
|    time_elapsed       | 39        |
|    total_timesteps    | 698000    |
| train/                |           |
|    entropy_loss       | -33.4     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 139599    |
|    policy_loss        | -3.69e-07 |
|    std                | 1.31      |
|    value_loss         | 6.2e-14   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.48e+03 |
| time/                 |           |
|    fps                | 203       |
|    iterations         | 1700      |
|    time_elapsed       | 41        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.48e+03 |
| time/                 |           |
|    fps                | 208       |
|    iterations         | 800       |
|    time_elapsed       | 19        |
|    total_timesteps    | 704000    |
| train/                |           |
|    entropy_loss       | -33.3     |
|    explained_variance | 0.387     |
|    learning_rate      | 0.0007    |
|    n_updates          | 140799    |
|    policy_loss        | 0.0271    |
|    std                | 1.31      |
|    value_loss         | 7.54e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.48e+03 |
| time/                 |           |
|    fps                | 204       |
|    iterations         | 900       |
|    time_elapsed       | 22        |
|    total_t

Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -2.19e+03 |
| time/                 |           |
|    fps                | 217       |
|    iterations         | 100       |
|    time_elapsed       | 2         |
|    total_timesteps    | 710500    |
| train/                |           |
|    entropy_loss       | -33.5     |
|    explained_variance | -0.346    |
|    learning_rate      | 0.0007    |
|    n_updates          | 142099    |
|    policy_loss        | -0.00133  |
|    std                | 1.32      |
|    value_loss         | 1.86e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -2.19e+03 |
| time/                 |           |
|    fps                | 222       |
|    iterations         | 200       |
|    time_elapse

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.56e+03 |
| time/                 |          |
|    fps                | 214      |
|    iterations         | 1400     |
|    time_elapsed       | 32       |
|    total_timesteps    | 717000   |
| train/                |          |
|    entropy_loss       | -33.5    |
|    explained_variance | -2.95    |
|    learning_rate      | 0.0007   |
|    n_updates          | 143399   |
|    policy_loss        | 0.000516 |
|    std                | 1.32     |
|    value_loss         | 3.28e-10 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.56e+03  |
| time/                 |           |
|    fps                | 216       |
|    iterations         | 1500      |
|    time_elapsed       | 34        |
|    total_timesteps    | 7175

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.54e+03 |
| time/                 |          |
|    fps                | 230      |
|    iterations         | 700      |
|    time_elapsed       | 15       |
|    total_timesteps    | 723500   |
| train/                |          |
|    entropy_loss       | -33.4    |
|    explained_variance | 0.355    |
|    learning_rate      | 0.0007   |
|    n_updates          | 144699   |
|    policy_loss        | 0.000496 |
|    std                | 1.31     |
|    value_loss         | 3.04e-10 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.53e+03  |
| time/                 |           |
|    fps                | 228       |
|    iterations         | 800       |
|    time_elapsed       | 17        |
|    total_timesteps    | 7240

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.5e+03  |
| time/                 |          |
|    fps                | 236      |
|    iterations         | 2000     |
|    time_elapsed       | 42       |
|    total_timesteps    | 730000   |
| train/                |          |
|    entropy_loss       | -33.6    |
|    explained_variance | -323     |
|    learning_rate      | 0.0007   |
|    n_updates          | 145999   |
|    policy_loss        | 3.71e-05 |
|    std                | 1.33     |
|    value_loss         | 1.49e-12 |
------------------------------------
Logging to logs\A2C_1D_norm_all_0
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.5e+03  |
| time/                 |          |
|    fps                | 204      |
|    iterations         | 100      |
|    time_elapsed       | 2        |
|   

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.47e+03 |
| time/                 |          |
|    fps                | 223      |
|    iterations         | 1300     |
|    time_elapsed       | 29       |
|    total_timesteps    | 736500   |
| train/                |          |
|    entropy_loss       | -33.5    |
|    explained_variance | 0.455    |
|    learning_rate      | 0.0007   |
|    n_updates          | 147299   |
|    policy_loss        | -0.0205  |
|    std                | 1.32     |
|    value_loss         | 6.31e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.47e+03  |
| time/                 |           |
|    fps                | 223       |
|    iterations         | 1400      |
|    time_elapsed       | 31        |
|    total_timesteps    | 7370

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.44e+03  |
| time/                 |           |
|    fps                | 224       |
|    iterations         | 600       |
|    time_elapsed       | 13        |
|    total_timesteps    | 743000    |
| train/                |           |
|    entropy_loss       | -33.5     |
|    explained_variance | 0.0109    |
|    learning_rate      | 0.0007    |
|    n_updates          | 148599    |
|    policy_loss        | -5.58e+04 |
|    std                | 1.33      |
|    value_loss         | 1.98e+07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.44e+03 |
| time/                 |          |
|    fps                | 223      |
|    iterations         | 700      |
|    time_elapsed       | 15       |
|    total_timesteps

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.42e+03 |
| time/                 |          |
|    fps                | 206      |
|    iterations         | 1900     |
|    time_elapsed       | 45       |
|    total_timesteps    | 749500   |
| train/                |          |
|    entropy_loss       | -33.6    |
|    explained_variance | -0.137   |
|    learning_rate      | 0.0007   |
|    n_updates          | 149899   |
|    policy_loss        | 0.00432  |
|    std                | 1.33     |
|    value_loss         | 1.81e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.42e+03 |
| time/                 |          |
|    fps                | 205      |
|    iterations         | 2000     |
|    time_elapsed       | 48       |
|    total_timesteps    | 750000   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.39e+03  |
| time/                 |           |
|    fps                | 215       |
|    iterations         | 1200      |
|    time_elapsed       | 27        |
|    total_timesteps    | 756000    |
| train/                |           |
|    entropy_loss       | -33.7     |
|    explained_variance | -51.8     |
|    learning_rate      | 0.0007    |
|    n_updates          | 151199    |
|    policy_loss        | -0.000515 |
|    std                | 1.33      |
|    value_loss         | 3.76e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.38e+03  |
| time/                 |           |
|    fps                | 211       |
|    iterations         | 1300      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.36e+03  |
| time/                 |           |
|    fps                | 211       |
|    iterations         | 500       |
|    time_elapsed       | 11        |
|    total_timesteps    | 762500    |
| train/                |           |
|    entropy_loss       | -33.9     |
|    explained_variance | -135      |
|    learning_rate      | 0.0007    |
|    n_updates          | 152499    |
|    policy_loss        | -0.000722 |
|    std                | 1.35      |
|    value_loss         | 8.02e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.36e+03  |
| time/                 |           |
|    fps                | 205       |
|    iterations         | 600       |
|    time_elapsed       | 14        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.35e+03 |
| time/                 |          |
|    fps                | 215      |
|    iterations         | 1800     |
|    time_elapsed       | 41       |
|    total_timesteps    | 769000   |
| train/                |          |
|    entropy_loss       | -34      |
|    explained_variance | -7.44    |
|    learning_rate      | 0.0007   |
|    n_updates          | 153799   |
|    policy_loss        | -0.00238 |
|    std                | 1.35     |
|    value_loss         | 6.11e-09 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.35e+03 |
| time/                 |          |
|    fps                | 214      |
|    iterations         | 1900     |
|    time_elapsed       | 44       |
|    total_timesteps    | 769500   |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.3e+03  |
| time/                 |          |
|    fps                | 220      |
|    iterations         | 1100     |
|    time_elapsed       | 24       |
|    total_timesteps    | 775500   |
| train/                |          |
|    entropy_loss       | -33.9    |
|    explained_variance | 0.144    |
|    learning_rate      | 0.0007   |
|    n_updates          | 155099   |
|    policy_loss        | 0.00239  |
|    std                | 1.35     |
|    value_loss         | 5e-09    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.3e+03  |
| time/                 |          |
|    fps                | 223      |
|    iterations         | 1200     |
|    time_elapsed       | 26       |
|    total_timesteps    | 776000   |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.28e+03 |
| time/                 |          |
|    fps                | 259      |
|    iterations         | 400      |
|    time_elapsed       | 7        |
|    total_timesteps    | 782000   |
| train/                |          |
|    entropy_loss       | -34.2    |
|    explained_variance | 0.372    |
|    learning_rate      | 0.0007   |
|    n_updates          | 156399   |
|    policy_loss        | 0.00115  |
|    std                | 1.37     |
|    value_loss         | 1.78e-09 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.28e+03 |
| time/                 |          |
|    fps                | 258      |
|    iterations         | 500      |
|    time_elapsed       | 9        |
|    total_timesteps    | 782500   |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.27e+03 |
| time/                 |          |
|    fps                | 260      |
|    iterations         | 1700     |
|    time_elapsed       | 32       |
|    total_timesteps    | 788500   |
| train/                |          |
|    entropy_loss       | -34.4    |
|    explained_variance | -2.11    |
|    learning_rate      | 0.0007   |
|    n_updates          | 157699   |
|    policy_loss        | 0.0175   |
|    std                | 1.39     |
|    value_loss         | 3.34e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.27e+03 |
| time/                 |          |
|    fps                | 260      |
|    iterations         | 1800     |
|    time_elapsed       | 34       |
|    total_timesteps    | 789000   |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.26e+03 |
| time/                 |          |
|    fps                | 267      |
|    iterations         | 1000     |
|    time_elapsed       | 18       |
|    total_timesteps    | 795000   |
| train/                |          |
|    entropy_loss       | -34.6    |
|    explained_variance | 5.96e-08 |
|    learning_rate      | 0.0007   |
|    n_updates          | 158999   |
|    policy_loss        | 12.1     |
|    std                | 1.4      |
|    value_loss         | 4        |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | 1.26e+03 |
| time/                 |          |
|    fps                | 267      |
|    iterations         | 1100     |
|    time_elapsed       | 20       |
|    total_timesteps    | 795500   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.22e+03  |
| time/                 |           |
|    fps                | 229       |
|    iterations         | 300       |
|    time_elapsed       | 6         |
|    total_timesteps    | 801500    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 160299    |
|    policy_loss        | 5.62e+04  |
|    std                | 1.4       |
|    value_loss         | 3.92e+07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | 1.22e+03  |
| time/                 |           |
|    fps                | 210       |
|    iterations         | 400       |
|    time_elapsed       | 9         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -3.29e+03 |
| time/                 |           |
|    fps                | 231       |
|    iterations         | 1600      |
|    time_elapsed       | 34        |
|    total_timesteps    | 808000    |
| train/                |           |
|    entropy_loss       | -34.6     |
|    explained_variance | -10.1     |
|    learning_rate      | 0.0007    |
|    n_updates          | 161599    |
|    policy_loss        | -0.0136   |
|    std                | 1.39      |
|    value_loss         | 1.82e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -3.29e+03 |
| time/                 |           |
|    fps                | 232       |
|    iterations         | 1700      |
|    time_elapsed       | 36        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.05e+03 |
| time/                 |           |
|    fps                | 234       |
|    iterations         | 800       |
|    time_elapsed       | 17        |
|    total_timesteps    | 814000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 162799    |
|    policy_loss        | -7.35e-06 |
|    std                | 1.39      |
|    value_loss         | 6.21e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -6.05e+03 |
| time/                 |           |
|    fps                | 234       |
|    iterations         | 900       |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 232       |
|    iterations         | 2000      |
|    time_elapsed       | 43        |
|    total_timesteps    | 820000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 163999    |
|    policy_loss        | 1.08e-05  |
|    std                | 1.39      |
|    value_loss         | 4.86e-14  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 175       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.61e+03 |
| time/                 |           |
|    fps                | 221       |
|    iterations         | 1200      |
|    time_elapsed       | 27        |
|    total_timesteps    | 826000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 165199    |
|    policy_loss        | 0.00353   |
|    std                | 1.39      |
|    value_loss         | 1.32e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.61e+03 |
| time/                 |           |
|    fps                | 221       |
|    iterations         | 1300      |
|    time_elapsed       | 29        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.36e+03 |
| time/                 |           |
|    fps                | 280       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_timesteps    | 832000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 166399    |
|    policy_loss        | -1.36e-05 |
|    std                | 1.39      |
|    value_loss         | 4.09e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.36e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.21e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 1600      |
|    time_elapsed       | 27        |
|    total_timesteps    | 838000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 167599    |
|    policy_loss        | -3.64e-06 |
|    std                | 1.39      |
|    value_loss         | 1.36e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.13e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 1700      |
|    time_elapsed       | 29        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.86e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 844000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | 0.305     |
|    learning_rate      | 0.0007    |
|    n_updates          | 168799    |
|    policy_loss        | -0.0128   |
|    std                | 1.39      |
|    value_loss         | 1.52e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.86e+03 |
| time/                 |           |
|    fps                | 280       |
|    iterations         | 900       |
|    time_elapsed       | 16        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.82e+03 |
| time/                 |           |
|    fps                | 272       |
|    iterations         | 2000      |
|    time_elapsed       | 36        |
|    total_timesteps    | 850000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | -1.59     |
|    learning_rate      | 0.0007    |
|    n_updates          | 169999    |
|    policy_loss        | -0.0113   |
|    std                | 1.39      |
|    value_loss         | 1.27e-07  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.82e+03 |
| time/                 |           |
|    fps                | 232       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.82e+03 |
| time/                 |           |
|    fps                | 276       |
|    iterations         | 1200      |
|    time_elapsed       | 21        |
|    total_timesteps    | 856000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | -4.14     |
|    learning_rate      | 0.0007    |
|    n_updates          | 171199    |
|    policy_loss        | 0.0135    |
|    std                | 1.39      |
|    value_loss         | 2.19e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.82e+03 |
| time/                 |           |
|    fps                | 276       |
|    iterations         | 1300      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.82e+03 |
| time/                 |           |
|    fps                | 216       |
|    iterations         | 400       |
|    time_elapsed       | 9         |
|    total_timesteps    | 862000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | -0.776    |
|    learning_rate      | 0.0007    |
|    n_updates          | 172399    |
|    policy_loss        | 0.000271  |
|    std                | 1.38      |
|    value_loss         | 9.67e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.82e+03 |
| time/                 |           |
|    fps                | 220       |
|    iterations         | 500       |
|    time_elapsed       | 11        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.71e+03 |
| time/                 |           |
|    fps                | 231       |
|    iterations         | 1600      |
|    time_elapsed       | 34        |
|    total_timesteps    | 868000    |
| train/                |           |
|    entropy_loss       | -34.4     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 173599    |
|    policy_loss        | -1.7e-05  |
|    std                | 1.38      |
|    value_loss         | 4.72e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.71e+03 |
| time/                 |           |
|    fps                | 231       |
|    iterations         | 1700      |
|    time_elapsed       | 36        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.65e+03 |
| time/                 |           |
|    fps                | 234       |
|    iterations         | 800       |
|    time_elapsed       | 17        |
|    total_timesteps    | 874000    |
| train/                |           |
|    entropy_loss       | -34.6     |
|    explained_variance | -5.14     |
|    learning_rate      | 0.0007    |
|    n_updates          | 174799    |
|    policy_loss        | 0.000907  |
|    std                | 1.39      |
|    value_loss         | 6.11e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.65e+03 |
| time/                 |           |
|    fps                | 233       |
|    iterations         | 900       |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.65e+03 |
| time/                 |           |
|    fps                | 233       |
|    iterations         | 2000      |
|    time_elapsed       | 42        |
|    total_timesteps    | 880000    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 0.836     |
|    learning_rate      | 0.0007    |
|    n_updates          | 175999    |
|    policy_loss        | 0.00296   |
|    std                | 1.4       |
|    value_loss         | 1.14e-08  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.65e+03 |
| time/                 |           |
|    fps                | 211       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.61e+03 |
| time/                 |           |
|    fps                | 257       |
|    iterations         | 1200      |
|    time_elapsed       | 23        |
|    total_timesteps    | 886000    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | -910      |
|    learning_rate      | 0.0007    |
|    n_updates          | 177199    |
|    policy_loss        | 0.00315   |
|    std                | 1.41      |
|    value_loss         | 9.62e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.61e+03 |
| time/                 |           |
|    fps                | 258       |
|    iterations         | 1300      |
|    time_elapsed       | 25        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.47e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_timesteps    | 892000    |
| train/                |           |
|    entropy_loss       | -34.6     |
|    explained_variance | -0.347    |
|    learning_rate      | 0.0007    |
|    n_updates          | 178399    |
|    policy_loss        | 0.0478    |
|    std                | 1.39      |
|    value_loss         | 2.45e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.47e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 500       |
|    time_elapsed       | 9         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.32e+03 |
| time/                 |           |
|    fps                | 279       |
|    iterations         | 1600      |
|    time_elapsed       | 28        |
|    total_timesteps    | 898000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 179599    |
|    policy_loss        | -0.00902  |
|    std                | 1.39      |
|    value_loss         | 8.62e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.29e+03 |
| time/                 |           |
|    fps                | 279       |
|    iterations         | 1700      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.22e+03 |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 904000    |
| train/                |           |
|    entropy_loss       | -34.6     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 180799    |
|    policy_loss        | -0.129    |
|    std                | 1.39      |
|    value_loss         | 1.54e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.18e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 900       |
|    time_elapsed       | 16        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.03e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 2000      |
|    time_elapsed       | 36        |
|    total_timesteps    | 910000    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 181999    |
|    policy_loss        | 0.038     |
|    std                | 1.4       |
|    value_loss         | 1.49e-06  |
-------------------------------------
Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.03e+03 |
| time/                 |           |
|    fps                | 281       |
|    iterations         | 100       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.03e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 1200      |
|    time_elapsed       | 21        |
|    total_timesteps    | 916000    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | -219      |
|    learning_rate      | 0.0007    |
|    n_updates          | 183199    |
|    policy_loss        | -0.000519 |
|    std                | 1.41      |
|    value_loss         | 1.23e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.03e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 1300      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.97e+03 |
| time/                 |           |
|    fps                | 248       |
|    iterations         | 400       |
|    time_elapsed       | 8         |
|    total_timesteps    | 922000    |
| train/                |           |
|    entropy_loss       | -35       |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 184399    |
|    policy_loss        | -0.000156 |
|    std                | 1.42      |
|    value_loss         | 1.56e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.97e+03 |
| time/                 |           |
|    fps                | 245       |
|    iterations         | 500       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.98e+03 |
| time/                 |           |
|    fps                | 254       |
|    iterations         | 1700      |
|    time_elapsed       | 33        |
|    total_timesteps    | 928500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 185699    |
|    policy_loss        | 0.00753   |
|    std                | 1.41      |
|    value_loss         | 5.7e-08   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.01e+03 |
| time/                 |           |
|    fps                | 253       |
|    iterations         | 1800      |
|    time_elapsed       | 35        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.08e+03 |
| time/                 |           |
|    fps                | 262       |
|    iterations         | 900       |
|    time_elapsed       | 17        |
|    total_timesteps    | 934500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | 0.401     |
|    learning_rate      | 0.0007    |
|    n_updates          | 186899    |
|    policy_loss        | -0.000169 |
|    std                | 1.41      |
|    value_loss         | 6.7e-11   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.08e+03 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 1000      |
|    time_elapsed       | 18        |
|    total_t

Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.31e+03 |
| time/                 |           |
|    fps                | 216       |
|    iterations         | 100       |
|    time_elapsed       | 2         |
|    total_timesteps    | 940500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | -41.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 188099    |
|    policy_loss        | -0.00613  |
|    std                | 1.41      |
|    value_loss         | 4.1e-08   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.31e+03 |
| time/                 |           |
|    fps                | 235       |
|    iterations         | 200       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.54e+03 |
| time/                 |           |
|    fps                | 255       |
|    iterations         | 1300      |
|    time_elapsed       | 25        |
|    total_timesteps    | 946500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0007    |
|    n_updates          | 189299    |
|    policy_loss        | 4.73e-05  |
|    std                | 1.41      |
|    value_loss         | 5.58e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.54e+03 |
| time/                 |           |
|    fps                | 257       |
|    iterations         | 1400      |
|    time_elapsed       | 27        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.42e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 500       |
|    time_elapsed       | 9         |
|    total_timesteps    | 952500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | -12.2     |
|    learning_rate      | 0.0007    |
|    n_updates          | 190499    |
|    policy_loss        | -0.0406   |
|    std                | 1.41      |
|    value_loss         | 1.77e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.42e+03 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 600       |
|    time_elapsed       | 11        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.27e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 1700      |
|    time_elapsed       | 30        |
|    total_timesteps    | 958500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 191699    |
|    policy_loss        | 1.62e-05  |
|    std                | 1.41      |
|    value_loss         | 4.86e-14  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -8.2e+03 |
| time/                 |          |
|    fps                | 274      |
|    iterations         | 1800     |
|    time_elapsed       | 32       |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.03e+03 |
| time/                 |           |
|    fps                | 261       |
|    iterations         | 900       |
|    time_elapsed       | 17        |
|    total_timesteps    | 964500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 0.03      |
|    learning_rate      | 0.0007    |
|    n_updates          | 192899    |
|    policy_loss        | 4.88e+04  |
|    std                | 1.41      |
|    value_loss         | 3.67e+06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.97e+03 |
| time/                 |           |
|    fps                | 261       |
|    iterations         | 1000      |
|    time_elapsed       | 19        |
|    total_t

Logging to logs\A2C_1D_norm_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.01e+03 |
| time/                 |           |
|    fps                | 288       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 970500    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 194099    |
|    policy_loss        | -3.23e-05 |
|    std                | 1.4       |
|    value_loss         | 1.97e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.01e+03 |
| time/                 |           |
|    fps                | 287       |
|    iterations         | 200       |
|    time_elapse

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.94e+03 |
| time/                 |           |
|    fps                | 245       |
|    iterations         | 1400      |
|    time_elapsed       | 28        |
|    total_timesteps    | 977000    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 195399    |
|    policy_loss        | 5.76e-05  |
|    std                | 1.42      |
|    value_loss         | 6.02e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.94e+03 |
| time/                 |           |
|    fps                | 247       |
|    iterations         | 1500      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.93e+03 |
| time/                 |           |
|    fps                | 258       |
|    iterations         | 600       |
|    time_elapsed       | 11        |
|    total_timesteps    | 983000    |
| train/                |           |
|    entropy_loss       | -35       |
|    explained_variance | 0.345     |
|    learning_rate      | 0.0007    |
|    n_updates          | 196599    |
|    policy_loss        | 0.074     |
|    std                | 1.42      |
|    value_loss         | 5.79e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.93e+03 |
| time/                 |           |
|    fps                | 260       |
|    iterations         | 700       |
|    time_elapsed       | 13        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.92e+03 |
| time/                 |           |
|    fps                | 255       |
|    iterations         | 1800      |
|    time_elapsed       | 35        |
|    total_timesteps    | 989000    |
| train/                |           |
|    entropy_loss       | -35.1     |
|    explained_variance | -515      |
|    learning_rate      | 0.0007    |
|    n_updates          | 197799    |
|    policy_loss        | 0.00891   |
|    std                | 1.43      |
|    value_loss         | 8.78e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8e+03    |
| time/                 |           |
|    fps                | 256       |
|    iterations         | 1900      |
|    time_elapsed       | 36        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.95e+03 |
| time/                 |           |
|    fps                | 263       |
|    iterations         | 1000      |
|    time_elapsed       | 18        |
|    total_timesteps    | 995000    |
| train/                |           |
|    entropy_loss       | -35       |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 198999    |
|    policy_loss        | -1.9e-06  |
|    std                | 1.43      |
|    value_loss         | 3.88e-15  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.88e+03 |
| time/                 |           |
|    fps                | 265       |
|    iterations         | 1100      |
|    time_elapsed       | 20        |
|    total_t

# PPO 1D 4in All-symbol

In [8]:
import os
from stable_baselines3 import A2C
from stable_baselines3 import PPO
# A2C, PPO

# Output locations, both relative to the notebook's working directory.
models_dir = "models/PPO_1D_norm_all"
logdir = "logs"
# To monitor training, run from the working directory:
#   tensorboard --logdir logs

# exist_ok=True creates each directory only when it is missing, replacing the
# separate os.path.exists() check (check-then-create is racy and redundant).
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)


# env = gym.make('forex-hedge-v0')

# NOTE(review): `env` is not created in this cell; it must already exist from
# an earlier cell, otherwise this raises NameError on a fresh kernel.
env.reset()


model = PPO('MultiInputPolicy', env, verbose=1, tensorboard_log=logdir)

# Train in 100 chunks and write a checkpoint after each chunk.
TIMESTEPS = 10000
for i in range(1, 101):
    # reset_num_timesteps=False keeps the step counter (and the tensorboard
    # run) continuous across the successive learn() calls.
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="PPO_1D_norm_all")
    # The checkpoint name is the nominal step count (TIMESTEPS * i).
    # NOTE(review): PPO appears to collect whole rollouts, so the true
    # model.num_timesteps may be somewhat higher than the name -- confirm
    # before relying on the file name as an exact step count.
    model.save(f"{models_dir}/{TIMESTEPS*i}")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs\PPO_1D_norm_all_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.02e+03 |
|    ep_rew_mean     | -1e+04   |
| time/              |          |
|    fps             | 325      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -1e+04       |
| time/                   |              |
|    fps                  | 300          |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0121445535 |
|    clip_fraction        | 0.13         |
|    clip_range 

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -6.24e+03 |
| time/              |           |
|    fps             | 317       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 22528     |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.55e+03    |
| time/                   |              |
|    fps                  | 288          |
|    iterations           | 2            |
|    time_elapsed         | 14           |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 0.0042696404 |
|    clip_fraction        | 0.0458       |
|    clip_range           | 0.2          |
|    entropy_loss         | -28.6        |
|    explained

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -8.03e+03 |
| time/              |           |
|    fps             | 287       |
|    iterations      | 1         |
|    time_elapsed    | 7         |
|    total_timesteps | 43008     |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.12e+03   |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 2           |
|    time_elapsed         | 17          |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.012749372 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.7       |
|    explained_variance   | 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.71e+03   |
| time/                   |             |
|    fps                  | 296         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.007108765 |
|    clip_fraction        | 0.0478      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.7       |
|    explained_variance   | 0.0194      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.52e+06    |
|    n_updates            | 310         |
|    policy_gradient_loss | -0.004      |
|    std                  | 1.02        |
|    value_loss           | 1.63e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -7.76e+03    |
| time/                   |              |
|    fps                  | 282          |
|    iterations           | 3            |
|    time_elapsed         | 21           |
|    total_timesteps      | 88064        |
| train/                  |              |
|    approx_kl            | 0.0044788737 |
|    clip_fraction        | 0.0282       |
|    clip_range           | 0.2          |
|    entropy_loss         | -28.7        |
|    explained_variance   | 0.00261      |
|    learning_rate        | 0.0003       |
|    loss                 | 2.98e+06     |
|    n_updates            | 420          |
|    policy_gradient_loss | -0.00776     |
|    std                  | 1.02         |
|    value_loss           | 4.3e+06      |
------------------------------------------
---------------------------------------
| rollout/    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.88e+03   |
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 4           |
|    time_elapsed         | 26          |
|    total_timesteps      | 110592      |
| train/                  |             |
|    approx_kl            | 0.011547802 |
|    clip_fraction        | 0.0768      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.7       |
|    explained_variance   | 0.017       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.25e+06    |
|    n_updates            | 530         |
|    policy_gradient_loss | -0.00285    |
|    std                  | 1.02        |
|    value_loss           | 2.65e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -7.2e+03    |
| time/                   |             |
|    fps                  | 284         |
|    iterations           | 4           |
|    time_elapsed         | 28          |
|    total_timesteps      | 131072      |
| train/                  |             |
|    approx_kl            | 0.024084577 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.6       |
|    explained_variance   | -0.0112     |
|    learning_rate        | 0.0003      |
|    loss                 | 7.45e+05    |
|    n_updates            | 630         |
|    policy_gradient_loss | 0.00931     |
|    std                  | 1.01        |
|    value_loss           | 2.31e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -6.98e+03   |
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 5           |
|    time_elapsed         | 42          |
|    total_timesteps      | 153600      |
| train/                  |             |
|    approx_kl            | 0.011004493 |
|    clip_fraction        | 0.0918      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.6       |
|    explained_variance   | 0.0554      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.15e+05    |
|    n_updates            | 740         |
|    policy_gradient_loss | -0.00671    |
|    std                  | 1.01        |
|    value_loss           | 9.88e+05    |
-----------------------------------------
Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollo

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -6.98e+03 |
| time/              |           |
|    fps             | 309       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 176128    |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -6.61e+03   |
| time/                   |             |
|    fps                  | 228         |
|    iterations           | 2           |
|    time_elapsed         | 17          |
|    total_timesteps      | 178176      |
| train/                  |             |
|    approx_kl            | 0.015722405 |
|    clip_fraction        | 0.207       |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.6       |
|    explained_variance   | 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -5.98e+03    |
| time/                   |              |
|    fps                  | 314          |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 198656       |
| train/                  |              |
|    approx_kl            | 0.0016339527 |
|    clip_fraction        | 0.00747      |
|    clip_range           | 0.2          |
|    entropy_loss         | -28.8        |
|    explained_variance   | 0.0405       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.49e+06     |
|    n_updates            | 960          |
|    policy_gradient_loss | -0.00258     |
|    std                  | 1.02         |
|    value_loss           | 1.51e+06     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -5.19e+03   |
| time/                   |             |
|    fps                  | 302         |
|    iterations           | 3           |
|    time_elapsed         | 20          |
|    total_timesteps      | 221184      |
| train/                  |             |
|    approx_kl            | 0.017211089 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.8       |
|    explained_variance   | 0.115       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.25e+05    |
|    n_updates            | 1070        |
|    policy_gradient_loss | 0.00209     |
|    std                  | 1.03        |
|    value_loss           | 5.6e+05     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -6.54e+03  |
| time/                   |            |
|    fps                  | 310        |
|    iterations           | 4          |
|    time_elapsed         | 26         |
|    total_timesteps      | 243712     |
| train/                  |            |
|    approx_kl            | 0.05750744 |
|    clip_fraction        | 0.256      |
|    clip_range           | 0.2        |
|    entropy_loss         | -29        |
|    explained_variance   | 0.0694     |
|    learning_rate        | 0.0003     |
|    loss                 | 1.13e+06   |
|    n_updates            | 1180       |
|    policy_gradient_loss | -0.00108   |
|    std                  | 1.03       |
|    value_loss           | 1.75e+06   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.75e+03    |
| time/                   |              |
|    fps                  | 283          |
|    iterations           | 5            |
|    time_elapsed         | 36           |
|    total_timesteps      | 266240       |
| train/                  |              |
|    approx_kl            | 0.0048482753 |
|    clip_fraction        | 0.0308       |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.1        |
|    explained_variance   | 0.146        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.82e+05     |
|    n_updates            | 1290         |
|    policy_gradient_loss | -0.00697     |
|    std                  | 1.04         |
|    value_loss           | 5.39e+05     |
------------------------------------------
Logging to logs\PPO_1D_norm_all_0
--------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -7.91e+03   |
| time/                   |             |
|    fps                  | 253         |
|    iterations           | 5           |
|    time_elapsed         | 40          |
|    total_timesteps      | 286720      |
| train/                  |             |
|    approx_kl            | 0.019208398 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.2       |
|    explained_variance   | 0.0746      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.14e+06    |
|    n_updates            | 1390        |
|    policy_gradient_loss | -0.00297    |
|    std                  | 1.04        |
|    value_loss           | 1.81e+06    |
-----------------------------------------
Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollo

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -9.99e+03 |
| time/              |           |
|    fps             | 374       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 309248    |
----------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -9.99e+03  |
| time/                   |            |
|    fps                  | 292        |
|    iterations           | 2          |
|    time_elapsed         | 14         |
|    total_timesteps      | 311296     |
| train/                  |            |
|    approx_kl            | 0.90552366 |
|    clip_fraction        | 0.76       |
|    clip_range           | 0.2        |
|    entropy_loss         | -29.3      |
|    explained_variance   | 0.141      |
|

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.02e+03 |
|    ep_rew_mean     | -9.6e+03 |
| time/              |          |
|    fps             | 312      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 329728   |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.02e+03  |
|    ep_rew_mean          | -9.6e+03  |
| time/                   |           |
|    fps                  | 272       |
|    iterations           | 2         |
|    time_elapsed         | 15        |
|    total_timesteps      | 331776    |
| train/                  |           |
|    approx_kl            | 0.6392556 |
|    clip_fraction        | 0.805     |
|    clip_range           | 0.2       |
|    entropy_loss         | -29.4     |
|    explained_variance   | 0.132     |
|    learning_rate        | 0.0003    |
|    loss           

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -9.6e+03     |
| time/                   |              |
|    fps                  | 273          |
|    iterations           | 2            |
|    time_elapsed         | 14           |
|    total_timesteps      | 352256       |
| train/                  |              |
|    approx_kl            | 0.0015829285 |
|    clip_fraction        | 0.00557      |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.6        |
|    explained_variance   | 0.0311       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.05e+06     |
|    n_updates            | 1710         |
|    policy_gradient_loss | -0.0018      |
|    std                  | 1.06         |
|    value_loss           | 4.17e+06     |
------------------------------------------
-------------------------------------------
| rollout/

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.41e+03   |
| time/                   |             |
|    fps                  | 297         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 372736      |
| train/                  |             |
|    approx_kl            | 0.015391588 |
|    clip_fraction        | 0.072       |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.5       |
|    explained_variance   | 0.0388      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.62e+06    |
|    n_updates            | 1810        |
|    policy_gradient_loss | 0.00379     |
|    std                  | 1.06        |
|    value_loss           | 2.87e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.41e+03   |
| time/                   |             |
|    fps                  | 294         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 393216      |
| train/                  |             |
|    approx_kl            | 0.009894969 |
|    clip_fraction        | 0.0337      |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.5       |
|    explained_variance   | 0.0162      |
|    learning_rate        | 0.0003      |
|    loss                 | 3.63e+06    |
|    n_updates            | 1910        |
|    policy_gradient_loss | -0.00697    |
|    std                  | 1.06        |
|    value_loss           | 9.95e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.02e+03  |
|    ep_rew_mean          | -8.41e+03 |
| time/                   |           |
|    fps                  | 329       |
|    iterations           | 2         |
|    time_elapsed         | 12        |
|    total_timesteps      | 413696    |
| train/                  |           |
|    approx_kl            | 0.4165969 |
|    clip_fraction        | 0.81      |
|    clip_range           | 0.2       |
|    entropy_loss         | -29.5     |
|    explained_variance   | 0.201     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.91e+05  |
|    n_updates            | 2010      |
|    policy_gradient_loss | 0.0427    |
|    std                  | 1.06      |
|    value_loss           | 2.91e+05  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.02e+03  |


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.81e+03   |
| time/                   |             |
|    fps                  | 272         |
|    iterations           | 3           |
|    time_elapsed         | 22          |
|    total_timesteps      | 436224      |
| train/                  |             |
|    approx_kl            | 0.020597178 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.7       |
|    explained_variance   | 0.0139      |
|    learning_rate        | 0.0003      |
|    loss                 | 6.27e+06    |
|    n_updates            | 2120        |
|    policy_gradient_loss | 0.0134      |
|    std                  | 1.07        |
|    value_loss           | 1.2e+07     |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.81e+03   |
| time/                   |             |
|    fps                  | 292         |
|    iterations           | 3           |
|    time_elapsed         | 21          |
|    total_timesteps      | 456704      |
| train/                  |             |
|    approx_kl            | 0.005016243 |
|    clip_fraction        | 0.0342      |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.6       |
|    explained_variance   | 0.0491      |
|    learning_rate        | 0.0003      |
|    loss                 | 2.47e+06    |
|    n_updates            | 2220        |
|    policy_gradient_loss | -0.00633    |
|    std                  | 1.07        |
|    value_loss           | 2.88e+06    |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.88e+03   |
| time/                   |             |
|    fps                  | 248         |
|    iterations           | 4           |
|    time_elapsed         | 33          |
|    total_timesteps      | 479232      |
| train/                  |             |
|    approx_kl            | 0.013897042 |
|    clip_fraction        | 0.118       |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.6       |
|    explained_variance   | 0.0416      |
|    learning_rate        | 0.0003      |
|    loss                 | 2.63e+06    |
|    n_updates            | 2330        |
|    policy_gradient_loss | 0.00253     |
|    std                  | 1.07        |
|    value_loss           | 4.81e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.88e+03   |
| time/                   |             |
|    fps                  | 258         |
|    iterations           | 5           |
|    time_elapsed         | 39          |
|    total_timesteps      | 501760      |
| train/                  |             |
|    approx_kl            | 0.016947873 |
|    clip_fraction        | 0.0915      |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.7       |
|    explained_variance   | 0.0177      |
|    learning_rate        | 0.0003      |
|    loss                 | 6.72e+06    |
|    n_updates            | 2440        |
|    policy_gradient_loss | 0.0113      |
|    std                  | 1.07        |
|    value_loss           | 1.15e+07    |
-----------------------------------------
Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollo

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -8.15e+03 |
| time/              |           |
|    fps             | 331       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 524288    |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.06e+03   |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 2           |
|    time_elapsed         | 17          |
|    total_timesteps      | 526336      |
| train/                  |             |
|    approx_kl            | 0.020859517 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.8       |
|    explained_variance   | 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -7.48e+03  |
| time/                   |            |
|    fps                  | 252        |
|    iterations           | 2          |
|    time_elapsed         | 16         |
|    total_timesteps      | 546816     |
| train/                  |            |
|    approx_kl            | 0.02071129 |
|    clip_fraction        | 0.146      |
|    clip_range           | 0.2        |
|    entropy_loss         | -30        |
|    explained_variance   | 0.027      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.62e+06   |
|    n_updates            | 2660       |
|    policy_gradient_loss | 0.00635    |
|    std                  | 1.09       |
|    value_loss           | 6.91e+06   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -8.59e+03  |
| time/                   |            |
|    fps                  | 238        |
|    iterations           | 3          |
|    time_elapsed         | 25         |
|    total_timesteps      | 569344     |
| train/                  |            |
|    approx_kl            | 0.00476362 |
|    clip_fraction        | 0.0176     |
|    clip_range           | 0.2        |
|    entropy_loss         | -30        |
|    explained_variance   | 0.0546     |
|    learning_rate        | 0.0003     |
|    loss                 | 4.27e+06   |
|    n_updates            | 2770       |
|    policy_gradient_loss | -0.00454   |
|    std                  | 1.09       |
|    value_loss           | 6.65e+06   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -7.83e+03   |
| time/                   |             |
|    fps                  | 261         |
|    iterations           | 4           |
|    time_elapsed         | 31          |
|    total_timesteps      | 591872      |
| train/                  |             |
|    approx_kl            | 0.013131556 |
|    clip_fraction        | 0.0506      |
|    clip_range           | 0.2         |
|    entropy_loss         | -30.3       |
|    explained_variance   | 0.13        |
|    learning_rate        | 0.0003      |
|    loss                 | 3.21e+06    |
|    n_updates            | 2880        |
|    policy_gradient_loss | -0.00309    |
|    std                  | 1.1         |
|    value_loss           | 3.24e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -7.4e+03    |
| time/                   |             |
|    fps                  | 247         |
|    iterations           | 4           |
|    time_elapsed         | 33          |
|    total_timesteps      | 612352      |
| train/                  |             |
|    approx_kl            | 0.006647465 |
|    clip_fraction        | 0.0278      |
|    clip_range           | 0.2         |
|    entropy_loss         | -30.3       |
|    explained_variance   | 0.0697      |
|    learning_rate        | 0.0003      |
|    loss                 | 6.94e+05    |
|    n_updates            | 2980        |
|    policy_gradient_loss | -0.00431    |
|    std                  | 1.1         |
|    value_loss           | 2.79e+06    |
-----------------------------------------
------------------------------------------
| rollout/                |      

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -7.04e+03  |
| time/                   |            |
|    fps                  | 279        |
|    iterations           | 5          |
|    time_elapsed         | 36         |
|    total_timesteps      | 634880     |
| train/                  |            |
|    approx_kl            | 0.06675322 |
|    clip_fraction        | 0.765      |
|    clip_range           | 0.2        |
|    entropy_loss         | -30.5      |
|    explained_variance   | 0.248      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.51e+03   |
|    n_updates            | 3090       |
|    policy_gradient_loss | 0.0327     |
|    std                  | 1.12       |
|    value_loss           | 1.72e+05   |
----------------------------------------
Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |       

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -6.27e+03 |
| time/              |           |
|    fps             | 449       |
|    iterations      | 1         |
|    time_elapsed    | 4         |
|    total_timesteps | 657408    |
----------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -6.27e+03  |
| time/                   |            |
|    fps                  | 350        |
|    iterations           | 2          |
|    time_elapsed         | 11         |
|    total_timesteps      | 659456     |
| train/                  |            |
|    approx_kl            | 0.41292053 |
|    clip_fraction        | 0.768      |
|    clip_range           | 0.2        |
|    entropy_loss         | -30.9      |
|    explained_variance   | 0.145      |
|

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -6.27e+03  |
| time/                   |            |
|    fps                  | 310        |
|    iterations           | 2          |
|    time_elapsed         | 13         |
|    total_timesteps      | 679936     |
| train/                  |            |
|    approx_kl            | 0.01534997 |
|    clip_fraction        | 0.081      |
|    clip_range           | 0.2        |
|    entropy_loss         | -31        |
|    explained_variance   | 0.097      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.11e+06   |
|    n_updates            | 3310       |
|    policy_gradient_loss | 0.0149     |
|    std                  | 1.14       |
|    value_loss           | 3.02e+06   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.02e+03      |
|    ep_rew_mean          | -7.28e+03     |
| time/                   |               |
|    fps                  | 290           |
|    iterations           | 3             |
|    time_elapsed         | 21            |
|    total_timesteps      | 702464        |
| train/                  |               |
|    approx_kl            | 0.00087502506 |
|    clip_fraction        | 0.00518       |
|    clip_range           | 0.2           |
|    entropy_loss         | -31.1         |
|    explained_variance   | 0.0186        |
|    learning_rate        | 0.0003        |
|    loss                 | 4.5e+06       |
|    n_updates            | 3420          |
|    policy_gradient_loss | -0.000336     |
|    std                  | 1.15          |
|    value_loss           | 1.81e+07      |
-------------------------------------------
--------------------------------

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.02e+03      |
|    ep_rew_mean          | -7.46e+03     |
| time/                   |               |
|    fps                  | 286           |
|    iterations           | 3             |
|    time_elapsed         | 21            |
|    total_timesteps      | 722944        |
| train/                  |               |
|    approx_kl            | 0.00050295645 |
|    clip_fraction        | 0.00415       |
|    clip_range           | 0.2           |
|    entropy_loss         | -31.1         |
|    explained_variance   | 0.043         |
|    learning_rate        | 0.0003        |
|    loss                 | 7.29e+06      |
|    n_updates            | 3520          |
|    policy_gradient_loss | 2.83e-05      |
|    std                  | 1.15          |
|    value_loss           | 1.24e+07      |
-------------------------------------------
--------------------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -9.44e+03   |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 3           |
|    time_elapsed         | 22          |
|    total_timesteps      | 743424      |
| train/                  |             |
|    approx_kl            | 0.007664084 |
|    clip_fraction        | 0.0494      |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.226       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.73e+05    |
|    n_updates            | 3620        |
|    policy_gradient_loss | -0.00571    |
|    std                  | 1.16        |
|    value_loss           | 3.44e+05    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.68e+03   |
| time/                   |             |
|    fps                  | 299         |
|    iterations           | 3           |
|    time_elapsed         | 20          |
|    total_timesteps      | 763904      |
| train/                  |             |
|    approx_kl            | 0.019793015 |
|    clip_fraction        | 0.276       |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.151       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.28e+04    |
|    n_updates            | 3720        |
|    policy_gradient_loss | -0.000311   |
|    std                  | 1.16        |
|    value_loss           | 7.93e+05    |
-----------------------------------------
------------------------------------------
| rollout/                |      

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -8.68e+03  |
| time/                   |            |
|    fps                  | 305        |
|    iterations           | 3          |
|    time_elapsed         | 20         |
|    total_timesteps      | 784384     |
| train/                  |            |
|    approx_kl            | 0.08442732 |
|    clip_fraction        | 0.548      |
|    clip_range           | 0.2        |
|    entropy_loss         | -31.3      |
|    explained_variance   | 0.265      |
|    learning_rate        | 0.0003     |
|    loss                 | 5.14e+04   |
|    n_updates            | 3820       |
|    policy_gradient_loss | 0.0183     |
|    std                  | 1.16       |
|    value_loss           | 2.91e+05   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -8.68e+03  |
| time/                   |            |
|    fps                  | 267        |
|    iterations           | 4          |
|    time_elapsed         | 30         |
|    total_timesteps      | 806912     |
| train/                  |            |
|    approx_kl            | 0.04417538 |
|    clip_fraction        | 0.242      |
|    clip_range           | 0.2        |
|    entropy_loss         | -31.4      |
|    explained_variance   | 0.12       |
|    learning_rate        | 0.0003     |
|    loss                 | 1.03e+06   |
|    n_updates            | 3930       |
|    policy_gradient_loss | 0.000624   |
|    std                  | 1.17       |
|    value_loss           | 1.11e+06   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -7.85e+03   |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 5           |
|    time_elapsed         | 42          |
|    total_timesteps      | 829440      |
| train/                  |             |
|    approx_kl            | 0.003380769 |
|    clip_fraction        | 0.0237      |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.5       |
|    explained_variance   | 0.013       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.84e+07    |
|    n_updates            | 4040        |
|    policy_gradient_loss | 0.00348     |
|    std                  | 1.17        |
|    value_loss           | 4.58e+07    |
-----------------------------------------
Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollo

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -7.26e+03 |
| time/              |           |
|    fps             | 369       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 851968    |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -7.26e+03    |
| time/                   |              |
|    fps                  | 304          |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 854016       |
| train/                  |              |
|    approx_kl            | 0.0014987643 |
|    clip_fraction        | 0.00752      |
|    clip_range           | 0.2          |
|    entropy_loss         | -31.5        |
|    explained

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -5.92e+03 |
| time/              |           |
|    fps             | 337       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 872448    |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -5.92e+03    |
| time/                   |              |
|    fps                  | 291          |
|    iterations           | 2            |
|    time_elapsed         | 14           |
|    total_timesteps      | 874496       |
| train/                  |              |
|    approx_kl            | 0.0016315208 |
|    clip_fraction        | 0.0061       |
|    clip_range           | 0.2          |
|    entropy_loss         | -31.7        |
|    explained

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -5.29e+03 |
| time/              |           |
|    fps             | 284       |
|    iterations      | 1         |
|    time_elapsed    | 7         |
|    total_timesteps | 892928    |
----------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -5.29e+03  |
| time/                   |            |
|    fps                  | 289        |
|    iterations           | 2          |
|    time_elapsed         | 14         |
|    total_timesteps      | 894976     |
| train/                  |            |
|    approx_kl            | 0.01839026 |
|    clip_fraction        | 0.179      |
|    clip_range           | 0.2        |
|    entropy_loss         | -31.7      |
|    explained_variance   | 0.0088     |
|

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -4.47e+03 |
| time/              |           |
|    fps             | 387       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 913408    |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -4.47e+03   |
| time/                   |             |
|    fps                  | 335         |
|    iterations           | 2           |
|    time_elapsed         | 12          |
|    total_timesteps      | 915456      |
| train/                  |             |
|    approx_kl            | 0.004853447 |
|    clip_fraction        | 0.0186      |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.7       |
|    explained_variance   | 

Logging to logs\PPO_1D_norm_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -5.13e+03 |
| time/              |           |
|    fps             | 393       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 933888    |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -5.13e+03   |
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 935936      |
| train/                  |             |
|    approx_kl            | 0.023036215 |
|    clip_fraction        | 0.103       |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.7       |
|    explained_variance   | 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -6.04e+03  |
| time/                   |            |
|    fps                  | 286        |
|    iterations           | 2          |
|    time_elapsed         | 14         |
|    total_timesteps      | 956416     |
| train/                  |            |
|    approx_kl            | 0.00638838 |
|    clip_fraction        | 0.0117     |
|    clip_range           | 0.2        |
|    entropy_loss         | -31.9      |
|    explained_variance   | 0.0361     |
|    learning_rate        | 0.0003     |
|    loss                 | 8.96e+06   |
|    n_updates            | 4660       |
|    policy_gradient_loss | -0.00041   |
|    std                  | 1.2        |
|    value_loss           | 1.08e+07   |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.22e+03    |
| time/                   |              |
|    fps                  | 251          |
|    iterations           | 3            |
|    time_elapsed         | 24           |
|    total_timesteps      | 978944       |
| train/                  |              |
|    approx_kl            | 0.0043977583 |
|    clip_fraction        | 0.0296       |
|    clip_range           | 0.2          |
|    entropy_loss         | -31.9        |
|    explained_variance   | 0.106        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.7e+06      |
|    n_updates            | 4770         |
|    policy_gradient_loss | 0.00349      |
|    std                  | 1.2          |
|    value_loss           | 3.5e+06      |
------------------------------------------
------------------------------------------
| rollout/ 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -5.37e+03  |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 3          |
|    time_elapsed         | 20         |
|    total_timesteps      | 999424     |
| train/                  |            |
|    approx_kl            | 0.67851025 |
|    clip_fraction        | 0.805      |
|    clip_range           | 0.2        |
|    entropy_loss         | -31.9      |
|    explained_variance   | 0.167      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.83e+05   |
|    n_updates            | 4870       |
|    policy_gradient_loss | 0.0411     |
|    std                  | 1.2        |
|    value_loss           | 1.45e+06   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -6.2e+03   |
| time/                   |            |
|    fps                  | 280        |
|    iterations           | 4          |
|    time_elapsed         | 29         |
|    total_timesteps      | 1021952    |
| train/                  |            |
|    approx_kl            | 0.24740998 |
|    clip_fraction        | 0.116      |
|    clip_range           | 0.2        |
|    entropy_loss         | -32.1      |
|    explained_variance   | 0.105      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.23e+06   |
|    n_updates            | 4980       |
|    policy_gradient_loss | 0.0135     |
|    std                  | 1.21       |
|    value_loss           | 4.18e+06   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

## Change indicators

In [7]:
class MtEnv(gym.Env):

    metadata = {'render.modes': ['human', 'simple_figure', 'advanced_figure']}

    def __init__(
            self, original_simulator: MtSimulator, trading_symbols: List[str],
            window_size: int, time_points: Optional[List[datetime]]=None,
            hold_threshold: float=0.5, close_threshold: float=0.5,
            fee: Union[float, Callable[[str], float]]=0.0005,
            symbol_max_orders: int=1, multiprocessing_processes: Optional[int]=None
        ) -> None:
        """Validate the arguments, precompute the feature matrix and declare the spaces.

        Args:
            original_simulator: Simulator holding the symbols' data and info. It is
                kept untouched; `reset` works on a deep copy of it.
            trading_symbols: Symbols the agent trades. Each one must be present in
                `original_simulator.symbols_info`.
            window_size: Number of consecutive time points (current and previous)
                that make up the 'features' part of one observation.
            time_points: Time points an episode steps through. When omitted, the
                index of the first trading symbol's data is used.
            hold_threshold: `_apply_action` opens no new order for a symbol when
                the sigmoid of its hold logit is above this value. Must be in [0, 1].
            close_threshold: `_apply_action` closes an open order when the sigmoid
                of its close logit is above this value. Not range-checked here.
            fee: Fee passed to `create_order`; either a number or a callable that
                receives the symbol name.
            symbol_max_orders: Maximum number of simultaneously open orders per
                symbol. Forced to 1 when the simulator is not in hedge mode.
            multiprocessing_processes: When truthy, a process pool of this size is
                created and used to look up prices in parallel.

        Raises:
            AssertionError: If any of the validations below fails.
        """

        # validations
        # each assert passes silently when its condition holds and raises
        # AssertionError with the given message otherwise
        assert len(original_simulator.symbols_data) > 0, "no data available"
        assert len(original_simulator.symbols_info) > 0, "no data available"
        assert len(trading_symbols) > 0, "no trading symbols provided"
        assert 0. <= hold_threshold <= 1., "'hold_threshold' must be in range [0., 1.]"

        # without hedging only a single order slot per symbol is exposed
        if not original_simulator.hedge:
            symbol_max_orders = 1

        # every traded symbol must be known, and a unit symbol must be available
        # for its profit currency
        for symbol in trading_symbols:
            assert symbol in original_simulator.symbols_info, f"symbol '{symbol}' not found"
            currency_profit = original_simulator.symbols_info[symbol].currency_profit
            assert original_simulator._get_unit_symbol_info(currency_profit) is not None, \
                   f"unit symbol for '{currency_profit}' not found"

        # default timeline: the index of the first trading symbol's data
        if time_points is None:
            time_points = original_simulator.symbols_data[trading_symbols[0]].index.to_pydatetime().tolist()
        assert len(time_points) > window_size, "not enough time points provided"

        # attributes
        self.seed()
        self.original_simulator = original_simulator
        self.trading_symbols = trading_symbols
        # The number of time points (current and previous points) as the length of each observation's features.
        self.window_size = window_size
        # Time points the episode moves through (indexed by the current tick)
        self.time_points = time_points
        self.hold_threshold = hold_threshold
        self.close_threshold = close_threshold
        self.fee = fee
        self.symbol_max_orders = symbol_max_orders
        # optional process pool used for parallel price lookups; None means serial
        self.multiprocessing_pool = Pool(multiprocessing_processes) if multiprocessing_processes else None
        
        # self.indicator is an addition of this notebook; the calls below read
        # the attributes assigned above, so they must stay after them
        self.indicator = self._get_indicator()
        self.prices = self._get_prices()
        # feature matrix is built from self.indicator (see _process_data)
        self.signal_features = self._process_data()
        # one observation holds `window_size` rows of the feature matrix
        self.features_shape = (window_size, self.signal_features.shape[1])

        # spaces
        # per symbol: `symbol_max_orders` close logits, one hold logit, one volume
        self.action_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(len(self.trading_symbols) * (self.symbol_max_orders + 2),)
        )  # symbol -> [close_order_i(logic), hold(logic), volume]

        # NOTE(review): 'balance', 'equity' and 'features' are declared with
        # low=0 -- confirm that none of them can become negative (e.g. after
        # losses, or with normalised / oscillating indicators as features).
        self.observation_space = spaces.Dict({
            'balance': spaces.Box(low=0, high=np.inf, shape=(1,)),
            'equity': spaces.Box(low=0 , high=np.inf, shape=(1,)),
            'margin': spaces.Box(low=-np.inf, high=np.inf, shape=(1,)),
            'features': spaces.Box(low=0, high=np.inf, shape=self.features_shape),
            'orders': spaces.Box(
                low=-np.inf, high=np.inf,
                shape=(len(self.trading_symbols), self.symbol_max_orders, 3)
            )  # symbol, order_i -> [entry_price, volume, profit]
        })

        # episode
        # first tick that has a full window of history behind it
        self._start_tick = self.window_size - 1
        # index of the last time point
        self._end_tick = len(self.time_points) - 1
        # placeholders; real values are assigned by reset()
        self._done: bool = NotImplemented
        self._current_tick: int = NotImplemented
        self.simulator: MtSimulator = NotImplemented
        self.history: List[Dict[str, Any]] = NotImplemented


    def seed(self, seed: Optional[int]=None) -> List[int]:
        """Create the environment's random generator and report the seed used.

        The generator returned by `seeding.np_random` is stored on
        `self.np_random`; the seed it reports is returned in a one-element list.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]


    def reset(self) -> Dict[str, np.ndarray]:
        self._done = False
        self._current_tick = self._start_tick
        self.simulator = copy.deepcopy(self.original_simulator)
        self.simulator.current_time = self.time_points[self._current_tick]
        self.history = [self._create_info()]
        return self._get_observation()


    def step(self, action: np.ndarray) -> Tuple[Dict[str, np.ndarray], float, bool, Dict[str, Any]]:
        orders_info, closed_orders_info = self._apply_action(action)

        self._current_tick += 1
        if self._current_tick == self._end_tick:
            self._done = True

        dt = self.time_points[self._current_tick] - self.time_points[self._current_tick - 1]
        self.simulator.tick(dt)

        step_reward = self._calculate_reward()

        info = self._create_info(
            orders=orders_info, closed_orders=closed_orders_info, step_reward=step_reward
        )
        observation = self._get_observation()
        self.history.append(info)

        return observation, step_reward, self._done, info


    def _apply_action(self, action: np.ndarray) -> Tuple[Dict, Dict]:
        """Execute a flat action vector on the simulator.

        Each symbol owns `symbol_max_orders + 2` consecutive entries of
        `action`: one close logit per order slot, then a hold logit, then a
        signed volume. Logits are squashed with the logistic sigmoid and
        compared against `close_threshold` / `hold_threshold`.

        For every symbol, the selected open orders are closed first. Then,
        unless the agent holds (or, in hedge mode, no slot is free), a new order
        is requested: Buy for a positive volume, Sell otherwise.

        Args:
            action: Flat array laid out as described above.

        Returns:
            `(orders_info, closed_orders_info)`: a dict with one record per
            symbol describing the order request, and a dict mapping each symbol
            to the list of records of the orders closed in this step.
        """
        orders_info = {}
        closed_orders_info = {symbol: [] for symbol in self.trading_symbols}

        # number of action entries owned by each symbol
        k = self.symbol_max_orders + 2

        for i, symbol in enumerate(self.trading_symbols):
            symbol_action = action[k*i:k*(i+1)]
            close_orders_logit = symbol_action[:-2]
            hold_logit = symbol_action[-2]
            volume = symbol_action[-1]

            # The expit function, also known as the logistic sigmoid function
            close_orders_probability = expit(close_orders_logit)
            hold_probability = expit(hold_logit)
            hold = bool(hold_probability > self.hold_threshold)
            modified_volume = self._get_modified_volume(symbol, volume)

            # only the slots that currently hold an order can be closed
            symbol_orders = self.simulator.symbol_orders(symbol)
            orders_to_close_index = np.where(
                close_orders_probability[:len(symbol_orders)] > self.close_threshold
            )[0]
            orders_to_close = np.array(symbol_orders)[orders_to_close_index]

            for slot, order in zip(orders_to_close_index, orders_to_close):
                self.simulator.close_order(order)
                closed_orders_info[symbol].append(dict(
                    order_id=order.id, symbol=order.symbol, order_type=order.type,
                    volume=order.volume, fee=order.fee,
                    margin=order.margin, profit=order.profit,
                    close_probability=close_orders_probability[slot],
                ))

            # free slots left after the closes above
            orders_capacity = self.symbol_max_orders - (len(symbol_orders) - len(orders_to_close))
            orders_info[symbol] = dict(
                order_id=None, symbol=symbol, hold_probability=hold_probability,
                hold=hold, volume=volume, capacity=orders_capacity, order_type=None,
                modified_volume=modified_volume, fee=float('nan'), margin=float('nan'),
                error='',
            )

            if self.simulator.hedge and orders_capacity == 0:
                orders_info[symbol].update(dict(
                    error="cannot add more orders"
                ))
            elif not hold:
                order_type = OrderType.Buy if volume > 0. else OrderType.Sell
                # Dispatch on callability rather than on the exact `float`
                # type, so that numeric fees such as `0` or `np.float64(...)`
                # are used as-is instead of being called like a function.
                fee = self.fee(symbol) if callable(self.fee) else self.fee

                try:
                    order = self.simulator.create_order(order_type, symbol, modified_volume, fee)
                    new_info = dict(
                        order_id=order.id, order_type=order_type,
                        fee=fee, margin=order.margin,
                    )
                except ValueError as e:
                    # the request was rejected; keep the reason in the record
                    new_info = dict(error=str(e))

                orders_info[symbol].update(new_info)

        return orders_info, closed_orders_info

    # Change to lower case
    def _get_prices(self, keys: List[str]=['Close', 'Open']) -> Dict[str, np.ndarray]:
        prices = {}

        for symbol in self.trading_symbols:
            get_price_at = lambda time: \
                self.original_simulator.price_at(symbol, time)[keys]

            if self.multiprocessing_pool is None:
                p = list(map(get_price_at, self.time_points))
            else:
                p = self.multiprocessing_pool.map(get_price_at, self.time_points)

            prices[symbol] = np.array(p)

        return prices
    
        ######################### ADD self.indicator #########################
        # columns=['Time', 'Open', 'High', 'Low', 'Close', 'Volume', '_', '_']
        # lowercase: ["open", "high", "low", "close"]
        
    def _get_indicator(self, keys: List[str]=['Open','Close','High', 'Low']) -> Dict[str, np.ndarray]:
        #['Open','High', 'Low', 'Volume','EFI','CFI','ADX','STOCHRSI','KAMA']
        indicator = {}

        for symbol in self.trading_symbols:
            get_price_at = lambda time: \
                self.original_simulator.price_at(symbol, time)[keys]

            if self.multiprocessing_pool is None:
                p = list(map(get_price_at, self.time_points))
            else:
                p = self.multiprocessing_pool.map(get_price_at, self.time_points)

            indicator[symbol] = np.array(p)

        return indicator
    
    


    def _process_data(self) -> np.ndarray:
        # Add own indicator
        
        #data = self.prices
        data = self.indicator
        signal_features = np.column_stack(list(data.values()))
        return signal_features


    def _get_observation(self) -> Dict[str, np.ndarray]:
        features = self.signal_features[(self._current_tick-self.window_size+1):(self._current_tick+1)]

        orders = np.zeros(self.observation_space['orders'].shape)
        for i, symbol in enumerate(self.trading_symbols):
            symbol_orders = self.simulator.symbol_orders(symbol)
            for j, order in enumerate(symbol_orders):
                orders[i, j] = [order.entry_price, order.volume, order.profit]

        observation = {
            'balance': np.array([self.simulator.balance]),
            'equity': np.array([self.simulator.equity]),
            'margin': np.array([self.simulator.margin]),
            'features': features,
            'orders': orders,
        }
        return observation


    def _calculate_reward(self) -> float:
        prev_equity = self.history[-1]['equity']
        current_equity = self.simulator.equity
        step_reward = current_equity - prev_equity
        return step_reward


    def _create_info(self, **kwargs: Any) -> Dict[str, Any]:
        info = {k: v for k, v in kwargs.items()}
        info['balance'] = self.simulator.balance
        info['equity'] = self.simulator.equity
        info['margin'] = self.simulator.margin
        info['free_margin'] = self.simulator.free_margin
        info['margin_level'] = self.simulator.margin_level
        return info


    def _get_modified_volume(self, symbol: str, volume: float) -> float:
        si = self.simulator.symbols_info[symbol]
        v = abs(volume)
        v = np.clip(v, si.volume_min, si.volume_max)
        v = round(v / si.volume_step) * si.volume_step
        return v


    def render(self, mode: str='human', **kwargs: Any) -> Any:
        if mode == 'simple_figure':
            return self._render_simple_figure(**kwargs)
        if mode == 'advanced_figure':
            return self._render_advanced_figure(**kwargs)
        return self.simulator.get_state(**kwargs)


    def _render_simple_figure(
        self, figsize: Tuple[float, float]=(14, 6), return_figure: bool=False
    ) -> Any:
        """Draw a matplotlib overview of the episode so far.

        For every trading symbol the close price (column 0 of `self.prices`)
        is plotted over time on its own y-axis, with markers for the recorded
        order requests: green/red triangles for created buy/sell orders, gray
        triangles for rejected ones, and a black bar where orders were closed.
        The title shows the simulator's current account state.

        Args:
            figsize: Figure size passed to `plt.subplots`.
            return_figure: When True the figure is returned instead of shown.

        Returns:
            The figure if `return_figure` is True, otherwise None.
        """
        fig, ax = plt.subplots(figsize=figsize, facecolor='white')

        # one colour per symbol, interpolated from a subset of the tab10 palette
        cmap_colors = np.array(plt_cm.tab10.colors)[[0, 1, 4, 5, 6, 8]]
        cmap = plt_colors.LinearSegmentedColormap.from_list('mtsim', cmap_colors)
        symbol_colors = cmap(np.linspace(0, 1, len(self.trading_symbols)))

        for j, symbol in enumerate(self.trading_symbols):
            close_price = self.prices[symbol][:, 0]
            symbol_color = symbol_colors[j]

            ax.plot(self.time_points, close_price, c=symbol_color, marker='.', label=symbol)

            buy_ticks = []
            buy_error_ticks = []
            sell_ticks = []
            sell_error_ticks = []
            close_ticks = []

            # history[0] is the reset record; history[i] describes the action
            # applied at tick `_start_tick + i - 1`
            for i in range(1, len(self.history)):
                tick = self._start_tick + i - 1

                order = self.history[i]['orders'].get(symbol, {})
                if order and not order['hold']:
                    # `order_type` is only recorded for orders that were
                    # actually created; a rejected request keeps None there.
                    # Fall back to the sign of the requested volume (the rule
                    # `_apply_action` uses) so rejected buys are not drawn as
                    # rejected sells.
                    order_type = order['order_type']
                    if order_type is None:
                        is_buy = order['volume'] > 0.
                    else:
                        is_buy = order_type == OrderType.Buy

                    if is_buy:
                        if order['error']:
                            buy_error_ticks.append(tick)
                        else:
                            buy_ticks.append(tick)
                    else:
                        if order['error']:
                            sell_error_ticks.append(tick)
                        else:
                            sell_ticks.append(tick)

                closed_orders = self.history[i]['closed_orders'].get(symbol, [])
                if len(closed_orders) > 0:
                    close_ticks.append(tick)

            tp = np.array(self.time_points)
            ax.plot(tp[buy_ticks], close_price[buy_ticks], '^', color='green')
            ax.plot(tp[buy_error_ticks], close_price[buy_error_ticks], '^', color='gray')
            ax.plot(tp[sell_ticks], close_price[sell_ticks], 'v', color='red')
            ax.plot(tp[sell_error_ticks], close_price[sell_error_ticks], 'v', color='gray')
            ax.plot(tp[close_ticks], close_price[close_ticks], '|', color='black')

            ax.tick_params(axis='y', labelcolor=symbol_color)
            ax.yaxis.tick_left()
            # every further symbol gets its own y-axis sharing the time axis
            if j < len(self.trading_symbols) - 1:
                ax = ax.twinx()

        fig.suptitle(
            f"Balance: {self.simulator.balance:.6f} {self.simulator.unit} ~ "
            f"Equity: {self.simulator.equity:.6f} ~ "
            f"Margin: {self.simulator.margin:.6f} ~ "
            f"Free Margin: {self.simulator.free_margin:.6f} ~ "
            f"Margin Level: {self.simulator.margin_level:.6f}"
        )
        fig.legend(loc='right')

        if return_figure:
            return fig

        plt.show()


    def _render_advanced_figure(
            self, figsize: Tuple[float, float]=(1400, 600), time_format: str="%Y-%m-%d %H:%M",
            return_figure: bool=False
        ) -> Any:
        """Draw an interactive Plotly figure of prices, orders and closed orders.

        For every trading symbol one price line is drawn on its own y-axis
        (every axis after the first overlays ``'y'``), together with triangle
        markers for the non-hold orders recorded in ``self.history`` and
        ``line-ns`` markers for the ticks at which orders were closed. Account
        statistics are attached as hover text and summarised in the title.

        Args:
            figsize: ``(width, height)`` forwarded to the Plotly layout.
            time_format: tick format forwarded as ``xaxis_tickformat``. The
                default uses ``%M`` (minutes) after the hour; it previously
                used ``%m`` (month), which printed the month number where the
                minutes belong.
            return_figure: when true, return the figure instead of showing it.

        Returns:
            The ``go.Figure`` when ``return_figure`` is true; otherwise
            ``None`` after ``fig.show()`` has been called.
        """

        fig = go.Figure()

        cmap_colors = np.array(plt_cm.tab10.colors)[[0, 1, 4, 5, 6, 8]]
        cmap = plt_colors.LinearSegmentedColormap.from_list('mtsim', cmap_colors)
        symbol_colors = cmap(np.linspace(0, 1, len(self.trading_symbols)))
        get_color_string = lambda color: "rgba(%s, %s, %s, %s)" % tuple(color)

        # Built once; it is fancy-indexed by tick lists for every symbol below.
        time_points = np.array(self.time_points)

        # One hover text per history entry with the account state at that step.
        extra_info = [
            f"balance: {h['balance']:.6f} {self.simulator.unit}<br>"
            f"equity: {h['equity']:.6f}<br>"
            f"margin: {h['margin']:.6f}<br>"
            f"free margin: {h['free_margin']:.6f}<br>"
            f"margin level: {h['margin_level']:.6f}"
            for h in self.history
        ]
        # Repeat the first entry in front of the list (window_size - 1 times).
        # NOTE(review): presumably this aligns the hover texts with the price
        # series, whose first window has no history entries -- confirm.
        if extra_info:
            extra_info = [extra_info[0]] * (self.window_size - 1) + extra_info

        for j, symbol in enumerate(self.trading_symbols):
            close_price = self.prices[symbol][:, 0]
            symbol_color = symbol_colors[j]

            # Price line of the symbol.
            fig.add_trace(
                go.Scatter(
                    x=self.time_points,
                    y=close_price,
                    mode='lines+markers',
                    line_color=get_color_string(symbol_color),
                    opacity=1.0,
                    hovertext=extra_info,
                    name=symbol,
                    yaxis=f'y{j+1}',
                    legendgroup=f'g{j+1}',
                ),
            )

            # Dedicated y-axis per symbol; tick labels use the symbol colour
            # with its alpha channel scaled by 0.8.
            fig.update_layout(**{
                f'yaxis{j+1}': dict(
                    tickfont=dict(color=get_color_string(symbol_color * [1, 1, 1, 0.8])),
                    overlaying='y' if j > 0 else None,
                    # position=0.035*j
                ),
            })

            trade_ticks = []
            trade_markers = []
            trade_colors = []
            trade_sizes = []
            trade_extra_info = []
            # Largest modified volume recorded for this symbol; missing or
            # None values count as 0 and an empty history yields 0.
            trade_max_volume = max(
                (
                    h.get('orders', {}).get(symbol, {}).get('modified_volume') or 0
                    for h in self.history
                ),
                default=0,
            )
            close_ticks = []
            close_extra_info = []

            # history[0] is skipped; entry i is drawn at tick _start_tick + i - 1.
            for i in range(1, len(self.history)):
                tick = self._start_tick + i - 1

                order = self.history[i]['orders'].get(symbol)
                if order and not order['hold']:
                    # Marker size grows linearly from 8 to 30 with the order's
                    # share of the largest volume. When no volume was recorded
                    # (max is 0) fall back to the base size instead of
                    # dividing by zero.
                    if trade_max_volume:
                        volume_ratio = order['modified_volume'] / trade_max_volume
                    else:
                        volume_ratio = 0.0
                    size = 8 + 22 * volume_ratio
                    info = (
                        f"order id: {order['order_id'] or ''}<br>"
                        f"hold probability: {order['hold_probability']:.4f}<br>"
                        f"hold: {order['hold']}<br>"
                        f"volume: {order['volume']:.6f}<br>"
                        f"modified volume: {order['modified_volume']:.4f}<br>"
                        f"fee: {order['fee']:.6f}<br>"
                        f"margin: {order['margin']:.6f}<br>"
                        f"error: {order['error']}"
                    )

                    # Buy -> green triangle-up, anything else -> red
                    # triangle-down; orders carrying an error are drawn gray.
                    if order['order_type'] == OrderType.Buy:
                        marker = 'triangle-up'
                        color = 'gray' if order['error'] else 'green'
                    else:
                        marker = 'triangle-down'
                        color = 'gray' if order['error'] else 'red'

                    trade_ticks.append(tick)
                    trade_markers.append(marker)
                    trade_colors.append(color)
                    trade_sizes.append(size)
                    trade_extra_info.append(info)

                closed_orders = self.history[i]['closed_orders'].get(symbol, [])
                if len(closed_orders) > 0:
                    # All orders closed at this tick share one marker; their
                    # descriptions are joined with a separator line.
                    info = []
                    for order in closed_orders:
                        info_i = (
                            f"order id: {order['order_id']}<br>"
                            f"order type: {order['order_type'].name}<br>"
                            f"close probability: {order['close_probability']:.4f}<br>"
                            f"margin: {order['margin']:.6f}<br>"
                            f"profit: {order['profit']:.6f}"
                        )
                        info.append(info_i)
                    info = '<br>---------------------------------<br>'.join(info)

                    close_ticks.append(tick)
                    close_extra_info.append(info)

            # Order markers (hidden from the legend, grouped with the line).
            fig.add_trace(
                go.Scatter(
                    x=time_points[trade_ticks],
                    y=close_price[trade_ticks],
                    mode='markers',
                    hovertext=trade_extra_info,
                    marker_symbol=trade_markers,
                    marker_color=trade_colors,
                    marker_size=trade_sizes,
                    name=symbol,
                    yaxis=f'y{j+1}',
                    showlegend=False,
                    legendgroup=f'g{j+1}',
                ),
            )

            # Closed-order markers.
            fig.add_trace(
                go.Scatter(
                    x=time_points[close_ticks],
                    y=close_price[close_ticks],
                    mode='markers',
                    hovertext=close_extra_info,
                    marker_symbol='line-ns',
                    marker_color='black',
                    marker_size=7,
                    marker_line_width=1.5,
                    name=symbol,
                    yaxis=f'y{j+1}',
                    showlegend=False,
                    legendgroup=f'g{j+1}',
                ),
            )

        title = (
            f"Balance: {self.simulator.balance:.6f} {self.simulator.unit} ~ "
            f"Equity: {self.simulator.equity:.6f} ~ "
            f"Margin: {self.simulator.margin:.6f} ~ "
            f"Free Margin: {self.simulator.free_margin:.6f} ~ "
            f"Margin Level: {self.simulator.margin_level:.6f}"
        )
        fig.update_layout(
            title=title,
            xaxis_tickformat=time_format,
            width=figsize[0],
            height=figsize[1],
        )

        if return_figure:
            return fig

        fig.show()


    def close(self) -> None:
        """Release rendering resources by calling ``plt.close()``."""
        plt.close()

# A2C 1D Allin All-symbol

In [8]:
import os
from stable_baselines3 import A2C
from stable_baselines3 import PPO
# A2C, PPO

# Single source of truth for the run name: used for the checkpoint folder
# and for the TensorBoard run.
RUN_NAME = "A2C_1D_noIndi_all"
models_dir = f"models/{RUN_NAME}"
logdir = "logs"
# Monitor training with:  tensorboard --logdir logs

# exist_ok=True replaces the check-then-create pattern and keeps the cell
# safe to re-run.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

# NOTE: `env` is not created in this cell; it must already exist from an
# earlier cell (e.g. env = gym.make('forex-hedge-v0')).
env.reset()

model = A2C('MultiInputPolicy', env, verbose=1, tensorboard_log=logdir)

TIMESTEPS = 10000      # steps trained between two checkpoints
N_CHECKPOINTS = 100    # checkpoints to write; total = TIMESTEPS * N_CHECKPOINTS
for i in range(1, N_CHECKPOINTS + 1):
    # reset_num_timesteps=False keeps the step counter running across calls,
    # so every chunk continues the same run instead of starting at 0.
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=RUN_NAME)
    # Checkpoint file is named after the cumulative number of timesteps.
    model.save(f"{models_dir}/{TIMESTEPS*i}")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Logging to logs\A2C_1D_noIndi_all_0
------------------------------------
| time/                 |          |
|    fps                | 145      |
|    iterations         | 100      |
|    time_elapsed       | 3        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -28.4    |
|    explained_variance | -12.6    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.0876   |
|    std                | 1        |
|    value_loss         | 1.63e-05 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 186      |
|    iterations         | 200      |
|    time_elapsed       | 5        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -28.4    |
|    explained_variance | -0.17    |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
| 

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 223      |
|    iterations         | 1400     |
|    time_elapsed       | 31       |
|    total_timesteps    | 7000     |
| train/                |          |
|    entropy_loss       | -28.2    |
|    explained_variance | -4.75    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | 0.00749  |
|    std                | 0.993    |
|    value_loss         | 2.15e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 225      |
|    iterations         | 1500     |
|    time_elapsed       | 33       |
|    total_timesteps    | 7500     |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 265      |
|    iterations         | 700      |
|    time_elapsed       | 13       |
|    total_timesteps    | 13500    |
| train/                |          |
|    entropy_loss       | -28.3    |
|    explained_variance | -0.068   |
|    learning_rate      | 0.0007   |
|    n_updates          | 2699     |
|    policy_loss        | -0.054   |
|    std                | 0.996    |
|    value_loss         | 5.08e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 268      |
|    iterations         | 800      |
|    time_elapsed       | 14       |
|    total_timesteps    | 14000    |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 258      |
|    iterations         | 2000     |
|    time_elapsed       | 38       |
|    total_timesteps    | 20000    |
| train/                |          |
|    entropy_loss       | -28.1    |
|    explained_variance | -1.9     |
|    learning_rate      | 0.0007   |
|    n_updates          | 3999     |
|    policy_loss        | -0.00788 |
|    std                | 0.984    |
|    value_loss         | 1.32e-07 |
------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 272      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
| 

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 271      |
|    iterations         | 1300     |
|    time_elapsed       | 23       |
|    total_timesteps    | 26500    |
| train/                |          |
|    entropy_loss       | -28.3    |
|    explained_variance | -0.255   |
|    learning_rate      | 0.0007   |
|    n_updates          | 5299     |
|    policy_loss        | -0.00774 |
|    std                | 0.995    |
|    value_loss         | 9.01e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 270      |
|    iterations         | 1400     |
|    time_elapsed       | 25       |
|    total_timesteps    | 27000    |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 273      |
|    iterations         | 600      |
|    time_elapsed       | 10       |
|    total_timesteps    | 33000    |
| train/                |          |
|    entropy_loss       | -28.4    |
|    explained_variance | -1.35    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6599     |
|    policy_loss        | -0.0226  |
|    std                | 1        |
|    value_loss         | 7.51e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 274      |
|    iterations         | 700      |
|    time_elapsed       | 12       |
|    total_timesteps    | 33500    |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 273      |
|    iterations         | 1900     |
|    time_elapsed       | 34       |
|    total_timesteps    | 39500    |
| train/                |          |
|    entropy_loss       | -28.4    |
|    explained_variance | 0.896    |
|    learning_rate      | 0.0007   |
|    n_updates          | 7899     |
|    policy_loss        | 0.107    |
|    std                | 1        |
|    value_loss         | 1.32e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 271      |
|    iterations         | 2000     |
|    time_elapsed       | 36       |
|    total_timesteps    | 40000    |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 256       |
|    iterations         | 1200      |
|    time_elapsed       | 23        |
|    total_timesteps    | 46000     |
| train/                |           |
|    entropy_loss       | -28.4     |
|    explained_variance | -0.00151  |
|    learning_rate      | 0.0007    |
|    n_updates          | 9199      |
|    policy_loss        | -4.58e+04 |
|    std                | 1         |
|    value_loss         | 1.07e+07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 255      |
|    iterations         | 1300     |
|    time_elapsed       | 25       |
|    total_timesteps

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 241      |
|    iterations         | 500      |
|    time_elapsed       | 10       |
|    total_timesteps    | 52500    |
| train/                |          |
|    entropy_loss       | -28.3    |
|    explained_variance | -4.51    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10499    |
|    policy_loss        | -0.0286  |
|    std                | 0.997    |
|    value_loss         | 1.04e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 242      |
|    iterations         | 600      |
|    time_elapsed       | 12       |
|    total_timesteps    | 53000    |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 249      |
|    iterations         | 1800     |
|    time_elapsed       | 36       |
|    total_timesteps    | 59000    |
| train/                |          |
|    entropy_loss       | -28.3    |
|    explained_variance | -2.82    |
|    learning_rate      | 0.0007   |
|    n_updates          | 11799    |
|    policy_loss        | 0.0123   |
|    std                | 0.999    |
|    value_loss         | 2.97e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 249      |
|    iterations         | 1900     |
|    time_elapsed       | 38       |
|    total_timesteps    | 59500    |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 246      |
|    iterations         | 1100     |
|    time_elapsed       | 22       |
|    total_timesteps    | 65500    |
| train/                |          |
|    entropy_loss       | -28.7    |
|    explained_variance | -1.02    |
|    learning_rate      | 0.0007   |
|    n_updates          | 13099    |
|    policy_loss        | 0.0305   |
|    std                | 1.02     |
|    value_loss         | 1.32e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 246      |
|    iterations         | 1200     |
|    time_elapsed       | 24       |
|    total_timesteps    | 66000    |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 248       |
|    iterations         | 400       |
|    time_elapsed       | 8         |
|    total_timesteps    | 72000     |
| train/                |           |
|    entropy_loss       | -28.6     |
|    explained_variance | -236      |
|    learning_rate      | 0.0007    |
|    n_updates          | 14399     |
|    policy_loss        | 0.00576   |
|    std                | 1.01      |
|    value_loss         | 5.59e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 248       |
|    iterations         | 500       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 243       |
|    iterations         | 1700      |
|    time_elapsed       | 34        |
|    total_timesteps    | 78500     |
| train/                |           |
|    entropy_loss       | -28.7     |
|    explained_variance | -0.00637  |
|    learning_rate      | 0.0007    |
|    n_updates          | 15699     |
|    policy_loss        | 0.0403    |
|    std                | 1.02      |
|    value_loss         | 2.02e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.76e+03 |
| time/                 |           |
|    fps                | 242       |
|    iterations         | 1800      |
|    time_elapsed       | 37        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.76e+03 |
| time/                 |           |
|    fps                | 241       |
|    iterations         | 900       |
|    time_elapsed       | 18        |
|    total_timesteps    | 84500     |
| train/                |           |
|    entropy_loss       | -28.8     |
|    explained_variance | -2.85     |
|    learning_rate      | 0.0007    |
|    n_updates          | 16899     |
|    policy_loss        | -0.171    |
|    std                | 1.03      |
|    value_loss         | 4.05e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.76e+03 |
| time/                 |           |
|    fps                | 241       |
|    iterations         | 1000      |
|    time_elapsed       | 20        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.74e+03 |
| time/                 |           |
|    fps                | 248       |
|    iterations         | 100       |
|    time_elapsed       | 2         |
|    total_timesteps    | 90500     |
| train/                |           |
|    entropy_loss       | -28.9     |
|    explained_variance | 0.802     |
|    learning_rate      | 0.0007    |
|    n_updates          | 18099     |
|    policy_loss        | -0.000403 |
|    std                | 1.03      |
|    value_loss         | 2.91e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.74e+03 |
| time/                 |           |
|    fps                | 250       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.74e+03 |
| time/                 |           |
|    fps                | 245       |
|    iterations         | 1300      |
|    time_elapsed       | 26        |
|    total_timesteps    | 96500     |
| train/                |           |
|    entropy_loss       | -29.2     |
|    explained_variance | 0.798     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19299     |
|    policy_loss        | -0.0247   |
|    std                | 1.04      |
|    value_loss         | 7.01e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.74e+03 |
| time/                 |           |
|    fps                | 245       |
|    iterations         | 1400      |
|    time_elapsed       | 28        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.72e+03 |
| time/                 |           |
|    fps                | 244       |
|    iterations         | 500       |
|    time_elapsed       | 10        |
|    total_timesteps    | 102500    |
| train/                |           |
|    entropy_loss       | -29.3     |
|    explained_variance | -5.59e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 20499     |
|    policy_loss        | -7.01e-05 |
|    std                | 1.05      |
|    value_loss         | 5.35e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.72e+03 |
| time/                 |           |
|    fps                | 244       |
|    iterations         | 600       |
|    time_elapsed       | 12        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 256       |
|    iterations         | 1700      |
|    time_elapsed       | 33        |
|    total_timesteps    | 108500    |
| train/                |           |
|    entropy_loss       | -29.4     |
|    explained_variance | -52.2     |
|    learning_rate      | 0.0007    |
|    n_updates          | 21699     |
|    policy_loss        | 0.0209    |
|    std                | 1.06      |
|    value_loss         | 8.06e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 258       |
|    iterations         | 1800      |
|    time_elapsed       | 34        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.61e+03 |
| time/                 |           |
|    fps                | 273       |
|    iterations         | 900       |
|    time_elapsed       | 16        |
|    total_timesteps    | 114500    |
| train/                |           |
|    entropy_loss       | -29.4     |
|    explained_variance | -0.0129   |
|    learning_rate      | 0.0007    |
|    n_updates          | 22899     |
|    policy_loss        | 0.204     |
|    std                | 1.06      |
|    value_loss         | 6.31e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.61e+03 |
| time/                 |           |
|    fps                | 273       |
|    iterations         | 1000      |
|    time_elapsed       | 18        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.59e+03 |
| time/                 |           |
|    fps                | 258       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 120500    |
| train/                |           |
|    entropy_loss       | -29.5     |
|    explained_variance | -1.86e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 24099     |
|    policy_loss        | 5.48e-06  |
|    std                | 1.06      |
|    value_loss         | 1.89e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.58e+03 |
| time/                 |           |
|    fps                | 253       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.55e+03 |
| time/                 |           |
|    fps                | 248       |
|    iterations         | 1300      |
|    time_elapsed       | 26        |
|    total_timesteps    | 126500    |
| train/                |           |
|    entropy_loss       | -29.7     |
|    explained_variance | 0.226     |
|    learning_rate      | 0.0007    |
|    n_updates          | 25299     |
|    policy_loss        | 3.62e-05  |
|    std                | 1.07      |
|    value_loss         | 3.42e-12  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.55e+03 |
| time/                 |           |
|    fps                | 248       |
|    iterations         | 1400      |
|    time_elapsed       | 28        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.53e+03 |
| time/                 |           |
|    fps                | 262       |
|    iterations         | 500       |
|    time_elapsed       | 9         |
|    total_timesteps    | 132500    |
| train/                |           |
|    entropy_loss       | -29.6     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 26499     |
|    policy_loss        | -8.15e-08 |
|    std                | 1.07      |
|    value_loss         | 8.53e-18  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.53e+03 |
| time/                 |           |
|    fps                | 265       |
|    iterations         | 600       |
|    time_elapsed       | 11        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.46e+03 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 1800      |
|    time_elapsed       | 33        |
|    total_timesteps    | 139000    |
| train/                |           |
|    entropy_loss       | -29.7     |
|    explained_variance | 0.00879   |
|    learning_rate      | 0.0007    |
|    n_updates          | 27799     |
|    policy_loss        | 2.01e+04  |
|    std                | 1.07      |
|    value_loss         | 3.71e+06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.46e+03 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 1900      |
|    time_elapsed       | 35        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.46e+03 |
| time/                 |           |
|    fps                | 270       |
|    iterations         | 1000      |
|    time_elapsed       | 18        |
|    total_timesteps    | 145000    |
| train/                |           |
|    entropy_loss       | -29.8     |
|    explained_variance | -38.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 28999     |
|    policy_loss        | -0.00853  |
|    std                | 1.08      |
|    value_loss         | 1.41e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.46e+03 |
| time/                 |           |
|    fps                | 269       |
|    iterations         | 1100      |
|    time_elapsed       | 20        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.45e+03 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 151000    |
| train/                |           |
|    entropy_loss       | -29.8     |
|    explained_variance | -8.69     |
|    learning_rate      | 0.0007    |
|    n_updates          | 30199     |
|    policy_loss        | 0.0121    |
|    std                | 1.08      |
|    value_loss         | 2.16e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.45e+03 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 300       |
|    time_elapsed       | 5         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.45e+03 |
| time/                 |           |
|    fps                | 253       |
|    iterations         | 1400      |
|    time_elapsed       | 27        |
|    total_timesteps    | 157000    |
| train/                |           |
|    entropy_loss       | -29.8     |
|    explained_variance | -0.337    |
|    learning_rate      | 0.0007    |
|    n_updates          | 31399     |
|    policy_loss        | 0.000509  |
|    std                | 1.08      |
|    value_loss         | 7.9e-09   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.45e+03 |
| time/                 |           |
|    fps                | 252       |
|    iterations         | 1500      |
|    time_elapsed       | 29        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.45e+03 |
| time/                 |           |
|    fps                | 239       |
|    iterations         | 600       |
|    time_elapsed       | 12        |
|    total_timesteps    | 163000    |
| train/                |           |
|    entropy_loss       | -29.9     |
|    explained_variance | -456      |
|    learning_rate      | 0.0007    |
|    n_updates          | 32599     |
|    policy_loss        | -0.000224 |
|    std                | 1.09      |
|    value_loss         | 3.01e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.45e+03 |
| time/                 |           |
|    fps                | 238       |
|    iterations         | 700       |
|    time_elapsed       | 14        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.45e+03 |
| time/                 |           |
|    fps                | 237       |
|    iterations         | 1800      |
|    time_elapsed       | 37        |
|    total_timesteps    | 169000    |
| train/                |           |
|    entropy_loss       | -30.1     |
|    explained_variance | -9.48     |
|    learning_rate      | 0.0007    |
|    n_updates          | 33799     |
|    policy_loss        | 0.00116   |
|    std                | 1.09      |
|    value_loss         | 1.94e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.45e+03 |
| time/                 |           |
|    fps                | 237       |
|    iterations         | 1900      |
|    time_elapsed       | 39        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.59e+03 |
| time/                 |           |
|    fps                | 256       |
|    iterations         | 1000      |
|    time_elapsed       | 19        |
|    total_timesteps    | 175000    |
| train/                |           |
|    entropy_loss       | -30.2     |
|    explained_variance | 0.385     |
|    learning_rate      | 0.0007    |
|    n_updates          | 34999     |
|    policy_loss        | 0.000888  |
|    std                | 1.1       |
|    value_loss         | 9.76e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.59e+03 |
| time/                 |           |
|    fps                | 257       |
|    iterations         | 1100      |
|    time_elapsed       | 21        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.63e+03 |
| time/                 |           |
|    fps                | 281       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 181000    |
| train/                |           |
|    entropy_loss       | -30.3     |
|    explained_variance | 0.0876    |
|    learning_rate      | 0.0007    |
|    n_updates          | 36199     |
|    policy_loss        | -0.0863   |
|    std                | 1.11      |
|    value_loss         | 8.36e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.63e+03 |
| time/                 |           |
|    fps                | 270       |
|    iterations         | 300       |
|    time_elapsed       | 5         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.63e+03 |
| time/                 |           |
|    fps                | 269       |
|    iterations         | 1400      |
|    time_elapsed       | 25        |
|    total_timesteps    | 187000    |
| train/                |           |
|    entropy_loss       | -30.5     |
|    explained_variance | 0.516     |
|    learning_rate      | 0.0007    |
|    n_updates          | 37399     |
|    policy_loss        | 0.002     |
|    std                | 1.12      |
|    value_loss         | 8.34e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.63e+03 |
| time/                 |           |
|    fps                | 269       |
|    iterations         | 1500      |
|    time_elapsed       | 27        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.66e+03 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 600       |
|    time_elapsed       | 11        |
|    total_timesteps    | 193000    |
| train/                |           |
|    entropy_loss       | -30.5     |
|    explained_variance | 0.0127    |
|    learning_rate      | 0.0007    |
|    n_updates          | 38599     |
|    policy_loss        | 603       |
|    std                | 1.12      |
|    value_loss         | 485       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.66e+03 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 700       |
|    time_elapsed       | 13        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.68e+03 |
| time/                 |           |
|    fps                | 272       |
|    iterations         | 1800      |
|    time_elapsed       | 32        |
|    total_timesteps    | 199000    |
| train/                |           |
|    entropy_loss       | -30.5     |
|    explained_variance | 0.0723    |
|    learning_rate      | 0.0007    |
|    n_updates          | 39799     |
|    policy_loss        | -0.0213   |
|    std                | 1.12      |
|    value_loss         | 4.79e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.68e+03 |
| time/                 |           |
|    fps                | 272       |
|    iterations         | 1900      |
|    time_elapsed       | 34        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.71e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 1100      |
|    time_elapsed       | 20        |
|    total_timesteps    | 205500    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | 0.0762    |
|    learning_rate      | 0.0007    |
|    n_updates          | 41099     |
|    policy_loss        | 0.065     |
|    std                | 1.11      |
|    value_loss         | 5.76e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.71e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 1200      |
|    time_elapsed       | 21        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.76e+03 |
| time/                 |           |
|    fps                | 244       |
|    iterations         | 300       |
|    time_elapsed       | 6         |
|    total_timesteps    | 211500    |
| train/                |           |
|    entropy_loss       | -30.5     |
|    explained_variance | -3.28     |
|    learning_rate      | 0.0007    |
|    n_updates          | 42299     |
|    policy_loss        | 0.000685  |
|    std                | 1.12      |
|    value_loss         | 1.08e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.76e+03 |
| time/                 |           |
|    fps                | 244       |
|    iterations         | 400       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.82e+03 |
| time/                 |           |
|    fps                | 252       |
|    iterations         | 1600      |
|    time_elapsed       | 31        |
|    total_timesteps    | 218000    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | 0.619     |
|    learning_rate      | 0.0007    |
|    n_updates          | 43599     |
|    policy_loss        | -0.000212 |
|    std                | 1.11      |
|    value_loss         | 8.74e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.82e+03 |
| time/                 |           |
|    fps                | 252       |
|    iterations         | 1700      |
|    time_elapsed       | 33        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.85e+03 |
| time/                 |           |
|    fps                | 219       |
|    iterations         | 800       |
|    time_elapsed       | 18        |
|    total_timesteps    | 224000    |
| train/                |           |
|    entropy_loss       | -30.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 44799     |
|    policy_loss        | 0.00353   |
|    std                | 1.12      |
|    value_loss         | 1.62e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.85e+03 |
| time/                 |           |
|    fps                | 221       |
|    iterations         | 900       |
|    time_elapsed       | 20        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 232       |
|    iterations         | 2000      |
|    time_elapsed       | 43        |
|    total_timesteps    | 230000    |
| train/                |           |
|    entropy_loss       | -30.6     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 45999     |
|    policy_loss        | -0.000515 |
|    std                | 1.13      |
|    value_loss         | 2.78e-10  |
-------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 227       |
|    iterations         | 100       |
|    time_elap

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.9e+03 |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 1200     |
|    time_elapsed       | 27       |
|    total_timesteps    | 236000   |
| train/                |          |
|    entropy_loss       | -30.6    |
|    explained_variance | -6.96    |
|    learning_rate      | 0.0007   |
|    n_updates          | 47199    |
|    policy_loss        | 0.0192   |
|    std                | 1.12     |
|    value_loss         | 9.66e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.91e+03 |
| time/                 |           |
|    fps                | 220       |
|    iterations         | 1300      |
|    time_elapsed       | 29        |
|    total_timesteps    | 2365

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.96e+03 |
| time/                 |           |
|    fps                | 230       |
|    iterations         | 400       |
|    time_elapsed       | 8         |
|    total_timesteps    | 242000    |
| train/                |           |
|    entropy_loss       | -30.5     |
|    explained_variance | -56.2     |
|    learning_rate      | 0.0007    |
|    n_updates          | 48399     |
|    policy_loss        | 0.00895   |
|    std                | 1.12      |
|    value_loss         | 4.4e-07   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.96e+03 |
| time/                 |           |
|    fps                | 232       |
|    iterations         | 500       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 223       |
|    iterations         | 1600      |
|    time_elapsed       | 35        |
|    total_timesteps    | 248000    |
| train/                |           |
|    entropy_loss       | -30.5     |
|    explained_variance | -0.224    |
|    learning_rate      | 0.0007    |
|    n_updates          | 49599     |
|    policy_loss        | 0.0355    |
|    std                | 1.12      |
|    value_loss         | 1.49e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 223       |
|    iterations         | 1700      |
|    time_elapsed       | 38        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 224       |
|    iterations         | 800       |
|    time_elapsed       | 17        |
|    total_timesteps    | 254000    |
| train/                |           |
|    entropy_loss       | -30.6     |
|    explained_variance | 0.949     |
|    learning_rate      | 0.0007    |
|    n_updates          | 50799     |
|    policy_loss        | -0.00465  |
|    std                | 1.13      |
|    value_loss         | 2.63e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 225       |
|    iterations         | 900       |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 224       |
|    iterations         | 2000      |
|    time_elapsed       | 44        |
|    total_timesteps    | 260000    |
| train/                |           |
|    entropy_loss       | -30.6     |
|    explained_variance | -2.6      |
|    learning_rate      | 0.0007    |
|    n_updates          | 51999     |
|    policy_loss        | 0.00473   |
|    std                | 1.12      |
|    value_loss         | 2.99e-08  |
-------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 231       |
|    iterations         | 100       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 232       |
|    iterations         | 1200      |
|    time_elapsed       | 25        |
|    total_timesteps    | 266000    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | 9.5e-05   |
|    learning_rate      | 0.0007    |
|    n_updates          | 53199     |
|    policy_loss        | 2.68e+03  |
|    std                | 1.12      |
|    value_loss         | 1.11e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 233       |
|    iterations         | 1300      |
|    time_elapsed       | 27        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 247       |
|    iterations         | 400       |
|    time_elapsed       | 8         |
|    total_timesteps    | 272000    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | -7.55     |
|    learning_rate      | 0.0007    |
|    n_updates          | 54399     |
|    policy_loss        | -0.00051  |
|    std                | 1.11      |
|    value_loss         | 1.13e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.97e+03 |
| time/                 |           |
|    fps                | 241       |
|    iterations         | 500       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.95e+03 |
| time/                 |           |
|    fps                | 220       |
|    iterations         | 1600      |
|    time_elapsed       | 36        |
|    total_timesteps    | 278000    |
| train/                |           |
|    entropy_loss       | -30.4     |
|    explained_variance | 0.015     |
|    learning_rate      | 0.0007    |
|    n_updates          | 55599     |
|    policy_loss        | -9.62e+04 |
|    std                | 1.11      |
|    value_loss         | 1.82e+07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.95e+03 |
| time/                 |           |
|    fps                | 220       |
|    iterations         | 1700      |
|    time_elapsed       | 38        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.93e+03 |
| time/                 |           |
|    fps                | 272       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 284000    |
| train/                |           |
|    entropy_loss       | -30.5     |
|    explained_variance | -2.23     |
|    learning_rate      | 0.0007    |
|    n_updates          | 56799     |
|    policy_loss        | -0.00159  |
|    std                | 1.12      |
|    value_loss         | 3.87e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.93e+03 |
| time/                 |           |
|    fps                | 273       |
|    iterations         | 900       |
|    time_elapsed       | 16        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.92e+03 |
| time/                 |           |
|    fps                | 276       |
|    iterations         | 2000      |
|    time_elapsed       | 36        |
|    total_timesteps    | 290000    |
| train/                |           |
|    entropy_loss       | -30.7     |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0007    |
|    n_updates          | 57999     |
|    policy_loss        | 1.13e-05  |
|    std                | 1.13      |
|    value_loss         | 1.49e-13  |
-------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.9e+03  |
| time/                 |           |
|    fps                | 287       |
|    iterations         | 100       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.85e+03 |
| time/                 |           |
|    fps                | 288       |
|    iterations         | 1200      |
|    time_elapsed       | 20        |
|    total_timesteps    | 296000    |
| train/                |           |
|    entropy_loss       | -30.8     |
|    explained_variance | -4.02     |
|    learning_rate      | 0.0007    |
|    n_updates          | 59199     |
|    policy_loss        | 0.00828   |
|    std                | 1.14      |
|    value_loss         | 7.05e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.85e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 1300      |
|    time_elapsed       | 22        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.81e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 400       |
|    time_elapsed       | 6         |
|    total_timesteps    | 302000    |
| train/                |           |
|    entropy_loss       | -30.9     |
|    explained_variance | -3.21     |
|    learning_rate      | 0.0007    |
|    n_updates          | 60399     |
|    policy_loss        | 0.00245   |
|    std                | 1.14      |
|    value_loss         | 1.15e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.81e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 1600      |
|    time_elapsed       | 27        |
|    total_timesteps    | 308000    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | -245      |
|    learning_rate      | 0.0007    |
|    n_updates          | 61599     |
|    policy_loss        | -0.00209  |
|    std                | 1.15      |
|    value_loss         | 4.9e-09   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 287       |
|    iterations         | 1700      |
|    time_elapsed       | 29        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.75e+03 |
| time/                 |           |
|    fps                | 284       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 314000    |
| train/                |           |
|    entropy_loss       | -31       |
|    explained_variance | -13.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 62799     |
|    policy_loss        | -0.000641 |
|    std                | 1.15      |
|    value_loss         | 8.37e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.75e+03 |
| time/                 |           |
|    fps                | 285       |
|    iterations         | 900       |
|    time_elapsed       | 15        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.73e+03 |
| time/                 |           |
|    fps                | 275       |
|    iterations         | 2000      |
|    time_elapsed       | 36        |
|    total_timesteps    | 320000    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 63999     |
|    policy_loss        | 0.00703   |
|    std                | 1.16      |
|    value_loss         | 6.47e-08  |
-------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.73e+03 |
| time/                 |           |
|    fps                | 261       |
|    iterations         | 100       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.72e+03 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 1200      |
|    time_elapsed       | 22        |
|    total_timesteps    | 326000    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 65199     |
|    policy_loss        | -3.91e-06 |
|    std                | 1.16      |
|    value_loss         | 1.86e-14  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.7e+03 |
| time/                 |          |
|    fps                | 266      |
|    iterations         | 1300     |
|    time_elapsed       | 24       |
|    total_timesteps

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.7e+03 |
| time/                 |          |
|    fps                | 284      |
|    iterations         | 500      |
|    time_elapsed       | 8        |
|    total_timesteps    | 332500   |
| train/                |          |
|    entropy_loss       | -31.3    |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 66499    |
|    policy_loss        | 3.26e-05 |
|    std                | 1.17     |
|    value_loss         | 9.65e-13 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.7e+03  |
| time/                 |           |
|    fps                | 284       |
|    iterations         | 600       |
|    time_elapsed       | 10        |
|    total_timesteps    | 3330

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.7e+03 |
| time/                 |          |
|    fps                | 280      |
|    iterations         | 1800     |
|    time_elapsed       | 32       |
|    total_timesteps    | 339000   |
| train/                |          |
|    entropy_loss       | -31.3    |
|    explained_variance | -0.113   |
|    learning_rate      | 0.0007   |
|    n_updates          | 67799    |
|    policy_loss        | 0.000431 |
|    std                | 1.17     |
|    value_loss         | 1.04e-09 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.7e+03 |
| time/                 |          |
|    fps                | 280      |
|    iterations         | 1900     |
|    time_elapsed       | 33       |
|    total_timesteps    | 339500   |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.7e+03 |
| time/                 |          |
|    fps                | 284      |
|    iterations         | 1100     |
|    time_elapsed       | 19       |
|    total_timesteps    | 345500   |
| train/                |          |
|    entropy_loss       | -31.3    |
|    explained_variance | 0.0298   |
|    learning_rate      | 0.0007   |
|    n_updates          | 69099    |
|    policy_loss        | 0.0081   |
|    std                | 1.17     |
|    value_loss         | 8.47e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.7e+03 |
| time/                 |          |
|    fps                | 284      |
|    iterations         | 1200     |
|    time_elapsed       | 21       |
|    total_timesteps    | 346000   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 274       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_timesteps    | 352000    |
| train/                |           |
|    entropy_loss       | -31.3     |
|    explained_variance | -13.2     |
|    learning_rate      | 0.0007    |
|    n_updates          | 70399     |
|    policy_loss        | -0.000564 |
|    std                | 1.17      |
|    value_loss         | 2.28e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 273       |
|    iterations         | 500       |
|    time_elapsed       | 9         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.65e+03 |
| time/                 |           |
|    fps                | 278       |
|    iterations         | 1600      |
|    time_elapsed       | 28        |
|    total_timesteps    | 358000    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | 2.03e-06  |
|    learning_rate      | 0.0007    |
|    n_updates          | 71599     |
|    policy_loss        | -2.33e+03 |
|    std                | 1.16      |
|    value_loss         | 6.7e+03   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.65e+03 |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 1700      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 800       |
|    time_elapsed       | 14        |
|    total_timesteps    | 364000    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | 0.00315   |
|    learning_rate      | 0.0007    |
|    n_updates          | 72799     |
|    policy_loss        | 0.0585    |
|    std                | 1.16      |
|    value_loss         | 4.18e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 282       |
|    iterations         | 900       |
|    time_elapsed       | 15        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 280       |
|    iterations         | 2000      |
|    time_elapsed       | 35        |
|    total_timesteps    | 370000    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 73999     |
|    policy_loss        | -0.208    |
|    std                | 1.16      |
|    value_loss         | 5.46e-05  |
-------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 276       |
|    iterations         | 100       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 273       |
|    iterations         | 1200      |
|    time_elapsed       | 21        |
|    total_timesteps    | 376000    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | -0.663    |
|    learning_rate      | 0.0007    |
|    n_updates          | 75199     |
|    policy_loss        | 0.0738    |
|    std                | 1.16      |
|    value_loss         | 6.88e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 273       |
|    iterations         | 1300      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.66e+03 |
| time/                 |           |
|    fps                | 285       |
|    iterations         | 400       |
|    time_elapsed       | 7         |
|    total_timesteps    | 382000    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | -1.21     |
|    learning_rate      | 0.0007    |
|    n_updates          | 76399     |
|    policy_loss        | -0.00389  |
|    std                | 1.16      |
|    value_loss         | 2.72e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.66e+03 |
| time/                 |           |
|    fps                | 287       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 1600      |
|    time_elapsed       | 27        |
|    total_timesteps    | 388000    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | -0.101    |
|    learning_rate      | 0.0007    |
|    n_updates          | 77599     |
|    policy_loss        | 0.00487   |
|    std                | 1.16      |
|    value_loss         | 4.03e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.68e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 1700      |
|    time_elapsed       | 29        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.71e+03 |
| time/                 |           |
|    fps                | 289       |
|    iterations         | 800       |
|    time_elapsed       | 13        |
|    total_timesteps    | 394000    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | -1.96e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 78799     |
|    policy_loss        | 0.0012    |
|    std                | 1.16      |
|    value_loss         | 2.65e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.73e+03 |
| time/                 |           |
|    fps                | 288       |
|    iterations         | 900       |
|    time_elapsed       | 15        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.76e+03 |
| time/                 |           |
|    fps                | 285       |
|    iterations         | 2000      |
|    time_elapsed       | 34        |
|    total_timesteps    | 400000    |
| train/                |           |
|    entropy_loss       | -31.1     |
|    explained_variance | -2.24     |
|    learning_rate      | 0.0007    |
|    n_updates          | 79999     |
|    policy_loss        | -0.0035   |
|    std                | 1.16      |
|    value_loss         | 1.36e-08  |
-------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.76e+03 |
| time/                 |           |
|    fps                | 289       |
|    iterations         | 100       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.81e+03 |
| time/                 |           |
|    fps                | 293       |
|    iterations         | 1300      |
|    time_elapsed       | 22        |
|    total_timesteps    | 406500    |
| train/                |           |
|    entropy_loss       | -31.2     |
|    explained_variance | -0.247    |
|    learning_rate      | 0.0007    |
|    n_updates          | 81299     |
|    policy_loss        | 0.000341  |
|    std                | 1.17      |
|    value_loss         | 2.14e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.81e+03 |
| time/                 |           |
|    fps                | 293       |
|    iterations         | 1400      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.83e+03 |
| time/                 |           |
|    fps                | 294       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_timesteps    | 412500    |
| train/                |           |
|    entropy_loss       | -31.4     |
|    explained_variance | -175      |
|    learning_rate      | 0.0007    |
|    n_updates          | 82499     |
|    policy_loss        | 0.000551  |
|    std                | 1.18      |
|    value_loss         | 4.84e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.83e+03 |
| time/                 |           |
|    fps                | 295       |
|    iterations         | 600       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.86e+03 |
| time/                 |           |
|    fps                | 295       |
|    iterations         | 1700      |
|    time_elapsed       | 28        |
|    total_timesteps    | 418500    |
| train/                |           |
|    entropy_loss       | -31.3     |
|    explained_variance | -1.09     |
|    learning_rate      | 0.0007    |
|    n_updates          | 83699     |
|    policy_loss        | -0.000505 |
|    std                | 1.17      |
|    value_loss         | 2.76e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.86e+03 |
| time/                 |           |
|    fps                | 296       |
|    iterations         | 1800      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.87e+03 |
| time/                 |           |
|    fps                | 292       |
|    iterations         | 900       |
|    time_elapsed       | 15        |
|    total_timesteps    | 424500    |
| train/                |           |
|    entropy_loss       | -31.4     |
|    explained_variance | -3.12     |
|    learning_rate      | 0.0007    |
|    n_updates          | 84899     |
|    policy_loss        | -0.000292 |
|    std                | 1.18      |
|    value_loss         | 1.88e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.87e+03 |
| time/                 |           |
|    fps                | 292       |
|    iterations         | 1000      |
|    time_elapsed       | 17        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 295       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 430500    |
| train/                |           |
|    entropy_loss       | -31.3     |
|    explained_variance | -471      |
|    learning_rate      | 0.0007    |
|    n_updates          | 86099     |
|    policy_loss        | 0.000218  |
|    std                | 1.17      |
|    value_loss         | 6.06e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 295       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.87e+03 |
| time/                 |           |
|    fps                | 303       |
|    iterations         | 1300      |
|    time_elapsed       | 21        |
|    total_timesteps    | 436500    |
| train/                |           |
|    entropy_loss       | -31.4     |
|    explained_variance | -27.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 87299     |
|    policy_loss        | -0.031    |
|    std                | 1.18      |
|    value_loss         | 1.18e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.87e+03 |
| time/                 |           |
|    fps                | 300       |
|    iterations         | 1400      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.87e+03 |
| time/                 |           |
|    fps                | 286       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_timesteps    | 442500    |
| train/                |           |
|    entropy_loss       | -31.4     |
|    explained_variance | -2.17e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 88499     |
|    policy_loss        | 0.0722    |
|    std                | 1.18      |
|    value_loss         | 6.21e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.87e+03 |
| time/                 |           |
|    fps                | 287       |
|    iterations         | 600       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.87e+03 |
| time/                 |           |
|    fps                | 297       |
|    iterations         | 1700      |
|    time_elapsed       | 28        |
|    total_timesteps    | 448500    |
| train/                |           |
|    entropy_loss       | -31.4     |
|    explained_variance | -2.18     |
|    learning_rate      | 0.0007    |
|    n_updates          | 89699     |
|    policy_loss        | -0.00224  |
|    std                | 1.18      |
|    value_loss         | 6.65e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.87e+03 |
| time/                 |           |
|    fps                | 298       |
|    iterations         | 1800      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.92e+03 |
| time/                 |           |
|    fps                | 316       |
|    iterations         | 900       |
|    time_elapsed       | 14        |
|    total_timesteps    | 454500    |
| train/                |           |
|    entropy_loss       | -31.6     |
|    explained_variance | 0.484     |
|    learning_rate      | 0.0007    |
|    n_updates          | 90899     |
|    policy_loss        | 0.00213   |
|    std                | 1.19      |
|    value_loss         | 4.65e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.92e+03 |
| time/                 |           |
|    fps                | 316       |
|    iterations         | 1000      |
|    time_elapsed       | 15        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.92e+03 |
| time/                 |           |
|    fps                | 336       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 460500    |
| train/                |           |
|    entropy_loss       | -31.7     |
|    explained_variance | 0.235     |
|    learning_rate      | 0.0007    |
|    n_updates          | 92099     |
|    policy_loss        | -0.00265  |
|    std                | 1.2       |
|    value_loss         | 7.21e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.93e+03 |
| time/                 |           |
|    fps                | 340       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.91e+03 |
| time/                 |           |
|    fps                | 371       |
|    iterations         | 1300      |
|    time_elapsed       | 17        |
|    total_timesteps    | 466500    |
| train/                |           |
|    entropy_loss       | -31.7     |
|    explained_variance | -86.4     |
|    learning_rate      | 0.0007    |
|    n_updates          | 93299     |
|    policy_loss        | 7.79e-05  |
|    std                | 1.2       |
|    value_loss         | 9.16e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.91e+03 |
| time/                 |           |
|    fps                | 373       |
|    iterations         | 1400      |
|    time_elapsed       | 18        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.89e+03 |
| time/                 |           |
|    fps                | 379       |
|    iterations         | 500       |
|    time_elapsed       | 6         |
|    total_timesteps    | 472500    |
| train/                |           |
|    entropy_loss       | -31.7     |
|    explained_variance | -4.35     |
|    learning_rate      | 0.0007    |
|    n_updates          | 94499     |
|    policy_loss        | -0.00198  |
|    std                | 1.2       |
|    value_loss         | 5.32e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.89e+03 |
| time/                 |           |
|    fps                | 377       |
|    iterations         | 600       |
|    time_elapsed       | 7         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.86e+03 |
| time/                 |           |
|    fps                | 362       |
|    iterations         | 1700      |
|    time_elapsed       | 23        |
|    total_timesteps    | 478500    |
| train/                |           |
|    entropy_loss       | -31.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 95699     |
|    policy_loss        | -8.38e-06 |
|    std                | 1.21      |
|    value_loss         | 7.86e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.86e+03 |
| time/                 |           |
|    fps                | 361       |
|    iterations         | 1800      |
|    time_elapsed       | 24        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.84e+03 |
| time/                 |           |
|    fps                | 325       |
|    iterations         | 900       |
|    time_elapsed       | 13        |
|    total_timesteps    | 484500    |
| train/                |           |
|    entropy_loss       | -32.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 96899     |
|    policy_loss        | 0.00289   |
|    std                | 1.22      |
|    value_loss         | 1e-08     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.84e+03 |
| time/                 |           |
|    fps                | 327       |
|    iterations         | 1000      |
|    time_elapsed       | 15        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.83e+03 |
| time/                 |           |
|    fps                | 366       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 490500    |
| train/                |           |
|    entropy_loss       | -32       |
|    explained_variance | -1.33     |
|    learning_rate      | 0.0007    |
|    n_updates          | 98099     |
|    policy_loss        | -0.00078  |
|    std                | 1.22      |
|    value_loss         | 1.12e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.83e+03 |
| time/                 |           |
|    fps                | 365       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 377       |
|    iterations         | 1400      |
|    time_elapsed       | 18        |
|    total_timesteps    | 497000    |
| train/                |           |
|    entropy_loss       | -32.1     |
|    explained_variance | -17.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 99399     |
|    policy_loss        | -9.42e-05 |
|    std                | 1.23      |
|    value_loss         | 2.71e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.78e+03 |
| time/                 |           |
|    fps                | 376       |
|    iterations         | 1500      |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.72e+03 |
| time/                 |           |
|    fps                | 356       |
|    iterations         | 600       |
|    time_elapsed       | 8         |
|    total_timesteps    | 503000    |
| train/                |           |
|    entropy_loss       | -32.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 100599    |
|    policy_loss        | -3.68e-06 |
|    std                | 1.22      |
|    value_loss         | 1.55e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.72e+03 |
| time/                 |           |
|    fps                | 354       |
|    iterations         | 700       |
|    time_elapsed       | 9         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.58e+03 |
| time/                 |           |
|    fps                | 334       |
|    iterations         | 1900      |
|    time_elapsed       | 28        |
|    total_timesteps    | 509500    |
| train/                |           |
|    entropy_loss       | -32.1     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0007    |
|    n_updates          | 101899    |
|    policy_loss        | 1.4e-05   |
|    std                | 1.23      |
|    value_loss         | 2.41e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.52e+03 |
| time/                 |           |
|    fps                | 334       |
|    iterations         | 2000      |
|    time_elapsed       | 29        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.28e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 1100      |
|    time_elapsed       | 17        |
|    total_timesteps    | 515500    |
| train/                |           |
|    entropy_loss       | -32.2     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 103099    |
|    policy_loss        | 7.36e-07  |
|    std                | 1.23      |
|    value_loss         | 5.46e-16  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.28e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 1200      |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.22e+03 |
| time/                 |           |
|    fps                | 322       |
|    iterations         | 300       |
|    time_elapsed       | 4         |
|    total_timesteps    | 521500    |
| train/                |           |
|    entropy_loss       | -32.3     |
|    explained_variance | -7.87     |
|    learning_rate      | 0.0007    |
|    n_updates          | 104299    |
|    policy_loss        | 0.00425   |
|    std                | 1.23      |
|    value_loss         | 2.42e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.22e+03 |
| time/                 |           |
|    fps                | 321       |
|    iterations         | 400       |
|    time_elapsed       | 6         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.22e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 1500      |
|    time_elapsed       | 24        |
|    total_timesteps    | 527500    |
| train/                |           |
|    entropy_loss       | -32.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 105499    |
|    policy_loss        | -5.71e-06 |
|    std                | 1.25      |
|    value_loss         | 3.49e-14  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.22e+03 |
| time/                 |           |
|    fps                | 312       |
|    iterations         | 1600      |
|    time_elapsed       | 25        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.2e+03  |
| time/                 |           |
|    fps                | 317       |
|    iterations         | 700       |
|    time_elapsed       | 11        |
|    total_timesteps    | 533500    |
| train/                |           |
|    entropy_loss       | -32.7     |
|    explained_variance | 0.0299    |
|    learning_rate      | 0.0007    |
|    n_updates          | 106699    |
|    policy_loss        | -1.11e+05 |
|    std                | 1.26      |
|    value_loss         | 1.8e+07   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.2e+03 |
| time/                 |          |
|    fps                | 316      |
|    iterations         | 800      |
|    time_elapsed       | 12       |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.21e+03 |
| time/                 |           |
|    fps                | 319       |
|    iterations         | 2000      |
|    time_elapsed       | 31        |
|    total_timesteps    | 540000    |
| train/                |           |
|    entropy_loss       | -32.8     |
|    explained_variance | nan       |
|    learning_rate      | 0.0007    |
|    n_updates          | 107999    |
|    policy_loss        | -0        |
|    std                | 1.27      |
|    value_loss         | 0         |
-------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.21e+03 |
| time/                 |           |
|    fps                | 317       |
|    iterations         | 100       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.17e+03 |
| time/                 |           |
|    fps                | 319       |
|    iterations         | 1200      |
|    time_elapsed       | 18        |
|    total_timesteps    | 546000    |
| train/                |           |
|    entropy_loss       | -32.9     |
|    explained_variance | -2.32     |
|    learning_rate      | 0.0007    |
|    n_updates          | 109199    |
|    policy_loss        | -0.0221   |
|    std                | 1.28      |
|    value_loss         | 6.09e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.17e+03 |
| time/                 |           |
|    fps                | 320       |
|    iterations         | 1300      |
|    time_elapsed       | 20        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.12e+03 |
| time/                 |           |
|    fps                | 334       |
|    iterations         | 400       |
|    time_elapsed       | 5         |
|    total_timesteps    | 552000    |
| train/                |           |
|    entropy_loss       | -32.9     |
|    explained_variance | -0.209    |
|    learning_rate      | 0.0007    |
|    n_updates          | 110399    |
|    policy_loss        | -0.000294 |
|    std                | 1.28      |
|    value_loss         | 1.07e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.12e+03 |
| time/                 |           |
|    fps                | 334       |
|    iterations         | 500       |
|    time_elapsed       | 7         |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.1e+03 |
| time/                 |          |
|    fps                | 346      |
|    iterations         | 1600     |
|    time_elapsed       | 23       |
|    total_timesteps    | 558000   |
| train/                |          |
|    entropy_loss       | -33.2    |
|    explained_variance | 0.426    |
|    learning_rate      | 0.0007   |
|    n_updates          | 111599   |
|    policy_loss        | 5.56e-05 |
|    std                | 1.3      |
|    value_loss         | 7.54e-12 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.1e+03  |
| time/                 |           |
|    fps                | 347       |
|    iterations         | 1700      |
|    time_elapsed       | 24        |
|    total_timesteps    | 5585

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.09e+03 |
| time/                 |           |
|    fps                | 368       |
|    iterations         | 900       |
|    time_elapsed       | 12        |
|    total_timesteps    | 564500    |
| train/                |           |
|    entropy_loss       | -33.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 112899    |
|    policy_loss        | -1.15e-05 |
|    std                | 1.32      |
|    value_loss         | 1.52e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.09e+03 |
| time/                 |           |
|    fps                | 366       |
|    iterations         | 1000      |
|    time_elapsed       | 13        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.08e+03 |
| time/                 |           |
|    fps                | 354       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 570500    |
| train/                |           |
|    entropy_loss       | -33.5     |
|    explained_variance | -48.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 114099    |
|    policy_loss        | 2.42e-05  |
|    std                | 1.32      |
|    value_loss         | 8.25e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.08e+03 |
| time/                 |           |
|    fps                | 351       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.07e+03 |
| time/                 |           |
|    fps                | 366       |
|    iterations         | 1300      |
|    time_elapsed       | 17        |
|    total_timesteps    | 576500    |
| train/                |           |
|    entropy_loss       | -33.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 115299    |
|    policy_loss        | 0.00328   |
|    std                | 1.34      |
|    value_loss         | 1.02e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.07e+03 |
| time/                 |           |
|    fps                | 366       |
|    iterations         | 1400      |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.08e+03 |
| time/                 |           |
|    fps                | 359       |
|    iterations         | 500       |
|    time_elapsed       | 6         |
|    total_timesteps    | 582500    |
| train/                |           |
|    entropy_loss       | -33.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 116499    |
|    policy_loss        | -9.51e-07 |
|    std                | 1.34      |
|    value_loss         | 9.7e-16   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.07e+03 |
| time/                 |           |
|    fps                | 358       |
|    iterations         | 600       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.07e+03 |
| time/                 |           |
|    fps                | 354       |
|    iterations         | 1700      |
|    time_elapsed       | 23        |
|    total_timesteps    | 588500    |
| train/                |           |
|    entropy_loss       | -33.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 117699    |
|    policy_loss        | -1.43e-06 |
|    std                | 1.34      |
|    value_loss         | 2.18e-15  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.06e+03 |
| time/                 |           |
|    fps                | 354       |
|    iterations         | 1800      |
|    time_elapsed       | 25        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.04e+03 |
| time/                 |           |
|    fps                | 366       |
|    iterations         | 900       |
|    time_elapsed       | 12        |
|    total_timesteps    | 594500    |
| train/                |           |
|    entropy_loss       | -33.8     |
|    explained_variance | -48.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 118899    |
|    policy_loss        | -0.000292 |
|    std                | 1.34      |
|    value_loss         | 7.06e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.06e+03 |
| time/                 |           |
|    fps                | 368       |
|    iterations         | 1000      |
|    time_elapsed       | 13        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.05e+03 |
| time/                 |           |
|    fps                | 389       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 600500    |
| train/                |           |
|    entropy_loss       | -33.9     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0007    |
|    n_updates          | 120099    |
|    policy_loss        | -1.42e-05 |
|    std                | 1.35      |
|    value_loss         | 2.26e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.05e+03 |
| time/                 |           |
|    fps                | 382       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.14e+03 |
| time/                 |           |
|    fps                | 381       |
|    iterations         | 1400      |
|    time_elapsed       | 18        |
|    total_timesteps    | 607000    |
| train/                |           |
|    entropy_loss       | -34.4     |
|    explained_variance | nan       |
|    learning_rate      | 0.0007    |
|    n_updates          | 121399    |
|    policy_loss        | -0        |
|    std                | 1.38      |
|    value_loss         | 0         |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.14e+03 |
| time/                 |           |
|    fps                | 381       |
|    iterations         | 1500      |
|    time_elapsed       | 19        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.26e+03 |
| time/                 |           |
|    fps                | 361       |
|    iterations         | 600       |
|    time_elapsed       | 8         |
|    total_timesteps    | 613000    |
| train/                |           |
|    entropy_loss       | -34.3     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 122599    |
|    policy_loss        | 8.84e-06  |
|    std                | 1.38      |
|    value_loss         | 8.3e-14   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.28e+03 |
| time/                 |           |
|    fps                | 359       |
|    iterations         | 700       |
|    time_elapsed       | 9         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.47e+03 |
| time/                 |           |
|    fps                | 354       |
|    iterations         | 1800      |
|    time_elapsed       | 25        |
|    total_timesteps    | 619000    |
| train/                |           |
|    entropy_loss       | -34.4     |
|    explained_variance | 0.0671    |
|    learning_rate      | 0.0007    |
|    n_updates          | 123799    |
|    policy_loss        | 0.000302  |
|    std                | 1.38      |
|    value_loss         | 9.34e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.51e+03 |
| time/                 |           |
|    fps                | 353       |
|    iterations         | 1900      |
|    time_elapsed       | 26        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.51e+03 |
| time/                 |           |
|    fps                | 325       |
|    iterations         | 1000      |
|    time_elapsed       | 15        |
|    total_timesteps    | 625000    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | -2.36     |
|    learning_rate      | 0.0007    |
|    n_updates          | 124999    |
|    policy_loss        | -5.07e-06 |
|    std                | 1.4       |
|    value_loss         | 3.02e-13  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.51e+03 |
| time/                 |           |
|    fps                | 324       |
|    iterations         | 1100      |
|    time_elapsed       | 16        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.53e+03 |
| time/                 |           |
|    fps                | 315       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 631000    |
| train/                |           |
|    entropy_loss       | -34.6     |
|    explained_variance | -180      |
|    learning_rate      | 0.0007    |
|    n_updates          | 126199    |
|    policy_loss        | 5.5e-05   |
|    std                | 1.39      |
|    value_loss         | 5.07e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.53e+03 |
| time/                 |           |
|    fps                | 312       |
|    iterations         | 300       |
|    time_elapsed       | 4         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.56e+03 |
| time/                 |           |
|    fps                | 314       |
|    iterations         | 1400      |
|    time_elapsed       | 22        |
|    total_timesteps    | 637000    |
| train/                |           |
|    entropy_loss       | -34.6     |
|    explained_variance | 0.247     |
|    learning_rate      | 0.0007    |
|    n_updates          | 127399    |
|    policy_loss        | -0.000458 |
|    std                | 1.39      |
|    value_loss         | 1.04e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.56e+03 |
| time/                 |           |
|    fps                | 314       |
|    iterations         | 1500      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.58e+03 |
| time/                 |           |
|    fps                | 316       |
|    iterations         | 600       |
|    time_elapsed       | 9         |
|    total_timesteps    | 643000    |
| train/                |           |
|    entropy_loss       | -34.5     |
|    explained_variance | -10.8     |
|    learning_rate      | 0.0007    |
|    n_updates          | 128599    |
|    policy_loss        | -0.00103  |
|    std                | 1.39      |
|    value_loss         | 1.38e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.58e+03 |
| time/                 |           |
|    fps                | 319       |
|    iterations         | 700       |
|    time_elapsed       | 10        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.6e+03 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 1900     |
|    time_elapsed       | 29       |
|    total_timesteps    | 649500   |
| train/                |          |
|    entropy_loss       | -34.6    |
|    explained_variance | -2.34    |
|    learning_rate      | 0.0007   |
|    n_updates          | 129899   |
|    policy_loss        | 0.00317  |
|    std                | 1.39     |
|    value_loss         | 1.28e-08 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.62e+03 |
| time/                 |           |
|    fps                | 318       |
|    iterations         | 2000      |
|    time_elapsed       | 31        |
|    total_timesteps    | 6500

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 330       |
|    iterations         | 1100      |
|    time_elapsed       | 16        |
|    total_timesteps    | 655500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 131099    |
|    policy_loss        | -7.03e-07 |
|    std                | 1.41      |
|    value_loss         | 5.46e-16  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.64e+03 |
| time/                 |           |
|    fps                | 330       |
|    iterations         | 1200      |
|    time_elapsed       | 18        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.6e+03  |
| time/                 |           |
|    fps                | 343       |
|    iterations         | 300       |
|    time_elapsed       | 4         |
|    total_timesteps    | 661500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | -2.49     |
|    learning_rate      | 0.0007    |
|    n_updates          | 132299    |
|    policy_loss        | -0.000335 |
|    std                | 1.41      |
|    value_loss         | 1.07e-09  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.6e+03 |
| time/                 |          |
|    fps                | 346      |
|    iterations         | 400      |
|    time_elapsed       | 5        |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.62e+03 |
| time/                 |           |
|    fps                | 366       |
|    iterations         | 1600      |
|    time_elapsed       | 21        |
|    total_timesteps    | 668000    |
| train/                |           |
|    entropy_loss       | -35       |
|    explained_variance | -2.7      |
|    learning_rate      | 0.0007    |
|    n_updates          | 133599    |
|    policy_loss        | 0.000144  |
|    std                | 1.42      |
|    value_loss         | 5.83e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.63e+03 |
| time/                 |           |
|    fps                | 367       |
|    iterations         | 1700      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.63e+03 |
| time/                 |           |
|    fps                | 369       |
|    iterations         | 800       |
|    time_elapsed       | 10        |
|    total_timesteps    | 674000    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | -2.66     |
|    learning_rate      | 0.0007    |
|    n_updates          | 134799    |
|    policy_loss        | -0.00306  |
|    std                | 1.41      |
|    value_loss         | 7.76e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.63e+03 |
| time/                 |           |
|    fps                | 369       |
|    iterations         | 900       |
|    time_elapsed       | 12        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 358       |
|    iterations         | 2000      |
|    time_elapsed       | 27        |
|    total_timesteps    | 680000    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | 0.21      |
|    learning_rate      | 0.0007    |
|    n_updates          | 135999    |
|    policy_loss        | -0.113    |
|    std                | 1.41      |
|    value_loss         | 1.24e-05  |
-------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 376       |
|    iterations         | 100       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 343       |
|    iterations         | 1200      |
|    time_elapsed       | 17        |
|    total_timesteps    | 686000    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | -0.0365   |
|    learning_rate      | 0.0007    |
|    n_updates          | 137199    |
|    policy_loss        | -0.34     |
|    std                | 1.4       |
|    value_loss         | 0.000118  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 345       |
|    iterations         | 1300      |
|    time_elapsed       | 18        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.7e+03 |
| time/                 |          |
|    fps                | 359      |
|    iterations         | 400      |
|    time_elapsed       | 5        |
|    total_timesteps    | 692000   |
| train/                |          |
|    entropy_loss       | -34.9    |
|    explained_variance | -0.0804  |
|    learning_rate      | 0.0007   |
|    n_updates          | 138399   |
|    policy_loss        | -904     |
|    std                | 1.41     |
|    value_loss         | 659      |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.7e+03 |
| time/                 |          |
|    fps                | 348      |
|    iterations         | 500      |
|    time_elapsed       | 7        |
|    total_timesteps    | 692500   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.74e+03 |
| time/                 |           |
|    fps                | 351       |
|    iterations         | 1700      |
|    time_elapsed       | 24        |
|    total_timesteps    | 698500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 139699    |
|    policy_loss        | -6.05e+05 |
|    std                | 1.4       |
|    value_loss         | 3.2e+08   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.74e+03 |
| time/                 |           |
|    fps                | 347       |
|    iterations         | 1800      |
|    time_elapsed       | 25        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.96e+03 |
| time/                 |           |
|    fps                | 370       |
|    iterations         | 900       |
|    time_elapsed       | 12        |
|    total_timesteps    | 704500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | 0.0618    |
|    learning_rate      | 0.0007    |
|    n_updates          | 140899    |
|    policy_loss        | 0.0314    |
|    std                | 1.41      |
|    value_loss         | 1.04e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.96e+03 |
| time/                 |           |
|    fps                | 369       |
|    iterations         | 1000      |
|    time_elapsed       | 13        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.99e+03 |
| time/                 |           |
|    fps                | 368       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 710500    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | 0.00117   |
|    learning_rate      | 0.0007    |
|    n_updates          | 142099    |
|    policy_loss        | -0.199    |
|    std                | 1.4       |
|    value_loss         | 4.33e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.99e+03 |
| time/                 |           |
|    fps                | 358       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 339       |
|    iterations         | 1300      |
|    time_elapsed       | 19        |
|    total_timesteps    | 716500    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | 0.878     |
|    learning_rate      | 0.0007    |
|    n_updates          | 143299    |
|    policy_loss        | -0.0945   |
|    std                | 1.4       |
|    value_loss         | 6.56e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 339       |
|    iterations         | 1400      |
|    time_elapsed       | 20        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.05e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_timesteps    | 722500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | -0.0821   |
|    learning_rate      | 0.0007    |
|    n_updates          | 144499    |
|    policy_loss        | 0.000667  |
|    std                | 1.4       |
|    value_loss         | 1.8e-09   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.05e+03 |
| time/                 |           |
|    fps                | 308       |
|    iterations         | 600       |
|    time_elapsed       | 9         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.03e+03 |
| time/                 |           |
|    fps                | 326       |
|    iterations         | 1700      |
|    time_elapsed       | 26        |
|    total_timesteps    | 728500    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | -1.78     |
|    learning_rate      | 0.0007    |
|    n_updates          | 145699    |
|    policy_loss        | -0.0369   |
|    std                | 1.39      |
|    value_loss         | 1.18e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.03e+03 |
| time/                 |           |
|    fps                | 326       |
|    iterations         | 1800      |
|    time_elapsed       | 27        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.03e+03 |
| time/                 |           |
|    fps                | 353       |
|    iterations         | 900       |
|    time_elapsed       | 12        |
|    total_timesteps    | 734500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | -2.98     |
|    learning_rate      | 0.0007    |
|    n_updates          | 146899    |
|    policy_loss        | -0.0125   |
|    std                | 1.41      |
|    value_loss         | 1.39e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 356       |
|    iterations         | 1000      |
|    time_elapsed       | 14        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.96e+03 |
| time/                 |           |
|    fps                | 339       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 740500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0007    |
|    n_updates          | 148099    |
|    policy_loss        | 0.000745  |
|    std                | 1.4       |
|    value_loss         | 5.29e-10  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.95e+03 |
| time/                 |           |
|    fps                | 336       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.94e+03 |
| time/                 |           |
|    fps                | 333       |
|    iterations         | 1300      |
|    time_elapsed       | 19        |
|    total_timesteps    | 746500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | 0.402     |
|    learning_rate      | 0.0007    |
|    n_updates          | 149299    |
|    policy_loss        | -0.0316   |
|    std                | 1.41      |
|    value_loss         | 1.12e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.94e+03 |
| time/                 |           |
|    fps                | 335       |
|    iterations         | 1400      |
|    time_elapsed       | 20        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.95e+03 |
| time/                 |           |
|    fps                | 333       |
|    iterations         | 500       |
|    time_elapsed       | 7         |
|    total_timesteps    | 752500    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | -0.0175   |
|    learning_rate      | 0.0007    |
|    n_updates          | 150499    |
|    policy_loss        | 1.7e+03   |
|    std                | 1.4       |
|    value_loss         | 3.45e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -7.95e+03 |
| time/                 |           |
|    fps                | 337       |
|    iterations         | 600       |
|    time_elapsed       | 8         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 334       |
|    iterations         | 1700      |
|    time_elapsed       | 25        |
|    total_timesteps    | 758500    |
| train/                |           |
|    entropy_loss       | -34.7     |
|    explained_variance | -2.96e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 151699    |
|    policy_loss        | -0.108    |
|    std                | 1.4       |
|    value_loss         | 1.29e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 335       |
|    iterations         | 1800      |
|    time_elapsed       | 26        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 356       |
|    iterations         | 900       |
|    time_elapsed       | 12        |
|    total_timesteps    | 764500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | -0.653    |
|    learning_rate      | 0.0007    |
|    n_updates          | 152899    |
|    policy_loss        | -0.0213   |
|    std                | 1.41      |
|    value_loss         | 5.44e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 353       |
|    iterations         | 1000      |
|    time_elapsed       | 14        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 315       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 770500    |
| train/                |           |
|    entropy_loss       | -34.9     |
|    explained_variance | -0.882    |
|    learning_rate      | 0.0007    |
|    n_updates          | 154099    |
|    policy_loss        | 0.00668   |
|    std                | 1.42      |
|    value_loss         | 5.24e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.02e+03 |
| time/                 |           |
|    fps                | 323       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.04e+03 |
| time/                 |           |
|    fps                | 310       |
|    iterations         | 1300      |
|    time_elapsed       | 20        |
|    total_timesteps    | 776500    |
| train/                |           |
|    entropy_loss       | -34.8     |
|    explained_variance | 0.477     |
|    learning_rate      | 0.0007    |
|    n_updates          | 155299    |
|    policy_loss        | -0.0173   |
|    std                | 1.41      |
|    value_loss         | 3.12e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.04e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 1400      |
|    time_elapsed       | 22        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.04e+03 |
| time/                 |           |
|    fps                | 299       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_timesteps    | 782500    |
| train/                |           |
|    entropy_loss       | -35.3     |
|    explained_variance | -0.01     |
|    learning_rate      | 0.0007    |
|    n_updates          | 156499    |
|    policy_loss        | 0.998     |
|    std                | 1.44      |
|    value_loss         | 0.00104   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.04e+03 |
| time/                 |           |
|    fps                | 288       |
|    iterations         | 600       |
|    time_elapsed       | 10        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.07e+03 |
| time/                 |           |
|    fps                | 296       |
|    iterations         | 1700      |
|    time_elapsed       | 28        |
|    total_timesteps    | 788500    |
| train/                |           |
|    entropy_loss       | -35.3     |
|    explained_variance | -11.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 157699    |
|    policy_loss        | -0.00126  |
|    std                | 1.44      |
|    value_loss         | 5.8e-09   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.07e+03 |
| time/                 |           |
|    fps                | 297       |
|    iterations         | 1800      |
|    time_elapsed       | 30        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.07e+03 |
| time/                 |           |
|    fps                | 302       |
|    iterations         | 900       |
|    time_elapsed       | 14        |
|    total_timesteps    | 794500    |
| train/                |           |
|    entropy_loss       | -35.4     |
|    explained_variance | -6.81     |
|    learning_rate      | 0.0007    |
|    n_updates          | 158899    |
|    policy_loss        | -0.00554  |
|    std                | 1.45      |
|    value_loss         | 3.08e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.07e+03 |
| time/                 |           |
|    fps                | 305       |
|    iterations         | 1000      |
|    time_elapsed       | 16        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.07e+03 |
| time/                 |           |
|    fps                | 321       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 800500    |
| train/                |           |
|    entropy_loss       | -35.4     |
|    explained_variance | 0.217     |
|    learning_rate      | 0.0007    |
|    n_updates          | 160099    |
|    policy_loss        | -0.00014  |
|    std                | 1.46      |
|    value_loss         | 9.09e-11  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -8.07e+03 |
| time/                 |           |
|    fps                | 314       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 304       |
|    iterations         | 1300      |
|    time_elapsed       | 21        |
|    total_timesteps    | 806500    |
| train/                |           |
|    entropy_loss       | -35.5     |
|    explained_variance | 0.975     |
|    learning_rate      | 0.0007    |
|    n_updates          | 161299    |
|    policy_loss        | -64.2     |
|    std                | 1.46      |
|    value_loss         | 3.43      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 303       |
|    iterations         | 1400      |
|    time_elapsed       | 23        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 305       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_timesteps    | 812500    |
| train/                |           |
|    entropy_loss       | -35.5     |
|    explained_variance | 0.165     |
|    learning_rate      | 0.0007    |
|    n_updates          | 162499    |
|    policy_loss        | -1.17e+04 |
|    std                | 1.46      |
|    value_loss         | 9.55e+05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 306       |
|    iterations         | 600       |
|    time_elapsed       | 9         |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 303       |
|    iterations         | 1700      |
|    time_elapsed       | 27        |
|    total_timesteps    | 818500    |
| train/                |           |
|    entropy_loss       | -35.5     |
|    explained_variance | -2.91     |
|    learning_rate      | 0.0007    |
|    n_updates          | 163699    |
|    policy_loss        | 0.0108    |
|    std                | 1.46      |
|    value_loss         | 1.07e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 303       |
|    iterations         | 1800      |
|    time_elapsed       | 29        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 308       |
|    iterations         | 900       |
|    time_elapsed       | 14        |
|    total_timesteps    | 824500    |
| train/                |           |
|    entropy_loss       | -35.6     |
|    explained_variance | -0.381    |
|    learning_rate      | 0.0007    |
|    n_updates          | 164899    |
|    policy_loss        | -0.000192 |
|    std                | 1.47      |
|    value_loss         | 8.1e-10   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.88e+03 |
| time/                 |           |
|    fps                | 306       |
|    iterations         | 1000      |
|    time_elapsed       | 16        |
|    total_t

Logging to logs\A2C_1D_noIndi_all_0
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.91e+03 |
| time/                 |           |
|    fps                | 324       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 830500    |
| train/                |           |
|    entropy_loss       | -35.9     |
|    explained_variance | -2.22     |
|    learning_rate      | 0.0007    |
|    n_updates          | 166099    |
|    policy_loss        | -0.0073   |
|    std                | 1.49      |
|    value_loss         | 4.87e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.91e+03 |
| time/                 |           |
|    fps                | 321       |
|    iterations         | 200       |
|    time_elap

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.91e+03 |
| time/                 |           |
|    fps                | 312       |
|    iterations         | 1300      |
|    time_elapsed       | 20        |
|    total_timesteps    | 836500    |
| train/                |           |
|    entropy_loss       | -35.9     |
|    explained_variance | -6.67     |
|    learning_rate      | 0.0007    |
|    n_updates          | 167299    |
|    policy_loss        | -0.0264   |
|    std                | 1.49      |
|    value_loss         | 6.29e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.91e+03 |
| time/                 |           |
|    fps                | 313       |
|    iterations         | 1400      |
|    time_elapsed       | 22        |
|    total_t

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.98e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_timesteps    | 842500    |
| train/                |           |
|    entropy_loss       | -35.9     |
|    explained_variance | -1.19     |
|    learning_rate      | 0.0007    |
|    n_updates          | 168499    |
|    policy_loss        | -0.00283  |
|    std                | 1.49      |
|    value_loss         | 7.03e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.98e+03 |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 600       |
|    time_elapsed       | 9         |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 313      |
|    iterations         | 1800     |
|    time_elapsed       | 28       |
|    total_timesteps    | 849000   |
| train/                |          |
|    entropy_loss       | -35.9    |
|    explained_variance | -157     |
|    learning_rate      | 0.0007   |
|    n_updates          | 169799   |
|    policy_loss        | 0.000689 |
|    std                | 1.49     |
|    value_loss         | 2.14e-09 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 312      |
|    iterations         | 1900     |
|    time_elapsed       | 30       |
|    total_timesteps    | 849500   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 1100      |
|    time_elapsed       | 17        |
|    total_timesteps    | 855500    |
| train/                |           |
|    entropy_loss       | -36       |
|    explained_variance | -0.227    |
|    learning_rate      | 0.0007    |
|    n_updates          | 171099    |
|    policy_loss        | -0.000288 |
|    std                | 1.5       |
|    value_loss         | 1.11e-10  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 311      |
|    iterations         | 1200     |
|    time_elapsed       | 19       |
|    total_timesteps

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 279      |
|    iterations         | 400      |
|    time_elapsed       | 7        |
|    total_timesteps    | 862000   |
| train/                |          |
|    entropy_loss       | -36.1    |
|    explained_variance | -2.14    |
|    learning_rate      | 0.0007   |
|    n_updates          | 172399   |
|    policy_loss        | 7.39e-05 |
|    std                | 1.51     |
|    value_loss         | 5.19e-11 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 280       |
|    iterations         | 500       |
|    time_elapsed       | 8         |
|    total_timesteps    | 8625

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 302      |
|    iterations         | 1700     |
|    time_elapsed       | 28       |
|    total_timesteps    | 868500   |
| train/                |          |
|    entropy_loss       | -36.1    |
|    explained_variance | -5.08    |
|    learning_rate      | 0.0007   |
|    n_updates          | 173699   |
|    policy_loss        | 0.00877  |
|    std                | 1.5      |
|    value_loss         | 1.11e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 303      |
|    iterations         | 1800     |
|    time_elapsed       | 29       |
|    total_timesteps    | 869000   |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 317      |
|    iterations         | 1000     |
|    time_elapsed       | 15       |
|    total_timesteps    | 875000   |
| train/                |          |
|    entropy_loss       | -36.3    |
|    explained_variance | -6.88    |
|    learning_rate      | 0.0007   |
|    n_updates          | 174999   |
|    policy_loss        | -0.0116  |
|    std                | 1.51     |
|    value_loss         | 1.17e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 317      |
|    iterations         | 1100     |
|    time_elapsed       | 17       |
|    total_timesteps    | 875500   |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 313      |
|    iterations         | 300      |
|    time_elapsed       | 4        |
|    total_timesteps    | 881500   |
| train/                |          |
|    entropy_loss       | -36.2    |
|    explained_variance | -1.21    |
|    learning_rate      | 0.0007   |
|    n_updates          | 176299   |
|    policy_loss        | 0.000633 |
|    std                | 1.51     |
|    value_loss         | 6.16e-10 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 314       |
|    iterations         | 400       |
|    time_elapsed       | 6         |
|    total_timesteps    | 8820

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 312      |
|    iterations         | 1600     |
|    time_elapsed       | 25       |
|    total_timesteps    | 888000   |
| train/                |          |
|    entropy_loss       | -36.2    |
|    explained_variance | -72.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 177599   |
|    policy_loss        | -0.00132 |
|    std                | 1.51     |
|    value_loss         | 3.56e-09 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 311      |
|    iterations         | 1700     |
|    time_elapsed       | 27       |
|    total_timesteps    | 888500   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 316       |
|    iterations         | 900       |
|    time_elapsed       | 14        |
|    total_timesteps    | 894500    |
| train/                |           |
|    entropy_loss       | -36.2     |
|    explained_variance | -1.79     |
|    learning_rate      | 0.0007    |
|    n_updates          | 178899    |
|    policy_loss        | -0.000337 |
|    std                | 1.51      |
|    value_loss         | 1.34e-10  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 316      |
|    iterations         | 1000     |
|    time_elapsed       | 15       |
|    total_timesteps

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 312      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 901000   |
| train/                |          |
|    entropy_loss       | -36.4    |
|    explained_variance | 0.28     |
|    learning_rate      | 0.0007   |
|    n_updates          | 180199   |
|    policy_loss        | 0.00474  |
|    std                | 1.52     |
|    value_loss         | 2.54e-08 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 310       |
|    iterations         | 300       |
|    time_elapsed       | 4         |
|    total_timesteps    | 9015

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 315      |
|    iterations         | 1500     |
|    time_elapsed       | 23       |
|    total_timesteps    | 907500   |
| train/                |          |
|    entropy_loss       | -36.5    |
|    explained_variance | -34.9    |
|    learning_rate      | 0.0007   |
|    n_updates          | 181499   |
|    policy_loss        | 0.0121   |
|    std                | 1.53     |
|    value_loss         | 1.28e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 315      |
|    iterations         | 1600     |
|    time_elapsed       | 25       |
|    total_timesteps    | 908000   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 327       |
|    iterations         | 800       |
|    time_elapsed       | 12        |
|    total_timesteps    | 914000    |
| train/                |           |
|    entropy_loss       | -36.3     |
|    explained_variance | -1.66     |
|    learning_rate      | 0.0007    |
|    n_updates          | 182799    |
|    policy_loss        | -0.000194 |
|    std                | 1.52      |
|    value_loss         | 8.52e-11  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 326      |
|    iterations         | 900      |
|    time_elapsed       | 13       |
|    total_timesteps

Logging to logs\A2C_1D_noIndi_all_0
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 341      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 920500   |
| train/                |          |
|    entropy_loss       | -36.6    |
|    explained_variance | 0.403    |
|    learning_rate      | 0.0007   |
|    n_updates          | 184099   |
|    policy_loss        | 0.0292   |
|    std                | 1.54     |
|    value_loss         | 7.33e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 336      |
|    iterations         | 200      |
|    time_elapsed       | 2        |
| 

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 326      |
|    iterations         | 1400     |
|    time_elapsed       | 21       |
|    total_timesteps    | 927000   |
| train/                |          |
|    entropy_loss       | -36.8    |
|    explained_variance | -1.36    |
|    learning_rate      | 0.0007   |
|    n_updates          | 185399   |
|    policy_loss        | 0.00629  |
|    std                | 1.55     |
|    value_loss         | 4.61e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 325      |
|    iterations         | 1500     |
|    time_elapsed       | 23       |
|    total_timesteps    | 927500   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 326       |
|    iterations         | 700       |
|    time_elapsed       | 10        |
|    total_timesteps    | 933500    |
| train/                |           |
|    entropy_loss       | -36.8     |
|    explained_variance | 0.782     |
|    learning_rate      | 0.0007    |
|    n_updates          | 186699    |
|    policy_loss        | -0.000238 |
|    std                | 1.56      |
|    value_loss         | 2.67e-10  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 327      |
|    iterations         | 800      |
|    time_elapsed       | 12       |
|    total_timesteps

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 2000     |
|    time_elapsed       | 31       |
|    total_timesteps    | 940000   |
| train/                |          |
|    entropy_loss       | -36.9    |
|    explained_variance | -1.18    |
|    learning_rate      | 0.0007   |
|    n_updates          | 187999   |
|    policy_loss        | 0.000739 |
|    std                | 1.57     |
|    value_loss         | 7.84e-10 |
------------------------------------
Logging to logs\A2C_1D_noIndi_all_0
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 311      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
| 

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 1300     |
|    time_elapsed       | 20       |
|    total_timesteps    | 946500   |
| train/                |          |
|    entropy_loss       | -37.2    |
|    explained_variance | -33.2    |
|    learning_rate      | 0.0007   |
|    n_updates          | 189299   |
|    policy_loss        | 0.00115  |
|    std                | 1.59     |
|    value_loss         | 8.76e-10 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -1e+04   |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 1400     |
|    time_elapsed       | 21       |
|    total_timesteps    | 947000   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 315       |
|    iterations         | 600       |
|    time_elapsed       | 9         |
|    total_timesteps    | 953000    |
| train/                |           |
|    entropy_loss       | -37.4     |
|    explained_variance | -0.000181 |
|    learning_rate      | 0.0007    |
|    n_updates          | 190599    |
|    policy_loss        | 5.57e+03  |
|    std                | 1.61      |
|    value_loss         | 3.54e+05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -1e+04    |
| time/                 |           |
|    fps                | 308       |
|    iterations         | 700       |
|    time_elapsed       | 11        |
|    total_t

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 300      |
|    iterations         | 1900     |
|    time_elapsed       | 31       |
|    total_timesteps    | 959500   |
| train/                |          |
|    entropy_loss       | -37.4    |
|    explained_variance | -4.85    |
|    learning_rate      | 0.0007   |
|    n_updates          | 191899   |
|    policy_loss        | 0.0221   |
|    std                | 1.61     |
|    value_loss         | 1.57e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.4e+03  |
| time/                 |           |
|    fps                | 301       |
|    iterations         | 2000      |
|    time_elapsed       | 33        |
|    total_timesteps    | 9600

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.4e+03  |
| time/                 |           |
|    fps                | 313       |
|    iterations         | 1200      |
|    time_elapsed       | 19        |
|    total_timesteps    | 966000    |
| train/                |           |
|    entropy_loss       | -37.5     |
|    explained_variance | -14.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 193199    |
|    policy_loss        | -0.000999 |
|    std                | 1.62      |
|    value_loss         | 2.14e-09  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 314      |
|    iterations         | 1300     |
|    time_elapsed       | 20       |
|    total_timesteps

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.4e+03  |
| time/                 |           |
|    fps                | 320       |
|    iterations         | 500       |
|    time_elapsed       | 7         |
|    total_timesteps    | 972500    |
| train/                |           |
|    entropy_loss       | -37.6     |
|    explained_variance | -0.111    |
|    learning_rate      | 0.0007    |
|    n_updates          | 194499    |
|    policy_loss        | -0.000864 |
|    std                | 1.63      |
|    value_loss         | 1e-09     |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 320      |
|    iterations         | 600      |
|    time_elapsed       | 9        |
|    total_timesteps

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 1800     |
|    time_elapsed       | 28       |
|    total_timesteps    | 979000   |
| train/                |          |
|    entropy_loss       | -37.7    |
|    explained_variance | -0.285   |
|    learning_rate      | 0.0007   |
|    n_updates          | 195799   |
|    policy_loss        | 0.00455  |
|    std                | 1.63     |
|    value_loss         | 2.64e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 319      |
|    iterations         | 1900     |
|    time_elapsed       | 29       |
|    total_timesteps    | 979500   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.4e+03  |
| time/                 |           |
|    fps                | 311       |
|    iterations         | 1100      |
|    time_elapsed       | 17        |
|    total_timesteps    | 985500    |
| train/                |           |
|    entropy_loss       | -37.9     |
|    explained_variance | 0.0595    |
|    learning_rate      | 0.0007    |
|    n_updates          | 197099    |
|    policy_loss        | -4.28e+03 |
|    std                | 1.65      |
|    value_loss         | 1.53e+04  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 309      |
|    iterations         | 1200     |
|    time_elapsed       | 19       |
|    total_timesteps

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 322      |
|    iterations         | 400      |
|    time_elapsed       | 6        |
|    total_timesteps    | 992000   |
| train/                |          |
|    entropy_loss       | -38      |
|    explained_variance | -0.434   |
|    learning_rate      | 0.0007   |
|    n_updates          | 198399   |
|    policy_loss        | 0.00724  |
|    std                | 1.66     |
|    value_loss         | 1e-07    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 500      |
|    time_elapsed       | 7        |
|    total_timesteps    | 992500   |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.02e+03  |
|    ep_rew_mean        | -9.4e+03  |
| time/                 |           |
|    fps                | 320       |
|    iterations         | 1700      |
|    time_elapsed       | 26        |
|    total_timesteps    | 998500    |
| train/                |           |
|    entropy_loss       | -37.9     |
|    explained_variance | 0.0165    |
|    learning_rate      | 0.0007    |
|    n_updates          | 199699    |
|    policy_loss        | -1.52e+04 |
|    std                | 1.66      |
|    value_loss         | 8.29e+05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.02e+03 |
|    ep_rew_mean        | -9.4e+03 |
| time/                 |          |
|    fps                | 318      |
|    iterations         | 1800     |
|    time_elapsed       | 28       |
|    total_timesteps

# PPO 1D All All-symbol

In [9]:
import os
from stable_baselines3 import A2C
from stable_baselines3 import PPO
# A2C, PPO

# Train a PPO agent on the existing `env` and write a checkpoint after every
# chunk of training.
# NOTE(review): `env` is not created in this cell (the gym.make line below is
# commented out); it must already exist from an earlier cell. Confirm it is the
# intended environment before running on a fresh kernel.
models_dir = "models/PPO_1D_noIndi_all"
logdir = "logs"
#tensorboard --logdir D:\Study\IS\code\gym-mtsim-main\gym_mtsim\logs\

# exist_ok=True creates the directories when missing and is a no-op when they
# are already there, without a separate (race-prone) existence check.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

# env = gym.make('forex-hedge-v0')

env.reset()

model = PPO('MultiInputPolicy', env, verbose=1, tensorboard_log=logdir)

TIMESTEPS = 10000      # step budget requested from each learn() call
N_CHECKPOINTS = 100    # number of learn()/save() rounds

for i in range(1, N_CHECKPOINTS + 1):
    # reset_num_timesteps=False keeps the step counter running across calls,
    # so all rounds continue one training run under the same tb_log_name.
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="PPO_1D_noIndi_all")
    # The file name is the nominal budget (TIMESTEPS * i). The logged
    # total_timesteps does not land exactly on these multiples (the run output
    # shows values such as 2048, 22528, 43008), so treat the name as a label
    # rather than the exact number of environment steps.
    model.save(f"{models_dir}/{TIMESTEPS*i}")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs\PPO_1D_noIndi_all_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.02e+03 |
|    ep_rew_mean     | -1e+04   |
| time/              |          |
|    fps             | 326      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -1e+04      |
| time/                   |             |
|    fps                  | 313         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008008495 |
|    clip_fraction        | 0.111       |
|    clip_range           

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -7.05e+03 |
| time/              |           |
|    fps             | 372       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 22528     |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -7.3e+03    |
| time/                   |             |
|    fps                  | 312         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.004032062 |
|    clip_fraction        | 0.0168      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.4       |
|    explained_variance   

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -7.33e+03 |
| time/              |           |
|    fps             | 359       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 43008     |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -7.45e+03   |
| time/                   |             |
|    fps                  | 305         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.008955824 |
|    clip_fraction        | 0.0645      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.5       |
|    explained_variance   

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -8.19e+03 |
| time/              |           |
|    fps             | 333       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 63488     |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.25e+03   |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 2           |
|    time_elapsed         | 15          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.011032889 |
|    clip_fraction        | 0.0839      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.8       |
|    explained_variance   | -0.0055     |
|    learning_rate  

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -8.54e+03 |
| time/              |           |
|    fps             | 351       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 83968     |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.39e+03   |
| time/                   |             |
|    fps                  | 223         |
|    iterations           | 2           |
|    time_elapsed         | 18          |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.015757678 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.7       |
|    explained_variance   

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -8.65e+03 |
| time/              |           |
|    fps             | 350       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 104448    |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.65e+03   |
| time/                   |             |
|    fps                  | 312         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 106496      |
| train/                  |             |
|    approx_kl            | 0.060436085 |
|    clip_fraction        | 0.0771      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.8       |
|    explained_variance   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -9.29e+03   |
| time/                   |             |
|    fps                  | 323         |
|    iterations           | 2           |
|    time_elapsed         | 12          |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.004041904 |
|    clip_fraction        | 0.0505      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.8       |
|    explained_variance   | 0.0524      |
|    learning_rate        | 0.0003      |
|    loss                 | 4.34e+05    |
|    n_updates            | 610         |
|    policy_gradient_loss | 0.00616     |
|    std                  | 1.02        |
|    value_loss           | 1.31e+06    |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.28e+03   |
| time/                   |             |
|    fps                  | 278         |
|    iterations           | 3           |
|    time_elapsed         | 22          |
|    total_timesteps      | 149504      |
| train/                  |             |
|    approx_kl            | 0.010978652 |
|    clip_fraction        | 0.0913      |
|    clip_range           | 0.2         |
|    entropy_loss         | -28.9       |
|    explained_variance   | -0.00753    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.16e+06    |
|    n_updates            | 720         |
|    policy_gradient_loss | 0.00808     |
|    std                  | 1.03        |
|    value_loss           | 2.74e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -6.23e+03  |
| time/                   |            |
|    fps                  | 274        |
|    iterations           | 3          |
|    time_elapsed         | 22         |
|    total_timesteps      | 169984     |
| train/                  |            |
|    approx_kl            | 0.18369375 |
|    clip_fraction        | 0.506      |
|    clip_range           | 0.2        |
|    entropy_loss         | -28.9      |
|    explained_variance   | 0.11       |
|    learning_rate        | 0.0003     |
|    loss                 | 1.16e+04   |
|    n_updates            | 820        |
|    policy_gradient_loss | 0.0323     |
|    std                  | 1.03       |
|    value_loss           | 2.77e+05   |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.46e+03    |
| time/                   |              |
|    fps                  | 253          |
|    iterations           | 4            |
|    time_elapsed         | 32           |
|    total_timesteps      | 192512       |
| train/                  |              |
|    approx_kl            | 0.0078094965 |
|    clip_fraction        | 0.0166       |
|    clip_range           | 0.2          |
|    entropy_loss         | -28.9        |
|    explained_variance   | -0.0133      |
|    learning_rate        | 0.0003       |
|    loss                 | 2.76e+06     |
|    n_updates            | 930          |
|    policy_gradient_loss | -0.000344    |
|    std                  | 1.03         |
|    value_loss           | 7.69e+06     |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -4.4e+03     |
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 5            |
|    time_elapsed         | 42           |
|    total_timesteps      | 215040       |
| train/                  |              |
|    approx_kl            | 0.0046140947 |
|    clip_fraction        | 0.0216       |
|    clip_range           | 0.2          |
|    entropy_loss         | -29          |
|    explained_variance   | -0.0037      |
|    learning_rate        | 0.0003       |
|    loss                 | 3.98e+06     |
|    n_updates            | 1040         |
|    policy_gradient_loss | -0.00409     |
|    std                  | 1.03         |
|    value_loss           | 1.04e+07     |
------------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
------------------

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -3.59e+03  |
| time/                   |            |
|    fps                  | 304        |
|    iterations           | 5          |
|    time_elapsed         | 33         |
|    total_timesteps      | 235520     |
| train/                  |            |
|    approx_kl            | 0.02422769 |
|    clip_fraction        | 0.148      |
|    clip_range           | 0.2        |
|    entropy_loss         | -29        |
|    explained_variance   | 0.0655     |
|    learning_rate        | 0.0003     |
|    loss                 | 1.05e+05   |
|    n_updates            | 1140       |
|    policy_gradient_loss | -0.00394   |
|    std                  | 1.03       |
|    value_loss           | 4.83e+05   |
----------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |     

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -4.82e+03 |
| time/              |           |
|    fps             | 274       |
|    iterations      | 1         |
|    time_elapsed    | 7         |
|    total_timesteps | 258048    |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -4.82e+03    |
| time/                   |              |
|    fps                  | 268          |
|    iterations           | 2            |
|    time_elapsed         | 15           |
|    total_timesteps      | 260096       |
| train/                  |              |
|    approx_kl            | 0.0016947369 |
|    clip_fraction        | 0.013        |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.1        |
|    explain

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -5.99e+03 |
| time/              |           |
|    fps             | 377       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 278528    |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -5.99e+03    |
| time/                   |              |
|    fps                  | 332          |
|    iterations           | 2            |
|    time_elapsed         | 12           |
|    total_timesteps      | 280576       |
| train/                  |              |
|    approx_kl            | 0.0071245106 |
|    clip_fraction        | 0.0473       |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.2        |
|    explain

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -5.99e+03 |
| time/              |           |
|    fps             | 379       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 299008    |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -5.99e+03    |
| time/                   |              |
|    fps                  | 333          |
|    iterations           | 2            |
|    time_elapsed         | 12           |
|    total_timesteps      | 301056       |
| train/                  |              |
|    approx_kl            | 0.0039639417 |
|    clip_fraction        | 0.0242       |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.4        |
|    explained_variance   | 0.0909       |
|    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -8.05e+03    |
| time/                   |              |
|    fps                  | 327          |
|    iterations           | 2            |
|    time_elapsed         | 12           |
|    total_timesteps      | 321536       |
| train/                  |              |
|    approx_kl            | 0.0016176364 |
|    clip_fraction        | 0.0042       |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.5        |
|    explained_variance   | 0.0856       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.76e+06     |
|    n_updates            | 1560         |
|    policy_gradient_loss | -0.00341     |
|    std                  | 1.06         |
|    value_loss           | 1.87e+06     |
------------------------------------------
---------------------------------------
| rollout/    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -9.57e+03    |
| time/                   |              |
|    fps                  | 302          |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 342016       |
| train/                  |              |
|    approx_kl            | 0.0007980543 |
|    clip_fraction        | 0.00659      |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.5        |
|    explained_variance   | 0.0146       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.16e+06     |
|    n_updates            | 1660         |
|    policy_gradient_loss | -0.00355     |
|    std                  | 1.06         |
|    value_loss           | 8.75e+06     |
------------------------------------------
-----------------------------------------
| rollout/  

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.02e+03      |
|    ep_rew_mean          | -9.31e+03     |
| time/                   |               |
|    fps                  | 290           |
|    iterations           | 3             |
|    time_elapsed         | 21            |
|    total_timesteps      | 364544        |
| train/                  |               |
|    approx_kl            | 7.3084724e-05 |
|    clip_fraction        | 0.000244      |
|    clip_range           | 0.2           |
|    entropy_loss         | -29.7         |
|    explained_variance   | 0.0559        |
|    learning_rate        | 0.0003        |
|    loss                 | 2.42e+06      |
|    n_updates            | 1770          |
|    policy_gradient_loss | -0.000706     |
|    std                  | 1.07          |
|    value_loss           | 1.69e+06      |
-------------------------------------------
--------------------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -9.31e+03   |
| time/                   |             |
|    fps                  | 281         |
|    iterations           | 3           |
|    time_elapsed         | 21          |
|    total_timesteps      | 385024      |
| train/                  |             |
|    approx_kl            | 0.047892883 |
|    clip_fraction        | 0.467       |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.6       |
|    explained_variance   | 0.0704      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.04e+06    |
|    n_updates            | 1870        |
|    policy_gradient_loss | 0.01        |
|    std                  | 1.07        |
|    value_loss           | 1.77e+06    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -9.31e+03    |
| time/                   |              |
|    fps                  | 288          |
|    iterations           | 4            |
|    time_elapsed         | 28           |
|    total_timesteps      | 407552       |
| train/                  |              |
|    approx_kl            | 0.0054955888 |
|    clip_fraction        | 0.0289       |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.8        |
|    explained_variance   | 0.145        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.85e+03     |
|    n_updates            | 1980         |
|    policy_gradient_loss | -0.00358     |
|    std                  | 1.07         |
|    value_loss           | 4.67e+05     |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -8.82e+03    |
| time/                   |              |
|    fps                  | 272          |
|    iterations           | 5            |
|    time_elapsed         | 37           |
|    total_timesteps      | 430080       |
| train/                  |              |
|    approx_kl            | 8.520749e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.8        |
|    explained_variance   | 0.00648      |
|    learning_rate        | 0.0003       |
|    loss                 | 3.47e+06     |
|    n_updates            | 2090         |
|    policy_gradient_loss | -0.000861    |
|    std                  | 1.08         |
|    value_loss           | 5.46e+06     |
------------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.02e+03    |
| time/                   |              |
|    fps                  | 262          |
|    iterations           | 5            |
|    time_elapsed         | 38           |
|    total_timesteps      | 450560       |
| train/                  |              |
|    approx_kl            | 0.0022247438 |
|    clip_fraction        | 0.00459      |
|    clip_range           | 0.2          |
|    entropy_loss         | -29.8        |
|    explained_variance   | 0.0274       |
|    learning_rate        | 0.0003       |
|    loss                 | 4.92e+05     |
|    n_updates            | 2190         |
|    policy_gradient_loss | -0.00614     |
|    std                  | 1.08         |
|    value_loss           | 3.33e+06     |
------------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -6.31e+03   |
| time/                   |             |
|    fps                  | 296         |
|    iterations           | 5           |
|    time_elapsed         | 34          |
|    total_timesteps      | 471040      |
| train/                  |             |
|    approx_kl            | 0.022254992 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -29.9       |
|    explained_variance   | 0.118       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.93e+03    |
|    n_updates            | 2290        |
|    policy_gradient_loss | -0.00244    |
|    std                  | 1.08        |
|    value_loss           | 9.52e+05    |
-----------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rol

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -5.81e+03 |
| time/              |           |
|    fps             | 304       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 493568    |
----------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -5.81e+03  |
| time/                   |            |
|    fps                  | 286        |
|    iterations           | 2          |
|    time_elapsed         | 14         |
|    total_timesteps      | 495616     |
| train/                  |            |
|    approx_kl            | 0.01858998 |
|    clip_fraction        | 0.103      |
|    clip_range           | 0.2        |
|    entropy_loss         | -30.2      |
|    explained_variance   | 0.0106     |

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -5.81e+03 |
| time/              |           |
|    fps             | 308       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 514048    |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -5.81e+03    |
| time/                   |              |
|    fps                  | 300          |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 516096       |
| train/                  |              |
|    approx_kl            | 0.0021530907 |
|    clip_fraction        | 0.00742      |
|    clip_range           | 0.2          |
|    entropy_loss         | -30.2        |
|    explained_variance   | 0.00944      |
|    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -4.86e+03  |
| time/                   |            |
|    fps                  | 315        |
|    iterations           | 2          |
|    time_elapsed         | 12         |
|    total_timesteps      | 536576     |
| train/                  |            |
|    approx_kl            | 0.10411203 |
|    clip_fraction        | 0.65       |
|    clip_range           | 0.2        |
|    entropy_loss         | -30.4      |
|    explained_variance   | 0.187      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.27e+05   |
|    n_updates            | 2610       |
|    policy_gradient_loss | 0.0274     |
|    std                  | 1.11       |
|    value_loss           | 6.5e+05    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.96e+03    |
| time/                   |              |
|    fps                  | 272          |
|    iterations           | 3            |
|    time_elapsed         | 22           |
|    total_timesteps      | 559104       |
| train/                  |              |
|    approx_kl            | 0.0022678138 |
|    clip_fraction        | 0.00688      |
|    clip_range           | 0.2          |
|    entropy_loss         | -30.4        |
|    explained_variance   | 0.0821       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.23e+06     |
|    n_updates            | 2720         |
|    policy_gradient_loss | -0.00271     |
|    std                  | 1.11         |
|    value_loss           | 1.62e+06     |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.38e+03    |
| time/                   |              |
|    fps                  | 256          |
|    iterations           | 3            |
|    time_elapsed         | 23           |
|    total_timesteps      | 579584       |
| train/                  |              |
|    approx_kl            | 0.0023517697 |
|    clip_fraction        | 0.0145       |
|    clip_range           | 0.2          |
|    entropy_loss         | -30.4        |
|    explained_variance   | 0.121        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.3e+06      |
|    n_updates            | 2820         |
|    policy_gradient_loss | -0.00222     |
|    std                  | 1.11         |
|    value_loss           | 1.13e+06     |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -6.87e+03   |
| time/                   |             |
|    fps                  | 267         |
|    iterations           | 3           |
|    time_elapsed         | 22          |
|    total_timesteps      | 600064      |
| train/                  |             |
|    approx_kl            | 0.016502125 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -30.7       |
|    explained_variance   | 0.223       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.41e+04    |
|    n_updates            | 2920        |
|    policy_gradient_loss | 0.00124     |
|    std                  | 1.13        |
|    value_loss           | 4.54e+05    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.88e+03    |
| time/                   |              |
|    fps                  | 265          |
|    iterations           | 4            |
|    time_elapsed         | 30           |
|    total_timesteps      | 622592       |
| train/                  |              |
|    approx_kl            | 0.0067754085 |
|    clip_fraction        | 0.0329       |
|    clip_range           | 0.2          |
|    entropy_loss         | -30.8        |
|    explained_variance   | 0.127        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.68e+05     |
|    n_updates            | 3030         |
|    policy_gradient_loss | -0.00207     |
|    std                  | 1.13         |
|    value_loss           | 1.33e+06     |
------------------------------------------
---------------------------------------
| rollout/    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -8.86e+03  |
| time/                   |            |
|    fps                  | 275        |
|    iterations           | 5          |
|    time_elapsed         | 37         |
|    total_timesteps      | 645120     |
| train/                  |            |
|    approx_kl            | 0.32462937 |
|    clip_fraction        | 0.446      |
|    clip_range           | 0.2        |
|    entropy_loss         | -30.9      |
|    explained_variance   | 0.241      |
|    learning_rate        | 0.0003     |
|    loss                 | 8.68e+04   |
|    n_updates            | 3140       |
|    policy_gradient_loss | 0.0229     |
|    std                  | 1.14       |
|    value_loss           | 3.08e+05   |
----------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |     

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.02e+03      |
|    ep_rew_mean          | -1e+04        |
| time/                   |               |
|    fps                  | 242           |
|    iterations           | 5             |
|    time_elapsed         | 42            |
|    total_timesteps      | 665600        |
| train/                  |               |
|    approx_kl            | 0.00030449327 |
|    clip_fraction        | 0.00234       |
|    clip_range           | 0.2           |
|    entropy_loss         | -31           |
|    explained_variance   | 0.0137        |
|    learning_rate        | 0.0003        |
|    loss                 | 1.63e+07      |
|    n_updates            | 3240          |
|    policy_gradient_loss | -0.000899     |
|    std                  | 1.15          |
|    value_loss           | 2.88e+07      |
-------------------------------------------
Logging to logs\PPO_1D_noIndi_al

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.25e+03   |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 5           |
|    time_elapsed         | 38          |
|    total_timesteps      | 686080      |
| train/                  |             |
|    approx_kl            | 0.122168034 |
|    clip_fraction        | 0.676       |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.1       |
|    explained_variance   | 0.0176      |
|    learning_rate        | 0.0003      |
|    loss                 | 7.17e+05    |
|    n_updates            | 3340        |
|    policy_gradient_loss | 0.0127      |
|    std                  | 1.15        |
|    value_loss           | 8.03e+05    |
-----------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rol

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -8.25e+03 |
| time/              |           |
|    fps             | 317       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 708608    |
----------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.02e+03      |
|    ep_rew_mean          | -8.25e+03     |
| time/                   |               |
|    fps                  | 297           |
|    iterations           | 2             |
|    time_elapsed         | 13            |
|    total_timesteps      | 710656        |
| train/                  |               |
|    approx_kl            | 0.00055025116 |
|    clip_fraction        | 0.00347       |
|    clip_range           | 0.2           |
|    entropy_loss         | -31.5         

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -8.25e+03 |
| time/              |           |
|    fps             | 324       |
|    iterations      | 1         |
|    time_elapsed    | 6         |
|    total_timesteps | 729088    |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -8.25e+03    |
| time/                   |              |
|    fps                  | 287          |
|    iterations           | 2            |
|    time_elapsed         | 14           |
|    total_timesteps      | 731136       |
| train/                  |              |
|    approx_kl            | 0.0070867115 |
|    clip_fraction        | 0.0554       |
|    clip_range           | 0.2          |
|    entropy_loss         | -31.5        |
|    explained_variance   | 0.0139       |
|    

Logging to logs\PPO_1D_noIndi_all_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.02e+03  |
|    ep_rew_mean     | -8.25e+03 |
| time/              |           |
|    fps             | 378       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 749568    |
----------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.02e+03   |
|    ep_rew_mean          | -8.25e+03  |
| time/                   |            |
|    fps                  | 315        |
|    iterations           | 2          |
|    time_elapsed         | 12         |
|    total_timesteps      | 751616     |
| train/                  |            |
|    approx_kl            | 0.26270807 |
|    clip_fraction        | 0.755      |
|    clip_range           | 0.2        |
|    entropy_loss         | -31.6      |
|    explained_variance   | 0.258      |

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.25e+03   |
| time/                   |             |
|    fps                  | 318         |
|    iterations           | 2           |
|    time_elapsed         | 12          |
|    total_timesteps      | 772096      |
| train/                  |             |
|    approx_kl            | 0.027103532 |
|    clip_fraction        | 0.103       |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.8       |
|    explained_variance   | 0.262       |
|    learning_rate        | 0.0003      |
|    loss                 | 171         |
|    n_updates            | 3760        |
|    policy_gradient_loss | 0.00078     |
|    std                  | 1.19        |
|    value_loss           | 3.42e+05    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.02e+03      |
|    ep_rew_mean          | -8.3e+03      |
| time/                   |               |
|    fps                  | 281           |
|    iterations           | 2             |
|    time_elapsed         | 14            |
|    total_timesteps      | 792576        |
| train/                  |               |
|    approx_kl            | 3.2324664e-05 |
|    clip_fraction        | 4.88e-05      |
|    clip_range           | 0.2           |
|    entropy_loss         | -31.9         |
|    explained_variance   | -0.0133       |
|    learning_rate        | 0.0003        |
|    loss                 | 2.77e+07      |
|    n_updates            | 3860          |
|    policy_gradient_loss | -0.000484     |
|    std                  | 1.2           |
|    value_loss           | 6.88e+07      |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.35e+03    |
| time/                   |              |
|    fps                  | 309          |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 813056       |
| train/                  |              |
|    approx_kl            | 0.0016669142 |
|    clip_fraction        | 0.0132       |
|    clip_range           | 0.2          |
|    entropy_loss         | -31.9        |
|    explained_variance   | 0.0011       |
|    learning_rate        | 0.0003       |
|    loss                 | 2.7e+07      |
|    n_updates            | 3960         |
|    policy_gradient_loss | 0.000402     |
|    std                  | 1.2          |
|    value_loss           | 3.49e+07     |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.35e+03    |
| time/                   |              |
|    fps                  | 285          |
|    iterations           | 3            |
|    time_elapsed         | 21           |
|    total_timesteps      | 835584       |
| train/                  |              |
|    approx_kl            | 0.0025873468 |
|    clip_fraction        | 0.0146       |
|    clip_range           | 0.2          |
|    entropy_loss         | -31.9        |
|    explained_variance   | 0.0156       |
|    learning_rate        | 0.0003       |
|    loss                 | 2.19e+07     |
|    n_updates            | 4070         |
|    policy_gradient_loss | -1.88e-06    |
|    std                  | 1.2          |
|    value_loss           | 1.89e+07     |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -6.35e+03   |
| time/                   |             |
|    fps                  | 300         |
|    iterations           | 4           |
|    time_elapsed         | 27          |
|    total_timesteps      | 858112      |
| train/                  |             |
|    approx_kl            | 0.009892803 |
|    clip_fraction        | 0.0716      |
|    clip_range           | 0.2         |
|    entropy_loss         | -31.8       |
|    explained_variance   | 0.0166      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.95e+07    |
|    n_updates            | 4180        |
|    policy_gradient_loss | 0.00807     |
|    std                  | 1.19        |
|    value_loss           | 2.61e+07    |
-----------------------------------------
----------------------------------------
| rollout/                |        

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -5.26e+03    |
| time/                   |              |
|    fps                  | 282          |
|    iterations           | 5            |
|    time_elapsed         | 36           |
|    total_timesteps      | 880640       |
| train/                  |              |
|    approx_kl            | 0.0005604473 |
|    clip_fraction        | 0.00234      |
|    clip_range           | 0.2          |
|    entropy_loss         | -32          |
|    explained_variance   | 0.134        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.5e+03      |
|    n_updates            | 4290         |
|    policy_gradient_loss | -0.00148     |
|    std                  | 1.2          |
|    value_loss           | 1.07e+06     |
------------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -6.95e+03    |
| time/                   |              |
|    fps                  | 299          |
|    iterations           | 5            |
|    time_elapsed         | 34           |
|    total_timesteps      | 901120       |
| train/                  |              |
|    approx_kl            | 0.0024646353 |
|    clip_fraction        | 0.00796      |
|    clip_range           | 0.2          |
|    entropy_loss         | -32          |
|    explained_variance   | 0.00918      |
|    learning_rate        | 0.0003       |
|    loss                 | 2.61e+07     |
|    n_updates            | 4390         |
|    policy_gradient_loss | -0.0035      |
|    std                  | 1.21         |
|    value_loss           | 3.7e+07      |
------------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
------------------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.9e+03    |
| time/                   |             |
|    fps                  | 283         |
|    iterations           | 5           |
|    time_elapsed         | 36          |
|    total_timesteps      | 921600      |
| train/                  |             |
|    approx_kl            | 0.020837804 |
|    clip_fraction        | 0.28        |
|    clip_range           | 0.2         |
|    entropy_loss         | -32         |
|    explained_variance   | 0.105       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.64e+05    |
|    n_updates            | 4490        |
|    policy_gradient_loss | 0.0132      |
|    std                  | 1.21        |
|    value_loss           | 7.6e+05     |
-----------------------------------------
Logging to logs\PPO_1D_noIndi_all_0
---------------------------------
| roll

Logging to logs\PPO_1D_noIndi_all_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.02e+03 |
|    ep_rew_mean     | -8.9e+03 |
| time/              |          |
|    fps             | 325      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 944128   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.02e+03      |
|    ep_rew_mean          | -8.9e+03      |
| time/                   |               |
|    fps                  | 301           |
|    iterations           | 2             |
|    time_elapsed         | 13            |
|    total_timesteps      | 946176        |
| train/                  |               |
|    approx_kl            | 0.00024149864 |
|    clip_fraction        | 0.00103       |
|    clip_range           | 0.2           |
|    entropy_loss         | -32.4         |
|    exp

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+03    |
|    ep_rew_mean          | -8.9e+03    |
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 966656      |
| train/                  |             |
|    approx_kl            | 0.001783256 |
|    clip_fraction        | 0.00903     |
|    clip_range           | 0.2         |
|    entropy_loss         | -32.5       |
|    explained_variance   | 0.305       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.18e+05    |
|    n_updates            | 4710        |
|    policy_gradient_loss | -0.00394    |
|    std                  | 1.23        |
|    value_loss           | 7.9e+05     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -9.99e+03    |
| time/                   |              |
|    fps                  | 311          |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 987136       |
| train/                  |              |
|    approx_kl            | 0.0040796455 |
|    clip_fraction        | 0.0126       |
|    clip_range           | 0.2          |
|    entropy_loss         | -32.5        |
|    explained_variance   | 0.308        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.77e+05     |
|    n_updates            | 4810         |
|    policy_gradient_loss | -0.00206     |
|    std                  | 1.24         |
|    value_loss           | 6.05e+05     |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.02e+03     |
|    ep_rew_mean          | -9.99e+03    |
| time/                   |              |
|    fps                  | 291          |
|    iterations           | 3            |
|    time_elapsed         | 21           |
|    total_timesteps      | 1009664      |
| train/                  |              |
|    approx_kl            | 0.0012298103 |
|    clip_fraction        | 0.00337      |
|    clip_range           | 0.2          |
|    entropy_loss         | -32.5        |
|    explained_variance   | 0.0753       |
|    learning_rate        | 0.0003       |
|    loss                 | 2.76e+06     |
|    n_updates            | 4920         |
|    policy_gradient_loss | -0.00337     |
|    std                  | 1.24         |
|    value_loss           | 4.41e+06     |
------------------------------------------
-------------------------------------------
| rollout/

In [10]:
# Completion marker: written to stdout once execution reaches this cell.
print("--DONE--", end="\n")

--DONE--
