In [1]:
from nes_py.wrappers import JoypadSpace
import retro
import gym

from matplotlib import pyplot as plt
from gym.wrappers import GrayScaleObservation
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecFrameStack
import os
from stable_baselines3 import PPO

from stable_baselines3.common.results_plotter import load_results, ts2xy
import numpy as np
from stable_baselines3.common.callbacks import BaseCallback

import cv2
import pandas as pd

CHECK_FREQ_NUMB = 10000
TOTAL_TIMESTEP_NUMB = 2000000
# LEARNING_RATE = 0.00005
LEARNING_RATE = 0.000003
N_STEPS = 2048
GAMMA = 0.99
BATCH_SIZE = 64
N_EPOCHS = 10
DOWN_SAMPLE_RATE = 3
SKIP_NUMB = 4
EPISODE_NUMBERS = 20

CHECKPOINT_DIR = './optuna_training/'
LOG_DIR = './logs/'
REWARD_LOG_FILENAME = 'reward_log.csv'

with open(REWARD_LOG_FILENAME, 'a') as f:
    print('timesteps,reward', file=f)


# 计算奖励函数
class DeadlockEnv(gym.Wrapper):
    def __init__(self, env, threshold=10):
        super().__init__(env)
        self.last_lives = 2
        self.count = 0
        self.threshold = threshold
        self.last_xscroll = 0
        self.score=0

        
    def reset(self, **kwargs):
        self.score=0
        self.last_lives = 2
        self.count = 0
        self.last_xscroll = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        state, reward, done, info = self.env.step(action)
        reward=0
        # self.env.render()
        ifdie=info['die']
        lives = info['lives']
        score =  info['score']
        xpos=info['xpos']
        xscroll=info['xscroll']
        boss_defeated=info['beat_boss']
        # if score>self.score:
        #     reward+=(score-self.score)/100
        # 失去生命减分
        # if lives <self.last_lives:
        #     reward-=3000
        #     self.last_lives=lives
        # 防止卡死
        if xscroll <= self.last_xscroll and xscroll <3072:
            self.count += 1
        else:
            self.count = 0
        if self.count >= 800:
            reward -= 10
            done = True

        if xscroll==self.last_xscroll:
            reward-=0.1
        
        if xpos<=100:
            reward-=0.1

        if ifdie != 1:
            reward-=100
            done=True
        # 3072是管卡最后，136是可以打到敌人的位置
        if xscroll > self.last_xscroll or xscroll ==3072:
            reward+=1
            self.last_xscroll=xscroll
            if xscroll ==3072:
                reward+=136-xpos
            if xpos==136:
                reward+=10

        # if lives ==1:
        #     done=True
        # 通关得分
        if boss_defeated==8:
            reward += 5000
        return state, reward, done, info

# 跳过画面每2真保留一个画面，节省计算时间
class SkipFrame(gym.Wrapper):
    def __init__(self, env, skip):
        super().__init__(env)
        self._skip = skip

    def step(self, action):
        for i in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            if done:
                break
        return obs, reward, done, info

class Downsample(gym.ObservationWrapper):
    def __init__(self, env, ratio):
        """
        Downsample images by a factor of ratio
        """
        gym.ObservationWrapper.__init__(self, env)
        (oldh, oldw, oldc) = env.observation_space.shape
        newshape = (oldh//ratio, oldw//ratio, oldc)
        self.observation_space = gym.spaces.Box(low=0, high=255,
            shape=newshape, dtype=np.uint8)

    def observation(self, frame):
        height, width, _ = self.observation_space.shape
        frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
        if frame.ndim == 2:
            frame = frame[:,:,None]
        return frame

class Discretizer(gym.ActionWrapper):
    """
    Wrap a gym environment and make it use discrete actions.
    Args:
        combos: ordered list of lists of valid button combinations
    """

    def __init__(self, env, combos):
        super().__init__(env)
        assert isinstance(env.action_space, gym.spaces.MultiBinary)
        buttons = env.unwrapped.buttons
        self._decode_discrete_action = []
        for combo in combos:
            arr = np.array([False] * env.action_space.n)
            for button in combo:
                arr[buttons.index(button)] = True
            self._decode_discrete_action.append(arr)

        self.action_space = gym.spaces.Discrete(
            len(self._decode_discrete_action))

    def action(self, act):
        return self._decode_discrete_action[act].copy()


class SonicDiscretizer(Discretizer):
    """
    Use Sonic-specific discrete actions
    based on https://github.com/openai/retro-baselines/blob/master/agents/sonic_util.py
    """
    # B是子弹

    def __init__(self, env):
        super().__init__(env=env, combos=[['RIGHT', 'B'],['RIGHT'],['RIGHT', 'A', 'B'], ['B'],['LEFT', 'A', 'B'],['RIGHT', 'UP', 'B']])


class TrainAndLoggingCallback(BaseCallback):
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

            total_reward = [0] * EPISODE_NUMBERS
            total_time = [0] * EPISODE_NUMBERS
            best_reward = 0

            for i in range(EPISODE_NUMBERS):
                state = env.reset()  # reset for each new trial
                done = False
                total_reward[i] = 0
                total_time[i] = 0
                while not done and total_time[i] < 10000:
                    action, _ = model.predict(state)
                    state, reward, done, info = env.step(action)
                    total_reward[i] += reward[0]
                    total_time[i] += 1

                if total_reward[i] > best_reward:
                    best_reward = total_reward[i]
                    best_epoch = self.n_calls

                state = env.reset()  # reset for each new trial

            print('time steps:', self.n_calls, '/', TOTAL_TIMESTEP_NUMB)
            print('average reward:', (sum(total_reward) / EPISODE_NUMBERS),
                  'average time:', (sum(total_time) / EPISODE_NUMBERS),
                  'best_reward:', best_reward,
                  )

            with open(REWARD_LOG_FILENAME, 'a') as f:
                print(self.n_calls, ',', sum(total_reward) / EPISODE_NUMBERS, file=f)

        return True

class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback for saving a model (the check is done every ``check_freq`` steps)
    based on the training reward (in practice, we recommend using ``EvalCallback``).

    :param check_freq: (int)
    :param log_dir: (str) Path to the folder where the model will be saved.
      It must contains the file created by the ``Monitor`` wrapper.
    :param verbose: (int)
    """
    def __init__(self, check_freq, save_model_dir, verbose=1):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = os.path.join(save_model_dir, 'best_model/')
        self.best_mean_reward = -np.inf

    # def _init_callback(self) -> None:
    def _init_callback(self):
        # Create folder if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    # def _on_step(self) -> bool:
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            print('self.n_calls: ',self.n_calls)
            model_path1 = os.path.join(self.save_path, 'model_{}'.format(self.n_calls))
            self.model.save(model_path1)
        return True

# ! 设置你要保存模型的位置

# save_model_dir = r'./training/'
# model = PPO("CnnPolicy", env, verbose=1,
#             # tensorboard_log = tensorboard_log,
#             learning_rate = LEARNING_RATE,
#             # device="cuda:0",
#             )
# load model 读取之前训练好的模型
# model.set_parameters("best_model")
# callback1 = SaveOnBestTrainingRewardCallback(10000, save_model_dir)
# model.learn(total_timesteps=1200000,callback=callback1)
# model.save("mario_model")

# Setup model saving callback
callback = TrainAndLoggingCallback(check_freq=CHECK_FREQ_NUMB, save_path=CHECKPOINT_DIR)

In [2]:
import optuna
import copy
from stable_baselines3.common.evaluation import evaluate_policy

In [3]:
def optimize_ppo(trial): 
    return {
        'n_steps':trial.suggest_int('n_steps', 512, 8192),
        'gamma':trial.suggest_loguniform('gamma', 0.8, 0.9999),
        'learning_rate':trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
        'clip_range':trial.suggest_uniform('clip_range', 0.1, 0.4),
        'gae_lambda':trial.suggest_uniform('gae_lambda', 0.8, 0.99)
    }

In [4]:
def optimize_function(trial):
    try:
        env = retro.make(game='Contra-Nes' ,state="Level1")
        # 限制按键
        env = SonicDiscretizer(env)
        # 计算奖励函数
        env = DeadlockEnv(env)
        # 跳过一阵的画面
        env = SkipFrame(env, skip=SKIP_NUMB)
        monitor_dir = r'./monitor_log/'
        os.makedirs(monitor_dir,exist_ok=True)
        env = Monitor(env,monitor_dir)
        env = GrayScaleObservation(env,keep_dim=True)
        # 把画面的画质除以3
        # (224, 240, 1)
        # 变成
        # (74, 80, 1)
        env = Downsample(env, DOWN_SAMPLE_RATE)
        env = DummyVecEnv([lambda: env])
        env = VecFrameStack(env,4,channels_order='last')

        model_params = optimize_ppo(trial) 

        tensorboard_log = r'./tensorboard_log/'
        model = PPO("CnnPolicy", env, verbose=1,tensorboard_log=tensorboard_log,**model_params)
        model.learn(total_timesteps=100000)
    
        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=5)

        env.close()
    
        save_model_dir = CHECKPOINT_DIR
        os.makedirs(save_model_dir,exist_ok=True)
        SAVE_PATH = os.path.join(save_model_dir, 'trial_{}_best_model'.format(trial.number))
        model.save(SAVE_PATH)
        return mean_reward    

    except Exception as e:
        print(e)
        return -1000
    

In [5]:
study = optuna.create_study(direction='maximize')

study.optimize(optimize_function, n_trials=100)

[32m[I 2023-01-29 00:03:00,906][0m A new study created in memory with name: no-name-60b8db8a-8936-4e73-9722-94336178848d[0m
  'gamma':trial.suggest_loguniform('gamma', 0.8, 0.9999),
  'learning_rate':trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
  'clip_range':trial.suggest_uniform('clip_range', 0.1, 0.4),
  'gae_lambda':trial.suggest_uniform('gae_lambda', 0.8, 0.99)
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6876 and n_envs=1)


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 171      |
|    ep_rew_mean     | -32.3    |
| time/              |          |
|    fps             | 155      |
|    iterations      | 1        |
|    time_elapsed    | 44       |
|    total_timesteps | 6876     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 183         |
|    ep_rew_mean          | -14.5       |
| time/                   |             |
|    fps                  | 97          |
|    iterations           | 2           |
|    time_elapsed         | 140         |
|    total_timesteps      | 13752       |
| train/                  |             |
|    approx_kl            | 0.038990397 |
|    clip_fraction        | 0.27        |
|    clip_range           | 0.256       |
|    entropy_loss    

[32m[I 2023-01-29 00:27:33,418][0m Trial 0 finished with value: -85.3 and parameters: {'n_steps': 6876, 'gamma': 0.8792232977146383, 'learning_rate': 7.075373690819404e-05, 'clip_range': 0.2560519197791735, 'gae_lambda': 0.8563259832121957}. Best is trial 0 with value: -85.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_2


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5687 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 149      |
|    ep_rew_mean     | -43.1    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 31       |
|    total_timesteps | 5687     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 177         |
|    ep_rew_mean          | -11.7       |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 111         |
|    total_timesteps      | 11374       |
| train/                  |             |
|    approx_kl            | 0.031020341 |
|    clip_fraction        | 0.256       |
|    clip_range           | 0.227       |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.000491    |
|    learning_rate        | 9.

[32m[I 2023-01-29 00:51:13,460][0m Trial 1 finished with value: -85.3 and parameters: {'n_steps': 5687, 'gamma': 0.9167480489960909, 'learning_rate': 9.583687078029826e-05, 'clip_range': 0.22662893674725865, 'gae_lambda': 0.8010144581471187}. Best is trial 0 with value: -85.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_3


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7447 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 196      |
|    ep_rew_mean     | -18.7    |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 41       |
|    total_timesteps | 7447     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 203        |
|    ep_rew_mean          | 13.3       |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 145        |
|    total_timesteps      | 14894      |
| train/                  |            |
|    approx_kl            | 0.13848385 |
|    clip_fraction        | 0.329      |
|    clip_range           | 0.375      |
|    entropy_loss         | -1.71      |
|    explained_variance   | 0.0011     |
|    learning_rate        | 8.9e-05    |
|   

[32m[I 2023-01-29 01:15:35,907][0m Trial 2 finished with value: -8.5 and parameters: {'n_steps': 7447, 'gamma': 0.8023828415178148, 'learning_rate': 8.901664351549502e-05, 'clip_range': 0.37478557574609495, 'gae_lambda': 0.8027517473595028}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_4


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=8090 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 165      |
|    ep_rew_mean     | -37.4    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 45       |
|    total_timesteps | 8090     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 161         |
|    ep_rew_mean          | -33.1       |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 157         |
|    total_timesteps      | 16180       |
| train/                  |             |
|    approx_kl            | 0.007151686 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.141       |
|    entropy_loss         | -1.79       |
|    explained_variance   | 0.00144     |
|    learning_rate        | 1.

[32m[I 2023-01-29 01:40:02,289][0m Trial 3 finished with value: -85.3 and parameters: {'n_steps': 8090, 'gamma': 0.9860185849161054, 'learning_rate': 1.154854334674615e-05, 'clip_range': 0.14141069917564109, 'gae_lambda': 0.8676729900758756}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_5


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4772 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 132      |
|    ep_rew_mean     | -55.4    |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 4772     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 177         |
|    ep_rew_mean          | -20.1       |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 93          |
|    total_timesteps      | 9544        |
| train/                  |             |
|    approx_kl            | 0.023356779 |
|    clip_fraction        | 0.216       |
|    clip_range           | 0.22        |
|    entropy_loss         | -1.78       |
|    explained_variance   | -0.00087    |
|    learning_rate        | 8.

[32m[I 2023-01-29 02:03:17,386][0m Trial 4 finished with value: -85.3 and parameters: {'n_steps': 4772, 'gamma': 0.8461032222905556, 'learning_rate': 8.732433577693223e-05, 'clip_range': 0.21958122699381397, 'gae_lambda': 0.922396602807411}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_6


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1172 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 139      |
|    ep_rew_mean     | -55.6    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 1172     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 138         |
|    ep_rew_mean          | -48.6       |
| time/                   |             |
|    fps                  | 103         |
|    iterations           | 2           |
|    time_elapsed         | 22          |
|    total_timesteps      | 2344        |
| train/                  |             |
|    approx_kl            | 0.003714012 |
|    clip_fraction        | 0.0184      |
|    clip_range           | 0.138       |
|    entropy_loss         | -1.79       |
|    explained_variance   | -0.00184    |
|    learning_rate        | 1.

[32m[I 2023-01-29 02:26:32,489][0m Trial 5 finished with value: -85.3 and parameters: {'n_steps': 1172, 'gamma': 0.8672627303257366, 'learning_rate': 1.873913643611372e-05, 'clip_range': 0.13758742836225205, 'gae_lambda': 0.9885873887502898}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_7


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3933 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 177      |
|    ep_rew_mean     | -31.7    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 21       |
|    total_timesteps | 3933     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 183         |
|    ep_rew_mean          | -19.8       |
| time/                   |             |
|    fps                  | 103         |
|    iterations           | 2           |
|    time_elapsed         | 75          |
|    total_timesteps      | 7866        |
| train/                  |             |
|    approx_kl            | 0.005189847 |
|    clip_fraction        | 0.147       |
|    clip_range           | 0.159       |
|    entropy_loss         | -1.79       |
|    explained_variance   | 0.000796    |
|    learning_rate        | 3.

[32m[I 2023-01-29 02:50:06,936][0m Trial 6 finished with value: -85.3 and parameters: {'n_steps': 3933, 'gamma': 0.9359775062933064, 'learning_rate': 3.568883894504056e-05, 'clip_range': 0.15902923789041484, 'gae_lambda': 0.9583662832195947}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_8


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1680 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 157      |
|    ep_rew_mean     | -36.3    |
| time/              |          |
|    fps             | 176      |
|    iterations      | 1        |
|    time_elapsed    | 9        |
|    total_timesteps | 1680     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 227         |
|    ep_rew_mean          | 11.9        |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 32          |
|    total_timesteps      | 3360        |
| train/                  |             |
|    approx_kl            | 0.023156213 |
|    clip_fraction        | 0.247       |
|    clip_range           | 0.228       |
|    entropy_loss         | -1.77       |
|    explained_variance   | -0.00109    |
|    learning_rate        | 6.

[32m[I 2023-01-29 03:13:27,559][0m Trial 7 finished with value: -85.3 and parameters: {'n_steps': 1680, 'gamma': 0.8259453677112606, 'learning_rate': 6.795167629528398e-05, 'clip_range': 0.2275873020489266, 'gae_lambda': 0.9798728278069007}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_9


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6506 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 164      |
|    ep_rew_mean     | -36.5    |
| time/              |          |
|    fps             | 170      |
|    iterations      | 1        |
|    time_elapsed    | 38       |
|    total_timesteps | 6506     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 187         |
|    ep_rew_mean          | -10.9       |
| time/                   |             |
|    fps                  | 101         |
|    iterations           | 2           |
|    time_elapsed         | 128         |
|    total_timesteps      | 13012       |
| train/                  |             |
|    approx_kl            | 0.027573207 |
|    clip_fraction        | 0.184       |
|    clip_range           | 0.253       |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.00253     |
|    learning_rate        | 4.

[32m[I 2023-01-29 03:37:35,003][0m Trial 8 finished with value: -85.3 and parameters: {'n_steps': 6506, 'gamma': 0.8974438073307246, 'learning_rate': 4.467552269388268e-05, 'clip_range': 0.25286572697838383, 'gae_lambda': 0.8843405639830324}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_10


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6444 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 157      |
|    ep_rew_mean     | -39.6    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 35       |
|    total_timesteps | 6444     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 198        |
|    ep_rew_mean          | -1.08      |
| time/                   |            |
|    fps                  | 103        |
|    iterations           | 2          |
|    time_elapsed         | 125        |
|    total_timesteps      | 12888      |
| train/                  |            |
|    approx_kl            | 0.02982936 |
|    clip_fraction        | 0.16       |
|    clip_range           | 0.274      |
|    entropy_loss         | -1.77      |
|    explained_variance   | -5.78e-05  |
|    learning_rate        | 3.16e-05   |
|   

[32m[I 2023-01-29 04:01:25,386][0m Trial 9 finished with value: -85.3 and parameters: {'n_steps': 6444, 'gamma': 0.8831462256196418, 'learning_rate': 3.159054660953978e-05, 'clip_range': 0.2738459757832778, 'gae_lambda': 0.9149290237751738}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_11


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3629 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 207      |
|    ep_rew_mean     | -4.75    |
| time/              |          |
|    fps             | 181      |
|    iterations      | 1        |
|    time_elapsed    | 19       |
|    total_timesteps | 3629     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 240        |
|    ep_rew_mean          | 49.6       |
| time/                   |            |
|    fps                  | 103        |
|    iterations           | 2          |
|    time_elapsed         | 70         |
|    total_timesteps      | 7258       |
| train/                  |            |
|    approx_kl            | 0.15986677 |
|    clip_fraction        | 0.26       |
|    clip_range           | 0.395      |
|    entropy_loss         | -1.72      |
|    explained_variance   | -0.00147   |
|    learning_rate        | 5.13e-05   |
|   

[32m[I 2023-01-29 04:24:51,022][0m Trial 10 finished with value: -85.3 and parameters: {'n_steps': 3629, 'gamma': 0.8024762607583587, 'learning_rate': 5.1334531448971846e-05, 'clip_range': 0.3949211145886803, 'gae_lambda': 0.8052474395115056}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_12


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=8098 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 169      |
|    ep_rew_mean     | -32.1    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 44       |
|    total_timesteps | 8098     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 207        |
|    ep_rew_mean          | 20.4       |
| time/                   |            |
|    fps                  | 103        |
|    iterations           | 2          |
|    time_elapsed         | 157        |
|    total_timesteps      | 16196      |
| train/                  |            |
|    approx_kl            | 0.10943543 |
|    clip_fraction        | 0.247      |
|    clip_range           | 0.377      |
|    entropy_loss         | -1.73      |
|    explained_variance   | -0.000364  |
|    learning_rate        | 6.22e-05   |
|   

[32m[I 2023-01-29 04:49:15,341][0m Trial 11 finished with value: -73.3 and parameters: {'n_steps': 8098, 'gamma': 0.8005076100122641, 'learning_rate': 6.219733593573298e-05, 'clip_range': 0.3769080130654111, 'gae_lambda': 0.8410619471576747}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_13
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 206      |
|    ep_rew_mean     | -14      |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 45       |
|    total_timesteps | 8128     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 241        |
|    ep_rew_mean          | 37.2       |
| time/                   |            |
|    fps                  | 103        |
|    iterations           | 2          |
|    time_elapsed         | 157        |
|    total_timesteps      | 16256      |
| train/                  |            |
|    approx_kl            | 0.10971809 |
|    clip_fraction        | 0.301      |
|    clip_range           | 0.398      |
|    entropy_loss         | -1.71

[32m[I 2023-01-29 05:13:40,587][0m Trial 12 finished with value: -85.3 and parameters: {'n_steps': 8128, 'gamma': 0.8027677475397231, 'learning_rate': 5.952867847014933e-05, 'clip_range': 0.3976732415112482, 'gae_lambda': 0.8394859614687303}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_14


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7368 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 156      |
|    ep_rew_mean     | -41.6    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 40       |
|    total_timesteps | 7368     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 193         |
|    ep_rew_mean          | -3.86       |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 143         |
|    total_timesteps      | 14736       |
| train/                  |             |
|    approx_kl            | 0.050668597 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.34        |
|    entropy_loss         | -1.75       |
|    explained_variance   | 0.000639    |
|    learning_rate        | 2.

[32m[I 2023-01-29 05:37:38,864][0m Trial 13 finished with value: -73.3 and parameters: {'n_steps': 7368, 'gamma': 0.8350890956136825, 'learning_rate': 2.6956746938286525e-05, 'clip_range': 0.34020869606639637, 'gae_lambda': 0.8304814876525314}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_15


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5429 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 147      |
|    ep_rew_mean     | -44.5    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 30       |
|    total_timesteps | 5429     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 159        |
|    ep_rew_mean          | -19.9      |
| time/                   |            |
|    fps                  | 103        |
|    iterations           | 2          |
|    time_elapsed         | 105        |
|    total_timesteps      | 10858      |
| train/                  |            |
|    approx_kl            | 0.07546491 |
|    clip_fraction        | 0.275      |
|    clip_range           | 0.343      |
|    entropy_loss         | -1.74      |
|    explained_variance   | -0.000264  |
|    learning_rate        | 9.92e-05   |
|   

[32m[I 2023-01-29 06:01:29,416][0m Trial 14 finished with value: -73.3 and parameters: {'n_steps': 5429, 'gamma': 0.8118113389316147, 'learning_rate': 9.918923125258207e-05, 'clip_range': 0.34265255457325927, 'gae_lambda': 0.8281154161213321}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_16


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2616 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 154      |
|    ep_rew_mean     | -41.3    |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 14       |
|    total_timesteps | 2616     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 180       |
|    ep_rew_mean          | -8.44     |
| time/                   |           |
|    fps                  | 103       |
|    iterations           | 2         |
|    time_elapsed         | 50        |
|    total_timesteps      | 5232      |
| train/                  |           |
|    approx_kl            | 0.0489609 |
|    clip_fraction        | 0.218     |
|    clip_range           | 0.325     |
|    entropy_loss         | -1.76     |
|    explained_variance   | -0.000975 |
|    learning_rate        | 4.2e-05   |
|    loss           

[32m[I 2023-01-29 06:24:56,044][0m Trial 15 finished with value: -85.3 and parameters: {'n_steps': 2616, 'gamma': 0.8516963100877144, 'learning_rate': 4.199704906936982e-05, 'clip_range': 0.32496319720294425, 'gae_lambda': 0.8161528759421818}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_17


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7310 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 174      |
|    ep_rew_mean     | -30.9    |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 40       |
|    total_timesteps | 7310     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 211       |
|    ep_rew_mean          | 14.1      |
| time/                   |           |
|    fps                  | 102       |
|    iterations           | 2         |
|    time_elapsed         | 142       |
|    total_timesteps      | 14620     |
| train/                  |           |
|    approx_kl            | 0.0735472 |
|    clip_fraction        | 0.32      |
|    clip_range           | 0.303     |
|    entropy_loss         | -1.75     |
|    explained_variance   | -7.75e-05 |
|    learning_rate        | 7.27e-05  |
|    loss           

[32m[I 2023-01-29 06:48:41,996][0m Trial 16 finished with value: -57.3 and parameters: {'n_steps': 7310, 'gamma': 0.8220279581250991, 'learning_rate': 7.269883306773224e-05, 'clip_range': 0.3032539710985727, 'gae_lambda': 0.8560061656258512}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_18


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5488 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 157      |
|    ep_rew_mean     | -39.4    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 30       |
|    total_timesteps | 5488     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 188        |
|    ep_rew_mean          | -10.5      |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 106        |
|    total_timesteps      | 10976      |
| train/                  |            |
|    approx_kl            | 0.03978906 |
|    clip_fraction        | 0.199      |
|    clip_range           | 0.299      |
|    entropy_loss         | -1.76      |
|    explained_variance   | 0.00221    |
|    learning_rate        | 2.09e-05   |
|   

[32m[I 2023-01-29 07:12:51,406][0m Trial 17 finished with value: -85.3 and parameters: {'n_steps': 5488, 'gamma': 0.8301920191985214, 'learning_rate': 2.0854217931907947e-05, 'clip_range': 0.29943587497724766, 'gae_lambda': 0.8896322303455131}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_19


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7148 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 190      |
|    ep_rew_mean     | -25.9    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 39       |
|    total_timesteps | 7148     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 188        |
|    ep_rew_mean          | -11.4      |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 138        |
|    total_timesteps      | 14296      |
| train/                  |            |
|    approx_kl            | 0.03900496 |
|    clip_fraction        | 0.202      |
|    clip_range           | 0.301      |
|    entropy_loss         | -1.76      |
|    explained_variance   | -0.000774  |
|    learning_rate        | 7.81e-05   |
|   

[32m[I 2023-01-29 07:36:03,235][0m Trial 18 finished with value: -85.3 and parameters: {'n_steps': 7148, 'gamma': 0.9697868600711728, 'learning_rate': 7.808816810524925e-05, 'clip_range': 0.30139072635960823, 'gae_lambda': 0.8612858754071602}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_20


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3030 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 162      |
|    ep_rew_mean     | -36.4    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 16       |
|    total_timesteps | 3030     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 224        |
|    ep_rew_mean          | 16.1       |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 58         |
|    total_timesteps      | 6060       |
| train/                  |            |
|    approx_kl            | 0.05964318 |
|    clip_fraction        | 0.224      |
|    clip_range           | 0.359      |
|    entropy_loss         | -1.75      |
|    explained_variance   | 0.0011     |
|    learning_rate        | 4.96e-05   |
|   

[32m[I 2023-01-29 07:59:49,910][0m Trial 19 finished with value: -85.3 and parameters: {'n_steps': 3030, 'gamma': 0.8221811044489442, 'learning_rate': 4.956667612161536e-05, 'clip_range': 0.35920111283637984, 'gae_lambda': 0.919601804874671}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_21


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4770 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 193      |
|    ep_rew_mean     | -18.7    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 4770     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 230        |
|    ep_rew_mean          | 25.5       |
| time/                   |            |
|    fps                  | 103        |
|    iterations           | 2          |
|    time_elapsed         | 92         |
|    total_timesteps      | 9540       |
| train/                  |            |
|    approx_kl            | 0.05644947 |
|    clip_fraction        | 0.275      |
|    clip_range           | 0.304      |
|    entropy_loss         | -1.75      |
|    explained_variance   | 0.00108    |
|    learning_rate        | 7.8e-05    |
|   

[32m[I 2023-01-29 08:23:05,622][0m Trial 20 finished with value: -85.3 and parameters: {'n_steps': 4770, 'gamma': 0.8544480394108854, 'learning_rate': 7.796081668896809e-05, 'clip_range': 0.3036485871807275, 'gae_lambda': 0.8497181976217134}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_22


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6038 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 132      |
|    ep_rew_mean     | -50.2    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 33       |
|    total_timesteps | 6038     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 186        |
|    ep_rew_mean          | -0.545     |
| time/                   |            |
|    fps                  | 103        |
|    iterations           | 2          |
|    time_elapsed         | 117        |
|    total_timesteps      | 12076      |
| train/                  |            |
|    approx_kl            | 0.06797142 |
|    clip_fraction        | 0.221      |
|    clip_range           | 0.353      |
|    entropy_loss         | -1.75      |
|    explained_variance   | 0.00201    |
|    learning_rate        | 9.92e-05   |
|   

[32m[I 2023-01-29 08:47:01,012][0m Trial 21 finished with value: -50.0 and parameters: {'n_steps': 6038, 'gamma': 0.8170735344146679, 'learning_rate': 9.917773440314956e-05, 'clip_range': 0.3529885389629032, 'gae_lambda': 0.8231450138544958}. Best is trial 2 with value: -8.5.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_23


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6228 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 163      |
|    ep_rew_mean     | -33.9    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 6228     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 189         |
|    ep_rew_mean          | 2.38        |
| time/                   |             |
|    fps                  | 103         |
|    iterations           | 2           |
|    time_elapsed         | 120         |
|    total_timesteps      | 12456       |
| train/                  |             |
|    approx_kl            | 0.068706505 |
|    clip_fraction        | 0.273       |
|    clip_range           | 0.37        |
|    entropy_loss         | -1.74       |
|    explained_variance   | -0.000262   |
|    learning_rate        | 9.

[32m[I 2023-01-29 09:11:37,110][0m Trial 22 finished with value: -5.3 and parameters: {'n_steps': 6228, 'gamma': 0.8222907790381564, 'learning_rate': 9.961387684833616e-05, 'clip_range': 0.37045103781596944, 'gae_lambda': 0.8198271715333595}. Best is trial 22 with value: -5.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_24


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5976 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 160      |
|    ep_rew_mean     | -39      |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 33       |
|    total_timesteps | 5976     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 193        |
|    ep_rew_mean          | -2.97      |
| time/                   |            |
|    fps                  | 103        |
|    iterations           | 2          |
|    time_elapsed         | 115        |
|    total_timesteps      | 11952      |
| train/                  |            |
|    approx_kl            | 0.07089074 |
|    clip_fraction        | 0.238      |
|    clip_range           | 0.366      |
|    entropy_loss         | -1.74      |
|    explained_variance   | 0.00189    |
|    learning_rate        | 9.43e-05   |
|   

[32m[I 2023-01-29 09:35:22,783][0m Trial 23 finished with value: 209.3 and parameters: {'n_steps': 5976, 'gamma': 0.8431945080247621, 'learning_rate': 9.434717363652453e-05, 'clip_range': 0.366043287552883, 'gae_lambda': 0.8177999838257695}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_25


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4934 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 150      |
|    ep_rew_mean     | -39.2    |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 4934     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 198         |
|    ep_rew_mean          | 5.4         |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 96          |
|    total_timesteps      | 9868        |
| train/                  |             |
|    approx_kl            | 0.069957934 |
|    clip_fraction        | 0.199       |
|    clip_range           | 0.377       |
|    entropy_loss         | -1.75       |
|    explained_variance   | 0.00053     |
|    learning_rate        | 5.

[32m[I 2023-01-29 09:59:33,735][0m Trial 24 finished with value: -21.3 and parameters: {'n_steps': 4934, 'gamma': 0.8421808878014533, 'learning_rate': 5.66316707762741e-05, 'clip_range': 0.37683997755210213, 'gae_lambda': 0.8103321761454614}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_26


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6469 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 154      |
|    ep_rew_mean     | -38.8    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 36       |
|    total_timesteps | 6469     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 170         |
|    ep_rew_mean          | -22.3       |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 125         |
|    total_timesteps      | 12938       |
| train/                  |             |
|    approx_kl            | 0.013506616 |
|    clip_fraction        | 0.313       |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.78       |
|    explained_variance   | 0.000573    |
|    learning_rate        | 8.

[32m[I 2023-01-29 10:23:41,405][0m Trial 25 finished with value: 42.7 and parameters: {'n_steps': 6469, 'gamma': 0.8626936070749865, 'learning_rate': 8.545820350505095e-05, 'clip_range': 0.10021489386221244, 'gae_lambda': 0.8259007633563743}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_27


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6243 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 201      |
|    ep_rew_mean     | -16.9    |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 6243     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 217        |
|    ep_rew_mean          | 2.33       |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 121        |
|    total_timesteps      | 12486      |
| train/                  |            |
|    approx_kl            | 0.01266773 |
|    clip_fraction        | 0.126      |
|    clip_range           | 0.182      |
|    entropy_loss         | -1.79      |
|    explained_variance   | 0.000719   |
|    learning_rate        | 1.06e-05   |
|   

[32m[I 2023-01-29 10:48:22,324][0m Trial 26 finished with value: -85.3 and parameters: {'n_steps': 6243, 'gamma': 0.8620098581760791, 'learning_rate': 1.0562871130481408e-05, 'clip_range': 0.18173951483854284, 'gae_lambda': 0.8697494686520636}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_28


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5726 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 182      |
|    ep_rew_mean     | -26.2    |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 32       |
|    total_timesteps | 5726     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 206        |
|    ep_rew_mean          | 3.45       |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 111        |
|    total_timesteps      | 11452      |
| train/                  |            |
|    approx_kl            | 0.03131706 |
|    clip_fraction        | 0.306      |
|    clip_range           | 0.191      |
|    entropy_loss         | -1.77      |
|    explained_variance   | -0.000751  |
|    learning_rate        | 8.14e-05   |
|   

[32m[I 2023-01-29 11:12:18,705][0m Trial 27 finished with value: -85.3 and parameters: {'n_steps': 5726, 'gamma': 0.8999973457596745, 'learning_rate': 8.141270756602559e-05, 'clip_range': 0.19070847923534462, 'gae_lambda': 0.8342081149421605}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_29


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6693 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 148      |
|    ep_rew_mean     | -45      |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 37       |
|    total_timesteps | 6693     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 192        |
|    ep_rew_mean          | 0.39       |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 130        |
|    total_timesteps      | 13386      |
| train/                  |            |
|    approx_kl            | 0.06932638 |
|    clip_fraction        | 0.234      |
|    clip_range           | 0.326      |
|    entropy_loss         | -1.75      |
|    explained_variance   | 9.96e-05   |
|    learning_rate        | 6.91e-05   |
|   

[32m[I 2023-01-29 11:35:38,223][0m Trial 28 finished with value: -85.3 and parameters: {'n_steps': 6693, 'gamma': 0.8713808975828464, 'learning_rate': 6.912347623112998e-05, 'clip_range': 0.32584754094048846, 'gae_lambda': 0.8200230134207749}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_30


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5100 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 124      |
|    ep_rew_mean     | -56      |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 28       |
|    total_timesteps | 5100     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 147          |
|    ep_rew_mean          | -40.4        |
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 2            |
|    time_elapsed         | 99           |
|    total_timesteps      | 10200        |
| train/                  |              |
|    approx_kl            | 0.0040367255 |
|    clip_fraction        | 0.145        |
|    clip_range           | 0.101        |
|    entropy_loss         | -1.79        |
|    explained_variance   | 3.68e-05     |
|    learning_r

[32m[I 2023-01-29 11:59:25,366][0m Trial 29 finished with value: -85.3 and parameters: {'n_steps': 5100, 'gamma': 0.8826572679980366, 'learning_rate': 1.3681998148588302e-05, 'clip_range': 0.1013037313978136, 'gae_lambda': 0.8769996827606783}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_31


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4236 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 148      |
|    ep_rew_mean     | -48.2    |
| time/              |          |
|    fps             | 173      |
|    iterations      | 1        |
|    time_elapsed    | 24       |
|    total_timesteps | 4236     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 180         |
|    ep_rew_mean          | -13.6       |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 82          |
|    total_timesteps      | 8472        |
| train/                  |             |
|    approx_kl            | 0.030535609 |
|    clip_fraction        | 0.214       |
|    clip_range           | 0.274       |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.000423    |
|    learning_rate        | 6.

[32m[I 2023-01-29 12:24:51,414][0m Trial 30 finished with value: -93.8 and parameters: {'n_steps': 4236, 'gamma': 0.8392592457578548, 'learning_rate': 6.33618038287005e-05, 'clip_range': 0.27441447760506565, 'gae_lambda': 0.9017244776947816}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_32


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7522 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 175      |
|    ep_rew_mean     | -32.2    |
| time/              |          |
|    fps             | 157      |
|    iterations      | 1        |
|    time_elapsed    | 47       |
|    total_timesteps | 7522     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 225        |
|    ep_rew_mean          | 26.4       |
| time/                   |            |
|    fps                  | 92         |
|    iterations           | 2          |
|    time_elapsed         | 162        |
|    total_timesteps      | 15044      |
| train/                  |            |
|    approx_kl            | 0.09829633 |
|    clip_fraction        | 0.309      |
|    clip_range           | 0.37       |
|    entropy_loss         | -1.72      |
|    explained_variance   | -0.00184   |
|    learning_rate        | 8.72e-05   |
|   

[32m[I 2023-01-29 12:50:54,429][0m Trial 31 finished with value: -85.3 and parameters: {'n_steps': 7522, 'gamma': 0.858285625690852, 'learning_rate': 8.717113274109326e-05, 'clip_range': 0.3697302399226494, 'gae_lambda': 0.8123005044891111}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_33


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6743 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 145      |
|    ep_rew_mean     | -46.5    |
| time/              |          |
|    fps             | 173      |
|    iterations      | 1        |
|    time_elapsed    | 38       |
|    total_timesteps | 6743     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 181         |
|    ep_rew_mean          | -18.2       |
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 134         |
|    total_timesteps      | 13486       |
| train/                  |             |
|    approx_kl            | 0.013134562 |
|    clip_fraction        | 0.301       |
|    clip_range           | 0.104       |
|    entropy_loss         | -1.78       |
|    explained_variance   | -0.000349   |
|    learning_rate        | 8.

[32m[I 2023-01-29 13:14:52,173][0m Trial 32 finished with value: -85.3 and parameters: {'n_steps': 6743, 'gamma': 0.8158946814400295, 'learning_rate': 8.925916064412522e-05, 'clip_range': 0.10412962859969249, 'gae_lambda': 0.8000520389957909}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_34


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5981 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 156      |
|    ep_rew_mean     | -39.7    |
| time/              |          |
|    fps             | 173      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 5981     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 189         |
|    ep_rew_mean          | 0.695       |
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 119         |
|    total_timesteps      | 11962       |
| train/                  |             |
|    approx_kl            | 0.069154546 |
|    clip_fraction        | 0.267       |
|    clip_range           | 0.326       |
|    entropy_loss         | -1.74       |
|    explained_variance   | -0.000267   |
|    learning_rate        | 7.

[32m[I 2023-01-29 13:38:55,983][0m Trial 33 finished with value: -85.3 and parameters: {'n_steps': 5981, 'gamma': 0.8340364864199847, 'learning_rate': 7.792553046998785e-05, 'clip_range': 0.3260043504929128, 'gae_lambda': 0.842873667856973}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_35


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7542 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 140      |
|    ep_rew_mean     | -47.1    |
| time/              |          |
|    fps             | 174      |
|    iterations      | 1        |
|    time_elapsed    | 43       |
|    total_timesteps | 7542     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 165         |
|    ep_rew_mean          | -13.1       |
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 150         |
|    total_timesteps      | 15084       |
| train/                  |             |
|    approx_kl            | 0.064794324 |
|    clip_fraction        | 0.23        |
|    clip_range           | 0.388       |
|    entropy_loss         | -1.74       |
|    explained_variance   | 0.000178    |
|    learning_rate        | 9.

[32m[I 2023-01-29 14:03:56,933][0m Trial 34 finished with value: -74.5 and parameters: {'n_steps': 7542, 'gamma': 0.9137332130043113, 'learning_rate': 9.824926147662566e-05, 'clip_range': 0.3881428094042587, 'gae_lambda': 0.8201616379251371}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_36


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6956 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 147      |
|    ep_rew_mean     | -46.9    |
| time/              |          |
|    fps             | 172      |
|    iterations      | 1        |
|    time_elapsed    | 40       |
|    total_timesteps | 6956     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 168        |
|    ep_rew_mean          | -15.8      |
| time/                   |            |
|    fps                  | 99         |
|    iterations           | 2          |
|    time_elapsed         | 139        |
|    total_timesteps      | 13912      |
| train/                  |            |
|    approx_kl            | 0.08969064 |
|    clip_fraction        | 0.257      |
|    clip_range           | 0.364      |
|    entropy_loss         | -1.74      |
|    explained_variance   | 0.000622   |
|    learning_rate        | 8.65e-05   |
|   

[32m[I 2023-01-29 14:28:50,098][0m Trial 35 finished with value: 41.7 and parameters: {'n_steps': 6956, 'gamma': 0.8472164889491808, 'learning_rate': 8.654184848589547e-05, 'clip_range': 0.36380692740480564, 'gae_lambda': 0.8007636680993787}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_37


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5909 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 150      |
|    ep_rew_mean     | -46.5    |
| time/              |          |
|    fps             | 172      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 5909     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 207        |
|    ep_rew_mean          | 2.44       |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 118        |
|    total_timesteps      | 11818      |
| train/                  |            |
|    approx_kl            | 0.02862727 |
|    clip_fraction        | 0.216      |
|    clip_range           | 0.277      |
|    entropy_loss         | -1.77      |
|    explained_variance   | 0.000392   |
|    learning_rate        | 7.08e-05   |
|   

[32m[I 2023-01-29 14:52:10,393][0m Trial 36 finished with value: -85.3 and parameters: {'n_steps': 5909, 'gamma': 0.8453076420395228, 'learning_rate': 7.076240938715736e-05, 'clip_range': 0.27744128233841664, 'gae_lambda': 0.8476336081306577}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_38


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6827 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 182      |
|    ep_rew_mean     | -25.9    |
| time/              |          |
|    fps             | 183      |
|    iterations      | 1        |
|    time_elapsed    | 37       |
|    total_timesteps | 6827     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 202        |
|    ep_rew_mean          | 12.7       |
| time/                   |            |
|    fps                  | 104        |
|    iterations           | 2          |
|    time_elapsed         | 130        |
|    total_timesteps      | 13654      |
| train/                  |            |
|    approx_kl            | 0.10865987 |
|    clip_fraction        | 0.301      |
|    clip_range           | 0.359      |
|    entropy_loss         | -1.73      |
|    explained_variance   | 0.000337   |
|    learning_rate        | 8.79e-05   |
|   

[32m[I 2023-01-29 15:15:34,144][0m Trial 37 finished with value: -73.3 and parameters: {'n_steps': 6827, 'gamma': 0.8752839351626279, 'learning_rate': 8.791682992340953e-05, 'clip_range': 0.35899454633111405, 'gae_lambda': 0.8006257094763293}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_39


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5257 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 202      |
|    ep_rew_mean     | -14.1    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 29       |
|    total_timesteps | 5257     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 219       |
|    ep_rew_mean          | 17.8      |
| time/                   |           |
|    fps                  | 104       |
|    iterations           | 2         |
|    time_elapsed         | 100       |
|    total_timesteps      | 10514     |
| train/                  |           |
|    approx_kl            | 0.034447  |
|    clip_fraction        | 0.269     |
|    clip_range           | 0.21      |
|    entropy_loss         | -1.77     |
|    explained_variance   | -0.000696 |
|    learning_rate        | 5.39e-05  |
|    loss           

[32m[I 2023-01-29 15:39:26,553][0m Trial 38 finished with value: -73.3 and parameters: {'n_steps': 5257, 'gamma': 0.8640278389269798, 'learning_rate': 5.394090177055641e-05, 'clip_range': 0.20951633575482304, 'gae_lambda': 0.8296877882504523}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_40


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7724 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 160      |
|    ep_rew_mean     | -39.5    |
| time/              |          |
|    fps             | 181      |
|    iterations      | 1        |
|    time_elapsed    | 42       |
|    total_timesteps | 7724     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 204         |
|    ep_rew_mean          | 6.26        |
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 2           |
|    time_elapsed         | 148         |
|    total_timesteps      | 15448       |
| train/                  |             |
|    approx_kl            | 0.034749303 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.239       |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.000974    |
|    learning_rate        | 4.

[32m[I 2023-01-29 16:02:25,630][0m Trial 39 finished with value: -85.3 and parameters: {'n_steps': 7724, 'gamma': 0.8452228364896137, 'learning_rate': 4.539130791241529e-05, 'clip_range': 0.23892154353608996, 'gae_lambda': 0.8106286437086913}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_41


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6990 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 165      |
|    ep_rew_mean     | -36.9    |
| time/              |          |
|    fps             | 182      |
|    iterations      | 1        |
|    time_elapsed    | 38       |
|    total_timesteps | 6990     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 182          |
|    ep_rew_mean          | -21.5        |
| time/                   |              |
|    fps                  | 104          |
|    iterations           | 2            |
|    time_elapsed         | 133          |
|    total_timesteps      | 13980        |
| train/                  |              |
|    approx_kl            | 0.0116393445 |
|    clip_fraction        | 0.255        |
|    clip_range           | 0.127        |
|    entropy_loss         | -1.79        |
|    explained_variance   | 0.00092      |
|    learning_r

[32m[I 2023-01-29 16:26:20,638][0m Trial 40 finished with value: -85.3 and parameters: {'n_steps': 6990, 'gamma': 0.8926849372823275, 'learning_rate': 8.652060046148538e-05, 'clip_range': 0.12669309325245598, 'gae_lambda': 0.9581755508326174}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_42


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6295 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 179      |
|    ep_rew_mean     | -30.9    |
| time/              |          |
|    fps             | 182      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 6295     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 211         |
|    ep_rew_mean          | 16.7        |
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 2           |
|    time_elapsed         | 120         |
|    total_timesteps      | 12590       |
| train/                  |             |
|    approx_kl            | 0.092251815 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.377       |
|    entropy_loss         | -1.74       |
|    explained_variance   | 0.00213     |
|    learning_rate        | 6.

[32m[I 2023-01-29 16:49:30,711][0m Trial 41 finished with value: 209.2 and parameters: {'n_steps': 6295, 'gamma': 0.8094109832579164, 'learning_rate': 6.839493240054243e-05, 'clip_range': 0.37693620720251714, 'gae_lambda': 0.8098742076174585}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_43


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6235 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 193      |
|    ep_rew_mean     | -26.8    |
| time/              |          |
|    fps             | 182      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 6235     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 208        |
|    ep_rew_mean          | 11.6       |
| time/                   |            |
|    fps                  | 104        |
|    iterations           | 2          |
|    time_elapsed         | 119        |
|    total_timesteps      | 12470      |
| train/                  |            |
|    approx_kl            | 0.09398948 |
|    clip_fraction        | 0.309      |
|    clip_range           | 0.343      |
|    entropy_loss         | -1.73      |
|    explained_variance   | -0.000402  |
|    learning_rate        | 6.75e-05   |
|   

[32m[I 2023-01-29 17:13:40,919][0m Trial 42 finished with value: -76.9 and parameters: {'n_steps': 6235, 'gamma': 0.8111510068018295, 'learning_rate': 6.747233232738812e-05, 'clip_range': 0.34337540697596747, 'gae_lambda': 0.8175671740209028}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_44


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6404 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 181      |
|    ep_rew_mean     | -28.7    |
| time/              |          |
|    fps             | 182      |
|    iterations      | 1        |
|    time_elapsed    | 35       |
|    total_timesteps | 6404     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 203         |
|    ep_rew_mean          | 10.4        |
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 2           |
|    time_elapsed         | 122         |
|    total_timesteps      | 12808       |
| train/                  |             |
|    approx_kl            | 0.098161586 |
|    clip_fraction        | 0.275       |
|    clip_range           | 0.381       |
|    entropy_loss         | -1.72       |
|    explained_variance   | 0.00117     |
|    learning_rate        | 7.

[32m[I 2023-01-29 17:37:19,839][0m Trial 43 finished with value: -85.3 and parameters: {'n_steps': 6404, 'gamma': 0.8283263256070834, 'learning_rate': 7.795782036525545e-05, 'clip_range': 0.38124588447595126, 'gae_lambda': 0.8073879642193357}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_45


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5657 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 176      |
|    ep_rew_mean     | -27.9    |
| time/              |          |
|    fps             | 182      |
|    iterations      | 1        |
|    time_elapsed    | 30       |
|    total_timesteps | 5657     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 213       |
|    ep_rew_mean          | 20.5      |
| time/                   |           |
|    fps                  | 102       |
|    iterations           | 2         |
|    time_elapsed         | 109       |
|    total_timesteps      | 11314     |
| train/                  |           |
|    approx_kl            | 0.088996  |
|    clip_fraction        | 0.259     |
|    clip_range           | 0.398     |
|    entropy_loss         | -1.73     |
|    explained_variance   | -0.000952 |
|    learning_rate        | 9.37e-05  |
|    loss           

[32m[I 2023-01-29 18:00:32,511][0m Trial 44 finished with value: -85.3 and parameters: {'n_steps': 5657, 'gamma': 0.8508543154151534, 'learning_rate': 9.373223181509847e-05, 'clip_range': 0.39838511369282387, 'gae_lambda': 0.8346640081186562}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_46


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4591 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 154      |
|    ep_rew_mean     | -41.4    |
| time/              |          |
|    fps             | 187      |
|    iterations      | 1        |
|    time_elapsed    | 24       |
|    total_timesteps | 4591     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 191         |
|    ep_rew_mean          | -5.18       |
| time/                   |             |
|    fps                  | 105         |
|    iterations           | 2           |
|    time_elapsed         | 86          |
|    total_timesteps      | 9182        |
| train/                  |             |
|    approx_kl            | 0.045423795 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.367       |
|    entropy_loss         | -1.76       |
|    explained_variance   | -0.000315   |
|    learning_rate        | 3.

[32m[I 2023-01-29 18:24:04,373][0m Trial 45 finished with value: -85.3 and parameters: {'n_steps': 4591, 'gamma': 0.9384225611156733, 'learning_rate': 3.6932159300759e-05, 'clip_range': 0.3673373434630644, 'gae_lambda': 0.8254345412993517}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_47


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7775 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 189      |
|    ep_rew_mean     | -21.3    |
| time/              |          |
|    fps             | 163      |
|    iterations      | 1        |
|    time_elapsed    | 47       |
|    total_timesteps | 7775     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 230         |
|    ep_rew_mean          | 27.3        |
| time/                   |             |
|    fps                  | 97          |
|    iterations           | 2           |
|    time_elapsed         | 159         |
|    total_timesteps      | 15550       |
| train/                  |             |
|    approx_kl            | 0.082514495 |
|    clip_fraction        | 0.271       |
|    clip_range           | 0.334       |
|    entropy_loss         | -1.75       |
|    explained_variance   | -0.00137    |
|    learning_rate        | 6.

[32m[I 2023-01-29 18:48:47,107][0m Trial 46 finished with value: -85.3 and parameters: {'n_steps': 7775, 'gamma': 0.8071881491040035, 'learning_rate': 6.19366123143509e-05, 'clip_range': 0.3343701372212414, 'gae_lambda': 0.9444408906062728}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_48


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6663 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 161      |
|    ep_rew_mean     | -34.2    |
| time/              |          |
|    fps             | 182      |
|    iterations      | 1        |
|    time_elapsed    | 36       |
|    total_timesteps | 6663     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 194        |
|    ep_rew_mean          | 7.31       |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 132        |
|    total_timesteps      | 13326      |
| train/                  |            |
|    approx_kl            | 0.07230544 |
|    clip_fraction        | 0.228      |
|    clip_range           | 0.35       |
|    entropy_loss         | -1.74      |
|    explained_variance   | -0.000798  |
|    learning_rate        | 8.34e-05   |
|   

[32m[I 2023-01-29 19:14:37,040][0m Trial 47 finished with value: -73.3 and parameters: {'n_steps': 6663, 'gamma': 0.835575008896649, 'learning_rate': 8.34261499975297e-05, 'clip_range': 0.3504337283037637, 'gae_lambda': 0.806102331660606}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_49


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6392 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 180      |
|    ep_rew_mean     | -28.3    |
| time/              |          |
|    fps             | 182      |
|    iterations      | 1        |
|    time_elapsed    | 35       |
|    total_timesteps | 6392     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 220        |
|    ep_rew_mean          | 23.3       |
| time/                   |            |
|    fps                  | 101        |
|    iterations           | 2          |
|    time_elapsed         | 125        |
|    total_timesteps      | 12784      |
| train/                  |            |
|    approx_kl            | 0.09259704 |
|    clip_fraction        | 0.284      |
|    clip_range           | 0.366      |
|    entropy_loss         | -1.73      |
|    explained_variance   | -0.00222   |
|    learning_rate        | 7.3e-05    |
|   

[32m[I 2023-01-29 19:39:23,521][0m Trial 48 finished with value: -91.1 and parameters: {'n_steps': 6392, 'gamma': 0.8235268811518195, 'learning_rate': 7.301728523039064e-05, 'clip_range': 0.36560008853408965, 'gae_lambda': 0.8375619619149548}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_50


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=710 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 115      |
|    ep_rew_mean     | -56.3    |
| time/              |          |
|    fps             | 171      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 710      |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 168          |
|    ep_rew_mean          | -30.2        |
| time/                   |              |
|    fps                  | 98           |
|    iterations           | 2            |
|    time_elapsed         | 14           |
|    total_timesteps      | 1420         |
| train/                  |              |
|    approx_kl            | 0.0027076134 |
|    clip_fraction        | 0            |
|    clip_range           | 0.315        |
|    entropy_loss         | -1.79        |
|    explained_variance   | 0.000332     |
|    learning_r

[32m[I 2023-01-29 20:04:39,368][0m Trial 49 finished with value: -73.3 and parameters: {'n_steps': 710, 'gamma': 0.868923273331607, 'learning_rate': 2.479512881057543e-05, 'clip_range': 0.3145282173056798, 'gae_lambda': 0.8157324000718498}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_51


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7048 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -19.6    |
| time/              |          |
|    fps             | 174      |
|    iterations      | 1        |
|    time_elapsed    | 40       |
|    total_timesteps | 7048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 214         |
|    ep_rew_mean          | 4.82        |
| time/                   |             |
|    fps                  | 95          |
|    iterations           | 2           |
|    time_elapsed         | 147         |
|    total_timesteps      | 14096       |
| train/                  |             |
|    approx_kl            | 0.030132605 |
|    clip_fraction        | 0.329       |
|    clip_range           | 0.168       |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.000546    |
|    learning_rate        | 9.

[32m[I 2023-01-29 20:31:33,409][0m Trial 50 finished with value: -7.3 and parameters: {'n_steps': 7048, 'gamma': 0.8172056289416872, 'learning_rate': 9.289285378380245e-05, 'clip_range': 0.16773985143399914, 'gae_lambda': 0.8583429628816329}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_52


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7061 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 163      |
|    ep_rew_mean     | -36.1    |
| time/              |          |
|    fps             | 165      |
|    iterations      | 1        |
|    time_elapsed    | 42       |
|    total_timesteps | 7061     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 185         |
|    ep_rew_mean          | -7.47       |
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 2           |
|    time_elapsed         | 149         |
|    total_timesteps      | 14122       |
| train/                  |             |
|    approx_kl            | 0.025522951 |
|    clip_fraction        | 0.304       |
|    clip_range           | 0.147       |
|    entropy_loss         | -1.78       |
|    explained_variance   | 3.84e-05    |
|    learning_rate        | 9.

[32m[I 2023-01-29 20:57:49,733][0m Trial 51 finished with value: -73.3 and parameters: {'n_steps': 7061, 'gamma': 0.8169551322174375, 'learning_rate': 9.928701352628175e-05, 'clip_range': 0.14700579567163463, 'gae_lambda': 0.8537933924698369}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_53


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7142 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 144      |
|    ep_rew_mean     | -47.9    |
| time/              |          |
|    fps             | 165      |
|    iterations      | 1        |
|    time_elapsed    | 43       |
|    total_timesteps | 7142     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 165         |
|    ep_rew_mean          | -21.4       |
| time/                   |             |
|    fps                  | 97          |
|    iterations           | 2           |
|    time_elapsed         | 146         |
|    total_timesteps      | 14284       |
| train/                  |             |
|    approx_kl            | 0.023299119 |
|    clip_fraction        | 0.315       |
|    clip_range           | 0.167       |
|    entropy_loss         | -1.78       |
|    explained_variance   | 0.000341    |
|    learning_rate        | 9.

[32m[I 2023-01-29 21:23:00,697][0m Trial 52 finished with value: -77.1 and parameters: {'n_steps': 7142, 'gamma': 0.829747463504383, 'learning_rate': 9.336429731679564e-05, 'clip_range': 0.1667825990302309, 'gae_lambda': 0.8676131578726112}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_54


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6035 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 195      |
|    ep_rew_mean     | -18.6    |
| time/              |          |
|    fps             | 163      |
|    iterations      | 1        |
|    time_elapsed    | 37       |
|    total_timesteps | 6035     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 205       |
|    ep_rew_mean          | 2.14      |
| time/                   |           |
|    fps                  | 96        |
|    iterations           | 2         |
|    time_elapsed         | 125       |
|    total_timesteps      | 12070     |
| train/                  |           |
|    approx_kl            | 0.029125  |
|    clip_fraction        | 0.361     |
|    clip_range           | 0.125     |
|    entropy_loss         | -1.78     |
|    explained_variance   | -0.000554 |
|    learning_rate        | 8.26e-05  |
|    loss           

[32m[I 2023-01-29 21:48:58,865][0m Trial 53 finished with value: -85.3 and parameters: {'n_steps': 6035, 'gamma': 0.8105698358882206, 'learning_rate': 8.261908465533084e-05, 'clip_range': 0.12546031255584913, 'gae_lambda': 0.826646338895175}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_55


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7818 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 185      |
|    ep_rew_mean     | -23.5    |
| time/              |          |
|    fps             | 155      |
|    iterations      | 1        |
|    time_elapsed    | 50       |
|    total_timesteps | 7818     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 233         |
|    ep_rew_mean          | 19.8        |
| time/                   |             |
|    fps                  | 90          |
|    iterations           | 2           |
|    time_elapsed         | 173         |
|    total_timesteps      | 15636       |
| train/                  |             |
|    approx_kl            | 0.030079816 |
|    clip_fraction        | 0.319       |
|    clip_range           | 0.162       |
|    entropy_loss         | -1.77       |
|    explained_variance   | -0.000327   |
|    learning_rate        | 7.

[32m[I 2023-01-29 22:15:22,808][0m Trial 54 finished with value: -85.3 and parameters: {'n_steps': 7818, 'gamma': 0.802562267355975, 'learning_rate': 7.329245284484569e-05, 'clip_range': 0.16158079335109682, 'gae_lambda': 0.8451493319399731}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_56


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5457 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 150      |
|    ep_rew_mean     | -43.9    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 30       |
|    total_timesteps | 5457     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 183         |
|    ep_rew_mean          | -11.3       |
| time/                   |             |
|    fps                  | 101         |
|    iterations           | 2           |
|    time_elapsed         | 107         |
|    total_timesteps      | 10914       |
| train/                  |             |
|    approx_kl            | 0.031788558 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.208       |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.000984    |
|    learning_rate        | 6.

[32m[I 2023-01-29 22:39:44,965][0m Trial 55 finished with value: -85.3 and parameters: {'n_steps': 5457, 'gamma': 0.851466404829576, 'learning_rate': 6.553883856142601e-05, 'clip_range': 0.2077201566208677, 'gae_lambda': 0.813200682191472}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_57
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 222      |
|    ep_rew_mean     | -5.53    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 36       |
|    total_timesteps | 6528     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 251        |
|    ep_rew_mean          | 48.2       |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 127        |
|    total_timesteps      | 13056      |
| train/                  |            |
|    approx_kl            | 0.18070537 |
|    clip_fraction        | 0.302      |
|    clip_range           | 0.392      |
|    entropy_loss         | -1.7 

[32m[I 2023-01-29 23:04:07,355][0m Trial 56 finished with value: -57.3 and parameters: {'n_steps': 6528, 'gamma': 0.8228402827932344, 'learning_rate': 5.8118484464928394e-05, 'clip_range': 0.39153899342026044, 'gae_lambda': 0.8061402680947445}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_58


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7245 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 202      |
|    ep_rew_mean     | -13.8    |
| time/              |          |
|    fps             | 181      |
|    iterations      | 1        |
|    time_elapsed    | 39       |
|    total_timesteps | 7245     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 202        |
|    ep_rew_mean          | 5.19       |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 140        |
|    total_timesteps      | 14490      |
| train/                  |            |
|    approx_kl            | 0.05754457 |
|    clip_fraction        | 0.31       |
|    clip_range           | 0.26       |
|    entropy_loss         | -1.75      |
|    explained_variance   | 0.000183   |
|    learning_rate        | 9.21e-05   |
|   

[32m[I 2023-01-29 23:27:49,851][0m Trial 57 finished with value: -85.3 and parameters: {'n_steps': 7245, 'gamma': 0.8384307811754216, 'learning_rate': 9.210171983351996e-05, 'clip_range': 0.25960062744285234, 'gae_lambda': 0.8596029714682275}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_59


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5749 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 216      |
|    ep_rew_mean     | -14.7    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 32       |
|    total_timesteps | 5749     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 239         |
|    ep_rew_mean          | 17.8        |
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 2           |
|    time_elapsed         | 112         |
|    total_timesteps      | 11498       |
| train/                  |             |
|    approx_kl            | 0.023396177 |
|    clip_fraction        | 0.354       |
|    clip_range           | 0.117       |
|    entropy_loss         | -1.78       |
|    explained_variance   | 0.000666    |
|    learning_rate        | 7.

[32m[I 2023-01-29 23:51:57,247][0m Trial 58 finished with value: -85.3 and parameters: {'n_steps': 5749, 'gamma': 0.8176480292424615, 'learning_rate': 7.520691643271574e-05, 'clip_range': 0.11665039439676794, 'gae_lambda': 0.821857136600663}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_60


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6160 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 192      |
|    ep_rew_mean     | -19.4    |
| time/              |          |
|    fps             | 183      |
|    iterations      | 1        |
|    time_elapsed    | 33       |
|    total_timesteps | 6160     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 179        |
|    ep_rew_mean          | -8.51      |
| time/                   |            |
|    fps                  | 102        |
|    iterations           | 2          |
|    time_elapsed         | 120        |
|    total_timesteps      | 12320      |
| train/                  |            |
|    approx_kl            | 0.06482519 |
|    clip_fraction        | 0.125      |
|    clip_range           | 0.385      |
|    entropy_loss         | -1.76      |
|    explained_variance   | 0.00164    |
|    learning_rate        | 1.66e-05   |
|   

[32m[I 2023-01-30 00:17:02,972][0m Trial 59 finished with value: -85.3 and parameters: {'n_steps': 6160, 'gamma': 0.8573958631019631, 'learning_rate': 1.6642075438065667e-05, 'clip_range': 0.385134851450953, 'gae_lambda': 0.9063912946067367}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_61


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2233 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 179      |
|    ep_rew_mean     | -21.6    |
| time/              |          |
|    fps             | 160      |
|    iterations      | 1        |
|    time_elapsed    | 13       |
|    total_timesteps | 2233     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 210         |
|    ep_rew_mean          | 13.4        |
| time/                   |             |
|    fps                  | 96          |
|    iterations           | 2           |
|    time_elapsed         | 46          |
|    total_timesteps      | 4466        |
| train/                  |             |
|    approx_kl            | 0.038755905 |
|    clip_fraction        | 0.276       |
|    clip_range           | 0.181       |
|    entropy_loss         | -1.77       |
|    explained_variance   | 0.00129     |
|    learning_rate        | 8.

[32m[I 2023-01-30 00:42:48,148][0m Trial 60 finished with value: -84.3 and parameters: {'n_steps': 2233, 'gamma': 0.8303199523324565, 'learning_rate': 8.613694518586345e-05, 'clip_range': 0.1813400121647884, 'gae_lambda': 0.8348063603077792}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_62


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=8113 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 167      |
|    ep_rew_mean     | -35.4    |
| time/              |          |
|    fps             | 156      |
|    iterations      | 1        |
|    time_elapsed    | 51       |
|    total_timesteps | 8113     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 202        |
|    ep_rew_mean          | 10.4       |
| time/                   |            |
|    fps                  | 87         |
|    iterations           | 2          |
|    time_elapsed         | 185        |
|    total_timesteps      | 16226      |
| train/                  |            |
|    approx_kl            | 0.12804602 |
|    clip_fraction        | 0.295      |
|    clip_range           | 0.37       |
|    entropy_loss         | -1.72      |
|    explained_variance   | -0.000635  |
|    learning_rate        | 8.13e-05   |
|   

[32m[I 2023-01-30 01:10:04,878][0m Trial 61 finished with value: -89.7 and parameters: {'n_steps': 8113, 'gamma': 0.8004410879937364, 'learning_rate': 8.134880587652603e-05, 'clip_range': 0.36986975443000575, 'gae_lambda': 0.8014550289512588}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_63


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6844 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 147      |
|    ep_rew_mean     | -46.8    |
| time/              |          |
|    fps             | 170      |
|    iterations      | 1        |
|    time_elapsed    | 40       |
|    total_timesteps | 6844     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 187        |
|    ep_rew_mean          | -4.93      |
| time/                   |            |
|    fps                  | 93         |
|    iterations           | 2          |
|    time_elapsed         | 146        |
|    total_timesteps      | 13688      |
| train/                  |            |
|    approx_kl            | 0.07902022 |
|    clip_fraction        | 0.28       |
|    clip_range           | 0.378      |
|    entropy_loss         | -1.73      |
|    explained_variance   | 0.000437   |
|    learning_rate        | 9.99e-05   |
|   

[32m[I 2023-01-30 01:36:18,012][0m Trial 62 finished with value: -73.3 and parameters: {'n_steps': 6844, 'gamma': 0.8100314214213902, 'learning_rate': 9.994564053740941e-05, 'clip_range': 0.3784364578407704, 'gae_lambda': 0.8153354413767884}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_64


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7513 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 189      |
|    ep_rew_mean     | -19.8    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 41       |
|    total_timesteps | 7513     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 198         |
|    ep_rew_mean          | -10.1       |
| time/                   |             |
|    fps                  | 101         |
|    iterations           | 2           |
|    time_elapsed         | 148         |
|    total_timesteps      | 15026       |
| train/                  |             |
|    approx_kl            | 0.015430588 |
|    clip_fraction        | 0.282       |
|    clip_range           | 0.146       |
|    entropy_loss         | -1.78       |
|    explained_variance   | -0.000141   |
|    learning_rate        | 8.

[32m[I 2023-01-30 02:01:08,230][0m Trial 63 finished with value: -85.3 and parameters: {'n_steps': 7513, 'gamma': 0.9977324968825569, 'learning_rate': 8.83599795007847e-05, 'clip_range': 0.1457328603732705, 'gae_lambda': 0.8301073648336426}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_65


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6963 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 191      |
|    ep_rew_mean     | -24.3    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 38       |
|    total_timesteps | 6963     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 208         |
|    ep_rew_mean          | 14          |
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 138         |
|    total_timesteps      | 13926       |
| train/                  |             |
|    approx_kl            | 0.109398946 |
|    clip_fraction        | 0.316       |
|    clip_range           | 0.353       |
|    entropy_loss         | -1.73       |
|    explained_variance   | -0.00205    |
|    learning_rate        | 9.

[32m[I 2023-01-30 02:25:51,023][0m Trial 64 finished with value: -66.2 and parameters: {'n_steps': 6963, 'gamma': 0.8176031301675337, 'learning_rate': 9.328803104968716e-05, 'clip_range': 0.35266192964864485, 'gae_lambda': 0.8079646278935161}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_66


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3583 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 156      |
|    ep_rew_mean     | -39.2    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 19       |
|    total_timesteps | 3583     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 171        |
|    ep_rew_mean          | -11.6      |
| time/                   |            |
|    fps                  | 101        |
|    iterations           | 2          |
|    time_elapsed         | 70         |
|    total_timesteps      | 7166       |
| train/                  |            |
|    approx_kl            | 0.06695038 |
|    clip_fraction        | 0.2        |
|    clip_range           | 0.34       |
|    entropy_loss         | -1.75      |
|    explained_variance   | 0.0015     |
|    learning_rate        | 6.81e-05   |
|   

[32m[I 2023-01-30 02:49:25,823][0m Trial 65 finished with value: -77.3 and parameters: {'n_steps': 3583, 'gamma': 0.8058816042412009, 'learning_rate': 6.813133896101269e-05, 'clip_range': 0.34026875558708203, 'gae_lambda': 0.8230967197874193}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_67


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7353 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 154      |
|    ep_rew_mean     | -41.4    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 40       |
|    total_timesteps | 7353     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 191        |
|    ep_rew_mean          | -0.531     |
| time/                   |            |
|    fps                  | 101        |
|    iterations           | 2          |
|    time_elapsed         | 145        |
|    total_timesteps      | 14706      |
| train/                  |            |
|    approx_kl            | 0.06592494 |
|    clip_fraction        | 0.23       |
|    clip_range           | 0.399      |
|    entropy_loss         | -1.74      |
|    explained_variance   | -0.00056   |
|    learning_rate        | 8.05e-05   |
|   

[32m[I 2023-01-30 03:13:46,581][0m Trial 66 finished with value: -73.3 and parameters: {'n_steps': 7353, 'gamma': 0.8256485380246327, 'learning_rate': 8.05105157860256e-05, 'clip_range': 0.3992242095386455, 'gae_lambda': 0.8821705262547926}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_68


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6561 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 150      |
|    ep_rew_mean     | -42.4    |
| time/              |          |
|    fps             | 177      |
|    iterations      | 1        |
|    time_elapsed    | 36       |
|    total_timesteps | 6561     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 182         |
|    ep_rew_mean          | -2.39       |
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 130         |
|    total_timesteps      | 13122       |
| train/                  |             |
|    approx_kl            | 0.050398648 |
|    clip_fraction        | 0.239       |
|    clip_range           | 0.314       |
|    entropy_loss         | -1.75       |
|    explained_variance   | -0.00186    |
|    learning_rate        | 7.

[32m[I 2023-01-30 03:38:45,765][0m Trial 67 finished with value: -85.3 and parameters: {'n_steps': 6561, 'gamma': 0.8472169512844208, 'learning_rate': 7.466386491931167e-05, 'clip_range': 0.3140195717750689, 'gae_lambda': 0.801232142135762}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_69


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6364 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 160      |
|    ep_rew_mean     | -36.2    |
| time/              |          |
|    fps             | 182      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 6364     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 201        |
|    ep_rew_mean          | 5.9        |
| time/                   |            |
|    fps                  | 101        |
|    iterations           | 2          |
|    time_elapsed         | 125        |
|    total_timesteps      | 12728      |
| train/                  |            |
|    approx_kl            | 0.05259795 |
|    clip_fraction        | 0.242      |
|    clip_range           | 0.29       |
|    entropy_loss         | -1.76      |
|    explained_variance   | -0.000574  |
|    learning_rate        | 9e-05      |
|   

[32m[I 2023-01-30 04:03:20,966][0m Trial 68 finished with value: -82.2 and parameters: {'n_steps': 6364, 'gamma': 0.8382588511841434, 'learning_rate': 9.003169704899886e-05, 'clip_range': 0.28957801175065495, 'gae_lambda': 0.8148541948725099}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_70


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7915 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 164      |
|    ep_rew_mean     | -37.9    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 43       |
|    total_timesteps | 7915     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 166        |
|    ep_rew_mean          | -19.5      |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 158        |
|    total_timesteps      | 15830      |
| train/                  |            |
|    approx_kl            | 0.03200603 |
|    clip_fraction        | 0.23       |
|    clip_range           | 0.229      |
|    entropy_loss         | -1.77      |
|    explained_variance   | -0.00105   |
|    learning_rate        | 2.77e-05   |
|   

[32m[I 2023-01-30 04:28:10,630][0m Trial 69 finished with value: -85.3 and parameters: {'n_steps': 7915, 'gamma': 0.8144130045240991, 'learning_rate': 2.767002757668557e-05, 'clip_range': 0.22942671928300984, 'gae_lambda': 0.840738218824411}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_71


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5289 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 137      |
|    ep_rew_mean     | -51.8    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 29       |
|    total_timesteps | 5289     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 176         |
|    ep_rew_mean          | -11.6       |
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 105         |
|    total_timesteps      | 10578       |
| train/                  |             |
|    approx_kl            | 0.057749346 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.377       |
|    entropy_loss         | -1.75       |
|    explained_variance   | 0.000632    |
|    learning_rate        | 8.

[32m[I 2023-01-30 04:52:22,165][0m Trial 70 finished with value: -83.3 and parameters: {'n_steps': 5289, 'gamma': 0.8327309637825169, 'learning_rate': 8.316834463001707e-05, 'clip_range': 0.37740163517768927, 'gae_lambda': 0.8196502832256473}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_72


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4891 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 163      |
|    ep_rew_mean     | -38      |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 4891     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 176        |
|    ep_rew_mean          | -7.83      |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 97         |
|    total_timesteps      | 9782       |
| train/                  |            |
|    approx_kl            | 0.08096382 |
|    clip_fraction        | 0.236      |
|    clip_range           | 0.359      |
|    entropy_loss         | -1.75      |
|    explained_variance   | -0.00135   |
|    learning_rate        | 5.68e-05   |
|   

[32m[I 2023-01-30 05:16:54,942][0m Trial 71 finished with value: -73.3 and parameters: {'n_steps': 4891, 'gamma': 0.844981539978275, 'learning_rate': 5.681409998005468e-05, 'clip_range': 0.3594435972424084, 'gae_lambda': 0.8091867934473108}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_73


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5809 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 143      |
|    ep_rew_mean     | -47.4    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 32       |
|    total_timesteps | 5809     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 188        |
|    ep_rew_mean          | 1.18       |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 115        |
|    total_timesteps      | 11618      |
| train/                  |            |
|    approx_kl            | 0.08703538 |
|    clip_fraction        | 0.235      |
|    clip_range           | 0.372      |
|    entropy_loss         | -1.74      |
|    explained_variance   | 0.00165    |
|    learning_rate        | 9.47e-05   |
|   

[32m[I 2023-01-30 05:42:06,910][0m Trial 72 finished with value: -57.3 and parameters: {'n_steps': 5809, 'gamma': 0.8432568242613906, 'learning_rate': 9.467347822641887e-05, 'clip_range': 0.37210594230378713, 'gae_lambda': 0.8101420532083325}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_74


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5060 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 139      |
|    ep_rew_mean     | -47.6    |
| time/              |          |
|    fps             | 181      |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 5060     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 185         |
|    ep_rew_mean          | -1.23       |
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 100         |
|    total_timesteps      | 10120       |
| train/                  |             |
|    approx_kl            | 0.067614034 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.39        |
|    entropy_loss         | -1.75       |
|    explained_variance   | -0.00103    |
|    learning_rate        | 5.

[32m[I 2023-01-30 06:06:32,561][0m Trial 73 finished with value: -85.3 and parameters: {'n_steps': 5060, 'gamma': 0.8614989898197061, 'learning_rate': 5.058489133849229e-05, 'clip_range': 0.3897771565843326, 'gae_lambda': 0.8321681876086139}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_75


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3811 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 137      |
|    ep_rew_mean     | -48.8    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 21       |
|    total_timesteps | 3811     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 166        |
|    ep_rew_mean          | -22.4      |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 75         |
|    total_timesteps      | 7622       |
| train/                  |            |
|    approx_kl            | 0.05073489 |
|    clip_fraction        | 0.173      |
|    clip_range           | 0.36       |
|    entropy_loss         | -1.76      |
|    explained_variance   | -0.00073   |
|    learning_rate        | 6.53e-05   |
|   

[32m[I 2023-01-30 06:31:10,966][0m Trial 74 finished with value: -73.3 and parameters: {'n_steps': 3811, 'gamma': 0.8210756708538313, 'learning_rate': 6.529144124516471e-05, 'clip_range': 0.36044409531549965, 'gae_lambda': 0.8252894879108952}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_76


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6727 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 155      |
|    ep_rew_mean     | -40.2    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 37       |
|    total_timesteps | 6727     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 196        |
|    ep_rew_mean          | 6.8        |
| time/                   |            |
|    fps                  | 99         |
|    iterations           | 2          |
|    time_elapsed         | 135        |
|    total_timesteps      | 13454      |
| train/                  |            |
|    approx_kl            | 0.08233448 |
|    clip_fraction        | 0.259      |
|    clip_range           | 0.381      |
|    entropy_loss         | -1.74      |
|    explained_variance   | 0.000376   |
|    learning_rate        | 7.04e-05   |
|   

[32m[I 2023-01-30 06:55:36,484][0m Trial 75 finished with value: 30.0 and parameters: {'n_steps': 6727, 'gamma': 0.8904193648758463, 'learning_rate': 7.043873019329409e-05, 'clip_range': 0.38132022318957454, 'gae_lambda': 0.8029420045963058}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_77


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6762 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 203      |
|    ep_rew_mean     | -14      |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 37       |
|    total_timesteps | 6762     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 221        |
|    ep_rew_mean          | 18.7       |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 134        |
|    total_timesteps      | 13524      |
| train/                  |            |
|    approx_kl            | 0.08603937 |
|    clip_fraction        | 0.323      |
|    clip_range           | 0.332      |
|    entropy_loss         | -1.73      |
|    explained_variance   | 0.00155    |
|    learning_rate        | 7.79e-05   |
|   

[32m[I 2023-01-30 07:20:02,872][0m Trial 76 finished with value: -86.5 and parameters: {'n_steps': 6762, 'gamma': 0.9198864699999916, 'learning_rate': 7.794844780907176e-05, 'clip_range': 0.3321928316569144, 'gae_lambda': 0.8021669582445012}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_78


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7595 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 199      |
|    ep_rew_mean     | -16.4    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 42       |
|    total_timesteps | 7595     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 258        |
|    ep_rew_mean          | 46.2       |
| time/                   |            |
|    fps                  | 99         |
|    iterations           | 2          |
|    time_elapsed         | 152        |
|    total_timesteps      | 15190      |
| train/                  |            |
|    approx_kl            | 0.09877031 |
|    clip_fraction        | 0.296      |
|    clip_range           | 0.349      |
|    entropy_loss         | -1.72      |
|    explained_variance   | -0.00211   |
|    learning_rate        | 7.03e-05   |
|   

[32m[I 2023-01-30 07:45:50,696][0m Trial 77 finished with value: -7.3 and parameters: {'n_steps': 7595, 'gamma': 0.8056296278517728, 'learning_rate': 7.029062212853478e-05, 'clip_range': 0.3486249611406103, 'gae_lambda': 0.8182553902147298}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_79


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7624 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 152      |
|    ep_rew_mean     | -40.4    |
| time/              |          |
|    fps             | 177      |
|    iterations      | 1        |
|    time_elapsed    | 42       |
|    total_timesteps | 7624     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 9.94        |
| time/                   |             |
|    fps                  | 99          |
|    iterations           | 2           |
|    time_elapsed         | 152         |
|    total_timesteps      | 15248       |
| train/                  |             |
|    approx_kl            | 0.057888627 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.347       |
|    entropy_loss         | -1.75       |
|    explained_variance   | 0.000727    |
|    learning_rate        | 7.

[32m[I 2023-01-30 08:11:31,090][0m Trial 78 finished with value: -85.3 and parameters: {'n_steps': 7624, 'gamma': 0.9046585784991935, 'learning_rate': 7.080033635615093e-05, 'clip_range': 0.3473422624263571, 'gae_lambda': 0.8185196586938961}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_80


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7256 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 155      |
|    ep_rew_mean     | -42      |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 40       |
|    total_timesteps | 7256     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 211        |
|    ep_rew_mean          | 12.5       |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 143        |
|    total_timesteps      | 14512      |
| train/                  |            |
|    approx_kl            | 0.06501402 |
|    clip_fraction        | 0.201      |
|    clip_range           | 0.384      |
|    entropy_loss         | -1.75      |
|    explained_variance   | -0.000772  |
|    learning_rate        | 3.93e-05   |
|   

[32m[I 2023-01-30 08:35:48,815][0m Trial 79 finished with value: -84.3 and parameters: {'n_steps': 7256, 'gamma': 0.8844449305747781, 'learning_rate': 3.934266518474682e-05, 'clip_range': 0.38377865626861574, 'gae_lambda': 0.8502852654979408}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_81


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6138 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 169      |
|    ep_rew_mean     | -32.6    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 33       |
|    total_timesteps | 6138     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 192       |
|    ep_rew_mean          | 1.57      |
| time/                   |           |
|    fps                  | 100       |
|    iterations           | 2         |
|    time_elapsed         | 121       |
|    total_timesteps      | 12276     |
| train/                  |           |
|    approx_kl            | 0.075429  |
|    clip_fraction        | 0.208     |
|    clip_range           | 0.362     |
|    entropy_loss         | -1.75     |
|    explained_variance   | -0.000815 |
|    learning_rate        | 6.02e-05  |
|    loss           

[32m[I 2023-01-30 09:00:46,156][0m Trial 80 finished with value: -73.3 and parameters: {'n_steps': 6138, 'gamma': 0.8913819591197352, 'learning_rate': 6.021409399739364e-05, 'clip_range': 0.36202230994027523, 'gae_lambda': 0.8283191652633555}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_82


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6908 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 198      |
|    ep_rew_mean     | -20.3    |
| time/              |          |
|    fps             | 179      |
|    iterations      | 1        |
|    time_elapsed    | 38       |
|    total_timesteps | 6908     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 223        |
|    ep_rew_mean          | 25.1       |
| time/                   |            |
|    fps                  | 100        |
|    iterations           | 2          |
|    time_elapsed         | 137        |
|    total_timesteps      | 13816      |
| train/                  |            |
|    approx_kl            | 0.12817276 |
|    clip_fraction        | 0.306      |
|    clip_range           | 0.373      |
|    entropy_loss         | -1.72      |
|    explained_variance   | 0.00188    |
|    learning_rate        | 8.72e-05   |
|   

[32m[I 2023-01-30 09:25:40,171][0m Trial 81 finished with value: -73.3 and parameters: {'n_steps': 6908, 'gamma': 0.805378247580374, 'learning_rate': 8.722675714946993e-05, 'clip_range': 0.37264473759062927, 'gae_lambda': 0.8056785836449609}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_83


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7392 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 170      |
|    ep_rew_mean     | -35      |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 41       |
|    total_timesteps | 7392     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 191        |
|    ep_rew_mean          | 2.66       |
| time/                   |            |
|    fps                  | 99         |
|    iterations           | 2          |
|    time_elapsed         | 148        |
|    total_timesteps      | 14784      |
| train/                  |            |
|    approx_kl            | 0.08858721 |
|    clip_fraction        | 0.263      |
|    clip_range           | 0.352      |
|    entropy_loss         | -1.74      |
|    explained_variance   | -0.000161  |
|    learning_rate        | 7.76e-05   |
|   

[32m[I 2023-01-30 09:50:46,419][0m Trial 82 finished with value: -75.7 and parameters: {'n_steps': 7392, 'gamma': 0.8079550873206923, 'learning_rate': 7.760306651516087e-05, 'clip_range': 0.3521319844241769, 'gae_lambda': 0.8131480015992586}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_84


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6608 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 197      |
|    ep_rew_mean     | -16.9    |
| time/              |          |
|    fps             | 173      |
|    iterations      | 1        |
|    time_elapsed    | 38       |
|    total_timesteps | 6608     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 244        |
|    ep_rew_mean          | 41.2       |
| time/                   |            |
|    fps                  | 98         |
|    iterations           | 2          |
|    time_elapsed         | 134        |
|    total_timesteps      | 13216      |
| train/                  |            |
|    approx_kl            | 0.10868797 |
|    clip_fraction        | 0.231      |
|    clip_range           | 0.392      |
|    entropy_loss         | -1.73      |
|    explained_variance   | 0.000476   |
|    learning_rate        | 5.43e-05   |
|   

[32m[I 2023-01-30 10:16:26,616][0m Trial 83 finished with value: -7.3 and parameters: {'n_steps': 6608, 'gamma': 0.8767222819470974, 'learning_rate': 5.429416506476396e-05, 'clip_range': 0.3919171227988108, 'gae_lambda': 0.8049481422632199}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_85


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6658 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 165      |
|    ep_rew_mean     | -33.4    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 36       |
|    total_timesteps | 6658     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 207         |
|    ep_rew_mean          | 11          |
| time/                   |             |
|    fps                  | 100         |
|    iterations           | 2           |
|    time_elapsed         | 132         |
|    total_timesteps      | 13316       |
| train/                  |             |
|    approx_kl            | 0.062313978 |
|    clip_fraction        | 0.198       |
|    clip_range           | 0.389       |
|    entropy_loss         | -1.75       |
|    explained_variance   | 0.000286    |
|    learning_rate        | 4.

[32m[I 2023-01-30 10:42:05,521][0m Trial 84 finished with value: -85.3 and parameters: {'n_steps': 6658, 'gamma': 0.8808917523674374, 'learning_rate': 4.530476686469402e-05, 'clip_range': 0.38887182195922393, 'gae_lambda': 0.820627736541958}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_86


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6387 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 208      |
|    ep_rew_mean     | -12.8    |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 35       |
|    total_timesteps | 6387     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 215         |
|    ep_rew_mean          | 15.1        |
| time/                   |             |
|    fps                  | 99          |
|    iterations           | 2           |
|    time_elapsed         | 128         |
|    total_timesteps      | 12774       |
| train/                  |             |
|    approx_kl            | 0.096278064 |
|    clip_fraction        | 0.224       |
|    clip_range           | 0.378       |
|    entropy_loss         | -1.72       |
|    explained_variance   | 0.000278    |
|    learning_rate        | 5.

[32m[I 2023-01-30 11:06:49,390][0m Trial 85 finished with value: -57.3 and parameters: {'n_steps': 6387, 'gamma': 0.8870628088380633, 'learning_rate': 5.469647904467348e-05, 'clip_range': 0.37824871893969597, 'gae_lambda': 0.8048412469181632}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_87


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7118 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 196      |
|    ep_rew_mean     | -22.7    |
| time/              |          |
|    fps             | 180      |
|    iterations      | 1        |
|    time_elapsed    | 39       |
|    total_timesteps | 7118     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 245        |
|    ep_rew_mean          | 41.7       |
| time/                   |            |
|    fps                  | 99         |
|    iterations           | 2          |
|    time_elapsed         | 142        |
|    total_timesteps      | 14236      |
| train/                  |            |
|    approx_kl            | 0.09829762 |
|    clip_fraction        | 0.261      |
|    clip_range           | 0.392      |
|    entropy_loss         | -1.73      |
|    explained_variance   | 0.000816   |
|    learning_rate        | 6.27e-05   |
|   

[32m[I 2023-01-30 11:32:41,551][0m Trial 86 finished with value: -85.3 and parameters: {'n_steps': 7118, 'gamma': 0.8722122620702187, 'learning_rate': 6.271597137770311e-05, 'clip_range': 0.3920696430622878, 'gae_lambda': 0.8117902928646331}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_88


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6033 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 205      |
|    ep_rew_mean     | -16      |
| time/              |          |
|    fps             | 176      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 6033     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 204        |
|    ep_rew_mean          | 5.26       |
| time/                   |            |
|    fps                  | 98         |
|    iterations           | 2          |
|    time_elapsed         | 122        |
|    total_timesteps      | 12066      |
| train/                  |            |
|    approx_kl            | 0.05855407 |
|    clip_fraction        | 0.212      |
|    clip_range           | 0.366      |
|    entropy_loss         | -1.75      |
|    explained_variance   | -3.47e-05  |
|    learning_rate        | 6.97e-05   |
|   

[32m[I 2023-01-30 11:58:42,438][0m Trial 87 finished with value: -73.3 and parameters: {'n_steps': 6033, 'gamma': 0.8733351899679356, 'learning_rate': 6.972134222758943e-05, 'clip_range': 0.3661205813303301, 'gae_lambda': 0.9344515502273228}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_89


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6574 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 152      |
|    ep_rew_mean     | -42.5    |
| time/              |          |
|    fps             | 160      |
|    iterations      | 1        |
|    time_elapsed    | 40       |
|    total_timesteps | 6574     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 176         |
|    ep_rew_mean          | -21.1       |
| time/                   |             |
|    fps                  | 91          |
|    iterations           | 2           |
|    time_elapsed         | 143         |
|    total_timesteps      | 13148       |
| train/                  |             |
|    approx_kl            | 0.010650043 |
|    clip_fraction        | 0.322       |
|    clip_range           | 0.113       |
|    entropy_loss         | -1.78       |
|    explained_variance   | -0.00185    |
|    learning_rate        | 9.

[32m[I 2023-01-30 12:26:19,443][0m Trial 88 finished with value: -85.3 and parameters: {'n_steps': 6574, 'gamma': 0.8997996578184063, 'learning_rate': 9.502476001232303e-05, 'clip_range': 0.11281626787938363, 'gae_lambda': 0.8000856200905084}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_90


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7025 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 166      |
|    ep_rew_mean     | -32.5    |
| time/              |          |
|    fps             | 155      |
|    iterations      | 1        |
|    time_elapsed    | 45       |
|    total_timesteps | 7025     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 198         |
|    ep_rew_mean          | -2.88       |
| time/                   |             |
|    fps                  | 90          |
|    iterations           | 2           |
|    time_elapsed         | 156         |
|    total_timesteps      | 14050       |
| train/                  |             |
|    approx_kl            | 0.019452997 |
|    clip_fraction        | 0.337       |
|    clip_range           | 0.137       |
|    entropy_loss         | -1.78       |
|    explained_variance   | -0.00048    |
|    learning_rate        | 7.

[32m[I 2023-01-30 12:53:29,454][0m Trial 89 finished with value: -74.6 and parameters: {'n_steps': 7025, 'gamma': 0.8666167367274403, 'learning_rate': 7.416880512446657e-05, 'clip_range': 0.13679961595927972, 'gae_lambda': 0.8392992489303818}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_91


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6242 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 163      |
|    ep_rew_mean     | -37      |
| time/              |          |
|    fps             | 167      |
|    iterations      | 1        |
|    time_elapsed    | 37       |
|    total_timesteps | 6242     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 207         |
|    ep_rew_mean          | 6.29        |
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 2           |
|    time_elapsed         | 132         |
|    total_timesteps      | 12484       |
| train/                  |             |
|    approx_kl            | 0.061711967 |
|    clip_fraction        | 0.216       |
|    clip_range           | 0.334       |
|    entropy_loss         | -1.75       |
|    explained_variance   | -0.000736   |
|    learning_rate        | 8.

[32m[I 2023-01-30 13:21:57,734][0m Trial 90 finished with value: -85.3 and parameters: {'n_steps': 6242, 'gamma': 0.8593231233101439, 'learning_rate': 8.4468285927998e-05, 'clip_range': 0.3338930477040989, 'gae_lambda': 0.8245559880590002}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_92


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5603 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 138      |
|    ep_rew_mean     | -49.8    |
| time/              |          |
|    fps             | 158      |
|    iterations      | 1        |
|    time_elapsed    | 35       |
|    total_timesteps | 5603     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 180         |
|    ep_rew_mean          | -8.66       |
| time/                   |             |
|    fps                  | 89          |
|    iterations           | 2           |
|    time_elapsed         | 124         |
|    total_timesteps      | 11206       |
| train/                  |             |
|    approx_kl            | 0.065131955 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.4         |
|    entropy_loss         | -1.75       |
|    explained_variance   | 0.000201    |
|    learning_rate        | 3.

[32m[I 2023-01-30 13:48:31,960][0m Trial 91 finished with value: -85.3 and parameters: {'n_steps': 5603, 'gamma': 0.8141366395689689, 'learning_rate': 3.2368220458851315e-05, 'clip_range': 0.3996280338390057, 'gae_lambda': 0.8156027662709502}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_93


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=8011 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 182      |
|    ep_rew_mean     | -26.4    |
| time/              |          |
|    fps             | 154      |
|    iterations      | 1        |
|    time_elapsed    | 51       |
|    total_timesteps | 8011     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 217        |
|    ep_rew_mean          | 23.8       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 2          |
|    time_elapsed         | 179        |
|    total_timesteps      | 16022      |
| train/                  |            |
|    approx_kl            | 0.13447177 |
|    clip_fraction        | 0.305      |
|    clip_range           | 0.382      |
|    entropy_loss         | -1.7       |
|    explained_variance   | 0.000312   |
|    learning_rate        | 9.14e-05   |
|   

[32m[I 2023-01-30 14:16:01,033][0m Trial 92 finished with value: 40.5 and parameters: {'n_steps': 8011, 'gamma': 0.8045411822757871, 'learning_rate': 9.135500194896407e-05, 'clip_range': 0.38191731070578766, 'gae_lambda': 0.8063256205308045}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_94


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7883 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 168      |
|    ep_rew_mean     | -37.8    |
| time/              |          |
|    fps             | 153      |
|    iterations      | 1        |
|    time_elapsed    | 51       |
|    total_timesteps | 7883     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 202        |
|    ep_rew_mean          | 10.5       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 2          |
|    time_elapsed         | 175        |
|    total_timesteps      | 15766      |
| train/                  |            |
|    approx_kl            | 0.10454702 |
|    clip_fraction        | 0.294      |
|    clip_range           | 0.383      |
|    entropy_loss         | -1.73      |
|    explained_variance   | -0.000515  |
|    learning_rate        | 7.91e-05   |
|   

[32m[I 2023-01-30 14:42:58,029][0m Trial 93 finished with value: -85.3 and parameters: {'n_steps': 7883, 'gamma': 0.8789057696262953, 'learning_rate': 7.907962743359304e-05, 'clip_range': 0.38346639467113286, 'gae_lambda': 0.808418155430451}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_95


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=8191 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 180      |
|    ep_rew_mean     | -26.7    |
| time/              |          |
|    fps             | 154      |
|    iterations      | 1        |
|    time_elapsed    | 52       |
|    total_timesteps | 8191     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 195        |
|    ep_rew_mean          | 7.39       |
| time/                   |            |
|    fps                  | 90         |
|    iterations           | 2          |
|    time_elapsed         | 181        |
|    total_timesteps      | 16382      |
| train/                  |            |
|    approx_kl            | 0.10182593 |
|    clip_fraction        | 0.269      |
|    clip_range           | 0.358      |
|    entropy_loss         | -1.73      |
|    explained_variance   | -0.00324   |
|    learning_rate        | 9.06e-05   |
|   

[32m[I 2023-01-30 15:10:44,612][0m Trial 94 finished with value: -73.3 and parameters: {'n_steps': 8191, 'gamma': 0.8770093549252178, 'learning_rate': 9.055747153413401e-05, 'clip_range': 0.35752860307716966, 'gae_lambda': 0.8052256048551403}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_96


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7966 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 155      |
|    ep_rew_mean     | -39.5    |
| time/              |          |
|    fps             | 154      |
|    iterations      | 1        |
|    time_elapsed    | 51       |
|    total_timesteps | 7966     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 198        |
|    ep_rew_mean          | 6.71       |
| time/                   |            |
|    fps                  | 90         |
|    iterations           | 2          |
|    time_elapsed         | 176        |
|    total_timesteps      | 15932      |
| train/                  |            |
|    approx_kl            | 0.08518591 |
|    clip_fraction        | 0.274      |
|    clip_range           | 0.368      |
|    entropy_loss         | -1.73      |
|    explained_variance   | 0.000749   |
|    learning_rate        | 9.71e-05   |
|   

[32m[I 2023-01-30 15:37:54,360][0m Trial 95 finished with value: -74.5 and parameters: {'n_steps': 7966, 'gamma': 0.8539268498770906, 'learning_rate': 9.714078216899125e-05, 'clip_range': 0.36841326175029226, 'gae_lambda': 0.8197242772786469}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_97


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=5898 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 154      |
|    ep_rew_mean     | -40.4    |
| time/              |          |
|    fps             | 153      |
|    iterations      | 1        |
|    time_elapsed    | 38       |
|    total_timesteps | 5898     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 198        |
|    ep_rew_mean          | 9.69       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 2          |
|    time_elapsed         | 131        |
|    total_timesteps      | 11796      |
| train/                  |            |
|    approx_kl            | 0.07095389 |
|    clip_fraction        | 0.287      |
|    clip_range           | 0.346      |
|    entropy_loss         | -1.74      |
|    explained_variance   | -0.00146   |
|    learning_rate        | 8.34e-05   |
|   

[32m[I 2023-01-30 16:04:20,912][0m Trial 96 finished with value: -96.0 and parameters: {'n_steps': 5898, 'gamma': 0.8188821520076425, 'learning_rate': 8.34229217770821e-05, 'clip_range': 0.34605136079836657, 'gae_lambda': 0.8131996695946775}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_98


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7720 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 166      |
|    ep_rew_mean     | -32.3    |
| time/              |          |
|    fps             | 154      |
|    iterations      | 1        |
|    time_elapsed    | 50       |
|    total_timesteps | 7720     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 192         |
|    ep_rew_mean          | -0.12       |
| time/                   |             |
|    fps                  | 89          |
|    iterations           | 2           |
|    time_elapsed         | 171         |
|    total_timesteps      | 15440       |
| train/                  |             |
|    approx_kl            | 0.052449908 |
|    clip_fraction        | 0.246       |
|    clip_range           | 0.261       |
|    entropy_loss         | -1.76       |
|    explained_variance   | 0.000425    |
|    learning_rate        | 6.

[32m[I 2023-01-30 16:30:46,701][0m Trial 97 finished with value: -85.3 and parameters: {'n_steps': 7720, 'gamma': 0.8114531234834148, 'learning_rate': 6.632728834241513e-05, 'clip_range': 0.2606417832372353, 'gae_lambda': 0.8097819586990599}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_99


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6882 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 202      |
|    ep_rew_mean     | -11.8    |
| time/              |          |
|    fps             | 153      |
|    iterations      | 1        |
|    time_elapsed    | 44       |
|    total_timesteps | 6882     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 163         |
|    ep_rew_mean          | -25.6       |
| time/                   |             |
|    fps                  | 89          |
|    iterations           | 2           |
|    time_elapsed         | 153         |
|    total_timesteps      | 13764       |
| train/                  |             |
|    approx_kl            | 0.033908892 |
|    clip_fraction        | 0.31        |
|    clip_range           | 0.194       |
|    entropy_loss         | -1.77       |
|    explained_variance   | 5.25e-06    |
|    learning_rate        | 9.

[32m[I 2023-01-30 16:58:03,031][0m Trial 98 finished with value: -73.3 and parameters: {'n_steps': 6882, 'gamma': 0.8001516874444216, 'learning_rate': 9.010927176564117e-05, 'clip_range': 0.19364976782472182, 'gae_lambda': 0.8931511898560015}. Best is trial 23 with value: 209.3.[0m


Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./tensorboard_log/PPO_100


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7250 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 192      |
|    ep_rew_mean     | -21.1    |
| time/              |          |
|    fps             | 152      |
|    iterations      | 1        |
|    time_elapsed    | 47       |
|    total_timesteps | 7250     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 215        |
|    ep_rew_mean          | 15.1       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 2          |
|    time_elapsed         | 162        |
|    total_timesteps      | 14500      |
| train/                  |            |
|    approx_kl            | 0.09468086 |
|    clip_fraction        | 0.28       |
|    clip_range           | 0.376      |
|    entropy_loss         | -1.73      |
|    explained_variance   | 0.000664   |
|    learning_rate        | 7.17e-05   |
|   

[32m[I 2023-01-30 17:24:52,402][0m Trial 99 finished with value: -72.7 and parameters: {'n_steps': 7250, 'gamma': 0.8265198093736982, 'learning_rate': 7.166424438383656e-05, 'clip_range': 0.37584976197199327, 'gae_lambda': 0.8720125081272557}. Best is trial 23 with value: 209.3.[0m


In [6]:
study.best_params

{'n_steps': 5976,
 'gamma': 0.8431945080247621,
 'learning_rate': 9.434717363652453e-05,
 'clip_range': 0.366043287552883,
 'gae_lambda': 0.8177999838257695}

In [7]:
study.best_trial

FrozenTrial(number=23, values=[209.3], datetime_start=datetime.datetime(2023, 1, 29, 9, 11, 37, 111110), datetime_complete=datetime.datetime(2023, 1, 29, 9, 35, 22, 783561), params={'n_steps': 5976, 'gamma': 0.8431945080247621, 'learning_rate': 9.434717363652453e-05, 'clip_range': 0.366043287552883, 'gae_lambda': 0.8177999838257695}, distributions={'n_steps': IntDistribution(high=8192, log=False, low=512, step=1), 'gamma': FloatDistribution(high=0.9999, log=True, low=0.8, step=None), 'learning_rate': FloatDistribution(high=0.0001, log=True, low=1e-05, step=None), 'clip_range': FloatDistribution(high=0.4, log=False, low=0.1, step=None), 'gae_lambda': FloatDistribution(high=0.99, log=False, low=0.8, step=None)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=23, state=TrialState.COMPLETE, value=None)

In [8]:
study.trials_dataframe

<bound method Study.trials_dataframe of <optuna.study.study.Study object at 0x000002A367E69B80>>