In [None]:
!pip install gym_super_mario_bros==7.3.0 nes_py

In [None]:
!pip install stable-baselines3[extra]

In [None]:
conda update --all

In [None]:
conda install pytorch torchvision torchaudio -c pytorch

In [None]:
conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch

In [None]:
conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch-lts

In [None]:
conda install freetype=2.10.4

In [1]:
import gym

# Import the game
import gym_super_mario_bros

# Import the joypad wrapper
from nes_py.wrappers import JoypadSpace

# Import the simplified controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, RIGHT_ONLY 

In [2]:
# Import frame stack and grayscaling wrapper
from gym.wrappers import FrameStack, GrayScaleObservation

# Import vectorization wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv

# Import Matplotlib to show the impact of stack framing
from matplotlib import pyplot as plt

In [None]:
# Display the reduced action set (a subset of all possible button combinations);
# as the last expression it becomes the cell's output.
SIMPLE_MOVEMENT
# Alternative action sets tried during experimentation:
#RIGHT_ONLY
#CUSTOM_RIGHT_ONLY = [['right', 'B'], ['right', 'A', 'B']]
#CUSTOM_RIGHT_ONLY

In [None]:
class CustomReward(gym.Wrapper):
    """Reward wrapper adding score-based shaping and a terminal bonus/penalty.

    Per step the wrapped env's reward is increased by the score gained since
    the previous step divided by 40. When the episode ends, +350 is added if
    ``info["flag_get"]`` is truthy, otherwise 50 is subtracted. The total is
    divided by 10 before being returned.
    """

    def __init__(self, env):
        super(CustomReward, self).__init__(env)
        # Score reported by the previous step; baseline for the score delta.
        self._current_score = 0

    def reset(self, **kwargs):
        """Reset the wrapped env and restore the score baseline.

        Without this, the first step after a reset would be rewarded against
        the score left over from the previous episode.
        """
        self._current_score = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the wrapped env and return ``(state, shaped_reward, done, info)``."""
        state, reward, done, info = self.env.step(action)

        # Reward the score gained during this step.
        reward += (info["score"] - self._current_score) / 40.0
        self._current_score = info["score"]

        if done:
            # Terminal shaping: big bonus for reaching the flag, penalty otherwise.
            if info["flag_get"]:
                reward += 350.0
            else:
                reward -= 50.0

        # Scale the reward down before handing it to the learner.
        return state, reward / 10.0, done, info

In [None]:
class CustomRewardNoMovingRightReward(gym.Wrapper):
    """Reward wrapper that tolerates moving left for a limited step budget.

    On top of the wrapped env's reward it adds:

    * the score gained during the step, divided by 10;
    * +4 for a step that moves left while the "no progress" counter is still
      below the budget (original author's note: the base reward for moving
      left is -3, so this turns it into a small positive reward);
    * -2 for standing still once the budget is used up;
    * +350 / -100 at the end of an episode depending on ``info["flag_get"]``.

    NOTE: this notebook defines a class with this name in several cells;
    whichever cell is executed last is the one in effect.
    """

    # Initial value of the x-position tracker.
    _START_X_POS = 40
    # Number of "no progress" counts after which standing still is penalised.
    _WRONG_DIRECTION_LIMIT = 750

    def __init__(self, env):
        super(CustomRewardNoMovingRightReward, self).__init__(env)
        self._reset_trackers()

    def _reset_trackers(self):
        """Restore every per-episode tracker to its start-of-episode value."""
        self._current_score = 0
        self._current_x_pos = self._START_X_POS
        self._max_x_pos_memory = 0
        self._previous_x_pos_memory = 0
        self._steps_run_wrong_direction = 0

    def reset(self, **kwargs):
        """Reset the wrapped env and all trackers.

        Previously the trackers survived a reset, so a new episode was judged
        against the score / furthest position of the previous one.
        """
        self._reset_trackers()
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the wrapped env and return ``(state, shaped_reward, done, info)``."""
        state, reward, done, info = self.env.step(action)
        x_pos = info["x_pos"]

        score_diff = info["score"] - self._current_score
        reward += score_diff / 10.0

        # Any step that does not set a new furthest position counts as
        # "no progress".
        if self._max_x_pos_memory < x_pos:
            self._max_x_pos_memory = x_pos
        else:
            self._steps_run_wrong_direction += 1

        # Handle the agent being stuck (e.g. against the left wall): a step
        # without movement is counted a second time.
        standstill = self._previous_x_pos_memory == x_pos
        if standstill:
            self._steps_run_wrong_direction += 1
        self._previous_x_pos_memory = x_pos

        # Force progress once too many steps went in the wrong direction.
        budget_left = self._steps_run_wrong_direction < self._WRONG_DIRECTION_LIMIT
        if x_pos < self._current_x_pos and budget_left:
            reward += 4
        elif standstill and not budget_left:
            # ``not budget_left`` also covers the counter being exactly at the
            # limit, which the former ``> 750`` test let slip through.
            reward -= 2
        elif score_diff > 1:
            # Scoring points refills the budget.
            self._steps_run_wrong_direction = 0

        self._current_score = info["score"]
        self._current_x_pos = x_pos

        if done:
            reward += 350.0 if info["flag_get"] else -100.0
            # Start the next episode from a clean slate whatever the outcome.
            self._reset_trackers()
        return state, reward, done, info

In [None]:
class CustomRewardNoMovingRightReward(gym.Wrapper):
    """Reward wrapper penalising backtracking in proportion to time wasted.

    On top of the wrapped env's reward it adds the score gained during the
    step (divided by 10). A "no progress" counter grows by the in-game time
    that elapsed during every step that does not reach a new furthest
    x-position (and again if the agent did not move at all). Moving left costs
    ``min(counter / 10000, 1)``, standing still costs 0.2. At the end of an
    episode +350 / -100 is added depending on ``info["flag_get"]``. The total
    is divided by 10 before being returned.

    NOTE: this notebook defines a class with this name in several cells;
    whichever cell is executed last is the one in effect.
    """

    # Initial value of the x-position tracker.
    _START_X_POS = 40
    # Initial value of the in-game clock tracker.
    _START_TIME = 400

    def __init__(self, env):
        super(CustomRewardNoMovingRightReward, self).__init__(env)
        self._reset_trackers()

    def _reset_trackers(self):
        """Restore every per-episode tracker to its start-of-episode value."""
        self._current_score = 0
        self._current_time = self._START_TIME
        self._current_x_pos = self._START_X_POS
        self._max_x_pos_memory = 0
        self._previous_x_pos_memory = 0
        self._steps_run_wrong_direction = 0

    def reset(self, **kwargs):
        """Reset the wrapped env and all trackers so episodes do not leak state."""
        self._reset_trackers()
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the wrapped env and return ``(state, shaped_reward, done, info)``."""
        state, reward, done, info = self.env.step(action)
        x_pos = info["x_pos"]

        score_diff = info["score"] - self._current_score
        reward += score_diff / 10.0

        # In-game time consumed by this step. The tracker is now advanced on
        # every step; before, it stayed at its initial value so this was the
        # total time elapsed rather than a per-step delta. Clamped at 0 so a
        # clock that jumps back up (presumably after the level restarts --
        # TODO confirm) cannot shrink the counter.
        time_diff = max(self._current_time - info["time"], 0)
        self._current_time = info["time"]

        # Any step that does not set a new furthest position counts as
        # "no progress".
        if self._max_x_pos_memory < x_pos:
            self._max_x_pos_memory = x_pos
        else:
            self._steps_run_wrong_direction += time_diff

        # Handle the agent being stuck (e.g. against the left wall): a step
        # without movement is counted a second time.
        standstill = self._previous_x_pos_memory == x_pos
        if standstill:
            self._steps_run_wrong_direction += time_diff
        self._previous_x_pos_memory = x_pos

        # Penalise moving left more the longer no progress was made; the
        # counter is deliberately never refilled by scoring in this variant.
        if x_pos < self._current_x_pos:
            reward -= min(self._steps_run_wrong_direction / 10000, 1)
        elif standstill:
            reward -= 0.2

        self._current_score = info["score"]
        self._current_x_pos = x_pos

        if done:
            reward += 350.0 if info["flag_get"] else -100.0
            # Start the next episode from a clean slate whatever the outcome.
            self._reset_trackers()
        return state, reward / 10, done, info

In [None]:
class CustomRewardNoMovingRightReward(gym.Wrapper):
    """Reward wrapper with clipped score reward, backtracking and life penalties.

    On top of the wrapped env's reward it adds:

    * the score gained during the step, capped at 150 (original author's
      note: avoids over-rewarding power-ups and coins);
    * a penalty of ``min(counter / 100, 10)`` for moving left or standing
      still, where ``counter`` accumulates the in-game time spent on steps
      that made no progress;
    * -50 when a life is lost without the episode ending;
    * +350 / -100 at the end of an episode depending on ``info["flag_get"]``.

    The total is divided by 10 before being returned.

    NOTE: this notebook defines a class with this name in several cells;
    whichever cell is executed last is the one in effect.
    """

    # Initial values of the trackers.
    _START_X_POS = 40
    _START_TIME = 400
    _START_LIVES = 2

    def __init__(self, env):
        super(CustomRewardNoMovingRightReward, self).__init__(env)
        self._reset_trackers()

    def _restart_progress(self):
        """Reset the trackers tied to a single attempt (position, clock, counter)."""
        self._current_x_pos = self._START_X_POS
        self._current_time = self._START_TIME
        self._steps_run_wrong_direction = 0

    def _reset_trackers(self):
        """Restore every per-episode tracker to its start-of-episode value."""
        self._current_score = 0
        self._number_of_lives = self._START_LIVES
        self._max_x_pos_memory = 0
        self._previous_x_pos_memory = 0
        self._restart_progress()

    def reset(self, **kwargs):
        """Reset the wrapped env and all trackers so episodes do not leak state."""
        self._reset_trackers()
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the wrapped env and return ``(state, shaped_reward, done, info)``."""
        state, reward, done, info = self.env.step(action)
        x_pos = info["x_pos"]

        score_diff = info["score"] - self._current_score
        life_loss = info["life"] < self._number_of_lives

        # Clip so that a single large score jump does not dominate the reward.
        reward += min(score_diff, 150)

        # In-game time consumed by this step.
        time_diff = self._current_time - info["time"]

        # Any step that does not set a new furthest position counts as
        # "no progress".
        if self._max_x_pos_memory < x_pos:
            self._max_x_pos_memory = x_pos
        else:
            self._steps_run_wrong_direction += time_diff

        # Handle the agent being stuck (e.g. against the left wall): a step
        # without movement is counted a second time.
        standstill = self._previous_x_pos_memory == x_pos
        if standstill:
            self._steps_run_wrong_direction += time_diff
        self._previous_x_pos_memory = x_pos

        # Moving left and standing still are penalised identically, growing
        # with the time already wasted; the counter is deliberately never
        # refilled by scoring in this variant.
        if x_pos < self._current_x_pos or standstill:
            reward -= min(self._steps_run_wrong_direction / 100, 10)

        self._current_score = info["score"]
        self._current_x_pos = x_pos
        self._current_time = info["time"]

        if done:
            reward += 350.0 if info["flag_get"] else -100.0
            # Start the next episode from a clean slate whatever the outcome.
            # The life-loss penalty is intentionally skipped here: before, a
            # fatal step was charged both penalties and the life counter was
            # overwritten right after being reset.
            self._reset_trackers()
        else:
            if life_loss:
                reward -= 50.0
                self._restart_progress()
            # Follow the reported life count in both directions so that a
            # death after a gained life is still detected (assumes the env can
            # report a higher count, e.g. after a 1-up -- TODO confirm).
            self._number_of_lives = info["life"]

        return state, reward / 10, done, info

In [15]:
class CustomReward6(gym.Wrapper):
    """Reward wrapper that replaces the env's reward with a score-based one.

    The wrapped env's own reward is discarded. Each step is rewarded with the
    score gained since the previous step divided by 10. Losing a life without
    the episode ending costs 50. When the episode ends, +350 is added if
    ``info["flag_get"]`` is truthy, otherwise 100 is subtracted.

    NOTE(review): the flag bonus is only granted when the step that reaches
    the flag also ends the episode -- confirm that the chosen environment
    sets ``done`` on ``flag_get``.
    """

    # Initial value of the life tracker.
    _START_LIVES = 2

    def __init__(self, env):
        super(CustomReward6, self).__init__(env)
        self._reset_trackers()

    def _reset_trackers(self):
        """Restore the score and life trackers to their start-of-episode values."""
        self._current_score = 0
        self._number_of_lives = self._START_LIVES

    def reset(self, **kwargs):
        """Reset the wrapped env and the trackers.

        Previously the trackers were only cleared after a failed episode, so
        a reset issued at any other time left the next episode being scored
        against stale values.
        """
        self._reset_trackers()
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the wrapped env and return ``(state, shaped_reward, done, info)``."""
        state, _, done, info = self.env.step(action)

        # The env's own reward is replaced by the score gained this step.
        reward = (info["score"] - self._current_score) / 10
        self._current_score = info["score"]

        # A reported life count of 255 is always treated as a lost life
        # (presumably the counter wrapping below zero -- TODO confirm).
        life_loss = info["life"] == 255 or info["life"] < self._number_of_lives

        if done:
            reward += 350.0 if info["flag_get"] else -100.0
            # Start the next episode from a clean slate whatever the outcome.
            self._reset_trackers()
        else:
            if life_loss:
                reward -= 50.0
            # Follow the reported life count in both directions so that a
            # death after a gained life is still detected (assumes the env can
            # report a higher count, e.g. after a 1-up -- TODO confirm).
            self._number_of_lives = info["life"]

        return state, reward, done, info

In [16]:
# Candidate reduced control scheme, kept for experiments (see the disabled
# JoypadSpace line inside the factory below).
customMovement = [['right', 'B'], ['right', 'A', 'B'], ['A'], ['left', 'B'], ['left', 'A', 'B']]


def _build_mario_env():
    """Build the single-environment pipeline: game -> reward -> controls -> grayscale."""
    # Base game
    base_env = gym_super_mario_bros.make("SuperMarioBros-v0")
    # Custom reward function
    shaped_env = CustomReward6(base_env)
    # Simplified controls
    #joypad_env = JoypadSpace(shaped_env, customMovement)
    joypad_env = JoypadSpace(shaped_env, SIMPLE_MOVEMENT)
    # Grayscale observations, keeping the channel dimension
    return GrayScaleObservation(joypad_env, keep_dim=True)


# Vectorize the single environment, then stack the last 4 frames along the
# channel axis.
env = VecFrameStack(DummyVecEnv([_build_mario_env]), 4, channels_order="last")

In [17]:
# Reset the vectorized environment and keep the initial observation.
state = env.reset()

In [None]:
# Show the button combination behind one randomly sampled action index.
SIMPLE_MOVEMENT[env.action_space.sample()]

In [None]:
# Take one random step; the action is wrapped in a list because the
# environment is vectorized. The `info` returned by that step is displayed as
# the cell's output.
state, reward, done, info = env.step([env.action_space.sample()])
info

In [None]:
# Disabled (kept as a string literal so it does not execute): plots each
# slice along the last axis of `state` side by side.
"""plt.figure(figsize=(10, 8))
for idx in range(state.shape[3]):
    plt.subplot(1, 4, idx + 1)
    plt.imshow(state[0][:, :, idx])
plt.show()"""

In [7]:
# Import os for file path management
import os
# Import PPO for algos
from stable_baselines3 import PPO
# Import base callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [8]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves a snapshot of the model every ``check_freq`` calls.

    Args:
        check_freq: Save whenever the callback's call counter (``n_calls``)
            is a multiple of this value.
        save_path: Directory for the snapshots; it is created if missing.
            ``None`` disables saving.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the snapshot directory, if one was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a snapshot when due; always return True so training continues."""
        # Skip saving when no directory was configured: `_init_callback`
        # already tolerates `save_path=None`, but joining a path onto None
        # would raise a TypeError here.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            # Despite the "best_model" prefix, this is a periodic snapshot
            # tagged with the call count, not the best-scoring model.
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [9]:
# Directory that receives the periodic model snapshots (passed to the callback as save_path).
CHECKPOINT_DIR = './train-reward-score-right-and-left13/'
# Directory passed to PPO as tensorboard_log.
LOG_DIR = './logs/train-reward-score-right-and-left13/'

In [10]:
# Set up the model-saving callback: one snapshot every 100000 callback calls, written to CHECKPOINT_DIR
callback = TrainAndLoggingCallback(check_freq=100000, save_path=CHECKPOINT_DIR)

In [11]:
# Create the PPO agent: CNN policy on the stacked-frame env, TensorBoard logs in LOG_DIR
model = PPO('CnnPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.00001, n_steps=512) 

Using cuda device
Wrapping the env in a VecTransposeImage.


In [20]:
# Train the agent for 10,000,000 timesteps; the callback saves snapshots along the way
model.learn(total_timesteps=10000000, callback=callback)

Logging to ./logs/PPO_10
----------------------------
| time/              |     |
|    fps             | 181 |
|    iterations      | 1   |
|    time_elapsed    | 2   |
|    total_timesteps | 512 |
----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 114          |
|    iterations           | 2            |
|    time_elapsed         | 8            |
|    total_timesteps      | 1024         |
| train/                  |              |
|    approx_kl            | 0.0075984164 |
|    clip_fraction        | 0.0768       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.464       |
|    explained_variance   | 0.692        |
|    learning_rate        | 1e-06        |
|    loss                 | 110          |
|    n_updates            | 126960       |
|    policy_gradient_loss | 0.0152       |
|    value_loss           | 361          |
-----------------------------------------

------------------------------------------
| time/                   |              |
|    fps                  | 85           |
|    iterations           | 13           |
|    time_elapsed         | 78           |
|    total_timesteps      | 6656         |
| train/                  |              |
|    approx_kl            | 0.0044059525 |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.13        |
|    explained_variance   | 0.905        |
|    learning_rate        | 1e-06        |
|    loss                 | 31.6         |
|    n_updates            | 127070       |
|    policy_gradient_loss | 0.000397     |
|    value_loss           | 104          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 85           |
|    iterations           | 14           |
|    time_elapsed         | 84           |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 84          |
|    iterations           | 24          |
|    time_elapsed         | 145         |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.004654659 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.18       |
|    explained_variance   | -6.14       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.414       |
|    n_updates            | 127180      |
|    policy_gradient_loss | -0.0016     |
|    value_loss           | 1.99        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 84           |
|    iterations           | 25           |
|    time_elapsed         | 151          |
|    total_timesteps      | 1

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 35            |
|    time_elapsed         | 213           |
|    total_timesteps      | 17920         |
| train/                  |               |
|    approx_kl            | 0.00059351453 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.11         |
|    explained_variance   | 0.533         |
|    learning_rate        | 1e-06         |
|    loss                 | 17.1          |
|    n_updates            | 127290        |
|    policy_gradient_loss | -0.00135      |
|    value_loss           | 106           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 36            |
|    time_elapsed         | 219 

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 46           |
|    time_elapsed         | 281          |
|    total_timesteps      | 23552        |
| train/                  |              |
|    approx_kl            | 0.0013220884 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | -1.72        |
|    learning_rate        | 1e-06        |
|    loss                 | 1.12         |
|    n_updates            | 127400       |
|    policy_gradient_loss | -0.000658    |
|    value_loss           | 2.94         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 47           |
|    time_elapsed         | 287          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 57           |
|    time_elapsed         | 349          |
|    total_timesteps      | 29184        |
| train/                  |              |
|    approx_kl            | 0.0043652607 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.11        |
|    explained_variance   | -2.25        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.664        |
|    n_updates            | 127510       |
|    policy_gradient_loss | -0.0028      |
|    value_loss           | 1.83         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 58          |
|    time_elapsed         | 355         |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 68            |
|    time_elapsed         | 416           |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 0.00040032342 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.11         |
|    explained_variance   | 0.483         |
|    learning_rate        | 1e-06         |
|    loss                 | 77.5          |
|    n_updates            | 127620        |
|    policy_gradient_loss | 0.000161      |
|    value_loss           | 144           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 69           |
|    time_elapsed         | 422     

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 79           |
|    time_elapsed         | 484          |
|    total_timesteps      | 40448        |
| train/                  |              |
|    approx_kl            | 0.0012141323 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.19        |
|    explained_variance   | -1.69        |
|    learning_rate        | 1e-06        |
|    loss                 | 1.55         |
|    n_updates            | 127730       |
|    policy_gradient_loss | -0.0015      |
|    value_loss           | 3.18         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 80           |
|    time_elapsed         | 491          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 90           |
|    time_elapsed         | 552          |
|    total_timesteps      | 46080        |
| train/                  |              |
|    approx_kl            | 0.0014890039 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.14        |
|    explained_variance   | -7.05        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.458        |
|    n_updates            | 127840       |
|    policy_gradient_loss | -0.00271     |
|    value_loss           | 1.18         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 91            |
|    time_elapsed         | 558           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 101          |
|    time_elapsed         | 619          |
|    total_timesteps      | 51712        |
| train/                  |              |
|    approx_kl            | 0.0023691626 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | -7.04        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.425        |
|    n_updates            | 127950       |
|    policy_gradient_loss | -0.00284     |
|    value_loss           | 0.821        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 102          |
|    time_elapsed         | 625          |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 112         |
|    time_elapsed         | 686         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.005109867 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.23       |
|    explained_variance   | -1.41       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.445       |
|    n_updates            | 128060      |
|    policy_gradient_loss | -0.00458    |
|    value_loss           | 0.792       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 113          |
|    time_elapsed         | 692          |
|    total_timesteps      | 5

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 123          |
|    time_elapsed         | 753          |
|    total_timesteps      | 62976        |
| train/                  |              |
|    approx_kl            | 0.0013007813 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.325        |
|    learning_rate        | 1e-06        |
|    loss                 | 1.5          |
|    n_updates            | 128170       |
|    policy_gradient_loss | -0.0013      |
|    value_loss           | 2.87         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 124           |
|    time_elapsed         | 759           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 134          |
|    time_elapsed         | 820          |
|    total_timesteps      | 68608        |
| train/                  |              |
|    approx_kl            | 0.0020866822 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.25        |
|    explained_variance   | -5.92        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.133        |
|    n_updates            | 128280       |
|    policy_gradient_loss | -0.00198     |
|    value_loss           | 0.507        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 135           |
|    time_elapsed         | 826           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 145          |
|    time_elapsed         | 887          |
|    total_timesteps      | 74240        |
| train/                  |              |
|    approx_kl            | 0.0006664336 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | -2.24        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.239        |
|    n_updates            | 128390       |
|    policy_gradient_loss | -0.000875    |
|    value_loss           | 0.465        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 146           |
|    time_elapsed         | 893           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 156           |
|    time_elapsed         | 953           |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 0.00027906988 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.29         |
|    explained_variance   | -0.598        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.223         |
|    n_updates            | 128500        |
|    policy_gradient_loss | -0.000173     |
|    value_loss           | 0.706         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 157           |
|    time_elapsed         | 959 

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 167           |
|    time_elapsed         | 1020          |
|    total_timesteps      | 85504         |
| train/                  |               |
|    approx_kl            | 0.00020025868 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.28         |
|    explained_variance   | -1.69         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.147         |
|    n_updates            | 128610        |
|    policy_gradient_loss | -7.08e-05     |
|    value_loss           | 0.552         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 168          |
|    time_elapsed         | 1026    

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 178           |
|    time_elapsed         | 1087          |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 0.00010857626 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.18         |
|    explained_variance   | -5.68         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.154         |
|    n_updates            | 128720        |
|    policy_gradient_loss | 2.76e-05      |
|    value_loss           | 0.362         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 179           |
|    time_elapsed         | 1093

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 189          |
|    time_elapsed         | 1154         |
|    total_timesteps      | 96768        |
| train/                  |              |
|    approx_kl            | 0.0001275657 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | 0.122        |
|    learning_rate        | 1e-06        |
|    loss                 | 19.7         |
|    n_updates            | 128830       |
|    policy_gradient_loss | -0.00101     |
|    value_loss           | 34.2         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 190           |
|    time_elapsed         | 1160          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 200          |
|    time_elapsed         | 1222         |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 0.0018521892 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.27        |
|    explained_variance   | -2.3         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.133        |
|    n_updates            | 128940       |
|    policy_gradient_loss | -0.000947    |
|    value_loss           | 0.236        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 201         |
|    time_elapsed         | 1228        |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 211          |
|    time_elapsed         | 1288         |
|    total_timesteps      | 108032       |
| train/                  |              |
|    approx_kl            | 0.0027657552 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.19        |
|    explained_variance   | -1.53        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.129        |
|    n_updates            | 129050       |
|    policy_gradient_loss | -0.00279     |
|    value_loss           | 0.589        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 212           |
|    time_elapsed         | 1294          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 222          |
|    time_elapsed         | 1355         |
|    total_timesteps      | 113664       |
| train/                  |              |
|    approx_kl            | 0.0007134485 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.13        |
|    explained_variance   | -2.32        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.376        |
|    n_updates            | 129160       |
|    policy_gradient_loss | -0.000933    |
|    value_loss           | 0.762        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 223          |
|    time_elapsed         | 1361         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 233          |
|    time_elapsed         | 1422         |
|    total_timesteps      | 119296       |
| train/                  |              |
|    approx_kl            | 0.0005646009 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | -2.28        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.208        |
|    n_updates            | 129270       |
|    policy_gradient_loss | -0.000845    |
|    value_loss           | 0.394        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 234          |
|    time_elapsed         | 1428         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 244          |
|    time_elapsed         | 1489         |
|    total_timesteps      | 124928       |
| train/                  |              |
|    approx_kl            | 0.0003590217 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | -1.68        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0956       |
|    n_updates            | 129380       |
|    policy_gradient_loss | -0.000444    |
|    value_loss           | 0.289        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 245          |
|    time_elapsed         | 1495         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 255         |
|    time_elapsed         | 1556        |
|    total_timesteps      | 130560      |
| train/                  |             |
|    approx_kl            | 0.003290937 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.24       |
|    explained_variance   | -6.36       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0442      |
|    n_updates            | 129490      |
|    policy_gradient_loss | -0.00346    |
|    value_loss           | 0.198       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 256          |
|    time_elapsed         | 1562         |
|    total_timesteps      | 1

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 266          |
|    time_elapsed         | 1623         |
|    total_timesteps      | 136192       |
| train/                  |              |
|    approx_kl            | 0.0026464602 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.01        |
|    explained_variance   | -1.08        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0518       |
|    n_updates            | 129600       |
|    policy_gradient_loss | -0.00358     |
|    value_loss           | 0.178        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 267          |
|    time_elapsed         | 1629         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 277           |
|    time_elapsed         | 1690          |
|    total_timesteps      | 141824        |
| train/                  |               |
|    approx_kl            | 0.00045704108 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.09         |
|    explained_variance   | -1.14         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.133         |
|    n_updates            | 129710        |
|    policy_gradient_loss | -0.00055      |
|    value_loss           | 0.219         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 278          |
|    time_elapsed         | 1696    

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 288         |
|    time_elapsed         | 1757        |
|    total_timesteps      | 147456      |
| train/                  |             |
|    approx_kl            | 0.003475756 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | -1.74       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0613      |
|    n_updates            | 129820      |
|    policy_gradient_loss | -0.00386    |
|    value_loss           | 0.242       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 289          |
|    time_elapsed         | 1763         |
|    total_timesteps      | 1

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 299          |
|    time_elapsed         | 1824         |
|    total_timesteps      | 153088       |
| train/                  |              |
|    approx_kl            | 0.0003275118 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.18        |
|    explained_variance   | -1.98        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.078        |
|    n_updates            | 129930       |
|    policy_gradient_loss | -0.000388    |
|    value_loss           | 0.183        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 300          |
|    time_elapsed         | 1830         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 310         |
|    time_elapsed         | 1891        |
|    total_timesteps      | 158720      |
| train/                  |             |
|    approx_kl            | 0.004120391 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.14       |
|    explained_variance   | -1.65       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.079       |
|    n_updates            | 130040      |
|    policy_gradient_loss | -0.00303    |
|    value_loss           | 0.289       |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 311           |
|    time_elapsed         | 1897          |
|    total_timesteps    

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 321          |
|    time_elapsed         | 1958         |
|    total_timesteps      | 164352       |
| train/                  |              |
|    approx_kl            | 0.0013888963 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | -0.475       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.146        |
|    n_updates            | 130150       |
|    policy_gradient_loss | -0.00114     |
|    value_loss           | 0.249        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 322          |
|    time_elapsed         | 1964         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 332         |
|    time_elapsed         | 2025        |
|    total_timesteps      | 169984      |
| train/                  |             |
|    approx_kl            | 0.009603014 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.17       |
|    explained_variance   | -1.52       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0367      |
|    n_updates            | 130260      |
|    policy_gradient_loss | -0.00576    |
|    value_loss           | 0.178       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 333         |
|    time_elapsed         | 2031        |
|    total_timesteps      | 170496

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 343          |
|    time_elapsed         | 2093         |
|    total_timesteps      | 175616       |
| train/                  |              |
|    approx_kl            | 0.0002100484 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.11        |
|    explained_variance   | -4.04        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0599       |
|    n_updates            | 130370       |
|    policy_gradient_loss | -0.000368    |
|    value_loss           | 0.137        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 344           |
|    time_elapsed         | 2099          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 354          |
|    time_elapsed         | 2178         |
|    total_timesteps      | 181248       |
| train/                  |              |
|    approx_kl            | 0.0002021162 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | -4.22        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0509       |
|    n_updates            | 130480       |
|    policy_gradient_loss | -7.79e-05    |
|    value_loss           | 0.152        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 355          |
|    time_elapsed         | 2184         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 365           |
|    time_elapsed         | 2244          |
|    total_timesteps      | 186880        |
| train/                  |               |
|    approx_kl            | 4.6891626e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.28         |
|    explained_variance   | 0.485         |
|    learning_rate        | 1e-06         |
|    loss                 | 15.4          |
|    n_updates            | 130590        |
|    policy_gradient_loss | 1.53e-05      |
|    value_loss           | 29.1          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 366          |
|    time_elapsed         | 2250    

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 376           |
|    time_elapsed         | 2311          |
|    total_timesteps      | 192512        |
| train/                  |               |
|    approx_kl            | 0.00072433613 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.14         |
|    explained_variance   | -3.18         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0595        |
|    n_updates            | 130700        |
|    policy_gradient_loss | -0.000543     |
|    value_loss           | 0.124         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 377          |
|    time_elapsed         | 2318    

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 387          |
|    time_elapsed         | 2379         |
|    total_timesteps      | 198144       |
| train/                  |              |
|    approx_kl            | 0.0025518767 |
|    clip_fraction        | 0.000391     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.13        |
|    explained_variance   | -1.84        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0557       |
|    n_updates            | 130810       |
|    policy_gradient_loss | -0.00229     |
|    value_loss           | 0.177        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 388         |
|    time_elapsed         | 2385        |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 398           |
|    time_elapsed         | 2446          |
|    total_timesteps      | 203776        |
| train/                  |               |
|    approx_kl            | 0.00044332503 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.15         |
|    explained_variance   | 0.331         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.574         |
|    n_updates            | 130920        |
|    policy_gradient_loss | -0.000995     |
|    value_loss           | 1.99          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 399          |
|    time_elapsed         | 2452    

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 409           |
|    time_elapsed         | 2513          |
|    total_timesteps      | 209408        |
| train/                  |               |
|    approx_kl            | 0.00025091006 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.11         |
|    explained_variance   | -0.756        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.721         |
|    n_updates            | 131030        |
|    policy_gradient_loss | -0.000732     |
|    value_loss           | 1.59          |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 410         |
|    time_elapsed         | 2519        

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 420          |
|    time_elapsed         | 2580         |
|    total_timesteps      | 215040       |
| train/                  |              |
|    approx_kl            | 0.0011362594 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.976       |
|    explained_variance   | -1.87        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0495       |
|    n_updates            | 131140       |
|    policy_gradient_loss | -0.000742    |
|    value_loss           | 0.101        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 421          |
|    time_elapsed         | 2586         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 431          |
|    time_elapsed         | 2647         |
|    total_timesteps      | 220672       |
| train/                  |              |
|    approx_kl            | 0.0011269493 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.927       |
|    explained_variance   | -2.69        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0493       |
|    n_updates            | 131250       |
|    policy_gradient_loss | -0.000456    |
|    value_loss           | 0.114        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 432           |
|    time_elapsed         | 2653          |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 442         |
|    time_elapsed         | 2713        |
|    total_timesteps      | 226304      |
| train/                  |             |
|    approx_kl            | 9.94195e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.918      |
|    explained_variance   | -0.208      |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0447      |
|    n_updates            | 131360      |
|    policy_gradient_loss | -0.000424   |
|    value_loss           | 0.193       |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 443           |
|    time_elapsed         | 2719          |
|    total_timesteps    

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 453           |
|    time_elapsed         | 2780          |
|    total_timesteps      | 231936        |
| train/                  |               |
|    approx_kl            | 0.00013073208 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.02         |
|    explained_variance   | -0.0842       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0419        |
|    n_updates            | 131470        |
|    policy_gradient_loss | -8.56e-05     |
|    value_loss           | 0.0804        |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 454          |
|    time_elapsed         | 2786    

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 464          |
|    time_elapsed         | 2847         |
|    total_timesteps      | 237568       |
| train/                  |              |
|    approx_kl            | 0.0035112114 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.12        |
|    explained_variance   | -0.126       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0356       |
|    n_updates            | 131580       |
|    policy_gradient_loss | -0.00372     |
|    value_loss           | 0.0904       |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 83         |
|    iterations           | 465        |
|    time_elapsed         | 2853       |
|    total_timesteps 

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 475          |
|    time_elapsed         | 2914         |
|    total_timesteps      | 243200       |
| train/                  |              |
|    approx_kl            | 0.0013926894 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.12        |
|    explained_variance   | -2.43        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0545       |
|    n_updates            | 131690       |
|    policy_gradient_loss | -0.00115     |
|    value_loss           | 0.101        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 476           |
|    time_elapsed         | 2920          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 486          |
|    time_elapsed         | 2981         |
|    total_timesteps      | 248832       |
| train/                  |              |
|    approx_kl            | 0.0004095044 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.1         |
|    explained_variance   | -0.0877      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.026        |
|    n_updates            | 131800       |
|    policy_gradient_loss | -0.000361    |
|    value_loss           | 0.198        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 487          |
|    time_elapsed         | 2987         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 497           |
|    time_elapsed         | 3048          |
|    total_timesteps      | 254464        |
| train/                  |               |
|    approx_kl            | 0.00015220151 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.18         |
|    explained_variance   | 0.86          |
|    learning_rate        | 1e-06         |
|    loss                 | 0.492         |
|    n_updates            | 131910        |
|    policy_gradient_loss | -0.000143     |
|    value_loss           | 1.45          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 498          |
|    time_elapsed         | 3054    

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 508          |
|    time_elapsed         | 3114         |
|    total_timesteps      | 260096       |
| train/                  |              |
|    approx_kl            | 0.0017667948 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.07        |
|    explained_variance   | -2.33        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.02         |
|    n_updates            | 132020       |
|    policy_gradient_loss | -0.000959    |
|    value_loss           | 0.079        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 509         |
|    time_elapsed         | 3120        |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 519           |
|    time_elapsed         | 3182          |
|    total_timesteps      | 265728        |
| train/                  |               |
|    approx_kl            | 0.00032398268 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.05         |
|    explained_variance   | -0.442        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0272        |
|    n_updates            | 132130        |
|    policy_gradient_loss | -0.000512     |
|    value_loss           | 0.126         |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 520         |
|    time_elapsed         | 3188        

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 530          |
|    time_elapsed         | 3250         |
|    total_timesteps      | 271360       |
| train/                  |              |
|    approx_kl            | 0.0013908468 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.02        |
|    explained_variance   | -0.687       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0632       |
|    n_updates            | 132240       |
|    policy_gradient_loss | -0.00151     |
|    value_loss           | 0.0872       |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 531          |
|    time_elapsed         | 3256         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 541          |
|    time_elapsed         | 3318         |
|    total_timesteps      | 276992       |
| train/                  |              |
|    approx_kl            | 0.0025083732 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.991       |
|    explained_variance   | -2.27        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 132350       |
|    policy_gradient_loss | -0.00231     |
|    value_loss           | 0.06         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 542          |
|    time_elapsed         | 3324         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 552          |
|    time_elapsed         | 3385         |
|    total_timesteps      | 282624       |
| train/                  |              |
|    approx_kl            | 0.0015587374 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.978       |
|    explained_variance   | -2.71        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0334       |
|    n_updates            | 132460       |
|    policy_gradient_loss | -0.00176     |
|    value_loss           | 0.0616       |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 553          |
|    time_elapsed         | 3391         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 563         |
|    time_elapsed         | 3452        |
|    total_timesteps      | 288256      |
| train/                  |             |
|    approx_kl            | 0.009347508 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.02       |
|    explained_variance   | -1.23       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0187      |
|    n_updates            | 132570      |
|    policy_gradient_loss | -0.00577    |
|    value_loss           | 0.0648      |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 564          |
|    time_elapsed         | 3458         |
|    total_timesteps      | 2

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 574           |
|    time_elapsed         | 3519          |
|    total_timesteps      | 293888        |
| train/                  |               |
|    approx_kl            | 0.00033310824 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.98         |
|    explained_variance   | 0.109         |
|    learning_rate        | 1e-06         |
|    loss                 | 84.7          |
|    n_updates            | 132680        |
|    policy_gradient_loss | 0.00121       |
|    value_loss           | 218           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 575          |
|    time_elapsed         | 3525    

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 585           |
|    time_elapsed         | 3587          |
|    total_timesteps      | 299520        |
| train/                  |               |
|    approx_kl            | 0.00030170416 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.944        |
|    explained_variance   | -1.41         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0549        |
|    n_updates            | 132790        |
|    policy_gradient_loss | -0.000932     |
|    value_loss           | 0.161         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 586          |
|    time_elapsed         | 3593    

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 596          |
|    time_elapsed         | 3655         |
|    total_timesteps      | 305152       |
| train/                  |              |
|    approx_kl            | 0.0014680134 |
|    clip_fraction        | 0.00137      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.944       |
|    explained_variance   | -0.485       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0604       |
|    n_updates            | 132900       |
|    policy_gradient_loss | -0.00257     |
|    value_loss           | 0.145        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 597          |
|    time_elapsed         | 3661         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 607          |
|    time_elapsed         | 3723         |
|    total_timesteps      | 310784       |
| train/                  |              |
|    approx_kl            | 0.0005097394 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.949       |
|    explained_variance   | 0.858        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.278        |
|    n_updates            | 133010       |
|    policy_gradient_loss | -0.00116     |
|    value_loss           | 1.15         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 608          |
|    time_elapsed         | 3729         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 618           |
|    time_elapsed         | 3790          |
|    total_timesteps      | 316416        |
| train/                  |               |
|    approx_kl            | 0.00025834667 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.917        |
|    explained_variance   | 0.319         |
|    learning_rate        | 1e-06         |
|    loss                 | 9.1           |
|    n_updates            | 133120        |
|    policy_gradient_loss | -7.36e-05     |
|    value_loss           | 176           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 619           |
|    time_elapsed         | 3796

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 629          |
|    time_elapsed         | 3858         |
|    total_timesteps      | 322048       |
| train/                  |              |
|    approx_kl            | 0.0006731533 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.837       |
|    explained_variance   | -1.83        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0649       |
|    n_updates            | 133230       |
|    policy_gradient_loss | -0.000793    |
|    value_loss           | 0.119        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 630           |
|    time_elapsed         | 3864          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 640          |
|    time_elapsed         | 3926         |
|    total_timesteps      | 327680       |
| train/                  |              |
|    approx_kl            | 0.0011479062 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.854       |
|    explained_variance   | 0.735        |
|    learning_rate        | 1e-06        |
|    loss                 | 1.4          |
|    n_updates            | 133340       |
|    policy_gradient_loss | -0.00135     |
|    value_loss           | 2.31         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 641           |
|    time_elapsed         | 3932          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 651          |
|    time_elapsed         | 3994         |
|    total_timesteps      | 333312       |
| train/                  |              |
|    approx_kl            | 6.826979e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.835       |
|    explained_variance   | 0.513        |
|    learning_rate        | 1e-06        |
|    loss                 | 2.28         |
|    n_updates            | 133450       |
|    policy_gradient_loss | 0.000487     |
|    value_loss           | 4.86         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 652          |
|    time_elapsed         | 4001         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 662          |
|    time_elapsed         | 4062         |
|    total_timesteps      | 338944       |
| train/                  |              |
|    approx_kl            | 0.0002097314 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.865       |
|    explained_variance   | 0.203        |
|    learning_rate        | 1e-06        |
|    loss                 | 63.4         |
|    n_updates            | 133560       |
|    policy_gradient_loss | -0.000408    |
|    value_loss           | 160          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 663          |
|    time_elapsed         | 4068         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 673           |
|    time_elapsed         | 4129          |
|    total_timesteps      | 344576        |
| train/                  |               |
|    approx_kl            | 0.00042668055 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.896        |
|    explained_variance   | 0.513         |
|    learning_rate        | 1e-06         |
|    loss                 | 10.7          |
|    n_updates            | 133670        |
|    policy_gradient_loss | -0.00109      |
|    value_loss           | 33.1          |
-------------------------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 83             |
|    iterations           | 674            |
|    time_elapsed         | 

------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 684          |
|    time_elapsed         | 4197         |
|    total_timesteps      | 350208       |
| train/                  |              |
|    approx_kl            | 0.0006704816 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.988       |
|    explained_variance   | 0.775        |
|    learning_rate        | 1e-06        |
|    loss                 | 1.1          |
|    n_updates            | 133780       |
|    policy_gradient_loss | -0.000975    |
|    value_loss           | 2.44         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 685           |
|    time_elapsed         | 4204          |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 695         |
|    time_elapsed         | 4265        |
|    total_timesteps      | 355840      |
| train/                  |             |
|    approx_kl            | 0.007143778 |
|    clip_fraction        | 0.00254     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.04       |
|    explained_variance   | -4.3        |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0877      |
|    n_updates            | 133890      |
|    policy_gradient_loss | -0.00588    |
|    value_loss           | 0.12        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 696          |
|    time_elapsed         | 4271         |
|    total_timesteps      | 3

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 706           |
|    time_elapsed         | 4332          |
|    total_timesteps      | 361472        |
| train/                  |               |
|    approx_kl            | 0.00048929395 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.12         |
|    explained_variance   | -0.171        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0201        |
|    n_updates            | 134000        |
|    policy_gradient_loss | -0.000203     |
|    value_loss           | 0.0645        |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 707         |
|    time_elapsed         | 4338        

-------------------------------------------
| time/                   |               |
|    fps                  | 83            |
|    iterations           | 717           |
|    time_elapsed         | 4399          |
|    total_timesteps      | 367104        |
| train/                  |               |
|    approx_kl            | 0.00036329322 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.931        |
|    explained_variance   | -0.507        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.342         |
|    n_updates            | 134110        |
|    policy_gradient_loss | -0.000502     |
|    value_loss           | 1.47          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 83           |
|    iterations           | 718          |
|    time_elapsed         | 4405    

-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 728         |
|    time_elapsed         | 4477        |
|    total_timesteps      | 372736      |
| train/                  |             |
|    approx_kl            | 0.004821861 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.07       |
|    explained_variance   | -0.418      |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0285      |
|    n_updates            | 134220      |
|    policy_gradient_loss | -0.00387    |
|    value_loss           | 0.0995      |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 729         |
|    time_elapsed         | 4484        |
|    total_timesteps      | 373248

KeyboardInterrupt: 

In [None]:
# Persist the current `model` under the relative name 'thisisatestmodel'.
# NOTE(review): the training output directly above ends in a KeyboardInterrupt,
# so this presumably saves a partially trained model — confirm that is intended.
model.save('thisisatestmodel')

In [18]:
# Load a previously saved PPO checkpoint and rebind `model` to it; whatever
# `model` referred to before this cell runs is no longer reachable by that name.
# NOTE(review): the checkpoint path is hard-coded and relative, so the file must
# already exist under ./train in the working directory.
model = PPO.load('./train/best_model_4500000')

In [19]:
# Attach the existing `env` to the loaded model. The recorded output of this
# cell shows the env being wrapped in a VecTransposeImage as part of this call.
model.set_env(env)

Wrapping the env in a VecTransposeImage.


In [None]:
# Wrap `env` in CustomReward6 (presumably one of this notebook's custom reward
# wrappers; its definition is not in this part of the file) and rebind `env`.
# NOTE(review): the cell feeds `env` back into itself, so every re-run stacks
# another CustomReward6 layer — run it exactly once per freshly built env.
# NOTE(review): this runs after model.set_env(env), so the model was handed the
# env as it was before this wrapping — confirm that ordering is intended.
env = CustomReward6(env)

In [None]:
# Reset the environment and keep the value it returns in `state`.
# NOTE(review): the playback cell that follows begins with the same reset, so
# this cell looks redundant — confirm before removing.
state = env.reset()

In [None]:
# Watch the loaded model play: reset the environment, then repeatedly let the
# model choose an action, apply it to the environment, and render the result.
# NOTE(review): the loop has no exit condition (it runs until the kernel is
# interrupted) and `done` is never checked; presumably `env` resets itself at
# the end of an episode (e.g. a vectorized env) — confirm.
state = env.reset()
# Loop through the game
while True:
    # predict() returns a pair; only the action is used, the second item is discarded.
    action, _ = model.predict(state)
    state, reward, done, info = env.step(action)
    env.render()
    # Print only rewards outside the range [-1, 3].
    if reward > 3 or reward < -1:
        print(reward)