# 1. Setup Mario

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py



In [2]:
# Import the game
import gym_super_mario_bros
# Import the Joypad wrapper
from nes_py.wrappers import JoypadSpace
# Import the SIMPLIFIED controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [3]:
# Build the Super Mario Bros environment and wrap it in JoypadSpace with the
# SIMPLE_MOVEMENT control set, in a single expression.
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0'),
    SIMPLE_MOVEMENT,
)

In [4]:
# Play the game with random actions, rendering every frame.
# `done` starts as True so the very first iteration resets the environment.
done = True
try:
    # Loop through each frame in the game
    for step in range(100000):
        # (Re)start the game on the first frame and whenever an episode ends
        if done:
            env.reset()
        # Do a random action sampled from the environment's action space
        state, reward, done, info = env.step(env.action_space.sample())
        # Show the game on the screen
        env.render()
finally:
    # Always close the game, even when the loop is interrupted
    # (e.g. Kernel -> Interrupt) or a step raises an exception.
    env.close()

  return (self.ram[0x86] - self.ram[0x071c]) % 256


# 2. Preprocess Environment

In [5]:
# Install pytorch
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html


In [6]:
# Install stable baselines for RL stuff
!pip install stable-baselines3[extra]



In [7]:
# Import Frame Stacker Wrapper and GrayScaling Wrapper
from gym.wrappers import GrayScaleObservation
# Import Vectorization Wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
# Import Matplotlib to show the impact of frame stacking
from matplotlib import pyplot as plt

In [8]:
def _make_mario_env():
    """Return one Mario env with simplified controls and grayscale frames."""
    # 1. Create the base environment
    base_env = gym_super_mario_bros.make('SuperMarioBros-v0')
    # 2. Simplify the controls
    simple_env = JoypadSpace(base_env, SIMPLE_MOVEMENT)
    # 3. Grayscale (keep_dim=True is passed through to the wrapper)
    return GrayScaleObservation(simple_env, keep_dim=True)


# 4. Wrap inside the Dummy Environment, then
# 5. Stack the last 4 frames along the last (channel) axis
env = VecFrameStack(
    DummyVecEnv([_make_mario_env]),
    4,
    channels_order='last',
)

In [9]:
# Reset the frame-stacked, vectorized environment and keep the returned observation
state = env.reset()

In [10]:
# Advance the environment by one step with a fixed action.
# The action is passed as a one-element list because `env` is a vectorized env
# wrapping a single environment.
# NOTE(review): 5 is an index into SIMPLE_MOVEMENT - presumably the jump ('A')
# action; confirm against gym_super_mario_bros.actions.
state, reward, done, info = env.step([5])

# 3. Train the RL Model

In [11]:
# Import os for file path management
import os 
# Import PPO for algos
from stable_baselines3 import PPO
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [12]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that periodically saves the model being trained.

    Args:
        check_freq: A checkpoint is written whenever ``self.n_calls`` is a
            multiple of this value.
        save_path: Directory the checkpoints are written to. ``None``
            disables checkpointing entirely.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save ``best_model_<n_calls>`` on every ``check_freq``-th call.

        Returns:
            bool: Always ``True``; this callback never asks training to stop.
        """
        # Same guard as _init_callback: with save_path=None there is no
        # directory to write to, and os.path.join(None, ...) would raise.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [13]:
# Directory handed to TrainAndLoggingCallback as `save_path` (model checkpoints)
CHECKPOINT_DIR = './train/'
# Directory handed to PPO as `tensorboard_log` (training logs)
LOG_DIR = './logs/'

In [14]:
# Setup model saving callback: a checkpoint is written into CHECKPOINT_DIR
# every 10000 callback calls (see TrainAndLoggingCallback._on_step)
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [16]:
# Create the PPO agent with a CNN policy on the preprocessed environment.
# A fixed seed of 42 is used and training logs are written under LOG_DIR.
model = PPO(
    'CnnPolicy',
    env,
    learning_rate=0.000001,
    n_steps=512,
    tensorboard_log=LOG_DIR,
    verbose=1,
    seed=42,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [17]:
# Train the AI model, this is where the AI model starts to learn.
# Runs for 200000 timesteps in total; `callback` is passed in so checkpoints
# are saved while training is in progress.
model.learn(total_timesteps=200000, callback=callback)

Logging to ./logs/PPO_1


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------
| time/              |     |
|    fps             | 36  |
|    iterations      | 1   |
|    time_elapsed    | 14  |
|    total_timesteps | 512 |
----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 50            |
|    iterations           | 2             |
|    time_elapsed         | 20            |
|    total_timesteps      | 1024          |
| train/                  |               |
|    approx_kl            | 6.8580266e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.00272      |
|    learning_rate        | 1e-06         |
|    loss                 | 205           |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000254     |
|    value_loss           | 427           |
-------------------------------------------
-----

-------------------------------------------
| time/                   |               |
|    fps                  | 86            |
|    iterations           | 13            |
|    time_elapsed         | 77            |
|    total_timesteps      | 6656          |
| train/                  |               |
|    approx_kl            | 2.1187006e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.000603     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0841        |
|    n_updates            | 120           |
|    policy_gradient_loss | -0.000233     |
|    value_loss           | 0.186         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 86            |
|    iterations           | 14            |
|    time_elapsed         | 82  

----------------------------------------
| time/                   |            |
|    fps                  | 90         |
|    iterations           | 24         |
|    time_elapsed         | 135        |
|    total_timesteps      | 12288      |
| train/                  |            |
|    approx_kl            | 3.3102e-05 |
|    clip_fraction        | 0          |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.94      |
|    explained_variance   | -0.00194   |
|    learning_rate        | 1e-06      |
|    loss                 | 0.17       |
|    n_updates            | 230        |
|    policy_gradient_loss | -0.000386  |
|    value_loss           | 0.357      |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 91           |
|    iterations           | 25           |
|    time_elapsed         | 140          |
|    total_timesteps      | 12800        |
| tr

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 35            |
|    time_elapsed         | 192           |
|    total_timesteps      | 17920         |
| train/                  |               |
|    approx_kl            | 7.8085344e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.0721       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.175         |
|    n_updates            | 340           |
|    policy_gradient_loss | -0.000397     |
|    value_loss           | 1.29          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 36           |
|    time_elapsed         | 197     

------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 46           |
|    time_elapsed         | 250          |
|    total_timesteps      | 23552        |
| train/                  |              |
|    approx_kl            | 8.474931e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -0.00151     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0656       |
|    n_updates            | 450          |
|    policy_gradient_loss | -0.00062     |
|    value_loss           | 0.136        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 47          |
|    time_elapsed         | 255         |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 57           |
|    time_elapsed         | 307          |
|    total_timesteps      | 29184        |
| train/                  |              |
|    approx_kl            | 8.249178e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.00719      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.153        |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.00058     |
|    value_loss           | 0.499        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 58           |
|    time_elapsed         | 312          |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 68            |
|    time_elapsed         | 366           |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 0.00021312211 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.366         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0574        |
|    n_updates            | 670           |
|    policy_gradient_loss | -0.0011       |
|    value_loss           | 0.416         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 69            |
|    time_elapsed         | 371 

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 79           |
|    time_elapsed         | 424          |
|    total_timesteps      | 40448        |
| train/                  |              |
|    approx_kl            | 9.871903e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | -0.0147      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.056        |
|    n_updates            | 780          |
|    policy_gradient_loss | -0.000624    |
|    value_loss           | 0.113        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 95          |
|    iterations           | 80          |
|    time_elapsed         | 430         |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 90            |
|    time_elapsed         | 482           |
|    total_timesteps      | 46080         |
| train/                  |               |
|    approx_kl            | 5.4524746e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | 0.00069       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0573        |
|    n_updates            | 890           |
|    policy_gradient_loss | -0.000313     |
|    value_loss           | 0.13          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 91            |
|    time_elapsed         | 487 

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 101           |
|    time_elapsed         | 541           |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 0.00010963681 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.88         |
|    explained_variance   | -0.000133     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0846        |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.000392     |
|    value_loss           | 0.329         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 102           |
|    time_elapsed         | 546 

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 112          |
|    time_elapsed         | 599          |
|    total_timesteps      | 57344        |
| train/                  |              |
|    approx_kl            | 0.0028518056 |
|    clip_fraction        | 0.00859      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.85        |
|    explained_variance   | 0.137        |
|    learning_rate        | 1e-06        |
|    loss                 | 91.9         |
|    n_updates            | 1110         |
|    policy_gradient_loss | -0.00104     |
|    value_loss           | 303          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 113           |
|    time_elapsed         | 604           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 123           |
|    time_elapsed         | 658           |
|    total_timesteps      | 62976         |
| train/                  |               |
|    approx_kl            | 0.00019967463 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | 0.0241        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0548        |
|    n_updates            | 1220          |
|    policy_gradient_loss | -0.000709     |
|    value_loss           | 0.24          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 124           |
|    time_elapsed         | 663 

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 134          |
|    time_elapsed         | 717          |
|    total_timesteps      | 68608        |
| train/                  |              |
|    approx_kl            | 0.0001709112 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.85        |
|    explained_variance   | -0.0482      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.285        |
|    n_updates            | 1330         |
|    policy_gradient_loss | -0.000748    |
|    value_loss           | 1.04         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 135          |
|    time_elapsed         | 722          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 145          |
|    time_elapsed         | 776          |
|    total_timesteps      | 74240        |
| train/                  |              |
|    approx_kl            | 0.0005840581 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.8         |
|    explained_variance   | -0.0498      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.334        |
|    n_updates            | 1440         |
|    policy_gradient_loss | -0.00133     |
|    value_loss           | 6.67         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 146           |
|    time_elapsed         | 781           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 156          |
|    time_elapsed         | 835          |
|    total_timesteps      | 79872        |
| train/                  |              |
|    approx_kl            | 0.0002920027 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.85        |
|    explained_variance   | -0.0321      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0569       |
|    n_updates            | 1550         |
|    policy_gradient_loss | -0.00144     |
|    value_loss           | 0.215        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 157           |
|    time_elapsed         | 841           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 167           |
|    time_elapsed         | 894           |
|    total_timesteps      | 85504         |
| train/                  |               |
|    approx_kl            | 0.00013207132 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | 0.000843      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.164         |
|    n_updates            | 1660          |
|    policy_gradient_loss | -0.000247     |
|    value_loss           | 0.694         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 168           |
|    time_elapsed         | 900 

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 178           |
|    time_elapsed         | 954           |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 0.00013463816 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | 0.0629        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.061         |
|    n_updates            | 1770          |
|    policy_gradient_loss | -0.000502     |
|    value_loss           | 0.51          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 179           |
|    time_elapsed         | 959 

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 189          |
|    time_elapsed         | 1011         |
|    total_timesteps      | 96768        |
| train/                  |              |
|    approx_kl            | 0.0002914964 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.84        |
|    explained_variance   | -0.00835     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0652       |
|    n_updates            | 1880         |
|    policy_gradient_loss | -0.00101     |
|    value_loss           | 0.111        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 190           |
|    time_elapsed         | 1016          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 200           |
|    time_elapsed         | 1072          |
|    total_timesteps      | 102400        |
| train/                  |               |
|    approx_kl            | 2.4559791e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | 0.0231        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.107         |
|    n_updates            | 1990          |
|    policy_gradient_loss | -0.000129     |
|    value_loss           | 0.286         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 201           |
|    time_elapsed         | 1077

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 211          |
|    time_elapsed         | 1130         |
|    total_timesteps      | 108032       |
| train/                  |              |
|    approx_kl            | 0.0006357017 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.8         |
|    explained_variance   | 0.0388       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0662       |
|    n_updates            | 2100         |
|    policy_gradient_loss | -0.00147     |
|    value_loss           | 0.25         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 212          |
|    time_elapsed         | 1135         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 222           |
|    time_elapsed         | 1190          |
|    total_timesteps      | 113664        |
| train/                  |               |
|    approx_kl            | 0.00020110747 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | -0.062        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0641        |
|    n_updates            | 2210          |
|    policy_gradient_loss | -0.000861     |
|    value_loss           | 1.61          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 223           |
|    time_elapsed         | 1195

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 233           |
|    time_elapsed         | 1249          |
|    total_timesteps      | 119296        |
| train/                  |               |
|    approx_kl            | 0.00013652514 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | -0.0166       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0495        |
|    n_updates            | 2320          |
|    policy_gradient_loss | -0.000359     |
|    value_loss           | 0.123         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 234           |
|    time_elapsed         | 1254

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 244           |
|    time_elapsed         | 1307          |
|    total_timesteps      | 124928        |
| train/                  |               |
|    approx_kl            | 0.00014359446 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | 0.127         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0402        |
|    n_updates            | 2430          |
|    policy_gradient_loss | -0.000324     |
|    value_loss           | 0.224         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 245           |
|    time_elapsed         | 1313

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 255          |
|    time_elapsed         | 1367         |
|    total_timesteps      | 130560       |
| train/                  |              |
|    approx_kl            | 0.0001611975 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.8         |
|    explained_variance   | -0.0452      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0717       |
|    n_updates            | 2540         |
|    policy_gradient_loss | -0.000467    |
|    value_loss           | 0.313        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 256           |
|    time_elapsed         | 1372          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 266          |
|    time_elapsed         | 1424         |
|    total_timesteps      | 136192       |
| train/                  |              |
|    approx_kl            | 0.0017741041 |
|    clip_fraction        | 0.00664      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.77        |
|    explained_variance   | 0.719        |
|    learning_rate        | 1e-06        |
|    loss                 | 59           |
|    n_updates            | 2650         |
|    policy_gradient_loss | 0.000737     |
|    value_loss           | 193          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 267           |
|    time_elapsed         | 1430          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 277          |
|    time_elapsed         | 1484         |
|    total_timesteps      | 141824       |
| train/                  |              |
|    approx_kl            | 0.0010834876 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.8         |
|    explained_variance   | 0.639        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0953       |
|    n_updates            | 2760         |
|    policy_gradient_loss | -0.00232     |
|    value_loss           | 0.153        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 278           |
|    time_elapsed         | 1489          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 288           |
|    time_elapsed         | 1542          |
|    total_timesteps      | 147456        |
| train/                  |               |
|    approx_kl            | 0.00018130546 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.77         |
|    explained_variance   | -0.0771       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0859        |
|    n_updates            | 2870          |
|    policy_gradient_loss | -0.000493     |
|    value_loss           | 0.267         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 289          |
|    time_elapsed         | 1547    

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 299           |
|    time_elapsed         | 1601          |
|    total_timesteps      | 153088        |
| train/                  |               |
|    approx_kl            | 0.00054462627 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.7          |
|    explained_variance   | 0.169         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.158         |
|    n_updates            | 2980          |
|    policy_gradient_loss | -0.0011       |
|    value_loss           | 1.2           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 300           |
|    time_elapsed         | 1606

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 310          |
|    time_elapsed         | 1659         |
|    total_timesteps      | 158720       |
| train/                  |              |
|    approx_kl            | 0.0002108562 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.71        |
|    explained_variance   | 0.443        |
|    learning_rate        | 1e-06        |
|    loss                 | 59.4         |
|    n_updates            | 3090         |
|    policy_gradient_loss | -0.00109     |
|    value_loss           | 117          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 311          |
|    time_elapsed         | 1664         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 321          |
|    time_elapsed         | 1719         |
|    total_timesteps      | 164352       |
| train/                  |              |
|    approx_kl            | 0.0005512893 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.66        |
|    explained_variance   | -0.167       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.075        |
|    n_updates            | 3200         |
|    policy_gradient_loss | -0.00112     |
|    value_loss           | 0.351        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 322          |
|    time_elapsed         | 1724         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 332           |
|    time_elapsed         | 1777          |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 0.00038209476 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.59         |
|    explained_variance   | -0.13         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.224         |
|    n_updates            | 3310          |
|    policy_gradient_loss | -0.000912     |
|    value_loss           | 1.57          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 333           |
|    time_elapsed         | 1784

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 343          |
|    time_elapsed         | 1836         |
|    total_timesteps      | 175616       |
| train/                  |              |
|    approx_kl            | 0.0009929999 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.65        |
|    explained_variance   | -0.0619      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0672       |
|    n_updates            | 3420         |
|    policy_gradient_loss | -0.00307     |
|    value_loss           | 0.132        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 95          |
|    iterations           | 344         |
|    time_elapsed         | 1842        |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 354           |
|    time_elapsed         | 1895          |
|    total_timesteps      | 181248        |
| train/                  |               |
|    approx_kl            | 0.00046333869 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.63         |
|    explained_variance   | 0.0846        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0599        |
|    n_updates            | 3530          |
|    policy_gradient_loss | -0.00109      |
|    value_loss           | 0.139         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 355          |
|    time_elapsed         | 1900    

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 365           |
|    time_elapsed         | 1953          |
|    total_timesteps      | 186880        |
| train/                  |               |
|    approx_kl            | 0.00061703054 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.59         |
|    explained_variance   | -0.0563       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.149         |
|    n_updates            | 3640          |
|    policy_gradient_loss | -0.00118      |
|    value_loss           | 0.495         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 366           |
|    time_elapsed         | 1959

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 376          |
|    time_elapsed         | 2012         |
|    total_timesteps      | 192512       |
| train/                  |              |
|    approx_kl            | 0.0003392495 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.59        |
|    explained_variance   | -0.0763      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0742       |
|    n_updates            | 3750         |
|    policy_gradient_loss | -0.000395    |
|    value_loss           | 0.131        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 377          |
|    time_elapsed         | 2018         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 387          |
|    time_elapsed         | 2071         |
|    total_timesteps      | 198144       |
| train/                  |              |
|    approx_kl            | 0.0008576632 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.49        |
|    explained_variance   | -0.0471      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.102        |
|    n_updates            | 3860         |
|    policy_gradient_loss | -0.0019      |
|    value_loss           | 0.134        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 388           |
|    time_elapsed         | 2076          |
|    t

<stable_baselines3.ppo.ppo.PPO at 0x2089cbac880>

In [20]:
# Write the trained PPO model to disk under this file name.
# NOTE(review): presumably the seed-42 run, going by the file name — confirm.
model.save('thisisatestseed42model')

# 4. Test it Out

In [None]:
# Load model
# Rebinds `model` to a PPO policy restored from a saved checkpoint file.
# NOTE(review): the training output visible in this notebook stops near 200k
# timesteps; confirm a checkpoint named 'best_model_1000000' really exists
# under ./train/ before running this cell.
model = PPO.load('./train/best_model_1000000')

In [None]:
# Reset the environment and keep the returned initial observation in `state`.
# NOTE(review): the following cell starts with the same reset, so this cell
# looks redundant — confirm before removing.
state = env.reset()

In [None]:
# Start the game 
state = env.reset()
# Loop through the game
# NOTE(review): there is no exit condition and `done` is never inspected, so
# this cell runs until the kernel is interrupted and blocks "Run All".
while True: 
    # Ask the loaded model for an action based on the current observation
    action, _ = model.predict(state)
    # Apply the action; the new observation feeds the next iteration
    state, reward, done, info = env.step(action)
    # Show the game on the screen
    env.render()

In [21]:
# Second PPO agent, this one initialised with seed 64
model64 = PPO(
    'CnnPolicy',
    env,
    learning_rate=1e-06,
    n_steps=512,
    seed=64,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [24]:
# Train the seed-64 agent for 200k timesteps, passing `callback` as the training callback
model64.learn(
    callback=callback,
    total_timesteps=200_000,
)

Logging to ./logs/PPO_2
----------------------------
| time/              |     |
|    fps             | 179 |
|    iterations      | 1   |
|    time_elapsed    | 2   |
|    total_timesteps | 512 |
----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 125           |
|    iterations           | 2             |
|    time_elapsed         | 8             |
|    total_timesteps      | 1024          |
| train/                  |               |
|    approx_kl            | 1.7021317e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.00167      |
|    learning_rate        | 1e-06         |
|    loss                 | 107           |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000213     |
|    value_loss           | 245           |
-------------------------

-------------------------------------------
| time/                   |               |
|    fps                  | 99            |
|    iterations           | 13            |
|    time_elapsed         | 67            |
|    total_timesteps      | 6656          |
| train/                  |               |
|    approx_kl            | 1.2804405e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.00755      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.14          |
|    n_updates            | 120           |
|    policy_gradient_loss | -0.00018      |
|    value_loss           | 0.393         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 98           |
|    iterations           | 14           |
|    time_elapsed         | 72      

-------------------------------------------
| time/                   |               |
|    fps                  | 96            |
|    iterations           | 24            |
|    time_elapsed         | 126           |
|    total_timesteps      | 12288         |
| train/                  |               |
|    approx_kl            | 2.8675771e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.0711        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.138         |
|    n_updates            | 230           |
|    policy_gradient_loss | -0.000485     |
|    value_loss           | 0.472         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 96            |
|    iterations           | 25            |
|    time_elapsed         | 132 

-------------------------------------------
| time/                   |               |
|    fps                  | 96            |
|    iterations           | 35            |
|    time_elapsed         | 186           |
|    total_timesteps      | 17920         |
| train/                  |               |
|    approx_kl            | 1.6371021e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.0307       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.107         |
|    n_updates            | 340           |
|    policy_gradient_loss | -2.17e-05     |
|    value_loss           | 0.486         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 96           |
|    iterations           | 36           |
|    time_elapsed         | 191     

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 46           |
|    time_elapsed         | 246          |
|    total_timesteps      | 23552        |
| train/                  |              |
|    approx_kl            | 6.992521e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.00688      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0783       |
|    n_updates            | 450          |
|    policy_gradient_loss | -0.000569    |
|    value_loss           | 0.138        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 47           |
|    time_elapsed         | 252          |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 57            |
|    time_elapsed         | 305           |
|    total_timesteps      | 29184         |
| train/                  |               |
|    approx_kl            | 2.2026943e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.000754      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0668        |
|    n_updates            | 560           |
|    policy_gradient_loss | 2.66e-05      |
|    value_loss           | 0.159         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 58           |
|    time_elapsed         | 310     

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 68            |
|    time_elapsed         | 364           |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 5.7501486e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | 0.00234       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.135         |
|    n_updates            | 670           |
|    policy_gradient_loss | 0.000236      |
|    value_loss           | 0.732         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 69           |
|    time_elapsed         | 370     

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 79            |
|    time_elapsed         | 426           |
|    total_timesteps      | 40448         |
| train/                  |               |
|    approx_kl            | 1.9810046e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.9          |
|    explained_variance   | -0.0338       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.067         |
|    n_updates            | 780           |
|    policy_gradient_loss | -0.000125     |
|    value_loss           | 0.202         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 80           |
|    time_elapsed         | 432     

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 90            |
|    time_elapsed         | 490           |
|    total_timesteps      | 46080         |
| train/                  |               |
|    approx_kl            | 0.00014154881 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | 0.0118        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.049         |
|    n_updates            | 890           |
|    policy_gradient_loss | -0.0007       |
|    value_loss           | 0.14          |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 91          |
|    time_elapsed         | 495         

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 101           |
|    time_elapsed         | 555           |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 6.6638575e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.88         |
|    explained_variance   | -5.59e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0439        |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.000324     |
|    value_loss           | 0.238         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 102           |
|    time_elapsed         | 560 

------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 112          |
|    time_elapsed         | 615          |
|    total_timesteps      | 57344        |
| train/                  |              |
|    approx_kl            | 0.0016051307 |
|    clip_fraction        | 0.000977     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.86        |
|    explained_variance   | 0.252        |
|    learning_rate        | 1e-06        |
|    loss                 | 164          |
|    n_updates            | 1110         |
|    policy_gradient_loss | -0.000918    |
|    value_loss           | 359          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 113           |
|    time_elapsed         | 620           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 123           |
|    time_elapsed         | 674           |
|    total_timesteps      | 62976         |
| train/                  |               |
|    approx_kl            | 3.0734576e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | 0.000953      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0519        |
|    n_updates            | 1220          |
|    policy_gradient_loss | -9.47e-05     |
|    value_loss           | 0.161         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 124           |
|    time_elapsed         | 680 

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 134           |
|    time_elapsed         | 733           |
|    total_timesteps      | 68608         |
| train/                  |               |
|    approx_kl            | 0.00017920358 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | 0.0641        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.075         |
|    n_updates            | 1330          |
|    policy_gradient_loss | -0.000925     |
|    value_loss           | 0.432         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 135           |
|    time_elapsed         | 738 

-----------------------------------------
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 145         |
|    time_elapsed         | 792         |
|    total_timesteps      | 74240       |
| train/                  |             |
|    approx_kl            | 6.07986e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.87       |
|    explained_variance   | -0.0156     |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0495      |
|    n_updates            | 1440        |
|    policy_gradient_loss | -0.000285   |
|    value_loss           | 0.364       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 146         |
|    time_elapsed         | 797         |
|    total_timesteps      | 74752 

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 156           |
|    time_elapsed         | 852           |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 5.1483396e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | -0.0235       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0451        |
|    n_updates            | 1550          |
|    policy_gradient_loss | -0.000234     |
|    value_loss           | 0.13          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 157          |
|    time_elapsed         | 857     

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 167           |
|    time_elapsed         | 911           |
|    total_timesteps      | 85504         |
| train/                  |               |
|    approx_kl            | 0.00010423863 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.79         |
|    explained_variance   | 0.0583        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.281         |
|    n_updates            | 1660          |
|    policy_gradient_loss | 0.0004        |
|    value_loss           | 3.28          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 168           |
|    time_elapsed         | 916 

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 178           |
|    time_elapsed         | 970           |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 0.00019881991 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.82         |
|    explained_variance   | -0.00286      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.123         |
|    n_updates            | 1770          |
|    policy_gradient_loss | -0.000608     |
|    value_loss           | 0.303         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 179          |
|    time_elapsed         | 975     

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 189           |
|    time_elapsed         | 1029          |
|    total_timesteps      | 96768         |
| train/                  |               |
|    approx_kl            | 0.00031074695 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | 0.0219        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0569        |
|    n_updates            | 1880          |
|    policy_gradient_loss | -0.000745     |
|    value_loss           | 0.14          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 190          |
|    time_elapsed         | 1034    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 200           |
|    time_elapsed         | 1088          |
|    total_timesteps      | 102400        |
| train/                  |               |
|    approx_kl            | 0.00012651959 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.75         |
|    explained_variance   | 0.0421        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.103         |
|    n_updates            | 1990          |
|    policy_gradient_loss | -0.000192     |
|    value_loss           | 0.649         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 201          |
|    time_elapsed         | 1094    

-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 211         |
|    time_elapsed         | 1147        |
|    total_timesteps      | 108032      |
| train/                  |             |
|    approx_kl            | 0.002218319 |
|    clip_fraction        | 0.00547     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.71       |
|    explained_variance   | 0.4         |
|    learning_rate        | 1e-06       |
|    loss                 | 107         |
|    n_updates            | 2100        |
|    policy_gradient_loss | 5.5e-05     |
|    value_loss           | 312         |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 212           |
|    time_elapsed         | 1152          |
|    total_timesteps    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 222           |
|    time_elapsed         | 1206          |
|    total_timesteps      | 113664        |
| train/                  |               |
|    approx_kl            | 1.3282173e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.62         |
|    explained_variance   | 0.395         |
|    learning_rate        | 1e-06         |
|    loss                 | 50            |
|    n_updates            | 2210          |
|    policy_gradient_loss | -0.000217     |
|    value_loss           | 89.8          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 223          |
|    time_elapsed         | 1212    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 233           |
|    time_elapsed         | 1264          |
|    total_timesteps      | 119296        |
| train/                  |               |
|    approx_kl            | 0.00012672157 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.71         |
|    explained_variance   | -0.033        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0527        |
|    n_updates            | 2320          |
|    policy_gradient_loss | -0.000157     |
|    value_loss           | 0.211         |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 234         |
|    time_elapsed         | 1270        

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 244          |
|    time_elapsed         | 1324         |
|    total_timesteps      | 124928       |
| train/                  |              |
|    approx_kl            | 0.0003387326 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.67        |
|    explained_variance   | 0.0623       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0562       |
|    n_updates            | 2430         |
|    policy_gradient_loss | -0.000762    |
|    value_loss           | 0.131        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 245           |
|    time_elapsed         | 1329          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 255          |
|    time_elapsed         | 1383         |
|    total_timesteps      | 130560       |
| train/                  |              |
|    approx_kl            | 0.0006596383 |
|    clip_fraction        | 0.000781     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.55        |
|    explained_variance   | -0.1         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.122        |
|    n_updates            | 2540         |
|    policy_gradient_loss | -0.00171     |
|    value_loss           | 8.57         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 256          |
|    time_elapsed         | 1389         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 266         |
|    time_elapsed         | 1442        |
|    total_timesteps      | 136192      |
| train/                  |             |
|    approx_kl            | 0.001462098 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.58       |
|    explained_variance   | 0.0269      |
|    learning_rate        | 1e-06       |
|    loss                 | 0.108       |
|    n_updates            | 2650        |
|    policy_gradient_loss | -0.00266    |
|    value_loss           | 4.17        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 267          |
|    time_elapsed         | 1447         |
|    total_timesteps      | 1

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 277          |
|    time_elapsed         | 1502         |
|    total_timesteps      | 141824       |
| train/                  |              |
|    approx_kl            | 0.0002537534 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.68        |
|    explained_variance   | 0.00983      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.107        |
|    n_updates            | 2760         |
|    policy_gradient_loss | -0.000878    |
|    value_loss           | 0.283        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 278           |
|    time_elapsed         | 1507          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 288           |
|    time_elapsed         | 1561          |
|    total_timesteps      | 147456        |
| train/                  |               |
|    approx_kl            | 0.00016296096 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.71         |
|    explained_variance   | -0.107        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.053         |
|    n_updates            | 2870          |
|    policy_gradient_loss | -0.000312     |
|    value_loss           | 0.178         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 289           |
|    time_elapsed         | 1566

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 299          |
|    time_elapsed         | 1620         |
|    total_timesteps      | 153088       |
| train/                  |              |
|    approx_kl            | 2.513011e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.65        |
|    explained_variance   | -0.0249      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0671       |
|    n_updates            | 2980         |
|    policy_gradient_loss | 0.000307     |
|    value_loss           | 0.48         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 300           |
|    time_elapsed         | 1625          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 310          |
|    time_elapsed         | 1678         |
|    total_timesteps      | 158720       |
| train/                  |              |
|    approx_kl            | 0.0007397882 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.66        |
|    explained_variance   | 0.0497       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0454       |
|    n_updates            | 3090         |
|    policy_gradient_loss | -0.00189     |
|    value_loss           | 0.0969       |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 311          |
|    time_elapsed         | 1684         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 321           |
|    time_elapsed         | 1737          |
|    total_timesteps      | 164352        |
| train/                  |               |
|    approx_kl            | 0.00024126854 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.61         |
|    explained_variance   | -0.0168       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.199         |
|    n_updates            | 3200          |
|    policy_gradient_loss | -0.000431     |
|    value_loss           | 0.778         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 322          |
|    time_elapsed         | 1743    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 332           |
|    time_elapsed         | 1796          |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 0.00046650425 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.63         |
|    explained_variance   | -0.0776       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.052         |
|    n_updates            | 3310          |
|    policy_gradient_loss | -0.00102      |
|    value_loss           | 0.17          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 333           |
|    time_elapsed         | 1801

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 343          |
|    time_elapsed         | 1855         |
|    total_timesteps      | 175616       |
| train/                  |              |
|    approx_kl            | 0.0017697248 |
|    clip_fraction        | 0.00332      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.58        |
|    explained_variance   | 0.715        |
|    learning_rate        | 1e-06        |
|    loss                 | 105          |
|    n_updates            | 3420         |
|    policy_gradient_loss | -0.00131     |
|    value_loss           | 264          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 344           |
|    time_elapsed         | 1860          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 354          |
|    time_elapsed         | 1914         |
|    total_timesteps      | 181248       |
| train/                  |              |
|    approx_kl            | 0.0002784935 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.6         |
|    explained_variance   | -0.0761      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0453       |
|    n_updates            | 3530         |
|    policy_gradient_loss | -0.000825    |
|    value_loss           | 0.108        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 355          |
|    time_elapsed         | 1919         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 365           |
|    time_elapsed         | 1973          |
|    total_timesteps      | 186880        |
| train/                  |               |
|    approx_kl            | 0.00042249786 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.6          |
|    explained_variance   | 0.0398        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0516        |
|    n_updates            | 3640          |
|    policy_gradient_loss | -0.000927     |
|    value_loss           | 0.151         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 366           |
|    time_elapsed         | 1978

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 376           |
|    time_elapsed         | 2032          |
|    total_timesteps      | 192512        |
| train/                  |               |
|    approx_kl            | 0.00042995554 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.58         |
|    explained_variance   | -0.0229       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.119         |
|    n_updates            | 3750          |
|    policy_gradient_loss | -0.00122      |
|    value_loss           | 0.634         |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 377         |
|    time_elapsed         | 2038        

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 387           |
|    time_elapsed         | 2091          |
|    total_timesteps      | 198144        |
| train/                  |               |
|    approx_kl            | 0.00040234486 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.57         |
|    explained_variance   | -0.132        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0857        |
|    n_updates            | 3860          |
|    policy_gradient_loss | -0.00107      |
|    value_loss           | 0.134         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 388          |
|    time_elapsed         | 2096    

<stable_baselines3.ppo.ppo.PPO at 0x20886d7c1f0>

In [25]:
# Persist model64 to disk under the name 'thisisatestseed64model' so it can be
# reloaded later without retraining.
# NOTE(review): model64 is presumably the PPO model created with seed=64 earlier
# in the notebook (its definition is not in this section) - confirm.
model64.save('thisisatestseed64model')

In [26]:
# PPO agent started WITH a random seed: no `seed` argument is passed here,
# so the run is not pinned to a fixed seed.
# verbose=1 prints the per-iteration training tables; metrics are also
# written under LOG_DIR for TensorBoard.
modelrandom = PPO(
    'CnnPolicy',
    env,
    n_steps=512,
    learning_rate=1e-06,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [27]:
# Train the randomly-seeded agent for 200k timesteps.
# NOTE(review): `callback` is defined earlier in the notebook (not visible in
# this section) - presumably a periodic checkpoint callback; confirm.
modelrandom.learn(callback=callback, total_timesteps=200_000)


Logging to ./logs/PPO_3
----------------------------
| time/              |     |
|    fps             | 180 |
|    iterations      | 1   |
|    time_elapsed    | 2   |
|    total_timesteps | 512 |
----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 124          |
|    iterations           | 2            |
|    time_elapsed         | 8            |
|    total_timesteps      | 1024         |
| train/                  |              |
|    approx_kl            | 3.420119e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.95        |
|    explained_variance   | -0.00156     |
|    learning_rate        | 1e-06        |
|    loss                 | 215          |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00013     |
|    value_loss           | 499          |
------------------------------------------

------------------------------------------
| time/                   |              |
|    fps                  | 99           |
|    iterations           | 13           |
|    time_elapsed         | 67           |
|    total_timesteps      | 6656         |
| train/                  |              |
|    approx_kl            | 1.116551e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.00503      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.127        |
|    n_updates            | 120          |
|    policy_gradient_loss | -0.00015     |
|    value_loss           | 0.24         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 98            |
|    iterations           | 14            |
|    time_elapsed         | 72            |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 96            |
|    iterations           | 24            |
|    time_elapsed         | 126           |
|    total_timesteps      | 12288         |
| train/                  |               |
|    approx_kl            | 1.9186526e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.000252      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.14          |
|    n_updates            | 230           |
|    policy_gradient_loss | -0.000228     |
|    value_loss           | 0.432         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 96            |
|    iterations           | 25            |
|    time_elapsed         | 132 

-------------------------------------------
| time/                   |               |
|    fps                  | 96            |
|    iterations           | 35            |
|    time_elapsed         | 185           |
|    total_timesteps      | 17920         |
| train/                  |               |
|    approx_kl            | 2.8474838e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.0403       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.155         |
|    n_updates            | 340           |
|    policy_gradient_loss | 0.000124      |
|    value_loss           | 2.1           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 96           |
|    iterations           | 36           |
|    time_elapsed         | 191     

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 46            |
|    time_elapsed         | 245           |
|    total_timesteps      | 23552         |
| train/                  |               |
|    approx_kl            | 0.00014528749 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.0223        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0559        |
|    n_updates            | 450           |
|    policy_gradient_loss | -0.00127      |
|    value_loss           | 0.141         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 47            |
|    time_elapsed         | 251 

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 57           |
|    time_elapsed         | 304          |
|    total_timesteps      | 29184        |
| train/                  |              |
|    approx_kl            | 7.343781e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | -0.0453      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0533       |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.000383    |
|    value_loss           | 0.537        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 58            |
|    time_elapsed         | 311           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 68            |
|    time_elapsed         | 363           |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 0.00020821579 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | 0.0969        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.234         |
|    n_updates            | 670           |
|    policy_gradient_loss | -0.0014       |
|    value_loss           | 0.945         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 69            |
|    time_elapsed         | 369 

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 79            |
|    time_elapsed         | 423           |
|    total_timesteps      | 40448         |
| train/                  |               |
|    approx_kl            | 0.00010591163 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | 0.493         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.095         |
|    n_updates            | 780           |
|    policy_gradient_loss | -0.000539     |
|    value_loss           | 0.189         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 80           |
|    time_elapsed         | 428     

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 90            |
|    time_elapsed         | 481           |
|    total_timesteps      | 46080         |
| train/                  |               |
|    approx_kl            | 3.7633698e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | 0.00571       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0344        |
|    n_updates            | 890           |
|    policy_gradient_loss | -0.000238     |
|    value_loss           | 0.265         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 91           |
|    time_elapsed         | 486     

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 101           |
|    time_elapsed         | 540           |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 0.00026534277 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.88         |
|    explained_variance   | 0.00832       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.121         |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.00112      |
|    value_loss           | 0.346         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 102          |
|    time_elapsed         | 546     

--------------------------------------------
| time/                   |                |
|    fps                  | 95             |
|    iterations           | 112            |
|    time_elapsed         | 599            |
|    total_timesteps      | 57344          |
| train/                  |                |
|    approx_kl            | 0.000115851755 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.87          |
|    explained_variance   | 0.14           |
|    learning_rate        | 1e-06          |
|    loss                 | 30.7           |
|    n_updates            | 1110           |
|    policy_gradient_loss | 0.000909       |
|    value_loss           | 93.7           |
--------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 113          |
|    time_elapsed 

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 123           |
|    time_elapsed         | 659           |
|    total_timesteps      | 62976         |
| train/                  |               |
|    approx_kl            | 8.0775004e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | 0.00368       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0514        |
|    n_updates            | 1220          |
|    policy_gradient_loss | -0.00034      |
|    value_loss           | 0.181         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 124           |
|    time_elapsed         | 664 

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 134          |
|    time_elapsed         | 718          |
|    total_timesteps      | 68608        |
| train/                  |              |
|    approx_kl            | 0.0001025378 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.86        |
|    explained_variance   | -0.0214      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.195        |
|    n_updates            | 1330         |
|    policy_gradient_loss | -6.65e-05    |
|    value_loss           | 0.817        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 135           |
|    time_elapsed         | 723           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 145           |
|    time_elapsed         | 777           |
|    total_timesteps      | 74240         |
| train/                  |               |
|    approx_kl            | 0.00021996105 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.84         |
|    explained_variance   | 0.313         |
|    learning_rate        | 1e-06         |
|    loss                 | 54.2          |
|    n_updates            | 1440          |
|    policy_gradient_loss | 0.000176      |
|    value_loss           | 118           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 146           |
|    time_elapsed         | 782 

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 156          |
|    time_elapsed         | 837          |
|    total_timesteps      | 79872        |
| train/                  |              |
|    approx_kl            | 9.339966e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.85        |
|    explained_variance   | 0.157        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.17         |
|    n_updates            | 1550         |
|    policy_gradient_loss | -0.000488    |
|    value_loss           | 0.21         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 157          |
|    time_elapsed         | 842          |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 167           |
|    time_elapsed         | 895           |
|    total_timesteps      | 85504         |
| train/                  |               |
|    approx_kl            | 0.00013263244 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | 0.00363       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.141         |
|    n_updates            | 1660          |
|    policy_gradient_loss | -0.000605     |
|    value_loss           | 0.352         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 168           |
|    time_elapsed         | 900 

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 178           |
|    time_elapsed         | 954           |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 4.1196938e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | 0.00277       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0886        |
|    n_updates            | 1770          |
|    policy_gradient_loss | -2.65e-05     |
|    value_loss           | 0.651         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 179           |
|    time_elapsed         | 960 

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 189           |
|    time_elapsed         | 1013          |
|    total_timesteps      | 96768         |
| train/                  |               |
|    approx_kl            | 0.00037916214 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.82         |
|    explained_variance   | -0.0158       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.093         |
|    n_updates            | 1880          |
|    policy_gradient_loss | -0.00139      |
|    value_loss           | 0.134         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 190           |
|    time_elapsed         | 1018

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 200          |
|    time_elapsed         | 1073         |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 0.0001668781 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.77        |
|    explained_variance   | -0.0168      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0419       |
|    n_updates            | 1990         |
|    policy_gradient_loss | -0.00068     |
|    value_loss           | 0.146        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 201           |
|    time_elapsed         | 1078          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 211          |
|    time_elapsed         | 1131         |
|    total_timesteps      | 108032       |
| train/                  |              |
|    approx_kl            | 9.533763e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.77        |
|    explained_variance   | -0.0015      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.132        |
|    n_updates            | 2100         |
|    policy_gradient_loss | -0.000416    |
|    value_loss           | 0.535        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 212          |
|    time_elapsed         | 1137         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 222           |
|    time_elapsed         | 1191          |
|    total_timesteps      | 113664        |
| train/                  |               |
|    approx_kl            | 0.00018694019 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.75         |
|    explained_variance   | 0.591         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0485        |
|    n_updates            | 2210          |
|    policy_gradient_loss | -0.000735     |
|    value_loss           | 0.36          |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 95          |
|    iterations           | 223         |
|    time_elapsed         | 1196        

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 233          |
|    time_elapsed         | 1250         |
|    total_timesteps      | 119296       |
| train/                  |              |
|    approx_kl            | 0.0002979003 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.75        |
|    explained_variance   | -0.0171      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.063        |
|    n_updates            | 2320         |
|    policy_gradient_loss | -0.000999    |
|    value_loss           | 0.21         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 234           |
|    time_elapsed         | 1257          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 244          |
|    time_elapsed         | 1310         |
|    total_timesteps      | 124928       |
| train/                  |              |
|    approx_kl            | 0.0006258335 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.72        |
|    explained_variance   | -0.0375      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.153        |
|    n_updates            | 2430         |
|    policy_gradient_loss | -0.00155     |
|    value_loss           | 0.326        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 245           |
|    time_elapsed         | 1315          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 255           |
|    time_elapsed         | 1369          |
|    total_timesteps      | 130560        |
| train/                  |               |
|    approx_kl            | 4.0955958e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.73         |
|    explained_variance   | 0.81          |
|    learning_rate        | 1e-06         |
|    loss                 | 11.7          |
|    n_updates            | 2540          |
|    policy_gradient_loss | 8.26e-05      |
|    value_loss           | 21.6          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 256           |
|    time_elapsed         | 1375

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 266           |
|    time_elapsed         | 1428          |
|    total_timesteps      | 136192        |
| train/                  |               |
|    approx_kl            | 0.00015238393 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.77         |
|    explained_variance   | -0.0558       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0401        |
|    n_updates            | 2650          |
|    policy_gradient_loss | -0.000451     |
|    value_loss           | 0.114         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 267           |
|    time_elapsed         | 1433

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 277           |
|    time_elapsed         | 1487          |
|    total_timesteps      | 141824        |
| train/                  |               |
|    approx_kl            | 0.00021302258 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.69         |
|    explained_variance   | -0.0509       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0703        |
|    n_updates            | 2760          |
|    policy_gradient_loss | -0.000432     |
|    value_loss           | 2.61          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 278           |
|    time_elapsed         | 1492

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 288           |
|    time_elapsed         | 1546          |
|    total_timesteps      | 147456        |
| train/                  |               |
|    approx_kl            | 0.00014787773 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.72         |
|    explained_variance   | -0.00705      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0966        |
|    n_updates            | 2870          |
|    policy_gradient_loss | -0.000342     |
|    value_loss           | 0.259         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 289           |
|    time_elapsed         | 1551

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 299           |
|    time_elapsed         | 1605          |
|    total_timesteps      | 153088        |
| train/                  |               |
|    approx_kl            | 0.00021339115 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.72         |
|    explained_variance   | 0.182         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0537        |
|    n_updates            | 2980          |
|    policy_gradient_loss | -0.000339     |
|    value_loss           | 0.174         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 300           |
|    time_elapsed         | 1611

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 310           |
|    time_elapsed         | 1664          |
|    total_timesteps      | 158720        |
| train/                  |               |
|    approx_kl            | 0.00015790376 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.69         |
|    explained_variance   | -0.0253       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.113         |
|    n_updates            | 3090          |
|    policy_gradient_loss | -0.000699     |
|    value_loss           | 0.518         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 311           |
|    time_elapsed         | 1669

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 321           |
|    time_elapsed         | 1724          |
|    total_timesteps      | 164352        |
| train/                  |               |
|    approx_kl            | 0.00046066195 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.67         |
|    explained_variance   | -0.0706       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0786        |
|    n_updates            | 3200          |
|    policy_gradient_loss | -0.00137      |
|    value_loss           | 0.226         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 322          |
|    time_elapsed         | 1729    

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 332           |
|    time_elapsed         | 1783          |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 0.00018639059 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.69         |
|    explained_variance   | 0.00263       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0524        |
|    n_updates            | 3310          |
|    policy_gradient_loss | -0.00038      |
|    value_loss           | 0.19          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 333           |
|    time_elapsed         | 1789

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 343          |
|    time_elapsed         | 1842         |
|    total_timesteps      | 175616       |
| train/                  |              |
|    approx_kl            | 0.0007799211 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.67        |
|    explained_variance   | -0.0353      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0478       |
|    n_updates            | 3420         |
|    policy_gradient_loss | -0.00182     |
|    value_loss           | 0.197        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 95          |
|    iterations           | 344         |
|    time_elapsed         | 1848        |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 354           |
|    time_elapsed         | 1902          |
|    total_timesteps      | 181248        |
| train/                  |               |
|    approx_kl            | 0.00014275836 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.67         |
|    explained_variance   | 0.0568        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0427        |
|    n_updates            | 3530          |
|    policy_gradient_loss | -0.00011      |
|    value_loss           | 0.115         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 355           |
|    time_elapsed         | 1907

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 365           |
|    time_elapsed         | 1961          |
|    total_timesteps      | 186880        |
| train/                  |               |
|    approx_kl            | 0.00013053615 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.6          |
|    explained_variance   | -0.0405       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0604        |
|    n_updates            | 3640          |
|    policy_gradient_loss | -0.000777     |
|    value_loss           | 1.3           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 366          |
|    time_elapsed         | 1966    

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 376           |
|    time_elapsed         | 2020          |
|    total_timesteps      | 192512        |
| train/                  |               |
|    approx_kl            | 0.00012703682 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.64         |
|    explained_variance   | 0.726         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.401         |
|    n_updates            | 3750          |
|    policy_gradient_loss | -0.000386     |
|    value_loss           | 0.604         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 377           |
|    time_elapsed         | 2026

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 387          |
|    time_elapsed         | 2079         |
|    total_timesteps      | 198144       |
| train/                  |              |
|    approx_kl            | 0.0004460836 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.62        |
|    explained_variance   | -0.0723      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0755       |
|    n_updates            | 3860         |
|    policy_gradient_loss | -0.000848    |
|    value_loss           | 0.386        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 388          |
|    time_elapsed         | 2084         |
|    total_

<stable_baselines3.ppo.ppo.PPO at 0x20886d81c10>

In [28]:
# Save the `modelrandom` agent to disk under the name 'thisisatestmodel'
# (a relative path, so it resolves against the notebook's working directory).
# NOTE(review): presumably this is the PPO model whose training log is shown
# above — confirm, and consider a more descriptive file name.
modelrandom.save('thisisatestmodel')

In [29]:
# PPO agent using the MLP policy, with a fixed seed (42) so the run is repeatable.
# Each rollout collects 512 steps before an update; logs go to TensorBoard under LOG_DIR.
modelmlp42 = PPO(
    'MlpPolicy',
    env,
    verbose=1,
    tensorboard_log=LOG_DIR,
    learning_rate=1e-6,
    seed=42,
    n_steps=512,
)


Using cuda device
Wrapping the env in a VecTransposeImage.


In [30]:
# Train the seeded MLP-policy agent for 200k environment steps,
# invoking `callback` during training.
modelmlp42.learn(
    total_timesteps=200_000,
    callback=callback,
)

Logging to ./logs/PPO_4
----------------------------
| time/              |     |
|    fps             | 243 |
|    iterations      | 1   |
|    time_elapsed    | 2   |
|    total_timesteps | 512 |
----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 145           |
|    iterations           | 2             |
|    time_elapsed         | 7             |
|    total_timesteps      | 1024          |
| train/                  |               |
|    approx_kl            | 2.7050264e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.00335      |
|    learning_rate        | 1e-06         |
|    loss                 | 353           |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000105     |
|    value_loss           | 490           |
-------------------------

------------------------------------------
| time/                   |              |
|    fps                  | 109          |
|    iterations           | 13           |
|    time_elapsed         | 61           |
|    total_timesteps      | 6656         |
| train/                  |              |
|    approx_kl            | 4.569418e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.95        |
|    explained_variance   | -0.00235     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0992       |
|    n_updates            | 120          |
|    policy_gradient_loss | -0.000132    |
|    value_loss           | 0.198        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 108           |
|    iterations           | 14            |
|    time_elapsed         | 65            |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 105           |
|    iterations           | 24            |
|    time_elapsed         | 116           |
|    total_timesteps      | 12288         |
| train/                  |               |
|    approx_kl            | 3.5844278e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.000133      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.198         |
|    n_updates            | 230           |
|    policy_gradient_loss | -4.75e-05     |
|    value_loss           | 0.445         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 105          |
|    iterations           | 25           |
|    time_elapsed         | 121     

------------------------------------------
| time/                   |              |
|    fps                  | 104          |
|    iterations           | 35           |
|    time_elapsed         | 171          |
|    total_timesteps      | 17920        |
| train/                  |              |
|    approx_kl            | 2.155779e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.95        |
|    explained_variance   | -0.000115    |
|    learning_rate        | 1e-06        |
|    loss                 | 0.137        |
|    n_updates            | 340          |
|    policy_gradient_loss | -9.63e-05    |
|    value_loss           | 0.245        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 104           |
|    iterations           | 36            |
|    time_elapsed         | 176           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 46            |
|    time_elapsed         | 226           |
|    total_timesteps      | 23552         |
| train/                  |               |
|    approx_kl            | 2.0068837e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.0001        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0986        |
|    n_updates            | 450           |
|    policy_gradient_loss | -7.32e-05     |
|    value_loss           | 0.193         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 47            |
|    time_elapsed         | 231 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 57            |
|    time_elapsed         | 280           |
|    total_timesteps      | 29184         |
| train/                  |               |
|    approx_kl            | 4.2915344e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -1.99e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.106         |
|    n_updates            | 560           |
|    policy_gradient_loss | -0.000214     |
|    value_loss           | 0.275         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 58           |
|    time_elapsed         | 287     

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 68            |
|    time_elapsed         | 335           |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 1.2780423e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -6.95e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.209         |
|    n_updates            | 670           |
|    policy_gradient_loss | -0.000144     |
|    value_loss           | 0.43          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 69            |
|    time_elapsed         | 339 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 79            |
|    time_elapsed         | 389           |
|    total_timesteps      | 40448         |
| train/                  |               |
|    approx_kl            | 2.6565976e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.00329       |
|    learning_rate        | 1e-06         |
|    loss                 | 45.7          |
|    n_updates            | 780           |
|    policy_gradient_loss | -1.45e-05     |
|    value_loss           | 87.1          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 80            |
|    time_elapsed         | 394 

-------------------------------------------
| time/                   |               |
|    fps                  | 104           |
|    iterations           | 90            |
|    time_elapsed         | 442           |
|    total_timesteps      | 46080         |
| train/                  |               |
|    approx_kl            | 1.5759142e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 1.96e-05      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0892        |
|    n_updates            | 890           |
|    policy_gradient_loss | -4.49e-05     |
|    value_loss           | 0.177         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 104           |
|    iterations           | 91            |
|    time_elapsed         | 447 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 101           |
|    time_elapsed         | 497           |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 1.2106146e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.0019        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.101         |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.000161     |
|    value_loss           | 0.226         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 102          |
|    time_elapsed         | 502     

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 112           |
|    time_elapsed         | 551           |
|    total_timesteps      | 57344         |
| train/                  |               |
|    approx_kl            | 2.0003645e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 1.85e-06      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.166         |
|    n_updates            | 1110          |
|    policy_gradient_loss | -0.000122     |
|    value_loss           | 0.279         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 113           |
|    time_elapsed         | 556 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 123           |
|    time_elapsed         | 606           |
|    total_timesteps      | 62976         |
| train/                  |               |
|    approx_kl            | 1.6544946e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.000105     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0858        |
|    n_updates            | 1220          |
|    policy_gradient_loss | -3.4e-05      |
|    value_loss           | 0.232         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 124           |
|    time_elapsed         | 611 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 134           |
|    time_elapsed         | 660           |
|    total_timesteps      | 68608         |
| train/                  |               |
|    approx_kl            | 9.1001857e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.000234     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0489        |
|    n_updates            | 1330          |
|    policy_gradient_loss | -5.42e-05     |
|    value_loss           | 0.105         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 135          |
|    time_elapsed         | 665     

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 145           |
|    time_elapsed         | 716           |
|    total_timesteps      | 74240         |
| train/                  |               |
|    approx_kl            | 1.6368926e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -8.82e-06     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.111         |
|    n_updates            | 1440          |
|    policy_gradient_loss | -0.000319     |
|    value_loss           | 0.181         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 146           |
|    time_elapsed         | 721 

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 156          |
|    time_elapsed         | 771          |
|    total_timesteps      | 79872        |
| train/                  |              |
|    approx_kl            | 1.396169e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 3.61e-05     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0946       |
|    n_updates            | 1550         |
|    policy_gradient_loss | -0.000107    |
|    value_loss           | 0.184        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 157          |
|    time_elapsed         | 776          |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 167           |
|    time_elapsed         | 825           |
|    total_timesteps      | 85504         |
| train/                  |               |
|    approx_kl            | 1.8472783e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -7.51e-06     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.253         |
|    n_updates            | 1660          |
|    policy_gradient_loss | -0.000134     |
|    value_loss           | 0.363         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 168          |
|    time_elapsed         | 830     

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 178           |
|    time_elapsed         | 881           |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 1.5720725e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 3.4e-06       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.149         |
|    n_updates            | 1770          |
|    policy_gradient_loss | -6.28e-05     |
|    value_loss           | 0.285         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 179           |
|    time_elapsed         | 886 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 189           |
|    time_elapsed         | 936           |
|    total_timesteps      | 96768         |
| train/                  |               |
|    approx_kl            | 1.7287675e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.00236       |
|    learning_rate        | 1e-06         |
|    loss                 | 32            |
|    n_updates            | 1880          |
|    policy_gradient_loss | -3.76e-05     |
|    value_loss           | 47            |
-------------------------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 103            |
|    iterations           | 190            |
|    time_elapsed         | 

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 200          |
|    time_elapsed         | 991          |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 1.147273e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.000361     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0758       |
|    n_updates            | 1990         |
|    policy_gradient_loss | -0.00022     |
|    value_loss           | 0.17         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 201           |
|    time_elapsed         | 996           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 211          |
|    time_elapsed         | 1045         |
|    total_timesteps      | 108032       |
| train/                  |              |
|    approx_kl            | 6.061862e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -0.00421     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.061        |
|    n_updates            | 2100         |
|    policy_gradient_loss | -0.000357    |
|    value_loss           | 0.167        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 212          |
|    time_elapsed         | 1050         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 222           |
|    time_elapsed         | 1101          |
|    total_timesteps      | 113664        |
| train/                  |               |
|    approx_kl            | 3.2659154e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -3.65e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.059         |
|    n_updates            | 2210          |
|    policy_gradient_loss | -0.000136     |
|    value_loss           | 0.139         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 223          |
|    time_elapsed         | 1105    

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 233          |
|    time_elapsed         | 1155         |
|    total_timesteps      | 119296       |
| train/                  |              |
|    approx_kl            | 9.646406e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 3.26e-05     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.075        |
|    n_updates            | 2320         |
|    policy_gradient_loss | -0.000133    |
|    value_loss           | 0.138        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 234          |
|    time_elapsed         | 1161         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 244           |
|    time_elapsed         | 1210          |
|    total_timesteps      | 124928        |
| train/                  |               |
|    approx_kl            | 1.9546133e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 1.01e-06      |
|    learning_rate        | 1e-06         |
|    loss                 | 20            |
|    n_updates            | 2430          |
|    policy_gradient_loss | -2.21e-05     |
|    value_loss           | 43.3          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 245           |
|    time_elapsed         | 1214

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 255           |
|    time_elapsed         | 1265          |
|    total_timesteps      | 130560        |
| train/                  |               |
|    approx_kl            | 5.4923585e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 6.38e-06      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.114         |
|    n_updates            | 2540          |
|    policy_gradient_loss | -0.00026      |
|    value_loss           | 0.18          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 256           |
|    time_elapsed         | 1270

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 266           |
|    time_elapsed         | 1319          |
|    total_timesteps      | 136192        |
| train/                  |               |
|    approx_kl            | 1.3192766e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -1.57e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.158         |
|    n_updates            | 2650          |
|    policy_gradient_loss | -0.000133     |
|    value_loss           | 0.233         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 267           |
|    time_elapsed         | 1324

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 277           |
|    time_elapsed         | 1375          |
|    total_timesteps      | 141824        |
| train/                  |               |
|    approx_kl            | 6.9093658e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 1.07e-05      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.151         |
|    n_updates            | 2760          |
|    policy_gradient_loss | -0.000236     |
|    value_loss           | 0.225         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 278          |
|    time_elapsed         | 1380    

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 288          |
|    time_elapsed         | 1429         |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 8.423231e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -9.3e-06     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.129        |
|    n_updates            | 2870         |
|    policy_gradient_loss | -0.000287    |
|    value_loss           | 0.293        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 289           |
|    time_elapsed         | 1434          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 299           |
|    time_elapsed         | 1484          |
|    total_timesteps      | 153088        |
| train/                  |               |
|    approx_kl            | 3.4691766e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.0014        |
|    learning_rate        | 1e-06         |
|    loss                 | 152           |
|    n_updates            | 2980          |
|    policy_gradient_loss | -1.17e-05     |
|    value_loss           | 291           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 300           |
|    time_elapsed         | 1489

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 310           |
|    time_elapsed         | 1538          |
|    total_timesteps      | 158720        |
| train/                  |               |
|    approx_kl            | 6.1816536e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -0.000828     |
|    learning_rate        | 1e-06         |
|    loss                 | 444           |
|    n_updates            | 3090          |
|    policy_gradient_loss | -9.98e-06     |
|    value_loss           | 941           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 311           |
|    time_elapsed         | 1543

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 321          |
|    time_elapsed         | 1593         |
|    total_timesteps      | 164352       |
| train/                  |              |
|    approx_kl            | 6.852206e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -4.77e-07    |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0888       |
|    n_updates            | 3200         |
|    policy_gradient_loss | -0.000166    |
|    value_loss           | 0.178        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 322          |
|    time_elapsed         | 1598         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 332          |
|    time_elapsed         | 1648         |
|    total_timesteps      | 169984       |
| train/                  |              |
|    approx_kl            | 1.402339e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 3.91e-05     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.138        |
|    n_updates            | 3310         |
|    policy_gradient_loss | -8.93e-05    |
|    value_loss           | 0.291        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 333           |
|    time_elapsed         | 1653          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 343           |
|    time_elapsed         | 1702          |
|    total_timesteps      | 175616        |
| train/                  |               |
|    approx_kl            | 1.9216444e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0983        |
|    n_updates            | 3420          |
|    policy_gradient_loss | -0.000303     |
|    value_loss           | 0.176         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 344          |
|    time_elapsed         | 1707    

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 354           |
|    time_elapsed         | 1757          |
|    total_timesteps      | 181248        |
| train/                  |               |
|    approx_kl            | 1.4067627e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 9.06e-05      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0969        |
|    n_updates            | 3530          |
|    policy_gradient_loss | -6.14e-05     |
|    value_loss           | 0.217         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 355           |
|    time_elapsed         | 1762

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 365           |
|    time_elapsed         | 1811          |
|    total_timesteps      | 186880        |
| train/                  |               |
|    approx_kl            | 2.0444859e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.136         |
|    n_updates            | 3640          |
|    policy_gradient_loss | -0.000101     |
|    value_loss           | 0.266         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 366           |
|    time_elapsed         | 1816

--------------------------------------------
| time/                   |                |
|    fps                  | 103            |
|    iterations           | 376            |
|    time_elapsed         | 1867           |
|    total_timesteps      | 192512         |
| train/                  |                |
|    approx_kl            | 1.10423425e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.92          |
|    explained_variance   | -6.44e-06      |
|    learning_rate        | 1e-06          |
|    loss                 | 0.0653         |
|    n_updates            | 3750           |
|    policy_gradient_loss | -0.000486      |
|    value_loss           | 0.166          |
--------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 377           |
|    time_elap

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 387          |
|    time_elapsed         | 1921         |
|    total_timesteps      | 198144       |
| train/                  |              |
|    approx_kl            | 1.384411e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | 2.92e-05     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0496       |
|    n_updates            | 3860         |
|    policy_gradient_loss | -6.77e-05    |
|    value_loss           | 0.152        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 388           |
|    time_elapsed         | 1926          |
|    t

<stable_baselines3.ppo.ppo.PPO at 0x208906df820>

In [31]:
# Persist the seed-42 MlpPolicy PPO model under a fixed file name so it can be
# reloaded later without repeating the training run.
modelmlp42.save(path='thisisatestmlpseed42model')

In [32]:
# PPO with the MLP policy, seed 64.
# Same settings as the other MLP runs in this notebook; only the seed differs,
# so runs can be compared across seeds.
modelmlp64 = PPO(
    policy='MlpPolicy',
    env=env,
    learning_rate=1e-6,       # identical value to 0.000001
    n_steps=512,              # rollout length per update; logs advance 512 timesteps per iteration
    seed=64,
    tensorboard_log=LOG_DIR,
    verbose=1,                # print the per-iteration training table
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [33]:
# Train the seed-64 MLP-policy model for 200k environment steps, invoking the
# notebook's callback during training.
# NOTE(review): the logged metrics for this run (approx_kl between ~1e-8 and
# ~1e-5, clip_fraction 0, entropy_loss staying near -1.94) suggest the policy is
# barely changing -- confirm that the 1e-06 learning rate is intentional.
modelmlp64.learn(
    callback=callback,
    total_timesteps=200_000,
)

Logging to ./logs/PPO_5
----------------------------
| time/              |     |
|    fps             | 202 |
|    iterations      | 1   |
|    time_elapsed    | 2   |
|    total_timesteps | 512 |
----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 136           |
|    iterations           | 2             |
|    time_elapsed         | 7             |
|    total_timesteps      | 1024          |
| train/                  |               |
|    approx_kl            | 5.8825826e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.00631      |
|    learning_rate        | 1e-06         |
|    loss                 | 129           |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000122     |
|    value_loss           | 230           |
-------------------------

--------------------------------------------
| time/                   |                |
|    fps                  | 106            |
|    iterations           | 13             |
|    time_elapsed         | 62             |
|    total_timesteps      | 6656           |
| train/                  |                |
|    approx_kl            | 1.43776415e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.95          |
|    explained_variance   | 0.000575       |
|    learning_rate        | 1e-06          |
|    loss                 | 0.121          |
|    n_updates            | 120            |
|    policy_gradient_loss | -0.000167      |
|    value_loss           | 0.353          |
--------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 106          |
|    iterations           | 14           |
|    time_elapsed 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 24            |
|    time_elapsed         | 118           |
|    total_timesteps      | 12288         |
| train/                  |               |
|    approx_kl            | 1.9519357e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 6.56e-07      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.173         |
|    n_updates            | 230           |
|    policy_gradient_loss | -7.96e-05     |
|    value_loss           | 0.4           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 25            |
|    time_elapsed         | 123 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 35            |
|    time_elapsed         | 172           |
|    total_timesteps      | 17920         |
| train/                  |               |
|    approx_kl            | 1.1331867e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -1.31e-06     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.14          |
|    n_updates            | 340           |
|    policy_gradient_loss | -5.7e-05      |
|    value_loss           | 0.246         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 36            |
|    time_elapsed         | 177 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 46            |
|    time_elapsed         | 228           |
|    total_timesteps      | 23552         |
| train/                  |               |
|    approx_kl            | 5.3661643e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 1.34e-05      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0649        |
|    n_updates            | 450           |
|    policy_gradient_loss | -0.000161     |
|    value_loss           | 0.192         |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 103         |
|    iterations           | 47          |
|    time_elapsed         | 232         

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 57            |
|    time_elapsed         | 282           |
|    total_timesteps      | 29184         |
| train/                  |               |
|    approx_kl            | 5.8211153e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 1.22e-05      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.119         |
|    n_updates            | 560           |
|    policy_gradient_loss | -0.000133     |
|    value_loss           | 0.248         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 58           |
|    time_elapsed         | 288     

------------------------------------------
| time/                   |              |
|    fps                  | 103          |
|    iterations           | 68           |
|    time_elapsed         | 337          |
|    total_timesteps      | 34816        |
| train/                  |              |
|    approx_kl            | 3.251247e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -4.53e-06    |
|    learning_rate        | 1e-06        |
|    loss                 | 0.167        |
|    n_updates            | 670          |
|    policy_gradient_loss | -6.82e-05    |
|    value_loss           | 0.234        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 69            |
|    time_elapsed         | 342           |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 79          |
|    time_elapsed         | 393         |
|    total_timesteps      | 40448       |
| train/                  |             |
|    approx_kl            | 3.38885e-06 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.94       |
|    explained_variance   | -0.00022    |
|    learning_rate        | 1e-06       |
|    loss                 | 0.081       |
|    n_updates            | 780         |
|    policy_gradient_loss | -0.000151   |
|    value_loss           | 0.152       |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 80            |
|    time_elapsed         | 398           |
|    total_timesteps    

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 90           |
|    time_elapsed         | 448          |
|    total_timesteps      | 46080        |
| train/                  |              |
|    approx_kl            | 2.072542e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -5.38e-05    |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0597       |
|    n_updates            | 890          |
|    policy_gradient_loss | -9.12e-05    |
|    value_loss           | 0.189        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 91            |
|    time_elapsed         | 453           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 101           |
|    time_elapsed         | 503           |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 3.2225507e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 3.76e-06      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0882        |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.000451     |
|    value_loss           | 0.18          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 102          |
|    time_elapsed         | 508     

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 112           |
|    time_elapsed         | 558           |
|    total_timesteps      | 57344         |
| train/                  |               |
|    approx_kl            | 1.0593794e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 1.55e-06      |
|    learning_rate        | 1e-06         |
|    loss                 | 181           |
|    n_updates            | 1110          |
|    policy_gradient_loss | -1.5e-06      |
|    value_loss           | 301           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 113           |
|    time_elapsed         | 563 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 123           |
|    time_elapsed         | 613           |
|    total_timesteps      | 62976         |
| train/                  |               |
|    approx_kl            | 5.6669815e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -6.93e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0682        |
|    n_updates            | 1220          |
|    policy_gradient_loss | -0.000143     |
|    value_loss           | 0.16          |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 124         |
|    time_elapsed         | 618         

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 134           |
|    time_elapsed         | 667           |
|    total_timesteps      | 68608         |
| train/                  |               |
|    approx_kl            | 3.9776787e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -6.79e-06     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.124         |
|    n_updates            | 1330          |
|    policy_gradient_loss | -0.000165     |
|    value_loss           | 0.22          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 135           |
|    time_elapsed         | 672 

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 145          |
|    time_elapsed         | 723          |
|    total_timesteps      | 74240        |
| train/                  |              |
|    approx_kl            | 8.353731e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -0.00014     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0693       |
|    n_updates            | 1440         |
|    policy_gradient_loss | -0.000181    |
|    value_loss           | 0.15         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 146           |
|    time_elapsed         | 728           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 156           |
|    time_elapsed         | 778           |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 5.9590675e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 3.28e-06      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0524        |
|    n_updates            | 1550          |
|    policy_gradient_loss | -0.000102     |
|    value_loss           | 0.158         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 157           |
|    time_elapsed         | 783 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 167           |
|    time_elapsed         | 833           |
|    total_timesteps      | 85504         |
| train/                  |               |
|    approx_kl            | 1.4291145e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 5.23e-05      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.147         |
|    n_updates            | 1660          |
|    policy_gradient_loss | -6.08e-05     |
|    value_loss           | 0.303         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 168           |
|    time_elapsed         | 838 

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 178          |
|    time_elapsed         | 888          |
|    total_timesteps      | 91136        |
| train/                  |              |
|    approx_kl            | 1.418055e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 5.84e-06     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0511       |
|    n_updates            | 1770         |
|    policy_gradient_loss | -2.83e-05    |
|    value_loss           | 0.173        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 179           |
|    time_elapsed         | 893           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 189           |
|    time_elapsed         | 943           |
|    total_timesteps      | 96768         |
| train/                  |               |
|    approx_kl            | 2.5690533e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 3.61e-05      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.048         |
|    n_updates            | 1880          |
|    policy_gradient_loss | -0.000125     |
|    value_loss           | 0.161         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 190          |
|    time_elapsed         | 948     

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 200          |
|    time_elapsed         | 998          |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 4.845322e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 2.72e-05     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.188        |
|    n_updates            | 1990         |
|    policy_gradient_loss | -0.00011     |
|    value_loss           | 0.281        |
------------------------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 102            |
|    iterations           | 201            |
|    time_elapsed         | 1003           |
|

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 211           |
|    time_elapsed         | 1053          |
|    total_timesteps      | 108032        |
| train/                  |               |
|    approx_kl            | 2.9458897e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -1.03e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.112         |
|    n_updates            | 2100          |
|    policy_gradient_loss | -8.99e-05     |
|    value_loss           | 0.226         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 212          |
|    time_elapsed         | 1058    

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 222          |
|    time_elapsed         | 1110         |
|    total_timesteps      | 113664       |
| train/                  |              |
|    approx_kl            | 8.555362e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 1e-06        |
|    loss                 | 10.5         |
|    n_updates            | 2210         |
|    policy_gradient_loss | -1.7e-05     |
|    value_loss           | 15.8         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 223          |
|    time_elapsed         | 1115         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 233          |
|    time_elapsed         | 1165         |
|    total_timesteps      | 119296       |
| train/                  |              |
|    approx_kl            | 6.291538e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 7.57e-05     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.128        |
|    n_updates            | 2320         |
|    policy_gradient_loss | -0.000674    |
|    value_loss           | 0.202        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 234          |
|    time_elapsed         | 1170         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 244           |
|    time_elapsed         | 1220          |
|    total_timesteps      | 124928        |
| train/                  |               |
|    approx_kl            | 2.9015355e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -3.39e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0898        |
|    n_updates            | 2430          |
|    policy_gradient_loss | -0.000127     |
|    value_loss           | 0.164         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 245           |
|    time_elapsed         | 1225

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 255          |
|    time_elapsed         | 1275         |
|    total_timesteps      | 130560       |
| train/                  |              |
|    approx_kl            | 5.995389e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.000108     |
|    learning_rate        | 1e-06        |
|    loss                 | 42.8         |
|    n_updates            | 2540         |
|    policy_gradient_loss | -2.39e-06    |
|    value_loss           | 128          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 256           |
|    time_elapsed         | 1280          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 266           |
|    time_elapsed         | 1329          |
|    total_timesteps      | 136192        |
| train/                  |               |
|    approx_kl            | 5.5413693e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 3e-05         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.274         |
|    n_updates            | 2650          |
|    policy_gradient_loss | -2.67e-05     |
|    value_loss           | 0.492         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 267          |
|    time_elapsed         | 1334    

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 277           |
|    time_elapsed         | 1384          |
|    total_timesteps      | 141824        |
| train/                  |               |
|    approx_kl            | 1.8611318e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -1.55e-06     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.245         |
|    n_updates            | 2760          |
|    policy_gradient_loss | -4.06e-05     |
|    value_loss           | 0.592         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 278          |
|    time_elapsed         | 1389    

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 288           |
|    time_elapsed         | 1438          |
|    total_timesteps      | 147456        |
| train/                  |               |
|    approx_kl            | 6.9336966e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -2.71e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.159         |
|    n_updates            | 2870          |
|    policy_gradient_loss | -9.4e-06      |
|    value_loss           | 0.381         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 289          |
|    time_elapsed         | 1443    

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 299          |
|    time_elapsed         | 1494         |
|    total_timesteps      | 153088       |
| train/                  |              |
|    approx_kl            | 3.827852e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0            |
|    learning_rate        | 1e-06        |
|    loss                 | 0.241        |
|    n_updates            | 2980         |
|    policy_gradient_loss | -0.000202    |
|    value_loss           | 0.501        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 300           |
|    time_elapsed         | 1499          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 310           |
|    time_elapsed         | 1548          |
|    total_timesteps      | 158720        |
| train/                  |               |
|    approx_kl            | 7.1013346e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 160           |
|    n_updates            | 3090          |
|    policy_gradient_loss | -7.29e-07     |
|    value_loss           | 350           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 311           |
|    time_elapsed         | 1554

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 321           |
|    time_elapsed         | 1603          |
|    total_timesteps      | 164352        |
| train/                  |               |
|    approx_kl            | 1.6354606e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.166         |
|    n_updates            | 3200          |
|    policy_gradient_loss | -0.000272     |
|    value_loss           | 0.313         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 322           |
|    time_elapsed         | 1608

-----------------------------------------
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 332         |
|    time_elapsed         | 1659        |
|    total_timesteps      | 169984      |
| train/                  |             |
|    approx_kl            | 5.26465e-06 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.93       |
|    explained_variance   | 0           |
|    learning_rate        | 1e-06       |
|    loss                 | 0.161       |
|    n_updates            | 3310        |
|    policy_gradient_loss | -0.000222   |
|    value_loss           | 0.339       |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 333           |
|    time_elapsed         | 1664          |
|    total_timesteps    

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 343           |
|    time_elapsed         | 1713          |
|    total_timesteps      | 175616        |
| train/                  |               |
|    approx_kl            | 7.5662974e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.133         |
|    n_updates            | 3420          |
|    policy_gradient_loss | -0.000123     |
|    value_loss           | 0.314         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 344          |
|    time_elapsed         | 1718    

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 354          |
|    time_elapsed         | 1769         |
|    total_timesteps      | 181248       |
| train/                  |              |
|    approx_kl            | 5.390728e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -7.51e-06    |
|    learning_rate        | 1e-06        |
|    loss                 | 0.107        |
|    n_updates            | 3530         |
|    policy_gradient_loss | -5.5e-05     |
|    value_loss           | 0.244        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 355          |
|    time_elapsed         | 1774         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 365           |
|    time_elapsed         | 1824          |
|    total_timesteps      | 186880        |
| train/                  |               |
|    approx_kl            | 1.5698606e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.000147      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.161         |
|    n_updates            | 3640          |
|    policy_gradient_loss | -0.000273     |
|    value_loss           | 0.33          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 366           |
|    time_elapsed         | 1828

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 376           |
|    time_elapsed         | 1879          |
|    total_timesteps      | 192512        |
| train/                  |               |
|    approx_kl            | 3.6577694e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -3.58e-07     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0886        |
|    n_updates            | 3750          |
|    policy_gradient_loss | -0.000418     |
|    value_loss           | 0.162         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 377          |
|    time_elapsed         | 1884    

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 387          |
|    time_elapsed         | 1934         |
|    total_timesteps      | 198144       |
| train/                  |              |
|    approx_kl            | 9.555777e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -0.00232     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.069        |
|    n_updates            | 3860         |
|    policy_gradient_loss | -0.00058     |
|    value_loss           | 0.162        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 388           |
|    time_elapsed         | 1939          |
|    t

<stable_baselines3.ppo.ppo.PPO at 0x208ae6f2a90>

In [34]:
# Persist the trained `modelmlp64` agent to disk under this base name so it can be
# reloaded later without retraining (Stable-Baselines3 writes the model as a zip archive).
# NOTE(review): presumably this is the seed-64 MLP-policy run trained in the preceding cell — confirm.
modelmlp64.save('thisisatestmlpseed64model')

In [35]:
# PPO with the MLP policy, random-seed variant: no `seed` argument is passed,
# so this run is not pinned to a fixed seed.
# Training metrics are written to TensorBoard under LOG_DIR; verbose=1 prints
# the per-iteration progress tables to the cell output.
modelmlprandom = PPO(
    policy='MlpPolicy',
    env=env,
    learning_rate=1e-6,
    n_steps=512,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [36]:
# Train the random-seed MLP agent for 200k environment steps.
# `callback` is the training callback created earlier in the notebook; it is
# invoked by the learner during training.
# The call is left as the cell's last expression so its return value is
# still displayed as the cell output.
modelmlprandom.learn(
    total_timesteps=200_000,
    callback=callback,
)

Logging to ./logs/PPO_6
----------------------------
| time/              |     |
|    fps             | 238 |
|    iterations      | 1   |
|    time_elapsed    | 2   |
|    total_timesteps | 512 |
----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 146           |
|    iterations           | 2             |
|    time_elapsed         | 7             |
|    total_timesteps      | 1024          |
| train/                  |               |
|    approx_kl            | 3.0618394e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.00694      |
|    learning_rate        | 1e-06         |
|    loss                 | 291           |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000107     |
|    value_loss           | 531           |
-------------------------

-------------------------------------------
| time/                   |               |
|    fps                  | 108           |
|    iterations           | 13            |
|    time_elapsed         | 61            |
|    total_timesteps      | 6656          |
| train/                  |               |
|    approx_kl            | 3.2264506e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -4.68e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0922        |
|    n_updates            | 120           |
|    policy_gradient_loss | -4.45e-05     |
|    value_loss           | 0.21          |
-------------------------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 108            |
|    iterations           | 14             |
|    time_elapsed         | 

-------------------------------------------
| time/                   |               |
|    fps                  | 104           |
|    iterations           | 24            |
|    time_elapsed         | 117           |
|    total_timesteps      | 12288         |
| train/                  |               |
|    approx_kl            | 1.0353047e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.000424     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0992        |
|    n_updates            | 230           |
|    policy_gradient_loss | -0.000186     |
|    value_loss           | 0.264         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 104           |
|    iterations           | 25            |
|    time_elapsed         | 122 

------------------------------------------
| time/                   |              |
|    fps                  | 104          |
|    iterations           | 35           |
|    time_elapsed         | 171          |
|    total_timesteps      | 17920        |
| train/                  |              |
|    approx_kl            | 5.568145e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.95        |
|    explained_variance   | 2.26e-06     |
|    learning_rate        | 1e-06        |
|    loss                 | 9.27         |
|    n_updates            | 340          |
|    policy_gradient_loss | -0.000167    |
|    value_loss           | 33.7         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 104           |
|    iterations           | 36            |
|    time_elapsed         | 176           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 46            |
|    time_elapsed         | 227           |
|    total_timesteps      | 23552         |
| train/                  |               |
|    approx_kl            | 5.9168087e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.000535      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.123         |
|    n_updates            | 450           |
|    policy_gradient_loss | -0.000128     |
|    value_loss           | 0.28          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 47            |
|    time_elapsed         | 232 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 57            |
|    time_elapsed         | 283           |
|    total_timesteps      | 29184         |
| train/                  |               |
|    approx_kl            | 3.6787242e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 2.68e-06      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.293         |
|    n_updates            | 560           |
|    policy_gradient_loss | -0.000141     |
|    value_loss           | 0.637         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 58            |
|    time_elapsed         | 288 

-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 68            |
|    time_elapsed         | 337           |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 6.3562766e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -4.29e-06     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.374         |
|    n_updates            | 670           |
|    policy_gradient_loss | -4.18e-05     |
|    value_loss           | 0.79          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 103           |
|    iterations           | 69            |
|    time_elapsed         | 342 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 79            |
|    time_elapsed         | 393           |
|    total_timesteps      | 40448         |
| train/                  |               |
|    approx_kl            | 2.5333837e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -1.79e-06     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.541         |
|    n_updates            | 780           |
|    policy_gradient_loss | -0.000625     |
|    value_loss           | 0.952         |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 102         |
|    iterations           | 80          |
|    time_elapsed         | 398         

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 90            |
|    time_elapsed         | 447           |
|    total_timesteps      | 46080         |
| train/                  |               |
|    approx_kl            | 1.6364502e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.242         |
|    n_updates            | 890           |
|    policy_gradient_loss | -9.41e-05     |
|    value_loss           | 0.507         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 91            |
|    time_elapsed         | 452 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 101           |
|    time_elapsed         | 503           |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 1.3048761e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.27          |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.000245     |
|    value_loss           | 0.544         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 102           |
|    time_elapsed         | 508 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 112           |
|    time_elapsed         | 558           |
|    total_timesteps      | 57344         |
| train/                  |               |
|    approx_kl            | 4.5326306e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 5.07e-06      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.24          |
|    n_updates            | 1110          |
|    policy_gradient_loss | -9.76e-05     |
|    value_loss           | 0.448         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 113           |
|    time_elapsed         | 563 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 123           |
|    time_elapsed         | 613           |
|    total_timesteps      | 62976         |
| train/                  |               |
|    approx_kl            | 3.3305143e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -1.67e-06     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.194         |
|    n_updates            | 1220          |
|    policy_gradient_loss | -0.000624     |
|    value_loss           | 0.353         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 124           |
|    time_elapsed         | 618 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 134           |
|    time_elapsed         | 667           |
|    total_timesteps      | 68608         |
| train/                  |               |
|    approx_kl            | 3.5521807e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.307         |
|    n_updates            | 1330          |
|    policy_gradient_loss | -0.000125     |
|    value_loss           | 0.773         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 135           |
|    time_elapsed         | 674 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 145           |
|    time_elapsed         | 723           |
|    total_timesteps      | 74240         |
| train/                  |               |
|    approx_kl            | 2.4409965e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.396         |
|    n_updates            | 1440          |
|    policy_gradient_loss | -6.77e-05     |
|    value_loss           | 0.781         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 146           |
|    time_elapsed         | 728 

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 156           |
|    time_elapsed         | 779           |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 1.3903482e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.385         |
|    n_updates            | 1550          |
|    policy_gradient_loss | -4.34e-05     |
|    value_loss           | 0.764         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 157           |
|    time_elapsed         | 784 

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 167          |
|    time_elapsed         | 833          |
|    total_timesteps      | 85504        |
| train/                  |              |
|    approx_kl            | 6.834045e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0            |
|    learning_rate        | 1e-06        |
|    loss                 | 0.299        |
|    n_updates            | 1660         |
|    policy_gradient_loss | -0.000173    |
|    value_loss           | 0.659        |
------------------------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 102            |
|    iterations           | 168            |
|    time_elapsed         | 838            |
|

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 178           |
|    time_elapsed         | 889           |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 2.1388987e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.528         |
|    n_updates            | 1770          |
|    policy_gradient_loss | -5.09e-05     |
|    value_loss           | 0.992         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 179           |
|    time_elapsed         | 894 

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 189          |
|    time_elapsed         | 943          |
|    total_timesteps      | 96768        |
| train/                  |              |
|    approx_kl            | 5.023589e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0            |
|    learning_rate        | 1e-06        |
|    loss                 | 0.354        |
|    n_updates            | 1880         |
|    policy_gradient_loss | -0.000474    |
|    value_loss           | 0.708        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 190           |
|    time_elapsed         | 948           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 200          |
|    time_elapsed         | 999          |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 4.917034e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0            |
|    learning_rate        | 1e-06        |
|    loss                 | 0.523        |
|    n_updates            | 1990         |
|    policy_gradient_loss | -0.000181    |
|    value_loss           | 0.977        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 201           |
|    time_elapsed         | 1004          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 211          |
|    time_elapsed         | 1054         |
|    total_timesteps      | 108032       |
| train/                  |              |
|    approx_kl            | 3.416068e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.267        |
|    n_updates            | 2100         |
|    policy_gradient_loss | -0.000135    |
|    value_loss           | 0.614        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 212           |
|    time_elapsed         | 1059          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 222           |
|    time_elapsed         | 1110          |
|    total_timesteps      | 113664        |
| train/                  |               |
|    approx_kl            | 1.9976869e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 1.63e-05      |
|    learning_rate        | 1e-06         |
|    loss                 | 130           |
|    n_updates            | 2210          |
|    policy_gradient_loss | -2.1e-05      |
|    value_loss           | 250           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 102           |
|    iterations           | 223           |
|    time_elapsed         | 1115

------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 233          |
|    time_elapsed         | 1165         |
|    total_timesteps      | 119296       |
| train/                  |              |
|    approx_kl            | 9.115669e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 1.19e-07     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.307        |
|    n_updates            | 2320         |
|    policy_gradient_loss | -6.63e-05    |
|    value_loss           | 0.707        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 102          |
|    iterations           | 234          |
|    time_elapsed         | 1170         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 68            |
|    iterations           | 244           |
|    time_elapsed         | 1824          |
|    total_timesteps      | 124928        |
| train/                  |               |
|    approx_kl            | 1.2631994e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.39          |
|    n_updates            | 2430          |
|    policy_gradient_loss | -0.000154     |
|    value_loss           | 0.781         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 68            |
|    iterations           | 245           |
|    time_elapsed         | 1829

-------------------------------------------
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 255           |
|    time_elapsed         | 1880          |
|    total_timesteps      | 130560        |
| train/                  |               |
|    approx_kl            | 3.4467317e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.208         |
|    n_updates            | 2540          |
|    policy_gradient_loss | -0.000793     |
|    value_loss           | 0.537         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 69            |
|    iterations           | 256           |
|    time_elapsed         | 1885

-------------------------------------------
| time/                   |               |
|    fps                  | 70            |
|    iterations           | 266           |
|    time_elapsed         | 1934          |
|    total_timesteps      | 136192        |
| train/                  |               |
|    approx_kl            | 2.2619497e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.223         |
|    n_updates            | 2650          |
|    policy_gradient_loss | -0.000134     |
|    value_loss           | 0.528         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 70            |
|    iterations           | 267           |
|    time_elapsed         | 1939

-------------------------------------------
| time/                   |               |
|    fps                  | 71            |
|    iterations           | 277           |
|    time_elapsed         | 1990          |
|    total_timesteps      | 141824        |
| train/                  |               |
|    approx_kl            | 5.6384597e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.379         |
|    n_updates            | 2760          |
|    policy_gradient_loss | -0.000263     |
|    value_loss           | 0.821         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 71            |
|    iterations           | 278           |
|    time_elapsed         | 1995

------------------------------------------
| time/                   |              |
|    fps                  | 72           |
|    iterations           | 288          |
|    time_elapsed         | 2044         |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 2.550031e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0            |
|    learning_rate        | 1e-06        |
|    loss                 | 0.306        |
|    n_updates            | 2870         |
|    policy_gradient_loss | -0.000154    |
|    value_loss           | 0.671        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 72           |
|    iterations           | 289          |
|    time_elapsed         | 2049         |
|    total_

--------------------------------------------
| time/                   |                |
|    fps                  | 72             |
|    iterations           | 299            |
|    time_elapsed         | 2100           |
|    total_timesteps      | 153088         |
| train/                  |                |
|    approx_kl            | 0.000101317884 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.93          |
|    explained_variance   | 0              |
|    learning_rate        | 1e-06          |
|    loss                 | 0.266          |
|    n_updates            | 2980           |
|    policy_gradient_loss | -0.000555      |
|    value_loss           | 0.639          |
--------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 72           |
|    iterations           | 300          |
|    time_elapsed 

-------------------------------------------
| time/                   |               |
|    fps                  | 73            |
|    iterations           | 310           |
|    time_elapsed         | 2154          |
|    total_timesteps      | 158720        |
| train/                  |               |
|    approx_kl            | 2.1628803e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -1.19e-07     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.432         |
|    n_updates            | 3090          |
|    policy_gradient_loss | -0.000139     |
|    value_loss           | 0.817         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 73            |
|    iterations           | 311           |
|    time_elapsed         | 2160

------------------------------------------
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 321          |
|    time_elapsed         | 2210         |
|    total_timesteps      | 164352       |
| train/                  |              |
|    approx_kl            | 3.674277e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 2.38e-07     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.272        |
|    n_updates            | 3200         |
|    policy_gradient_loss | -0.000285    |
|    value_loss           | 0.567        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 74          |
|    iterations           | 322         |
|    time_elapsed         | 2215        |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 75            |
|    iterations           | 332           |
|    time_elapsed         | 2265          |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 2.4775858e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.334         |
|    n_updates            | 3310          |
|    policy_gradient_loss | -0.000346     |
|    value_loss           | 0.878         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 75            |
|    iterations           | 333           |
|    time_elapsed         | 2270

-------------------------------------------
| time/                   |               |
|    fps                  | 75            |
|    iterations           | 343           |
|    time_elapsed         | 2320          |
|    total_timesteps      | 175616        |
| train/                  |               |
|    approx_kl            | 2.1024607e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.00333       |
|    learning_rate        | 1e-06         |
|    loss                 | 3.6           |
|    n_updates            | 3420          |
|    policy_gradient_loss | -6.04e-05     |
|    value_loss           | 13.9          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 75            |
|    iterations           | 344           |
|    time_elapsed         | 2325

-------------------------------------------
| time/                   |               |
|    fps                  | 76            |
|    iterations           | 354           |
|    time_elapsed         | 2375          |
|    total_timesteps      | 181248        |
| train/                  |               |
|    approx_kl            | 6.7282235e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.268         |
|    n_updates            | 3530          |
|    policy_gradient_loss | -0.000121     |
|    value_loss           | 0.578         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 76            |
|    iterations           | 355           |
|    time_elapsed         | 2380

-----------------------------------------
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 365         |
|    time_elapsed         | 2430        |
|    total_timesteps      | 186880      |
| train/                  |             |
|    approx_kl            | 2.76044e-06 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.93       |
|    explained_variance   | 5.96e-08    |
|    learning_rate        | 1e-06       |
|    loss                 | 0.318       |
|    n_updates            | 3640        |
|    policy_gradient_loss | -0.00011    |
|    value_loss           | 0.652       |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 76           |
|    iterations           | 366          |
|    time_elapsed         | 2435         |
|    total_timesteps      | 1

------------------------------------------
| time/                   |              |
|    fps                  | 77           |
|    iterations           | 376          |
|    time_elapsed         | 2486         |
|    total_timesteps      | 192512       |
| train/                  |              |
|    approx_kl            | 1.517823e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -1.19e-07    |
|    learning_rate        | 1e-06        |
|    loss                 | 57           |
|    n_updates            | 3750         |
|    policy_gradient_loss | -5.54e-05    |
|    value_loss           | 86.3         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 77            |
|    iterations           | 377           |
|    time_elapsed         | 2491          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 77            |
|    iterations           | 387           |
|    time_elapsed         | 2541          |
|    total_timesteps      | 198144        |
| train/                  |               |
|    approx_kl            | 4.4878107e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0             |
|    learning_rate        | 1e-06         |
|    loss                 | 0.198         |
|    n_updates            | 3860          |
|    policy_gradient_loss | -0.000227     |
|    value_loss           | 0.433         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 78            |
|    iterations           | 388           |
|    time_elapsed         | 2546

<stable_baselines3.ppo.ppo.PPO at 0x208906df460>

In [37]:
# Write the PPO model that was trained without a fixed seed to disk.
modelmlprandom.save(path='thisisatestmlprandomseedmlpmodel')

In [38]:
# A2C agent on the same wrapped Mario environment, seeded with 42.
from stable_baselines3 import A2C

modelA2C42 = A2C(
    policy='MlpPolicy',
    env=env,
    learning_rate=1e-6,
    n_steps=512,
    seed=42,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [42]:
# Train the seed-42 A2C agent for 200k timesteps; `callback` is defined earlier in the notebook.
modelA2C42.learn(callback=callback, total_timesteps=200000)

Logging to ./logs/A2C_1
-------------------------------------
| time/                 |           |
|    fps                | 175       |
|    iterations         | 100       |
|    time_elapsed       | 292       |
|    total_timesteps    | 51200     |
| train/                |           |
|    entropy_loss       | -1.95     |
|    explained_variance | -3.29e-05 |
|    learning_rate      | 1e-06     |
|    n_updates          | 99        |
|    policy_loss        | 80.8      |
|    value_loss         | 4.3e+03   |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 174       |
|    iterations         | 200       |
|    time_elapsed       | 586       |
|    total_timesteps    | 102400    |
| train/                |           |
|    entropy_loss       | -1.95     |
|    explained_variance | -0.000122 |
|    learning_rate      | 1e-06     |
|    n_updates          | 199       |
|    policy_loss        | 

<stable_baselines3.a2c.a2c.A2C at 0x208ae6f6b20>

In [43]:
# Write the seed-42 A2C model to disk.
modelA2C42.save(path='thisisatesta2cseed42model')

In [44]:
#USING A2C MODEL WITH SEED 64
modelA2C64=A2C('MlpPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.000001,seed=64,
            n_steps=512)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [45]:
# Train the seed-64 A2C agent for 200k timesteps; `callback` is defined earlier in the notebook.
modelA2C64.learn(callback=callback, total_timesteps=200000)

Logging to ./logs/A2C_2
-------------------------------------
| time/                 |           |
|    fps                | 174       |
|    iterations         | 100       |
|    time_elapsed       | 293       |
|    total_timesteps    | 51200     |
| train/                |           |
|    entropy_loss       | -1.95     |
|    explained_variance | -0.000409 |
|    learning_rate      | 1e-06     |
|    n_updates          | 99        |
|    policy_loss        | 0.00704   |
|    value_loss         | 0.337     |
-------------------------------------


KeyboardInterrupt: 

In [None]:
# Write the seed-64 A2C model to disk.
# NOTE(review): the recorded training run for this model ended in a
# KeyboardInterrupt, so re-run training to completion before relying on this file.
modelA2C64.save(path='thisisatesta2cseed64model')

In [None]:
# A2C agent with NO fixed seed (the "random seed" arm of the experiment).
# The original cell passed seed=64, which duplicated the modelA2C64 run instead
# of producing an unseeded one. seed=None is the Stable-Baselines3 default and
# means the algorithm does not pin the RNGs itself.
# NOTE(review): RNG state left over from earlier seeded runs in the same kernel
# may still influence this run; restart the kernel first for a clean unseeded run.
modelA2CRAN = A2C(
    'MlpPolicy',
    env,
    verbose=1,
    tensorboard_log=LOG_DIR,
    learning_rate=0.000001,
    seed=None,
    n_steps=512,
)

In [None]:
# Train the random-seed A2C agent for 200k timesteps; `callback` is defined earlier in the notebook.
modelA2CRAN.learn(callback=callback, total_timesteps=200000)

In [None]:

# Write the random-seed A2C model to disk.
modelA2CRAN.save(path='thisisatesta2cseedRANDOMmodel')

In [None]:
# Requests Python 3.7.0 via conda; this bare form presumably relies on IPython
# automagic resolving `conda` to the %conda line magic — TODO confirm.
# NOTE(review): this cell has no execution count and sits after all training
# cells; confirm it is still needed, and restart the kernel after running it.
conda install python=3.7.0