In [61]:
import os
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [62]:
environment_name = 'CartPole-v0'
env = gym.make(environment_name)

In [63]:
environment_name

'CartPole-v0'

# Understanding Environment

In [64]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action =  env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        print("states", n_state)
        print("reward", reward)
        print("done", done)
        print("info", info)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

states [ 0.01546588 -0.2093556   0.00448243  0.31078964]
reward 1.0
done False
info {}
states [ 0.01127877 -0.01429779  0.01069823  0.0195237 ]
reward 1.0
done False
info {}
states [ 0.01099281 -0.20957151  0.0110887   0.31556275]
reward 1.0
done False
info {}
states [ 0.00680138 -0.40484965  0.01739996  0.61172193]
reward 1.0
done False
info {}
states [-0.00129561 -0.20997515  0.02963439  0.3245698 ]
reward 1.0
done False
info {}
states [-0.00549512 -0.01528741  0.03612579  0.04137763]
reward 1.0
done False
info {}
states [-0.00580086 -0.21090828  0.03695334  0.34523615]
reward 1.0
done False
info {}
states [-0.01001903 -0.40653586  0.04385807  0.649339  ]
reward 1.0
done False
info {}
states [-0.01814975 -0.60224044  0.05684485  0.95550376]
reward 1.0
done False
info {}
states [-0.03019456 -0.40792727  0.07595492  0.68120813]
reward 1.0
done False
info {}
states [-0.0383531  -0.6040174   0.08957908  0.99680454]
reward 1.0
done False
info {}
states [-0.05043345 -0.41020003  0.10951518

states [ 0.0875624   0.03256762 -0.14157234 -0.241348  ]
reward 1.0
done False
info {}
states [ 0.08821376  0.22939815 -0.1463993  -0.57512337]
reward 1.0
done False
info {}
states [ 0.09280172  0.03659903 -0.15790178 -0.3319061 ]
reward 1.0
done False
info {}
states [ 0.0935337   0.2335748  -0.1645399  -0.66992444]
reward 1.0
done False
info {}
states [ 0.0982052   0.4305557  -0.17793839 -1.0095619 ]
reward 1.0
done False
info {}
states [ 0.10681631  0.62754846 -0.19812962 -1.3524237 ]
reward 1.0
done False
info {}
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward 1.0
done True
info {}
Episode:5 Score:22.0


In [65]:
env.reset()

array([ 0.04178106, -0.00586563, -0.03744423, -0.03555121], dtype=float32)

In [66]:
episodes = 5
for episode in range(1, episodes+1):
    print(episode)

1
2
3
4
5


# Action space is discrete 2 values 0/1 either move to left or right

In [67]:
env.action_space

Discrete(2)

# Observation space is a box space

| Num | Observation           | Min                 | Max               |
|-----|-----------------------|---------------------|-------------------|
| 0   | Cart Position         | -4.8                | 4.8               |
| 1   | Cart Velocity         | -Inf                | Inf               |
| 2   | Pole Angle            | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
| 3   | Pole Angular Velocity | -Inf                | Inf               |

In [68]:
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

# Training RL Model

In [69]:
log_path = os.path.join('/home/kchn/rlp/cartpole/Training', 'Logs')

# Instantiate PPO Algorithm

In [70]:
env = gym.make(environment_name)
env = DummyVecEnv([lambda: env])
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cuda device


In [71]:
model.learn(total_timesteps=20000)

Logging to /home/kchn/rlp/cartpole/Training/Logs/PPO_7
-----------------------------
| time/              |      |
|    fps             | 1878 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 1316       |
|    iterations           | 2          |
|    time_elapsed         | 3          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.00869368 |
|    clip_fraction        | 0.0926     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.686     |
|    explained_variance   | -0.00139   |
|    learning_rate        | 0.0003     |
|    loss                 | 5.5        |
|    n_updates            | 10         |
|    policy_gradient_loss | -0.0159    |
|    value_loss           | 53.4       |
--------------------------------------

<stable_baselines3.ppo.ppo.PPO at 0x7f7c9bda78e0>

# Save model and Reload Model

In [72]:
PPO_Path = os.path.join('/home/kchn/rlp/cartpole/Training', 'Saved Models', 'PPO_Model_Cartpole')

In [73]:
model.save(PPO_Path)

In [74]:
del model

In [75]:
PPO_Path

'/home/kchn/rlp/cartpole/Training/Saved Models/PPO_Model_Cartpole'

In [76]:
model = PPO.load(PPO_Path, env=env)

In [77]:
model.learn(total_timesteps=20000)

Logging to /home/kchn/rlp/cartpole/Training/Logs/PPO_8
-----------------------------
| time/              |      |
|    fps             | 1718 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1284         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0062920526 |
|    clip_fraction        | 0.0335       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.566       |
|    explained_variance   | 0.63         |
|    learning_rate        | 0.0003       |
|    loss                 | 11.2         |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.00481     |
|    value_loss           | 27.2         |
----

<stable_baselines3.ppo.ppo.PPO at 0x7f7b477ed9f0>

# Evaluation

In [78]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)

(200.0, 0.0)

In [79]:
env.close()

# Testing the model

In [80]:
episodes = 5
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action, _ =  model.predict(obs) ## using model here to play
        obs, reward, done, info = env.step(action)
        print("states", n_state)
        print("reward", reward)
        print("done", done)
        print("info", info)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
# env.close()

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

In [81]:
env.close()

In [82]:
obs = env.reset()
obs

array([[-0.00246493,  0.04280569, -0.00183863, -0.01210113]],
      dtype=float32)

In [83]:
action, _ = model.predict(obs)

In [84]:
env.action_space.sample()

0

In [85]:
env.step(action)

(array([[-0.00160882,  0.23795396, -0.00208065, -0.3053636 ]],
       dtype=float32),
 array([1.], dtype=float32),
 array([False]),
 [{}])

# Viewing logs in Tensorboard

In [86]:
training_log_path = os.path.join(log_path, 'PPO_2')

In [87]:
!tensorboard --logdir={training_log_path}

2022-09-02 22:36:22.769157: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-09-02 22:36:23.836366: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-02 22:36:23.870057: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/kchn/.local/lib/python3.10/site-packages/cv2/../../lib64:
2022-09-02 22:36:23.870081: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are inst

# Apply Callbacks to train step for a threshold of reward

In [97]:
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold

In [98]:
save_path = os.path.join('/home/kchn/rlp/cartpole/Training', 'Saved Models')

In [99]:
stop_callback = StopTrainingOnRewardThreshold(reward_threshold=200, verbose=1)
eval_callback = EvalCallback(
    env,
    callback_on_new_best=stop_callback,
    eval_freq=10000,
    best_model_save_path=save_path,
    verbose=1
)

In [100]:
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cuda device


In [101]:
model.learn(total_timesteps=20000, callback=eval_callback)

Logging to /home/kchn/rlp/cartpole/Training/Logs/PPO_11
-----------------------------
| time/              |      |
|    fps             | 1985 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1404        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.009360896 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.686      |
|    explained_variance   | -0.00361    |
|    learning_rate        | 0.0003      |
|    loss                 | 6.46        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0158     |
|    value_loss           | 53          |
--------------------

<stable_baselines3.ppo.ppo.PPO at 0x7f7c9becf490>

# Custom neural net model

In [102]:
net_arch = [dict(pi=[128, 128, 128, 128], vf=[128, 128, 128, 128])]

In [103]:
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path, policy_kwargs={'net_arch':net_arch})

Using cuda device


In [104]:
model.learn(total_timesteps=20000, callback=eval_callback)

Logging to /home/kchn/rlp/cartpole/Training/Logs/PPO_12
-----------------------------
| time/              |      |
|    fps             | 1536 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 1044       |
|    iterations           | 2          |
|    time_elapsed         | 3          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.01448135 |
|    clip_fraction        | 0.202      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.682     |
|    explained_variance   | 0.00987    |
|    learning_rate        | 0.0003     |
|    loss                 | 3.83       |
|    n_updates            | 10         |
|    policy_gradient_loss | -0.0222    |
|    value_loss           | 19.3       |
-------------------------------------

<stable_baselines3.ppo.ppo.PPO at 0x7f7b477ef0d0>

In [105]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)

(200.0, 0.0)

In [106]:
env.close()

In [107]:
episodes = 5
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action, _ =  model.predict(obs) ## using model here to play
        obs, reward, done, info = env.step(action)
        print("states", n_state)
        print("reward", reward)
        print("done", done)
        print("info", info)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

# Using Alternate Algorithm

In [108]:
from stable_baselines3 import DQN

In [109]:
model = DQN('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cuda device


In [110]:
model.learn(total_timesteps=200000)

Logging to /home/kchn/rlp/cartpole/Training/Logs/DQN_1
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.996    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 12816    |
|    time_elapsed     | 0        |
|    total_timesteps  | 81       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.991    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 14482    |
|    time_elapsed     | 0        |
|    total_timesteps  | 188      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.988    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 14157    |
|    time_elapsed     | 0        |
|    total_timesteps  | 243      |
----------------------------------


----------------------------------
| rollout/            |          |
|    exploration_rate | 0.889    |
| time/               |          |
|    episodes         | 108      |
|    fps              | 15188    |
|    time_elapsed     | 0        |
|    total_timesteps  | 2329     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.886    |
| time/               |          |
|    episodes         | 112      |
|    fps              | 15195    |
|    time_elapsed     | 0        |
|    total_timesteps  | 2397     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.882    |
| time/               |          |
|    episodes         | 116      |
|    fps              | 15271    |
|    time_elapsed     | 0        |
|    total_timesteps  | 2477     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.774    |
| time/               |          |
|    episodes         | 216      |
|    fps              | 16326    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4762     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.77     |
| time/               |          |
|    episodes         | 220      |
|    fps              | 16356    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4850     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.766    |
| time/               |          |
|    episodes         | 224      |
|    fps              | 16381    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4929     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.668    |
| time/               |          |
|    episodes         | 324      |
|    fps              | 16672    |
|    time_elapsed     | 0        |
|    total_timesteps  | 6981     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.666    |
| time/               |          |
|    episodes         | 328      |
|    fps              | 16661    |
|    time_elapsed     | 0        |
|    total_timesteps  | 7039     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.662    |
| time/               |          |
|    episodes         | 332      |
|    fps              | 16635    |
|    time_elapsed     | 0        |
|    total_timesteps  | 7111     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.57     |
| time/               |          |
|    episodes         | 432      |
|    fps              | 16554    |
|    time_elapsed     | 0        |
|    total_timesteps  | 9054     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.566    |
| time/               |          |
|    episodes         | 436      |
|    fps              | 16564    |
|    time_elapsed     | 0        |
|    total_timesteps  | 9139     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.56     |
| time/               |          |
|    episodes         | 440      |
|    fps              | 16578    |
|    time_elapsed     | 0        |
|    total_timesteps  | 9253     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.445    |
| time/               |          |
|    episodes         | 540      |
|    fps              | 16410    |
|    time_elapsed     | 0        |
|    total_timesteps  | 11693    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.441    |
| time/               |          |
|    episodes         | 544      |
|    fps              | 16411    |
|    time_elapsed     | 0        |
|    total_timesteps  | 11766    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.436    |
| time/               |          |
|    episodes         | 548      |
|    fps              | 16429    |
|    time_elapsed     | 0        |
|    total_timesteps  | 11876    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.336    |
| time/               |          |
|    episodes         | 648      |
|    fps              | 16324    |
|    time_elapsed     | 0        |
|    total_timesteps  | 13987    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.332    |
| time/               |          |
|    episodes         | 652      |
|    fps              | 16325    |
|    time_elapsed     | 0        |
|    total_timesteps  | 14066    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.329    |
| time/               |          |
|    episodes         | 656      |
|    fps              | 16319    |
|    time_elapsed     | 0        |
|    total_timesteps  | 14134    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.225    |
| time/               |          |
|    episodes         | 756      |
|    fps              | 16337    |
|    time_elapsed     | 0        |
|    total_timesteps  | 16316    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.22     |
| time/               |          |
|    episodes         | 760      |
|    fps              | 16343    |
|    time_elapsed     | 1        |
|    total_timesteps  | 16423    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.217    |
| time/               |          |
|    episodes         | 764      |
|    fps              | 16345    |
|    time_elapsed     | 1        |
|    total_timesteps  | 16484    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.114    |
| time/               |          |
|    episodes         | 864      |
|    fps              | 16166    |
|    time_elapsed     | 1        |
|    total_timesteps  | 18661    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.11     |
| time/               |          |
|    episodes         | 868      |
|    fps              | 16163    |
|    time_elapsed     | 1        |
|    total_timesteps  | 18741    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.106    |
| time/               |          |
|    episodes         | 872      |
|    fps              | 16163    |
|    time_elapsed     | 1        |
|    total_timesteps  | 18825    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 972      |
|    fps              | 16063    |
|    time_elapsed     | 1        |
|    total_timesteps  | 21058    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 976      |
|    fps              | 16063    |
|    time_elapsed     | 1        |
|    total_timesteps  | 21169    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 980      |
|    fps              | 16061    |
|    time_elapsed     | 1        |
|    total_timesteps  | 21257    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1080     |
|    fps              | 16005    |
|    time_elapsed     | 1        |
|    total_timesteps  | 23533    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1084     |
|    fps              | 16005    |
|    time_elapsed     | 1        |
|    total_timesteps  | 23602    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1088     |
|    fps              | 16006    |
|    time_elapsed     | 1        |
|    total_timesteps  | 23709    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1188     |
|    fps              | 16118    |
|    time_elapsed     | 1        |
|    total_timesteps  | 25795    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1192     |
|    fps              | 16132    |
|    time_elapsed     | 1        |
|    total_timesteps  | 25934    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1196     |
|    fps              | 16132    |
|    time_elapsed     | 1        |
|    total_timesteps  | 26007    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1296     |
|    fps              | 16218    |
|    time_elapsed     | 1        |
|    total_timesteps  | 28249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1300     |
|    fps              | 16219    |
|    time_elapsed     | 1        |
|    total_timesteps  | 28317    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1304     |
|    fps              | 16218    |
|    time_elapsed     | 1        |
|    total_timesteps  | 28393    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1404     |
|    fps              | 16317    |
|    time_elapsed     | 1        |
|    total_timesteps  | 30611    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1408     |
|    fps              | 16323    |
|    time_elapsed     | 1        |
|    total_timesteps  | 30718    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1412     |
|    fps              | 16326    |
|    time_elapsed     | 1        |
|    total_timesteps  | 30786    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1512     |
|    fps              | 16453    |
|    time_elapsed     | 2        |
|    total_timesteps  | 32909    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1516     |
|    fps              | 16458    |
|    time_elapsed     | 2        |
|    total_timesteps  | 32989    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1520     |
|    fps              | 16462    |
|    time_elapsed     | 2        |
|    total_timesteps  | 33090    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1620     |
|    fps              | 16577    |
|    time_elapsed     | 2        |
|    total_timesteps  | 35393    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1624     |
|    fps              | 16584    |
|    time_elapsed     | 2        |
|    total_timesteps  | 35501    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1628     |
|    fps              | 16589    |
|    time_elapsed     | 2        |
|    total_timesteps  | 35614    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1728     |
|    fps              | 16669    |
|    time_elapsed     | 2        |
|    total_timesteps  | 37781    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1732     |
|    fps              | 16675    |
|    time_elapsed     | 2        |
|    total_timesteps  | 37863    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1736     |
|    fps              | 16679    |
|    time_elapsed     | 2        |
|    total_timesteps  | 37945    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1836     |
|    fps              | 16708    |
|    time_elapsed     | 2        |
|    total_timesteps  | 40210    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1840     |
|    fps              | 16710    |
|    time_elapsed     | 2        |
|    total_timesteps  | 40291    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1844     |
|    fps              | 16712    |
|    time_elapsed     | 2        |
|    total_timesteps  | 40398    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1944     |
|    fps              | 16748    |
|    time_elapsed     | 2        |
|    total_timesteps  | 42766    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1948     |
|    fps              | 16753    |
|    time_elapsed     | 2        |
|    total_timesteps  | 42863    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1952     |
|    fps              | 16758    |
|    time_elapsed     | 2        |
|    total_timesteps  | 42952    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2052     |
|    fps              | 16820    |
|    time_elapsed     | 2        |
|    total_timesteps  | 45458    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2056     |
|    fps              | 16821    |
|    time_elapsed     | 2        |
|    total_timesteps  | 45518    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2060     |
|    fps              | 16826    |
|    time_elapsed     | 2        |
|    total_timesteps  | 45625    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2160     |
|    fps              | 16812    |
|    time_elapsed     | 2        |
|    total_timesteps  | 47665    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2164     |
|    fps              | 16810    |
|    time_elapsed     | 2        |
|    total_timesteps  | 47713    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2168     |
|    fps              | 16817    |
|    time_elapsed     | 2        |
|    total_timesteps  | 47850    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2268     |
|    fps              | 16661    |
|    time_elapsed     | 3        |
|    total_timesteps  | 50063    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.529    |
|    n_updates        | 15       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2272     |
|    fps              | 16533    |
|    time_elapsed     | 3        |
|    total_timesteps  | 50101    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.495    |
|    n_updates        | 25       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2344     |
|    fps              | 14474    |
|    time_elapsed     | 3        |
|    total_timesteps  | 50794    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.232    |
|    n_updates        | 198      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2348     |
|    fps              | 14372    |
|    time_elapsed     | 3        |
|    total_timesteps  | 50834    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.217    |
|    n_updates        | 208      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2420     |
|    fps              | 12763    |
|    time_elapsed     | 4        |
|    total_timesteps  | 51602    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0209   |
|    n_updates        | 400      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2424     |
|    fps              | 12683    |
|    time_elapsed     | 4        |
|    total_timesteps  | 51647    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0293   |
|    n_updates        | 411      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2496     |
|    fps              | 10812    |
|    time_elapsed     | 4        |
|    total_timesteps  | 52827    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00559  |
|    n_updates        | 706      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2500     |
|    fps              | 10718    |
|    time_elapsed     | 4        |
|    total_timesteps  | 52895    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00252  |
|    n_updates        | 723      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2572     |
|    fps              | 9409     |
|    time_elapsed     | 5        |
|    total_timesteps  | 54056    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000777 |
|    n_updates        | 1013     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2576     |
|    fps              | 9336     |
|    time_elapsed     | 5        |
|    total_timesteps  | 54122    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00163  |
|    n_updates        | 1030     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2648     |
|    fps              | 8443     |
|    time_elapsed     | 6        |
|    total_timesteps  | 55250    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000535 |
|    n_updates        | 1312     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2652     |
|    fps              | 8405     |
|    time_elapsed     | 6        |
|    total_timesteps  | 55312    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000175 |
|    n_updates        | 1327     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2724     |
|    fps              | 7659     |
|    time_elapsed     | 7        |
|    total_timesteps  | 56514    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000206 |
|    n_updates        | 1628     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2728     |
|    fps              | 7620     |
|    time_elapsed     | 7        |
|    total_timesteps  | 56589    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00102  |
|    n_updates        | 1647     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2800     |
|    fps              | 6999     |
|    time_elapsed     | 8        |
|    total_timesteps  | 57947    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.48e-05 |
|    n_updates        | 1986     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2804     |
|    fps              | 6969     |
|    time_elapsed     | 8        |
|    total_timesteps  | 58020    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000189 |
|    n_updates        | 2004     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2876     |
|    fps              | 6495     |
|    time_elapsed     | 9        |
|    total_timesteps  | 59329    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.91e-05 |
|    n_updates        | 2332     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2880     |
|    fps              | 6471     |
|    time_elapsed     | 9        |
|    total_timesteps  | 59404    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000523 |
|    n_updates        | 2350     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2952     |
|    fps              | 5945     |
|    time_elapsed     | 10       |
|    total_timesteps  | 61220    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00287  |
|    n_updates        | 2804     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2956     |
|    fps              | 5906     |
|    time_elapsed     | 10       |
|    total_timesteps  | 61377    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0937   |
|    n_updates        | 2844     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3028     |
|    fps              | 5435     |
|    time_elapsed     | 11       |
|    total_timesteps  | 63573    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.015    |
|    n_updates        | 3393     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3032     |
|    fps              | 5423     |
|    time_elapsed     | 11       |
|    total_timesteps  | 63635    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0112   |
|    n_updates        | 3408     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3104     |
|    fps              | 5094     |
|    time_elapsed     | 12       |
|    total_timesteps  | 65437    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0192   |
|    n_updates        | 3859     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3108     |
|    fps              | 5071     |
|    time_elapsed     | 12       |
|    total_timesteps  | 65553    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.024    |
|    n_updates        | 3888     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3180     |
|    fps              | 4831     |
|    time_elapsed     | 13       |
|    total_timesteps  | 67082    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0422   |
|    n_updates        | 4270     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3184     |
|    fps              | 4824     |
|    time_elapsed     | 13       |
|    total_timesteps  | 67132    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0517   |
|    n_updates        | 4282     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3256     |
|    fps              | 4684     |
|    time_elapsed     | 14       |
|    total_timesteps  | 68081    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0126   |
|    n_updates        | 4520     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3260     |
|    fps              | 4671     |
|    time_elapsed     | 14       |
|    total_timesteps  | 68180    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0132   |
|    n_updates        | 4544     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3332     |
|    fps              | 4460     |
|    time_elapsed     | 15       |
|    total_timesteps  | 69786    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.012    |
|    n_updates        | 4946     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3336     |
|    fps              | 4451     |
|    time_elapsed     | 15       |
|    total_timesteps  | 69859    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0222   |
|    n_updates        | 4964     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3408     |
|    fps              | 3963     |
|    time_elapsed     | 19       |
|    total_timesteps  | 75360    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.07     |
|    n_updates        | 6339     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3412     |
|    fps              | 3901     |
|    time_elapsed     | 19       |
|    total_timesteps  | 76160    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0308   |
|    n_updates        | 6539     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3484     |
|    fps              | 3275     |
|    time_elapsed     | 27       |
|    total_timesteps  | 88709    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0432   |
|    n_updates        | 9677     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3488     |
|    fps              | 3244     |
|    time_elapsed     | 27       |
|    total_timesteps  | 89509    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0811   |
|    n_updates        | 9877     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3560     |
|    fps              | 2863     |
|    time_elapsed     | 36       |
|    total_timesteps  | 103885   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.16     |
|    n_updates        | 13471    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3564     |
|    fps              | 2847     |
|    time_elapsed     | 36       |
|    total_timesteps  | 104685   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00444  |
|    n_updates        | 13671    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3636     |
|    fps              | 2624     |
|    time_elapsed     | 45       |
|    total_timesteps  | 119085   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0145   |
|    n_updates        | 17271    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3640     |
|    fps              | 2615     |
|    time_elapsed     | 45       |
|    total_timesteps  | 119885   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0967   |
|    n_updates        | 17471    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3712     |
|    fps              | 2465     |
|    time_elapsed     | 54       |
|    total_timesteps  | 134184   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00477  |
|    n_updates        | 21045    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3716     |
|    fps              | 2459     |
|    time_elapsed     | 54       |
|    total_timesteps  | 134984   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00796  |
|    n_updates        | 21245    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3788     |
|    fps              | 2348     |
|    time_elapsed     | 63       |
|    total_timesteps  | 149167   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0724   |
|    n_updates        | 24791    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3792     |
|    fps              | 2342     |
|    time_elapsed     | 64       |
|    total_timesteps  | 149967   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0913   |
|    n_updates        | 24991    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3864     |
|    fps              | 2252     |
|    time_elapsed     | 72       |
|    total_timesteps  | 164102   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00539  |
|    n_updates        | 28525    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3868     |
|    fps              | 2247     |
|    time_elapsed     | 73       |
|    total_timesteps  | 164902   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00784  |
|    n_updates        | 28725    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3940     |
|    fps              | 2181     |
|    time_elapsed     | 82       |
|    total_timesteps  | 179169   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0127   |
|    n_updates        | 32292    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3944     |
|    fps              | 2177     |
|    time_elapsed     | 82       |
|    total_timesteps  | 179968   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.167    |
|    n_updates        | 32491    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4016     |
|    fps              | 2131     |
|    time_elapsed     | 91       |
|    total_timesteps  | 194340   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00925  |
|    n_updates        | 36084    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4020     |
|    fps              | 2129     |
|    time_elapsed     | 91       |
|    total_timesteps  | 195114   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0104   |
|    n_updates        | 36278    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

<stable_baselines3.dqn.dqn.DQN at 0x7f7b477ee050>

# Test DQN algorithm

In [111]:
episodes = 10
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action, _ =  model.predict(obs) ## using model here to play
        obs, reward, done, info = env.step(action)
        print("states", n_state)
        print("reward", reward)
        print("done", done)
        print("info", info)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
# env.close()

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False]
info [{}]
states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [False

states [ 0.11936728  0.4353877  -0.2251781  -1.1276914 ]
reward [1.]
done [ True]
info [{'TimeLimit.truncated': True, 'terminal_observation': array([-1.8118345 , -1.7738057 , -0.18324801, -0.20370956], dtype=float32)}]
Episode:10 Score:[200.]


In [112]:
env.close()