# Retraining after first attempt to prove correct
**Important note:**
In the process of initiating the retraining we found a bug in the environment:
Essentially, the area which now turned out to be buggy has not been trained on originally as it never occurred in the training samples due to a buggy bounds check (mixup between obstacle size `c` and wind speed `w` in `is_in_bounds`

In [1]:
import gym
import time

In [2]:
import pickle
import numpy as np
import polytope as pc

In [3]:
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
import torch
from torch import nn

In [4]:
import zeppelin_gym.env3

In [5]:
torch.manual_seed(42)

<torch._C.Generator at 0x7f364b90f4d0>

In [6]:
env = gym.make('zeppelin-v3')



In [7]:
env.unwrapped.FUEL_RESTRAINT = False
env.unwrapped.OBSTACLE_REWARD = -1000.
env.unwrapped.NO_FUEL_REWARD = 0.
# done reward = (FUEL_RESTRAINT) ? r+fuel*r : 2*r
env.unwrapped.DONE_REWARD = 0.5
env.unwrapped.TIME_STEP_REWARD = 0.
env.unwrapped.INCLUDE_UNWINNABLE = False

In [8]:
env.seed(42)

[42]

In [14]:
retrain_polytopes = None
with open("zeppelin-small2-polytopes.pickle","rb") as f:
    retrain_polytopes = pickle.load(f)

In [25]:
poly_region = pc.Region(retrain_polytopes)
poly_region.contains(np.array([[72.],[131.],[80.],[30.]]),abs_tol=20)

array([False])

In [10]:
# Total volume of polytopes?
# Share of instances usually (i.e. without "focus polytopes")

# Volume of state space:
N = 100000
n=4
l_b = np.array([env.unwrapped.MIN_X,env.unwrapped.MIN_Y,env.unwrapped.MIN_C,env.unwrapped.MAX_VELOCITY-env.unwrapped.MAX_TURBULENCE+0.1])
u_b = np.array([env.unwrapped.MAX_X,env.unwrapped.MAX_Y,env.unwrapped.MAX_C,env.unwrapped.MAX_WIND_SPEED])
xs = env.unwrapped.np_random.uniform(low=l_b,high=u_b,size=(N,n))
s = 0
for x in xs:
    if not env.unwrapped.is_crash(x) and not env.unwrapped.reached_goal(x) and env.unwrapped.is_in_bounds(x):
        s+=1
print("s: ",s)
total_vol = np.prod(u_b - l_b) * (s / N)
print("total volume: ", total_vol)

poly_region = pc.Region(retrain_polytopes)
l_b, u_b = poly_region.bounding_box
l_b = l_b.flatten()
u_b = u_b.flatten()
print(l_b,",",u_b)
xs = env.unwrapped.np_random.uniform(low=l_b,high=u_b,size=(N,n))
s = 0
for x in xs:
    if x in poly_region:
        if not env.unwrapped.is_crash(x) and not env.unwrapped.reached_goal(x) and env.unwrapped.is_in_bounds(x):
                s+=1
poly_vol = np.prod(u_b - l_b) * (s / N)
print("poly volume: ", poly_vol)

# We only have an upper bound for the share, since polytopes may be partially outside the state space of interest
poly_share = poly_vol/total_vol
print("max share: ", poly_share)

s:  4512
total volume:  50332262.4
[-268.98618  -97.5       10.         5.     ] , [400. 400.  80.  30.]
poly volume:  16197547.64683712
max share:  0.32181242953301303


In [31]:
eval_episode_length=30000
training_episode_length=100000

In [32]:
env.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7f8a37e4f4b0>

In [33]:
model = PPO.load("zeppelin-avoidance-windsystem-small2-1400000")
model.set_env(env)

env.init_polytopes(1.0,[])
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
mean_reward:0.97 +/- 5.78


In [34]:
env.init_polytopes(0.0,retrain_polytopes)
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

mean_reward:0.03 +/- 31.11


In [35]:
results_overall = {}
results_polys = {}

In [36]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("zeppelin-avoidance-windsystem-small2-1400000")
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    env.unwrapped.INCLUDE_UNWINNABLE = False
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-100000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.21     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 365      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.98        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 314         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.027606804 |
|    clip_fraction        | 0.241       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.1        |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 286        |
|    iterations           | 11         |
|    time_elapsed         | 78         |
|    total_timesteps      | 22528      |
| train/                  |            |
|    approx_kl            | 0.02297163 |
|    clip_fraction        | 0.242      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0456     |
|    explained_variance   | 0.00217    |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0218     |
|    n_updates            | 6960       |
|    policy_gradient_loss | 0.00489    |
|    std                  | 0.238      |
|    value_loss           | 488        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.27        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 281         |
|    iterations           | 20          |
|    time_elapsed         | 145         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.020620484 |
|    clip_fraction        | 0.242       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0967      |
|    explained_variance   | 0.00664     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.027       |
|    n_updates            | 7050        |
|    policy_gradient_loss | 0.00687     |
|    std                  | 0.231       |
|    value_loss           | 1.4e+03     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 270         |
|    iterations           | 29          |
|    time_elapsed         | 219         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.018757604 |
|    clip_fraction        | 0.236       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0906      |
|    explained_variance   | 0.00462     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0197     |
|    n_updates            | 7140        |
|    policy_gradient_loss | 0.0052      |
|    std                  | 0.233       |
|    value_loss           | 972         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.17       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 269        |
|    iterations           | 38         |
|    time_elapsed         | 288        |
|    total_timesteps      | 77824      |
| train/                  |            |
|    approx_kl            | 0.01393988 |
|    clip_fraction        | 0.195      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0843     |
|    explained_variance   | 0.00512    |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0107     |
|    n_updates            | 7230       |
|    policy_gradient_loss | 0.00334    |
|    std                  | 0.231      |
|    value_loss           | 918        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.26        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 47          |
|    time_elapsed         | 356         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.034710873 |
|    clip_fraction        | 0.25        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0567      |
|    explained_variance   | 7.82e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00374     |
|    n_updates            | 7320        |
|    policy_gradient_loss | 0.0142      |
|    std                  | 0.237       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.18        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 277         |
|    iterations           | 8           |
|    time_elapsed         | 59          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.013936228 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0683      |
|    explained_variance   | -85         |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0383      |
|    n_updates            | 6930        |
|    policy_gradient_loss | 0.00512     |
|    std                  | 0.234       |
|    value_loss           | 0.0531      |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.13        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 274         |
|    iterations           | 17          |
|    time_elapsed         | 126         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.017967524 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0873      |
|    explained_variance   | 0.00314     |
|    learning_rate        | 0.0003      |
|    loss                 | 7.82e+03    |
|    n_updates            | 7020        |
|    policy_gradient_loss | 0.00732     |
|    std                  | 0.232       |
|    value_loss           | 1.46e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.28        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 273         |
|    iterations           | 26          |
|    time_elapsed         | 194         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.103331506 |
|    clip_fraction        | 0.211       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0705      |
|    explained_variance   | 0.00475     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.118       |
|    n_updates            | 7110        |
|    policy_gradient_loss | 0.0233      |
|    std                  | 0.233       |
|    value_loss           | 1.89e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.24        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 272         |
|    iterations           | 35          |
|    time_elapsed         | 263         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.019110737 |
|    clip_fraction        | 0.247       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0354      |
|    explained_variance   | 0.00399     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0328      |
|    n_updates            | 7200        |
|    policy_gradient_loss | 0.00615     |
|    std                  | 0.237       |
|    value_loss           | 974         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.03       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 272        |
|    iterations           | 44         |
|    time_elapsed         | 330        |
|    total_timesteps      | 90112      |
| train/                  |            |
|    approx_kl            | 0.05088778 |
|    clip_fraction        | 0.25       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.00868   |
|    explained_variance   | 0.00374    |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0165    |
|    n_updates            | 7290       |
|    policy_gradient_loss | 0.00347    |
|    std                  | 0.242      |
|    value_loss           | 974        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.13        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 290         |
|    iterations           | 5           |
|    time_elapsed         | 35          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.025808115 |
|    clip_fraction        | 0.252       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.118       |
|    explained_variance   | -254        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0306     |
|    n_updates            | 6900        |
|    policy_gradient_loss | 0.00675     |
|    std                  | 0.227       |
|    value_loss           | 0.0945      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.37       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 284        |
|    iterations           | 14         |
|    time_elapsed         | 100        |
|    total_timesteps      | 28672      |
| train/                  |            |
|    approx_kl            | 0.02692457 |
|    clip_fraction        | 0.248      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.177      |
|    explained_variance   | -417       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.028      |
|    n_updates            | 6990       |
|    policy_gradient_loss | 0.00498    |
|    std                  | 0.22       |
|    value_loss           | 0.054      |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.09       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 281        |
|    iterations           | 24         |
|    time_elapsed         | 174        |
|    total_timesteps      | 49152      |
| train/                  |            |
|    approx_kl            | 0.03153211 |
|    clip_fraction        | 0.249      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.216      |
|    explained_variance   | -373       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0234     |
|    n_updates            | 7090       |
|    policy_gradient_loss | 0.0061     |
|    std                  | 0.219      |
|    value_loss           | 0.0618     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.99       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 279        |
|    iterations           | 33         |
|    time_elapsed         | 241        |
|    total_timesteps      | 67584      |
| train/                  |            |
|    approx_kl            | 0.01793576 |
|    clip_fraction        | 0.233      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.26       |
|    explained_variance   | -57.5      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0324     |
|    n_updates            | 7180       |
|    policy_gradient_loss | 0.00624    |
|    std                  | 0.212      |
|    value_loss           | 0.0117     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 278         |
|    iterations           | 42          |
|    time_elapsed         | 308         |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.026488226 |
|    clip_fraction        | 0.26        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.222       |
|    explained_variance   | 0.0062      |
|    learning_rate        | 0.0003      |
|    loss                 | 7.78e+03    |
|    n_updates            | 7270        |
|    policy_gradient_loss | 0.0109      |
|    std                  | 0.218       |
|    value_loss           | 486         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.21        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 354         |
|    iterations           | 3           |
|    time_elapsed         | 17          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.024179533 |
|    clip_fraction        | 0.253       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.108       |
|    explained_variance   | 0.000291    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0539      |
|    n_updates            | 6880        |
|    policy_gradient_loss | 0.00722     |
|    std                  | 0.231       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.33        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 331         |
|    iterations           | 12          |
|    time_elapsed         | 74          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.025059026 |
|    clip_fraction        | 0.216       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0639      |
|    explained_variance   | -254        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0227      |
|    n_updates            | 6970        |
|    policy_gradient_loss | -0.000797   |
|    std                  | 0.234       |
|    value_loss           | 0.0377      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.42        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 324         |
|    iterations           | 21          |
|    time_elapsed         | 132         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.020596012 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0914      |
|    explained_variance   | 0.00233     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0385      |
|    n_updates            | 7060        |
|    policy_gradient_loss | 0.00924     |
|    std                  | 0.231       |
|    value_loss           | 488         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.2         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 323         |
|    iterations           | 30          |
|    time_elapsed         | 189         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.023869053 |
|    clip_fraction        | 0.226       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.126       |
|    explained_variance   | -130        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.011      |
|    n_updates            | 7150        |
|    policy_gradient_loss | 0.00135     |
|    std                  | 0.228       |
|    value_loss           | 0.0427      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.31        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 321         |
|    iterations           | 39          |
|    time_elapsed         | 248         |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.041562498 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0161      |
|    explained_variance   | -343        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0911      |
|    n_updates            | 7240        |
|    policy_gradient_loss | 0.00514     |
|    std                  | 0.241       |
|    value_loss           | 0.147       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.19        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 321         |
|    iterations           | 48          |
|    time_elapsed         | 305         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.024730718 |
|    clip_fraction        | 0.236       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.001       |
|    explained_variance   | 0.00225     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0472     |
|    n_updates            | 7330        |
|    policy_gradient_loss | 0.0117      |
|    std                  | 0.244       |
|    value_loss           | 487         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.16        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 397         |
|    iterations           | 9           |
|    time_elapsed         | 46          |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.028509516 |
|    clip_fraction        | 0.182       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.143       |
|    explained_variance   | -22.7       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00693     |
|    n_updates            | 6940        |
|    policy_gradient_loss | 0.00479     |
|    std                  | 0.224       |
|    value_loss           | 0.0037      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.12       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 381        |
|    iterations           | 18         |
|    time_elapsed         | 96         |
|    total_timesteps      | 36864      |
| train/                  |            |
|    approx_kl            | 0.03110605 |
|    clip_fraction        | 0.221      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.108      |
|    explained_variance   | 0.000157   |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0663     |
|    n_updates            | 7030       |
|    policy_gradient_loss | 0.0024     |
|    std                  | 0.23       |
|    value_loss           | 1.3e+03    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.35        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 364         |
|    iterations           | 27          |
|    time_elapsed         | 151         |
|    total_timesteps      | 55296       |
| train/                  |             |
|    approx_kl            | 0.026739744 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.108       |
|    explained_variance   | -58.7       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0145     |
|    n_updates            | 7120        |
|    policy_gradient_loss | 0.00295     |
|    std                  | 0.229       |
|    value_loss           | 0.00998     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 354         |
|    iterations           | 36          |
|    time_elapsed         | 208         |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.022573065 |
|    clip_fraction        | 0.179       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0608      |
|    explained_variance   | -21.2       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00625     |
|    n_updates            | 7210        |
|    policy_gradient_loss | 0.000148    |
|    std                  | 0.236       |
|    value_loss           | 0.00533     |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.2         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 352         |
|    iterations           | 45          |
|    time_elapsed         | 261         |
|    total_timesteps      | 92160       |
| train/                  |             |
|    approx_kl            | 0.047828168 |
|    clip_fraction        | 0.229       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0853      |
|    explained_variance   | -20.2       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00604     |
|    n_updates            | 7300        |
|    policy_gradient_loss | 0.0167      |
|    std                  | 0.233       |
|    value_loss           | 0.00374     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.47        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 430         |
|    iterations           | 6           |
|    time_elapsed         | 28          |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.036565084 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.144       |
|    explained_variance   | 0.000125    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0404      |
|    n_updates            | 6910        |
|    policy_gradient_loss | 0.0213      |
|    std                  | 0.225       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.17        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 377         |
|    iterations           | 15          |
|    time_elapsed         | 81          |
|    total_timesteps      | 30720       |
| train/                  |             |
|    approx_kl            | 0.022794968 |
|    clip_fraction        | 0.199       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.129       |
|    explained_variance   | -8.95       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0332      |
|    n_updates            | 7000        |
|    policy_gradient_loss | 0.00471     |
|    std                  | 0.227       |
|    value_loss           | 0.00479     |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.24       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 370        |
|    iterations           | 24         |
|    time_elapsed         | 132        |
|    total_timesteps      | 49152      |
| train/                  |            |
|    approx_kl            | 0.02523032 |
|    clip_fraction        | 0.217      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.106      |
|    explained_variance   | -130       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0145    |
|    n_updates            | 7090       |
|    policy_gradient_loss | 0.00769    |
|    std                  | 0.23       |
|    value_loss           | 0.147      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.34      |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 367       |
|    iterations           | 33        |
|    time_elapsed         | 184       |
|    total_timesteps      | 67584     |
| train/                  |           |
|    approx_kl            | 0.0577729 |
|    clip_fraction        | 0.275     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.132     |
|    explained_variance   | -110      |
|    learning_rate        | 0.0003    |
|    loss                 | 0.0127    |
|    n_updates            | 7180      |
|    policy_gradient_loss | 0.00834   |
|    std                  | 0.227     |
|    value_loss           | 0.0122    |
---------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.24      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.06        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 350         |
|    iterations           | 43          |
|    time_elapsed         | 251         |
|    total_timesteps      | 88064       |
| train/                  |             |
|    approx_kl            | 0.022892784 |
|    clip_fraction        | 0.213       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.124       |
|    explained_variance   | -197        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.029      |
|    n_updates            | 7280        |
|    policy_gradient_loss | 0.00781     |
|    std                  | 0.226       |
|    value_loss           | 0.0164      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

In [26]:
eval_episode_length=100000
training_episode_length=100000

In [39]:
env.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7f8a37e4f4b0>

In [40]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    results_overall[p]=[]
    results_polys[p]=[]
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-100000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:0.94 +/- 7.75
Focus Polytopes:
mean_reward:0.58 +/- 20.51
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.97 +/- 5.48
Focus Polytopes:
mean_reward:0.58 +/- 20.51
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:0.94 +/- 7.75
Focus Polytopes:
mean_reward:0.72 +/- 16.75
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:0.98 +/- 4.48
Focus Polytopes:
mean_reward:0.80 +/- 14.15
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:0.98 +/- 4.48
Focus Polytopes:
mean_reward:0.48 +/- 22.82
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.97 +/- 5.48
Focus Polytopes:
mean_reward:-0.29 +/- 35.93


In [28]:
results_overall = {0.:[(0.94,7.75)],0.05:[(0.97,5.48)],0.1:[(0.94,7.75)],0.5:[(0.98,4.48)],0.9:[(0.97,5.48)],1.0:[(0.97,5.48)]}
results_polys = {0.:[(0.58,20.51)],0.05:[(0.58,20.51)],0.1:[(0.72,16.75)],0.5:[(0.8,14.15)],0.9:[(0.48,22.82)],1.0:[(-0.29,35.93)]}

In [43]:
env.seed(43)
torch.manual_seed(43)

<torch._C.Generator at 0x7f8a37e4f4b0>

In [44]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-100000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-200000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.96     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 330      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.22        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 279         |
|    iterations           | 2           |
|    time_elapsed         | 14          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.028366089 |
|    clip_fraction        | 0.232       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.17        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 218         |
|    iterations           | 11          |
|    time_elapsed         | 103         |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.016817104 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0688     |
|    explained_variance   | 0.00316     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0171     |
|    n_updates            | 7450        |
|    policy_gradient_loss | 0.00404     |
|    std                  | 0.254       |
|    value_loss           | 487         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.05        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 224         |
|    iterations           | 20          |
|    time_elapsed         | 182         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.019194994 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.00611    |
|    explained_variance   | -115        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.000295    |
|    n_updates            | 7540        |
|    policy_gradient_loss | 0.00501     |
|    std                  | 0.241       |
|    value_loss           | 0.052       |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.13       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 218        |
|    iterations           | 29         |
|    time_elapsed         | 271        |
|    total_timesteps      | 59392      |
| train/                  |            |
|    approx_kl            | 0.02382049 |
|    clip_fraction        | 0.226      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0424     |
|    explained_variance   | -201       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00492    |
|    n_updates            | 7630       |
|    policy_gradient_loss | 0.012      |
|    std                  | 0.238      |
|    value_loss           | 0.0223     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.23       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 221        |
|    iterations           | 38         |
|    time_elapsed         | 351        |
|    total_timesteps      | 77824      |
| train/                  |            |
|    approx_kl            | 0.02809542 |
|    clip_fraction        | 0.225      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0572     |
|    explained_variance   | 0.00162    |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00751    |
|    n_updates            | 7720       |
|    policy_gradient_loss | 0.00332    |
|    std                  | 0.236      |
|    value_loss           | 920        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 222         |
|    iterations           | 47          |
|    time_elapsed         | 433         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.023829147 |
|    clip_fraction        | 0.234       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0803      |
|    explained_variance   | -9.31e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0162     |
|    n_updates            | 7810        |
|    policy_gradient_loss | 0.00532     |
|    std                  | 0.234       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.21       |
|    ep_rew_mean          | -9.01      |
| time/                   |            |
|    fps                  | 247        |
|    iterations           | 8          |
|    time_elapsed         | 66         |
|    total_timesteps      | 16384      |
| train/                  |            |
|    approx_kl            | 0.01727083 |
|    clip_fraction        | 0.211      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0927    |
|    explained_variance   | 0.000817   |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0694     |
|    n_updates            | 7420       |
|    policy_gradient_loss | 0.00325    |
|    std                  | 0.254      |
|    value_loss           | 2.76e+03   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.23        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 236         |
|    iterations           | 17          |
|    time_elapsed         | 147         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.020044733 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0342     |
|    explained_variance   | 0.00166     |
|    learning_rate        | 0.0003      |
|    loss                 | 7.79e+03    |
|    n_updates            | 7510        |
|    policy_gradient_loss | 0.00251     |
|    std                  | 0.248       |
|    value_loss           | 488         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.29        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 235         |
|    iterations           | 26          |
|    time_elapsed         | 226         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.016432881 |
|    clip_fraction        | 0.187       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0251     |
|    explained_variance   | -0.000357   |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0063      |
|    n_updates            | 7600        |
|    policy_gradient_loss | 0.00164     |
|    std                  | 0.246       |
|    value_loss           | 489         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 229         |
|    iterations           | 35          |
|    time_elapsed         | 312         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.026226133 |
|    clip_fraction        | 0.279       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.000531    |
|    explained_variance   | 0.00403     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0667      |
|    n_updates            | 7690        |
|    policy_gradient_loss | 0.0106      |
|    std                  | 0.243       |
|    value_loss           | 487         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.3         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 223         |
|    iterations           | 44          |
|    time_elapsed         | 402         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.027240647 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0556     |
|    explained_variance   | 0.00195     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00492     |
|    n_updates            | 7780        |
|    policy_gradient_loss | -0.00183    |
|    std                  | 0.25        |
|    value_loss           | 487         |
-----------------------------------------
---------------------------------------
| rollout/                |         

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 2.13     |
|    ep_rew_mean          | 1        |
| time/                   |          |
|    fps                  | 254      |
|    iterations           | 5        |
|    time_elapsed         | 40       |
|    total_timesteps      | 10240    |
| train/                  |          |
|    approx_kl            | 0.535234 |
|    clip_fraction        | 0.294    |
|    clip_range           | 0.2      |
|    entropy_loss         | 0.234    |
|    explained_variance   | 0.00228  |
|    learning_rate        | 0.0003   |
|    loss                 | -0.0225  |
|    n_updates            | 7390     |
|    policy_gradient_loss | 0.00759  |
|    std                  | 0.215    |
|    value_loss           | 487      |
--------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.99        |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 239         |
|    iterations           | 15          |
|    time_elapsed         | 128         |
|    total_timesteps      | 30720       |
| train/                  |             |
|    approx_kl            | 0.019494655 |
|    clip_fraction        | 0.21        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.282       |
|    explained_variance   | -70.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0187      |
|    n_updates            | 7490        |
|    policy_gradient_loss | 0.00296     |
|    std                  | 0.208       |
|    value_loss           | 0.0231      |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.23        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 236         |
|    iterations           | 24          |
|    time_elapsed         | 207         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.022598188 |
|    clip_fraction        | 0.242       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.327       |
|    explained_variance   | 0.00186     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0724      |
|    n_updates            | 7580        |
|    policy_gradient_loss | 0.00936     |
|    std                  | 0.205       |
|    value_loss           | 975         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.23        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 221         |
|    iterations           | 33          |
|    time_elapsed         | 305         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.019804705 |
|    clip_fraction        | 0.216       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.308       |
|    explained_variance   | -92.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0121      |
|    n_updates            | 7670        |
|    policy_gradient_loss | 0.00437     |
|    std                  | 0.206       |
|    value_loss           | 0.0572      |
-----------------------------------------
---------------------------------------
| rollout/                |         

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.16       |
|    ep_rew_mean          | -9.01      |
| time/                   |            |
|    fps                  | 219        |
|    iterations           | 42         |
|    time_elapsed         | 391        |
|    total_timesteps      | 86016      |
| train/                  |            |
|    approx_kl            | 0.11411814 |
|    clip_fraction        | 0.273      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.294      |
|    explained_variance   | 0.000675   |
|    learning_rate        | 0.0003     |
|    loss                 | 0.000248   |
|    n_updates            | 7760       |
|    policy_gradient_loss | -0.000156  |
|    std                  | 0.209      |
|    value_loss           | 1.47e+03   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.25        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 303         |
|    iterations           | 3           |
|    time_elapsed         | 20          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.020738488 |
|    clip_fraction        | 0.226       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.00476     |
|    explained_variance   | 0.00223     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00755     |
|    n_updates            | 7370        |
|    policy_gradient_loss | 0.0013      |
|    std                  | 0.243       |
|    value_loss           | 488         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.19        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 273         |
|    iterations           | 12          |
|    time_elapsed         | 89          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.033358473 |
|    clip_fraction        | 0.19        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0188     |
|    explained_variance   | -24         |
|    learning_rate        | 0.0003      |
|    loss                 | 0.135       |
|    n_updates            | 7460        |
|    policy_gradient_loss | 0.00941     |
|    std                  | 0.245       |
|    value_loss           | 0.00458     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.13        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 256         |
|    iterations           | 21          |
|    time_elapsed         | 167         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.025220346 |
|    clip_fraction        | 0.229       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.00753    |
|    explained_variance   | -63.3       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00365    |
|    n_updates            | 7550        |
|    policy_gradient_loss | 0.00271     |
|    std                  | 0.245       |
|    value_loss           | 0.012       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.41        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 251         |
|    iterations           | 30          |
|    time_elapsed         | 244         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.033645876 |
|    clip_fraction        | 0.242       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0494      |
|    explained_variance   | -21.7       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0062      |
|    n_updates            | 7640        |
|    policy_gradient_loss | 0.00734     |
|    std                  | 0.235       |
|    value_loss           | 0.00465     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.64        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 255         |
|    iterations           | 39          |
|    time_elapsed         | 312         |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.017893407 |
|    clip_fraction        | 0.227       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0629      |
|    explained_variance   | -31.3       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00536     |
|    n_updates            | 7730        |
|    policy_gradient_loss | 0.0059      |
|    std                  | 0.236       |
|    value_loss           | 0.00643     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.05       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 259        |
|    iterations           | 48         |
|    time_elapsed         | 379        |
|    total_timesteps      | 98304      |
| train/                  |            |
|    approx_kl            | 0.02279354 |
|    clip_fraction        | 0.218      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0745     |
|    explained_variance   | -182       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.015      |
|    n_updates            | 7820       |
|    policy_gradient_loss | 0.00183    |
|    std                  | 0.232      |
|    value_loss           | 0.0162     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.18        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 348         |
|    iterations           | 9           |
|    time_elapsed         | 52          |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.012390117 |
|    clip_fraction        | 0.171       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.112       |
|    explained_variance   | 0.000763    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0604      |
|    n_updates            | 7430        |
|    policy_gradient_loss | 0.00549     |
|    std                  | 0.229       |
|    value_loss           | 918         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.37        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 344         |
|    iterations           | 18          |
|    time_elapsed         | 106         |
|    total_timesteps      | 36864       |
| train/                  |             |
|    approx_kl            | 0.017974539 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.158       |
|    explained_variance   | 0.00105     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.013       |
|    n_updates            | 7520        |
|    policy_gradient_loss | 0.0109      |
|    std                  | 0.225       |
|    value_loss           | 488         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.99        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 340         |
|    iterations           | 27          |
|    time_elapsed         | 162         |
|    total_timesteps      | 55296       |
| train/                  |             |
|    approx_kl            | 0.023683421 |
|    clip_fraction        | 0.252       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.17        |
|    explained_variance   | -148        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00506    |
|    n_updates            | 7610        |
|    policy_gradient_loss | 0.0182      |
|    std                  | 0.223       |
|    value_loss           | 0.00774     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.17       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 342        |
|    iterations           | 36         |
|    time_elapsed         | 215        |
|    total_timesteps      | 73728      |
| train/                  |            |
|    approx_kl            | 0.03275171 |
|    clip_fraction        | 0.262      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.18       |
|    explained_variance   | -46.2      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00104   |
|    n_updates            | 7700       |
|    policy_gradient_loss | 0.00215    |
|    std                  | 0.221      |
|    value_loss           | 0.0111     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.25       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 340        |
|    iterations           | 45         |
|    time_elapsed         | 270        |
|    total_timesteps      | 92160      |
| train/                  |            |
|    approx_kl            | 0.01996305 |
|    clip_fraction        | 0.238      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.188      |
|    explained_variance   | -58.6      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0483     |
|    n_updates            | 7790       |
|    policy_gradient_loss | 0.00836    |
|    std                  | 0.219      |
|    value_loss           | 0.0179     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 388         |
|    iterations           | 6           |
|    time_elapsed         | 31          |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.028850378 |
|    clip_fraction        | 0.236       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.122       |
|    explained_variance   | -30.7       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0183      |
|    n_updates            | 7400        |
|    policy_gradient_loss | 0.0144      |
|    std                  | 0.228       |
|    value_loss           | 0.00505     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.27       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 343        |
|    iterations           | 15         |
|    time_elapsed         | 89         |
|    total_timesteps      | 30720      |
| train/                  |            |
|    approx_kl            | 0.02317613 |
|    clip_fraction        | 0.213      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0443     |
|    explained_variance   | -10.5      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00192    |
|    n_updates            | 7490       |
|    policy_gradient_loss | 0.00482    |
|    std                  | 0.236      |
|    value_loss           | 0.0044     |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.18        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 341         |
|    iterations           | 24          |
|    time_elapsed         | 143         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.027266366 |
|    clip_fraction        | 0.227       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0428      |
|    explained_variance   | -7.19       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0353      |
|    n_updates            | 7580        |
|    policy_gradient_loss | 0.00997     |
|    std                  | 0.237       |
|    value_loss           | 0.0058      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.21        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 355         |
|    iterations           | 33          |
|    time_elapsed         | 190         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.042420946 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0846      |
|    explained_variance   | -46.3       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0631      |
|    n_updates            | 7670        |
|    policy_gradient_loss | 0.00354     |
|    std                  | 0.232       |
|    value_loss           | 0.0775      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 363         |
|    iterations           | 42          |
|    time_elapsed         | 236         |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.019846363 |
|    clip_fraction        | 0.235       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0651      |
|    explained_variance   | -551        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00655    |
|    n_updates            | 7760        |
|    policy_gradient_loss | -0.000486   |
|    std                  | 0.234       |
|    value_loss           | 0.0273      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

In [27]:
env.seed(1997)
torch.manual_seed(1997)

<torch._C.Generator at 0x7f53902104b0>

In [29]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-200000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:0.98 +/- 4.48
Focus Polytopes:
mean_reward:0.88 +/- 10.96
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.68 +/- 17.90
Focus Polytopes:
mean_reward:0.78 +/- 14.85
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:0.87 +/- 11.41
Focus Polytopes:
mean_reward:0.78 +/- 14.85
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:0.96 +/- 6.33
Focus Polytopes:
mean_reward:0.81 +/- 13.80
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:0.97 +/- 5.48
Focus Polytopes:
mean_reward:0.41 +/- 24.31
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.95 +/- 7.08
Focus Polytopes:
mean_reward:0.66 +/- 18.45


In [30]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-200000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-300000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.36     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 331      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 2           |
|    time_elapsed         | 14          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.029406453 |
|    clip_fraction        | 0.267       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 252         |
|    iterations           | 11          |
|    time_elapsed         | 89          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.021458711 |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0373     |
|    explained_variance   | -44.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0266      |
|    n_updates            | 7940        |
|    policy_gradient_loss | 0.00509     |
|    std                  | 0.249       |
|    value_loss           | 0.00513     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 251         |
|    iterations           | 20          |
|    time_elapsed         | 162         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.029649748 |
|    clip_fraction        | 0.244       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.124      |
|    explained_variance   | 0.000557    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00854     |
|    n_updates            | 8030        |
|    policy_gradient_loss | 0.00273     |
|    std                  | 0.258       |
|    value_loss           | 489         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.03        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 251         |
|    iterations           | 29          |
|    time_elapsed         | 236         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.020380562 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0651     |
|    explained_variance   | -39.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0166      |
|    n_updates            | 8120        |
|    policy_gradient_loss | 0.00691     |
|    std                  | 0.252       |
|    value_loss           | 0.00636     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.18        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 251         |
|    iterations           | 38          |
|    time_elapsed         | 310         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.023252608 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0284     |
|    explained_variance   | -77.3       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.004       |
|    n_updates            | 8210        |
|    policy_gradient_loss | 0.00534     |
|    std                  | 0.245       |
|    value_loss           | 0.00678     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 47          |
|    time_elapsed         | 384         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.011802739 |
|    clip_fraction        | 0.171       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0558      |
|    explained_variance   | -34.3       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00681    |
|    n_updates            | 8300        |
|    policy_gradient_loss | 0.00923     |
|    std                  | 0.236       |
|    value_loss           | 0.00656     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.04        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 260         |
|    iterations           | 8           |
|    time_elapsed         | 62          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.019889459 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0658     |
|    explained_variance   | 0.00174     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00241    |
|    n_updates            | 7910        |
|    policy_gradient_loss | 0.00183     |
|    std                  | 0.249       |
|    value_loss           | 488         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.13       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 255        |
|    iterations           | 17         |
|    time_elapsed         | 136        |
|    total_timesteps      | 34816      |
| train/                  |            |
|    approx_kl            | 0.02044245 |
|    clip_fraction        | 0.222      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0777    |
|    explained_variance   | -170       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00292    |
|    n_updates            | 8000       |
|    policy_gradient_loss | 0.00123    |
|    std                  | 0.251      |
|    value_loss           | 0.0119     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.31        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 254         |
|    iterations           | 26          |
|    time_elapsed         | 209         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.021387551 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.186      |
|    explained_variance   | 0.000137    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0213      |
|    n_updates            | 8090        |
|    policy_gradient_loss | 0.00213     |
|    std                  | 0.265       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.19        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 253         |
|    iterations           | 35          |
|    time_elapsed         | 282         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.015469507 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.181      |
|    explained_variance   | -33.2       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00591     |
|    n_updates            | 8180        |
|    policy_gradient_loss | 0.00195     |
|    std                  | 0.266       |
|    value_loss           | 0.00529     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.05        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 253         |
|    iterations           | 44          |
|    time_elapsed         | 356         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.018903738 |
|    clip_fraction        | 0.207       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.202      |
|    explained_variance   | -78.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.019       |
|    n_updates            | 8270        |
|    policy_gradient_loss | 0.00491     |
|    std                  | 0.268       |
|    value_loss           | 0.0682      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.26        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 270         |
|    iterations           | 5           |
|    time_elapsed         | 37          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.049295895 |
|    clip_fraction        | 0.244       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.281       |
|    explained_variance   | -134        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0226      |
|    n_updates            | 7880        |
|    policy_gradient_loss | 0.00473     |
|    std                  | 0.209       |
|    value_loss           | 0.0183      |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.3         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 261         |
|    iterations           | 14          |
|    time_elapsed         | 109         |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.027080508 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.286       |
|    explained_variance   | 0.000243    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0186      |
|    n_updates            | 7970        |
|    policy_gradient_loss | 0.00464     |
|    std                  | 0.209       |
|    value_loss           | 1.47e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.11        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 258         |
|    iterations           | 23          |
|    time_elapsed         | 182         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.051248025 |
|    clip_fraction        | 0.274       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.315       |
|    explained_variance   | -73.1       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00159     |
|    n_updates            | 8060        |
|    policy_gradient_loss | 0.0121      |
|    std                  | 0.207       |
|    value_loss           | 0.0294      |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.36        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 259         |
|    iterations           | 32          |
|    time_elapsed         | 252         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.022972595 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.38        |
|    explained_variance   | 0.00155     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00621    |
|    n_updates            | 8150        |
|    policy_gradient_loss | 0.00511     |
|    std                  | 0.2         |
|    value_loss           | 488         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.01        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 258         |
|    iterations           | 41          |
|    time_elapsed         | 324         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.032630708 |
|    clip_fraction        | 0.26        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.454       |
|    explained_variance   | 8.36e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00166     |
|    n_updates            | 8240        |
|    policy_gradient_loss | 0.0147      |
|    std                  | 0.193       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

--- 390.3345890045166 seconds ---
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.32     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 418      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 351         |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.057798415 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.05       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 308        |
|    iterations           | 11         |
|    time_elapsed         | 73         |
|    total_timesteps      | 22528      |
| train/                  |            |
|    approx_kl            | 0.02424808 |
|    clip_fraction        | 0.214      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0843     |
|    explained_variance   | -39.6      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00378    |
|    n_updates            | 7940       |
|    policy_gradient_loss | 0.00381    |
|    std                  | 0.231      |
|    value_loss           | 0.006      |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.06        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 304         |
|    iterations           | 20          |
|    time_elapsed         | 134         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.021523118 |
|    clip_fraction        | 0.235       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0601      |
|    explained_variance   | -28.3       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00891     |
|    n_updates            | 8030        |
|    policy_gradient_loss | 0.00699     |
|    std                  | 0.234       |
|    value_loss           | 0.00389     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.99        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 302         |
|    iterations           | 29          |
|    time_elapsed         | 196         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.019072138 |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.061       |
|    explained_variance   | 2.4e-05     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.018       |
|    n_updates            | 8120        |
|    policy_gradient_loss | 0.0105      |
|    std                  | 0.234       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.07       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 38         |
|    time_elapsed         | 257        |
|    total_timesteps      | 77824      |
| train/                  |            |
|    approx_kl            | 0.03786175 |
|    clip_fraction        | 0.23       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0914     |
|    explained_variance   | 0.000811   |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0192     |
|    n_updates            | 8210       |
|    policy_gradient_loss | 0.0122     |
|    std                  | 0.232      |
|    value_loss           | 489        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.05        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 301         |
|    iterations           | 47          |
|    time_elapsed         | 319         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.017684974 |
|    clip_fraction        | 0.245       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.152       |
|    explained_variance   | 4.43e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0185     |
|    n_updates            | 8300        |
|    policy_gradient_loss | 0.0124      |
|    std                  | 0.224       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.11        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 367         |
|    iterations           | 8           |
|    time_elapsed         | 44          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.021269208 |
|    clip_fraction        | 0.189       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.215       |
|    explained_variance   | 3.17e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0151      |
|    n_updates            | 7910        |
|    policy_gradient_loss | 0.00104     |
|    std                  | 0.218       |
|    value_loss           | 921         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.34        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 17          |
|    time_elapsed         | 96          |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.014668261 |
|    clip_fraction        | 0.175       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.122       |
|    explained_variance   | -52.8       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00553    |
|    n_updates            | 8000        |
|    policy_gradient_loss | 0.0106      |
|    std                  | 0.228       |
|    value_loss           | 0.00963     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.97       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 358        |
|    iterations           | 26         |
|    time_elapsed         | 148        |
|    total_timesteps      | 53248      |
| train/                  |            |
|    approx_kl            | 0.02087953 |
|    clip_fraction        | 0.239      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0926     |
|    explained_variance   | -27.9      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.000947   |
|    n_updates            | 8090       |
|    policy_gradient_loss | 0.00362    |
|    std                  | 0.233      |
|    value_loss           | 0.00662    |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 357         |
|    iterations           | 35          |
|    time_elapsed         | 200         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.021048427 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0975      |
|    explained_variance   | -32.4       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0451     |
|    n_updates            | 8180        |
|    policy_gradient_loss | 0.00756     |
|    std                  | 0.23        |
|    value_loss           | 0.00445     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.26       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 357        |
|    iterations           | 44         |
|    time_elapsed         | 252        |
|    total_timesteps      | 90112      |
| train/                  |            |
|    approx_kl            | 0.02173008 |
|    clip_fraction        | 0.213      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.15       |
|    explained_variance   | -17.5      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.07       |
|    n_updates            | 8270       |
|    policy_gradient_loss | 0.00334    |
|    std                  | 0.224      |
|    value_loss           | 0.00614    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2           |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 401         |
|    iterations           | 5           |
|    time_elapsed         | 25          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.035775896 |
|    clip_fraction        | 0.246       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0447     |
|    explained_variance   | -19.7       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.023       |
|    n_updates            | 7880        |
|    policy_gradient_loss | 0.0132      |
|    std                  | 0.249       |
|    value_loss           | 0.0029      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.34        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 382         |
|    iterations           | 14          |
|    time_elapsed         | 75          |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.018172607 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0869     |
|    explained_variance   | -11.6       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0257      |
|    n_updates            | 7970        |
|    policy_gradient_loss | -0.00259    |
|    std                  | 0.252       |
|    value_loss           | 0.00551     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.51        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 377         |
|    iterations           | 23          |
|    time_elapsed         | 124         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.048064336 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0723     |
|    explained_variance   | -21.7       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00582     |
|    n_updates            | 8060        |
|    policy_gradient_loss | 0.00287     |
|    std                  | 0.25        |
|    value_loss           | 0.00304     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.21       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 32         |
|    time_elapsed         | 174        |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.01799675 |
|    clip_fraction        | 0.183      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0711    |
|    explained_variance   | -106       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00392    |
|    n_updates            | 8150       |
|    policy_gradient_loss | 0.00441    |
|    std                  | 0.253      |
|    value_loss           | 0.0159     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 41          |
|    time_elapsed         | 223         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.025798582 |
|    clip_fraction        | 0.205       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0706     |
|    explained_variance   | -190        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0383      |
|    n_updates            | 8240        |
|    policy_gradient_loss | 0.0119      |
|    std                  | 0.25        |
|    value_loss           | 0.0085      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

--- 270.33099722862244 seconds ---


In [31]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-300000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:




mean_reward:0.94 +/- 7.75
Focus Polytopes:
mean_reward:0.90 +/- 10.01
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.68 +/- 17.90
Focus Polytopes:
mean_reward:-0.83 +/- 42.78
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:0.98 +/- 4.48
Focus Polytopes:
mean_reward:0.85 +/- 12.26
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:0.96 +/- 6.33
Focus Polytopes:
mean_reward:0.90 +/- 10.01
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.60 +/- 20.02
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.97 +/- 5.48
Focus Polytopes:
mean_reward:0.86 +/- 11.84


In [32]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-300000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-400000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.12     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 337      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.19       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 285        |
|    iterations           | 2          |
|    time_elapsed         | 14         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.02039217 |
|    clip_fraction        | 0.21       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.0368     |
| 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.15        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 255         |
|    iterations           | 11          |
|    time_elapsed         | 88          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.020480003 |
|    clip_fraction        | 0.199       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.077       |
|    explained_variance   | -2.38e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00664     |
|    n_updates            | 8430        |
|    policy_gradient_loss | 0.00565     |
|    std                  | 0.234       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.3         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 252         |
|    iterations           | 20          |
|    time_elapsed         | 162         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.021611203 |
|    clip_fraction        | 0.212       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0502      |
|    explained_variance   | -73.9       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00696    |
|    n_updates            | 8520        |
|    policy_gradient_loss | 0.00511     |
|    std                  | 0.236       |
|    value_loss           | 0.00792     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.03        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 251         |
|    iterations           | 29          |
|    time_elapsed         | 236         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.017580792 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0391      |
|    explained_variance   | -23         |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0277     |
|    n_updates            | 8610        |
|    policy_gradient_loss | 0.00516     |
|    std                  | 0.236       |
|    value_loss           | 0.00322     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 38          |
|    time_elapsed         | 310         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.023746502 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0888      |
|    explained_variance   | -136        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00279    |
|    n_updates            | 8700        |
|    policy_gradient_loss | 0.00202     |
|    std                  | 0.232       |
|    value_loss           | 0.0117      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 47          |
|    time_elapsed         | 384         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.034547545 |
|    clip_fraction        | 0.228       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0968      |
|    explained_variance   | -54.4       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00752    |
|    n_updates            | 8790        |
|    policy_gradient_loss | -0.000789   |
|    std                  | 0.231       |
|    value_loss           | 0.00952     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.16        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 258         |
|    iterations           | 8           |
|    time_elapsed         | 63          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.023558533 |
|    clip_fraction        | 0.236       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0265     |
|    explained_variance   | 0.00385     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0198      |
|    n_updates            | 8400        |
|    policy_gradient_loss | 0.00102     |
|    std                  | 0.243       |
|    value_loss           | 487         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 254         |
|    iterations           | 17          |
|    time_elapsed         | 136         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.029322617 |
|    clip_fraction        | 0.232       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0377      |
|    explained_variance   | 0.000873    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0992      |
|    n_updates            | 8490        |
|    policy_gradient_loss | 0.00286     |
|    std                  | 0.236       |
|    value_loss           | 2.44e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.39       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 255        |
|    iterations           | 26         |
|    time_elapsed         | 208        |
|    total_timesteps      | 53248      |
| train/                  |            |
|    approx_kl            | 0.06658211 |
|    clip_fraction        | 0.229      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.109      |
|    explained_variance   | -567       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0231     |
|    n_updates            | 8580       |
|    policy_gradient_loss | 0.00528    |
|    std                  | 0.228      |
|    value_loss           | 0.019      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 255         |
|    iterations           | 35          |
|    time_elapsed         | 280         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.023184868 |
|    clip_fraction        | 0.272       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.173       |
|    explained_variance   | 0.00136     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0115     |
|    n_updates            | 8670        |
|    policy_gradient_loss | 0.00197     |
|    std                  | 0.221       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.18        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 255         |
|    iterations           | 44          |
|    time_elapsed         | 353         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.024672488 |
|    clip_fraction        | 0.231       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.145       |
|    explained_variance   | -254        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0285      |
|    n_updates            | 8760        |
|    policy_gradient_loss | 0.00113     |
|    std                  | 0.224       |
|    value_loss           | 0.0141      |
-----------------------------------------
----------------------------------------
| rollout/                |        

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.19      |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 276       |
|    iterations           | 5         |
|    time_elapsed         | 37        |
|    total_timesteps      | 10240     |
| train/                  |           |
|    approx_kl            | 0.0242704 |
|    clip_fraction        | 0.225     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.423     |
|    explained_variance   | 0.00182   |
|    learning_rate        | 0.0003    |
|    loss                 | 7.78e+03  |
|    n_updates            | 8370      |
|    policy_gradient_loss | 0.00454   |
|    std                  | 0.195     |
|    value_loss           | 976       |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.2        |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 265        |
|    iterations           | 14         |
|    time_elapsed         | 108        |
|    total_timesteps      | 28672      |
| train/                  |            |
|    approx_kl            | 0.04271821 |
|    clip_fraction        | 0.299      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.448      |
|    explained_variance   | -40        |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00932    |
|    n_updates            | 8460       |
|    policy_gradient_loss | 0.014      |
|    std                  | 0.193      |
|    value_loss           | 0.00856    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 2.13     |
|    ep_rew_mean          | 1        |
| time/                   |          |
|    fps                  | 262      |
|    iterations           | 23       |
|    time_elapsed         | 179      |
|    total_timesteps      | 47104    |
| train/                  |          |
|    approx_kl            | 0.065657 |
|    clip_fraction        | 0.24     |
|    clip_range           | 0.2      |
|    entropy_loss         | 0.425    |
|    explained_variance   | 0.000169 |
|    learning_rate        | 0.0003   |
|    loss                 | 0.0175   |
|    n_updates            | 8550     |
|    policy_gradient_loss | 0.01     |
|    std                  | 0.197    |
|    value_loss           | 489      |
--------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.2         |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.11       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 260        |
|    iterations           | 32         |
|    time_elapsed         | 251        |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.03294314 |
|    clip_fraction        | 0.268      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.498      |
|    explained_variance   | -42.3      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00728   |
|    n_updates            | 8640       |
|    policy_gradient_loss | 0.00896    |
|    std                  | 0.188      |
|    value_loss           | 0.0152     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 259         |
|    iterations           | 41          |
|    time_elapsed         | 323         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.025415644 |
|    clip_fraction        | 0.235       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.47        |
|    explained_variance   | -20.6       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0326      |
|    n_updates            | 8730        |
|    policy_gradient_loss | 0.00914     |
|    std                  | 0.193       |
|    value_loss           | 0.00163     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

--- 387.91961646080017 seconds ---
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.18     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 421      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.25        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 349         |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.031018265 |
|    clip_fraction        | 0.243       |
|    clip_range           | 0.2      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.18        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 309         |
|    iterations           | 11          |
|    time_elapsed         | 72          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.022752078 |
|    clip_fraction        | 0.242       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.172       |
|    explained_variance   | -19.2       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00929    |
|    n_updates            | 8430        |
|    policy_gradient_loss | 0.00785     |
|    std                  | 0.222       |
|    value_loss           | 0.00426     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.05       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 305        |
|    iterations           | 20         |
|    time_elapsed         | 134        |
|    total_timesteps      | 40960      |
| train/                  |            |
|    approx_kl            | 0.03925214 |
|    clip_fraction        | 0.277      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.128      |
|    explained_variance   | 0.000123   |
|    learning_rate        | 0.0003     |
|    loss                 | 0.212      |
|    n_updates            | 8520       |
|    policy_gradient_loss | 0.0215     |
|    std                  | 0.226      |
|    value_loss           | 489        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.04       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 303        |
|    iterations           | 29         |
|    time_elapsed         | 195        |
|    total_timesteps      | 59392      |
| train/                  |            |
|    approx_kl            | 0.03278149 |
|    clip_fraction        | 0.27       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.182      |
|    explained_variance   | 0.000162   |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0261     |
|    n_updates            | 8610       |
|    policy_gradient_loss | 0.00572    |
|    std                  | 0.221      |
|    value_loss           | 489        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.19        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 303         |
|    iterations           | 38          |
|    time_elapsed         | 256         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.019871442 |
|    clip_fraction        | 0.212       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.191       |
|    explained_variance   | -103        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0545      |
|    n_updates            | 8700        |
|    policy_gradient_loss | 0.0089      |
|    std                  | 0.22        |
|    value_loss           | 0.0325      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.01        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 302         |
|    iterations           | 47          |
|    time_elapsed         | 318         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.021180294 |
|    clip_fraction        | 0.275       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.182       |
|    explained_variance   | -24.6       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.000141    |
|    n_updates            | 8790        |
|    policy_gradient_loss | 0.00659     |
|    std                  | 0.222       |
|    value_loss           | 0.00485     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.24        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 8           |
|    time_elapsed         | 43          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.023122495 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.252       |
|    explained_variance   | -22.4       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00739    |
|    n_updates            | 8400        |
|    policy_gradient_loss | -0.00146    |
|    std                  | 0.215       |
|    value_loss           | 0.00606     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.95        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 364         |
|    iterations           | 17          |
|    time_elapsed         | 95          |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.030006465 |
|    clip_fraction        | 0.252       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.27        |
|    explained_variance   | 0.00283     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0578      |
|    n_updates            | 8490        |
|    policy_gradient_loss | 0.00362     |
|    std                  | 0.213       |
|    value_loss           | 919         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.97        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 26          |
|    time_elapsed         | 147         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.028238457 |
|    clip_fraction        | 0.232       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.222       |
|    explained_variance   | -46.1       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0189     |
|    n_updates            | 8580        |
|    policy_gradient_loss | 0.00249     |
|    std                  | 0.218       |
|    value_loss           | 0.00753     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.92        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 358         |
|    iterations           | 35          |
|    time_elapsed         | 199         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.013338509 |
|    clip_fraction        | 0.189       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.241       |
|    explained_variance   | -16.9       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0253      |
|    n_updates            | 8670        |
|    policy_gradient_loss | 0.00985     |
|    std                  | 0.216       |
|    value_loss           | 0.00373     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.1         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 358         |
|    iterations           | 44          |
|    time_elapsed         | 251         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.024525573 |
|    clip_fraction        | 0.231       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.235       |
|    explained_variance   | -19.7       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0209     |
|    n_updates            | 8760        |
|    policy_gradient_loss | 0.00568     |
|    std                  | 0.217       |
|    value_loss           | 0.00578     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.13        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 402         |
|    iterations           | 5           |
|    time_elapsed         | 25          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.028351668 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0268      |
|    explained_variance   | -255        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00654     |
|    n_updates            | 8370        |
|    policy_gradient_loss | 0.0129      |
|    std                  | 0.241       |
|    value_loss           | 0.0401      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 381         |
|    iterations           | 14          |
|    time_elapsed         | 75          |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.027835991 |
|    clip_fraction        | 0.211       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.084       |
|    explained_variance   | -0.000417   |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0211     |
|    n_updates            | 8460        |
|    policy_gradient_loss | 0.00605     |
|    std                  | 0.232       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 377         |
|    iterations           | 23          |
|    time_elapsed         | 124         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.043502554 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.087       |
|    explained_variance   | -96.1       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0478      |
|    n_updates            | 8550        |
|    policy_gradient_loss | 0.00343     |
|    std                  | 0.231       |
|    value_loss           | 1.94        |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.06        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 376         |
|    iterations           | 32          |
|    time_elapsed         | 174         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.018780971 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.113       |
|    explained_variance   | -714        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0869      |
|    n_updates            | 8640        |
|    policy_gradient_loss | 0.0064      |
|    std                  | 0.23        |
|    value_loss           | 0.0837      |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.27        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 376         |
|    iterations           | 41          |
|    time_elapsed         | 223         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.021087792 |
|    clip_fraction        | 0.241       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.175       |
|    explained_variance   | -17.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.334       |
|    n_updates            | 8730        |
|    policy_gradient_loss | 0.013       |
|    std                  | 0.221       |
|    value_loss           | 0.627       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

--- 268.57478427886963 seconds ---


In [33]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-400000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:0.98 +/- 4.48
Focus Polytopes:
mean_reward:0.96 +/- 6.33
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.85 +/- 12.26
Focus Polytopes:
mean_reward:0.71 +/- 17.04
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:1.00 +/- 0.00
Focus Polytopes:
mean_reward:0.90 +/- 10.01
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:0.95 +/- 7.08
Focus Polytopes:
mean_reward:-0.39 +/- 37.29
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.43 +/- 23.89
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.96 +/- 6.33
Focus Polytopes:
mean_reward:0.55 +/- 21.23


In [34]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-400000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-500000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.33     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 342      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.87        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 293         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.041977957 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.1         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 256         |
|    iterations           | 11          |
|    time_elapsed         | 87          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.017999526 |
|    clip_fraction        | 0.211       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.14        |
|    explained_variance   | -125        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00859     |
|    n_updates            | 8920        |
|    policy_gradient_loss | 0.00759     |
|    std                  | 0.226       |
|    value_loss           | 0.00566     |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2           |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 255         |
|    iterations           | 20          |
|    time_elapsed         | 160         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.027323823 |
|    clip_fraction        | 0.226       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.157       |
|    explained_variance   | -18.7       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0312      |
|    n_updates            | 9010        |
|    policy_gradient_loss | 0.00508     |
|    std                  | 0.224       |
|    value_loss           | 0.0033      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.07        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 254         |
|    iterations           | 29          |
|    time_elapsed         | 233         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.019866224 |
|    clip_fraction        | 0.252       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.218       |
|    explained_variance   | -1.9        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0343      |
|    n_updates            | 9100        |
|    policy_gradient_loss | 0.00783     |
|    std                  | 0.217       |
|    value_loss           | 0.00512     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.16       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 254        |
|    iterations           | 38         |
|    time_elapsed         | 306        |
|    total_timesteps      | 77824      |
| train/                  |            |
|    approx_kl            | 0.02664064 |
|    clip_fraction        | 0.248      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.265      |
|    explained_variance   | -1.65      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0248    |
|    n_updates            | 9190       |
|    policy_gradient_loss | 0.00536    |
|    std                  | 0.212      |
|    value_loss           | 0.0104     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.33      |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 253       |
|    iterations           | 47        |
|    time_elapsed         | 379       |
|    total_timesteps      | 96256     |
| train/                  |           |
|    approx_kl            | 0.0256267 |
|    clip_fraction        | 0.187     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.26      |
|    explained_variance   | -118      |
|    learning_rate        | 0.0003    |
|    loss                 | -0.00254  |
|    n_updates            | 9280      |
|    policy_gradient_loss | 0.00535   |
|    std                  | 0.213     |
|    value_loss           | 0.00244   |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.07    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.06        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 263         |
|    iterations           | 8           |
|    time_elapsed         | 62          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.041767355 |
|    clip_fraction        | 0.245       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.339       |
|    explained_variance   | 0.00113     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.005       |
|    n_updates            | 8890        |
|    policy_gradient_loss | 0.00411     |
|    std                  | 0.203       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.17        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 259         |
|    iterations           | 17          |
|    time_elapsed         | 134         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.024307681 |
|    clip_fraction        | 0.249       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.349       |
|    explained_variance   | 4.07e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 7.81e+03    |
|    n_updates            | 8980        |
|    policy_gradient_loss | 0.00557     |
|    std                  | 0.204       |
|    value_loss           | 977         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.13        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 258         |
|    iterations           | 26          |
|    time_elapsed         | 206         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.034056865 |
|    clip_fraction        | 0.238       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.498       |
|    explained_variance   | 0.000239    |
|    learning_rate        | 0.0003      |
|    loss                 | 7.82e+03    |
|    n_updates            | 9070        |
|    policy_gradient_loss | 0.00529     |
|    std                  | 0.189       |
|    value_loss           | 1.46e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.92        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 257         |
|    iterations           | 35          |
|    time_elapsed         | 278         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.038922362 |
|    clip_fraction        | 0.32        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.671       |
|    explained_variance   | 0.002       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0467      |
|    n_updates            | 9160        |
|    policy_gradient_loss | 0.00855     |
|    std                  | 0.173       |
|    value_loss           | 2.44e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.04        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 256         |
|    iterations           | 44          |
|    time_elapsed         | 351         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.042984538 |
|    clip_fraction        | 0.277       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.714       |
|    explained_variance   | 0.000461    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.88e-05    |
|    n_updates            | 9250        |
|    policy_gradient_loss | 0.00831     |
|    std                  | 0.169       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.84        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 274         |
|    iterations           | 5           |
|    time_elapsed         | 37          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.032626137 |
|    clip_fraction        | 0.248       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.501       |
|    explained_variance   | 0.000468    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.038       |
|    n_updates            | 8860        |
|    policy_gradient_loss | 0.0176      |
|    std                  | 0.189       |
|    value_loss           | 488         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.13       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 267        |
|    iterations           | 14         |
|    time_elapsed         | 107        |
|    total_timesteps      | 28672      |
| train/                  |            |
|    approx_kl            | 0.04461504 |
|    clip_fraction        | 0.274      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.474      |
|    explained_variance   | -371       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0343     |
|    n_updates            | 8950       |
|    policy_gradient_loss | 0.0127     |
|    std                  | 0.19       |
|    value_loss           | 0.0539     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.22        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 264         |
|    iterations           | 23          |
|    time_elapsed         | 177         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.013450265 |
|    clip_fraction        | 0.245       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.47        |
|    explained_variance   | 0.0022      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0447      |
|    n_updates            | 9040        |
|    policy_gradient_loss | 0.0093      |
|    std                  | 0.191       |
|    value_loss           | 487         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.02      |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 263       |
|    iterations           | 32        |
|    time_elapsed         | 248       |
|    total_timesteps      | 65536     |
| train/                  |           |
|    approx_kl            | 0.0431136 |
|    clip_fraction        | 0.243     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.464     |
|    explained_variance   | -12.8     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.0157    |
|    n_updates            | 9130      |
|    policy_gradient_loss | 0.00526   |
|    std                  | 0.19      |
|    value_loss           | 0.00404   |
---------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.11      

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.14       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 262        |
|    iterations           | 42         |
|    time_elapsed         | 327        |
|    total_timesteps      | 86016      |
| train/                  |            |
|    approx_kl            | 0.03693449 |
|    clip_fraction        | 0.257      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.459      |
|    explained_variance   | -15.9      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0113     |
|    n_updates            | 9230       |
|    policy_gradient_loss | 0.00469    |
|    std                  | 0.193      |
|    value_loss           | 0.00519    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 333         |
|    iterations           | 3           |
|    time_elapsed         | 18          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.028791826 |
|    clip_fraction        | 0.232       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.156       |
|    explained_variance   | -140        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0151      |
|    n_updates            | 8840        |
|    policy_gradient_loss | 0.00954     |
|    std                  | 0.225       |
|    value_loss           | 0.03        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.93        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 313         |
|    iterations           | 12          |
|    time_elapsed         | 78          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.029001625 |
|    clip_fraction        | 0.235       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.232       |
|    explained_variance   | 0.00136     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.301       |
|    n_updates            | 8930        |
|    policy_gradient_loss | -0.00147    |
|    std                  | 0.215       |
|    value_loss           | 1.46e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.19       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 308        |
|    iterations           | 21         |
|    time_elapsed         | 139        |
|    total_timesteps      | 43008      |
| train/                  |            |
|    approx_kl            | 0.02557955 |
|    clip_fraction        | 0.213      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.214      |
|    explained_variance   | -52.8      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00671   |
|    n_updates            | 9020       |
|    policy_gradient_loss | 0.00844    |
|    std                  | 0.217      |
|    value_loss           | 0.0145     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.03      |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 307       |
|    iterations           | 31        |
|    time_elapsed         | 206       |
|    total_timesteps      | 63488     |
| train/                  |           |
|    approx_kl            | 0.0453237 |
|    clip_fraction        | 0.286     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.225     |
|    explained_variance   | 0.000106  |
|    learning_rate        | 0.0003    |
|    loss                 | 0.0802    |
|    n_updates            | 9120      |
|    policy_gradient_loss | 0.0162    |
|    std                  | 0.218     |
|    value_loss           | 489       |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.16    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.98        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 40          |
|    time_elapsed         | 267         |
|    total_timesteps      | 81920       |
| train/                  |             |
|    approx_kl            | 0.019294553 |
|    clip_fraction        | 0.229       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.147       |
|    explained_variance   | -194        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0163      |
|    n_updates            | 9210        |
|    policy_gradient_loss | 0.00547     |
|    std                  | 0.226       |
|    value_loss           | 0.0117      |
-----------------------------------------
---------------------------------------
| rollout/                |         

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.2       |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 306       |
|    iterations           | 49        |
|    time_elapsed         | 327       |
|    total_timesteps      | 100352    |
| train/                  |           |
|    approx_kl            | 0.0357166 |
|    clip_fraction        | 0.264     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.184     |
|    explained_variance   | -50.5     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.0166    |
|    n_updates            | 9300      |
|    policy_gradient_loss | 0.0136    |
|    std                  | 0.224     |
|    value_loss           | 0.00509   |
---------------------------------------
--- 329.15186834335327 seconds ---
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
-

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.15        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 372         |
|    iterations           | 10          |
|    time_elapsed         | 54          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.021553084 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.295       |
|    explained_variance   | -9.22       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00741    |
|    n_updates            | 8910        |
|    policy_gradient_loss | 0.000403    |
|    std                  | 0.212       |
|    value_loss           | 0.00876     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.03        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 367         |
|    iterations           | 19          |
|    time_elapsed         | 106         |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.016661992 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.286       |
|    explained_variance   | -35.1       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0037     |
|    n_updates            | 9000        |
|    policy_gradient_loss | 0.0172      |
|    std                  | 0.212       |
|    value_loss           | 0.00571     |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.95        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 363         |
|    iterations           | 28          |
|    time_elapsed         | 157         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.031504996 |
|    clip_fraction        | 0.249       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.231       |
|    explained_variance   | -89.8       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0431      |
|    n_updates            | 9090        |
|    policy_gradient_loss | 0.0175      |
|    std                  | 0.218       |
|    value_loss           | 0.0101      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.01        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 362         |
|    iterations           | 37          |
|    time_elapsed         | 208         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.033207156 |
|    clip_fraction        | 0.249       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.224       |
|    explained_variance   | -147        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.000654   |
|    n_updates            | 9180        |
|    policy_gradient_loss | 0.00639     |
|    std                  | 0.22        |
|    value_loss           | 0.00983     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.2         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 46          |
|    time_elapsed         | 260         |
|    total_timesteps      | 94208       |
| train/                  |             |
|    approx_kl            | 0.023443524 |
|    clip_fraction        | 0.182       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.193       |
|    explained_variance   | 0.000724    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0192      |
|    n_updates            | 9270        |
|    policy_gradient_loss | 0.0099      |
|    std                  | 0.222       |
|    value_loss           | 487         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.04       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 399        |
|    iterations           | 7          |
|    time_elapsed         | 35         |
|    total_timesteps      | 14336      |
| train/                  |            |
|    approx_kl            | 0.03935688 |
|    clip_fraction        | 0.208      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.285      |
|    explained_variance   | -13.4      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0107    |
|    n_updates            | 8880       |
|    policy_gradient_loss | 0.0111     |
|    std                  | 0.21       |
|    value_loss           | 0.00386    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 388         |
|    iterations           | 16          |
|    time_elapsed         | 84          |
|    total_timesteps      | 32768       |
| train/                  |             |
|    approx_kl            | 0.017030636 |
|    clip_fraction        | 0.243       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.3         |
|    explained_variance   | -151        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0153      |
|    n_updates            | 8970        |
|    policy_gradient_loss | 0.00459     |
|    std                  | 0.21        |
|    value_loss           | 0.0307      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.23        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 384         |
|    iterations           | 25          |
|    time_elapsed         | 133         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.015646815 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.273       |
|    explained_variance   | -11.6       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0286      |
|    n_updates            | 9060        |
|    policy_gradient_loss | 0.0141      |
|    std                  | 0.213       |
|    value_loss           | 0.00277     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.16        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 382         |
|    iterations           | 34          |
|    time_elapsed         | 181         |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.024628788 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.316       |
|    explained_variance   | -6.12       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0222     |
|    n_updates            | 9150        |
|    policy_gradient_loss | 0.0103      |
|    std                  | 0.21        |
|    value_loss           | 0.00582     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.23        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 381         |
|    iterations           | 43          |
|    time_elapsed         | 230         |
|    total_timesteps      | 88064       |
| train/                  |             |
|    approx_kl            | 0.022897048 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.314       |
|    explained_variance   | -3.43       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.013      |
|    n_updates            | 9240        |
|    policy_gradient_loss | 0.00516     |
|    std                  | 0.209       |
|    value_loss           | 0.00304     |
-----------------------------------------
----------------------------------------
| rollout/                |        

In [35]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-500000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:0.92 +/- 8.95
Focus Polytopes:
mean_reward:1.00 +/- 0.00
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.81 +/- 13.80
Focus Polytopes:
mean_reward:0.58 +/- 20.51
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:1.00 +/- 0.00
Focus Polytopes:
mean_reward:0.96 +/- 6.33
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:1.00 +/- 0.00
Focus Polytopes:
mean_reward:0.92 +/- 8.95
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:1.00 +/- 0.00
Focus Polytopes:
mean_reward:0.59 +/- 20.26
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.97 +/- 5.48
Focus Polytopes:
mean_reward:0.67 +/- 18.18


In [36]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-500000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-600000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.19     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 318      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.92        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 280         |
|    iterations           | 2           |
|    time_elapsed         | 14          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.034706473 |
|    clip_fraction        | 0.267       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.22        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 260         |
|    iterations           | 11          |
|    time_elapsed         | 86          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.018063655 |
|    clip_fraction        | 0.199       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.219       |
|    explained_variance   | -7.88       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0334      |
|    n_updates            | 9410        |
|    policy_gradient_loss | 0.00571     |
|    std                  | 0.218       |
|    value_loss           | 0.00204     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.97        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 258         |
|    iterations           | 20          |
|    time_elapsed         | 158         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.032392494 |
|    clip_fraction        | 0.2         |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.171       |
|    explained_variance   | -54.9       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0108      |
|    n_updates            | 9500        |
|    policy_gradient_loss | 0.0014      |
|    std                  | 0.221       |
|    value_loss           | 0.0172      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.98        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 256         |
|    iterations           | 29          |
|    time_elapsed         | 231         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.019535381 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.245       |
|    explained_variance   | 0.00174     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0753      |
|    n_updates            | 9590        |
|    policy_gradient_loss | 0.006       |
|    std                  | 0.213       |
|    value_loss           | 487         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.07        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 255         |
|    iterations           | 38          |
|    time_elapsed         | 304         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.019477373 |
|    clip_fraction        | 0.236       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.236       |
|    explained_variance   | -6.5        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0439     |
|    n_updates            | 9680        |
|    policy_gradient_loss | 0.00761     |
|    std                  | 0.216       |
|    value_loss           | 0.000208    |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.99       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 254        |
|    iterations           | 48         |
|    time_elapsed         | 385        |
|    total_timesteps      | 98304      |
| train/                  |            |
|    approx_kl            | 0.02282644 |
|    clip_fraction        | 0.219      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.185      |
|    explained_variance   | -88.2      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0134     |
|    n_updates            | 9780       |
|    policy_gradient_loss | 0.0103     |
|    std                  | 0.22       |
|    value_loss           | 0.00106    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.91        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 267         |
|    iterations           | 9           |
|    time_elapsed         | 68          |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.032770976 |
|    clip_fraction        | 0.287       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.76        |
|    explained_variance   | 0.00239     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00114     |
|    n_updates            | 9390        |
|    policy_gradient_loss | 0.0035      |
|    std                  | 0.165       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.24        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 265         |
|    iterations           | 18          |
|    time_elapsed         | 139         |
|    total_timesteps      | 36864       |
| train/                  |             |
|    approx_kl            | 0.045755353 |
|    clip_fraction        | 0.279       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.827       |
|    explained_variance   | 0.000998    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0568      |
|    n_updates            | 9480        |
|    policy_gradient_loss | 0.00739     |
|    std                  | 0.16        |
|    value_loss           | 1.46e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.17       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 263        |
|    iterations           | 27         |
|    time_elapsed         | 209        |
|    total_timesteps      | 55296      |
| train/                  |            |
|    approx_kl            | 0.04226909 |
|    clip_fraction        | 0.303      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.881      |
|    explained_variance   | 0.00168    |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0378     |
|    n_updates            | 9570       |
|    policy_gradient_loss | 0.00683    |
|    std                  | 0.155      |
|    value_loss           | 1.89e+03   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.1         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 261         |
|    iterations           | 37          |
|    time_elapsed         | 289         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.036456376 |
|    clip_fraction        | 0.3         |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.981       |
|    explained_variance   | -314        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0139      |
|    n_updates            | 9670        |
|    policy_gradient_loss | 0.0105      |
|    std                  | 0.148       |
|    value_loss           | 0.0658      |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.1        |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 261        |
|    iterations           | 47         |
|    time_elapsed         | 368        |
|    total_timesteps      | 96256      |
| train/                  |            |
|    approx_kl            | 0.16881324 |
|    clip_fraction        | 0.304      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.99       |
|    explained_variance   | 0.000149   |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0331     |
|    n_updates            | 9770       |
|    policy_gradient_loss | 0.0059     |
|    std                  | 0.147      |
|    value_loss           | 977        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.05       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 274        |
|    iterations           | 8          |
|    time_elapsed         | 59         |
|    total_timesteps      | 16384      |
| train/                  |            |
|    approx_kl            | 0.03655512 |
|    clip_fraction        | 0.266      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.555      |
|    explained_variance   | -9.49      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00888    |
|    n_updates            | 9380       |
|    policy_gradient_loss | 0.00935    |
|    std                  | 0.183      |
|    value_loss           | 0.00266    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.92        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 267         |
|    iterations           | 17          |
|    time_elapsed         | 130         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.034297906 |
|    clip_fraction        | 0.231       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.589       |
|    explained_variance   | -75         |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0244     |
|    n_updates            | 9470        |
|    policy_gradient_loss | 0.00617     |
|    std                  | 0.181       |
|    value_loss           | 0.00542     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.06        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 265         |
|    iterations           | 26          |
|    time_elapsed         | 200         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.029256497 |
|    clip_fraction        | 0.257       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.618       |
|    explained_variance   | -3.1e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0168      |
|    n_updates            | 9560        |
|    policy_gradient_loss | 0.00819     |
|    std                  | 0.177       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.85       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 265        |
|    iterations           | 35         |
|    time_elapsed         | 269        |
|    total_timesteps      | 71680      |
| train/                  |            |
|    approx_kl            | 0.04470655 |
|    clip_fraction        | 0.284      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.578      |
|    explained_variance   | -14.9      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00814   |
|    n_updates            | 9650       |
|    policy_gradient_loss | 0.0137     |
|    std                  | 0.183      |
|    value_loss           | 0.00475    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2          |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 44         |
|    time_elapsed         | 338        |
|    total_timesteps      | 90112      |
| train/                  |            |
|    approx_kl            | 0.04171415 |
|    clip_fraction        | 0.24       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.506      |
|    explained_variance   | -47.8      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0284    |
|    n_updates            | 9740       |
|    policy_gradient_loss | 0.00428    |
|    std                  | 0.189      |
|    value_loss           | 0.00668    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.94        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 327         |
|    iterations           | 5           |
|    time_elapsed         | 31          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.040787823 |
|    clip_fraction        | 0.238       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.124       |
|    explained_variance   | 0.000587    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0266      |
|    n_updates            | 9350        |
|    policy_gradient_loss | 0.00706     |
|    std                  | 0.229       |
|    value_loss           | 976         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.28        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 316         |
|    iterations           | 14          |
|    time_elapsed         | 90          |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.028669983 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.175       |
|    explained_variance   | -33.9       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0348      |
|    n_updates            | 9440        |
|    policy_gradient_loss | 0.00614     |
|    std                  | 0.223       |
|    value_loss           | 0.00554     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.33        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 314         |
|    iterations           | 23          |
|    time_elapsed         | 150         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.016221534 |
|    clip_fraction        | 0.245       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.16        |
|    explained_variance   | -10.2       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0435      |
|    n_updates            | 9530        |
|    policy_gradient_loss | 0.00517     |
|    std                  | 0.226       |
|    value_loss           | 0.00499     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.3         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 313         |
|    iterations           | 32          |
|    time_elapsed         | 209         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.029501509 |
|    clip_fraction        | 0.259       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.143       |
|    explained_variance   | 8.82e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0198     |
|    n_updates            | 9620        |
|    policy_gradient_loss | 0.017       |
|    std                  | 0.229       |
|    value_loss           | 489         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.25        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 313         |
|    iterations           | 41          |
|    time_elapsed         | 267         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.038596697 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.183       |
|    explained_variance   | 5.47e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.034       |
|    n_updates            | 9710        |
|    policy_gradient_loss | 0.0126      |
|    std                  | 0.225       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

--- 322.18866872787476 seconds ---
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.17     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 549      |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.06       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 438        |
|    iterations           | 2          |
|    time_elapsed         | 9          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.01632119 |
|    clip_fraction        | 0.194      |
|    clip_range           | 0.2        |
|    ent

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.11        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 378         |
|    iterations           | 11          |
|    time_elapsed         | 59          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.018711928 |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0621      |
|    explained_variance   | -30.6       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0152      |
|    n_updates            | 9410        |
|    policy_gradient_loss | 0.00311     |
|    std                  | 0.24        |
|    value_loss           | 0.00609     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.23        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 373         |
|    iterations           | 20          |
|    time_elapsed         | 109         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.025600698 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.00823     |
|    explained_variance   | -33.5       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.019      |
|    n_updates            | 9500        |
|    policy_gradient_loss | 0.00496     |
|    std                  | 0.242       |
|    value_loss           | 0.0063      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 371         |
|    iterations           | 29          |
|    time_elapsed         | 159         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.019222617 |
|    clip_fraction        | 0.188       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0182      |
|    explained_variance   | -45.3       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0235      |
|    n_updates            | 9590        |
|    policy_gradient_loss | 0.0154      |
|    std                  | 0.24        |
|    value_loss           | 0.00343     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 370         |
|    iterations           | 38          |
|    time_elapsed         | 210         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.019431839 |
|    clip_fraction        | 0.18        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0231      |
|    explained_variance   | -61.1       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0141      |
|    n_updates            | 9680        |
|    policy_gradient_loss | 0.00943     |
|    std                  | 0.242       |
|    value_loss           | 0.00434     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.19        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 370         |
|    iterations           | 47          |
|    time_elapsed         | 259         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.017443229 |
|    clip_fraction        | 0.178       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0797      |
|    explained_variance   | -14.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00369     |
|    n_updates            | 9770        |
|    policy_gradient_loss | 0.0071      |
|    std                  | 0.234       |
|    value_loss           | 0.00402     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.24        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 403         |
|    iterations           | 8           |
|    time_elapsed         | 40          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.019131979 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.307       |
|    explained_variance   | -11.6       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0408     |
|    n_updates            | 9380        |
|    policy_gradient_loss | 0.00863     |
|    std                  | 0.211       |
|    value_loss           | 0.00396     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.46       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 391        |
|    iterations           | 17         |
|    time_elapsed         | 88         |
|    total_timesteps      | 34816      |
| train/                  |            |
|    approx_kl            | 0.03432279 |
|    clip_fraction        | 0.233      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.302      |
|    explained_variance   | -14.8      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00702    |
|    n_updates            | 9470       |
|    policy_gradient_loss | 0.0101     |
|    std                  | 0.212      |
|    value_loss           | 0.00911    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.36        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 388         |
|    iterations           | 26          |
|    time_elapsed         | 136         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.020345833 |
|    clip_fraction        | 0.18        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.354       |
|    explained_variance   | -11.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0111      |
|    n_updates            | 9560        |
|    policy_gradient_loss | 0.00746     |
|    std                  | 0.204       |
|    value_loss           | 0.00388     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.99        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 387         |
|    iterations           | 35          |
|    time_elapsed         | 185         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.033545382 |
|    clip_fraction        | 0.27        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.313       |
|    explained_variance   | -31.6       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00279     |
|    n_updates            | 9650        |
|    policy_gradient_loss | 0.013       |
|    std                  | 0.212       |
|    value_loss           | 0.0054      |
-----------------------------------------
---------------------------------------
| rollout/                |         

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.21       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 387        |
|    iterations           | 44         |
|    time_elapsed         | 232        |
|    total_timesteps      | 90112      |
| train/                  |            |
|    approx_kl            | 0.05752319 |
|    clip_fraction        | 0.207      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.388      |
|    explained_variance   | -13.9      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0148     |
|    n_updates            | 9740       |
|    policy_gradient_loss | 0.0118     |
|    std                  | 0.201      |
|    value_loss           | 0.00272    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

In [37]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-600000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:1.00 +/- 0.00
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.04 +/- 31.00
Focus Polytopes:
mean_reward:0.70 +/- 17.34
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:0.98 +/- 4.48
Focus Polytopes:
mean_reward:0.90 +/- 10.01
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:0.95 +/- 7.08
Focus Polytopes:
mean_reward:0.71 +/- 17.04
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:1.00 +/- 0.00
Focus Polytopes:
mean_reward:0.82 +/- 13.43
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.90 +/- 10.01
Focus Polytopes:
mean_reward:0.47 +/- 23.04


In [38]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-600000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-700000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.83     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 337      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.05        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 290         |
|    iterations           | 2           |
|    time_elapsed         | 14          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.023266051 |
|    clip_fraction        | 0.182       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.92        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 258         |
|    iterations           | 11          |
|    time_elapsed         | 86          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.024024628 |
|    clip_fraction        | 0.188       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.249       |
|    explained_variance   | -12         |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0381      |
|    n_updates            | 9900        |
|    policy_gradient_loss | 0.0151      |
|    std                  | 0.213       |
|    value_loss           | 0.00275     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.93       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 256        |
|    iterations           | 20         |
|    time_elapsed         | 159        |
|    total_timesteps      | 40960      |
| train/                  |            |
|    approx_kl            | 0.04065465 |
|    clip_fraction        | 0.24       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.204      |
|    explained_variance   | -0.827     |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0596     |
|    n_updates            | 9990       |
|    policy_gradient_loss | 0.00734    |
|    std                  | 0.218      |
|    value_loss           | 0.00182    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.8         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 252         |
|    iterations           | 29          |
|    time_elapsed         | 235         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.021637075 |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.26        |
|    explained_variance   | -0.498      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0418      |
|    n_updates            | 10080       |
|    policy_gradient_loss | 0.00949     |
|    std                  | 0.213       |
|    value_loss           | 0.002       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.21       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 253        |
|    iterations           | 38         |
|    time_elapsed         | 307        |
|    total_timesteps      | 77824      |
| train/                  |            |
|    approx_kl            | 0.01387507 |
|    clip_fraction        | 0.172      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.307      |
|    explained_variance   | -0.000104  |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0541     |
|    n_updates            | 10170      |
|    policy_gradient_loss | 0.00439    |
|    std                  | 0.209      |
|    value_loss           | 488        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.18        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 253         |
|    iterations           | 47          |
|    time_elapsed         | 379         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.040521964 |
|    clip_fraction        | 0.234       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.263       |
|    explained_variance   | -8.09       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0613      |
|    n_updates            | 10260       |
|    policy_gradient_loss | 0.0106      |
|    std                  | 0.212       |
|    value_loss           | 0.00202     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.91        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 272         |
|    iterations           | 8           |
|    time_elapsed         | 60          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.035334736 |
|    clip_fraction        | 0.303       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.933       |
|    explained_variance   | 0.00116     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00471     |
|    n_updates            | 9870        |
|    policy_gradient_loss | 0.00653     |
|    std                  | 0.152       |
|    value_loss           | 489         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.95        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 17          |
|    time_elapsed         | 138         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.064903915 |
|    clip_fraction        | 0.331       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.941       |
|    explained_variance   | -69.5       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00344    |
|    n_updates            | 9960        |
|    policy_gradient_loss | 0.00862     |
|    std                  | 0.151       |
|    value_loss           | 0.0131      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.13       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 247        |
|    iterations           | 26         |
|    time_elapsed         | 214        |
|    total_timesteps      | 53248      |
| train/                  |            |
|    approx_kl            | 0.47359824 |
|    clip_fraction        | 0.354      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.917      |
|    explained_variance   | 0.000258   |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00382   |
|    n_updates            | 10050      |
|    policy_gradient_loss | 0.0109     |
|    std                  | 0.154      |
|    value_loss           | 487        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2           |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 35          |
|    time_elapsed         | 286         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.032251794 |
|    clip_fraction        | 0.262       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.85        |
|    explained_variance   | 2.83e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 7.82e+03    |
|    n_updates            | 10140       |
|    policy_gradient_loss | 0.00287     |
|    std                  | 0.158       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.88        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 252         |
|    iterations           | 44          |
|    time_elapsed         | 356         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.029877983 |
|    clip_fraction        | 0.27        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.767       |
|    explained_variance   | -77.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0389      |
|    n_updates            | 10230       |
|    policy_gradient_loss | 0.00928     |
|    std                  | 0.167       |
|    value_loss           | 0.0446      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.91        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 292         |
|    iterations           | 5           |
|    time_elapsed         | 35          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.042116955 |
|    clip_fraction        | 0.287       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.535       |
|    explained_variance   | -67.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0729      |
|    n_updates            | 9840        |
|    policy_gradient_loss | 0.0151      |
|    std                  | 0.186       |
|    value_loss           | 0.00545     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.18        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 246         |
|    iterations           | 14          |
|    time_elapsed         | 116         |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.036227718 |
|    clip_fraction        | 0.247       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.582       |
|    explained_variance   | -57.8       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00923     |
|    n_updates            | 9930        |
|    policy_gradient_loss | 0.00816     |
|    std                  | 0.181       |
|    value_loss           | 0.0101      |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.95        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 252         |
|    iterations           | 23          |
|    time_elapsed         | 186         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.033758484 |
|    clip_fraction        | 0.263       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.623       |
|    explained_variance   | -66.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0207      |
|    n_updates            | 10020       |
|    policy_gradient_loss | 0.00497     |
|    std                  | 0.177       |
|    value_loss           | 0.00644     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.12       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 252        |
|    iterations           | 32         |
|    time_elapsed         | 259        |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.02793557 |
|    clip_fraction        | 0.235      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.581      |
|    explained_variance   | -28.4      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0137     |
|    n_updates            | 10110      |
|    policy_gradient_loss | 0.0104     |
|    std                  | 0.181      |
|    value_loss           | 0.00488    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.89        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 41          |
|    time_elapsed         | 335         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.015549937 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.63        |
|    explained_variance   | -34.2       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0192     |
|    n_updates            | 10200       |
|    policy_gradient_loss | 0.00776     |
|    std                  | 0.178       |
|    value_loss           | 0.00261     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

--- 412.2038493156433 seconds ---
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.42     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 465      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.35       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 360        |
|    iterations           | 2          |
|    time_elapsed         | 11         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.03731767 |
|    clip_fraction        | 0.269      |
|    clip_range           | 0.2        |
|    entr

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.38        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 289         |
|    iterations           | 11          |
|    time_elapsed         | 77          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.035441913 |
|    clip_fraction        | 0.243       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.278       |
|    explained_variance   | -56.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0142      |
|    n_updates            | 9900        |
|    policy_gradient_loss | 0.0103      |
|    std                  | 0.214       |
|    value_loss           | 0.00834     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.21        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 293         |
|    iterations           | 20          |
|    time_elapsed         | 139         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.039129794 |
|    clip_fraction        | 0.269       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.427       |
|    explained_variance   | -62.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0298      |
|    n_updates            | 9990        |
|    policy_gradient_loss | 0.00727     |
|    std                  | 0.198       |
|    value_loss           | 0.0389      |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.46        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 297         |
|    iterations           | 29          |
|    time_elapsed         | 199         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.040627442 |
|    clip_fraction        | 0.277       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.47        |
|    explained_variance   | -26.2       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0541      |
|    n_updates            | 10080       |
|    policy_gradient_loss | 0.011       |
|    std                  | 0.197       |
|    value_loss           | 0.00615     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.17        |
|    ep_rew_mean          | -9.01       |
| time/                   |             |
|    fps                  | 300         |
|    iterations           | 38          |
|    time_elapsed         | 259         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.036525622 |
|    clip_fraction        | 0.248       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.394       |
|    explained_variance   | 0.00219     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0233      |
|    n_updates            | 10170       |
|    policy_gradient_loss | 0.0105      |
|    std                  | 0.201       |
|    value_loss           | 488         |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.07       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 47         |
|    time_elapsed         | 319        |
|    total_timesteps      | 96256      |
| train/                  |            |
|    approx_kl            | 0.01996864 |
|    clip_fraction        | 0.238      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.376      |
|    explained_variance   | -29.3      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0142     |
|    n_updates            | 10260      |
|    policy_gradient_loss | 0.00681    |
|    std                  | 0.204      |
|    value_loss           | 0.0346     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.05        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 383         |
|    iterations           | 8           |
|    time_elapsed         | 42          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.019714104 |
|    clip_fraction        | 0.196       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0547      |
|    explained_variance   | -9.55       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.042       |
|    n_updates            | 9870        |
|    policy_gradient_loss | 0.00353     |
|    std                  | 0.237       |
|    value_loss           | 0.00764     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.35        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 372         |
|    iterations           | 17          |
|    time_elapsed         | 93          |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.018098086 |
|    clip_fraction        | 0.182       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.107       |
|    explained_variance   | -2.01       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00858    |
|    n_updates            | 9960        |
|    policy_gradient_loss | 0.00593     |
|    std                  | 0.231       |
|    value_loss           | 0.00274     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.13      |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 368       |
|    iterations           | 26        |
|    time_elapsed         | 144       |
|    total_timesteps      | 53248     |
| train/                  |           |
|    approx_kl            | 0.6537478 |
|    clip_fraction        | 0.362     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.0985    |
|    explained_variance   | -17.3     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0313   |
|    n_updates            | 10050     |
|    policy_gradient_loss | 0.0194    |
|    std                  | 0.233     |
|    value_loss           | 0.00447   |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.01        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 367         |
|    iterations           | 35          |
|    time_elapsed         | 194         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.026005091 |
|    clip_fraction        | 0.196       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.153       |
|    explained_variance   | -30.1       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00561    |
|    n_updates            | 10140       |
|    policy_gradient_loss | 0.00976     |
|    std                  | 0.224       |
|    value_loss           | 0.00371     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.95        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 367         |
|    iterations           | 44          |
|    time_elapsed         | 245         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.024416301 |
|    clip_fraction        | 0.178       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0666      |
|    explained_variance   | -23         |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0128      |
|    n_updates            | 10230       |
|    policy_gradient_loss | 0.00864     |
|    std                  | 0.236       |
|    value_loss           | 0.00538     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.24        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 418         |
|    iterations           | 5           |
|    time_elapsed         | 24          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.030526882 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.395       |
|    explained_variance   | -17.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00217     |
|    n_updates            | 9840        |
|    policy_gradient_loss | 0.00204     |
|    std                  | 0.202       |
|    value_loss           | 0.00579     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.28        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 397         |
|    iterations           | 14          |
|    time_elapsed         | 72          |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.032900944 |
|    clip_fraction        | 0.258       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.357       |
|    explained_variance   | -18         |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0284     |
|    n_updates            | 9930        |
|    policy_gradient_loss | 0.0046      |
|    std                  | 0.205       |
|    value_loss           | 0.00596     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.4         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 392         |
|    iterations           | 23          |
|    time_elapsed         | 120         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.019211885 |
|    clip_fraction        | 0.228       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.367       |
|    explained_variance   | -33.8       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00436     |
|    n_updates            | 10020       |
|    policy_gradient_loss | 0.00264     |
|    std                  | 0.206       |
|    value_loss           | 0.00467     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.05        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 389         |
|    iterations           | 32          |
|    time_elapsed         | 168         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.015592384 |
|    clip_fraction        | 0.243       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.386       |
|    explained_variance   | -55.8       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0153     |
|    n_updates            | 10110       |
|    policy_gradient_loss | 0.0163      |
|    std                  | 0.205       |
|    value_loss           | 0.00527     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.13        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 387         |
|    iterations           | 41          |
|    time_elapsed         | 216         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.028692843 |
|    clip_fraction        | 0.179       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.416       |
|    explained_variance   | 3.59e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.455       |
|    n_updates            | 10200       |
|    policy_gradient_loss | 0.00544     |
|    std                  | 0.198       |
|    value_loss           | 2.17e+03    |
-----------------------------------------
----------------------------------------
| rollout/                |        

--- 268.07624340057373 seconds ---


In [39]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-700000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.98 +/- 4.48
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.23 +/- 27.77
Focus Polytopes:
mean_reward:0.92 +/- 8.95
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.96 +/- 6.33
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:0.98 +/- 4.48
Focus Polytopes:
mean_reward:0.91 +/- 9.50
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.80 +/- 14.15
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.95 +/- 7.08
Focus Polytopes:
mean_reward:0.67 +/- 18.18


In [47]:
results_overall

{0.0: [(0.94, 7.75),
  (0.97998, 4.476563324649836),
  (0.93994, 7.7534800442382),
  (0.97998, 4.476563324649836),
  (0.91992, 8.952858046099026),
  (0.98999, 3.16542411058929),
  (0.98999, 3.1654241105892895)],
 0.05: [(0.97, 5.48),
  (0.67968, 17.903567105401088),
  (0.67968, 17.903567105401088),
  (0.84985, 12.258776650934628),
  (0.80981, 13.796521944457595),
  (0.03904, 30.999959933496687),
  (0.22923, 27.765926665737272)],
 0.1: [(0.94, 7.75),
  (0.86987, 11.412414125990173),
  (0.97998, 4.476563324649836),
  (1.0, 0.0),
  (1.0, 0.0),
  (0.97998, 4.476563324649836),
  (0.98999, 3.1654241105892895)],
 0.5: [(0.98, 4.48),
  (0.95996, 6.330753256793382),
  (0.95996, 6.330753256793382),
  (0.94995, 7.077961923993376),
  (1.0, 0.0),
  (0.94995, 7.077961923993375),
  (0.97998, 4.476563324649836)],
 0.9: [(0.97, 5.48),
  (0.96997, 5.4826205594678905),
  (0.98999, 3.1654241105892904),
  (0.98999, 3.1654241105892895),
  (1.0, 0.0),
  (1.0, 0.0),
  (0.98999, 3.1654241105892895)],
 1.0: [(0

In [48]:
results_polys

{0.0: [(0.58, 20.51),
  (0.87988, 10.964747657178437),
  (0.8999, 10.009499487486877),
  (0.95996, 6.330753256793382),
  (1.0, 0.0),
  (1.0, 0.0),
  (0.97998, 4.476563324649836)],
 0.05: [(0.58, 20.51),
  (0.77978, 14.845596086099071),
  (-0.83183, 42.78207836058342),
  (0.70971, 17.04394384278181),
  (0.57958, 20.510086957972653),
  (0.6997, 17.335227714397067),
  (0.91992, 8.952858046099026)],
 0.1: [(0.72, 16.75),
  (0.77978, 14.845596086099071),
  (0.84985, 12.258776650934628),
  (0.8999, 10.009499487486877),
  (0.95996, 6.330753256793382),
  (0.8999, 10.009499487486876),
  (0.95996, 6.330753256793382)],
 0.5: [(0.8, 14.15),
  (0.80981, 13.796521944457595),
  (0.8999, 10.009499487486876),
  (-0.39139, 37.294040058270696),
  (0.91992, 8.952858046099026),
  (0.70971, 17.043943842781815),
  (0.90991, 9.495892469478584)],
 0.9: [(0.48, 22.82),
  (0.40941, 24.307031769673152),
  (0.5996, 20.0159955995199),
  (0.42943, 23.89173538852086),
  (0.58959, 20.264549677500856),
  (0.81982, 13.4

In [11]:
results_overall={0.0: [(0.94, 7.75),
  (0.97998, 4.476563324649836),
  (0.93994, 7.7534800442382),
  (0.97998, 4.476563324649836),
  (0.91992, 8.952858046099026),
  (0.98999, 3.16542411058929),
  (0.98999, 3.1654241105892895)],
 0.05: [(0.97, 5.48),
  (0.67968, 17.903567105401088),
  (0.67968, 17.903567105401088),
  (0.84985, 12.258776650934628),
  (0.80981, 13.796521944457595),
  (0.03904, 30.999959933496687),
  (0.22923, 27.765926665737272)],
 0.1: [(0.94, 7.75),
  (0.86987, 11.412414125990173),
  (0.97998, 4.476563324649836),
  (1.0, 0.0),
  (1.0, 0.0),
  (0.97998, 4.476563324649836),
  (0.98999, 3.1654241105892895)],
 0.5: [(0.98, 4.48),
  (0.95996, 6.330753256793382),
  (0.95996, 6.330753256793382),
  (0.94995, 7.077961923993376),
  (1.0, 0.0),
  (0.94995, 7.077961923993375),
  (0.97998, 4.476563324649836)],
 0.9: [(0.97, 5.48),
  (0.96997, 5.4826205594678905),
  (0.98999, 3.1654241105892904),
  (0.98999, 3.1654241105892895),
  (1.0, 0.0),
  (1.0, 0.0),
  (0.98999, 3.1654241105892895)],
 1.0: [(0.97, 5.48),
  (0.94995, 7.077961923993375),
  (0.96997, 5.4826205594678905),
  (0.95996, 6.330753256793382),
  (0.96997, 5.4826205594678905),
  (0.8999, 10.009499487486877),
  (0.94995, 7.077961923993376)]}
results_polys={0.0: [(0.58, 20.51),
  (0.87988, 10.964747657178437),
  (0.8999, 10.009499487486877),
  (0.95996, 6.330753256793382),
  (1.0, 0.0),
  (1.0, 0.0),
  (0.97998, 4.476563324649836)],
 0.05: [(0.58, 20.51),
  (0.77978, 14.845596086099071),
  (-0.83183, 42.78207836058342),
  (0.70971, 17.04394384278181),
  (0.57958, 20.510086957972653),
  (0.6997, 17.335227714397067),
  (0.91992, 8.952858046099026)],
 0.1: [(0.72, 16.75),
  (0.77978, 14.845596086099071),
  (0.84985, 12.258776650934628),
  (0.8999, 10.009499487486877),
  (0.95996, 6.330753256793382),
  (0.8999, 10.009499487486876),
  (0.95996, 6.330753256793382)],
 0.5: [(0.8, 14.15),
  (0.80981, 13.796521944457595),
  (0.8999, 10.009499487486876),
  (-0.39139, 37.294040058270696),
  (0.91992, 8.952858046099026),
  (0.70971, 17.043943842781815),
  (0.90991, 9.495892469478584)],
 0.9: [(0.48, 22.82),
  (0.40941, 24.307031769673152),
  (0.5996, 20.0159955995199),
  (0.42943, 23.89173538852086),
  (0.58959, 20.264549677500856),
  (0.81982, 13.428615534283496),
  (0.7998, 14.154862060790277)],
 1.0: [(-0.29, 35.93),
  (0.65966, 18.45438995698314),
  (0.85986, 11.843162617324818),
  (0.54955, 21.2296383576711),
  (0.66967, 18.18106740791365),
  (0.46947, 23.03864292702806),
  (0.66967, 18.18106740791365)]}

In [12]:
eval_episode_length=100000
training_episode_length=100000
env.seed(2022)
torch.manual_seed(2022)

<torch._C.Generator at 0x7fca3e58f410>

In [13]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-700000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-800000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.81     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 289      |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.98        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 253         |
|    iterations           | 2           |
|    time_elapsed         | 16          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.031546764 |
|    clip_fraction        | 0.214       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.99        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 251         |
|    iterations           | 11          |
|    time_elapsed         | 89          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.018945675 |
|    clip_fraction        | 0.232       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.227       |
|    explained_variance   | -9.31       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00678     |
|    n_updates            | 10390       |
|    policy_gradient_loss | 0.00213     |
|    std                  | 0.217       |
|    value_loss           | 0.0016      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.91        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 256         |
|    iterations           | 20          |
|    time_elapsed         | 159         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.018699173 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.23        |
|    explained_variance   | -4.32       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.000358    |
|    n_updates            | 10480       |
|    policy_gradient_loss | 0.00353     |
|    std                  | 0.216       |
|    value_loss           | 0.00258     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.04        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 254         |
|    iterations           | 29          |
|    time_elapsed         | 233         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.018536408 |
|    clip_fraction        | 0.214       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.194       |
|    explained_variance   | 0.00446     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0332      |
|    n_updates            | 10570       |
|    policy_gradient_loss | -0.00264    |
|    std                  | 0.22        |
|    value_loss           | 487         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.79        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 245         |
|    iterations           | 38          |
|    time_elapsed         | 316         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.017131012 |
|    clip_fraction        | 0.196       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.218       |
|    explained_variance   | -90.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0114      |
|    n_updates            | 10660       |
|    policy_gradient_loss | 0.0037      |
|    std                  | 0.218       |
|    value_loss           | 0.00561     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.89        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 47          |
|    time_elapsed         | 395         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.018780472 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.223       |
|    explained_variance   | -26.1       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0106     |
|    n_updates            | 10750       |
|    policy_gradient_loss | 0.00933     |
|    std                  | 0.217       |
|    value_loss           | 0.00111     |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.1        |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 265        |
|    iterations           | 8          |
|    time_elapsed         | 61         |
|    total_timesteps      | 16384      |
| train/                  |            |
|    approx_kl            | 0.25803766 |
|    clip_fraction        | 0.35       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.801      |
|    explained_variance   | -73.7      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0312    |
|    n_updates            | 10360      |
|    policy_gradient_loss | -0.00272   |
|    std                  | 0.162      |
|    value_loss           | 0.0104     |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 261         |
|    iterations           | 17          |
|    time_elapsed         | 133         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.042565163 |
|    clip_fraction        | 0.246       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.843       |
|    explained_variance   | 0.000259    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00939     |
|    n_updates            | 10450       |
|    policy_gradient_loss | 0.00378     |
|    std                  | 0.157       |
|    value_loss           | 489         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.96        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 26          |
|    time_elapsed         | 212         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.070069686 |
|    clip_fraction        | 0.286       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.797       |
|    explained_variance   | -65.1       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0361      |
|    n_updates            | 10540       |
|    policy_gradient_loss | 0.00468     |
|    std                  | 0.163       |
|    value_loss           | 0.00427     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.85        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 35          |
|    time_elapsed         | 286         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.035807487 |
|    clip_fraction        | 0.23        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.756       |
|    explained_variance   | 0.000674    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0275      |
|    n_updates            | 10630       |
|    policy_gradient_loss | 0.00795     |
|    std                  | 0.167       |
|    value_loss           | 1.41e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.85        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 44          |
|    time_elapsed         | 359         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.024787556 |
|    clip_fraction        | 0.245       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.752       |
|    explained_variance   | -10.8       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00449     |
|    n_updates            | 10720       |
|    policy_gradient_loss | 0.00505     |
|    std                  | 0.166       |
|    value_loss           | 0.00377     |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.18       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 213        |
|    iterations           | 5          |
|    time_elapsed         | 47         |
|    total_timesteps      | 10240      |
| train/                  |            |
|    approx_kl            | 0.02476312 |
|    clip_fraction        | 0.225      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.59       |
|    explained_variance   | -3.82      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0601     |
|    n_updates            | 10330      |
|    policy_gradient_loss | 0.00415    |
|    std                  | 0.18       |
|    value_loss           | 0.00347    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.16       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 235        |
|    iterations           | 14         |
|    time_elapsed         | 121        |
|    total_timesteps      | 28672      |
| train/                  |            |
|    approx_kl            | 0.01768557 |
|    clip_fraction        | 0.212      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.537      |
|    explained_variance   | -22.3      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.01      |
|    n_updates            | 10420      |
|    policy_gradient_loss | 0.00592    |
|    std                  | 0.186      |
|    value_loss           | 0.00229    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.07        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 23          |
|    time_elapsed         | 193         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.028685704 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.487       |
|    explained_variance   | -14.8       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.000958    |
|    n_updates            | 10510       |
|    policy_gradient_loss | 0.00811     |
|    std                  | 0.189       |
|    value_loss           | 0.00493     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.08       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 240        |
|    iterations           | 32         |
|    time_elapsed         | 272        |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.04602153 |
|    clip_fraction        | 0.266      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.497      |
|    explained_variance   | -277       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0155    |
|    n_updates            | 10600      |
|    policy_gradient_loss | 0.0102     |
|    std                  | 0.189      |
|    value_loss           | 0.0302     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.96        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 41          |
|    time_elapsed         | 344         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.023596622 |
|    clip_fraction        | 0.226       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.458       |
|    explained_variance   | -183        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0114     |
|    n_updates            | 10690       |
|    policy_gradient_loss | 0.0046      |
|    std                  | 0.192       |
|    value_loss           | 0.00628     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

--- 408.49490880966187 seconds ---
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.07     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 460      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.97       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 366        |
|    iterations           | 2          |
|    time_elapsed         | 11         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.02942756 |
|    clip_fraction        | 0.271      |
|    clip_range           | 0.2        |
|    ent

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.34        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 323         |
|    iterations           | 11          |
|    time_elapsed         | 69          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.033636775 |
|    clip_fraction        | 0.234       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.31        |
|    explained_variance   | -12.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00313     |
|    n_updates            | 10390       |
|    policy_gradient_loss | 0.0066      |
|    std                  | 0.211       |
|    value_loss           | 0.00868     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.1         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 316         |
|    iterations           | 21          |
|    time_elapsed         | 136         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.029822707 |
|    clip_fraction        | 0.256       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.191       |
|    explained_variance   | 0.000121    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.004       |
|    n_updates            | 10490       |
|    policy_gradient_loss | 0.0153      |
|    std                  | 0.222       |
|    value_loss           | 489         |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.2        |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 304        |
|    iterations           | 30         |
|    time_elapsed         | 201        |
|    total_timesteps      | 61440      |
| train/                  |            |
|    approx_kl            | 0.01746531 |
|    clip_fraction        | 0.23       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.179      |
|    explained_variance   | -12        |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0247    |
|    n_updates            | 10580      |
|    policy_gradient_loss | 0.0044     |
|    std                  | 0.222      |
|    value_loss           | 0.0057     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.19        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 288         |
|    iterations           | 40          |
|    time_elapsed         | 283         |
|    total_timesteps      | 81920       |
| train/                  |             |
|    approx_kl            | 0.018817136 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.058       |
|    explained_variance   | -5.02       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0222     |
|    n_updates            | 10680       |
|    policy_gradient_loss | 0.0164      |
|    std                  | 0.238       |
|    value_loss           | 0.00229     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.32        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 277         |
|    iterations           | 49          |
|    time_elapsed         | 361         |
|    total_timesteps      | 100352      |
| train/                  |             |
|    approx_kl            | 0.050652545 |
|    clip_fraction        | 0.237       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0457      |
|    explained_variance   | -9.89       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.000799   |
|    n_updates            | 10770       |
|    policy_gradient_loss | 0.00212     |
|    std                  | 0.237       |
|    value_loss           | 0.006       |
-----------------------------------------
--- 363.3245658874512 seconds ---
Wrapping the env with a `Monitor` wrapper


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 317         |
|    iterations           | 10          |
|    time_elapsed         | 64          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.014588771 |
|    clip_fraction        | 0.162       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0827      |
|    explained_variance   | -25.2       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0158      |
|    n_updates            | 10380       |
|    policy_gradient_loss | 0.00316     |
|    std                  | 0.233       |
|    value_loss           | 0.00406     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.87        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 19          |
|    time_elapsed         | 127         |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.037605613 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0941      |
|    explained_variance   | -54.8       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00949     |
|    n_updates            | 10470       |
|    policy_gradient_loss | 0.00471     |
|    std                  | 0.231       |
|    value_loss           | 0.0148      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.13        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 290         |
|    iterations           | 28          |
|    time_elapsed         | 197         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.026900677 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.233       |
|    explained_variance   | -14.1       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00559     |
|    n_updates            | 10560       |
|    policy_gradient_loss | 0.00975     |
|    std                  | 0.215       |
|    value_loss           | 0.00305     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.31        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 291         |
|    iterations           | 37          |
|    time_elapsed         | 260         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.017909618 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.207       |
|    explained_variance   | -120        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0385      |
|    n_updates            | 10650       |
|    policy_gradient_loss | 0.0189      |
|    std                  | 0.219       |
|    value_loss           | 0.00959     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.06       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 46         |
|    time_elapsed         | 312        |
|    total_timesteps      | 94208      |
| train/                  |            |
|    approx_kl            | 0.25684476 |
|    clip_fraction        | 0.239      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.224      |
|    explained_variance   | -11.4      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0336     |
|    n_updates            | 10740      |
|    policy_gradient_loss | 0.0124     |
|    std                  | 0.216      |
|    value_loss           | 0.00222    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 368         |
|    iterations           | 7           |
|    time_elapsed         | 38          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.047608532 |
|    clip_fraction        | 0.237       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.598       |
|    explained_variance   | -14.6       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00585    |
|    n_updates            | 10350       |
|    policy_gradient_loss | 0.000434    |
|    std                  | 0.179       |
|    value_loss           | 0.00601     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.16        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 317         |
|    iterations           | 16          |
|    time_elapsed         | 103         |
|    total_timesteps      | 32768       |
| train/                  |             |
|    approx_kl            | 0.031840414 |
|    clip_fraction        | 0.245       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.538       |
|    explained_variance   | -139        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0191     |
|    n_updates            | 10440       |
|    policy_gradient_loss | 0.0145      |
|    std                  | 0.187       |
|    value_loss           | 0.00301     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.96        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 314         |
|    iterations           | 25          |
|    time_elapsed         | 162         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.030737244 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.434       |
|    explained_variance   | -3.96       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.000563    |
|    n_updates            | 10530       |
|    policy_gradient_loss | 0.00569     |
|    std                  | 0.195       |
|    value_loss           | 0.00275     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.21       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 325        |
|    iterations           | 34         |
|    time_elapsed         | 213        |
|    total_timesteps      | 69632      |
| train/                  |            |
|    approx_kl            | 0.01729393 |
|    clip_fraction        | 0.212      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.465      |
|    explained_variance   | -29.8      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0433    |
|    n_updates            | 10620      |
|    policy_gradient_loss | 0.00615    |
|    std                  | 0.193      |
|    value_loss           | 0.00756    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.21        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 333         |
|    iterations           | 43          |
|    time_elapsed         | 263         |
|    total_timesteps      | 88064       |
| train/                  |             |
|    approx_kl            | 0.024311077 |
|    clip_fraction        | 0.239       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.45        |
|    explained_variance   | -24.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00465     |
|    n_updates            | 10710       |
|    policy_gradient_loss | 0.0014      |
|    std                  | 0.196       |
|    value_loss           | 0.0101      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

In [14]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-800000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:




mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.99 +/- 3.17
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.35 +/- 25.51
Focus Polytopes:
mean_reward:1.00 +/- 0.00
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:0.95 +/- 7.08
Focus Polytopes:
mean_reward:0.98 +/- 4.48
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.99 +/- 3.17
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:0.96 +/- 6.33
Focus Polytopes:
mean_reward:0.77 +/- 15.18
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.86 +/- 11.84
Focus Polytopes:
mean_reward:0.34 +/- 25.71


In [15]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-800000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-900000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.03     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 190      |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.87        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 175         |
|    iterations           | 2           |
|    time_elapsed         | 23          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.031140193 |
|    clip_fraction        | 0.19        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.02        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 207         |
|    iterations           | 11          |
|    time_elapsed         | 108         |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.013727346 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.205       |
|    explained_variance   | -3.23       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0241      |
|    n_updates            | 10880       |
|    policy_gradient_loss | 0.00161     |
|    std                  | 0.218       |
|    value_loss           | 0.00295     |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.11        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 215         |
|    iterations           | 20          |
|    time_elapsed         | 190         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.015694097 |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.279       |
|    explained_variance   | -47.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0156      |
|    n_updates            | 10970       |
|    policy_gradient_loss | 0.00861     |
|    std                  | 0.211       |
|    value_loss           | 0.000751    |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 219         |
|    iterations           | 29          |
|    time_elapsed         | 270         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.029419012 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.381       |
|    explained_variance   | -26.1       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0254     |
|    n_updates            | 11060       |
|    policy_gradient_loss | 0.00799     |
|    std                  | 0.199       |
|    value_loss           | 0.00103     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.03        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 217         |
|    iterations           | 38          |
|    time_elapsed         | 357         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.017380767 |
|    clip_fraction        | 0.178       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.278       |
|    explained_variance   | -12.5       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0123     |
|    n_updates            | 11150       |
|    policy_gradient_loss | 0.00655     |
|    std                  | 0.21        |
|    value_loss           | 0.00196     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.96        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 208         |
|    iterations           | 47          |
|    time_elapsed         | 461         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.026513025 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.23        |
|    explained_variance   | -16.3       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0316      |
|    n_updates            | 11240       |
|    policy_gradient_loss | -0.00133    |
|    std                  | 0.216       |
|    value_loss           | 0.00443     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.02        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 8           |
|    time_elapsed         | 68          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.020146068 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.685       |
|    explained_variance   | -28.4       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0153     |
|    n_updates            | 10850       |
|    policy_gradient_loss | 0.0112      |
|    std                  | 0.172       |
|    value_loss           | 0.00385     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.15      |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 230       |
|    iterations           | 17        |
|    time_elapsed         | 151       |
|    total_timesteps      | 34816     |
| train/                  |           |
|    approx_kl            | 0.0474956 |
|    clip_fraction        | 0.279     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.737     |
|    explained_variance   | -16.7     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.00478   |
|    n_updates            | 10940     |
|    policy_gradient_loss | 0.00614   |
|    std                  | 0.167     |
|    value_loss           | 0.00245   |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.19    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.97       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 237        |
|    iterations           | 27         |
|    time_elapsed         | 233        |
|    total_timesteps      | 55296      |
| train/                  |            |
|    approx_kl            | 0.01712412 |
|    clip_fraction        | 0.204      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.778      |
|    explained_variance   | 7.12e-05   |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0205     |
|    n_updates            | 11040      |
|    policy_gradient_loss | 0.00685    |
|    std                  | 0.164      |
|    value_loss           | 489        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 228         |
|    iterations           | 36          |
|    time_elapsed         | 323         |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.054538324 |
|    clip_fraction        | 0.28        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.776       |
|    explained_variance   | -18.9       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0203      |
|    n_updates            | 11130       |
|    policy_gradient_loss | 0.00221     |
|    std                  | 0.165       |
|    value_loss           | 0.0117      |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.13       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 219        |
|    iterations           | 45         |
|    time_elapsed         | 419        |
|    total_timesteps      | 92160      |
| train/                  |            |
|    approx_kl            | 0.07746325 |
|    clip_fraction        | 0.279      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.717      |
|    explained_variance   | -27        |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0121    |
|    n_updates            | 11220      |
|    policy_gradient_loss | 0.0119     |
|    std                  | 0.168      |
|    value_loss           | 0.00627    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2           |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 193         |
|    iterations           | 6           |
|    time_elapsed         | 63          |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.026017023 |
|    clip_fraction        | 0.249       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.506       |
|    explained_variance   | -20.7       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0301      |
|    n_updates            | 10830       |
|    policy_gradient_loss | 0.00732     |
|    std                  | 0.187       |
|    value_loss           | 0.00235     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.02        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 181         |
|    iterations           | 15          |
|    time_elapsed         | 168         |
|    total_timesteps      | 30720       |
| train/                  |             |
|    approx_kl            | 0.025183588 |
|    clip_fraction        | 0.265       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.46        |
|    explained_variance   | -1.98       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.04e-05    |
|    n_updates            | 10920       |
|    policy_gradient_loss | 0.00653     |
|    std                  | 0.193       |
|    value_loss           | 0.00317     |
-----------------------------------------
---------------------------------------
| rollout/                |         

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.21       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 199        |
|    iterations           | 25         |
|    time_elapsed         | 257        |
|    total_timesteps      | 51200      |
| train/                  |            |
|    approx_kl            | 0.08189736 |
|    clip_fraction        | 0.306      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.486      |
|    explained_variance   | -48.6      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.016      |
|    n_updates            | 11020      |
|    policy_gradient_loss | 0.00654    |
|    std                  | 0.189      |
|    value_loss           | 0.0514     |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.89        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 206         |
|    iterations           | 34          |
|    time_elapsed         | 337         |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.039571136 |
|    clip_fraction        | 0.263       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.484       |
|    explained_variance   | -202        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.046       |
|    n_updates            | 11110       |
|    policy_gradient_loss | 0.00513     |
|    std                  | 0.191       |
|    value_loss           | 0.00513     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.19        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 210         |
|    iterations           | 43          |
|    time_elapsed         | 418         |
|    total_timesteps      | 88064       |
| train/                  |             |
|    approx_kl            | 0.020359725 |
|    clip_fraction        | 0.222       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.504       |
|    explained_variance   | -12.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0126      |
|    n_updates            | 11200       |
|    policy_gradient_loss | 0.00658     |
|    std                  | 0.187       |
|    value_loss           | 0.00373     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.28        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 304         |
|    iterations           | 4           |
|    time_elapsed         | 26          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.030400727 |
|    clip_fraction        | 0.237       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.0809      |
|    explained_variance   | -78.4       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0306     |
|    n_updates            | 10810       |
|    policy_gradient_loss | 0.0114      |
|    std                  | 0.233       |
|    value_loss           | 0.0114      |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.39        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 281         |
|    iterations           | 13          |
|    time_elapsed         | 94          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.024820052 |
|    clip_fraction        | 0.227       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.11        |
|    explained_variance   | -71.5       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0087     |
|    n_updates            | 10900       |
|    policy_gradient_loss | 0.0101      |
|    std                  | 0.231       |
|    value_loss           | 0.00467     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.12        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 281         |
|    iterations           | 22          |
|    time_elapsed         | 160         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.022791967 |
|    clip_fraction        | 0.231       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.116       |
|    explained_variance   | -74.6       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00543     |
|    n_updates            | 10990       |
|    policy_gradient_loss | 0.00272     |
|    std                  | 0.231       |
|    value_loss           | 0.00498     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.09        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 273         |
|    iterations           | 31          |
|    time_elapsed         | 232         |
|    total_timesteps      | 63488       |
| train/                  |             |
|    approx_kl            | 0.022112463 |
|    clip_fraction        | 0.254       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.139       |
|    explained_variance   | -34.9       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0204     |
|    n_updates            | 11080       |
|    policy_gradient_loss | 0.0114      |
|    std                  | 0.229       |
|    value_loss           | 0.00364     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.38        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 40          |
|    time_elapsed         | 303         |
|    total_timesteps      | 81920       |
| train/                  |             |
|    approx_kl            | 0.018236045 |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.216       |
|    explained_variance   | -14.1       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00105    |
|    n_updates            | 11170       |
|    policy_gradient_loss | 0.00562     |
|    std                  | 0.218       |
|    value_loss           | 0.00384     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 49          |
|    time_elapsed         | 362         |
|    total_timesteps      | 100352      |
| train/                  |             |
|    approx_kl            | 0.044944584 |
|    clip_fraction        | 0.275       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.26        |
|    explained_variance   | -70.1       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0231      |
|    n_updates            | 11260       |
|    policy_gradient_loss | 0.0116      |
|    std                  | 0.215       |
|    value_loss           | 0.00931     |
-----------------------------------------
--- 364.27524971961975 seconds ---
Wrapping the env with a `Monitor` wrapper

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.15        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 390         |
|    iterations           | 10          |
|    time_elapsed         | 52          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.017527051 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.231       |
|    explained_variance   | -3.45e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0348      |
|    n_updates            | 10870       |
|    policy_gradient_loss | 0.00581     |
|    std                  | 0.215       |
|    value_loss           | 489         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 382         |
|    iterations           | 19          |
|    time_elapsed         | 101         |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.024409655 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.221       |
|    explained_variance   | -43.4       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.000588    |
|    n_updates            | 10960       |
|    policy_gradient_loss | 0.00658     |
|    std                  | 0.216       |
|    value_loss           | 0.00747     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.2         |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 379         |
|    iterations           | 28          |
|    time_elapsed         | 150         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.031007793 |
|    clip_fraction        | 0.164       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.201       |
|    explained_variance   | -36.2       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0441     |
|    n_updates            | 11050       |
|    policy_gradient_loss | 0.011       |
|    std                  | 0.217       |
|    value_loss           | 0.0045      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.04        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 378         |
|    iterations           | 37          |
|    time_elapsed         | 200         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.057588175 |
|    clip_fraction        | 0.259       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.255       |
|    explained_variance   | -74.1       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0342      |
|    n_updates            | 11140       |
|    policy_gradient_loss | 0.015       |
|    std                  | 0.212       |
|    value_loss           | 0.00919     |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.08       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 378        |
|    iterations           | 46         |
|    time_elapsed         | 249        |
|    total_timesteps      | 94208      |
| train/                  |            |
|    approx_kl            | 0.02915828 |
|    clip_fraction        | 0.219      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.192      |
|    explained_variance   | -144       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00942   |
|    n_updates            | 11230      |
|    policy_gradient_loss | 0.00625    |
|    std                  | 0.219      |
|    value_loss           | 0.0185     |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.17        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 413         |
|    iterations           | 7           |
|    time_elapsed         | 34          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.020117857 |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.474       |
|    explained_variance   | -37.7       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00634     |
|    n_updates            | 10840       |
|    policy_gradient_loss | 0.0013      |
|    std                  | 0.193       |
|    value_loss           | 0.0128      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.07       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 403        |
|    iterations           | 16         |
|    time_elapsed         | 81         |
|    total_timesteps      | 32768      |
| train/                  |            |
|    approx_kl            | 0.04240513 |
|    clip_fraction        | 0.244      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.522      |
|    explained_variance   | -108       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0154     |
|    n_updates            | 10930      |
|    policy_gradient_loss | 0.0188     |
|    std                  | 0.187      |
|    value_loss           | 0.0051     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.15        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 400         |
|    iterations           | 25          |
|    time_elapsed         | 127         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.029164337 |
|    clip_fraction        | 0.259       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.562       |
|    explained_variance   | -116        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00276     |
|    n_updates            | 11020       |
|    policy_gradient_loss | 0.00566     |
|    std                  | 0.185       |
|    value_loss           | 0.017       |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.15       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 397        |
|    iterations           | 35         |
|    time_elapsed         | 180        |
|    total_timesteps      | 71680      |
| train/                  |            |
|    approx_kl            | 0.03285235 |
|    clip_fraction        | 0.203      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.57       |
|    explained_variance   | -30.3      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0258    |
|    n_updates            | 11120      |
|    policy_gradient_loss | 0.00608    |
|    std                  | 0.183      |
|    value_loss           | 0.00469    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.18       |
|    ep_rew_mean          | -9.01      |
| time/                   |            |
|    fps                  | 397        |
|    iterations           | 44         |
|    time_elapsed         | 226        |
|    total_timesteps      | 90112      |
| train/                  |            |
|    approx_kl            | 0.03711408 |
|    clip_fraction        | 0.225      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.622      |
|    explained_variance   | -13.3      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00465   |
|    n_updates            | 11210      |
|    policy_gradient_loss | 0.0127     |
|    std                  | 0.178      |
|    value_loss           | 0.00404    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

In [16]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-900000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:0.97 +/- 5.48
Focus Polytopes:
mean_reward:0.98 +/- 4.48
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.47 +/- 23.04
Focus Polytopes:
mean_reward:1.00 +/- 0.00
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:0.85 +/- 12.26
Focus Polytopes:
mean_reward:0.99 +/- 3.17
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.98 +/- 4.48
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:0.99 +/- 3.17
Focus Polytopes:
mean_reward:0.86 +/- 11.84
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.90 +/- 10.01
Focus Polytopes:
mean_reward:0.98 +/- 4.48


In [17]:
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-900000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/zeppelin-avoidance-windsystem-small2-1400000-1000000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.81     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    fps             | 304      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.01        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 2           |
|    time_elapsed         | 15          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.016252391 |
|    clip_fraction        | 0.21        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.01        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 245         |
|    iterations           | 11          |
|    time_elapsed         | 91          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.019787671 |
|    clip_fraction        | 0.199       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.212       |
|    explained_variance   | -0.737      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00761     |
|    n_updates            | 11370       |
|    policy_gradient_loss | 0.00673     |
|    std                  | 0.217       |
|    value_loss           | 0.00123     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.85       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 242        |
|    iterations           | 20         |
|    time_elapsed         | 169        |
|    total_timesteps      | 40960      |
| train/                  |            |
|    approx_kl            | 0.02297139 |
|    clip_fraction        | 0.182      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.275      |
|    explained_variance   | -1.88      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00694    |
|    n_updates            | 11460      |
|    policy_gradient_loss | 0.00553    |
|    std                  | 0.211      |
|    value_loss           | 0.0032     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.99        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 237         |
|    iterations           | 29          |
|    time_elapsed         | 250         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.036941633 |
|    clip_fraction        | 0.238       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.261       |
|    explained_variance   | -0.758      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0223     |
|    n_updates            | 11550       |
|    policy_gradient_loss | 0.00936     |
|    std                  | 0.212       |
|    value_loss           | 0.002       |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.15        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 236         |
|    iterations           | 38          |
|    time_elapsed         | 329         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.020359876 |
|    clip_fraction        | 0.199       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.245       |
|    explained_variance   | -8.57       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.000654   |
|    n_updates            | 11640       |
|    policy_gradient_loss | 0.00311     |
|    std                  | 0.214       |
|    value_loss           | 0.00366     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.32        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 232         |
|    iterations           | 47          |
|    time_elapsed         | 413         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.022533529 |
|    clip_fraction        | 0.231       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.32        |
|    explained_variance   | -5.29       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00973     |
|    n_updates            | 11730       |
|    policy_gradient_loss | 0.00917     |
|    std                  | 0.207       |
|    value_loss           | 0.00197     |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.01       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 237        |
|    iterations           | 8          |
|    time_elapsed         | 68         |
|    total_timesteps      | 16384      |
| train/                  |            |
|    approx_kl            | 0.02560703 |
|    clip_fraction        | 0.28       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.683      |
|    explained_variance   | -16.3      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00451   |
|    n_updates            | 11340      |
|    policy_gradient_loss | 0.0111     |
|    std                  | 0.174      |
|    value_loss           | 0.000784   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.42       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 239        |
|    iterations           | 18         |
|    time_elapsed         | 153        |
|    total_timesteps      | 36864      |
| train/                  |            |
|    approx_kl            | 0.02487193 |
|    clip_fraction        | 0.233      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.685      |
|    explained_variance   | -0.637     |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00155    |
|    n_updates            | 11440      |
|    policy_gradient_loss | 0.0107     |
|    std                  | 0.172      |
|    value_loss           | 0.00329    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.13        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 28          |
|    time_elapsed         | 239         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.038358144 |
|    clip_fraction        | 0.256       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.701       |
|    explained_variance   | 0.395       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0107      |
|    n_updates            | 11540       |
|    policy_gradient_loss | 0.00546     |
|    std                  | 0.171       |
|    value_loss           | 0.00498     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 237         |
|    iterations           | 37          |
|    time_elapsed         | 319         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.048023425 |
|    clip_fraction        | 0.274       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.701       |
|    explained_variance   | -0.562      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00869     |
|    n_updates            | 11630       |
|    policy_gradient_loss | 0.00742     |
|    std                  | 0.171       |
|    value_loss           | 0.00165     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.34        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 236         |
|    iterations           | 46          |
|    time_elapsed         | 397         |
|    total_timesteps      | 94208       |
| train/                  |             |
|    approx_kl            | 0.031713754 |
|    clip_fraction        | 0.24        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.63        |
|    explained_variance   | 0.00541     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.000456   |
|    n_updates            | 11720       |
|    policy_gradient_loss | 0.00819     |
|    std                  | 0.178       |
|    value_loss           | 0.00518     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.08        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 236         |
|    iterations           | 7           |
|    time_elapsed         | 60          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.023295922 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.588       |
|    explained_variance   | -35.1       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0199     |
|    n_updates            | 11330       |
|    policy_gradient_loss | 0.00223     |
|    std                  | 0.18        |
|    value_loss           | 0.00649     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.08       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 232        |
|    iterations           | 16         |
|    time_elapsed         | 140        |
|    total_timesteps      | 32768      |
| train/                  |            |
|    approx_kl            | 0.08690071 |
|    clip_fraction        | 0.252      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.589      |
|    explained_variance   | -78.4      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0472     |
|    n_updates            | 11420      |
|    policy_gradient_loss | 0.0344     |
|    std                  | 0.181      |
|    value_loss           | 0.00818    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.07        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 233         |
|    iterations           | 25          |
|    time_elapsed         | 219         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.041751303 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.538       |
|    explained_variance   | -49         |
|    learning_rate        | 0.0003      |
|    loss                 | 0.104       |
|    n_updates            | 11510       |
|    policy_gradient_loss | 0.0101      |
|    std                  | 0.186       |
|    value_loss           | 0.00492     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.26        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 232         |
|    iterations           | 34          |
|    time_elapsed         | 298         |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.018746532 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.506       |
|    explained_variance   | -3.93       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00265     |
|    n_updates            | 11600       |
|    policy_gradient_loss | 0.00694     |
|    std                  | 0.187       |
|    value_loss           | 0.00495     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.86       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 230        |
|    iterations           | 43         |
|    time_elapsed         | 382        |
|    total_timesteps      | 88064      |
| train/                  |            |
|    approx_kl            | 0.03789355 |
|    clip_fraction        | 0.245      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.606      |
|    explained_variance   | -81.4      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0294     |
|    n_updates            | 11690      |
|    policy_gradient_loss | 0.0112     |
|    std                  | 0.178      |
|    value_loss           | 0.0192     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.55        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 4           |
|    time_elapsed         | 28          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.028081842 |
|    clip_fraction        | 0.245       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.289       |
|    explained_variance   | -42.6       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00354    |
|    n_updates            | 11300       |
|    policy_gradient_loss | 0.0123      |
|    std                  | 0.211       |
|    value_loss           | 0.0118      |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.22        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 267         |
|    iterations           | 13          |
|    time_elapsed         | 99          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.025502432 |
|    clip_fraction        | 0.2         |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.408       |
|    explained_variance   | -23.9       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.000541    |
|    n_updates            | 11390       |
|    policy_gradient_loss | 0.00651     |
|    std                  | 0.2         |
|    value_loss           | 0.00872     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.25        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 262         |
|    iterations           | 22          |
|    time_elapsed         | 171         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.030302254 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.411       |
|    explained_variance   | -14.8       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00701    |
|    n_updates            | 11480       |
|    policy_gradient_loss | 0.00727     |
|    std                  | 0.197       |
|    value_loss           | 0.00812     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.92        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 248         |
|    iterations           | 31          |
|    time_elapsed         | 255         |
|    total_timesteps      | 63488       |
| train/                  |             |
|    approx_kl            | 0.017979113 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.498       |
|    explained_variance   | 0.00404     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0201      |
|    n_updates            | 11570       |
|    policy_gradient_loss | 0.00972     |
|    std                  | 0.189       |
|    value_loss           | 486         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.26        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 253         |
|    iterations           | 40          |
|    time_elapsed         | 322         |
|    total_timesteps      | 81920       |
| train/                  |             |
|    approx_kl            | 0.022790153 |
|    clip_fraction        | 0.224       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.446       |
|    explained_variance   | -40.8       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00566     |
|    n_updates            | 11660       |
|    policy_gradient_loss | 0.00452     |
|    std                  | 0.196       |
|    value_loss           | 0.00394     |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.03        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 49          |
|    time_elapsed         | 413         |
|    total_timesteps      | 100352      |
| train/                  |             |
|    approx_kl            | 0.046158094 |
|    clip_fraction        | 0.229       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.446       |
|    explained_variance   | -10.5       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0143      |
|    n_updates            | 11750       |
|    policy_gradient_loss | 0.0155      |
|    std                  | 0.195       |
|    value_loss           | 0.00521     |
-----------------------------------------
--- 417.03309750556946 seconds ---
Wrapping the env with a `Monitor` wrapper

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.24      |
|    ep_rew_mean          | 1         |
| time/                   |           |
|    fps                  | 319       |
|    iterations           | 10        |
|    time_elapsed         | 64        |
|    total_timesteps      | 20480     |
| train/                  |           |
|    approx_kl            | 0.0375945 |
|    clip_fraction        | 0.242     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.305     |
|    explained_variance   | -130      |
|    learning_rate        | 0.0003    |
|    loss                 | 0.0311    |
|    n_updates            | 11360     |
|    policy_gradient_loss | 0.00464   |
|    std                  | 0.207     |
|    value_loss           | 0.00761   |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.05    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.98        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 316         |
|    iterations           | 19          |
|    time_elapsed         | 122         |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.020731427 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.291       |
|    explained_variance   | -43.9       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0158     |
|    n_updates            | 11450       |
|    policy_gradient_loss | 0.0101      |
|    std                  | 0.209       |
|    value_loss           | 0.00511     |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.23        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 304         |
|    iterations           | 28          |
|    time_elapsed         | 188         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.045435455 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.357       |
|    explained_variance   | -18.4       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0318     |
|    n_updates            | 11540       |
|    policy_gradient_loss | 0.00303     |
|    std                  | 0.202       |
|    value_loss           | 0.00574     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.16        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 298         |
|    iterations           | 37          |
|    time_elapsed         | 253         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.021613155 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.3         |
|    explained_variance   | -8.77       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0105     |
|    n_updates            | 11630       |
|    policy_gradient_loss | 0.00743     |
|    std                  | 0.209       |
|    value_loss           | 0.00488     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.27        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 279         |
|    iterations           | 46          |
|    time_elapsed         | 337         |
|    total_timesteps      | 94208       |
| train/                  |             |
|    approx_kl            | 0.029078545 |
|    clip_fraction        | 0.229       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.307       |
|    explained_variance   | -47.6       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0386     |
|    n_updates            | 11720       |
|    policy_gradient_loss | 0.00752     |
|    std                  | 0.208       |
|    value_loss           | 0.00445     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.51       |
|    ep_rew_mean          | 1          |
| time/                   |            |
|    fps                  | 334        |
|    iterations           | 7          |
|    time_elapsed         | 42         |
|    total_timesteps      | 14336      |
| train/                  |            |
|    approx_kl            | 0.03737913 |
|    clip_fraction        | 0.278      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.74       |
|    explained_variance   | -193       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.012     |
|    n_updates            | 11330      |
|    policy_gradient_loss | 0.00793    |
|    std                  | 0.168      |
|    value_loss           | 0.0158     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 314         |
|    iterations           | 16          |
|    time_elapsed         | 104         |
|    total_timesteps      | 32768       |
| train/                  |             |
|    approx_kl            | 0.021696616 |
|    clip_fraction        | 0.189       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.711       |
|    explained_variance   | -9.31       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.015      |
|    n_updates            | 11420       |
|    policy_gradient_loss | 0.00224     |
|    std                  | 0.17        |
|    value_loss           | 0.00598     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.07        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 307         |
|    iterations           | 25          |
|    time_elapsed         | 166         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.028175518 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.704       |
|    explained_variance   | -36.3       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0493      |
|    n_updates            | 11510       |
|    policy_gradient_loss | 0.00362     |
|    std                  | 0.171       |
|    value_loss           | 0.00961     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.34        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 308         |
|    iterations           | 34          |
|    time_elapsed         | 225         |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.036582828 |
|    clip_fraction        | 0.231       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.645       |
|    explained_variance   | -50         |
|    learning_rate        | 0.0003      |
|    loss                 | 0.344       |
|    n_updates            | 11600       |
|    policy_gradient_loss | 0.0139      |
|    std                  | 0.175       |
|    value_loss           | 0.00714     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.15        |
|    ep_rew_mean          | 1           |
| time/                   |             |
|    fps                  | 294         |
|    iterations           | 43          |
|    time_elapsed         | 298         |
|    total_timesteps      | 88064       |
| train/                  |             |
|    approx_kl            | 0.024219107 |
|    clip_fraction        | 0.259       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.692       |
|    explained_variance   | -69.4       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0211     |
|    n_updates            | 11690       |
|    policy_gradient_loss | 0.00345     |
|    std                  | 0.171       |
|    value_loss           | 0.0157      |
-----------------------------------------
---------------------------------------
| rollout/                |         

In [18]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.05, 0.1, 0.5, 0.9, 1.0]:
    model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-1000000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:0.96 +/- 6.33
Focus Polytopes:
mean_reward:1.00 +/- 0.00
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.05
Overall:
mean_reward:0.85 +/- 12.26
Focus Polytopes:
mean_reward:0.99 +/- 3.17
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:0.51 +/- 22.15
Focus Polytopes:
mean_reward:0.99 +/- 3.17
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:1.00 +/- 0.00
Focus Polytopes:
mean_reward:1.00 +/- 0.00
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:0.97 +/- 5.48
Focus Polytopes:
mean_reward:0.76 +/- 15.51
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:0.92 +/- 8.95
Focus Polytopes:
mean_reward:0.83 +/- 13.05


## After retraining

In [9]:
eval_episode_length=100000
training_episode_length=100000

In [10]:
env.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7f364b90f4d0>

In [11]:
retrain_polytopes = None
with open("zeppelin-1400000-retrain-1000000-0.5-polytopes.pickle","rb") as f:
    retrain_polytopes = pickle.load(f)

In [12]:
# Total volume of polytopes?
# Share of instances usually (i.e. without "focus polytopes")

# Volume of state space:
N = 100000
n=4
l_b = np.array([env.unwrapped.MIN_X,env.unwrapped.MIN_Y,env.unwrapped.MIN_C,env.unwrapped.MAX_VELOCITY-env.unwrapped.MAX_TURBULENCE+0.1])
u_b = np.array([env.unwrapped.MAX_X,env.unwrapped.MAX_Y,env.unwrapped.MAX_C,env.unwrapped.MAX_WIND_SPEED])
xs = env.unwrapped.np_random.uniform(low=l_b,high=u_b,size=(N,n))
s = 0
for x in xs:
    if not env.unwrapped.is_crash(x) and not env.unwrapped.reached_goal(x) and env.unwrapped.is_in_bounds(x):
        s+=1
print("s: ",s)
total_vol = np.prod(u_b - l_b) * (s / N)
print("total volume: ", total_vol)

poly_region = pc.Region(retrain_polytopes)
l_b, u_b = poly_region.bounding_box
l_b = l_b.flatten()
u_b = u_b.flatten()
print(l_b,",",u_b)
xs = env.unwrapped.np_random.uniform(low=l_b,high=u_b,size=(N,n))
s = 0
for x in xs:
    if x in poly_region:
        if not env.unwrapped.is_crash(x) and not env.unwrapped.reached_goal(x) and env.unwrapped.is_in_bounds(x):
                s+=1
poly_vol = np.prod(u_b - l_b) * (s / N)
print("poly volume: ", poly_vol)

# We only have an upper bound for the share, since polytopes may be partially outside the state space of interest
poly_share = poly_vol/total_vol
print("max share: ", poly_share)

s:  4512
total volume:  50332262.4
[-146.37511  -97.5       10.         5.     ] , [400. 400.  80.  30.]
poly volume:  35609990.77787027
max share:  0.7074983138026052


In [None]:
model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-1000000-0.5")
model.set_env(env)

env.init_polytopes(0.0,retrain_polytopes)
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [19]:
model = PPO.load("model_backup/zeppelin-avoidance-windsystem-small2-1400000-1000000-0.5")
model.set_env(env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [21]:
env.unwrapped.INCLUDE_UNWINNABLE = False
env.init_polytopes(0.0,retrain_polytopes)
#orig_observation = env.reset()
for i_episode in range(100):
    observation = env.reset()
    #env.unwrapped.state=[4.545,13,10,5.5]
    #env.unwrapped.state = [167.10239, -10.34823,  79.3649,    5.99876]
    #env.unwrapped.state = orig_observation
    observation = env.unwrapped.state
    total_reward=0
    for t in range(1000):
        print(observation)
        action, _states = model.predict(observation,deterministic=True)
        print([action[1]*action[0],action[1]*np.sqrt(1-action[0]**2)])
        #print[[action[1]*action[0],action[1]*np.sqrt(1-action[0]**2)]]
        #x1_norm = observation[0]/np.sqrt(observation[0]**2+observation[1]**2)
        #x2_norm = observation[1]/np.sqrt(observation[0]**2+observation[1]**2)
        #f = 1.0 if (x2_norm) < 0 else -1.0
        #action = [f, f*(0-x1_norm)]
        observation, reward, done, info = env.step(action)
        total_reward=0.99*total_reward+reward
        env.render()
        time.sleep(0.2)
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            print("Reward: ", total_reward)
            break
env.close()

[ 69.90218 -61.54047  49.81218  10.69485]
[1.0, -0.0]
Episode finished after 1 timesteps
Reward:  1.0
[-91.02743  -1.26261  64.68677  28.64434]
[-1.0, -0.0]
[-100.78373  -14.21089   64.68677   28.64434]
[-1.0, -0.0]
Episode finished after 2 timesteps
Reward:  1.0
[ 22.9063  -35.31046  35.30115  10.49014]
[0.2238152, -0.0]
Episode finished after 1 timesteps
Reward:  1.0
[-66.35822 -41.74392  32.21763  27.21313]
[-1.0, -0.0]
Episode finished after 1 timesteps
Reward:  1.0
[ 45.00846 -68.21664  58.33325  25.08126]
[0.39631796, -0.0]
Episode finished after 1 timesteps
Reward:  1.0
[ 32.37697 -47.3785   42.63395  23.36719]
[0.09334865, -0.0]
Episode finished after 1 timesteps
Reward:  1.0
[-102.74506    7.67821   70.81089   21.19506]
[-1.0, -0.0]
Episode finished after 1 timesteps
Reward:  1.0
[-118.38648  -39.77077   46.2788     7.19695]
[-1.0, -0.0]
[-132.37303  -41.12658   46.2788     7.19695]
[-1.0, -0.0]
[-140.38629  -47.81653   46.2788     7.19695]
[-1.0, -0.0]
Episode finished after 

[-84.54839  -5.40875  57.36565  18.2947 ]
[-1.0, -0.0]
[-93.98117  -8.25805  57.36565  18.2947 ]
[-1.0, -0.0]
Episode finished after 3 timesteps
Reward:  1.0
[-91.50156  26.08675  63.25625  13.45368]
[-1.0, -0.0]
[-97.57281  24.3559   63.25625  13.45368]
[-1.0, -0.0]
[-105.766     15.34429   63.25625   13.45368]
[-1.0, -0.0]
Episode finished after 3 timesteps
Reward:  1.0
[-67.36741 -49.71029  45.93739  28.27703]
[-0.93687975, -0.349651721603592]
[-75.30053 -60.53092  45.93739  28.27703]
[-0.9667293, -0.255801666817875]
Episode finished after 2 timesteps
Reward:  1.0
[-146.90766   -9.47057   79.9122     7.58837]
[-1.0, -0.0]
[-153.19368  -13.20897   79.9122     7.58837]
[-1.0, -0.0]
[-162.94532  -16.22664   79.9122     7.58837]
[-1.0, -0.0]
[-173.05322  -19.24465   79.9122     7.58837]
[-1.0, -0.0]
Episode finished after 4 timesteps
Reward:  1.0
[ 72.65759 -65.63762  51.60282  10.58289]
[1.0, -0.0]
Episode finished after 1 timesteps
Reward:  1.0
[-146.7409   -12.19247   79.28306    8.5

In [None]:
env.unwrapped.INCLUDE_UNWINNABLE = False
env.init_polytopes(1.0,[])
#orig_observation = env.reset()
for i_episode in range(10):
    observation = env.reset()
    #env.unwrapped.state=[4.545,13,10,5.5]
    #env.unwrapped.state = orig_observation
    observation = env.unwrapped.state
    total_reward=0
    for t in range(1000):
        print(observation)
        action, _states = model.predict(observation,deterministic=True)
        print([action[1]*action[0],action[1]*np.sqrt(1-action[0]**2)])
        #print[[action[1]*action[0],action[1]*np.sqrt(1-action[0]**2)]]
        #x1_norm = observation[0]/np.sqrt(observation[0]**2+observation[1]**2)
        #x2_norm = observation[1]/np.sqrt(observation[0]**2+observation[1]**2)
        #f = 1.0 if (x2_norm) < 0 else -1.0
        #action = [f, f*(0-x1_norm)]
        observation, reward, done, info = env.step(action)
        total_reward=0.99*total_reward+reward
        env.render()
        time.sleep(0.5)
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            print("Reward: ", total_reward)
            time.sleep(5.0)
            break
env.close()

In [None]:
env.unwrapped.INCLUDE_UNWINNABLE = False
env.init_polytopes(0.0,retrain_polytopes_certain)
for i_episode in range(100000):
    observation = env.reset()
    observation = env.unwrapped.state
    total_reward=0
    for t in range(1000):
        print(observation)
        action, _states = model.predict(observation,deterministic=True)
        print([action[1]*action[0],action[1]*np.sqrt(1-action[0]**2)])
        observation, reward, done, info = env.step(action)
        assert reward >= 0, f"reward: {reward}"
        total_reward=0.99*total_reward+reward
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            print("Reward: ", total_reward)
            break
env.close()

In [None]:
env.unwrapped.INCLUDE_UNWINNABLE = False
bounds = pc.box2poly([[-100.,100.],[-100.,400.],[49.9,50.1],[24.9,25.1]])
env.init_polytopes(0.0,[bounds])
for i_episode in range(100000):
    observation = env.reset()
    observation = env.unwrapped.state
    total_reward=0
    for t in range(1000):
        print(observation)
        action, _states = model.predict(observation,deterministic=True)
        print([action[1]*action[0],action[1]*np.sqrt(1-action[0]**2)])
        observation, reward, done, info = env.step(action)
        assert reward >= 0, f"reward: {reward}"
        total_reward=0.99*total_reward+reward
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            print("Reward: ", total_reward)
            break
env.close()

In [None]:
x = [  3.55826, 253.8157,   24.94608,  50.0919 ]
print("certain")
for p in retrain_polytopes_certain:
    if x in p:
        print("found p")
print("uncertain")
for p in retrain_polytopes_uncertain:
    if x in p:
        print("found p")