# Retraining after first attempt to prove correct
**Important note:**
In the process of initiating the retraining we found a bug in the environment:
Essentially, the area which now turned out to be buggy has not been trained on originally as it never occurred in the training samples due to a buggy bounds check (mixup between obstacle size `c` and wind speed `w` in `is_in_bounds`

In [1]:
import gym
import time

In [2]:
import pickle
import numpy as np
import polytope as pc

In [3]:
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
import torch
from torch import nn

In [4]:
import acc

In [5]:
torch.manual_seed(42)

<torch._C.Generator at 0x7fbead6b1470>

In [6]:
env = gym.make('acc-variant-v1')



In [7]:
env.unwrapped.INCLUDE_UNWINNABLE = False

In [8]:
env.seed(42)

[42]

In [9]:
retrain_polytopes = None
with open("acc_bigger_retrain200000-100000-0.1-polytopes.pickle","rb") as f:
    retrain_polytopes = pickle.load(f)

In [10]:
poly_region = pc.Region(retrain_polytopes)

In [11]:
# Total volume of polytopes?
# Share of instances usually (i.e. without "focus polytopes")

# Volume of state space:
N = 100000
n=2
l_b = np.array([0,-200])
u_b = np.array([100,200])
xs = env.unwrapped.np_random.uniform(low=l_b,high=u_b,size=(N,n))
s = 0
for x in xs:
    if np.sqrt(x[0]*2*env.unwrapped.A)<=x[1] and x[1]<=np.sqrt((env.unwrapped.MAX_VALUE-x[0])*2*env.unwrapped.B) and not (env.unwrapped.is_crash(x) or x[0] > env.unwrapped.MAX_VALUE):
        s+=1
print("s: ",s)
total_vol = np.prod(u_b - l_b) * (s / N)
print("total volume: ", total_vol)

poly_region = pc.Region(retrain_polytopes)
l_b, u_b = poly_region.bounding_box
l_b = l_b.flatten()
u_b = u_b.flatten()
print(l_b,",",u_b)
xs = env.unwrapped.np_random.uniform(low=l_b,high=u_b,size=(N,n))
s = 0
for x in xs:
    if x in poly_region:
        if -np.sqrt(x[0]*2*env.unwrapped.A)<=x[1] and x[1]<=np.sqrt((env.unwrapped.MAX_VALUE-x[0])*2*env.unwrapped.B) and not (env.unwrapped.is_crash(x) or x[0] > env.unwrapped.MAX_VALUE):
            s+=1
poly_vol = np.prod(u_b - l_b) * (s / N)
print("poly volume: ", poly_vol)

# We only have an upper bound for the share, since polytopes may be partially outside the state space of interest
poly_share = poly_vol/total_vol
print("max share: ", poly_share)

s:  19574
total volume:  7829.599999999999
[-0.      -7.82518] , [0.32048 0.     ]
poly volume:  0.4050319836489656
max share:  5.1730865388904363e-05


In [14]:
env.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7fbead6b1470>

In [15]:
eval_episode_length=1000
training_episode_length=100000

In [16]:
model = PPO.load("model_backup/acc-2000000-64-64-64-64-100000-0.1")
model.set_env(env)

env.init_polytopes(1.0,[])
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




mean_reward:3837.63 +/- 800.20


In [17]:
env.init_polytopes(0.0,retrain_polytopes)
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

mean_reward:1462.65 +/- 3001.36


In [18]:
results_overall = {}
results_polys = {}

In [19]:
for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:
    model = PPO.load("model_backup/acc-2000000-64-64-64-64-100000-0.1")
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    env.unwrapped.INCLUDE_UNWINNABLE = False
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/acc-2000000-64-64-64-64-100000-100000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 165      |
|    ep_rew_mean     | 280      |
| time/              |          |
|    fps             | 517      |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 195        |
|    ep_rew_mean          | 696        |
| time/                   |            |
|    fps                  | 395        |
|    iterations           | 2          |
|    time_elapsed         | 10         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.05854983 |
|    clip_fraction        | 0.0719     |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.702      |
| 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 255          |
|    ep_rew_mean          | 1.56e+03     |
| time/                   |              |
|    fps                  | 329          |
|    iterations           | 11           |
|    time_elapsed         | 68           |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 0.0038870352 |
|    clip_fraction        | 0.11         |
|    clip_range           | 0.2          |
|    entropy_loss         | 0.927        |
|    explained_variance   | 0.165        |
|    learning_rate        | 0.0003       |
|    loss                 | 25.5         |
|    n_updates            | 1610         |
|    policy_gradient_loss | 0.00237      |
|    std                  | 0.0945       |
|    value_loss           | 6.94e+03     |
------------------------------------------
------------------------------------------
| rollout/ 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 300        |
|    ep_rew_mean          | 2.23e+03   |
| time/                   |            |
|    fps                  | 323        |
|    iterations           | 20         |
|    time_elapsed         | 126        |
|    total_timesteps      | 40960      |
| train/                  |            |
|    approx_kl            | 0.23256797 |
|    clip_fraction        | 0.167      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.15       |
|    explained_variance   | -0.477     |
|    learning_rate        | 0.0003     |
|    loss                 | 8.38       |
|    n_updates            | 1700       |
|    policy_gradient_loss | 0.00328    |
|    std                  | 0.0759     |
|    value_loss           | 8.01e+03   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 283         |
|    ep_rew_mean          | 2.05e+03    |
| time/                   |             |
|    fps                  | 319         |
|    iterations           | 29          |
|    time_elapsed         | 185         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.011035931 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.49        |
|    explained_variance   | -2.23       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.06        |
|    n_updates            | 1790        |
|    policy_gradient_loss | 0.0007      |
|    std                  | 0.0535      |
|    value_loss           | 5.03e+03    |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 308          |
|    ep_rew_mean          | 2.44e+03     |
| time/                   |              |
|    fps                  | 318          |
|    iterations           | 38           |
|    time_elapsed         | 244          |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 0.0059758886 |
|    clip_fraction        | 0.0461       |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.68         |
|    explained_variance   | 0.0346       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.72         |
|    n_updates            | 1880         |
|    policy_gradient_loss | 6.8e-05      |
|    std                  | 0.0445       |
|    value_loss           | 4.17e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 369         |
|    ep_rew_mean          | 3.35e+03    |
| time/                   |             |
|    fps                  | 317         |
|    iterations           | 47          |
|    time_elapsed         | 302         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.011187069 |
|    clip_fraction        | 0.0621      |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.82        |
|    explained_variance   | 0.515       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.76        |
|    n_updates            | 1970        |
|    policy_gradient_loss | -0.00541    |
|    std                  | 0.0387      |
|    value_loss           | 10          |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 258         |
|    ep_rew_mean          | 1.59e+03    |
| time/                   |             |
|    fps                  | 328         |
|    iterations           | 8           |
|    time_elapsed         | 49          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.016542507 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.865       |
|    explained_variance   | 0.0532      |
|    learning_rate        | 0.0003      |
|    loss                 | 218         |
|    n_updates            | 1580        |
|    policy_gradient_loss | -0.00816    |
|    std                  | 0.101       |
|    value_loss           | 1.45e+04    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 273          |
|    ep_rew_mean          | 1.94e+03     |
| time/                   |              |
|    fps                  | 320          |
|    iterations           | 17           |
|    time_elapsed         | 108          |
|    total_timesteps      | 34816        |
| train/                  |              |
|    approx_kl            | 0.0037218684 |
|    clip_fraction        | 0.0459       |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.11         |
|    explained_variance   | 0.516        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.22e+03     |
|    n_updates            | 1670         |
|    policy_gradient_loss | 0.000549     |
|    std                  | 0.0795       |
|    value_loss           | 1.63e+04     |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 316          |
|    ep_rew_mean          | 2.61e+03     |
| time/                   |              |
|    fps                  | 316          |
|    iterations           | 26           |
|    time_elapsed         | 168          |
|    total_timesteps      | 53248        |
| train/                  |              |
|    approx_kl            | 0.0023851134 |
|    clip_fraction        | 0.112        |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.15         |
|    explained_variance   | 0.0971       |
|    learning_rate        | 0.0003       |
|    loss                 | 20.8         |
|    n_updates            | 1760         |
|    policy_gradient_loss | 0.0102       |
|    std                  | 0.0767       |
|    value_loss           | 3.77e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 341         |
|    ep_rew_mean          | 2.85e+03    |
| time/                   |             |
|    fps                  | 315         |
|    iterations           | 36          |
|    time_elapsed         | 234         |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.013217899 |
|    clip_fraction        | 0.0924      |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.43        |
|    explained_variance   | 0.403       |
|    learning_rate        | 0.0003      |
|    loss                 | 89.3        |
|    n_updates            | 1860        |
|    policy_gradient_loss | -0.00415    |
|    std                  | 0.058       |
|    value_loss           | 9.07e+03    |
-----------------------------------------
----------------------------------------
| rollout/                |        

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 202      |
|    ep_rew_mean          | 890      |
| time/                   |          |
|    fps                  | 314      |
|    iterations           | 46       |
|    time_elapsed         | 299      |
|    total_timesteps      | 94208    |
| train/                  |          |
|    approx_kl            | 1.702312 |
|    clip_fraction        | 0.123    |
|    clip_range           | 0.2      |
|    entropy_loss         | 1.67     |
|    explained_variance   | -2.06    |
|    learning_rate        | 0.0003   |
|    loss                 | 2.43     |
|    n_updates            | 1960     |
|    policy_gradient_loss | 0.0492   |
|    std                  | 0.0449   |
|    value_loss           | 5.87e+03 |
--------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 219        |
|    ep_rew_mean   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 212         |
|    ep_rew_mean          | 1.28e+03    |
| time/                   |             |
|    fps                  | 329         |
|    iterations           | 7           |
|    time_elapsed         | 43          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.008632775 |
|    clip_fraction        | 0.0241      |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.726       |
|    explained_variance   | 0.731       |
|    learning_rate        | 0.0003      |
|    loss                 | 57          |
|    n_updates            | 1570        |
|    policy_gradient_loss | 0.000487    |
|    std                  | 0.117       |
|    value_loss           | 4.2e+03     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 193          |
|    ep_rew_mean          | 1.08e+03     |
| time/                   |              |
|    fps                  | 317          |
|    iterations           | 16           |
|    time_elapsed         | 103          |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0031412616 |
|    clip_fraction        | 0.0351       |
|    clip_range           | 0.2          |
|    entropy_loss         | 0.733        |
|    explained_variance   | 0.887        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.87e+03     |
|    n_updates            | 1660         |
|    policy_gradient_loss | 0.000718     |
|    std                  | 0.116        |
|    value_loss           | 7.25e+03     |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 175          |
|    ep_rew_mean          | 933          |
| time/                   |              |
|    fps                  | 315          |
|    iterations           | 25           |
|    time_elapsed         | 162          |
|    total_timesteps      | 51200        |
| train/                  |              |
|    approx_kl            | 0.0044932575 |
|    clip_fraction        | 0.0446       |
|    clip_range           | 0.2          |
|    entropy_loss         | 0.747        |
|    explained_variance   | 0.821        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.37e+03     |
|    n_updates            | 1750         |
|    policy_gradient_loss | -0.00149     |
|    std                  | 0.115        |
|    value_loss           | 1.49e+04     |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 208          |
|    ep_rew_mean          | 1.33e+03     |
| time/                   |              |
|    fps                  | 313          |
|    iterations           | 34           |
|    time_elapsed         | 221          |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 0.0051582265 |
|    clip_fraction        | 0.0958       |
|    clip_range           | 0.2          |
|    entropy_loss         | 0.763        |
|    explained_variance   | -0.86        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.71         |
|    n_updates            | 1840         |
|    policy_gradient_loss | 0.00821      |
|    std                  | 0.112        |
|    value_loss           | 36.8         |
------------------------------------------
---------------------------------------
| rollout/    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 309          |
|    ep_rew_mean          | 2.52e+03     |
| time/                   |              |
|    fps                  | 313          |
|    iterations           | 43           |
|    time_elapsed         | 281          |
|    total_timesteps      | 88064        |
| train/                  |              |
|    approx_kl            | 0.0075631235 |
|    clip_fraction        | 0.0736       |
|    clip_range           | 0.2          |
|    entropy_loss         | 0.844        |
|    explained_variance   | -0.00534     |
|    learning_rate        | 0.0003       |
|    loss                 | 32.5         |
|    n_updates            | 1930         |
|    policy_gradient_loss | -0.00709     |
|    std                  | 0.103        |
|    value_loss           | 7.49e+03     |
------------------------------------------
----------------------------------------
| rollout/   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 390        |
|    ep_rew_mean          | 3.43e+03   |
| time/                   |            |
|    fps                  | 346        |
|    iterations           | 4          |
|    time_elapsed         | 23         |
|    total_timesteps      | 8192       |
| train/                  |            |
|    approx_kl            | 0.17818353 |
|    clip_fraction        | 0.0667     |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.725      |
|    explained_variance   | 0.177      |
|    learning_rate        | 0.0003     |
|    loss                 | 17.6       |
|    n_updates            | 1540       |
|    policy_gradient_loss | 0.0198     |
|    std                  | 0.116      |
|    value_loss           | 43         |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 355         |
|    ep_rew_mean          | 3.07e+03    |
| time/                   |             |
|    fps                  | 320         |
|    iterations           | 14          |
|    time_elapsed         | 89          |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.007305996 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.915       |
|    explained_variance   | 0.132       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.97        |
|    n_updates            | 1640        |
|    policy_gradient_loss | 0.0108      |
|    std                  | 0.0956      |
|    value_loss           | 13.8        |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 298         |
|    ep_rew_mean          | 2.42e+03    |
| time/                   |             |
|    fps                  | 315         |
|    iterations           | 24          |
|    time_elapsed         | 155         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.039614804 |
|    clip_fraction        | 0.0638      |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.06        |
|    explained_variance   | -8.36       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.68        |
|    n_updates            | 1740        |
|    policy_gradient_loss | 0.0139      |
|    std                  | 0.0838      |
|    value_loss           | 289         |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 330        |
|    ep_rew_mean          | 2.77e+03   |
| time/                   |            |
|    fps                  | 314        |
|    iterations           | 33         |
|    time_elapsed         | 215        |
|    total_timesteps      | 67584      |
| train/                  |            |
|    approx_kl            | 0.06436594 |
|    clip_fraction        | 0.0774     |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.2        |
|    explained_variance   | -0.0496    |
|    learning_rate        | 0.0003     |
|    loss                 | 13.1       |
|    n_updates            | 1830       |
|    policy_gradient_loss | 0.0221     |
|    std                  | 0.0731     |
|    value_loss           | 93.5       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 366        |
|    ep_rew_mean          | 3.29e+03   |
| time/                   |            |
|    fps                  | 311        |
|    iterations           | 43         |
|    time_elapsed         | 282        |
|    total_timesteps      | 88064      |
| train/                  |            |
|    approx_kl            | 0.73134506 |
|    clip_fraction        | 0.148      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.31       |
|    explained_variance   | 0.472      |
|    learning_rate        | 0.0003     |
|    loss                 | 115        |
|    n_updates            | 1930       |
|    policy_gradient_loss | 0.0374     |
|    std                  | 0.0652     |
|    value_loss           | 5.65e+03   |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 139         |
|    ep_rew_mean          | 499         |
| time/                   |             |
|    fps                  | 335         |
|    iterations           | 4           |
|    time_elapsed         | 24          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.009115995 |
|    clip_fraction        | 0.153       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.706       |
|    explained_variance   | 0.00879     |
|    learning_rate        | 0.0003      |
|    loss                 | 1.14e+03    |
|    n_updates            | 1540        |
|    policy_gradient_loss | 0.0229      |
|    std                  | 0.119       |
|    value_loss           | 1.8e+04     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 54.1     |
|    ep_rew_mean          | -397     |
| time/                   |          |
|    fps                  | 316      |
|    iterations           | 14       |
|    time_elapsed         | 90       |
|    total_timesteps      | 28672    |
| train/                  |          |
|    approx_kl            | 5.661479 |
|    clip_fraction        | 0.408    |
|    clip_range           | 0.2      |
|    entropy_loss         | 0.712    |
|    explained_variance   | 0.978    |
|    learning_rate        | 0.0003   |
|    loss                 | 165      |
|    n_updates            | 1640     |
|    policy_gradient_loss | 0.124    |
|    std                  | 0.119    |
|    value_loss           | 1.17e+03 |
--------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 44.4       |
|    ep_rew_mean   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 58.3       |
|    ep_rew_mean          | -324       |
| time/                   |            |
|    fps                  | 313        |
|    iterations           | 24         |
|    time_elapsed         | 156        |
|    total_timesteps      | 49152      |
| train/                  |            |
|    approx_kl            | 0.61186635 |
|    clip_fraction        | 0.164      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.715      |
|    explained_variance   | 0.989      |
|    learning_rate        | 0.0003     |
|    loss                 | 722        |
|    n_updates            | 1740       |
|    policy_gradient_loss | 0.0204     |
|    std                  | 0.118      |
|    value_loss           | 1.1e+03    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 33.3      |
|    ep_rew_mean          | -606      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 34        |
|    time_elapsed         | 223       |
|    total_timesteps      | 69632     |
| train/                  |           |
|    approx_kl            | 0.7086227 |
|    clip_fraction        | 0.182     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.711     |
|    explained_variance   | 0.964     |
|    learning_rate        | 0.0003    |
|    loss                 | 173       |
|    n_updates            | 1840      |
|    policy_gradient_loss | -0.00359  |
|    std                  | 0.119     |
|    value_loss           | 997       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 32.1      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 14.7      |
|    ep_rew_mean          | -818      |
| time/                   |           |
|    fps                  | 309       |
|    iterations           | 44        |
|    time_elapsed         | 291       |
|    total_timesteps      | 90112     |
| train/                  |           |
|    approx_kl            | 134.91154 |
|    clip_fraction        | 0.795     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.705     |
|    explained_variance   | 0.0588    |
|    learning_rate        | 0.0003    |
|    loss                 | 81.8      |
|    n_updates            | 1940      |
|    policy_gradient_loss | 0.285     |
|    std                  | 0.119     |
|    value_loss           | 4.25e+03  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 16        |


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 354        |
|    ep_rew_mean          | 3.01e+03   |
| time/                   |            |
|    fps                  | 181        |
|    iterations           | 5          |
|    time_elapsed         | 56         |
|    total_timesteps      | 10240      |
| train/                  |            |
|    approx_kl            | 0.07554555 |
|    clip_fraction        | 0.0652     |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.71       |
|    explained_variance   | 0.82       |
|    learning_rate        | 0.0003     |
|    loss                 | 18.4       |
|    n_updates            | 1550       |
|    policy_gradient_loss | 0.0358     |
|    std                  | 0.119      |
|    value_loss           | 683        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 351        |
|    ep_rew_mean          | 2.99e+03   |
| time/                   |            |
|    fps                  | 194        |
|    iterations           | 15         |
|    time_elapsed         | 157        |
|    total_timesteps      | 30720      |
| train/                  |            |
|    approx_kl            | 0.47372687 |
|    clip_fraction        | 0.236      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.822      |
|    explained_variance   | 0.424      |
|    learning_rate        | 0.0003     |
|    loss                 | 8.24       |
|    n_updates            | 1650       |
|    policy_gradient_loss | 0.062      |
|    std                  | 0.105      |
|    value_loss           | 28.4       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 316        |
|    ep_rew_mean          | 2.63e+03   |
| time/                   |            |
|    fps                  | 205        |
|    iterations           | 25         |
|    time_elapsed         | 248        |
|    total_timesteps      | 51200      |
| train/                  |            |
|    approx_kl            | 0.60806036 |
|    clip_fraction        | 0.0981     |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.968      |
|    explained_variance   | 0.895      |
|    learning_rate        | 0.0003     |
|    loss                 | 227        |
|    n_updates            | 1750       |
|    policy_gradient_loss | 0.000591   |
|    std                  | 0.0918     |
|    value_loss           | 483        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 153          |
|    ep_rew_mean          | 35.1         |
| time/                   |              |
|    fps                  | 209          |
|    iterations           | 34           |
|    time_elapsed         | 332          |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 0.0065469136 |
|    clip_fraction        | 0.0423       |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.08         |
|    explained_variance   | 0.867        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.48e+04     |
|    n_updates            | 1840         |
|    policy_gradient_loss | -0.00688     |
|    std                  | 0.0825       |
|    value_loss           | 8.26e+04     |
------------------------------------------
----------------------------------------
| rollout/   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 183          |
|    ep_rew_mean          | 770          |
| time/                   |              |
|    fps                  | 211          |
|    iterations           | 43           |
|    time_elapsed         | 416          |
|    total_timesteps      | 88064        |
| train/                  |              |
|    approx_kl            | 0.0023063696 |
|    clip_fraction        | 0.0353       |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.09         |
|    explained_variance   | 0.938        |
|    learning_rate        | 0.0003       |
|    loss                 | 92.2         |
|    n_updates            | 1930         |
|    policy_gradient_loss | 0.00698      |
|    std                  | 0.0813       |
|    value_loss           | 1.2e+03      |
------------------------------------------
-------------------------------------------
| rollout/

In [20]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:
    results_overall[p]=[]
    results_polys[p]=[]
    model = PPO.load("model_backup/acc-2000000-64-64-64-64-100000-100000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:




mean_reward:1794.36 +/- 2335.86
Focus Polytopes:
mean_reward:4092.76 +/- 3.23
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:3922.05 +/- 469.46
Focus Polytopes:
mean_reward:2698.67 +/- 2560.74
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:1565.85 +/- 2398.24
Focus Polytopes:
mean_reward:-512.65 +/- 14.74
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:3396.02 +/- 1602.89
Focus Polytopes:
mean_reward:4086.05 +/- 1.35
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.95
Overall:
mean_reward:-799.06 +/- 4.62
Focus Polytopes:
mean_reward:-745.90 +/- 13.90
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:1343.46 +/- 2703.59
Focus Polytopes:
mean_reward:-1062.23 +/- 2197.96


In [21]:
print(results_overall)
print(results_polys)

{0.0: [(1794.3577423461675, 2335.856322265823)], 0.1: [(3922.052573099375, 469.46124813879163)], 0.5: [(1565.8549169559478, 2398.2350640688524)], 0.9: [(3396.023815936208, 1602.8926549522741)], 0.95: [(-799.0567969540358, 4.618851365744381)], 1.0: [(1343.4610378121138, 2703.5927546197913)]}
{0.0: [(4092.762310071349, 3.2332930538565092)], 0.1: [(2698.6682721825837, 2560.73957567821)], 0.5: [(-512.6466750481129, 14.741641097988378)], 0.9: [(4086.0508041523694, 1.3531052203752272)], 0.95: [(-745.8993273051977, 13.904470316282218)], 1.0: [(-1062.232770757675, 2197.96044819203)]}


In [22]:
for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:
    model = PPO.load("model_backup/acc-2000000-64-64-64-64-100000-100000-"+str(p))
    model.set_env(env)
    
    print("p=",p)

    env.init_polytopes(p,retrain_polytopes)
    start_time = time.time()
    model=model.learn(total_timesteps=training_episode_length)
    print("--- %s seconds ---" % (time.time() - start_time))

    model.save("model_backup/acc-2000000-64-64-64-64-100000-200000-"+str(p))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 410      |
|    ep_rew_mean     | 3.98e+03 |
| time/              |          |
|    fps             | 438      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 410         |
|    ep_rew_mean          | 3.98e+03    |
| time/                   |             |
|    fps                  | 280         |
|    iterations           | 2           |
|    time_elapsed         | 14          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.012362827 |
|    clip_fraction        | 0.0751      |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 91         |
|    ep_rew_mean          | -678       |
| time/                   |            |
|    fps                  | 178        |
|    iterations           | 11         |
|    time_elapsed         | 126        |
|    total_timesteps      | 22528      |
| train/                  |            |
|    approx_kl            | 0.00563364 |
|    clip_fraction        | 0.0512     |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.22       |
|    explained_variance   | 0.963      |
|    learning_rate        | 0.0003     |
|    loss                 | 79.5       |
|    n_updates            | 2100       |
|    policy_gradient_loss | 0.00244    |
|    std                  | 0.0261     |
|    value_loss           | 3.88e+03   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 107          |
|    ep_rew_mean          | -433         |
| time/                   |              |
|    fps                  | 173          |
|    iterations           | 20           |
|    time_elapsed         | 236          |
|    total_timesteps      | 40960        |
| train/                  |              |
|    approx_kl            | 0.0049392404 |
|    clip_fraction        | 0.0411       |
|    clip_range           | 0.2          |
|    entropy_loss         | 2.45         |
|    explained_variance   | 0.826        |
|    learning_rate        | 0.0003       |
|    loss                 | 11           |
|    n_updates            | 2190         |
|    policy_gradient_loss | -0.00329     |
|    std                  | 0.0204       |
|    value_loss           | 8.44e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 91          |
|    ep_rew_mean          | -671        |
| time/                   |             |
|    fps                  | 175         |
|    iterations           | 29          |
|    time_elapsed         | 339         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.013595166 |
|    clip_fraction        | 0.2         |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.62        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.17        |
|    n_updates            | 2280        |
|    policy_gradient_loss | -0.00096    |
|    std                  | 0.0175      |
|    value_loss           | 38.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 103         |
|    ep_rew_mean          | -487        |
| time/                   |             |
|    fps                  | 178         |
|    iterations           | 38          |
|    time_elapsed         | 437         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.004977862 |
|    clip_fraction        | 0.043       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.87        |
|    explained_variance   | 0.831       |
|    learning_rate        | 0.0003      |
|    loss                 | 117         |
|    n_updates            | 2370        |
|    policy_gradient_loss | -0.0016     |
|    std                  | 0.0136      |
|    value_loss           | 2.14e+04    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 50.1      |
|    ep_rew_mean          | -1.27e+03 |
| time/                   |           |
|    fps                  | 162       |
|    iterations           | 47        |
|    time_elapsed         | 592       |
|    total_timesteps      | 96256     |
| train/                  |           |
|    approx_kl            | 2.0697892 |
|    clip_fraction        | 0.147     |
|    clip_range           | 0.2       |
|    entropy_loss         | 3.06      |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 12.5      |
|    n_updates            | 2460      |
|    policy_gradient_loss | 0.00895   |
|    std                  | 0.0114    |
|    value_loss           | 38.1      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 66.4    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 246        |
|    ep_rew_mean          | 1.58e+03   |
| time/                   |            |
|    fps                  | 227        |
|    iterations           | 8          |
|    time_elapsed         | 71         |
|    total_timesteps      | 16384      |
| train/                  |            |
|    approx_kl            | 0.11490743 |
|    clip_fraction        | 0.0754     |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.11       |
|    explained_variance   | 0.732      |
|    learning_rate        | 0.0003     |
|    loss                 | 6.62e+04   |
|    n_updates            | 2070       |
|    policy_gradient_loss | 0.0114     |
|    std                  | 0.0294     |
|    value_loss           | 8.3e+03    |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 74.6      |
|    ep_rew_mean          | -926      |
| time/                   |           |
|    fps                  | 195       |
|    iterations           | 18        |
|    time_elapsed         | 188       |
|    total_timesteps      | 36864     |
| train/                  |           |
|    approx_kl            | 7.7328415 |
|    clip_fraction        | 0.0907    |
|    clip_range           | 0.2       |
|    entropy_loss         | 2.38      |
|    explained_variance   | 0.889     |
|    learning_rate        | 0.0003    |
|    loss                 | 97.8      |
|    n_updates            | 2170      |
|    policy_gradient_loss | 0.0273    |
|    std                  | 0.0224    |
|    value_loss           | 1.18e+04  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 70.5      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 132       |
|    ep_rew_mean          | -54       |
| time/                   |           |
|    fps                  | 200       |
|    iterations           | 28        |
|    time_elapsed         | 286       |
|    total_timesteps      | 57344     |
| train/                  |           |
|    approx_kl            | 2.3543968 |
|    clip_fraction        | 0.12      |
|    clip_range           | 0.2       |
|    entropy_loss         | 2.48      |
|    explained_variance   | 0.364     |
|    learning_rate        | 0.0003    |
|    loss                 | 6.35e+03  |
|    n_updates            | 2270      |
|    policy_gradient_loss | 0.0591    |
|    std                  | 0.0202    |
|    value_loss           | 6.02e+03  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 144       |


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 140        |
|    ep_rew_mean          | 61.8       |
| time/                   |            |
|    fps                  | 206        |
|    iterations           | 38         |
|    time_elapsed         | 376        |
|    total_timesteps      | 77824      |
| train/                  |            |
|    approx_kl            | 0.03369149 |
|    clip_fraction        | 0.0806     |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.67       |
|    explained_variance   | 0.888      |
|    learning_rate        | 0.0003     |
|    loss                 | 504        |
|    n_updates            | 2370       |
|    policy_gradient_loss | 0.000908   |
|    std                  | 0.0166     |
|    value_loss           | 8.21e+03   |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 230      |
|    ep_rew_mean          | 1.38e+03 |
| time/                   |          |
|    fps                  | 221      |
|    iterations           | 48       |
|    time_elapsed         | 443      |
|    total_timesteps      | 98304    |
| train/                  |          |
|    approx_kl            | 1894.781 |
|    clip_fraction        | 0.0904   |
|    clip_range           | 0.2      |
|    entropy_loss         | 2.98     |
|    explained_variance   | -0.818   |
|    learning_rate        | 0.0003   |
|    loss                 | 0.181    |
|    n_updates            | 2470     |
|    policy_gradient_loss | -0.00615 |
|    std                  | 0.0121   |
|    value_loss           | 4.11     |
--------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 234        |
|    ep_rew_mean   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 167          |
|    ep_rew_mean          | 940          |
| time/                   |              |
|    fps                  | 318          |
|    iterations           | 9            |
|    time_elapsed         | 57           |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 0.0026698732 |
|    clip_fraction        | 0.0546       |
|    clip_range           | 0.2          |
|    entropy_loss         | 0.884        |
|    explained_variance   | 0.892        |
|    learning_rate        | 0.0003       |
|    loss                 | 196          |
|    n_updates            | 2080         |
|    policy_gradient_loss | 0.00471      |
|    std                  | 0.0998       |
|    value_loss           | 1.17e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 215         |
|    ep_rew_mean          | 1.25e+03    |
| time/                   |             |
|    fps                  | 311         |
|    iterations           | 19          |
|    time_elapsed         | 124         |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.004344494 |
|    clip_fraction        | 0.0252      |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.958       |
|    explained_variance   | 0.82        |
|    learning_rate        | 0.0003      |
|    loss                 | 9.91e+03    |
|    n_updates            | 2180        |
|    policy_gradient_loss | 0.000345    |
|    std                  | 0.0928      |
|    value_loss           | 2.45e+04    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 159         |
|    ep_rew_mean          | 545         |
| time/                   |             |
|    fps                  | 310         |
|    iterations           | 28          |
|    time_elapsed         | 184         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.008840963 |
|    clip_fraction        | 0.107       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.97        |
|    explained_variance   | 0.714       |
|    learning_rate        | 0.0003      |
|    loss                 | 39.2        |
|    n_updates            | 2270        |
|    policy_gradient_loss | 0.0136      |
|    std                  | 0.0917      |
|    value_loss           | 495         |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 137        |
|    ep_rew_mean          | 265        |
| time/                   |            |
|    fps                  | 280        |
|    iterations           | 37         |
|    time_elapsed         | 269        |
|    total_timesteps      | 75776      |
| train/                  |            |
|    approx_kl            | 0.16687027 |
|    clip_fraction        | 0.256      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.988      |
|    explained_variance   | 0.988      |
|    learning_rate        | 0.0003     |
|    loss                 | 188        |
|    n_updates            | 2360       |
|    policy_gradient_loss | 0.0766     |
|    std                  | 0.0901     |
|    value_loss           | 1.37e+03   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 253          |
|    ep_rew_mean          | 1.72e+03     |
| time/                   |              |
|    fps                  | 263          |
|    iterations           | 47           |
|    time_elapsed         | 364          |
|    total_timesteps      | 96256        |
| train/                  |              |
|    approx_kl            | 0.0071103848 |
|    clip_fraction        | 0.0213       |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.09         |
|    explained_variance   | 0.694        |
|    learning_rate        | 0.0003       |
|    loss                 | 142          |
|    n_updates            | 2460         |
|    policy_gradient_loss | 0.000466     |
|    std                  | 0.0815       |
|    value_loss           | 9.89e+03     |
------------------------------------------
----------------------------------------
| rollout/   

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 343      |
|    ep_rew_mean          | 3.01e+03 |
| time/                   |          |
|    fps                  | 226      |
|    iterations           | 8        |
|    time_elapsed         | 72       |
|    total_timesteps      | 16384    |
| train/                  |          |
|    approx_kl            | 0.15928  |
|    clip_fraction        | 0.0928   |
|    clip_range           | 0.2      |
|    entropy_loss         | 1.47     |
|    explained_variance   | 0.973    |
|    learning_rate        | 0.0003   |
|    loss                 | 4.7      |
|    n_updates            | 2070     |
|    policy_gradient_loss | 0.0533   |
|    std                  | 0.0555   |
|    value_loss           | 328      |
--------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 349         |
|    ep_rew_mean

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 370       |
|    ep_rew_mean          | 3.33e+03  |
| time/                   |           |
|    fps                  | 218       |
|    iterations           | 18        |
|    time_elapsed         | 168       |
|    total_timesteps      | 36864     |
| train/                  |           |
|    approx_kl            | 1.8670678 |
|    clip_fraction        | 0.148     |
|    clip_range           | 0.2       |
|    entropy_loss         | 1.71      |
|    explained_variance   | 0.917     |
|    learning_rate        | 0.0003    |
|    loss                 | 27.7      |
|    n_updates            | 2170      |
|    policy_gradient_loss | 0.106     |
|    std                  | 0.0432    |
|    value_loss           | 38.4      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 374      |
|  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 394       |
|    ep_rew_mean          | 3.61e+03  |
| time/                   |           |
|    fps                  | 220       |
|    iterations           | 28        |
|    time_elapsed         | 260       |
|    total_timesteps      | 57344     |
| train/                  |           |
|    approx_kl            | 21.830866 |
|    clip_fraction        | 0.077     |
|    clip_range           | 0.2       |
|    entropy_loss         | 1.88      |
|    explained_variance   | 0.668     |
|    learning_rate        | 0.0003    |
|    loss                 | 6.41      |
|    n_updates            | 2270      |
|    policy_gradient_loss | 0.0351    |
|    std                  | 0.0367    |
|    value_loss           | 43.7      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 394      |
|  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 402       |
|    ep_rew_mean          | 3.75e+03  |
| time/                   |           |
|    fps                  | 229       |
|    iterations           | 38        |
|    time_elapsed         | 338       |
|    total_timesteps      | 77824     |
| train/                  |           |
|    approx_kl            | 11.147435 |
|    clip_fraction        | 0.381     |
|    clip_range           | 0.2       |
|    entropy_loss         | 2.13      |
|    explained_variance   | 0.976     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.44      |
|    n_updates            | 2370      |
|    policy_gradient_loss | 0.152     |
|    std                  | 0.0287    |
|    value_loss           | 8.7       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 402       |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 410       |
|    ep_rew_mean          | 3.92e+03  |
| time/                   |           |
|    fps                  | 225       |
|    iterations           | 48        |
|    time_elapsed         | 435       |
|    total_timesteps      | 98304     |
| train/                  |           |
|    approx_kl            | 1.4430245 |
|    clip_fraction        | 0.244     |
|    clip_range           | 0.2       |
|    entropy_loss         | 2.4       |
|    explained_variance   | 0.96      |
|    learning_rate        | 0.0003    |
|    loss                 | 7.18      |
|    n_updates            | 2470      |
|    policy_gradient_loss | 0.0575    |
|    std                  | 0.0218    |
|    value_loss           | 4.51      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 410      |
|  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 17.8      |
|    ep_rew_mean          | -777      |
| time/                   |           |
|    fps                  | 224       |
|    iterations           | 9         |
|    time_elapsed         | 82        |
|    total_timesteps      | 18432     |
| train/                  |           |
|    approx_kl            | 44.686012 |
|    clip_fraction        | 0.698     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.681     |
|    explained_variance   | 0.668     |
|    learning_rate        | 0.0003    |
|    loss                 | 112       |
|    n_updates            | 2080      |
|    policy_gradient_loss | 0.0288    |
|    std                  | 0.122     |
|    value_loss           | 3.36e+03  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 17.1      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 33        |
|    ep_rew_mean          | -594      |
| time/                   |           |
|    fps                  | 255       |
|    iterations           | 19        |
|    time_elapsed         | 152       |
|    total_timesteps      | 38912     |
| train/                  |           |
|    approx_kl            | 11.181444 |
|    clip_fraction        | 0.644     |
|    clip_range           | 0.2       |
|    entropy_loss         | 0.671     |
|    explained_variance   | 0.978     |
|    learning_rate        | 0.0003    |
|    loss                 | 49.3      |
|    n_updates            | 2180      |
|    policy_gradient_loss | 0.097     |
|    std                  | 0.124     |
|    value_loss           | 185       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 42.2      |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 53       |
|    ep_rew_mean          | -441     |
| time/                   |          |
|    fps                  | 270      |
|    iterations           | 29       |
|    time_elapsed         | 219      |
|    total_timesteps      | 59392    |
| train/                  |          |
|    approx_kl            | 1.850192 |
|    clip_fraction        | 0.371    |
|    clip_range           | 0.2      |
|    entropy_loss         | 0.674    |
|    explained_variance   | 0.999    |
|    learning_rate        | 0.0003   |
|    loss                 | 40.8     |
|    n_updates            | 2280     |
|    policy_gradient_loss | 0.0694   |
|    std                  | 0.123    |
|    value_loss           | 51.5     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 51.5      |
|    ep_rew_mean      

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 70.9       |
|    ep_rew_mean          | -259       |
| time/                   |            |
|    fps                  | 258        |
|    iterations           | 39         |
|    time_elapsed         | 309        |
|    total_timesteps      | 79872      |
| train/                  |            |
|    approx_kl            | 0.84760743 |
|    clip_fraction        | 0.295      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.741      |
|    explained_variance   | 0.995      |
|    learning_rate        | 0.0003     |
|    loss                 | 61.6       |
|    n_updates            | 2380       |
|    policy_gradient_loss | -0.00135   |
|    std                  | 0.115      |
|    value_loss           | 142        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 62.5     |
|    ep_rew_mean          | -353     |
| time/                   |          |
|    fps                  | 247      |
|    iterations           | 49       |
|    time_elapsed         | 404      |
|    total_timesteps      | 100352   |
| train/                  |          |
|    approx_kl            | 8.274174 |
|    clip_fraction        | 0.339    |
|    clip_range           | 0.2      |
|    entropy_loss         | 0.772    |
|    explained_variance   | 0.995    |
|    learning_rate        | 0.0003   |
|    loss                 | 17.9     |
|    n_updates            | 2480     |
|    policy_gradient_loss | 0.054    |
|    std                  | 0.112    |
|    value_loss           | 120      |
--------------------------------------
--- 408.68086552619934 seconds ---
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
-----------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 232          |
|    ep_rew_mean          | 1.45e+03     |
| time/                   |              |
|    fps                  | 220          |
|    iterations           | 10           |
|    time_elapsed         | 92           |
|    total_timesteps      | 20480        |
| train/                  |              |
|    approx_kl            | 0.0045435457 |
|    clip_fraction        | 0.0192       |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.12         |
|    explained_variance   | 0.986        |
|    learning_rate        | 0.0003       |
|    loss                 | 402          |
|    n_updates            | 2090         |
|    policy_gradient_loss | -0.000893    |
|    std                  | 0.0791       |
|    value_loss           | 954          |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 154          |
|    ep_rew_mean          | 467          |
| time/                   |              |
|    fps                  | 216          |
|    iterations           | 19           |
|    time_elapsed         | 179          |
|    total_timesteps      | 38912        |
| train/                  |              |
|    approx_kl            | 0.0035881982 |
|    clip_fraction        | 0.0445       |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.13         |
|    explained_variance   | 0.989        |
|    learning_rate        | 0.0003       |
|    loss                 | 918          |
|    n_updates            | 2180         |
|    policy_gradient_loss | 0.00288      |
|    std                  | 0.078        |
|    value_loss           | 5.76e+03     |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 205         |
|    ep_rew_mean          | 1.16e+03    |
| time/                   |             |
|    fps                  | 229         |
|    iterations           | 28          |
|    time_elapsed         | 249         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.017859261 |
|    clip_fraction        | 0.0951      |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.15        |
|    explained_variance   | 0.96        |
|    learning_rate        | 0.0003      |
|    loss                 | 166         |
|    n_updates            | 2270        |
|    policy_gradient_loss | -0.00416    |
|    std                  | 0.076       |
|    value_loss           | 1.48e+03    |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 247         |
|    ep_rew_mean          | 1.64e+03    |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 37          |
|    time_elapsed         | 311         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.008550012 |
|    clip_fraction        | 0.0549      |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.22        |
|    explained_variance   | 0.655       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.89        |
|    n_updates            | 2360        |
|    policy_gradient_loss | 0.0125      |
|    std                  | 0.0714      |
|    value_loss           | 576         |
-----------------------------------------
---------------------------------------
| rollout/                |         

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 146          |
|    ep_rew_mean          | -395         |
| time/                   |              |
|    fps                  | 252          |
|    iterations           | 46           |
|    time_elapsed         | 372          |
|    total_timesteps      | 94208        |
| train/                  |              |
|    approx_kl            | 0.0015641276 |
|    clip_fraction        | 0.0206       |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.26         |
|    explained_variance   | 0.464        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.27e+04     |
|    n_updates            | 2450         |
|    policy_gradient_loss | -0.00398     |
|    std                  | 0.0686       |
|    value_loss           | 1.66e+05     |
------------------------------------------
-----------------------------------------
| rollout/  

In [23]:
# Performance of models on focus polytopes only?
for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:
    model = PPO.load("model_backup/acc-2000000-64-64-64-64-100000-200000-"+str(p))
    model.set_env(env)
    print("p=",p)
    
    print("Overall:")
    env.init_polytopes(1.0,[])
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_overall[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
    
    print("Focus Polytopes:")
    env.init_polytopes(0.0,retrain_polytopes)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)
    results_polys[p].append((mean_reward, std_reward))
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.0
Overall:
mean_reward:354.64 +/- 1260.78
Focus Polytopes:
mean_reward:-1130.65 +/- 1517.93
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.1
Overall:
mean_reward:3071.47 +/- 1916.02
Focus Polytopes:
mean_reward:4085.63 +/- 7.26
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.5
Overall:
mean_reward:3612.11 +/- 1320.34
Focus Polytopes:
mean_reward:4093.58 +/- 1.77
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.9
Overall:
mean_reward:3563.95 +/- 1367.66
Focus Polytopes:
mean_reward:4069.02 +/- 271.79
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 0.95
Overall:
mean_reward:-287.43 +/- 613.73
Focus Polytopes:
mean_reward:-1974.47 +/- 44.27
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
p= 1.0
Overall:
mean_reward:-1620.53 +/- 1250.05
Focus Polyto

In [None]:
env.seed(1997)
torch.manual_seed(1997)

In [None]:
for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:
    print("p=",p)
    print(results_overall[p])
    print(results_polys[p])

In [None]:
results_overall={
    0.0:[(2929.5005532604455, 1987.642181434125), (740.0034029740095, 2341.5728469501078)],
    0.1:[(3418.1372081091404, 1656.1341466200433), (1866.799432562232, 2348.1654507259573)],
    0.5:[(1932.8646950793266, 2531.421546896283), (2298.278150297642, 2471.8655599065737)],
    0.9:[(1714.7107323160171, 2511.671795638882), (1075.2834744087459, 2571.772665344986)],
    0.95:[(175.59871589612962, 2224.4011404884886), (2903.4489146926403, 2248.3917252352485)],
    1.0:[(1674.6219416435956, 2785.8237821959356), (805.8681440439224, 2708.9763965474235)]
}
results_polys={
    0.0:[(4012.506271957636, 42.33747226187223), (224.28019980418682, 2888.451668489511)],
    0.1:[(3989.145850322723, 331.1198812155058), (4027.3148490834237, 38.78762979327285)],
    0.5:[(1604.8040512683392, 2923.258400329725), (1520.517672857523, 2944.2719326856286)],
    0.9:[(986.1459198029041, 2952.599582192741), (-128.44361815786363, 2540.549608587723)],
    0.95:[(-1960.620536404848, 174.07861630854438), (662.739118689537, 2973.2722718776295)],
    1.0:[(-1903.3717366616727, 737.579486103041), (-1969.3372723238467, 426.6580722390694)]
}

We evaluate `model_backup/acc-2000000-64-64-64-64-100000-0.1`