In [None]:
#########################################################################
## COMPROBAR GPU ASIGNADA EN COLABORATORY
#########################################################################
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
#########################################################################
## LIBRERIAS NECESARIAS
#########################################################################
import tensorflow as tf
import gymnasium as gym
import sinergym 
from sinergym.utils.wrappers import (LoggerWrapper, NormalizeAction,
                                     NormalizeObservation) 
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np
import keras


# Librerias necesarias para BC
from stable_baselines3.common.evaluation import evaluate_policy

from imitation.algorithms import bc
from imitation.data.wrappers import RolloutInfoWrapper
from imitation.policies.serialize import load_policy
from imitation.util.util import make_vec_env
# Problema en rollout: es la función que define las transiciones expertas
import imitation.data.rollout as rollout 
from stable_baselines3.common.vec_env import DummyVecEnv

## Creamos un vector de entornos

In [None]:
def _make_env():
    """Build one wrapped Sinergym environment.

    Creates "Eplus-5zone-hot-discrete-v1" with `gym.make`, wraps it in
    `NormalizeObservation`, and then wraps that in `LoggerWrapper`.

    Returns:
        The environment with `LoggerWrapper` as the outermost wrapper.
    """
    return LoggerWrapper(
        NormalizeObservation(gym.make("Eplus-5zone-hot-discrete-v1"))
    )

# Vectorised environment holding a single `_make_env` instance.
venv = DummyVecEnv([_make_env])



## Creación de demostraciones expertas 

In [None]:
# Stand-alone (non-vectorised) environment. It is built with the same
# factory that `venv` uses, so the wrapper stack (NormalizeObservation ->
# LoggerWrapper) is defined in exactly one place and cannot drift.
env = _make_env()



In [None]:
# Returns the expert policy
def download_expert():
    """Load the pretrained PPO expert policy.

    The policy is loaded with imitation's `load_policy` from the relative
    path "model5zone.zip"; the function name and the printed message say
    "download", but the code only reads that path.

    Returns:
        The policy object returned by `load_policy`.
    """
    print("Downloading a pretrained expert.")
    # `load_policy` takes a vectorised environment in its `venv` parameter,
    # so pass the DummyVecEnv rather than the raw gymnasium environment.
    expert = load_policy(
        "ppo",
        path="model5zone.zip",
        venv=venv,
    )
    return expert

# Returns trajectories sampled from the expert policy
def sample_expert_transitions():
    """Roll out the expert on `venv` and return the flattened result.

    Loads the expert through `download_expert`, collects rollouts on the
    module-level `venv` until at least one episode has finished, and
    passes them through `rollout.flatten_trajectories`.

    Returns:
        Whatever `rollout.flatten_trajectories` produces for the rollouts.
    """
    # Load the expert policy
    expert = download_expert()

    print("Sampling expert transitions.")

    # Stop condition: no timestep minimum, at least one complete episode.
    one_episode = rollout.make_sample_until(min_timesteps=None, min_episodes=1)

    # Generate trajectories by running the expert in the environment.
    trajectories = rollout.rollout(
        expert,
        venv,
        sample_until=one_episode,
        rng=np.random.default_rng(),
        unwrap=False,
    )

    return rollout.flatten_trajectories(trajectories)


In [None]:
# Sample expert trajectories; the result is the flattened output of
# `sample_expert_transitions`.
transitions = sample_expert_transitions()

Downloading a pretrained expert.
Sampling expert transitions.
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


# **DAgger**

In [None]:
from imitation.algorithms.dagger import SimpleDAggerTrainer
import tempfile

## Experimento 1

### Definición y entrenamiento 

In [None]:
# Experiment 1: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 1: DAgger with a budget of 3504 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=3504)

### Evaluación 

In [None]:
# Experiment 1: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 2

### Definición y entrenamiento 

In [None]:
# Experiment 2: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 2: DAgger with a budget of 7008 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=7008)

### Evaluación 

In [None]:
# Experiment 2: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 3

### Definición y entrenamiento 

In [None]:
# Experiment 3: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 3: DAgger with a budget of 10512 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=10512)

### Evaluación 

In [None]:
# Experiment 3: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 4

### Definición y entrenamiento 

In [None]:
# Experiment 4: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 4: DAgger with a budget of 14016 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=14016)

### Evaluación 

In [None]:
# Experiment 4: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 5 

### Definición y entrenamiento 

In [None]:
# Experiment 5: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 5: DAgger with a budget of 17520 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=17520)

### Evaluación 

In [None]:
# Experiment 5: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 6

### Definición y entrenamiento 

In [None]:
# Experiment 6: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 6: DAgger with a budget of 21024 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=21024)

### Evaluación 

In [None]:
# Experiment 6: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 7

### Definición y entrenamiento 

In [None]:
# Experiment 7: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 7: DAgger with a budget of 24528 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=24528)

### Evaluación 

In [None]:
# Experiment 7: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 8

### Definición y entrenamiento 

In [None]:
# Experiment 8: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 8: DAgger with a budget of 28032 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=28032)

### Evaluación 

In [None]:
# Experiment 8: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 9

### Definición y entrenamiento 

In [None]:
# Experiment 9: brand-new behavioural-cloning learner whose observation and
# action spaces are taken from the vectorised environment.
bc_trainer = bc.BC(
    rng=np.random.default_rng(),
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)

In [None]:
# Experiment 9: DAgger with a budget of 31536 total timesteps. The scratch
# directory is temporary and is removed when the `with` block exits; the
# trainer object itself stays available afterwards.
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        bc_trainer=bc_trainer,
        expert_policy=download_expert(),
        scratch_dir=tmpdir,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(total_timesteps=31536)

### Evaluación 

In [None]:
# Experiment 9: evaluate the trained policy on `venv` over 5 episodes; only
# the first returned value is kept and printed.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

## Experimento 10

### Definición y entrenamiento 

In [None]:
# Experiment 10 (disabled): the trainer definition below is wrapped in a
# string literal, so running this cell does not create or replace
# `bc_trainer`.
"""
bc_trainer = bc.BC(
    observation_space=venv.observation_space,
    action_space=venv.action_space,
    rng=np.random.default_rng(),
)
"""

In [None]:
# Experiment 10 (disabled): the DAgger run below is wrapped in a string
# literal, so running this cell trains nothing.
# NOTE(review): the earlier experiments increase the budget in steps of 3504
# (3504, 7008, ..., 31536); 35038 breaks that pattern -- 35040 may have been
# intended. Confirm before re-enabling.
"""
with tempfile.TemporaryDirectory(prefix="dagger_example_") as tmpdir:
    print(tmpdir)
    dagger_trainer = SimpleDAggerTrainer(
        venv=venv,
        scratch_dir=tmpdir,
        expert_policy=download_expert(),
        bc_trainer=bc_trainer,
        rng=np.random.default_rng(),
    )
    dagger_trainer.train(35038)
"""


/tmp/dagger_example_0rkd4udp
Downloading a pretrained expert.


Progress: |******---------------------------------------------------------------------------------------------| 6%

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


Saving the dataset (0/1 shards):   0%|          | 0/1 [00:00<?, ? examples/s]

  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


Saving the dataset (0/1 shards):   0%|          | 0/1 [00:00<?, ? examples/s]

  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


Saving the dataset (0/1 shards):   0%|          | 0/1 [00:00<?, ? examples/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 0         |
|    ent_loss       | -0.0023   |
|    entropy        | 2.3       |
|    epoch          | 0         |
|    l2_loss        | 0         |
|    l2_norm        | 98.5      |
|    loss           | 2.3       |
|    neglogp        | 2.3       |
|    prob_true_act  | 0.1       |
|    samples_so_far | 32        |
| rollout/          |           |
|    return_max     | -2.32e+04 |
|    return_mean    | -2.32e+04 |
|    return_min     | -2.33e+04 |
|    return_std     | 19.5      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |**************************************************-------------------------------------------------| 50%

482batch [02:26, 40.24batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 500       |
|    ent_loss       | -0.000963 |
|    entropy        | 0.963     |
|    epoch          | 0         |
|    l2_loss        | 0         |
|    l2_norm        | 124       |
|    loss           | 0.581     |
|    neglogp        | 0.582     |
|    prob_true_act  | 0.62      |
|    samples_so_far | 16032     |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 32.2      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |******************************************---------------------------------------------------------| 42%

974batch [04:36, 30.40batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 1000      |
|    ent_loss       | -0.000684 |
|    entropy        | 0.684     |
|    epoch          | 0         |
|    l2_loss        | 0         |
|    l2_norm        | 138       |
|    loss           | 0.477     |
|    neglogp        | 0.478     |
|    prob_true_act  | 0.708     |
|    samples_so_far | 32032     |
| rollout/          |           |
|    return_max     | -2.42e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 62.4      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |******************************---------------------------------------------------------------------| 30%

1479batch [06:46, 34.86batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 1500      |
|    ent_loss       | -0.000632 |
|    entropy        | 0.632     |
|    epoch          | 0         |
|    l2_loss        | 0         |
|    l2_norm        | 148       |
|    loss           | 0.373     |
|    neglogp        | 0.373     |
|    prob_true_act  | 0.749     |
|    samples_so_far | 48032     |
| rollout/          |           |
|    return_max     | -2.42e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.45e+04 |
|    return_std     | 84.8      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |***************************************************------------------------------------------------| 51%

1970batch [09:06, 39.13batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 2000      |
|    ent_loss       | -0.000423 |
|    entropy        | 0.423     |
|    epoch          | 0         |
|    l2_loss        | 0         |
|    l2_norm        | 156       |
|    loss           | 0.275     |
|    neglogp        | 0.276     |
|    prob_true_act  | 0.814     |
|    samples_so_far | 64032     |
| rollout/          |           |
|    return_max     | -2.4e+04  |
|    return_mean    | -2.41e+04 |
|    return_min     | -2.41e+04 |
|    return_std     | 36.3      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |******************************---------------------------------------------------------------------| 30%

2485batch [11:16, 39.65batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 2500      |
|    ent_loss       | -0.000403 |
|    entropy        | 0.403     |
|    epoch          | 0         |
|    l2_loss        | 0         |
|    l2_norm        | 163       |
|    loss           | 0.387     |
|    neglogp        | 0.388     |
|    prob_true_act  | 0.795     |
|    samples_so_far | 80032     |
| rollout/          |           |
|    return_max     | -2.41e+04 |
|    return_mean    | -2.42e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 51.2      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |**************************************************-------------------------------------------------| 50%

2976batch [13:36, 38.87batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 3000      |
|    ent_loss       | -0.00047  |
|    entropy        | 0.47      |
|    epoch          | 0         |
|    l2_loss        | 0         |
|    l2_norm        | 169       |
|    loss           | 0.262     |
|    neglogp        | 0.263     |
|    prob_true_act  | 0.806     |
|    samples_so_far | 96032     |
| rollout/          |           |
|    return_max     | -2.42e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 27.8      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |*******************************************************************--------------------------------| 67%

3471batch [15:56, 48.44batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 3500      |
|    ent_loss       | -0.000415 |
|    entropy        | 0.415     |
|    epoch          | 1         |
|    l2_loss        | 0         |
|    l2_norm        | 175       |
|    loss           | 0.223     |
|    neglogp        | 0.224     |
|    prob_true_act  | 0.837     |
|    samples_so_far | 112032    |
| rollout/          |           |
|    return_max     | -2.42e+04 |
|    return_mean    | -2.42e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 42.1      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |*************************************************--------------------------------------------------| 49%

3997batch [18:06, 50.72batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 4000      |
|    ent_loss       | -0.000274 |
|    entropy        | 0.274     |
|    epoch          | 1         |
|    l2_loss        | 0         |
|    l2_norm        | 180       |
|    loss           | 0.119     |
|    neglogp        | 0.12      |
|    prob_true_act  | 0.904     |
|    samples_so_far | 128032    |
| rollout/          |           |
|    return_max     | -2.39e+04 |
|    return_mean    | -2.42e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 144       |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |********************************************************-------------------------------------------| 56%

4481batch [20:26, 51.55batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 4500      |
|    ent_loss       | -0.000319 |
|    entropy        | 0.319     |
|    epoch          | 1         |
|    l2_loss        | 0         |
|    l2_norm        | 185       |
|    loss           | 0.237     |
|    neglogp        | 0.237     |
|    prob_true_act  | 0.858     |
|    samples_so_far | 144032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.44e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 35.5      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |********************************-------------------------------------------------------------------| 32%

4958batch [22:36, 48.52batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 5000      |
|    ent_loss       | -0.000272 |
|    entropy        | 0.272     |
|    epoch          | 1         |
|    l2_loss        | 0         |
|    l2_norm        | 190       |
|    loss           | 0.149     |
|    neglogp        | 0.149     |
|    prob_true_act  | 0.884     |
|    samples_so_far | 160032    |
| rollout/          |           |
|    return_max     | -2.42e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 22.8      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |**************************************************************-------------------------------------| 62%

5474batch [24:56, 49.35batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 5500      |
|    ent_loss       | -0.000494 |
|    entropy        | 0.494     |
|    epoch          | 1         |
|    l2_loss        | 0         |
|    l2_norm        | 194       |
|    loss           | 0.314     |
|    neglogp        | 0.315     |
|    prob_true_act  | 0.767     |
|    samples_so_far | 176032    |
| rollout/          |           |
|    return_max     | -2.42e+04 |
|    return_mean    | -2.42e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 31.9      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************-----------------------------------------------| 52%

5983batch [27:06, 40.38batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 6000      |
|    ent_loss       | -0.000371 |
|    entropy        | 0.371     |
|    epoch          | 1         |
|    l2_loss        | 0         |
|    l2_norm        | 199       |
|    loss           | 0.202     |
|    neglogp        | 0.203     |
|    prob_true_act  | 0.848     |
|    samples_so_far | 192032    |
| rollout/          |           |
|    return_max     | -2.44e+04 |
|    return_mean    | -2.44e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 23.3      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |**************************************-------------------------------------------------------------| 38%

6467batch [29:16, 39.35batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 6500      |
|    ent_loss       | -0.000255 |
|    entropy        | 0.255     |
|    epoch          | 1         |
|    l2_loss        | 0         |
|    l2_norm        | 203       |
|    loss           | 0.167     |
|    neglogp        | 0.168     |
|    prob_true_act  | 0.88      |
|    samples_so_far | 208032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 17.4      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |*****************************************************************----------------------------------| 65%

6964batch [31:36, 48.99batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 7000      |
|    ent_loss       | -0.000164 |
|    entropy        | 0.164     |
|    epoch          | 2         |
|    l2_loss        | 0         |
|    l2_norm        | 207       |
|    loss           | 0.103     |
|    neglogp        | 0.103     |
|    prob_true_act  | 0.925     |
|    samples_so_far | 224032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.44e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 43.4      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |*************************************************--------------------------------------------------| 49%

7483batch [33:46, 53.15batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 7500      |
|    ent_loss       | -0.000353 |
|    entropy        | 0.353     |
|    epoch          | 2         |
|    l2_loss        | 0         |
|    l2_norm        | 210       |
|    loss           | 0.24      |
|    neglogp        | 0.241     |
|    prob_true_act  | 0.827     |
|    samples_so_far | 240032    |
| rollout/          |           |
|    return_max     | -2.42e+04 |
|    return_mean    | -2.42e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 23.6      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |**************************************-------------------------------------------------------------| 38%

7996batch [35:56, 53.83batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 8000      |
|    ent_loss       | -0.00027  |
|    entropy        | 0.27      |
|    epoch          | 2         |
|    l2_loss        | 0         |
|    l2_norm        | 214       |
|    loss           | 0.171     |
|    neglogp        | 0.171     |
|    prob_true_act  | 0.885     |
|    samples_so_far | 256032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 30.2      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |*************************************************************************--------------------------| 73%

8476batch [38:16, 40.61batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 8500      |
|    ent_loss       | -0.000253 |
|    entropy        | 0.253     |
|    epoch          | 2         |
|    l2_loss        | 0         |
|    l2_norm        | 218       |
|    loss           | 0.171     |
|    neglogp        | 0.171     |
|    prob_true_act  | 0.88      |
|    samples_so_far | 272032    |
| rollout/          |           |
|    return_max     | -2.44e+04 |
|    return_mean    | -2.44e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 31.6      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |***************************************************************------------------------------------| 63%

8972batch [40:26, 40.60batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 9000      |
|    ent_loss       | -0.000213 |
|    entropy        | 0.213     |
|    epoch          | 2         |
|    l2_loss        | 0         |
|    l2_norm        | 221       |
|    loss           | 0.191     |
|    neglogp        | 0.192     |
|    prob_true_act  | 0.888     |
|    samples_so_far | 288032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 20        |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |***************************************************------------------------------------------------| 51%

9452batch [42:36, 29.87batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 9500      |
|    ent_loss       | -0.0002   |
|    entropy        | 0.2       |
|    epoch          | 2         |
|    l2_loss        | 0         |
|    l2_norm        | 224       |
|    loss           | 0.167     |
|    neglogp        | 0.167     |
|    prob_true_act  | 0.896     |
|    samples_so_far | 304032    |
| rollout/          |           |
|    return_max     | -2.44e+04 |
|    return_mean    | -2.44e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 19.9      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |*****************************************----------------------------------------------------------| 41%

9978batch [44:46, 50.86batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 10000     |
|    ent_loss       | -0.000337 |
|    entropy        | 0.337     |
|    epoch          | 3         |
|    l2_loss        | 0         |
|    l2_norm        | 227       |
|    loss           | 0.218     |
|    neglogp        | 0.218     |
|    prob_true_act  | 0.843     |
|    samples_so_far | 320032    |
| rollout/          |           |
|    return_max     | -2.42e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 45.4      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |******************************---------------------------------------------------------------------| 30%

10475batch [46:56, 40.14batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 10500     |
|    ent_loss       | -0.000148 |
|    entropy        | 0.148     |
|    epoch          | 3         |
|    l2_loss        | 0         |
|    l2_norm        | 231       |
|    loss           | 0.0839    |
|    neglogp        | 0.084     |
|    prob_true_act  | 0.939     |
|    samples_so_far | 336032    |
| rollout/          |           |
|    return_max     | -2.44e+04 |
|    return_mean    | -2.44e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 19        |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |********************************************************-------------------------------------------| 57%

10953batch [49:16, 38.14batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 11000     |
|    ent_loss       | -0.000104 |
|    entropy        | 0.104     |
|    epoch          | 3         |
|    l2_loss        | 0         |
|    l2_norm        | 234       |
|    loss           | 0.154     |
|    neglogp        | 0.154     |
|    prob_true_act  | 0.944     |
|    samples_so_far | 352032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 35.9      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |********************************************-------------------------------------------------------| 44%

11483batch [51:26, 40.24batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 11500     |
|    ent_loss       | -0.000191 |
|    entropy        | 0.191     |
|    epoch          | 3         |
|    l2_loss        | 0         |
|    l2_norm        | 236       |
|    loss           | 0.185     |
|    neglogp        | 0.185     |
|    prob_true_act  | 0.892     |
|    samples_so_far | 368032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.43e+04 |
|    return_std     | 21.5      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |********************************-------------------------------------------------------------------| 32%

11989batch [53:36, 50.44batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 12000     |
|    ent_loss       | -0.000195 |
|    entropy        | 0.195     |
|    epoch          | 3         |
|    l2_loss        | 0         |
|    l2_norm        | 239       |
|    loss           | 0.0824    |
|    neglogp        | 0.0826    |
|    prob_true_act  | 0.932     |
|    samples_so_far | 384032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 21.2      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |******************************************************************---------------------------------| 66%

12469batch [55:57, 40.59batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 12500     |
|    ent_loss       | -0.000252 |
|    entropy        | 0.252     |
|    epoch          | 3         |
|    l2_loss        | 0         |
|    l2_norm        | 242       |
|    loss           | 0.19      |
|    neglogp        | 0.191     |
|    prob_true_act  | 0.875     |
|    samples_so_far | 400032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.44e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 33.8      |
---------------------------------


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |**************************************************-------------------------------------------------| 50%

12978batch [58:07, 39.07batch/s]

Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
---------------------------------
| batch_size        | 32        |
| bc/               |           |
|    batch          | 13000     |
|    ent_loss       | -0.000222 |
|    entropy        | 0.222     |
|    epoch          | 3         |
|    l2_loss        | 0         |
|    l2_norm        | 245       |
|    loss           | 0.163     |
|    neglogp        | 0.163     |
|    prob_true_act  | 0.891     |
|    samples_so_far | 416032    |
| rollout/          |           |
|    return_max     | -2.43e+04 |
|    return_mean    | -2.43e+04 |
|    return_min     | -2.44e+04 |
|    return_std     | 12.2      |
---------------------------------


13136batch [1:00:09,  3.64batch/s]


### Evaluación 

In [None]:
# Evaluate the policy learned by the DAgger trainer on the vectorised
# environment `venv` for 5 full episodes and report the reward.
#
# This call used to be wrapped in a triple-quoted string, which turned the
# cell into a no-op that only echoed the string, while the output stored in
# the notebook ("Reward: ...") came from the live code. It is restored here so
# that Restart Kernel -> Run All reproduces the recorded result.
#
# NOTE(review): `dagger_trainer` is expected to be created and trained in an
# earlier cell - confirm it is defined before this cell runs.
# NOTE(review): each episode is a full building simulation, so this cell is
# slow; lower `n_eval_episodes` for a quick smoke test.
#
# `evaluate_policy` returns a pair; the second element (documented by
# Stable-Baselines3 as the standard deviation of the episode rewards) is
# intentionally discarded.
reward, _ = evaluate_policy(dagger_trainer.policy, venv, n_eval_episodes=5)
print("Reward:", reward)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#


  gym.logger.warn("Casting input x to numpy array.")


Progress: |****************************************************************************************************| 100%
#----------------------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------------------#
Reward: -24323.533009137587
